os/textandloc/fontservices/textshaperplugin/IcuSource/layout/KhmerReordering.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/layout/KhmerReordering.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,133 @@
     1.4 +/*
     1.5 + *
     1.6 + * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved 
     1.7 + *
     1.8 + * This file is a modification of the ICU file IndicReordering.h
     1.9 + * by Jens Herden and Javier Sola for Khmer language 
    1.10 + *
    1.11 + */
    1.12 +
    1.13 +#ifndef __KHMERREORDERING_H
    1.14 +#define __KHMERREORDERING_H
    1.15 +
    1.16 +/**
    1.17 + * \file
    1.18 + * \internal
    1.19 + */
    1.20 +
    1.21 +// #include "LETypes.h"
    1.22 +// #include "OpenTypeTables.h"
    1.23 +
    1.24 +U_NAMESPACE_BEGIN
    1.25 +
    1.26 +class LEGlyphStorage;
    1.27 +
    1.28 +// Vocabulary 
    1.29 +//     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the 
    1.30 +//                     center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
    1.31 +//                     split vowels, signs... but there is only one base in a syllable, it has to be coded as
    1.32 +//                     the first character of the syllable.
    1.33 +//     split vowel --> vowel that has two parts placed separately (e.g. before and after the consonant).
    1.34 +//                     Khmer language has five of them. Khmer split vowels either have one part before the
    1.35 +//                     base and one after the base or they have a part before the base and a part above the base.
    1.36 +//                     The first part of all Khmer split vowels is the same character, identical to 
    1.37 +//                     the glyph of Khmer dependent vowel SRA EI   
    1.38 +//     coeng -->  modifier used in Khmer to construct coeng (subscript) consonants 
    1.39 +//                Unlike in Indian languages, the coeng modifies the consonant that follows it,
    1.40 +//                not the one preceding it. Each consonant has two forms, the base form and the subscript form;
    1.41 +//                the base form is the normal one (using the consonants code-point), the subscript form is
    1.42 +//                displayed when the combination coeng + consonant is encountered.
    1.43 +//     Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
    1.44 +//     Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
    1.45 +//     Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
    1.46 +//     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
    1.47 +//                          if it is attached to a consonant of the first series or a consonant of the second series
    1.48 +//                          Most consonants have an equivalent in the other series, but some of them exist only in
    1.49 +//                          one series (for example SA). If we want to use the consonant SA with a vowel sound that
    1.50 +//                          can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
    1.51 +//                          of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
    1.52 +//                          x17C9 and x17CA. TRIISAP changes a first series consonant to a second series sound and
    1.53 +//                          MUSIKATOAN a second series consonant to have a first series vowel sound.
    1.54 +//                          Consonant shifters are both normally superscript marks, but, when they are followed by a
    1.55 +//                          superscript, they change shape and take the form of subscript dependent vowel SRA U.
    1.56 +//                          If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
    1.57 +//                          should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
    1.58 +//                          be placed after the coeng consonant.
    1.59 +//     Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
    1.60 +//                          Each vowel has its own position. Only one vowel per syllable is allowed.
    1.61 +//     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
    1.62 +//                          allowed in a syllable.
    1.63 +//
    1.64 +//     
    1.65 +
    1.66 +struct KhmerClassTable    // Character classification for Khmer. This list must include all types of components that can be used inside a syllable
    1.67 +{
    1.68 +    enum CharClassValues  // Order is important here! This order must be the same as the one found in each horizontal
    1.69 +                          // line of the state table for Khmer (file KhmerReordering.cpp).
    1.70 +    {
    1.71 +        CC_RESERVED             =  0, // NOTE(review): presumably the class of any character with no other Khmer class - confirm in KhmerReordering.cpp
    1.72 +        CC_CONSONANT            =  1, // Consonant of type 1 or independent vowel
    1.73 +        CC_CONSONANT2           =  2, // Consonant of type 2 (subscript form goes under and before the base; only RO)
    1.74 +        CC_CONSONANT3           =  3, // Consonant of type 3 (subscript form goes under and after the base)
    1.75 +        CC_ZERO_WIDTH_NJ_MARK   =  4, // Zero width non-joiner character (0x200C)
    1.76 +        CC_CONSONANT_SHIFTER    =  5, // Consonant shifter: TRIISAP or MUSIKATOAN (x17C9, x17CA)
    1.77 +        CC_ROBAT                =  6, // Khmer special diacritic accent - treated differently in the state table
    1.78 +        CC_COENG                =  7, // Subscript consonant combining character; modifies the consonant that follows it
    1.79 +        CC_DEPENDENT_VOWEL      =  8, // Dependent vowel; placed above, below, before or after the base
    1.80 +        CC_SIGN_ABOVE           =  9, // Above sign
    1.81 +        CC_SIGN_AFTER           = 10, // Post sign
    1.82 +        CC_ZERO_WIDTH_J_MARK    = 11, // Zero width joiner character
    1.83 +        CC_COUNT                = 12  // This is the number of character classes (one more than the highest class value)
    1.84 +    };
    1.85 +
    1.86 +    enum CharClassFlags   // Bit flags; all values lie above the 16 bits covered by CF_CLASS_MASK
    1.87 +    {
    1.88 +        CF_CLASS_MASK    = 0x0000FFFF,  // NOTE(review): presumably extracts the CharClassValues part of a CharClass - confirm
    1.89 +
    1.90 +        CF_CONSONANT     = 0x01000000,  // flag to speed up comparing
    1.91 +        CF_SPLIT_VOWEL   = 0x02000000,  // flag for a split vowel -> the first part is added in front of the syllable
    1.92 +        CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
    1.93 +        CF_COENG         = 0x08000000,  // flag to speed up comparing
    1.94 +        CF_SHIFTER       = 0x10000000,  // flag to speed up comparing
    1.95 +        CF_ABOVE_VOWEL   = 0x20000000,  // flag to speed up comparing
    1.96 +
    1.97 +        // position flags (one bit per position relative to the base)
    1.98 +        CF_POS_BEFORE    = 0x00080000,
    1.99 +        CF_POS_BELOW     = 0x00040000,
   1.100 +        CF_POS_ABOVE     = 0x00020000,
   1.101 +        CF_POS_AFTER     = 0x00010000,
   1.102 +        CF_POS_MASK      = 0x000f0000   // bitwise OR of the four CF_POS_* flags above
   1.103 +    };
   1.104 +
   1.105 +    typedef le_uint32 CharClass;    // NOTE(review): presumably a CharClassValues value combined with CharClassFlags bits - confirm
   1.106 +
   1.107 +    typedef le_int32 ScriptFlags;   // not referenced by any other declaration in this header
   1.108 +
   1.109 +    LEUnicode firstChar;   // for Khmer this will become x1780
   1.110 +    LEUnicode lastChar;    //  and this x17DF
   1.111 +    const CharClass *classTable;    // NOTE(review): presumably one CharClass entry per code point in firstChar..lastChar - confirm
   1.112 +
   1.113 +    CharClass getCharClass(LEUnicode ch) const;    // returns the CharClass for ch; implementation is not in this header
   1.114 +
   1.115 +    static const KhmerClassTable *getKhmerClassTable();    // accessor for the Khmer table; implementation is not in this header
   1.116 +};
   1.117 +
   1.118 +
   1.119 +class KhmerReordering /* static-only helper; not : public UObject because all methods are static */ {
   1.120 +public:
   1.121 +    static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
   1.122 +        LEUnicode *outChars, LEGlyphStorage &glyphStorage);    // NOTE(review): presumably writes reordered text to outChars and returns its length - confirm in KhmerReordering.cpp
   1.123 +
   1.124 +    static const LETag *getFeatureOrder();    // NOTE(review): presumably the OpenType feature tags in application order - confirm
   1.125 +
   1.126 +private:
   1.127 +    // do not instantiate: the constructor is private and every method is static
   1.128 +    KhmerReordering();
   1.129 +
   1.130 +    static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);    // NOTE(review): presumably returns the index where the syllable starting at prev ends - confirm
   1.131 +
   1.132 +};
   1.133 +
   1.134 +
   1.135 +U_NAMESPACE_END
   1.136 +#endif