os/textandloc/fontservices/textshaperplugin/IcuSource/layout/KhmerReordering.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
 *
sl@0
     3
 * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved 
sl@0
     4
 *
sl@0
     5
 * This file is a modification of the ICU file IndicReordering.h
sl@0
     6
 * by Jens Herden and Javier Sola for Khmer language 
sl@0
     7
 *
sl@0
     8
 */
sl@0
     9
sl@0
    10
#ifndef __KHMERREORDERING_H
sl@0
    11
#define __KHMERREORDERING_H
sl@0
    12
sl@0
    13
/**
sl@0
    14
 * \file
sl@0
    15
 * \internal
sl@0
    16
 */
sl@0
    17
sl@0
    18
// #include "LETypes.h"
sl@0
    19
// #include "OpenTypeTables.h"
sl@0
    20
sl@0
    21
U_NAMESPACE_BEGIN
sl@0
    22
sl@0
    23
class LEGlyphStorage;
sl@0
    24
sl@0
    25
// Vocabulary
//     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
//                     center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels,
//                     split vowels, signs... but there is only one base in a syllable, and it has to be coded as
//                     the first character of the syllable.
sl@0
    30
//     split vowel --> A vowel that has two parts placed separately (e.g. before and after the consonant).
//                     The Khmer language has five of them. Khmer split vowels either have one part before the
//                     base and one after the base, or they have a part before the base and a part above the base.
//                     The first part of all Khmer split vowels is the same character, identical to
//                     the glyph of Khmer dependent vowel SRA EI.
sl@0
    35
//     coeng -->  Modifier used in Khmer to construct coeng (subscript) consonants.
//                Unlike in Indian languages, the coeng modifies the consonant that follows it,
//                not the one preceding it. Each consonant has two forms, the base form and the subscript form:
//                the base form is the normal one (using the consonant's code point), the subscript form is
//                displayed when the combination coeng + consonant is encountered.
sl@0
    40
//     Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
//     Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
//     Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
sl@0
    43
//     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//                          if it is attached to a consonant of the first series or a consonant of the second series.
//                          Most consonants have an equivalent in the other series, but some of them exist only in
//                          one series (for example SA). If we want to use the consonant SA with a vowel sound
//                          that corresponds to a vowel accompanying a consonant
//                          of the other series, then we need to use a consonant shifter: TRIISAP (x17CA) or
//                          MUSIKATOAN (x17C9). TRIISAP changes a first series consonant to second series sound and
//                          MUSIKATOAN a second series consonant to have a first series vowel sound.
//                          Consonant shifters are both normally superscript marks, but, when they are followed by a
//                          superscript, they change shape and take the form of subscript dependent vowel SRA U.
//                          If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
//                          should be typed before the coeng. Unicode 4.0 breaks the standard and says that they should
//                          be placed after the coeng consonant.
sl@0
    56
//     Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
//                          Each vowel has its own position. Only one vowel per syllable is allowed.
sl@0
    58
//     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign is
//                          allowed in a syllable.
sl@0
    60
//
sl@0
    61
//     
sl@0
    62
sl@0
    63
struct KhmerClassTable    // This list must include all types of components that can be used inside a syllable
sl@0
    64
{
sl@0
    65
    enum CharClassValues  // order is important here! This order must be the same that is found in each horizontal 
sl@0
    66
                          // line in the statetable for Khmer (file KhmerReordering.cpp).
sl@0
    67
    {
sl@0
    68
        CC_RESERVED             =  0,
sl@0
    69
        CC_CONSONANT            =  1, // consonant of type 1 or independent vowel
sl@0
    70
        CC_CONSONANT2           =  2, // Consonant of type 2
sl@0
    71
        CC_CONSONANT3           =  3, // Consonant of type 3 
sl@0
    72
        CC_ZERO_WIDTH_NJ_MARK   =  4, // Zero Width non joiner character (0x200C)
sl@0
    73
        CC_CONSONANT_SHIFTER    =  5, 
sl@0
    74
        CC_ROBAT                =  6, // Khmer special diacritic accent -treated differently in state table
sl@0
    75
        CC_COENG                =  7, // Subscript consonant combining character
sl@0
    76
        CC_DEPENDENT_VOWEL      =  8, 
sl@0
    77
        CC_SIGN_ABOVE           =  9,
sl@0
    78
        CC_SIGN_AFTER           = 10,
sl@0
    79
        CC_ZERO_WIDTH_J_MARK    = 11, // Zero width joiner character
sl@0
    80
        CC_COUNT                = 12  // This is the number of character classes
sl@0
    81
    };
sl@0
    82
sl@0
    83
    enum CharClassFlags
sl@0
    84
    {
sl@0
    85
        CF_CLASS_MASK    = 0x0000FFFF,
sl@0
    86
sl@0
    87
        CF_CONSONANT     = 0x01000000,  // flag to speed up comparing
sl@0
    88
        CF_SPLIT_VOWEL   = 0x02000000,  // flag for a split vowel -> the first part is added in front of the syllable
sl@0
    89
        CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
sl@0
    90
        CF_COENG         = 0x08000000,  // flag to speed up comparing
sl@0
    91
        CF_SHIFTER       = 0x10000000,  // flag to speed up comparing
sl@0
    92
        CF_ABOVE_VOWEL   = 0x20000000,  // flag to speed up comparing
sl@0
    93
sl@0
    94
        // position flags
sl@0
    95
        CF_POS_BEFORE    = 0x00080000,
sl@0
    96
        CF_POS_BELOW     = 0x00040000,
sl@0
    97
        CF_POS_ABOVE     = 0x00020000,
sl@0
    98
        CF_POS_AFTER     = 0x00010000,
sl@0
    99
        CF_POS_MASK      = 0x000f0000
sl@0
   100
    };
sl@0
   101
sl@0
   102
    typedef le_uint32 CharClass;
sl@0
   103
sl@0
   104
    typedef le_int32 ScriptFlags;
sl@0
   105
sl@0
   106
    LEUnicode firstChar;   // for Khmer this will become x1780
sl@0
   107
    LEUnicode lastChar;    //  and this x17DF
sl@0
   108
    const CharClass *classTable;
sl@0
   109
sl@0
   110
    CharClass getCharClass(LEUnicode ch) const;
sl@0
   111
sl@0
   112
    static const KhmerClassTable *getKhmerClassTable();
sl@0
   113
};
sl@0
   114
sl@0
   115
sl@0
   116
class KhmerReordering /* not : public UObject because all methods are static */ {
sl@0
   117
public:
sl@0
   118
    static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
sl@0
   119
        LEUnicode *outChars, LEGlyphStorage &glyphStorage);
sl@0
   120
sl@0
   121
    static const LETag *getFeatureOrder();
sl@0
   122
sl@0
   123
private:
sl@0
   124
    // do not instantiate
sl@0
   125
    KhmerReordering();
sl@0
   126
sl@0
   127
    static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
sl@0
   128
sl@0
   129
};
sl@0
   130
sl@0
   131
sl@0
   132
U_NAMESPACE_END

#endif // __KHMERREORDERING_H