1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/layout/KhmerReordering.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,133 @@
1.4 +/*
1.5 + *
1.6 + * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved
1.7 + *
1.8 + * This file is a modification of the ICU file IndicReordering.h
1.9 + * by Jens Herden and Javier Sola for Khmer language
1.10 + *
1.11 + */
1.12 +
1.13 +#ifndef __KHMERREORDERING_H
1.14 +#define __KHMERREORDERING_H
1.15 +
1.16 +/**
1.17 + * \file
1.18 + * \internal
1.19 + */
1.20 +
1.21 +// #include "LETypes.h"
1.22 +// #include "OpenTypeTables.h"
1.23 +
1.24 +U_NAMESPACE_BEGIN
1.25 +
1.26 +class LEGlyphStorage;
1.27 +
1.28 +// Vocabulary
1.29 +// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
1.30 +// center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels,
1.31 +// split vowels, signs... but there is only one base in a syllable, and it has to be coded as
1.32 +// the first character of the syllable.
1.33 +// split vowel --> vowel that has two parts placed separately (e.g. before and after the consonant).
1.34 +// The Khmer language has five of them. Khmer split vowels either have one part before the
1.35 +// base and one after the base, or they have a part before the base and a part above the base.
1.36 +// The first part of all Khmer split vowels is the same character, identical to
1.37 +// the glyph of Khmer dependent vowel SRA EI.
1.38 +// coeng --> modifier used in Khmer to construct coeng (subscript) consonants.
1.39 +// Unlike in Indian languages, the coeng modifies the consonant that follows it,
1.40 +// not the one preceding it. Each consonant has two forms, the base form and the subscript form:
1.41 +// the base form is the normal one (using the consonant's code point); the subscript form is
1.42 +// displayed when the combination coeng + consonant is encountered.
1.43 +// Consonant of type 1 -> A consonant whose subscript form only occupies space under the base consonant
1.44 +// Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
1.45 +// Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
1.46 +// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
1.47 +// depending on whether it is attached to a consonant of the first series or of the second series.
1.48 +// Most consonants have an equivalent in the other series, but some of them exist only in
1.49 +// one series (for example SA). If we want to use the consonant SA with a vowel sound that
1.50 +// normally only occurs with a vowel accompanying a consonant
1.51 +// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
1.52 +// (x17CA and x17C9 respectively). TRIISAP changes a first series consonant to have a second series vowel sound and
1.53 +// MUSIKATOAN a second series consonant to have a first series vowel sound.
1.54 +// Both consonant shifters are normally superscript marks, but when they are followed by a
1.55 +// superscript, they change shape and take the form of subscript dependent vowel SRA U.
1.56 +// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
1.57 +// should be typed before the coeng. Unicode 4.0 breaks the standard and says that they should
1.58 +// be placed after the coeng consonant.
1.59 +// Dependent vowel -> In Khmer, dependent vowels can be placed above, below, before or after the base.
1.60 +// Each vowel has its own position. Only one vowel per syllable is allowed.
1.61 +// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign is
1.62 +// allowed in a syllable.
1.63 +//
1.64 +//
1.65 +
1.66 +struct KhmerClassTable // Character-class table for Khmer. The class list below must include all types of components that can be used inside a syllable
1.67 +{
1.68 + enum CharClassValues // Order is important here! It must be the same as the column order found in each row
1.69 + // of the state table for Khmer (file KhmerReordering.cpp).
1.70 + {
1.71 + CC_RESERVED = 0, // NOTE(review): presumably the class of characters that take no part in a syllable -- confirm in KhmerReordering.cpp
1.72 + CC_CONSONANT = 1, // Consonant of type 1 or independent vowel
1.73 + CC_CONSONANT2 = 2, // Consonant of type 2
1.74 + CC_CONSONANT3 = 3, // Consonant of type 3
1.75 + CC_ZERO_WIDTH_NJ_MARK = 4, // Zero width non-joiner character (0x200C)
1.76 + CC_CONSONANT_SHIFTER = 5, // Consonant shifter (TRIISAP or MUSIKATOAN)
1.77 + CC_ROBAT = 6, // Khmer special diacritic accent - treated differently in the state table
1.78 + CC_COENG = 7, // Subscript consonant combining character
1.79 + CC_DEPENDENT_VOWEL = 8, // Dependent vowel
1.80 + CC_SIGN_ABOVE = 9, // Sign placed above
1.81 + CC_SIGN_AFTER = 10, // Sign placed after (post sign)
1.82 + CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character (0x200D)
1.83 + CC_COUNT = 12 // This is the number of character classes; keep it last and equal to the count of entries above
1.84 + };
1.85 +
1.86 + enum CharClassFlags // Bit flags; none of the values below overlap, so they can be OR-ed together
1.87 + {
1.88 + CF_CLASS_MASK = 0x0000FFFF, // low 16 bits; presumably selects the CharClassValues part of a CharClass -- confirm in KhmerReordering.cpp
1.89 +
1.90 + CF_CONSONANT = 0x01000000, // convenience flag to speed up comparing
1.91 + CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable
1.92 + CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable
1.93 + CF_COENG = 0x08000000, // convenience flag to speed up comparing
1.94 + CF_SHIFTER = 0x10000000, // convenience flag to speed up comparing
1.95 + CF_ABOVE_VOWEL = 0x20000000, // convenience flag to speed up comparing
1.96 +
1.97 + // position flags (one bit each; CF_POS_MASK is the union of the four)
1.98 + CF_POS_BEFORE = 0x00080000,
1.99 + CF_POS_BELOW = 0x00040000,
1.100 + CF_POS_ABOVE = 0x00020000,
1.101 + CF_POS_AFTER = 0x00010000,
1.102 + CF_POS_MASK = 0x000f0000
1.103 + };
1.104 +
1.105 + typedef le_uint32 CharClass; // element type of classTable and return type of getCharClass(); presumably a CharClassValues value OR-ed with CharClassFlags bits -- confirm
1.106 +
1.107 + typedef le_int32 ScriptFlags; // NOTE(review): not referenced anywhere else in this header
1.108 +
1.109 + LEUnicode firstChar; // for Khmer this will become x1780
1.110 + LEUnicode lastChar; // and this x17DF
1.111 + const CharClass *classTable; // presumably one CharClass entry per code point from firstChar to lastChar -- confirm against the table definition
1.112 +
1.113 + CharClass getCharClass(LEUnicode ch) const; // returns the CharClass for ch; defined outside this header, so handling of code points outside the table must be checked there
1.114 +
1.115 + static const KhmerClassTable *getKhmerClassTable(); // accessor for the Khmer class table; defined outside this header
1.116 +};
1.117 +
1.118 +
1.119 +class KhmerReordering /* not : public UObject because all methods are static */ {
1.120 +public:
1.121 + static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, // read-only input characters, a character count and a script code
1.122 + LEUnicode *outChars, LEGlyphStorage &glyphStorage); // writable output buffer and glyph storage; NOTE(review): presumably returns the number of characters written to outChars -- confirm in KhmerReordering.cpp
1.123 +
1.124 + static const LETag *getFeatureOrder(); // NOTE(review): presumably the ordered list of feature tags to apply to Khmer text -- confirm in KhmerReordering.cpp
1.125 +
1.126 +private:
1.127 + // do not instantiate: the constructor is private and every other member is static
1.128 + KhmerReordering();
1.129 +
1.130 + static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); // NOTE(review): presumably returns the index where the syllable starting at prev ends -- confirm in KhmerReordering.cpp
1.131 +
1.132 +};
1.133 +
1.134 +
1.135 +U_NAMESPACE_END
1.136 +#endif