sl@0
|
1 |
/*
|
sl@0
|
2 |
*
|
sl@0
|
3 |
* (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved
|
sl@0
|
4 |
*
|
sl@0
|
5 |
* This file is a modification of the ICU file IndicReordering.h
|
sl@0
|
6 |
* by Jens Herden and Javier Sola for Khmer language
|
sl@0
|
7 |
*
|
sl@0
|
8 |
*/
|
sl@0
|
9 |
|
sl@0
|
10 |
#ifndef __KHMERREORDERING_H
|
sl@0
|
11 |
#define __KHMERREORDERING_H
|
sl@0
|
12 |
|
sl@0
|
13 |
/**
|
sl@0
|
14 |
* \file
|
sl@0
|
15 |
* \internal
|
sl@0
|
16 |
*/
|
sl@0
|
17 |
|
sl@0
|
18 |
// #include "LETypes.h"
|
sl@0
|
19 |
// #include "OpenTypeTables.h"
|
sl@0
|
20 |
|
sl@0
|
21 |
U_NAMESPACE_BEGIN
|
sl@0
|
22 |
|
sl@0
|
23 |
class LEGlyphStorage;
|
sl@0
|
24 |
|
sl@0
|
25 |
// Vocabulary
|
sl@0
|
26 |
// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
|
sl@0
|
27 |
// center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
|
sl@0
|
28 |
// split vowels, signs... but there is only one base in a syllable, it has to be coded as
|
sl@0
|
29 |
// the first character of the syllable.
|
sl@0
|
30 |
// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
|
sl@0
|
31 |
// Khmer language has five of them. Khmer split vowels either have one part before the
|
sl@0
|
32 |
// base and one after the base or they have a part before the base and a part above the base.
|
sl@0
|
33 |
// The first part of all Khmer split vowels is the same character, identical to
|
sl@0
|
34 |
// the glyph of Khmer dependent vowel SRA EI
|
sl@0
|
35 |
// coeng --> modifier used in Khmer to construct coeng (subscript) consonants
|
sl@0
|
36 |
// Unlike in Indian languages, the coeng modifies the consonant that follows it,
|
sl@0
|
37 |
// not the one preceding it. Each consonant has two forms, the base form and the subscript form:
|
sl@0
|
38 |
// the base form is the normal one (using the consonants code-point), the subscript form is
|
sl@0
|
39 |
// displayed when the combination coeng + consonant is encountered.
|
sl@0
|
40 |
// Consonant of type 1 -> A consonant which has a subscript form that only occupies space under a base consonant
|
sl@0
|
41 |
// Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
|
sl@0
|
42 |
// Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
|
sl@0
|
43 |
// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
|
sl@0
|
44 |
// if it is attached to a consonant of the first series or a consonant of the second series
|
sl@0
|
45 |
// Most consonants have an equivalent in the other series, but some of them exist only in
|
sl@0
|
46 |
// one series (for example SA). If we want to use the consonant SA with a vowel sound that
|
sl@0
|
47 |
// corresponds to a vowel accompanying a consonant
|
sl@0
|
48 |
// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
|
sl@0
|
49 |
// x17C9 and x17CA. TRIISAP changes a first series consonant to have a second series vowel sound and
|
sl@0
|
50 |
// MUSIKATOAN a second series consonant to have a first series vowel sound.
|
sl@0
|
51 |
// Consonant shifters are both normally superscript marks, but, when they are followed by a
|
sl@0
|
52 |
// superscript, they change shape and take the form of subscript dependent vowel SRA U.
|
sl@0
|
53 |
// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
|
sl@0
|
54 |
// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
|
sl@0
|
55 |
// be placed after the coeng consonant.
|
sl@0
|
56 |
// Dependent vowel -> In Khmer, dependent vowels can be placed above, below, before or after the base.
|
sl@0
|
57 |
// Each vowel has its own position. Only one vowel per syllable is allowed.
|
sl@0
|
58 |
// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are
|
sl@0
|
59 |
// allowed in a syllable.
|
sl@0
|
60 |
//
|
sl@0
|
61 |
//
|
sl@0
|
62 |
|
sl@0
|
63 |
struct KhmerClassTable // This list must include all types of components that can be used inside a syllable
|
sl@0
|
64 |
{
|
sl@0
|
65 |
enum CharClassValues // order is important here! This order must be the same that is found in each horizontal
|
sl@0
|
66 |
// line in the statetable for Khmer (file KhmerReordering.cpp).
|
sl@0
|
67 |
{
|
sl@0
|
68 |
CC_RESERVED = 0,
|
sl@0
|
69 |
CC_CONSONANT = 1, // consonant of type 1 or independent vowel
|
sl@0
|
70 |
CC_CONSONANT2 = 2, // Consonant of type 2
|
sl@0
|
71 |
CC_CONSONANT3 = 3, // Consonant of type 3
|
sl@0
|
72 |
CC_ZERO_WIDTH_NJ_MARK = 4, // Zero Width non joiner character (0x200C)
|
sl@0
|
73 |
CC_CONSONANT_SHIFTER = 5,
|
sl@0
|
74 |
CC_ROBAT = 6, // Khmer special diacritic accent -treated differently in state table
|
sl@0
|
75 |
CC_COENG = 7, // Subscript consonant combining character
|
sl@0
|
76 |
CC_DEPENDENT_VOWEL = 8,
|
sl@0
|
77 |
CC_SIGN_ABOVE = 9,
|
sl@0
|
78 |
CC_SIGN_AFTER = 10,
|
sl@0
|
79 |
CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character
|
sl@0
|
80 |
CC_COUNT = 12 // This is the number of character classes
|
sl@0
|
81 |
};
|
sl@0
|
82 |
|
sl@0
|
83 |
enum CharClassFlags
|
sl@0
|
84 |
{
|
sl@0
|
85 |
CF_CLASS_MASK = 0x0000FFFF,
|
sl@0
|
86 |
|
sl@0
|
87 |
CF_CONSONANT = 0x01000000, // flag to speed up comparing
|
sl@0
|
88 |
CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable
|
sl@0
|
89 |
CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable
|
sl@0
|
90 |
CF_COENG = 0x08000000, // flag to speed up comparing
|
sl@0
|
91 |
CF_SHIFTER = 0x10000000, // flag to speed up comparing
|
sl@0
|
92 |
CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing
|
sl@0
|
93 |
|
sl@0
|
94 |
// position flags
|
sl@0
|
95 |
CF_POS_BEFORE = 0x00080000,
|
sl@0
|
96 |
CF_POS_BELOW = 0x00040000,
|
sl@0
|
97 |
CF_POS_ABOVE = 0x00020000,
|
sl@0
|
98 |
CF_POS_AFTER = 0x00010000,
|
sl@0
|
99 |
CF_POS_MASK = 0x000f0000
|
sl@0
|
100 |
};
|
sl@0
|
101 |
|
sl@0
|
102 |
typedef le_uint32 CharClass;
|
sl@0
|
103 |
|
sl@0
|
104 |
typedef le_int32 ScriptFlags;
|
sl@0
|
105 |
|
sl@0
|
106 |
LEUnicode firstChar; // for Khmer this will become x1780
|
sl@0
|
107 |
LEUnicode lastChar; // and this x17DF
|
sl@0
|
108 |
const CharClass *classTable;
|
sl@0
|
109 |
|
sl@0
|
110 |
CharClass getCharClass(LEUnicode ch) const;
|
sl@0
|
111 |
|
sl@0
|
112 |
static const KhmerClassTable *getKhmerClassTable();
|
sl@0
|
113 |
};
|
sl@0
|
114 |
|
sl@0
|
115 |
|
sl@0
|
116 |
class KhmerReordering /* not : public UObject because all methods are static */ {
|
sl@0
|
117 |
public:
|
sl@0
|
118 |
static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
|
sl@0
|
119 |
LEUnicode *outChars, LEGlyphStorage &glyphStorage);
|
sl@0
|
120 |
|
sl@0
|
121 |
static const LETag *getFeatureOrder();
|
sl@0
|
122 |
|
sl@0
|
123 |
private:
|
sl@0
|
124 |
// do not instantiate
|
sl@0
|
125 |
KhmerReordering();
|
sl@0
|
126 |
|
sl@0
|
127 |
static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
|
sl@0
|
128 |
|
sl@0
|
129 |
};
|
sl@0
|
130 |
|
sl@0
|
131 |
|
sl@0
|
132 |
U_NAMESPACE_END
|
sl@0
|
133 |
#endif
|