Symaptic: os/textandloc/fontservices/textshaperplugin/IcuSource/layout/KhmerReordering.h@bde4ae8d615e (annotated)

sl@0	1	/*
sl@0	2	*
sl@0	3	* (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved
sl@0	4	*
sl@0	5	* This file is a modification of the ICU file IndicReordering.h
sl@0	6	* by Jens Herden and Javier Sola for Khmer language
sl@0	7	*
sl@0	8	*/
sl@0	9
sl@0	10	#ifndef __KHMERREORDERING_H
sl@0	11	#define __KHMERREORDERING_H
sl@0	12
sl@0	13	/**
sl@0	14	* \file
sl@0	15	* \internal
sl@0	16	*/
sl@0	17
sl@0	18	// #include "LETypes.h"
sl@0	19	// #include "OpenTypeTables.h"
sl@0	20
sl@0	21	U_NAMESPACE_BEGIN
sl@0	22
sl@0	23	class LEGlyphStorage;
sl@0	24
sl@0	25	// Vocabulary
sl@0	26	// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
sl@0	27	// center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels,
sl@0	28	// split vowels, signs... but there is only one base in a syllable, and it has to be coded as
sl@0	29	// the first character of the syllable.
sl@0	30	// split vowel --> vowel that has two parts placed separately (e.g. before and after the consonant).
sl@0	31	// Khmer language has five of them. Khmer split vowels either have one part before the
sl@0	32	// base and one after the base, or they have a part before the base and a part above the base.
sl@0	33	// The first part of all Khmer split vowels is the same character, identical to
sl@0	34	// the glyph of Khmer dependent vowel SRA EI.
sl@0	35	// coeng --> modifier used in Khmer to construct coeng (subscript) consonants.
sl@0	36	// Unlike in Indic scripts, the coeng modifies the consonant that follows it,
sl@0	37	// not the one preceding it. Each consonant has two forms, the base form and the subscript form:
sl@0	38	// the base form is the normal one (using the consonant's code point); the subscript form is
sl@0	39	// displayed when the combination coeng + consonant is encountered.
sl@0	40	// Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
sl@0	41	// Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
sl@0	42	// Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
sl@0	43	// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
sl@0	44	// depending on whether it is attached to a consonant of the first series or of the second series.
sl@0	45	// Most consonants have an equivalent in the other series, but some of them exist only in
sl@0	46	// one series (for example SA). If we want to use the consonant SA with a vowel sound that
sl@0	47	// corresponds to a vowel accompanying a consonant of the other series, then we need to
sl@0	48	// use a consonant shifter: MUSIKATOAN (x17C9) or TRIISAP (x17CA).
sl@0	49	// TRIISAP changes a first series consonant to second series sound and
sl@0	50	// MUSIKATOAN a second series consonant to have a first series vowel sound.
sl@0	51	// Consonant shifters are both normally superscript marks, but, when they are followed by a
sl@0	52	// superscript, they change shape and take the form of subscript dependent vowel SRA U.
sl@0	53	// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
sl@0	54	// should be typed before the coeng. Unicode 4.0 breaks the standard and says that they should
sl@0	55	// be placed after the coeng consonant.
sl@0	56	// Dependent vowel -> In Khmer, dependent vowels can be placed above, below, before or after the base.
sl@0	57	// Each vowel has its own position. Only one vowel per syllable is allowed.
sl@0	58	// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are
sl@0	59	// allowed in a syllable.
sl@0	60	//
sl@0	61	//
sl@0	62
sl@0	63	struct KhmerClassTable // This list must include all types of components that can be used inside a syllable
sl@0	64	{
sl@0	65	enum CharClassValues // order is important here! This order must be the same that is found in each horizontal
sl@0	66	// line in the statetable for Khmer (file KhmerReordering.cpp).
sl@0	67	{
sl@0	68	CC_RESERVED = 0,
sl@0	69	CC_CONSONANT = 1, // consonant of type 1 or independent vowel
sl@0	70	CC_CONSONANT2 = 2, // Consonant of type 2
sl@0	71	CC_CONSONANT3 = 3, // Consonant of type 3
sl@0	72	CC_ZERO_WIDTH_NJ_MARK = 4, // Zero Width non joiner character (0x200C)
sl@0	73	CC_CONSONANT_SHIFTER = 5,
sl@0	74	CC_ROBAT = 6, // Khmer special diacritic accent -treated differently in state table
sl@0	75	CC_COENG = 7, // Subscript consonant combining character
sl@0	76	CC_DEPENDENT_VOWEL = 8,
sl@0	77	CC_SIGN_ABOVE = 9,
sl@0	78	CC_SIGN_AFTER = 10,
sl@0	79	CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character
sl@0	80	CC_COUNT = 12 // This is the number of character classes
sl@0	81	};
sl@0	82
sl@0	83	enum CharClassFlags
sl@0	84	{
sl@0	85	CF_CLASS_MASK = 0x0000FFFF,
sl@0	86
sl@0	87	CF_CONSONANT = 0x01000000, // flag to speed up comparing
sl@0	88	CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable
sl@0	89	CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable
sl@0	90	CF_COENG = 0x08000000, // flag to speed up comparing
sl@0	91	CF_SHIFTER = 0x10000000, // flag to speed up comparing
sl@0	92	CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing
sl@0	93
sl@0	94	// position flags
sl@0	95	CF_POS_BEFORE = 0x00080000,
sl@0	96	CF_POS_BELOW = 0x00040000,
sl@0	97	CF_POS_ABOVE = 0x00020000,
sl@0	98	CF_POS_AFTER = 0x00010000,
sl@0	99	CF_POS_MASK = 0x000f0000
sl@0	100	};
sl@0	101
sl@0	102	typedef le_uint32 CharClass;
sl@0	103
sl@0	104	typedef le_int32 ScriptFlags;
sl@0	105
sl@0	106	LEUnicode firstChar; // for Khmer this will become x1780
sl@0	107	LEUnicode lastChar; // and this x17DF
sl@0	108	const CharClass *classTable;
sl@0	109
sl@0	110	CharClass getCharClass(LEUnicode ch) const;
sl@0	111
sl@0	112	static const KhmerClassTable *getKhmerClassTable();
sl@0	113	};
sl@0	114
sl@0	115
sl@0	116	class KhmerReordering /* not : public UObject because all methods are static */ {
sl@0	117	public:
sl@0	118	static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
sl@0	119	LEUnicode *outChars, LEGlyphStorage &glyphStorage);
sl@0	120
sl@0	121	static const LETag *getFeatureOrder();
sl@0	122
sl@0	123	private:
sl@0	124	// do not instantiate
sl@0	125	KhmerReordering();
sl@0	126
sl@0	127	static le_int32 findSyllable(const KhmerClassTable classTable, const LEUnicode chars, le_int32 prev, le_int32 charCount);
sl@0	128
sl@0	129	};
sl@0	130
sl@0	131
sl@0	132	U_NAMESPACE_END
sl@0	133	#endif

author	sl@SLION-WIN7.fritz.box
	Fri, 15 Jun 2012 03:10:57 +0200
changeset 0	bde4ae8d615e
permissions	-rw-r--r--