os/textandloc/fontservices/textshaperplugin/IcuSource/layout/IndicReordering.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  *
     3  * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
     4  *
     5  */
     6 
     7 #ifndef __INDICREORDERING_H
     8 #define __INDICREORDERING_H
     9 
    10 /**
    11  * \file
    12  * \internal
    13  */
    14 
    15 #include "LETypes.h"
    16 #include "OpenTypeTables.h"
    17 
    18 U_NAMESPACE_BEGIN
    19 
    20 // Characters that get referred to by name...
       // The values are the Unicode code points U+200C (ZERO WIDTH NON-JOINER)
       // and U+200D (ZERO WIDTH JOINER).
    21 #define C_SIGN_ZWNJ           0x200C
    22 #define C_SIGN_ZWJ            0x200D
    23 
    24 // Character class values
       // A CharClass stores one of these values in its low 16 bits (CF_CLASS_MASK);
       // the IndicClassTable predicates mask with CF_CLASS_MASK before comparing.
    25 #define CC_RESERVED               		0U
    26 #define CC_VOWEL_MODIFIER         		1U
    27 #define CC_STRESS_MARK            		2U
    28 #define CC_INDEPENDENT_VOWEL      		3U
    29 #define CC_INDEPENDENT_VOWEL_2    		4U
    30 #define CC_CONSONANT              		5U
    31 #define CC_CONSONANT_WITH_NUKTA   		6U
    32 #define CC_NUKTA                  		7U
       // CC_DEPENDENT_VOWEL .. CC_SPLIT_VOWEL_PIECE_3 must stay contiguous:
       // IndicClassTable::isMatra() tests them as a single range.
    33 #define CC_DEPENDENT_VOWEL        		8U
    34 #define CC_SPLIT_VOWEL_PIECE_1    		9U
    35 #define CC_SPLIT_VOWEL_PIECE_2   		10U
    36 #define CC_SPLIT_VOWEL_PIECE_3   		11U
    37 #define CC_VIRAMA                		12U
    38 #define CC_ZERO_WIDTH_MARK       		13U
    39 // Added by Nokia -- special case
    40 #define CC_INDEPENDENT_VOWEL_A			14U
    41 // Added by Nokia -- special case with independent vowel A
       // (also accepted as a matra by IndicClassTable::isMatra())
    42 #define CC_DEPENDENT_VOWEL_CANDRA_E		15U
    43 // Added by Nokia -- special case for Kannada Ra
    44 #define CC_CONSONANT_KANNADA_BENGALI_RA	16U
    45 // Added by Nokia -- special case for Tamil independent vowel O
    46 #define CC_INDEPENDENT_VOWEL_TAMIL_O	17U
    47 
    48 // Added by Nokia -- special case for Gurmukhi Bearer ARA and independent A
    49 #define CC_GUR_BEARER_A					18U
    50 // Added by Nokia -- special case for Gurmukhi Bearer IRI
    51 #define CC_GUR_BEARER_I					19U
    52 // Added by Nokia -- special case for Gurmukhi Bearer URA
    53 #define CC_GUR_BEARER_U					20U
    54 
    55 // Added by Nokia -- special case for a Gurmukhi dependent vowel which can combine with Bearer ARA
       // (the three CC_GUR_DEPENDENT_VOWEL_* classes are accepted as matras by IndicClassTable::isMatra())
    56 #define CC_GUR_DEPENDENT_VOWEL_A		21U
    57 // Added by Nokia -- special case for a Gurmukhi dependent vowel which can combine with Bearer IRI
    58 #define CC_GUR_DEPENDENT_VOWEL_I		22U
    59 // Added by Nokia -- special case for a Gurmukhi dependent vowel which can combine with Bearer URA
    60 #define CC_GUR_DEPENDENT_VOWEL_U		23U
    61 
       // Number of character class values defined above (one more than the highest value).
    62 #define CC_COUNT                 		24U
    63 
    64 // Character class flags
       // Bit layout of a CharClass word, as given by the masks below:
       //   bits  0-15  class value (CC_*), selected by CF_CLASS_MASK
       //   bits 16-19  split matra index, selected by CF_INDEX_MASK
       //   bits 20-21  position (CF_POS_*), selected by CF_POS_MASK
       //   bits 22-31  individual property flags
    65 #define CF_CLASS_MASK    0x0000FFFFU
    66 
    67 #define CF_CONSONANT     0x80000000U
    68 
    69 #define CF_REPH          0x40000000U
    70 #define CF_VATTU         0x20000000U
    71 #define CF_BELOW_BASE    0x10000000U
    72 #define CF_POST_BASE     0x08000000U
    73 #define CF_LENGTH_MARK   0x04000000U
    74 // Added by Nokia -- special case for Bengali Ya
    75 #define CF_CONSONANT_BENGALI_YA			0x00800000U
    76 
    77 // Added by Nokia -- 1922 Malayalam Chillu -->
    78 #define CF_CONSONANT_MLYL_CHILLU      0x00400000U
       // NOTE(review): despite the CC_ prefix, the values below are not in the
       // 0..CC_COUNT-1 class range; they look like Malayalam code points
       // (e.g. 0xD46 = U+0D46) -- confirm against their uses.
    79 #define CC_DEPENDENT_VOWEL_SIGN_MLYL_E 0xD46U
    80 #define CC_CONSONANT_MLYL_YA 0xD2FU
    81 #define CC_CONSONANT_MLYL_VA 0xD35U
    82 #define CC_CONSONANT_MLYL_RA 0xD30U
    83 #define CC_CONSONANT_MLYL_RRA 0xD31U
    84 #define CC_CONSONANT_MLYL_LLA 0xD33U
    85 #define CC_CONSONANT_MLYL_LLLA 0xD34U
    86 // <-- 1922 mlyl
    87 
       // Two-bit position field; CF_POS_AFTER is the all-zero encoding.
    88 #define CF_POS_BEFORE    0x00300000U
    89 #define CF_POS_BELOW     0x00200000U
    90 #define CF_POS_ABOVE     0x00100000U
    91 #define CF_POS_AFTER     0x00000000U
    92 #define CF_POS_MASK      0x00300000U
    93 
       // Split matra index field. IndicClassTable::getSplitMatra() extracts it with
       // (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT and subtracts 1 before indexing,
       // so a field value of 0 means "no split matra" (see isSplitMatra()).
    94 #define CF_INDEX_MASK    0x000F0000U
    95 #define CF_INDEX_SHIFT   16
    96 
    97 // Script flag bits
       // NOTE(review): presumably these are the bits stored in IndicClassTable::scriptFlags
       // (type ScriptFlags) -- confirm against the per-script class tables.
    98 #define SF_MATRAS_AFTER_BASE     0x80000000U
    99 #define SF_REPH_AFTER_BELOW      0x40000000U
   100 #define SF_EYELASH_RA            0x20000000U
   101 #define SF_MPRE_FIXUP            0x10000000U
   102 // Added by Nokia for a matra combined with the final form of YA in Gurmukhi.
   103 // The flags SF_MATRAS_AFTER_BASE and SF_MATRAS_AFTER_POSTBASE cannot coexist.
   104 // With SF_MATRAS_AFTER_BASE set: Mbelow, Mabove and Mpost are attached to the base consonant.
   105 // With SF_MATRAS_AFTER_POSTBASE set: Mbelow, Mabove and Mpost are attached to the post-base consonant.
   106 // With neither SF_MATRAS_AFTER_BASE nor SF_MATRAS_AFTER_POSTBASE set: Mbelow and Mabove are attached to
   107 // the base (or base + below-base consonant cluster), but Mpost is attached to the post-base consonant.
   108 #define SF_MATRAS_AFTER_POSTBASE		 0x08000000U
   109 
       // The low 16 bits form a separate numeric field selected by SF_POST_BASE_LIMIT_MASK.
       // NOTE(review): presumably a limit on post-base consonants, with
       // SF_NO_POST_BASE_LIMIT as the "unlimited" value -- confirm in the reordering code.
   110 #define SF_POST_BASE_LIMIT_MASK  0x0000FFFFU
   111 #define SF_NO_POST_BASE_LIMIT    0x00007FFFU
   112 
   113 typedef LEUnicode SplitMatra[3];
       // A SplitMatra is an array of three LEUnicode values;
       // IndicClassTable::splitMatraTable points to an array of these entries.
   114 
       // Forward declarations: this header only uses these types through
       // pointers and references.
   115 class MPreFixups;
   116 class LEGlyphStorage;
   117 
   118 struct IndicClassTable
   119 {
           // Character class data for one script, plus predicates over class values.
           // Every predicate exists in two forms: a static one taking an already
           // looked-up CharClass, and a member one taking a character, which calls
           // getCharClass() first and forwards to the static form.

           // A class word: CC_* value in the low 16 bits plus CF_* flag bits.
   120     typedef le_uint32 CharClass;
   121     typedef le_uint32 ScriptFlags;
   122 
           // NOTE(review): firstChar/lastChar are presumably the code point range
           // covered by classTable -- confirm in getCharClass(), which is defined
           // outside this header.
   123     LEUnicode firstChar;
   124     LEUnicode lastChar;
           // Returned unchanged by getWorstCaseExpansion().
   125     le_int32 worstCaseExpansion;
   126     ScriptFlags scriptFlags;
   127     const CharClass *classTable;
           // Indexed by getSplitMatra() with (index field of the class word) - 1.
   128     const SplitMatra *splitMatraTable;
   129 
   130     inline le_int32 getWorstCaseExpansion() const;
   131 
           // Class lookup for a character; defined outside this header.
   132     CharClass getCharClass(LEUnicode ch) const;
   133 
           // Returns the splitMatraTable entry selected by the CF_INDEX_MASK bits
           // of charClass; only meaningful when isSplitMatra(charClass) is true.
   134     inline const SplitMatra *getSplitMatra(CharClass charClass) const;
   135 
           // Per-character predicates: each one is <predicate>(getCharClass(ch)).
   136     inline le_bool isVowelModifier(LEUnicode ch) const;
   137     inline le_bool isStressMark(LEUnicode ch) const;
   138     inline le_bool isConsonant(LEUnicode ch) const;
   139     inline le_bool isReph(LEUnicode ch) const;
   140     inline le_bool isVirama(LEUnicode ch) const;
   141     inline le_bool isNukta(LEUnicode ch) const;
   142     inline le_bool isVattu(LEUnicode ch) const;
   143     inline le_bool isMatra(LEUnicode ch) const;
   144     inline le_bool isSplitMatra(LEUnicode ch) const;
   145     inline le_bool isLengthMark(LEUnicode ch) const;
   146     inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const;
   147     inline le_bool hasPostBaseForm(LEUnicode ch) const;
   148     inline le_bool hasBelowBaseForm(LEUnicode ch) const;
   149     // Added by Nokia: For special case Bengali Ya
   150     inline le_bool isBengaliYa(LEUnicode ch) const;
   151     // Added by Nokia -- 1922 mlyl -->
   152     inline le_bool isMlylChillu(LEUnicode ch) const;
   153     // <-- 1922 mlyl
   154     
           // Class-word predicates. isVowelModifier, isStressMark, isNukta, isVirama
           // and isMatra compare the CF_CLASS_MASK bits against CC_* values;
           // isSplitMatra tests the CF_INDEX_MASK field; the rest test a CF_* flag bit.
   155     inline static le_bool isVowelModifier(CharClass charClass);
   156     inline static le_bool isStressMark(CharClass charClass);
   157     inline static le_bool isConsonant(CharClass charClass);
   158     inline static le_bool isReph(CharClass charClass);
   159     inline static le_bool isVirama(CharClass charClass);
   160     inline static le_bool isNukta(CharClass charClass);
   161     inline static le_bool isVattu(CharClass charClass);
   162     inline static le_bool isMatra(CharClass charClass);
   163     inline static le_bool isSplitMatra(CharClass charClass);
   164     inline static le_bool isLengthMark(CharClass charClass);
   165     inline static le_bool hasPostOrBelowBaseForm(CharClass charClass);
   166     inline static le_bool hasPostBaseForm(CharClass charClass);
   167     inline static le_bool hasBelowBaseForm(CharClass charClass);
   168     // Added by Nokia: For special case Bengali Ya
   169     inline static le_bool isBengaliYa(CharClass charClass);
   170     // Added by Nokia -- 1922 mlyl -->
   171     inline static le_bool isMlylChillu(CharClass charClass);
   172     // <-- 1922 mlyl
   173     
           // Defined outside this header.
           // NOTE(review): presumably returns the table for scriptCode -- confirm
           // what is returned for scripts without a table.
   174     static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
   175 };
   176 
   177 class IndicReordering /* not : public UObject because all methods are static */ {
       // All members are static and the constructor is private, so this class is
       // used as a scope for functions only. The functions are defined outside
       // this header; the notes below are limited to what the signatures show.
   178 public:
           // NOTE(review): presumably forwards to the IndicClassTable for scriptCode -- confirm.
   179     static le_int32 getWorstCaseExpansion(le_int32 scriptCode);
   180 
           // Takes charCount input characters in theChars and writes to outChars,
           // glyphStorage and *outMPreFixups; errors are reported through success.
           // NOTE(review): the return value is presumably the output character
           // count, and outChars presumably needs room for the worst-case
           // expansion -- confirm in the implementation.
   181     static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
   182         LEUnicode *outChars, LEGlyphStorage &glyphStorage,
   183         MPreFixups **outMPreFixups, LEErrorCode& success);
   184 
           // NOTE(review): presumably applies the fixups produced by reorder() to
           // glyphStorage; ownership of mpreFixups is not visible here -- confirm.
   185     static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage,
   186         LEErrorCode& success);
   187 
   188     static const LETag *getFeatureOrder();
   189 
   190 private:
   191     // do not instantiate
   192     IndicReordering();
   193 
           // NOTE(review): presumably returns the index just past the syllable that
           // starts at prev within chars[0..charCount) -- confirm.
   194     static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
   195 
   196 };
   197 
   198 inline le_int32 IndicClassTable::getWorstCaseExpansion() const
   199 {
   200     return worstCaseExpansion;
   201 }
   202 
   203 inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const
   204 {
   205     le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT;
   206 
   207     return &splitMatraTable[index - 1];
   208 }
   209 
   210 inline le_bool IndicClassTable::isVowelModifier(CharClass charClass)
   211 {
   212     return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER;
   213 }
   214 
   215 inline le_bool IndicClassTable::isStressMark(CharClass charClass)
   216 {
   217     return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK;
   218 }
   219 
   220 inline le_bool IndicClassTable::isConsonant(CharClass charClass)
   221 {
   222     return (charClass & CF_CONSONANT) != 0;
   223 }
   224 
   225 inline le_bool IndicClassTable::isReph(CharClass charClass)
   226 {
   227     return (charClass & CF_REPH) != 0;
   228 }
   229 
   230 inline le_bool IndicClassTable::isNukta(CharClass charClass)
   231 {
   232     return (charClass & CF_CLASS_MASK) == CC_NUKTA;
   233 }
   234 
   235 inline le_bool IndicClassTable::isVirama(CharClass charClass)
   236 {
   237     return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
   238 }
   239 
   240 inline le_bool IndicClassTable::isVattu(CharClass charClass)
   241 {
   242     return (charClass & CF_VATTU) != 0;
   243 }
   244 
   245 inline le_bool IndicClassTable::isMatra(CharClass charClass)
   246 {
   247     charClass &= CF_CLASS_MASK;
   248 	
   249 	// Added special CANDRA E char class check to enable formation of Devanagari CANDRA A
   250     // Added speical CC_GUR_DEPENDENT_VOWEL_* to support decompositions of the independent
   251     // vowels into a sequence of a vowel bearer and a depending vowel sign
   252     return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3
   253     	   || charClass == CC_DEPENDENT_VOWEL_CANDRA_E
   254     	   || charClass == CC_GUR_DEPENDENT_VOWEL_A
   255     	   || charClass == CC_GUR_DEPENDENT_VOWEL_I
   256     	   || charClass == CC_GUR_DEPENDENT_VOWEL_U;
   257 }
   258 
   259 inline le_bool IndicClassTable::isSplitMatra(CharClass charClass)
   260 {
   261     return (charClass & CF_INDEX_MASK) != 0;
   262 }
   263 
   264 inline le_bool IndicClassTable::isLengthMark(CharClass charClass)
   265 {
   266     return (charClass & CF_LENGTH_MARK) != 0;
   267 }
   268 
   269 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass)
   270 {
   271     return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0;
   272 }
   273 
   274 inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
   275 {
   276     return (charClass & CF_POST_BASE) != 0;
   277 }
   278 
   279 inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
   280 {
   281     return (charClass & CF_BELOW_BASE) != 0;
   282 }
   283 
   284 // Added by Nokia -- For determining whether a character is a Bengali Ya
   285 inline le_bool IndicClassTable::isBengaliYa(CharClass charClass)
   286 	{
   287 	    return (charClass & CF_CONSONANT_BENGALI_YA) != 0;
   288 	}
   289 
   290 // Added by Nokia -- 1922 mlyl -->
   291 inline le_bool IndicClassTable::isMlylChillu(CharClass charClass)
   292 {
   293     return (charClass & CF_CONSONANT_MLYL_CHILLU) != 0;
   294 }
   295 // <-- 1922 mlyl
   296 
   297 inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const
   298 {
   299     return isVowelModifier(getCharClass(ch));
   300 }
   301 
   302 inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const
   303 {
   304     return isStressMark(getCharClass(ch));
   305 }
   306 
   307 inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const
   308 {
   309     return isConsonant(getCharClass(ch));
   310 }
   311 
   312 inline le_bool IndicClassTable::isReph(LEUnicode ch) const
   313 {
   314     return isReph(getCharClass(ch));
   315 }
   316 
   317 inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
   318 {
   319     return isVirama(getCharClass(ch));
   320 }
   321 
   322 inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
   323 {
   324     return isNukta(getCharClass(ch));
   325 }
   326 
   327 inline le_bool IndicClassTable::isVattu(LEUnicode ch) const
   328 {
   329     return isVattu(getCharClass(ch));
   330 }
   331 
   332 inline le_bool IndicClassTable::isMatra(LEUnicode ch) const
   333 {
   334     return isMatra(getCharClass(ch));
   335 }
   336 
   337 inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const
   338 {
   339     return isSplitMatra(getCharClass(ch));
   340 }
   341 
   342 inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const
   343 {
   344     return isLengthMark(getCharClass(ch));
   345 }
   346 
   347 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const
   348 {
   349     return hasPostOrBelowBaseForm(getCharClass(ch));
   350 }
   351 
   352 inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const
   353 {
   354     return hasPostBaseForm(getCharClass(ch));
   355 }
   356 
   357 inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
   358 {
   359     return hasBelowBaseForm(getCharClass(ch));
   360 }
   361 
   362 // Added by Nokia -- For determining whether a character is a Bengali Ya
   363 inline le_bool IndicClassTable::isBengaliYa(LEUnicode ch) const
   364 {
   365     return isBengaliYa(getCharClass(ch));
   366 }
   367 
   368 // Added by Nokia -- 1922 mlyl -->
   369 inline le_bool IndicClassTable::isMlylChillu(LEUnicode ch) const
   370 {
   371     return isMlylChillu(getCharClass(ch));
   372 }
   373 // <-- 1922 mlyl
   374 
   375 U_NAMESPACE_END
   376 #endif