os/textandloc/fontservices/textshaperplugin/IcuSource/layout/IndicReordering.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
 *
sl@0
     3
 * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
sl@0
     4
 *
sl@0
     5
 */
sl@0
     6
sl@0
     7
#ifndef __INDICREORDERING_H
sl@0
     8
#define __INDICREORDERING_H
sl@0
     9
sl@0
    10
/**
sl@0
    11
 * \file
sl@0
    12
 * \internal
sl@0
    13
 */
sl@0
    14
sl@0
    15
#include "LETypes.h"
sl@0
    16
#include "OpenTypeTables.h"
sl@0
    17
sl@0
    18
U_NAMESPACE_BEGIN
sl@0
    19
sl@0
    20
// Characters that get referred to by name.
#define C_SIGN_ZWNJ           0x200C   // U+200C ZERO WIDTH NON-JOINER
#define C_SIGN_ZWJ            0x200D   // U+200D ZERO WIDTH JOINER
sl@0
    23
sl@0
    24
// Character class values.
// A class value is what remains of a CharClass word after masking it with
// CF_CLASS_MASK; the values are consecutive, starting at zero.
#define CC_RESERVED                      0U
#define CC_VOWEL_MODIFIER                1U
#define CC_STRESS_MARK                   2U
#define CC_INDEPENDENT_VOWEL             3U
#define CC_INDEPENDENT_VOWEL_2           4U
#define CC_CONSONANT                     5U
#define CC_CONSONANT_WITH_NUKTA          6U
#define CC_NUKTA                         7U
// Values 8..11 form the contiguous range that isMatra() tests for.
#define CC_DEPENDENT_VOWEL               8U
#define CC_SPLIT_VOWEL_PIECE_1           9U
#define CC_SPLIT_VOWEL_PIECE_2          10U
#define CC_SPLIT_VOWEL_PIECE_3          11U
#define CC_VIRAMA                       12U
#define CC_ZERO_WIDTH_MARK              13U

// Added by Nokia -- special case
#define CC_INDEPENDENT_VOWEL_A          14U
// Added by Nokia -- special case with independent vowel A
#define CC_DEPENDENT_VOWEL_CANDRA_E     15U
// Added by Nokia -- special case for Kannada Ra
#define CC_CONSONANT_KANNADA_BENGALI_RA 16U
// Added by Nokia -- special case for Tamil independent vowel O
#define CC_INDEPENDENT_VOWEL_TAMIL_O    17U

// Added by Nokia -- special case for Gurmukhi Bearer ARA and independent A
#define CC_GUR_BEARER_A                 18U
// Added by Nokia -- special case for Gurmukhi Bearer IRI
#define CC_GUR_BEARER_I                 19U
// Added by Nokia -- special case for Gurmukhi Bearer URA
#define CC_GUR_BEARER_U                 20U

// Added by Nokia -- special case for Gurmukhi dependent Vowel which can combine with Bearer ARA
#define CC_GUR_DEPENDENT_VOWEL_A        21U
// Added by Nokia -- special case for Gurmukhi dependent Vowel which can combine with Bearer IRI
#define CC_GUR_DEPENDENT_VOWEL_I        22U
// Added by Nokia -- special case for Gurmukhi dependent Vowel which can combine with Bearer URA
#define CC_GUR_DEPENDENT_VOWEL_U        23U

// Number of class values defined above (0..23).
#define CC_COUNT                        24U
sl@0
    63
sl@0
    64
// Character class flags.
// A CharClass word carries a CC_* class value in its low 16 bits and the
// flag bits / bit fields below in its high 16 bits.
#define CF_CLASS_MASK    0x0000FFFFU

#define CF_CONSONANT     0x80000000U

#define CF_REPH          0x40000000U
#define CF_VATTU         0x20000000U
#define CF_BELOW_BASE    0x10000000U
#define CF_POST_BASE     0x08000000U
#define CF_LENGTH_MARK   0x04000000U
// Added by Nokia -- special case for Bengali Ya
#define CF_CONSONANT_BENGALI_YA       0x00800000U

// Added by Nokia -- 1922 Malayalam Chillu -->
#define CF_CONSONANT_MLYL_CHILLU      0x00400000U
// The values below lie in the Unicode Malayalam block (U+0D00..U+0D7F): despite
// the CC_ prefix they are code points, not class values smaller than CC_COUNT.
#define CC_DEPENDENT_VOWEL_SIGN_MLYL_E 0xD46U
#define CC_CONSONANT_MLYL_YA          0xD2FU
#define CC_CONSONANT_MLYL_VA          0xD35U
#define CC_CONSONANT_MLYL_RA          0xD30U
#define CC_CONSONANT_MLYL_RRA         0xD31U
#define CC_CONSONANT_MLYL_LLA         0xD33U
#define CC_CONSONANT_MLYL_LLLA        0xD34U
// <-- 1922 mlyl

// Two-bit position field selected by CF_POS_MASK.
// Note that CF_POS_BEFORE has the same value as the mask itself.
#define CF_POS_BEFORE    0x00300000U
#define CF_POS_BELOW     0x00200000U
#define CF_POS_ABOVE     0x00100000U
#define CF_POS_AFTER     0x00000000U
#define CF_POS_MASK      0x00300000U

// Four-bit index field; getSplitMatra() shifts it down by CF_INDEX_SHIFT and
// subtracts one to index the split matra table, and isSplitMatra() treats a
// non-zero field as "is a split matra".
#define CF_INDEX_MASK    0x000F0000U
#define CF_INDEX_SHIFT   16
sl@0
    96
sl@0
    97
// Script flag bits (stored in IndicClassTable::scriptFlags).
#define SF_MATRAS_AFTER_BASE     0x80000000U
#define SF_REPH_AFTER_BELOW      0x40000000U
#define SF_EYELASH_RA            0x20000000U
#define SF_MPRE_FIXUP            0x10000000U
// Added by Nokia for matra combined with final form of YA in Gurmukhi.
// Flags SF_MATRAS_AFTER_BASE and SF_MATRAS_AFTER_POSTBASE cannot coexist.
//  - With SF_MATRAS_AFTER_BASE set: Mbelow, Mabove and Mpost are attached to
//    the base consonant.
//  - With SF_MATRAS_AFTER_POSTBASE set: Mbelow, Mabove and Mpost are attached
//    to the postbase consonant.
//  - With neither flag set: Mbelow and Mabove are attached to the base (or the
//    base + below-base consonant cluster), but Mpost is attached to the postbase.
#define SF_MATRAS_AFTER_POSTBASE 0x08000000U

// The low 16 bits of the script flags are a numeric field rather than flags.
// NOTE(review): presumably the post-base limit, with SF_NO_POST_BASE_LIMIT
// meaning "unlimited" -- confirm against the reordering implementation.
#define SF_POST_BASE_LIMIT_MASK  0x0000FFFFU
#define SF_NO_POST_BASE_LIMIT    0x00007FFFU
sl@0
   112
sl@0
   113
// A fixed group of three LEUnicode code units; IndicClassTable::splitMatraTable
// is an array of these, one entry per split matra index.
// NOTE(review): presumably the pieces a split matra decomposes into, with
// unused slots set to zero -- confirm against the per-script tables.
typedef LEUnicode SplitMatra[3];
sl@0
   114
sl@0
   115
class MPreFixups;
sl@0
   116
class LEGlyphStorage;
sl@0
   117
sl@0
   118
struct IndicClassTable
sl@0
   119
{
sl@0
   120
    typedef le_uint32 CharClass;
sl@0
   121
    typedef le_uint32 ScriptFlags;
sl@0
   122
sl@0
   123
    LEUnicode firstChar;
sl@0
   124
    LEUnicode lastChar;
sl@0
   125
    le_int32 worstCaseExpansion;
sl@0
   126
    ScriptFlags scriptFlags;
sl@0
   127
    const CharClass *classTable;
sl@0
   128
    const SplitMatra *splitMatraTable;
sl@0
   129
sl@0
   130
    inline le_int32 getWorstCaseExpansion() const;
sl@0
   131
sl@0
   132
    CharClass getCharClass(LEUnicode ch) const;
sl@0
   133
sl@0
   134
    inline const SplitMatra *getSplitMatra(CharClass charClass) const;
sl@0
   135
sl@0
   136
    inline le_bool isVowelModifier(LEUnicode ch) const;
sl@0
   137
    inline le_bool isStressMark(LEUnicode ch) const;
sl@0
   138
    inline le_bool isConsonant(LEUnicode ch) const;
sl@0
   139
    inline le_bool isReph(LEUnicode ch) const;
sl@0
   140
    inline le_bool isVirama(LEUnicode ch) const;
sl@0
   141
    inline le_bool isNukta(LEUnicode ch) const;
sl@0
   142
    inline le_bool isVattu(LEUnicode ch) const;
sl@0
   143
    inline le_bool isMatra(LEUnicode ch) const;
sl@0
   144
    inline le_bool isSplitMatra(LEUnicode ch) const;
sl@0
   145
    inline le_bool isLengthMark(LEUnicode ch) const;
sl@0
   146
    inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const;
sl@0
   147
    inline le_bool hasPostBaseForm(LEUnicode ch) const;
sl@0
   148
    inline le_bool hasBelowBaseForm(LEUnicode ch) const;
sl@0
   149
    // Added by Nokia: For special case Bengali Ya
sl@0
   150
    inline le_bool isBengaliYa(LEUnicode ch) const;
sl@0
   151
    // Added by Nokia -- 1922 mlyl -->
sl@0
   152
    inline le_bool isMlylChillu(LEUnicode ch) const;
sl@0
   153
    // <-- 1922 mlyl
sl@0
   154
    
sl@0
   155
    inline static le_bool isVowelModifier(CharClass charClass);
sl@0
   156
    inline static le_bool isStressMark(CharClass charClass);
sl@0
   157
    inline static le_bool isConsonant(CharClass charClass);
sl@0
   158
    inline static le_bool isReph(CharClass charClass);
sl@0
   159
    inline static le_bool isVirama(CharClass charClass);
sl@0
   160
    inline static le_bool isNukta(CharClass charClass);
sl@0
   161
    inline static le_bool isVattu(CharClass charClass);
sl@0
   162
    inline static le_bool isMatra(CharClass charClass);
sl@0
   163
    inline static le_bool isSplitMatra(CharClass charClass);
sl@0
   164
    inline static le_bool isLengthMark(CharClass charClass);
sl@0
   165
    inline static le_bool hasPostOrBelowBaseForm(CharClass charClass);
sl@0
   166
    inline static le_bool hasPostBaseForm(CharClass charClass);
sl@0
   167
    inline static le_bool hasBelowBaseForm(CharClass charClass);
sl@0
   168
    // Added by Nokia: For special case Bengali Ya
sl@0
   169
    inline static le_bool isBengaliYa(CharClass charClass);
sl@0
   170
    // Added by Nokia -- 1922 mlyl -->
sl@0
   171
    inline static le_bool isMlylChillu(CharClass charClass);
sl@0
   172
    // <-- 1922 mlyl
sl@0
   173
    
sl@0
   174
    static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
sl@0
   175
};
sl@0
   176
sl@0
   177
class IndicReordering /* not : public UObject because all methods are static */ {
sl@0
   178
public:
sl@0
   179
    static le_int32 getWorstCaseExpansion(le_int32 scriptCode);
sl@0
   180
sl@0
   181
    static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
sl@0
   182
        LEUnicode *outChars, LEGlyphStorage &glyphStorage,
sl@0
   183
        MPreFixups **outMPreFixups, LEErrorCode& success);
sl@0
   184
sl@0
   185
    static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage,
sl@0
   186
        LEErrorCode& success);
sl@0
   187
sl@0
   188
    static const LETag *getFeatureOrder();
sl@0
   189
sl@0
   190
private:
sl@0
   191
    // do not instantiate
sl@0
   192
    IndicReordering();
sl@0
   193
sl@0
   194
    static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
sl@0
   195
sl@0
   196
};
sl@0
   197
sl@0
   198
// Accessor for the worstCaseExpansion member of this table.
inline le_int32 IndicClassTable::getWorstCaseExpansion() const
{
    return this->worstCaseExpansion;
}
sl@0
   202
sl@0
   203
// Returns the split matra table entry selected by charClass.
//
// The CF_INDEX field of charClass holds a 1-based index into splitMatraTable;
// a field value of zero means the class has no split matra entry (this is the
// same condition isSplitMatra() tests).
//
// Returns a null pointer when the field is zero or when this table has no
// split matra data, instead of forming a pointer in front of the array.
// Results for valid split matra classes are unchanged.
inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const
{
    le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT;

    // Guard: index 0 would address splitMatraTable[-1], and a missing table
    // cannot be indexed at all.
    if (index == 0 || splitMatraTable == 0) {
        return 0;
    }

    return &splitMatraTable[index - 1];
}
sl@0
   209
sl@0
   210
// True when the class value of charClass (flag bits masked off) is CC_VOWEL_MODIFIER.
inline le_bool IndicClassTable::isVowelModifier(CharClass charClass)
{
    const CharClass classValue = charClass & CF_CLASS_MASK;

    return classValue == CC_VOWEL_MODIFIER;
}
sl@0
   214
sl@0
   215
// True when the class value of charClass (flag bits masked off) is CC_STRESS_MARK.
inline le_bool IndicClassTable::isStressMark(CharClass charClass)
{
    const CharClass classValue = charClass & CF_CLASS_MASK;

    return classValue == CC_STRESS_MARK;
}
sl@0
   219
sl@0
   220
// True when the CF_CONSONANT flag is set in charClass.
inline le_bool IndicClassTable::isConsonant(CharClass charClass)
{
    const CharClass consonantBit = charClass & CF_CONSONANT;

    return consonantBit != 0;
}
sl@0
   224
sl@0
   225
// True when the CF_REPH flag is set in charClass.
inline le_bool IndicClassTable::isReph(CharClass charClass)
{
    const CharClass rephBit = charClass & CF_REPH;

    return rephBit != 0;
}
sl@0
   229
sl@0
   230
// True when the class value of charClass (flag bits masked off) is CC_NUKTA.
inline le_bool IndicClassTable::isNukta(CharClass charClass)
{
    const CharClass classValue = charClass & CF_CLASS_MASK;

    return classValue == CC_NUKTA;
}
sl@0
   234
sl@0
   235
// True when the class value of charClass (flag bits masked off) is CC_VIRAMA.
inline le_bool IndicClassTable::isVirama(CharClass charClass)
{
    const CharClass classValue = charClass & CF_CLASS_MASK;

    return classValue == CC_VIRAMA;
}
sl@0
   239
sl@0
   240
// True when the CF_VATTU flag is set in charClass.
inline le_bool IndicClassTable::isVattu(CharClass charClass)
{
    const CharClass vattuBit = charClass & CF_VATTU;

    return vattuBit != 0;
}
sl@0
   244
sl@0
   245
// True when the class value of charClass denotes a matra, i.e. it is either
//  - in the contiguous range CC_DEPENDENT_VOWEL .. CC_SPLIT_VOWEL_PIECE_3, or
//  - one of the special-case dependent vowel classes listed below.
inline le_bool IndicClassTable::isMatra(CharClass charClass)
{
    const CharClass classValue = charClass & CF_CLASS_MASK;

    const bool inDependentVowelRange =
        (classValue >= CC_DEPENDENT_VOWEL) && (classValue <= CC_SPLIT_VOWEL_PIECE_3);

    // Added special CANDRA E char class check to enable formation of Devanagari CANDRA A
    const bool isCandraE = (classValue == CC_DEPENDENT_VOWEL_CANDRA_E);

    // Added special CC_GUR_DEPENDENT_VOWEL_* to support decompositions of the independent
    // vowels into a sequence of a vowel bearer and a depending vowel sign
    const bool isGurmukhiDependentVowel =
        (classValue == CC_GUR_DEPENDENT_VOWEL_A) ||
        (classValue == CC_GUR_DEPENDENT_VOWEL_I) ||
        (classValue == CC_GUR_DEPENDENT_VOWEL_U);

    return inDependentVowelRange || isCandraE || isGurmukhiDependentVowel;
}
sl@0
   258
sl@0
   259
// True when the CF_INDEX field of charClass is non-zero, i.e. the class
// carries a split matra table index.
inline le_bool IndicClassTable::isSplitMatra(CharClass charClass)
{
    const CharClass splitIndexBits = charClass & CF_INDEX_MASK;

    return splitIndexBits != 0;
}
sl@0
   263
sl@0
   264
// True when the CF_LENGTH_MARK flag is set in charClass.
inline le_bool IndicClassTable::isLengthMark(CharClass charClass)
{
    const CharClass lengthMarkBit = charClass & CF_LENGTH_MARK;

    return lengthMarkBit != 0;
}
sl@0
   268
sl@0
   269
// True when at least one of the CF_POST_BASE / CF_BELOW_BASE flags is set in charClass.
inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass)
{
    const CharClass eitherFormMask = CF_POST_BASE | CF_BELOW_BASE;

    return (charClass & eitherFormMask) != 0;
}
sl@0
   273
sl@0
   274
// True when the CF_POST_BASE flag is set in charClass.
inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
{
    const CharClass postBaseBit = charClass & CF_POST_BASE;

    return postBaseBit != 0;
}
sl@0
   278
sl@0
   279
// True when the CF_BELOW_BASE flag is set in charClass.
inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
{
    const CharClass belowBaseBit = charClass & CF_BELOW_BASE;

    return belowBaseBit != 0;
}
sl@0
   283
sl@0
   284
// Added by Nokia -- For determining whether a character is a Bengali Ya
sl@0
   285
// True when the CF_CONSONANT_BENGALI_YA flag is set in charClass.
inline le_bool IndicClassTable::isBengaliYa(CharClass charClass)
{
    const CharClass bengaliYaBit = charClass & CF_CONSONANT_BENGALI_YA;

    return bengaliYaBit != 0;
}
sl@0
   289
sl@0
   290
// Added by Nokia -- 1922 mlyl -->
sl@0
   291
// True when the CF_CONSONANT_MLYL_CHILLU flag is set in charClass.
inline le_bool IndicClassTable::isMlylChillu(CharClass charClass)
{
    const CharClass chilluBit = charClass & CF_CONSONANT_MLYL_CHILLU;

    return chilluBit != 0;
}
sl@0
   295
// <-- 1922 mlyl
sl@0
   296
sl@0
   297
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isVowelModifier() to the result.
inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isVowelModifier(charClass);
}
sl@0
   301
sl@0
   302
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isStressMark() to the result.
inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isStressMark(charClass);
}
sl@0
   306
sl@0
   307
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isConsonant() to the result.
inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isConsonant(charClass);
}
sl@0
   311
sl@0
   312
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isReph() to the result.
inline le_bool IndicClassTable::isReph(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isReph(charClass);
}
sl@0
   316
sl@0
   317
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isVirama() to the result.
inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isVirama(charClass);
}
sl@0
   321
sl@0
   322
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isNukta() to the result.
inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isNukta(charClass);
}
sl@0
   326
sl@0
   327
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isVattu() to the result.
inline le_bool IndicClassTable::isVattu(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isVattu(charClass);
}
sl@0
   331
sl@0
   332
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isMatra() to the result.
inline le_bool IndicClassTable::isMatra(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isMatra(charClass);
}
sl@0
   336
sl@0
   337
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isSplitMatra() to the result.
inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isSplitMatra(charClass);
}
sl@0
   341
sl@0
   342
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isLengthMark() to the result.
inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isLengthMark(charClass);
}
sl@0
   346
sl@0
   347
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of hasPostOrBelowBaseForm() to the result.
inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return hasPostOrBelowBaseForm(charClass);
}
sl@0
   351
sl@0
   352
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of hasPostBaseForm() to the result.
inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return hasPostBaseForm(charClass);
}
sl@0
   356
sl@0
   357
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of hasBelowBaseForm() to the result.
inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return hasBelowBaseForm(charClass);
}
sl@0
   361
sl@0
   362
// Added by Nokia -- For determining whether a character is a Bengali Ya
sl@0
   363
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isBengaliYa() to the result.
inline le_bool IndicClassTable::isBengaliYa(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isBengaliYa(charClass);
}
sl@0
   367
sl@0
   368
// Added by Nokia -- 1922 mlyl -->
sl@0
   369
// Character overload: looks ch up with getCharClass() and applies the
// CharClass form of isMlylChillu() to the result.
inline le_bool IndicClassTable::isMlylChillu(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return isMlylChillu(charClass);
}
sl@0
   373
// <-- 1922 mlyl
sl@0
   374
sl@0
   375
U_NAMESPACE_END
sl@0
   376
#endif