// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\include\unicode.h
// The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
// character attribute retrieval functions. It also contains:
// structures used to store and search the tables of character information:
// when modifying these, please remember that they form part of tables that must be initialised as aggregates,
// so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
// rather than class to make that clear.
// default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
// constructors and conversion functions for converting between integers and TUnicode objects
// functions to retrieve the categories and attributes
// The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
// 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
// Version 2.0'.
// Because the category constants must be available to users they are defined not here but in the TChar
// class in e32std.h.
//
// WARNING: This file contains some APIs which are internal and are subject
// to change without notice. Such APIs should therefore not be used
// outside the Kernel and Hardware Services package.
sl@0: // sl@0: sl@0: /** sl@0: @file sl@0: @internalTechnology sl@0: */ sl@0: sl@0: sl@0: #ifndef __UNICODE_H__ sl@0: #define __UNICODE_H__ 1 sl@0: sl@0: #include sl@0: sl@0: /* sl@0: A structure to contain the raw data about a Unicode character: sl@0: it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate. sl@0: */ sl@0: struct TUnicodeData sl@0: { sl@0: // bit values for iFlags sl@0: enum sl@0: { sl@0: EHasLowerCase = 1, // adding the case offset gives the lower case form sl@0: EHasUpperCase = 2, // subtracting the case offset gives the upper case form sl@0: EHasTitleCase = 4, // a title case form exists that is distinct from the upper case form sl@0: EMirrored = 8, // this character is replaced by a mirror-image in right-to-left text sl@0: ENumericFlags = 0x70, // one of these flags is set if this number has a numeric value sl@0: ENonNumeric = 0x00, // this character has no numeric value sl@0: ESmallNumeric = 0x10, // numeric in the range 0..255 (see iDigitOffset) sl@0: EFiveHundred = 0x20, // numeric with the value 500 sl@0: EOneThousand = 0x30, // numeric with the value 1000 sl@0: EFiveThousand = 0x40, // numeric with the value 5000 sl@0: ETenThousand = 0x50, // numeric with the value 10000 sl@0: EHundredThousand = 0x60, // numeric with the value 100000 sl@0: EFraction = 0x70 // numeric with a fractional value sl@0: }; sl@0: sl@0: TUint8 iCategory; // general category sl@0: TUint8 iBdCategory; // bidirectional category sl@0: TUint8 iCombiningClass; // combining class sl@0: TInt8 iDigitOffset; // if this character has a small numeric value, the difference between the low sl@0: // 8 bits of the character code and the numeric value sl@0: TInt16 iCaseOffset; // offset to other case; subtract to get upper case, add to get lower sl@0: // case (this makes it more likely that characters sl@0: // differing only by case have the same data, making the table smaller) sl@0: TUint8 iFlags; // flags: does this character have a 
lower case form, etc. sl@0: }; sl@0: sl@0: /* sl@0: A structure for Unicode plane information. sl@0: An array of 17 elements should be defined in unitable.cpp, which is generated sl@0: by the readtype tool. All characters in a plane are divided into blocks. All sl@0: blocks in a plane have the same block size. Block size can be 2, 4, 8, etc. sl@0: Any field in this structure can be calculated from any other field. Such sl@0: 'redundant' information is just for faster runtime speed. sl@0: For example, a plane has block size of 16, which is 2 ^ 4. The code number sl@0: will be 4. The mask for block will be 0xFFF0, which means high 12 bit indicates sl@0: block index. The mask for code point will be 0x000F, which means the lower 4 sl@0: bits indicates index in block. sl@0: */ sl@0: struct TUnicodePlane sl@0: { sl@0: TUint8 iCodesPerBlock; // how many bits are used to represent code points (for example if there were 4096 blocks (12 bits), this would be 4 bits) sl@0: TUint16 iMaskForBlock; // mask of 16 bits for blocks (for example 8 bits would be 0xff00) sl@0: TUint16 iMaskForCodePoint; // mask of 16 bits for index in block (for example 8 bits would be 0x00ff) sl@0: }; sl@0: sl@0: /* sl@0: A structure for a range of Unicode characters with the same raw data; must not have a sl@0: constructor because an array of these in unitable.cpp is initialised as an aggregate. 
sl@0: sl@0: @deprecated sl@0: */ sl@0: struct TUnicodeDataRange sl@0: { sl@0: TUint16 iRangeStart; // Unicode value of the start of the range of characters sl@0: TInt16 iIndex; // index into an array of character information structures (-1 means data no available) sl@0: }; sl@0: sl@0: /* sl@0: A structure to hold a set of overriding character data sl@0: */ sl@0: struct TUnicodeDataSet sl@0: { sl@0: const TUnicodeData *iData; // array of character data structures sl@0: const TUnicodeDataRange *iRange; // array of ranges referring to elements of iData sl@0: TInt iRanges; // number of elements in the array of ranges sl@0: }; sl@0: sl@0: // A structure to hold the standard character data sl@0: struct TStandardUnicodeDataSet sl@0: { sl@0: const TUint16* iIndex1; // first trie index: 4096 elements indexed by high 12 bits of Unicode value sl@0: const TUint16* iIndex2; // second trie index, indexed by values in iIndex1 sl@0: const TUnicodeData *iData; // array of character data structures, indexed by values in iIndex2, offset sl@0: // by low 4 bits of Unicode value sl@0: }; sl@0: sl@0: /* sl@0: A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?) sl@0: composition (e.g., create a character from a base character and an accent), and decomposition sl@0: (e.g., remove the accent from this character if there is one). 
sl@0: */ sl@0: class TUnicode sl@0: { sl@0: public: sl@0: sl@0: // Constructors sl@0: TUnicode() { iCode = 0xFFFF; } sl@0: TUnicode(TUint c) : iCode(c) {} sl@0: operator TUint() const { return iCode; } sl@0: sl@0: // Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C) sl@0: void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const; sl@0: IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: TChar::TCjkWidth GetCjkWidth() const; sl@0: IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const; sl@0: sl@0: // Utilities sl@0: static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2); sl@0: sl@0: private: sl@0: const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const; sl@0: const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const; sl@0: TUint GetLowerCase(const TUnicodeData& aData) const; sl@0: TUint GetUpperCase(const TUnicodeData& aData) const; sl@0: TUint GetTitleCase(const TUnicodeData& aData) const; sl@0: TInt GetNumericValue(const TUnicodeData& aData) const; sl@0: sl@0: TUint iCode; // not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used. 
sl@0: sl@0: public: sl@0: #ifndef __KERNEL_MODE__ sl@0: static const TUint16 FoldTable[256]; // fold table (strip accents, fold case) for the range 0..255 sl@0: static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF sl@0: #else sl@0: static const TUint16* FoldTable; sl@0: static const TUint16* CjkWidthFoldTable; sl@0: #endif sl@0: }; sl@0: sl@0: #endif // __UNICODE_H__