1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/kernelhwsrv/kernel/eka/include/unicode.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,181 @@
1.4 +// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of the License "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// e32\include\unicode.h
1.18 +// The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
1.19 +// character attribute retrieval functions. It also contains:
1.20 +// structures used to store and search the tables of character information:
1.21 +// when modifying these, please remember that they form part of tables that must be initialised as aggregates,
1.22 +// so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
1.23 +// rather than class to make that clear.
1.24 +// default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
1.25 +// constructors and conversion functions for converting between integers and TUnicode objects
1.26 +// functions to retrieve the categories and attributes
1.27 +// The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
1.28 +// 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
1.29 +// Version 2.0'.
1.30 +// Because the category constants must be available to users they are defined not here but in the TChar
1.31 +// class in e32std.h.
1.32 +//
1.33 +// WARNING: This file contains some APIs which are internal and are subject
1.34 +// to change without notice. Such APIs should therefore not be used
1.35 +// outside the Kernel and Hardware Services package.
1.36 +//
1.37 +
1.38 +/**
1.39 + @file
1.40 + @internalTechnology
1.41 +*/
1.42 +
1.43 +
1.44 +#ifndef __UNICODE_H__
1.45 +#define __UNICODE_H__ 1
1.46 +
1.47 +#include <e32cmn.h>
1.48 +
1.49 +/*
1.50 +A structure to contain the raw data about a Unicode character:
1.51 +it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate.
1.52 +*/
1.53 +struct TUnicodeData // raw per-character record; kept a plain aggregate so tables of it can be brace-initialised
1.54 + {
1.55 + // bit values for iFlags: bits 0..3 are single-bit flags, bits 4..6 (ENumericFlags) hold one numeric-type code
1.56 + enum
1.57 + {
1.58 + EHasLowerCase = 1, // adding the case offset (iCaseOffset) gives the lower case form
1.59 + EHasUpperCase = 2, // subtracting the case offset (iCaseOffset) gives the upper case form
1.60 + EHasTitleCase = 4, // a title case form exists that is distinct from the upper case form
1.61 + EMirrored = 8, // this character is replaced by a mirror-image in right-to-left text
1.62 + ENumericFlags = 0x70, // mask for the numeric-type field; the masked value is one of the codes below and is non-zero if the character has a numeric value
1.63 + ENonNumeric = 0x00, // this character has no numeric value
1.64 + ESmallNumeric = 0x10, // numeric in the range 0..255 (the value is derived via iDigitOffset)
1.65 + EFiveHundred = 0x20, // numeric with the value 500
1.66 + EOneThousand = 0x30, // numeric with the value 1000
1.67 + EFiveThousand = 0x40, // numeric with the value 5000
1.68 + ETenThousand = 0x50, // numeric with the value 10000
1.69 + EHundredThousand = 0x60, // numeric with the value 100000
1.70 + EFraction = 0x70 // numeric with a fractional value (note: same bit pattern as the ENumericFlags mask)
1.71 + };
1.72 +
1.73 + TUint8 iCategory; // general category
1.74 + TUint8 iBdCategory; // bidirectional category
1.75 + TUint8 iCombiningClass; // combining class
1.76 + TInt8 iDigitOffset; // if this character has a small numeric value (ESmallNumeric), the difference between the low
1.77 + // 8 bits of the character code and the numeric value
1.78 + TInt16 iCaseOffset; // signed offset to the other case; subtract to get upper case, add to get lower
1.79 + // case (storing an offset rather than a code makes it more likely that characters
1.80 + // differing only by case have the same data, making the table smaller)
1.81 + TUint8 iFlags; // combination of the enum values above: does this character have a lower case form, etc.
1.82 + };
1.83 +
1.84 +/*
1.85 +A structure for Unicode plane information.
1.86 +An array of 17 elements should be defined in unitable.cpp, which is generated
1.87 +by the readtype tool. All characters in a plane are divided into blocks. All
1.88 +blocks in a plane have the same block size, which is a power of two (2, 4, 8, etc.).
1.89 +Any field in this structure can be calculated from any other field. Such
1.90 +'redundant' information is stored only to make run-time lookups faster.
1.91 +For example, suppose a plane has a block size of 16, which is 2 ^ 4. The number of
1.92 +code bits will be 4. The mask for the block will be 0xFFF0, meaning the high 12 bits
1.93 +give the block index. The mask for the code point will be 0x000F, meaning the low 4
1.94 +bits give the index within the block.
1.95 +*/
1.96 +struct TUnicodePlane // per-plane description of how a 16-bit value splits into block index and index within the block
1.97 + {
1.98 + TUint8 iCodesPerBlock; // despite the name, a bit count: how many bits are used to represent code points within a block (for example if there were 4096 blocks (12 bits), this would be 4 bits)
1.99 + TUint16 iMaskForBlock; // 16-bit mask selecting the block-index bits (for example 8 bits would be 0xff00)
1.100 + TUint16 iMaskForCodePoint; // 16-bit mask selecting the index within the block (for example 8 bits would be 0x00ff)
1.101 + };
1.102 +
1.103 +/*
1.104 +A structure for a range of Unicode characters with the same raw data; must not have a
1.105 +constructor because an array of these in unitable.cpp is initialised as an aggregate.
1.106 +
1.107 +@deprecated
1.108 +*/
1.109 +struct TUnicodeDataRange // one entry of a range table: where a run of characters starts and which data record it uses
1.110 + {
1.111 + TUint16 iRangeStart; // Unicode value of the start of the range of characters
1.112 + TInt16 iIndex; // index into an array of character information structures (-1 means no data available)
1.113 + };
1.114 +
1.115 +/*
1.116 +A structure to hold a set of overriding character data
1.117 +*/
1.118 +struct TUnicodeDataSet // overriding character data: a table of records plus the ranges that refer to them
1.119 + {
1.120 + const TUnicodeData *iData; // array of character data structures
1.121 + const TUnicodeDataRange *iRange; // array of ranges; each range's iIndex refers to an element of iData
1.122 + TInt iRanges; // number of elements in the iRange array
1.123 + };
1.124 +
1.125 +// A structure to hold the standard character data
1.126 +struct TStandardUnicodeDataSet // standard character data, reached through a two-level trie index
1.127 + {
1.128 + const TUint16* iIndex1; // first trie index: 4096 elements indexed by the high 12 bits of the Unicode value
1.129 + const TUint16* iIndex2; // second trie index, indexed by the values found in iIndex1
1.130 + const TUnicodeData *iData; // array of character data structures, indexed by the values found in iIndex2, offset
1.131 + // by the low 4 bits of the Unicode value
1.132 + };
1.133 +
1.134 +/*
1.135 +A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?),
1.136 +composition (e.g., create a character from a base character and an accent), and decomposition
1.137 +(e.g., remove the accent from this character if there is one).
1.138 +*/
1.139 +class TUnicode // wraps a single character code (iCode) and declares the attribute-lookup functions for it
1.140 + {
1.141 + public:
1.142 +
1.143 + // Constructors and integer conversion
1.144 + TUnicode() { iCode = 0xFFFF; } // default: 0xFFFF, which the file header describes as an invalid character
1.145 + TUnicode(TUint c) : iCode(c) {} // non-explicit, so a TUint converts implicitly to TUnicode
1.146 + operator TUint() const { return iCode; } // implicit conversion back to the stored code value
1.147 +
1.148 + // Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C); aOverridingDataSet supplies overriding character data (see TUnicodeDataSet) - how it is combined with the standard data is decided by the implementation, which is not in this header
1.149 + void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const; // writes its result into aInfo
1.150 + IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const;
1.151 + TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const;
1.152 + TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const;
1.153 + IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const;
1.154 + IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const;
1.155 + TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const;
1.156 + TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const;
1.157 + TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const; // NOTE(review): how fractional/non-numeric characters are reported is not visible here - confirm in the implementation
1.158 + TChar::TCjkWidth GetCjkWidth() const; // unlike the other attribute getters, takes no overriding data set
1.159 + IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const; // NOTE(review): the aFlags values are declared elsewhere (presumably in TChar) - confirm
1.160 +
1.161 + // Utilities
1.162 + static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2); // compares two TUint16 arrays, each given as pointer + length; NOTE(review): result convention is not visible here - confirm
1.163 +
1.164 + private: // lookup helpers, plus overloads that work on an already-retrieved TUnicodeData record
1.165 + const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const;
1.166 + const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const; // returns a pointer rather than a reference; presumably null when the set has no entry - TODO confirm
1.167 + TUint GetLowerCase(const TUnicodeData& aData) const;
1.168 + TUint GetUpperCase(const TUnicodeData& aData) const;
1.169 + TUint GetTitleCase(const TUnicodeData& aData) const;
1.170 + TInt GetNumericValue(const TUnicodeData& aData) const;
1.171 +
1.172 + TUint iCode; // not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used. NOTE(review): the 17 planes mentioned for TUnicodePlane extend to 0x10FFFF - confirm the intended upper bound
1.173 +
1.174 + public: // fold tables: fixed-size arrays in user-side builds, pointers when __KERNEL_MODE__ is defined
1.175 +#ifndef __KERNEL_MODE__
1.176 + static const TUint16 FoldTable[256]; // fold table (strip accents, fold case) for the range 0..255
1.177 + static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF
1.178 +#else
1.179 + static const TUint16* FoldTable; // kernel-mode build: pointer instead of an in-class array; presumably refers to the same 256-entry fold table - TODO confirm where it is set
1.180 + static const TUint16* CjkWidthFoldTable; // kernel-mode build: pointer instead of an in-class array; presumably refers to the same 256-entry width fold table - TODO confirm where it is set
1.181 +#endif
1.182 + };
1.183 +
1.184 +#endif // __UNICODE_H__