os/kernelhwsrv/kernel/eka/include/unicode.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/kernelhwsrv/kernel/eka/include/unicode.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,181 @@
     1.4 +// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.5 +// All rights reserved.
     1.6 +// This component and the accompanying materials are made available
     1.7 +// under the terms of the License "Eclipse Public License v1.0"
     1.8 +// which accompanies this distribution, and is available
     1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.10 +//
    1.11 +// Initial Contributors:
    1.12 +// Nokia Corporation - initial contribution.
    1.13 +//
    1.14 +// Contributors:
    1.15 +//
    1.16 +// Description:
    1.17 +// e32\include\unicode.h
    1.18 +// The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
    1.19 +// character attribute retrieval functions. This file contains:
    1.20 +// - structures used to store and search the tables of character information;
    1.21 +//   when modifying these, please remember that they form part of tables that must be initialised as aggregates,
    1.22 +//   so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
    1.23 +//   rather than class to make that clear.
    1.24 +// - a default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
    1.25 +// - constructors and conversion functions for converting between integers and TUnicode objects
    1.26 +// - functions to retrieve the categories and attributes
    1.27 +// The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
    1.28 +// 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
    1.29 +// Version 2.0'.
    1.30 +// Because the category constants must be available to users they are defined not here but in the TChar
    1.31 +// class in e32std.h.
    1.32 +// 
    1.33 +// WARNING: This file contains some APIs which are internal and are subject
    1.34 +//          to change without notice. Such APIs should therefore not be used
    1.35 +//          outside the Kernel and Hardware Services package.
    1.36 +//
    1.37 +
    1.38 +/**
    1.39 + @file
    1.40 + @internalTechnology
    1.41 +*/
    1.42 +
    1.43 +
    1.44 +#ifndef __UNICODE_H__
    1.45 +#define __UNICODE_H__ 1
    1.46 +
    1.47 +#include <e32cmn.h>
    1.48 +
    1.49 +/*
    1.50 +A structure to contain the raw data about a Unicode character.
    1.51 +It must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate.
    1.52 +*/
    1.53 +struct TUnicodeData
    1.54 +	{
    1.55 +	// bit values for iFlags
    1.56 +	enum
    1.57 +		{
    1.58 +		EHasLowerCase = 1,			// adding the case offset (iCaseOffset) gives the lower case form
    1.59 +		EHasUpperCase = 2,			// subtracting the case offset (iCaseOffset) gives the upper case form
    1.60 +		EHasTitleCase = 4,			// a title case form exists that is distinct from the upper case form
    1.61 +		EMirrored = 8,				// this character is replaced by a mirror-image in right-to-left text
    1.62 +		ENumericFlags = 0x70,		// mask for the numeric field: the values ENonNumeric..EFraction below occupy these bits
    1.63 +		ENonNumeric = 0x00,			// this character has no numeric value
    1.64 +		ESmallNumeric = 0x10,		// numeric in the range 0..255 (see iDigitOffset)
    1.65 +		EFiveHundred = 0x20,		// numeric with the value 500
    1.66 +		EOneThousand = 0x30,		// numeric with the value 1000
    1.67 +		EFiveThousand = 0x40,		// numeric with the value 5000
    1.68 +		ETenThousand = 0x50,		// numeric with the value 10000
    1.69 +		EHundredThousand = 0x60,	// numeric with the value 100000
    1.70 +		EFraction = 0x70			// numeric with a fractional value
    1.71 +		};
    1.72 +
    1.73 +	TUint8 iCategory;					// general category
    1.74 +	TUint8 iBdCategory;					// bidirectional category
    1.75 +	TUint8 iCombiningClass;				// combining class
    1.76 +	TInt8 iDigitOffset;					// if this character has a small numeric value (ESmallNumeric), the difference
    1.77 +										// between the low 8 bits of the character code and the numeric value
    1.78 +	TInt16 iCaseOffset;					// offset to other case; subtract to get upper case, add to get lower
    1.79 +										// case (storing an offset makes it more likely that characters
    1.80 +										// differing only by case have the same data, making the table smaller)
    1.81 +	TUint8 iFlags;						// flags (see the enum above): does this character have a lower case form, etc.
    1.82 +	};
    1.83 +
    1.84 +/*
    1.85 +A structure for Unicode plane information.
    1.86 +An array of 17 elements should be defined in unitable.cpp, which is generated
    1.87 +by the readtype tool. All characters in a plane are divided into blocks. All
    1.88 +blocks in a plane have the same block size. Block size can be 2, 4, 8, etc.
    1.89 +Any field in this structure can be calculated from any other field. Such
    1.90 +'redundant' information is stored only for faster runtime speed.
    1.91 +For example, suppose a plane has a block size of 16, which is 2 ^ 4. The code
    1.92 +number (iCodesPerBlock) will be 4. The mask for block will be 0xFFF0, which means the high 12 bits give the
    1.93 +block index. The mask for code point will be 0x000F, which means the lower 4
    1.94 +bits give the index within the block.
    1.95 +*/
    1.96 +struct TUnicodePlane
    1.97 +	{
    1.98 +	TUint8 iCodesPerBlock;			// how many bits are used for the index within a block (for example if there were 4096 blocks (12 bits), this would be 4 bits)
    1.99 +	TUint16 iMaskForBlock;			// mask of 16 bits selecting the block index (for example 8 bits would be 0xff00)
   1.100 +	TUint16 iMaskForCodePoint;		// mask of 16 bits selecting the index in block (for example 8 bits would be 0x00ff)
   1.101 +	};
   1.102 +
   1.103 +/*
   1.104 +A structure for a range of Unicode characters that share the same raw data. It must not have a
   1.105 +constructor because an array of these in unitable.cpp is initialised as an aggregate.
   1.106 +
   1.107 +@deprecated
   1.108 +*/
   1.109 +struct TUnicodeDataRange
   1.110 +	{
   1.111 +	TUint16 iRangeStart;	// Unicode value of the start of the range of characters
   1.112 +	TInt16 iIndex;			// index into an array of character information structures (-1 means data not available)
   1.113 +	};
   1.114 +
   1.115 +/*
   1.116 +A structure to hold a set of overriding character data: an array of data structures plus the ranges that refer to them.
   1.117 +*/
   1.118 +struct TUnicodeDataSet
   1.119 +	{
   1.120 +	const TUnicodeData *iData;			// array of character data structures
   1.121 +	const TUnicodeDataRange *iRange;	// array of ranges; each range's iIndex refers to an element of iData
   1.122 +	TInt iRanges;						// number of elements in the array of ranges (iRange)
   1.123 +	};
   1.124 +
   1.125 +// A structure to hold the standard character data, reached through a two-level trie index
   1.126 +struct TStandardUnicodeDataSet
   1.127 +	{
   1.128 +	const TUint16* iIndex1;				// first trie index: 4096 elements indexed by high 12 bits of Unicode value
   1.129 +	const TUint16* iIndex2;				// second trie index, indexed by values in iIndex1
   1.130 +	const TUnicodeData *iData;			// array of character data structures, indexed by values in iIndex2, offset
   1.131 +										// by low 4 bits of Unicode value
   1.132 +	};
   1.133 +
   1.134 +/*
   1.135 +A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?),
   1.136 +composition (e.g., create a character from a base character and an accent), and decomposition
   1.137 +(e.g., remove the accent from this character if there is one).
   1.138 +*/
   1.139 +class TUnicode
   1.140 +	{
   1.141 +	public:
   1.142 +
   1.143 +	// Constructors and conversion to/from integers
   1.144 +	TUnicode() { iCode = 0xFFFF; }				// default: stores 0xFFFF
   1.145 +	TUnicode(TUint c) : iCode(c) {}				// non-explicit, so an integer converts implicitly to a TUnicode
   1.146 +	operator TUint() const { return iCode; }	// yields the stored code value
   1.147 +
   1.148 +	// Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C); aOverridingDataSet is a set of overriding character data (see TUnicodeDataSet)
   1.149 +	void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const;
   1.150 +	IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const;
   1.151 +	TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const;
   1.152 +	TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const;
   1.153 +	IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const;
   1.154 +	IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const;
   1.155 +	TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const;
   1.156 +	TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const;
   1.157 +	TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const;
   1.158 +	TChar::TCjkWidth GetCjkWidth() const;
   1.159 +	IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const;
   1.160 +	
   1.161 +	// Utilities
   1.162 +	static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2);	// compares two strings of 16-bit units, each given as pointer + length; NOTE(review): result convention is not visible here - confirm in the implementation
   1.163 +
   1.164 +	private:
   1.165 +	const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const;	// presumably looks up this character's data, taking the overriding set into account - confirm in the implementation
   1.166 +	const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const;
   1.167 +	TUint GetLowerCase(const TUnicodeData& aData) const;	// this and the overloads below take character data directly rather than an overriding data set
   1.168 +	TUint GetUpperCase(const TUnicodeData& aData) const;
   1.169 +	TUint GetTitleCase(const TUnicodeData& aData) const;
   1.170 +	TInt GetNumericValue(const TUnicodeData& aData) const;
   1.171 +
   1.172 +	TUint iCode; // not TUint16 because values in the extended range (0x10000 and above) may be used. NOTE(review): the upper bound was originally given as 0xFFFFF; with 17 planes it would be 0x10FFFF - confirm.
   1.173 +
   1.174 +	public:
   1.175 +#ifndef __KERNEL_MODE__
   1.176 +	static const TUint16 FoldTable[256];		// fold table (strip accents, fold case) for the range 0..255
   1.177 +	static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF
   1.178 +#else // __KERNEL_MODE__
   1.179 +	static const TUint16* FoldTable;			// in kernel mode, declared as a pointer rather than a 256-element array
   1.180 +	static const TUint16* CjkWidthFoldTable;	// in kernel mode, declared as a pointer rather than a 256-element array
   1.181 +#endif // __KERNEL_MODE__
   1.182 +	};
   1.183 +
   1.184 +#endif // __UNICODE_H__