os/kernelhwsrv/kernel/eka/include/unicode.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     2
// All rights reserved.
sl@0
     3
// This component and the accompanying materials are made available
sl@0
     4
// under the terms of the License "Eclipse Public License v1.0"
sl@0
     5
// which accompanies this distribution, and is available
sl@0
     6
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     7
//
sl@0
     8
// Initial Contributors:
sl@0
     9
// Nokia Corporation - initial contribution.
sl@0
    10
//
sl@0
    11
// Contributors:
sl@0
    12
//
sl@0
    13
// Description:
sl@0
    14
// e32\include\unicode.h
sl@0
    15
// The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
sl@0
    16
// character attribute retrieval functions. It also contains:
sl@0
    17
// structures used to store and search the tables of character information:
sl@0
    18
// when modifying these, please remember that they form part of tables that must be initialised as aggregates,
sl@0
    19
// so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
sl@0
    20
// rather than class to make that clear.
sl@0
    21
// default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
sl@0
    22
// constructors and conversion functions for converting between integers and TUnicode objects
sl@0
    23
// functions to retrieve the categories and attributes
sl@0
    24
// The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
sl@0
    25
// 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
sl@0
    26
// Version 2.0'.
sl@0
    27
// Because the category constants must be available to users they are defined not here but in the TChar
sl@0
    28
// class in e32std.h.
sl@0
    29
// 
sl@0
    30
// WARNING: This file contains some APIs which are internal and are subject
sl@0
    31
//          to change without notice. Such APIs should therefore not be used
sl@0
    32
//          outside the Kernel and Hardware Services package.
sl@0
    33
//
sl@0
    34
sl@0
    35
/**
sl@0
    36
 @file
sl@0
    37
 @internalTechnology
sl@0
    38
*/
sl@0
    39
sl@0
    40
sl@0
    41
#ifndef __UNICODE_H__
sl@0
    42
#define __UNICODE_H__ 1
sl@0
    43
sl@0
    44
#include <e32cmn.h>
sl@0
    45
sl@0
    46
/*
sl@0
    47
A structure to contain the raw data about a Unicode character:
sl@0
    48
it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate.
sl@0
    49
*/
sl@0
    50
struct TUnicodeData
sl@0
    51
	{
sl@0
    52
	// bit values for iFlags
sl@0
    53
	enum
sl@0
    54
		{
sl@0
    55
		EHasLowerCase = 1,			// adding the case offset gives the lower case form
sl@0
    56
		EHasUpperCase = 2,			// subtracting the case offset gives the upper case form
sl@0
    57
		EHasTitleCase = 4,			// a title case form exists that is distinct from the upper case form
sl@0
    58
		EMirrored = 8,				// this character is replaced by a mirror-image in right-to-left text
sl@0
    59
		ENumericFlags = 0x70,		// one of these flags is set if this number has a numeric value
sl@0
    60
		ENonNumeric = 0x00,			// this character has no numeric value
sl@0
    61
		ESmallNumeric = 0x10,		// numeric in the range 0..255 (see iDigitOffset)
sl@0
    62
		EFiveHundred = 0x20,		// numeric with the value 500
sl@0
    63
		EOneThousand = 0x30,		// numeric with the value 1000
sl@0
    64
		EFiveThousand = 0x40,		// numeric with the value 5000
sl@0
    65
		ETenThousand = 0x50,		// numeric with the value 10000
sl@0
    66
		EHundredThousand = 0x60,	// numeric with the value 100000
sl@0
    67
		EFraction = 0x70			// numeric with a fractional value
sl@0
    68
		};
sl@0
    69
sl@0
    70
	TUint8 iCategory;					// general category
sl@0
    71
	TUint8 iBdCategory;					// bidirectional category
sl@0
    72
	TUint8 iCombiningClass;				// combining class
sl@0
    73
	TInt8 iDigitOffset;					// if this character has a small numeric value, the difference between the low
sl@0
    74
										// 8 bits of the character code and the numeric value
sl@0
    75
	TInt16 iCaseOffset;					// offset to other case; subtract to get upper case, add to get lower
sl@0
    76
										// case (this makes it more likely that characters
sl@0
    77
										// differing only by case have the same	data, making the table smaller)
sl@0
    78
	TUint8 iFlags;						// flags: does this character have a lower case form, etc.
sl@0
    79
	};
sl@0
    80
sl@0
    81
/*
sl@0
    82
A structure for Unicode plane information.
sl@0
    83
An array of 17 elements should be defined in unitable.cpp, which is generated
sl@0
    84
by the readtype tool. All characters in a plane are divided into blocks. All
sl@0
    85
blocks in a plane have the same block size. Block size can be 2, 4, 8, etc.
sl@0
    86
Any field in this structure can be calculated from any other field. Such
sl@0
    87
'redundant' information is just for faster runtime speed.
sl@0
    88
For example, a plane has block size of 16, which is 2 ^ 4. The code number 
sl@0
    89
will be 4. The mask for block will be 0xFFF0, which means the high 12 bits indicate the
sl@0
    90
block index. The mask for code point will be 0x000F, which means the lower 4
sl@0
    91
bits indicate the index in the block.
sl@0
    92
*/
sl@0
    93
struct TUnicodePlane
sl@0
    94
	{
sl@0
    95
	TUint8 iCodesPerBlock;			// how many bits are used to represent code points (for example if there were 4096 blocks (12 bits), this would be 4 bits)
sl@0
    96
	TUint16 iMaskForBlock;			// mask of 16 bits for blocks (for example 8 bits would be 0xff00)
sl@0
    97
	TUint16 iMaskForCodePoint;		// mask of 16 bits for index in block (for example 8 bits would be 0x00ff)
sl@0
    98
	};
sl@0
    99
sl@0
   100
/*
sl@0
   101
A structure for a range of Unicode characters with the same raw data; must not have a
sl@0
   102
constructor because an array of these in unitable.cpp is initialised as an aggregate.
sl@0
   103
sl@0
   104
@deprecated
sl@0
   105
*/
sl@0
   106
struct TUnicodeDataRange
sl@0
   107
	{
sl@0
   108
	TUint16 iRangeStart;	// Unicode value of the start of the range of characters
sl@0
   109
	TInt16 iIndex;			// index into an array of character information structures (-1 means data no available)
sl@0
   110
	};
sl@0
   111
sl@0
   112
/*
sl@0
   113
A structure to hold a set of overriding character data
sl@0
   114
*/
sl@0
   115
struct TUnicodeDataSet
sl@0
   116
	{
sl@0
   117
	const TUnicodeData *iData;			// array of character data structures
sl@0
   118
	const TUnicodeDataRange *iRange;	// array of ranges referring to elements of iData
sl@0
   119
	TInt iRanges;						// number of elements in the array of ranges
sl@0
   120
	};
sl@0
   121
sl@0
   122
// A structure to hold the standard character data
sl@0
   123
struct TStandardUnicodeDataSet
sl@0
   124
	{
sl@0
   125
	const TUint16* iIndex1;				// first trie index: 4096 elements indexed by high 12 bits of Unicode value
sl@0
   126
	const TUint16* iIndex2;				// second trie index, indexed by values in iIndex1
sl@0
   127
	const TUnicodeData *iData;			// array of character data structures, indexed by values in iIndex2, offset
sl@0
   128
										// by low 4 bits of Unicode value
sl@0
   129
	};
sl@0
   130
sl@0
   131
/*
sl@0
   132
A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?)
sl@0
   133
composition (e.g., create a character from a base character and an accent), and decomposition
sl@0
   134
(e.g., remove the accent from this character if there is one).
sl@0
   135
*/
sl@0
   136
class TUnicode
sl@0
   137
	{
sl@0
   138
	public:
sl@0
   139
sl@0
   140
	// Constructors
sl@0
   141
	TUnicode() { iCode = 0xFFFF; }
sl@0
   142
	TUnicode(TUint c) : iCode(c) {}
sl@0
   143
	operator TUint() const { return iCode; }
sl@0
   144
sl@0
   145
	// Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C)
sl@0
   146
	void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   147
	IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   148
	TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   149
	TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   150
	IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   151
	IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   152
	TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   153
	TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   154
	TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   155
	TChar::TCjkWidth GetCjkWidth() const;
sl@0
   156
	IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   157
	
sl@0
   158
	// Utilities
sl@0
   159
	static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2);
sl@0
   160
sl@0
   161
	private:
sl@0
   162
	const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const;
sl@0
   163
	const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const;
sl@0
   164
	TUint GetLowerCase(const TUnicodeData& aData) const;
sl@0
   165
	TUint GetUpperCase(const TUnicodeData& aData) const;
sl@0
   166
	TUint GetTitleCase(const TUnicodeData& aData) const;
sl@0
   167
	TInt GetNumericValue(const TUnicodeData& aData) const;
sl@0
   168
sl@0
   169
	TUint iCode; // not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used.
sl@0
   170
sl@0
   171
	public:
sl@0
   172
#ifndef __KERNEL_MODE__
sl@0
   173
	static const TUint16 FoldTable[256];		// fold table (strip accents, fold case) for the range 0..255
sl@0
   174
	static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF
sl@0
   175
#else
sl@0
   176
	static const TUint16* FoldTable;
sl@0
   177
	static const TUint16* CjkWidthFoldTable;
sl@0
   178
#endif
sl@0
   179
	};
sl@0
   180
sl@0
   181
#endif // __UNICODE_H__