1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/kernelhwsrv/kernel/eka/euser/unicode/collate.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1190 @@
1.4 +// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of the License "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// The Unicode collation system.
1.18 +//
1.19 +//
1.20 +
1.21 +#include "collateimp.h"
1.22 +#include "foldtable.inl"
1.23 +
1.24 +#include <collate.h>
1.25 +#include <unicode.h>
1.26 +#include "u32std.h"
1.27 +
1.28 +// Capacity, in TText units, of the buffer that holds a candidate string which may have its own sort key.
1.29 +// 16 instead of 8, in case all characters are supplementary (presumably two units each, so that 8 still fit).
1.30 +static const TInt KKeyedStringBufferSize = 16;
1.31 +
1.32 +
1.33 +inline TText16 GetHighSurrogate(TUint aChar)
1.34 +/**
1.35 +Compute the high (leading) surrogate of a supplementary character.
1.36 +
1.37 +@param aChar The 32-bit code point value of a Unicode character; expected to be above 0xFFFF.
1.38 +
1.39 +@return High surrogate of aChar, if aChar is a supplementary character. No range check is made:
1.40 + for aChar at or below 0xFFFF the same formula yields a value below 0xD800, which is not a surrogate.
1.41 +*/
1.42 + {
1.43 + return STATIC_CAST(TText16, 0xD7C0 + (aChar >> 10)); // 0xD7C0 == 0xD800 - (0x10000 >> 10)
1.44 + }
1.45 +
1.46 +inline TText16 GetLowSurrogate(TUint aChar)
1.47 +/**
1.48 +Compute the low (trailing) surrogate of a supplementary character.
1.49 +
1.50 +@param aChar The 32-bit code point value of a Unicode character; expected to be above 0xFFFF.
1.51 +
1.52 +@return Low surrogate of aChar, if aChar is a supplementary character. No range check is made:
1.53 + the low ten bits of any aChar are used, so the result always lies in 0xDC00..0xDFFF.
1.54 +*/
1.55 + {
1.56 + return STATIC_CAST(TText16, 0xDC00 | (aChar & 0x3FF)); // low ten bits of the code point
1.57 + }
1.58 +
1.59 +inline TUint JoinSurrogate(TText16 aHighSurrogate, TText16 aLowSurrogate)
1.60 +/**
1.61 +Combine a high surrogate and a low surrogate into a supplementary character.
1.62 +Neither argument is validated here; the arithmetic is only meaningful for a genuine high/low surrogate pair.
1.63 +@return The 32-bit code point value of the generated Unicode supplementary
1.64 + character.
1.65 +*/
1.66 + {
1.67 + return ((aHighSurrogate - 0xD7F7) << 10) + aLowSurrogate; // 0xD7F7 << 10 == (0xD800 << 10) + 0xDC00 - 0x10000: all three offsets folded into one constant
1.68 + }
1.69 +
1.70 +// Creates the default sequence of one or two collation keys for the input character.
1.71 +// aBuffer must have room for two keys. Returns the number of keys output.
1.72 +static TInt CreateDefaultCollationKeySequence(TInt aChar, TCollationKey* aBuffer)
1.73 + {
1.74 + if (aChar >= 0x3400 && aChar <= 0x9FFF) // CJK
1.75 + {
1.76 + aBuffer[0].iLow = (TUint32)aChar << 16 | 0x0105; // character code in the top 16 bits
1.77 + aBuffer[0].iHigh = aChar;
1.78 + return 1;//Collation key sequence consists of 1 key
1.79 + }
1.80 + aBuffer[0].iLow = 0xFF800000 | (((TUint32)aChar << 1) & 0x3F0000) | 0x0104; // bits 15..20 of aChar land in bits 16..21; no stop bit
1.81 + aBuffer[0].iHigh = 1;
1.82 + aBuffer[1].iLow = ((TUint32)aChar << 16) | 0x80000105; // low 15 bits of aChar in bits 16..30, bit 31 forced on; shifted as unsigned so bits pushed past bit 31 are dropped without signed overflow; stop bit
1.83 + aBuffer[1].iHigh = 0;
1.84 + return 2;//Collation key sequence consists of 2 keys
1.85 + }
1.86 +
1.87 +// Binary-searches the main index of aTable for aChar. Returns the low 16 bits of the matching index entry, or -1 if aChar is not there.
1.88 +static TInt FindCollationKeyIndex(TInt aChar, const TCollationKeyTable& aTable)
1.89 + {
1.90 + TInt n = aTable.iIndices; // number of entries still to be searched
1.91 + const TUint32 *base = aTable.iIndex; // first entry of the range still to be searched
1.92 + const TUint32 *start = base;
1.93 + const TUint32 *end = aTable.iIndex + n - 1; // last valid entry (inclusive)
1.94 + const TUint32 *p = base;
1.95 + TInt currentCharLength = 0; // index entries occupied by the character examined at the pivot: 1, or 2 for a surrogate pair
1.96 +
1.97 + while (n > 0)
1.98 + {
1.99 + TInt pivot = n / 2;
1.100 + p += pivot;
1.101 + if ((p < start) || (p > end)) // never read outside the index
1.102 + {
1.103 + break;
1.104 + }
1.105 + TInt c = *p >> 16; // the upper 16 bits of an entry hold a 16-bit code unit
1.106 + if (IsHighSurrogate( (TText16)c ))
1.107 + {
1.108 + if ((p < end) && (IsLowSurrogate( (TText16)((*(p+1))>>16) )))
1.109 + { // the pivot is on the first entry of a surrogate pair
1.110 + currentCharLength = 2;
1.111 + c = JoinSurrogate( (TText16)(*p>>16), (TText16)((*(p+1))>>16) );
1.112 + }
1.113 + } // NOTE(review): an unpaired high surrogate leaves currentCharLength at its previous value (0 on the first pass) - confirm index tables never contain one
1.114 + else if (IsLowSurrogate( (TText16)c ))
1.115 + {
1.116 + if ((p > start) && (IsHighSurrogate( (TText16)((*(p-1))>>16) )))
1.117 + { // the pivot is on the second entry of a surrogate pair
1.118 + p--; // step back so that p addresses the high-surrogate entry
1.119 + pivot = pivot - 1; // keep pivot equal to p - base
1.120 + currentCharLength = 2;
1.121 + c = JoinSurrogate( (TText16)(*p>>16), (TText16)((*(p+1))>>16) );
1.122 + }
1.123 + } // NOTE(review): as above, an unpaired low surrogate does not update currentCharLength
1.124 + else
1.125 + {
1.126 + currentCharLength = 1;
1.127 + }
1.128 + if (aChar == c) // found it
1.129 + {
1.130 + return *p & 0xFFFF; // for a surrogate pair this is taken from the high-surrogate entry
1.131 + }
1.132 + if (aChar < c) // it's before
1.133 + {
1.134 + n = pivot; // keep only the entries in front of the pivot character
1.135 + }
1.136 + else // it's after
1.137 + {
1.138 + ASSERT(currentCharLength != 0);
1.139 + base = p + currentCharLength; // resume after every entry of the pivot character
1.140 + n -= pivot + currentCharLength;
1.141 + }
1.142 + p = base;
1.143 + }
1.144 + return -1;
1.145 + }
1.146 +
1.147 +static void ProcessKeys(TUint32& aKey1, TUint32& aKey2, TUint aFlags)
1.148 + { // Remaps both keys in place through a 21-entry lookup table for each of the EFoldCase, ESwapCase and ESwapKana bits set in aFlags, applied in that order.
1.149 + if (aFlags & TCollationMethod::EFoldCase)
1.150 + { // NOTE(review): the keys index the tables directly with no range check - assumes both are in 0..20; confirm at the call sites
1.151 + static const TUint case_fold_table[21] =
1.152 + { 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x2, 0x3, 0x4, 0x5, 0x6,
1.153 + 0xD, 0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14 }; // 0x8..0xC map onto 0x2..0x6; every other value maps to itself
1.154 + aKey1 = case_fold_table[aKey1];
1.155 + aKey2 = case_fold_table[aKey2];
1.156 + }
1.157 + if (aFlags & TCollationMethod::ESwapCase)
1.158 + {
1.159 + static const TUint case_swap_table[21] =
1.160 + { 0, 0x1, 0x8, 0x9, 0xA, 0xB, 0xC, 0x7, 0x2, 0x3, 0x4, 0x5, 0x6,
1.161 + 0xD, 0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14 }; // exchanges 0x2..0x6 with 0x8..0xC; every other value maps to itself
1.162 + aKey1 = case_swap_table[aKey1];
1.163 + aKey2 = case_swap_table[aKey2];
1.164 + }
1.165 + if (aFlags & TCollationMethod::ESwapKana)
1.166 + {
1.167 + static const TUint kana_swap_table[21] =
1.168 + { 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC,
1.169 + 0x13, 0x14, 0xD, 0xE, 0xF, 0x10, 0x11, 0x12 }; // 0xD,0xE map to 0x13,0x14 and 0xF..0x14 move down by two; 0..0xC map to themselves
1.170 + aKey1 = kana_swap_table[aKey1];
1.171 + aKey2 = kana_swap_table[aKey2];
1.172 + }
1.173 + }
1.174 +
1.175 +// Returns the position of the first unescaped occurrence of aChar in the aLength 16-bit units at aString, or aLength if there is none.
1.176 +// A unit equal to aEscapeChar escapes the unit after it, which is then never matched. With aEscapeChar == 0 a zero unit in the string still acts as an escape.
1.177 +static TInt FindCharacter(TInt aChar, TInt aEscapeChar, const TUint16* aString, TInt aLength)
1.178 + {
1.179 + TBool isEscaped = EFalse; // true while the current unit follows an escape unit
1.180 + for(TInt pos=0;pos!=aLength;++pos,++aString)
1.181 + {
1.182 + if(isEscaped)
1.183 + { // an escaped unit is skipped whatever its value
1.184 + isEscaped = EFalse;
1.185 + }
1.186 + else if(*aString == aEscapeChar) // tested before aChar, so aChar == aEscapeChar can never be found
1.187 + {
1.188 + isEscaped = ETrue;
1.189 + }
1.190 + else if(*aString == aChar)
1.191 + {
1.192 + if(!isEscaped) // always true here: the escaped case was taken by the first branch
1.193 + {
1.194 + return pos;
1.195 + }
1.196 + }
1.197 + }
1.198 + return aLength;
1.199 + }
1.200 +
1.201 +/*
1.202 +The standard collation data, containing keys for all the WGL4 characters, plus
1.203 +commonly-used control characters and spaces. Generated by COLTAB.
1.204 +*/
1.205 +static const TUint32 TheKey[] =
1.206 + {
1.207 + 0x21e0112,0x21e0113,0x2260112,0x2260112,0x2260113,0x2740112,0x2740113,0x6c60178,
1.208 + 0x266017a,0x6c70179,0x6c60178,0x266017a,0x6c90179,0x6c60178,0x266017a,0x6cd0179,
1.209 + 0x6c80178,0x266017a,0x6c90179,0x6c80178,0x266017a,0x6cd0179,0x6ca0178,0x266017a,
1.210 + 0x6cd0179,0x6cc0178,0x266017a,0x6cd0179,0x6f70110,0x2650112,0x8050111,0x74b0110,
1.211 + 0x78d0111,0x74b0110,0x7bd0111,0x78d0110,0x7a10111,0x78d0128,0x7a10129,0x7bd0110,
1.212 + 0x2290113,0x7bd0128,0x2290113,0x7ed0128,0x8050111,0x805dd10,0x71f0111,0x805dd28,
1.213 + 0x71f0129,0x85ddd10,0x85d0111,0x8750150,0x7e50151,0x9060110,0x7ed0111,0x3,
1.214 + 0x201010b,0x202010b,0x203010b,0x204010b,0x205010b,0x206010b,0x207010b,0x208010b,
1.215 + 0x209010b,0x2090113,0x209016f,0x209020b,0x209130b,0x209160b,0x209180b,0x2091d0b,
1.216 + 0x209240b,0x209280b,0x2092a0b,0x2092f0b,0x209330b,0x209360b,0x209390b,0x2093b0b,
1.217 + 0x2093f0b,0x2096b0b,0x20b010b,0x20c010b,0x20d010b,0x20d016f,0x20e010b,0x20f010b,
1.218 + 0x210010b,0x211010b,0x214010b,0x21a010b,0x21c010b,0x21e010b,0x21f010b,0x221010b,
1.219 + 0x222010b,0x226010b,0x229010b,0x22d010b,0x22e010b,0x22f010b,0x230010b,0x231010b,
1.220 + 0x232010b,0x233010b,0x234010b,0x235010b,0x236010b,0x237010b,0x23c010b,0x23d010b,
1.221 + 0x23e010b,0x23f010b,0x240010b,0x241010b,0x242010b,0x243010b,0x25e010b,0x25f010b,
1.222 + 0x260010b,0x261010b,0x262010b,0x263010b,0x265010b,0x266010b,0x267010b,0x268010b,
1.223 + 0x269010b,0x26a010b,0x26c010b,0x26e010b,0x26f010b,0x270010b,0x274010b,0x2ac010b,
1.224 + 0x2ad010b,0x2af010b,0x2d6010b,0x2ff010b,0x300010b,0x301010b,0x302010b,0x303010b,
1.225 + 0x304010b,0x317010b,0x35c010b,0x35f010b,0x366010b,0x368010b,0x369010b,0x36a010b,
1.226 + 0x36b010b,0x36c010b,0x36d010b,0x36e010b,0x36f010b,0x370010b,0x371010b,0x372010b,
1.227 + 0x374010b,0x375010b,0x378010b,0x37c010b,0x37d010b,0x381010b,0x382010b,0x38a010b,
1.228 + 0x38c010b,0x3a2010b,0x3b9010b,0x3bb010b,0x3bc010b,0x42f010b,0x43d010b,0x44d010b,
1.229 + 0x44e010b,0x4d6010b,0x4d8010b,0x4e2010b,0x4e6010b,0x4ea010b,0x4ee010b,0x4f2010b,
1.230 + 0x4fa010b,0x502010b,0x50a010b,0x512010b,0x526010b,0x527010b,0x528010b,0x529010b,
1.231 + 0x52a010b,0x52b010b,0x52c010b,0x52d010b,0x52e010b,0x52f010b,0x530010b,0x531010b,
1.232 + 0x532010b,0x533010b,0x534010b,0x535010b,0x536010b,0x537010b,0x538010b,0x539010b,
1.233 + 0x53a010b,0x53b010b,0x53c010b,0x53d010b,0x53e010b,0x53f010b,0x540010b,0x541010b,
1.234 + 0x542010b,0x556010b,0x55a010b,0x55e010b,0x562010b,0x566010b,0x567010b,0x568010b,
1.235 + 0x569010b,0x56c010b,0x56d010b,0x576010b,0x577010b,0x578010b,0x57e010b,0x586010b,
1.236 + 0x588010b,0x590010b,0x596010b,0x597010b,0x59b010b,0x5a4010b,0x5a5010b,0x5b2010b,
1.237 + 0x5f0010b,0x5f1010b,0x5f2010b,0x5f6010b,0x5f8010b,0x616010b,0x619010b,0x61b010b,
1.238 + 0x61c010b,0x620010b,0x621010b,0x6b4010b,0x6b5010b,0x1309,0x1609,0x1809,
1.239 + 0x1d09,0x2209,0x2409,0x2809,0x2f09,0x3009,0x3309,0x3609,
1.240 + 0x3909,0x3b09,0x4109,0x2c20109,0x2c30109,0x2c40109,0x2c50109,0x2c60109,
1.241 + 0x2cd0109,0x2ce0109,0x2d10109,0x2d50109,0x2fa0109,0x6c50109,0x6c60109,0x6c60151,
1.242 + 0x6c70109,0x6c70151,0x6c80109,0x6c80151,0x6c90109,0x6ca0109,0x6cb0109,0x6cc0109,
1.243 + 0x6cd0109,0x6ce0109,0x6cf0109,0x6cf0121,0x6cf0151,0x6d30109,0x6d30121,0x6e30109,
1.244 + 0x6e30121,0x6f70109,0x6f70121,0x7030109,0x7030121,0x7070109,0x7070121,0x7170109,
1.245 + 0x7170121,0x71f0109,0x71f0121,0x74b0109,0x74b0121,0x74f0109,0x7530109,0x7530121,
1.246 + 0x7730109,0x7730121,0x77f0109,0x77f0121,0x78d0109,0x78d0121,0x7910109,0x7a10109,
1.247 + 0x7a10121,0x7b10109,0x7b10121,0x7bd0109,0x7bd0115,0x7bd0121,0x7c50109,0x7c50121,
1.248 + 0x7e50109,0x7e50121,0x7ed0109,0x7ed0121,0x7ed0151,0x8010109,0x8010121,0x8050109,
1.249 + 0x8050121,0x8050151,0x80d0109,0x80d0121,0x81d0109,0x81d0121,0x8290109,0x8290121,
1.250 + 0x8310109,0x8350109,0x8350121,0x85d0109,0x85d0121,0x85dde11,0x8750109,0x8750121,
1.251 + 0x8790109,0x8790121,0x88d0109,0x88d0121,0x8a50109,0x8a50121,0x8b10109,0x8b10121,
1.252 + 0x8b90109,0x8b90121,0x8bd0109,0x8bd0121,0x8c90109,0x8c90121,0x8e90109,0x8e90121,
1.253 + 0x9360109,0x9360121,0x9370109,0x9370121,0x9380109,0x9380121,0x9390109,0x9390121,
1.254 + 0x93a0109,0x93a0121,0x93d0109,0x93d0121,0x93e0109,0x93e0121,0x93f0109,0x93f0121,
1.255 + 0x9400109,0x9400121,0x9420109,0x9420121,0x9430109,0x9430121,0x9440109,0x9440111,
1.256 + 0x9440121,0x9450109,0x9450121,0x9460109,0x9460121,0x9470109,0x9470121,0x9480109,
1.257 + 0x9480121,0x94a0109,0x94a0121,0x94b0109,0x94b0121,0x94c0109,0x94c0121,0x94d0109,
1.258 + 0x94d0121,0x94e0109,0x94e0121,0x94f0109,0x94f0121,0x9500109,0x9500121,0x9510109,
1.259 + 0x9510121,0x95a0109,0x95a0121,0x9660109,0x9660121,0x96a0109,0x96a0121,0x96e0109,
1.260 + 0x96e0121,0x9720109,0x9720121,0x97e0109,0x97e0121,0x9820109,0x9820121,0x98a0109,
1.261 + 0x98a0121,0x98e0109,0x98e0121,0x9920109,0x9920121,0x99a0109,0x99a0121,0x99e0109,
1.262 + 0x99e0121,0x9a60109,0x9a60121,0x9aa0109,0x9aa0121,0x9ae0109,0x9ae0121,0x9b20109,
1.263 + 0x9b20121,0x9ca0109,0x9ca0121,0x9ce0109,0x9ce0121,0x9d20109,0x9d20121,0x9d60109,
1.264 + 0x9d60121,0x9e60109,0x9e60121,0x9ea0109,0x9ea0121,0x9f20109,0x9f20121,0x9fe0109,
1.265 + 0x9fe0121,0xa020109,0xa020121,0xa0a0109,0xa0a0121,0xa120109,0xa120121,0xa160109,
1.266 + 0xa160121,0xa260109,0xa260121,0xa2a0109,0xa2a0121,0xa460109,0xa460121,0xa4e0109,
1.267 + 0xa4e0121,0xa660109,0xa660121,0xa6a0109,0xa6a0121,0xa6e0109,0xa6e0121,0xa720109,
1.268 + 0xa720121,0xa760109,0xa760121,0xa7a0109,0xa7a0121,0xa820109,0xa820121,0xa860109,
1.269 + 0xa860121,0xa8a0109,0xa8a0121,
1.270 + };
1.271 +
1.272 +static const TUint32 TheIndex[] =
1.273 + {
1.274 + 0x37,0x10037,0x20037,0x30037,0x40037,0x50037,0x60037,0x70037,
1.275 + 0x80037,0x90038,0xa0039,0xb003a,0xc003b,0xd003c,0xe0037,0xf0037,
1.276 + 0x100037,0x110037,0x120037,0x130037,0x140037,0x150037,0x160037,0x170037,
1.277 + 0x180037,0x190037,0x1a0037,0x1b0037,0x1c0037,0x1d0037,0x1e0037,0x1f0037,
1.278 + 0x200040,0x21005d,0x22006a,0x230080,0x24010d,0x250081,0x26007f,0x270063,
1.279 + 0x280070,0x290071,0x2a007b,0x2b0096,0x2c005a,0x2d0053,0x2e0061,0x2f007c,
1.280 + 0x300115,0x310116,0x320118,0x33011a,0x34011c,0x35011d,0x36011e,0x37011f,
1.281 + 0x380120,0x390121,0x3a005c,0x3b005b,0x3c009a,0x3d009b,0x3e009c,0x3f005f,
1.282 + 0x40007a,0x410123,0x420128,0x43012a,0x44012c,0x450132,0x460134,0x470137,
1.283 + 0x480139,0x49013d,0x4a0140,0x4b0142,0x4c0145,0x4d0149,0x4e014b,0x4f0150,
1.284 + 0x500155,0x510157,0x52015a,0x53015c,0x54015f,0x550163,0x560165,0x570167,
1.285 + 0x580169,0x59016b,0x5a016d,0x5b0072,0x5c007e,0x5d0073,0x5e0047,0x5f0043,
1.286 + 0x600045,0x610122,0x620127,0x630129,0x64012b,0x650131,0x660133,0x670136,
1.287 + 0x680138,0x69013c,0x6a013f,0x6b0141,0x6c0143,0x6d0148,0x6e014a,0x6f014f,
1.288 + 0x700154,0x710156,0x720159,0x73015b,0x74015e,0x750162,0x760164,0x770166,
1.289 + 0x780168,0x79016a,0x7a016c,0x7b0074,0x7c009e,0x7d0075,0x7e00a0,0xa00042,
1.290 + 0xa1005e,0xa2010c,0xa3010e,0xa4010b,0xa5010f,0xa6009f,0xa70076,0xa80049,
1.291 + 0xa90078,0xaa0124,0xab006e,0xac009d,0xad0052,0xae0079,0xaf004f,0xb0008a,
1.292 + 0xb10097,0xb20119,0xb3011b,0xb40044,0xb50187,0xb60077,0xb70062,0xb8004d,
1.293 + 0xb90117,0xba0151,0xbb006f,0xbc000a,0xbd0007,0xbe0010,0xbf0060,0xc60126,
1.294 + 0xd00130,0xd70099,0xd80153,0xde016f,0xdf0031,0xe60125,0xf0012f,0xf70098,
1.295 + 0xf80152,0xfe016e,0x110012e,0x111012d,0x126013b,0x127013a,0x131013e,0x1320025,
1.296 + 0x1330023,0x1380158,0x13f0029,0x1400027,0x1410147,0x1420146,0x1490035,0x14a014e,
1.297 + 0x14b014d,0x152002f,0x153002d,0x1660161,0x1670160,0x17f015d,0x1920135,0x2c60087,
1.298 + 0x2c70088,0x2c90089,0x2d80046,0x2d9004c,0x2da0048,0x2db004e,0x2dd004b,0x30000fe,
1.299 + 0x30100fd,0x3020100,0x3030105,0x3040109,0x30600ff,0x3070106,0x3080103,0x30a0102,
1.300 + 0x30b0104,0x30c0101,0x30d010a,0x3270107,0x3280108,0x3840044,0x385004a,0x3870062,
1.301 + 0x3910171,0x3920173,0x3930175,0x3940177,0x3950179,0x396017b,0x397017d,0x398017f,
1.302 + 0x3990181,0x39a0183,0x39b0185,0x39c0188,0x39d018a,0x39e018c,0x39f018e,0x3a00190,
1.303 + 0x3a10192,0x3a30194,0x3a40196,0x3a50198,0x3a6019a,0x3a7019c,0x3a8019e,0x3a901a0,
1.304 + 0x3b10170,0x3b20172,0x3b30174,0x3b40176,0x3b50178,0x3b6017a,0x3b7017c,0x3b8017e,
1.305 + 0x3b90180,0x3ba0182,0x3bb0184,0x3bc0186,0x3bd0189,0x3be018b,0x3bf018d,0x3c0018f,
1.306 + 0x3c10191,0x3c20193,0x3c30193,0x3c40195,0x3c50197,0x3c60199,0x3c7019b,0x3c8019d,
1.307 + 0x3c9019f,0x40201ae,0x40401b2,0x40501b8,0x40601bc,0x40801be,0x40901c4,0x40a01ca,
1.308 + 0x40b01d6,0x40f01e2,0x41001a2,0x41101a4,0x41201a6,0x41301a8,0x41401ac,0x41501b0,
1.309 + 0x41601b4,0x41701b6,0x41801ba,0x41a01c0,0x41b01c2,0x41c01c6,0x41d01c8,0x41e01cc,
1.310 + 0x41f01ce,0x42001d0,0x42101d2,0x42201d4,0x42301d8,0x42401da,0x42501dc,0x42601de,
1.311 + 0x42701e0,0x42801e4,0x42901e6,0x42a01e8,0x42b01ea,0x42c01ec,0x42d01ee,0x42e01f0,
1.312 + 0x42f01f2,0x43001a1,0x43101a3,0x43201a5,0x43301a7,0x43401ab,0x43501af,0x43601b3,
1.313 + 0x43701b5,0x43801b9,0x43a01bf,0x43b01c1,0x43c01c5,0x43d01c7,0x43e01cb,0x43f01cd,
1.314 + 0x44001cf,0x44101d1,0x44201d3,0x44301d7,0x44401d9,0x44501db,0x44601dd,0x44701df,
1.315 + 0x44801e3,0x44901e5,0x44a01e7,0x44b01e9,0x44c01eb,0x44d01ed,0x44e01ef,0x44f01f1,
1.316 + 0x45201ad,0x45401b1,0x45501b7,0x45601bb,0x45801bd,0x45901c3,0x45a01c9,0x45b01d5,
1.317 + 0x45f01e1,0x49001aa,0x49101a9,0x20000041,0x20010041,0x20020041,0x20030041,0x20040041,
1.318 + 0x20050041,0x20060041,0x20070042,0x20080041,0x20090041,0x200a0041,0x200b003f,0x200c0037,
1.319 + 0x200d0037,0x200e0037,0x200f0037,0x20100054,0x20110055,0x20120056,0x20130057,0x20140058,
1.320 + 0x20150059,0x20170051,0x20180064,0x20190065,0x201a0066,0x201b0067,0x201c006b,0x201d006c,
1.321 + 0x201e006d,0x20200083,0x20210084,0x20220085,0x20260002,0x2028003d,0x2029003e,0x202a0037,
1.322 + 0x202b0037,0x202c0037,0x202d0037,0x202e0037,0x20300082,0x20320086,0x20330005,0x20390068,
1.323 + 0x203a0069,0x203c0000,0x203e0050,0x2044007d,0x207f014c,0x20a30110,0x20a40111,0x20a70112,
1.324 + 0x20ac0113,0x2105001c,0x21130144,0x2116002b,0x21220033,0x212601a0,0x212e0114,0x215b000d,
1.325 + 0x215c0013,0x215d0016,0x215e0019,0x2190008b,0x2191008d,0x2192008c,0x2193008e,0x2194008f,
1.326 + 0x21950090,0x21a80091,0x22020092,0x22060093,0x220f0094,0x22110095,0x221200a1,0x221500a2,
1.327 + 0x221900a3,0x221a00a4,0x221e00a5,0x221f00a6,0x222900a7,0x222b00a8,0x224800a9,0x226100aa,
1.328 + 0x226400ab,0x226500ac,0x230200ad,0x231000ae,0x232000af,0x232100b0,0x250000b1,0x250200b2,
1.329 + 0x250c00b3,0x251000b4,0x251400b5,0x251800b6,0x251c00b7,0x252400b8,0x252c00b9,0x253400ba,
1.330 + 0x253c00bb,0x255000bc,0x255100bd,0x255200be,0x255300bf,0x255400c0,0x255500c1,0x255600c2,
1.331 + 0x255700c3,0x255800c4,0x255900c5,0x255a00c6,0x255b00c7,0x255c00c8,0x255d00c9,0x255e00ca,
1.332 + 0x255f00cb,0x256000cc,0x256100cd,0x256200ce,0x256300cf,0x256400d0,0x256500d1,0x256600d2,
1.333 + 0x256700d3,0x256800d4,0x256900d5,0x256a00d6,0x256b00d7,0x256c00d8,0x258000d9,0x258400da,
1.334 + 0x258800db,0x258c00dc,0x259000dd,0x259100de,0x259200df,0x259300e0,0x25a000e1,0x25a100e2,
1.335 + 0x25aa00e3,0x25ab00e4,0x25ac00e5,0x25b200e6,0x25ba00e7,0x25bc00e8,0x25c400e9,0x25ca00ea,
1.336 + 0x25cb00eb,0x25cf00ec,0x25d800ed,0x25d900ee,0x25e600ef,0x263a00f0,0x263b00f1,0x263c00f2,
1.337 + 0x264000f3,0x264200f4,0x266000f5,0x266300f6,0x266500f7,0x266600f8,0x266a00f9,0x266b00fa,
1.338 + 0xfb01001f,0xfb020021,0xfeff0037,0xfffc00fb,0xfffd00fc,
1.339 + };
1.340 +
1.341 +static const TCollationKeyTable TheStandardTable = // the table built from TheKey and TheIndex above
1.342 + { TheKey, TheIndex, 517, NULL, NULL, 0 }; // 517 == number of entries in TheIndex; the NULL, NULL, 0 are presumably the (absent) string tables - confirm against TCollationKeyTable
1.343 +
1.344 +const TCollationKeyTable* StandardCollationMethod()
1.345 + { // Returns the address of the built-in standard collation key table (TheStandardTable); ownership stays with this file.
1.346 + return &TheStandardTable;
1.347 + }
1.348 +
1.349 +inline void Increment(TUint16 const *& aPointer,TBool aNarrow)
1.350 + { // Advances aPointer in place: by one byte when aNarrow is true, otherwise by one TUint16.
1.351 + aPointer = aNarrow ? (const TUint16*)(((const TUint8*)aPointer) + 1) : aPointer + 1;
1.352 + }
1.353 +
1.354 +/////////////////////////////////////////////////////////////////////////////////////////////////
1.355 +// TCollationValueIterator
1.356 +/////////////////////////////////////////////////////////////////////////////////////////////////
1.357 +
1.358 +/**
1.359 +Re-initializes the TCollationValueIterator object to read a new character sequence.
1.360 +Any collation keys already buffered in iKey are discarded.
1.361 +@param aSourceIt An iterator used to access the input character (non-normalized or normalized) sequence.
1.362 +@internalComponent
1.363 +*/
1.364 +void TCollationValueIterator::SetSourceIt(TUTF32Iterator& aSourceIt)
1.365 + {
1.366 + iCurrentKeyPos = 0;
1.367 + iKey.iKeys = 0; // no keys buffered, so the next access produces keys from the new source
1.368 + iDecompStrIt.Set(aSourceIt);
1.369 + }
1.370 +
1.371 +/**
1.372 +Gets the current raw collation key without stepping past it.
1.373 +Note: the method may move the underlying character iterator one or more positions forward if no
1.374 + produced collation keys remain unconsumed.
1.375 +@param aKey A reference to a TCollationKey object, set to the
1.376 + current collation key after the call, if a key is available; otherwise left untouched.
1.377 +@return ETrue Successful call, aKey initialized with the current collation key,
1.378 + EFalse - the iteration has come to the end.
1.379 +@internalComponent
1.380 +*/
1.381 +TBool TCollationValueIterator::GetCurrentKey(TCollationKey& aKey)
1.382 + {
1.383 + ASSERT(iCurrentKeyPos <= iKey.iKeys);
1.384 + if(!ProduceCollationKeys()) // refills iKey when every buffered key has been consumed
1.385 + {
1.386 + return EFalse;
1.387 + }
1.388 + aKey = iKey.iKey[iCurrentKeyPos];
1.389 + return ETrue;
1.390 + }
1.391 +
1.392 +/**
1.393 +Gets the value of the current key at the specified level, without stepping past the key.
1.394 +Note: the method may move the underlying character iterator one or more positions forward if no
1.395 + produced collation keys remain unconsumed.
1.396 +@param aLevel Desired level of the collation key: 0..3
1.397 +@param aKey A reference to TUint32 where the retrieved key will be stored; left untouched at the end of the iteration.
1.398 +@return ETrue Success, EFalse - end of the iteration.
1.399 +@internalComponent
1.400 +*/
1.401 +TBool TCollationValueIterator::GetCurrentKey(TInt aLevel, TUint32& aKey)
1.402 + {
1.403 + TCollationKey rawKey;
1.404 + if(GetCurrentKey(rawKey))
1.405 + {
1.406 + //Key values are reported as 0 if their ignore bit is set, the level is less than 3 and IgnoringNone() is false:
1.407 + //in other words, the level 3 value is never ignored. This does NOT conform to the system of alternate weightings
1.408 + //described in Unicode Technical Report 10, and will probably have to be changed.
1.409 + aKey = (aLevel < 3 && (rawKey.iLow & TCollationKeyTable::EIgnoreFlag) && !IgnoringNone()) ? 0 : rawKey.Level(aLevel);
1.410 + return ETrue;
1.411 + }
1.412 + return EFalse;
1.413 + }
1.414 +
1.415 +/**
1.416 +Steps forward from the current position until a collation key with a non-zero value at the
1.417 +specified level is found. The iterator is left positioned on that key, not after it.
1.418 +@param aLevel Desired level of the collation key: 0..3
1.419 +@return Non-zero collation key value or 0 if the iteration has come to the end.
1.420 +@internalComponent
1.421 +*/
1.422 +TUint32 TCollationValueIterator::GetNextNonZeroKey(TInt aLevel)
1.423 + {
1.424 + TUint32 key = 0; // stays 0 if the iteration ends before a non-zero key is seen
1.425 + while(GetCurrentKey(aLevel, key) && key == 0)
1.426 + {
1.427 + Increment(); // skip a key that is zero at this level
1.428 + }
1.429 + return key;
1.430 + }
1.431 +
1.432 +/**
1.433 +The method determines whether the character specified as a parameter matches the iterator's
1.434 +current key, by comparing it with the level 3 value of that key.
1.435 +If there is a match, the iterator will be moved one position forward.
1.436 +Note: the method may move the underlying character iterator one or more positions forward if no
1.437 + produced collation keys remain unconsumed.
1.438 +@param aMatch The character to compare with the current iterator's character.
1.439 +@return ETrue The characters match, EFalse otherwise (or the iteration has come to the end).
1.440 +@internalComponent
1.441 +*/
1.442 +TBool TCollationValueIterator::MatchChar(TChar aMatch)
1.443 + {
1.444 + TUint32 key;
1.445 + if(GetCurrentKey(3, key)) // key is only read when this succeeds
1.446 + {
1.447 + // Find a match for the quaternary key.. will probably be the unicode value
1.448 + // This is a bit poor.
1.449 + if(aMatch == key)
1.450 + {
1.451 + Increment();
1.452 + return ETrue;
1.453 + }
1.454 + }
1.455 + return EFalse;
1.456 + }
1.457 +
1.458 +/**
1.459 +Tests whether the current collation key is marked as not being a starter. Note: the method may move the
1.460 + underlying character iterator one or more positions forward if no produced collation keys remain unconsumed.
1.461 +@return The method returns ETrue if the iterator is at a combining character, EFalse otherwise
1.462 + (or the iterator has come to the end)
1.463 +@internalComponent
1.464 +*/
1.465 +TBool TCollationValueIterator::AtCombiningCharacter()
1.466 + {
1.467 + TCollationKey rawKey;
1.468 + if(!GetCurrentKey(rawKey))
1.469 + {
1.470 + return EFalse; // iteration ended
1.471 + }
1.472 + return rawKey.IsStarter() ? (TBool)EFalse : (TBool)ETrue; // "combining" here means the key's IsStarter() is false
1.473 + }
1.474 +
1.475 +/**
1.476 +Skips any combining characters found at the current position, stopping at the first key that is not one.
1.477 +Note: the method may move the iterator one or more positions forward.
1.478 +@return The number of skipped combining characters (one is counted for each Increment() step taken).
1.479 +@internalComponent
1.480 +*/
1.481 +TInt TCollationValueIterator::SkipCombiningCharacters()
1.482 + {
1.483 + TInt count;
1.484 + for(count=0;AtCombiningCharacter();++count) // ends at a starter or at the end of the iteration
1.485 + {
1.486 + Increment();
1.487 + }
1.488 + return count;
1.489 + }
1.490 +
1.491 +/**
1.492 +Moves the iterator one collation key forward, making the next collation key available for getting
1.493 +using GetCurrentKey().
1.494 +@return ETrue Successful call, the current key has been stepped over.
1.495 + EFalse - the iteration has come to the end and nothing was changed.
1.496 +@internalComponent
1.497 +@see TCollationValueIterator::GetCurrentKey()
1.498 +*/
1.499 +TBool TCollationValueIterator::Increment()
1.500 + {
1.501 + ASSERT(iCurrentKeyPos <= iKey.iKeys);
1.502 + if(!ProduceCollationKeys()) // make sure there is a current key to step over
1.503 + {
1.504 + return EFalse;
1.505 + }
1.506 + ++iCurrentKeyPos; // may now equal iKey.iKeys; the next access then produces more keys
1.507 + return ETrue;
1.508 + }
1.509 +
1.510 +/**
1.511 +Returns the position in the underlying string of the iteration,
1.512 +if this is well defined. It is not well defined if either we are
1.513 +half way through keys defined as a string in the collation table
1.514 +or if we are half way through a canonically reordered sequence.
1.515 +@return The position in the underlying string if this is well
1.516 + defined, or 0 if it is not. At the end of the iteration iCurrentPosition is returned as it stands.
1.517 +*/
1.518 +const TText16* TCollationValueIterator::CurrentPositionIfAtCharacter()
1.519 + {
1.520 + if (!ProduceCollationKeys()) // end of the iteration
1.521 + return iCurrentPosition;
1.522 + return iCurrentKeyPos == 0? iCurrentPosition : 0; // only well defined at the first key of the current key sequence
1.523 + }
1.524 +
1.525 +/**
1.526 +Produces the longest possible collation keys sequence using the decomposed character sequence,
1.527 +pointed by iDecompStrIt iterator. But this will happen only if all keys from iKey array are
1.528 +consumed; otherwise the call changes nothing and returns ETrue.
1.529 +@return ETrue Successful call, iKey holds at least one collation key that has not been consumed,
1.530 + EFalse - the iteration has come to the end.
1.531 +@internalComponent
1.532 +*/
1.533 +TBool TCollationValueIterator::ProduceCollationKeys()
1.534 + {
1.535 + //iKey.iKeys represents the keys count in iKey array, so load more keys, only if all
1.536 + //collation keys are already consumed.
1.537 + if(iCurrentKeyPos == iKey.iKeys)
1.538 + {
1.539 + iCurrentPosition = iDecompStrIt.CurrentPositionIfAtCharacter(); // recorded before any input is consumed, and also when at the end
1.540 + if(iDecompStrIt.AtEnd())
1.541 + {//No more characters in the input decomposed canonical string
1.542 + return EFalse;
1.543 + }
1.544 + //Try to get the next collation key sequence. There should be at least one key.
1.545 + GetNextRawKeySequence();
1.546 + ASSERT(iKey.iKeys > 0);
1.547 + iCurrentKeyPos = 0; // restart at the first of the newly produced keys
1.548 + }
1.549 + return ETrue;
1.550 + }
1.551 +
1.552 +/**
1.553 +Consume one or more characters from the input and convert them into collation keys stored in iKey. The override
1.554 +table is tried first, then the main table, then a default key sequence is generated. @internalComponent
1.555 +*/
1.556 +void TCollationValueIterator::GetNextRawKeySequence()
1.557 + {
1.558 + //Store the first character combining class type for later use.
1.559 + TChar firstChar = iDecompStrIt.Get(0);
1.560 + TBool combining = !::IsBaseCharacter(firstChar);
1.561 + // Initialise.
1.562 + iKey.iCharactersConsumed = 0;
1.563 + iKey.iKeys = 0;
1.564 + // See if the override table has a key for the current collation unit.
1.565 + if(iMethod.iOverrideTable)
1.566 + {
1.567 + GetKeyFromTable(iMethod.iOverrideTable);
1.568 + }
1.569 + // If not, try the main table.
1.570 + if(iKey.iCharactersConsumed == 0)
1.571 + {
1.572 + GetKeyFromTable(iMethod.iMainTable);
1.573 + }
1.574 + //If no key was found use a default value depending on the current character.
1.575 + //For CJK characters:
1.576 + //the Unicode value itself as the primary key and 1 as the secondary and tertiary keys;
1.577 + //the lower 16 bits end up as 0x0105 because the bottom two bits are used for the ignorable bit,
1.578 + //which is clear, and the stop bit, which is set.
1.579 + //For other characters:
1.580 + //Return two keys containing the 21 bits of the character code (anything from 0 to 0x10FFFF), as
1.581 + //explained in Unicode Technical Report 10.
1.582 + if(iKey.iCharactersConsumed == 0)
1.583 + {
1.584 + iKey.iCharactersConsumed = 1;
1.585 + iDecompStrIt.Next(1); // the table lookups consumed nothing, so step over the character here
1.586 + iKey.iKeys = ::CreateDefaultCollationKeySequence(firstChar, iKey.iKey);
1.587 + }
1.588 + if(!combining)
1.589 + { // only the first key of the sequence is flagged as a starter
1.590 + iKey.iKey[0].iHigh |= (TUint32)TCollationKey::KFlagIsStarter;
1.591 + }
1.592 + }
1.593 +
1.594 +/**
1.595 +Binary-search the string index of aTable for the string aText of aLength units.
1.596 +Put the key index in aIndex if found, otherwise set aIndex to -1.
1.597 +If the sought string is found, or is seen to be a proper prefix of a string in the table, set aPossiblePrefix to TRUE; otherwise to FALSE.
1.598 +@internalComponent
1.599 +*/
1.600 +static void GetStringKey(const TCollationKeyTable* aTable,const TText* aText,TInt aLength,
1.601 + TInt& aIndex,TBool& aPossiblePrefix)
1.602 + {
1.603 + aIndex = -1;
1.604 + aPossiblePrefix = EFalse;
1.605 + TInt n = aTable->iStringIndices; // number of entries still to be searched
1.606 + const TUint32* base = aTable->iStringIndex; // first entry of the range still to be searched
1.607 + const TUint32* p = base;
1.608 + TInt pivot;
1.609 + while (n > 0)
1.610 + {
1.611 + pivot = n / 2;
1.612 + p += pivot;
1.613 + TUint16 string_index = (TUint16)(*p >> 16); // upper 16 bits: offset of the string's record in aTable->iString
1.614 + const TText* cur_text = aTable->iString + string_index + 1; // the text follows its length unit
1.615 + TInt cur_length = aTable->iString[string_index]; // first unit of the record is the length
1.616 + TInt order = TUnicode::Compare(aText,aLength,cur_text,cur_length);
1.617 + if (order == 0) // found it
1.618 + {
1.619 + aIndex = *p & 0xFFFF; // lower 16 bits: the key index
1.620 + aPossiblePrefix = ETrue;
1.621 + break;
1.622 + }
1.623 + if (order < 1 && !aPossiblePrefix) // NOTE(review): once aPossiblePrefix is set the search always goes right, even when aText sorts before the pivot - confirm this is intended
1.624 + {
1.625 + if (aLength < cur_length && TUnicode::Compare(aText,aLength,cur_text,aLength) == 0)
1.626 + aPossiblePrefix = ETrue; // aText equals the first aLength units of this table string
1.627 + n = pivot; // continue in the entries before the pivot
1.628 + }
1.629 + else
1.630 + {
1.631 + base = p + 1; // continue in the entries after the pivot
1.632 + n -= pivot + 1;
1.633 + }
1.634 + p = base;
1.635 + }
1.636 + }
1.637 +
1.638 +/**
1.639 +Looks up the character(s) at the front of iDecompStrIt in aTable: first as the longest string of two or more characters that has its own key (only as many characters as fit in a buffer of KKeyedStringBufferSize units are considered), then as a single character in the main index.
1.640 +@param aTable A const pointer to the collation key table used by the method. Must not be NULL.
1.641 +On a match the matched characters are consumed from iDecompStrIt and iKey is filled with their keys; otherwise iKey.iCharactersConsumed and iKey.iKeys are left at 0 and nothing is consumed. @internalComponent
1.642 +*/
1.643 +void TCollationValueIterator::GetKeyFromTable(const TCollationKeyTable* aTable)
1.644 + {
1.645 + ASSERT(aTable != NULL);
1.646 + iKey.iCharactersConsumed = 0;
1.647 + iKey.iKeys = 0;
1.648 +
1.649 + TInt cur_char = iDecompStrIt.Get(0);
1.650 +
1.651 + // Find the longest matching string.
1.652 + TInt index = -1;
1.653 + if(aTable->iStringIndices > 0)
1.654 + {
1.655 + TInt moved = 0;
1.656 + TText text[KKeyedStringBufferSize]; // the candidate string, with supplementary characters stored as surrogate pairs
1.657 + TInt textLen = 0; // units used in text[]
1.658 + if (cur_char <= 0xFFFF)
1.659 + {
1.660 + text[textLen++] = static_cast <TText> (cur_char);
1.661 + }
1.662 + else
1.663 + {
1.664 + text[textLen++] = GetHighSurrogate(cur_char);
1.665 + text[textLen++] = GetLowSurrogate(cur_char);
1.666 + }
1.667 + TBool possible_prefix = ETrue;
1.668 + for(TInt i = 1; (i < KKeyedStringBufferSize) && possible_prefix; i++) // extend the candidate one character at a time while a longer table string could still match
1.669 + {
1.670 + ++moved;
1.671 + TInt c = iDecompStrIt.Get(i);//get the next character
1.672 + if(c == -1 || textLen + (c <= 0xFFFF ? 1 : 2) > KKeyedStringBufferSize) // end of input, or no room left in text[] for this character (a supplementary character needs two units)
1.673 + {
1.674 + break;
1.675 + }
1.676 + if (c <= 0xFFFF)
1.677 + {
1.678 + text[textLen++] = static_cast <TText> (c);
1.679 + }
1.680 + else
1.681 + {
1.682 + text[textLen++] = GetHighSurrogate(c);
1.683 + text[textLen++] = GetLowSurrogate(c);
1.684 + }
1.685 + TInt cur_index = -1;
1.686 + ::GetStringKey(aTable, text, textLen, cur_index, possible_prefix);
1.687 + if(cur_index != -1)
1.688 + { // remember the longest match so far
1.689 + index = cur_index;
1.690 + iKey.iCharactersConsumed = i + 1;
1.691 + }
1.692 + }
1.693 + if (iKey.iCharactersConsumed < moved) // looked further ahead than the longest match
1.694 + {
1.695 + moved = 0;
1.696 + }
1.697 + while (moved != iKey.iCharactersConsumed) // leaves moved equal to the number of characters matched
1.698 + {
1.699 + ++moved;
1.700 + }
1.701 + if(moved > 0)
1.702 + {
1.703 + iDecompStrIt.Next(moved);//adjust the iterator start position
1.704 + }
1.705 + }
1.706 +
1.707 + // Now search the main index.
1.708 + if(index == -1)
1.709 + {
1.710 + index = ::FindCollationKeyIndex(cur_char, *aTable);
1.711 + if(0 <= index)
1.712 + {
1.713 + iKey.iCharactersConsumed = 1;
1.714 + iDecompStrIt.Next(1);//adjust the iterator start position
1.715 + }
1.716 + }
1.717 +
1.718 + // Fill in the key or keys.
1.719 + if(index != -1)
1.720 + {
1.721 + const TUint32* p = &aTable->iKey[index];
1.722 + TCollationKey* q = iKey.iKey;
1.723 + iKey.iKeys = 0;
1.724 + while(iKey.iKeys < TKeyInfo::EMaxKeys) // never store more keys than iKey.iKey is sized for
1.725 + {
1.726 + q->iLow = *p;
1.727 + q->iHigh = cur_char; // every key of the sequence records the first character
1.728 + iKey.iKeys++;
1.729 + if(*p & 1) // stop bit: this was the last key of the sequence
1.730 + {
1.731 + break;
1.732 + }
1.733 + q++;
1.734 + p++;
1.735 + }
1.736 + }
1.737 + }
1.738 +
1.739 +/////////////////////////////////////////////////////////////////////////////////////////////////
1.740 +// TCollate
1.741 +/////////////////////////////////////////////////////////////////////////////////////////////////
1.742 +
1.743 +/**
1.744 +Builds a collator from the first collation method carried by aCharSet.
1.745 +If aCharSet is null, or has no collation data set or no method array, the
1.746 +method starts out with no tables and no flags. A method that ends up without
1.747 +a main table uses TheStandardTable instead.
1.748 +aMask and aFlags then override flags of the chosen method: each bit set in
1.749 +aMask is replaced by the corresponding bit of aFlags; all other flag bits are
1.750 +left as they were. Ownership of aCharSet is not passed.
1.751 +@param aCharSet Locale-specific character attribute and collation data; may be null
1.752 +@param aMask Selects the flag bits of the collation method that are overridden
1.753 +@param aFlags Supplies the values for the flag bits selected by aMask
1.754 +@internalComponent
1.755 +*/
1.756 +TCollate::TCollate(const LCharSet* aCharSet, TUint aMask, TUint aFlags)
1.757 +    {
1.758 +    // Start from an empty method so that the fields read below have a
1.759 +    // defined value even when the locale supplies nothing.
1.760 +    iMethod.iFlags = 0;
1.761 +    iMethod.iOverrideTable = NULL;
1.762 +    iMethod.iMainTable = NULL;
1.763 +    // Prefer the locale's first collation method when one is available.
1.764 +    const TBool localeHasMethod =
1.765 +        aCharSet && aCharSet->iCollationDataSet && aCharSet->iCollationDataSet->iMethod;
1.766 +    if (localeHasMethod)
1.767 +        iMethod = aCharSet->iCollationDataSet->iMethod[0];
1.768 +    // A method without a main table falls back to the standard table.
1.769 +    if (!iMethod.iMainTable)
1.770 +        iMethod.iMainTable = &TheStandardTable;
1.771 +    // Replace only the flag bits selected by aMask; keep all the others.
1.772 +    if (aMask != 0)
1.773 +        iMethod.iFlags = (iMethod.iFlags & ~aMask) | (aMask & aFlags);
1.774 +    }
1.775 +
1.776 +/**
1.777 +Builds a collator from a copy of an existing collation method.
1.778 +If aMethod has no main table, TheStandardTable is used in its place.
1.779 +Ownership is not passed.
1.780 +@param aMethod Collation method to copy
1.781 +@internalComponent
1.782 +*/
1.783 +TCollate::TCollate(const TCollationMethod& aMethod)
1.784 +    : iMethod(aMethod)
1.785 +    {
1.786 +    // The copied method may come without a main table; the collator
1.787 +    // then uses the standard table.
1.788 +    if (iMethod.iMainTable == NULL)
1.789 +        iMethod.iMainTable = &TheStandardTable;
1.790 +    }
1.791 +
1.792 +/**
1.793 +Collate the aLength1 16-bit units starting at aString1 against the aLength2
1.794 +16-bit units starting at aString2. No wild card or escape character is used.
1.795 +
1.796 +@param aString1 Left-hand string of the comparison
1.797 +@param aLength1 Length of aString1
1.798 +@param aString2 Right-hand string of the comparison
1.799 +@param aLength2 Length of aString2
1.800 +@param aMaxLevel Determines the tightness of the collation. At level 0, only
1.801 + character identities are distinguished. At level 1 accents are
1.802 + distinguished as well. At level 2 case is distinguished as well. At
1.803 + level 3 all non canonically equivalent Unicode characters are considered
1.804 + different. By default aMaxLevel is 3.
1.805 +@return EStringsIdentical The strings are identical.
1.806 + ELeftComparesLessAndIsNotPrefix For example: aString1 = "aaa", aString2 = "zzzz".
1.807 + ELeftIsPrefixOfRight For example: aString1 = "abc", aString2 = "abcd".
1.808 + ERightIsPrefixOfLeft For example: aString1 = "abcd", aString2 = "abc".
1.809 + ERightComparesLessAndIsNotPrefix For example: aString1 = "zzzz", aString2 = "aaa".
1.810 +@internalComponent
1.811 +*/
1.812 +TCollate::TComparisonResult TCollate::Compare(const TUint16 *aString1, TInt aLength1,
1.813 +    const TUint16 *aString2, TInt aLength2, TInt aMaxLevel) const
1.814 +    {
1.815 +    // The two trailing zeros mean: no wild card character, no escape character.
1.816 +    TUTF32Iterator leftText(aString1, aString1 + aLength1);
1.817 +    TUTF32Iterator rightText(aString2, aString2 + aLength2);
1.818 +    return CompareKeySequences(leftText, rightText, aMaxLevel, 0, 0);
1.819 +    }
1.820 +
1.821 +/**
1.822 +Search the string beginning at aString1 of length aLength1 for the string
1.823 +beginning at aString2 of length aLength2, without reporting the match length.
1.824 +
1.825 +@param aString1 String to search
1.826 +@param aLength1 Length of aString1
1.827 +@param aString2 String to search for
1.828 +@param aLength2 Length of aString2
1.829 +@param aMaxLevel Determines the tightness of the collation. At level 0, only
1.830 + character identities are distinguished. At level 1 accents are
1.831 + distinguished as well. At level 2 case is distinguished as well. At
1.832 + level 3 all valid different Unicode characters are considered different.
1.833 +@param aString2WildChar Wild card character which may be specified for aString2. By default
1.834 + wild card character is not specified and not used.
1.835 +@return KErrNotFound aString2 not found in aString1.
1.836 + Non-negative value telling the position in aString1 where the first occurrence of
1.837 + aString2 was found.
1.838 +@internalComponent
1.839 +*/
1.840 +TInt TCollate::Find(const TUint16 *aString1, TInt aLength1,
1.841 +    const TUint16 *aString2, TInt aLength2, TInt aMaxLevel, TUint aString2WildChar) const
1.842 +    {
1.843 +    // This overload does not report the match length: receive it in a local and drop it.
1.844 +    TInt ignoredLength = 0;
1.845 +    return Find(aString1, aLength1, aString2, aLength2, ignoredLength, aMaxLevel, aString2WildChar);
1.846 +    }
1.847 +
1.848 +/**
1.849 +Search the string beginning at aString1 of length aLength1 for the string
1.850 +beginning at aString2 of length aLength2, and report the length of the match.
1.851 +
1.852 +@param aString1 String to search
1.853 +@param aLength1 Length of aString1
1.854 +@param aString2 String to search for
1.855 +@param aLength2 Length of aString2
1.856 +@param aLengthFound Receives the length of the match found in aString1, or KErrNotFound if there is none
1.857 +@param aMaxLevel Determines the tightness of the collation. At level 0, only
1.858 + character identities are distinguished. At level 1 accents are
1.859 + distinguished as well. At level 2 case is distinguished as well. At
1.860 + level 3 all valid different Unicode characters are considered different.
1.861 +@param aString2WildChar Wild card character which may be specified for aString2. By default
1.862 + wild card character is not specified and not used.
1.863 +@return KErrNotFound aString2 not found in aString1.
1.864 + Non-negative value telling the position in aString1 where the first occurrence of
1.865 + aString2 was found.
1.866 +@internalComponent
1.867 +*/
1.868 +TInt TCollate::Find(const TUint16 *aString1, TInt aLength1,
1.869 +    const TUint16 *aString2, TInt aLength2, TInt &aLengthFound, TInt aMaxLevel, TUint aString2WildChar) const
1.870 +    {
1.871 +    // The literal 0 passed below means that no escape character is in use.
1.872 +    TUTF32Iterator haystack(aString1, aString1 + aLength1);
1.873 +    TUTF32Iterator needle(aString2, aString2 + aLength2);
1.874 +    return FindKeySequence(haystack, needle, aMaxLevel, aString2WildChar, 0, aLengthFound);
1.875 +    }
1.876 +
1.877 +/**
1.878 +Match the pattern defined by aSearchTerm with aCandidate.
1.879 +Return the index in aCandidate of the start of the first pattern matched -
1.880 +that is, the first character in aSearchTerm after all wild-sequence characters
1.881 +have been matched. Return KErrNotFound if there is no match.
1.882 +
1.883 +For example, if aCandidate is "abcdefghijkl", the following values of aSearchTerm yield the
1.884 +following results:
1.885 +"abc*" gives 0
1.886 +"abc" gives KErrNotFound
1.887 +"xyz" gives KErrNotFound
1.888 +"*def" gives KErrNotFound
1.889 +"*def*" gives 3
1.890 +"*d?f*" gives 3
1.891 +"a*kl" gives 0
1.892 +"*d*kl" gives 3
1.893 +
1.894 +To match a pattern anywhere in aCandidate, aSearchTerm must both start and end
1.895 +with aWildSequenceChar.
1.896 +
1.897 +@param aCandidate String to search
1.898 +@param aCandidateLength Length of aCandidate
1.899 +@param aSearchTerm String to search for
1.900 +@param aSearchTermLength Length of aSearchTerm
1.901 +@param aMaxLevel Determines the tightness of the collation. At level 0, only
1.902 + character identities are distinguished. At level 1 accents are
1.903 + distinguished as well. At level 2 case is distinguished as well. At
1.904 + level 3 all valid different Unicode characters are considered different.
1.905 +@param aWildChar Wild card character which may be specified for aSearchTerm. By default
1.906 + the wild card character used is '?'.
1.907 +@param aWildSequenceChar Wild card sequence character which may be specified for aSearchTerm.
1.908 + Its default value is '*'.
1.909 +@param aEscapeChar Escape character. If it is non-zero and precedes aWildChar or aWildSequenceChar in
1.910 + aSearchTerm, then that character is treated as a normal character.
1.911 +@return The index in aCandidate of the start of the first pattern matched, or KErrNotFound if there is
1.912 + no match. If aSearchTerm is a lone aWildSequenceChar, aCandidateLength is returned.
1.913 +@internalComponent
1.914 +*/
1.915 +TInt TCollate::Match(const TUint16 *aCandidate, TInt aCandidateLength,
1.916 + const TUint16 *aSearchTerm,TInt aSearchTermLength,
1.917 + TInt aMaxLevel, TUint aWildChar, TUint aWildSequenceChar,
1.918 + TUint aEscapeChar) const
1.919 + {
1.920 + ASSERT(0 <= aSearchTermLength);
1.921 + ASSERT(0 <= aCandidateLength);
1.922 +
1.923 + if(aMaxLevel == 3 && (iMethod.iFlags & TCollationMethod::EFoldCase)) // case folding caps the comparison at level 2
1.924 + {
1.925 + aMaxLevel = 2;
1.926 + }
1.927 +
1.928 + TUTF32Iterator candidate(aCandidate, aCandidate + aCandidateLength);
1.929 + TUTF32Iterator searchTerm(aSearchTerm, aSearchTerm + aSearchTermLength);
1.930 +
1.931 + TInt firstMatch = KErrNotFound; // value to return; stays negative until a segment fixes a position
1.932 + TInt segEnd = ::FindCharacter(aWildSequenceChar, aEscapeChar, aSearchTerm, aSearchTermLength); // end of the first segment; the code below treats segEnd == aSearchTermLength as "no wild-sequence character"
1.933 +
1.934 + // Is there any prefix that the candidate string must have?
1.935 + // aSearchTerm looks like "abc*...". Then segEnd will be 3 (the position of '*').
1.936 + // Check that aCandidate begins with "abc" too.
1.937 + if(segEnd != 0 || aSearchTermLength == 0)
1.938 + {
1.939 + searchTerm = TUTF32Iterator(aSearchTerm, aSearchTerm + segEnd);
1.940 + TComparisonResult order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar);
1.941 + if(order != ERightIsPrefixOfLeft && order != EStringsIdentical) // the candidate must at least start with the segment
1.942 + {
1.943 + return KErrNotFound;
1.944 + }
1.945 + if(aSearchTermLength == segEnd) // no wild-sequence character in the search term
1.946 + {
1.947 + return order == EStringsIdentical ? 0 : KErrNotFound; // so the whole candidate has to match
1.948 + }
1.949 + firstMatch = 0; // the pattern is anchored at the start of the candidate
1.950 + }
1.951 +
1.952 + // search for all remaining segments
1.953 + // For example: aSearchTerm = "abc*def*ghi", aCandidate = "abc...".
1.954 + // aCandidate was already searched for "abc" and segEnd = 3.
1.955 + // Search aCandidate for the remaining segments: "def" and "ghi".
1.956 + while(aSearchTermLength != (segEnd + 1)) // the loop ends once only a trailing wild-sequence character is left
1.957 + {
1.958 + ++segEnd; // step over the wild-sequence character
1.959 + aSearchTermLength -= segEnd; // drop the part of the search term handled so far
1.960 + aSearchTerm += segEnd;
1.961 + segEnd = ::FindCharacter(aWildSequenceChar, aEscapeChar, aSearchTerm, aSearchTermLength);
1.962 + searchTerm = TUTF32Iterator(aSearchTerm, aSearchTerm + segEnd);//searchTerm holds the next aSearchTerm segment
1.963 + //We will store here the current position of candidate string.
1.964 + const TUint16* candidateCurrentPos = candidate.CurrentPosition();
1.965 + TInt dummy(0); // receives the match length, which is not needed here
1.966 + TInt match = FindKeySequence(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar, dummy);
1.967 + if (match < 0)
1.968 + {
1.969 + return KErrNotFound;
1.970 + }
1.971 + if (aSearchTermLength == segEnd) // final segment, with no wild-sequence character after it
1.972 + {
1.973 + candidate.SetStart(candidateCurrentPos + match);
1.974 + TComparisonResult order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar);
1.975 + if (order == EStringsIdentical)
1.976 + return firstMatch < 0 ? (match + candidateCurrentPos - aCandidate): firstMatch;
1.977 + while (match >= 0)
1.978 + {
1.979 + // We are at the very end of the search term, so this segment must
1.980 + // match the end of the candidate string.
1.981 + candidate.SetStart(candidateCurrentPos + match + 1); // retry one 16-bit unit after the previous match; NOTE(review): may land inside a surrogate pair - confirm
1.982 + candidateCurrentPos = candidate.CurrentPosition();
1.983 + match = FindKeySequence(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar, dummy);
1.984 + candidate.SetStart(candidateCurrentPos + match); // NOTE(review): if the search above failed, match is KErrNotFound and this
1.985 + order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar); // statement and the compare still run on that negative offset before the loop ends - confirm intended
1.986 + if (order == EStringsIdentical)
1.987 + return firstMatch < 0 ? (match + candidateCurrentPos - aCandidate): firstMatch;
1.988 + }
1.989 + return KErrNotFound;
1.990 + }
1.991 + //Initialize the first match position, if not initialized yet
1.992 + if (firstMatch < 0 && segEnd != 0) // an empty segment does not fix a position
1.993 + {
1.994 + firstMatch = match;
1.995 + }
1.996 + }
1.997 + return firstMatch < 0 ? aCandidateLength : firstMatch; // no segment fixed a position: report aCandidateLength
1.998 + }
1.999 +
1.1000 +/**
1.1001 +Compare the collation keys produced from aLeft and aRight, one level at a time
1.1002 +from level 0 up to aMaxLevel (clamped to 0..3, and to 2 when EFoldCase is set).
1.1003 +If EStringsIdentical or ERightIsPrefixOfLeft is returned and a left end position
1.1004 +was recorded at level 0, aLeft is moved to that position, just after the match.
1.1005 +ERightIsPrefixOfLeft means that the right string has been checked at all requested levels
1.1006 +and, unless EIgnoreCombining is set, that no unmatched combining characters remain on the left.
1.1007 +@param aRightStringWildChar If non-zero, a character in aRight that is accepted for any left key
1.1008 +@param aEscapeChar If non-zero, a character in aRight whose following key is compared literally
1.1009 +@internalComponent
1.1010 +*/
1.1011 +TCollate::TComparisonResult TCollate::CompareKeySequences(TUTF32Iterator& aLeft, TUTF32Iterator& aRight,
1.1012 + TInt aMaxLevel, TInt aRightStringWildChar, TInt aEscapeChar) const
1.1013 + {
1.1014 + // Clamp the maximum level of the comparison.
1.1015 + if(aMaxLevel < 0)
1.1016 + {
1.1017 + aMaxLevel = 0;
1.1018 + }
1.1019 + if(aMaxLevel > 3)
1.1020 + {
1.1021 + aMaxLevel = 3;
1.1022 + }
1.1023 + //Case folding forces the maximum level to 2. Case folding could only be done at level 3, which
1.1024 + //makes use of the actual Unicode values, if we had access to a case conversion table appropriate for
1.1025 + //the collation method.
1.1026 + if(aMaxLevel == 3 && (iMethod.iFlags & TCollationMethod::EFoldCase))
1.1027 + {
1.1028 + aMaxLevel = 2;
1.1029 + }
1.1030 + TCollationValueIterator itL(iMethod);
1.1031 + TCollationValueIterator itR(iMethod);
1.1032 + // Perform the comparison.
1.1033 + TComparisonResult order = EStringsIdentical; // result of the level currently being compared
1.1034 + TComparisonResult accumulatedOrder = EStringsIdentical; // result carried over from the levels compared so far
1.1035 + const TText16* endOfLeft = 0; // left position recorded at level 0 when the right string runs out
1.1036 + for (int cur_level = 0; cur_level <= aMaxLevel; cur_level++)
1.1037 + {
1.1038 + itL.SetSourceIt(aLeft); // every level restarts from the caller's iterator positions
1.1039 + itR.SetSourceIt(aRight);
1.1040 +
1.1041 + for (;;)
1.1042 + {
1.1043 + TUint32 c2 = itR.GetNextNonZeroKey(cur_level);
1.1044 + if (c2 == 0) // the right string has no more keys at this level
1.1045 + {
1.1046 + TUint32 more = itL.GetNextNonZeroKey(cur_level);
1.1047 + if (cur_level == 0)
1.1048 + endOfLeft = itL.CurrentPositionIfAtCharacter();
1.1049 + if (more == 0)
1.1050 + {//No non-zero keys at all
1.1051 + order = EStringsIdentical;
1.1052 + }
1.1053 + else if (!(TCollationMethod::EIgnoreCombining & iMethod.iFlags)
1.1054 + && itL.AtCombiningCharacter())
1.1055 + {
1.1056 + order = ERightComparesLessAndIsNotPrefix; // left continues with a combining character, so right is not reported as a prefix
1.1057 + }
1.1058 + else
1.1059 + {
1.1060 + order = ERightIsPrefixOfLeft;
1.1061 + }
1.1062 + break;
1.1063 + }
1.1064 + TUint32 c1 = itL.GetNextNonZeroKey(cur_level);
1.1065 + if (c1 == 0) // the left string ran out of keys first
1.1066 + {
1.1067 + order = ELeftIsPrefixOfRight;
1.1068 + break;
1.1069 + }
1.1070 +
1.1071 + itL.Increment();
1.1072 + if(cur_level == 0 && aEscapeChar != 0 && itR.MatchChar(aEscapeChar)) // NOTE(review): escape handling is applied at level 0 only - confirm higher levels stay aligned
1.1073 + {//Escape character found. Get the next key.
1.1074 + c2 = itR.GetNextNonZeroKey(cur_level);
1.1075 + itR.Increment();
1.1076 + }
1.1077 + else
1.1078 + {
1.1079 + if(aRightStringWildChar && itR.MatchChar(aRightStringWildChar))
1.1080 + {
1.1081 + itL.SkipCombiningCharacters();
1.1082 + itR.SkipCombiningCharacters();
1.1083 + c1 = c2; // the wild card is accepted: force the two keys to compare equal
1.1084 + }
1.1085 + else
1.1086 + {
1.1087 + itR.Increment();
1.1088 + }
1.1089 + }
1.1090 +
1.1091 + // Has an order been determined by key difference?
1.1092 + if (c1 != c2)
1.1093 + {
1.1094 + // Fold to lower case, or switch ordering for case or kana syllabary if necessary.
1.1095 + if (cur_level == 2 && (c1 <= (0x14 * 4) && c2 <= (0x14 * 4)))
1.1096 + {
1.1097 + // Divide keys by 4 to get them back into the range 0..63
1.1098 + // because keys returned by GetKey are masked but not shifted.
1.1099 + c1 /= 4;
1.1100 + c2 /= 4;
1.1101 + ProcessKeys(c1, c2, iMethod.iFlags);
1.1102 + }
1.1103 + if (c1 != c2) // test equality again because case folding might have made them equal
1.1104 + {
1.1105 + order = c1 > c2 ? ERightComparesLessAndIsNotPrefix : ELeftComparesLessAndIsNotPrefix;
1.1106 + TBool backwards = cur_level == 1 && (iMethod.iFlags & TCollationMethod::EAccentsBackwards);
1.1107 + if (order && !backwards) // with EAccentsBackwards at level 1 the scan goes on, so a later difference overwrites order; NOTE(review): relies on both NotPrefix enumerators being non-zero - confirm
1.1108 + {
1.1109 + break;
1.1110 + }
1.1111 + }
1.1112 + }
1.1113 + }
1.1114 + if (accumulatedOrder != order && order != EStringsIdentical) // this level disagrees with the result carried so far
1.1115 + {
1.1116 + if (accumulatedOrder == ERightIsPrefixOfLeft)
1.1117 + {
1.1118 + return ERightComparesLessAndIsNotPrefix;
1.1119 + }
1.1120 + else if (accumulatedOrder == ELeftIsPrefixOfRight)
1.1121 + {
1.1122 + return ELeftComparesLessAndIsNotPrefix;
1.1123 + }
1.1124 + else
1.1125 + {
1.1126 + // accumulatedOrder == EStringsIdentical
1.1127 + if (order == ELeftComparesLessAndIsNotPrefix || order == ERightComparesLessAndIsNotPrefix)
1.1128 + {
1.1129 + return order;
1.1130 + }
1.1131 + }
1.1132 + accumulatedOrder = order; // a prefix result is carried on into the remaining levels
1.1133 + }
1.1134 + }
1.1135 +
1.1136 + if (accumulatedOrder == EStringsIdentical || accumulatedOrder == ERightIsPrefixOfLeft)
1.1137 + {
1.1138 + if (endOfLeft)
1.1139 + {
1.1140 + aLeft.SetStart(endOfLeft); // leave aLeft just after the matched text
1.1141 + }
1.1142 + else if (accumulatedOrder == ERightIsPrefixOfLeft)
1.1143 + {
1.1144 + accumulatedOrder = ERightComparesLessAndIsNotPrefix; // no left end position was recorded, so no prefix match is reported
1.1145 + }
1.1146 + }
1.1147 + return accumulatedOrder;
1.1148 + }
1.1149 +
1.1150 +/**
1.1151 +Searches the candidate string for the search term, trying successive start
1.1152 +positions in the candidate until CompareKeySequences reports a match.
1.1153 +On success the return value is the offset, counted from the initial position of
1.1154 +aCandidate, at which the match starts; aLengthFound receives the length of the
1.1155 +match, and aCandidate is left where CompareKeySequences left it after the match.
1.1156 +If there is no match, KErrNotFound is returned and aLengthFound is KErrNotFound.
1.1157 +
1.1158 +@internalComponent
1.1159 +*/
1.1160 +TInt TCollate::FindKeySequence(TUTF32Iterator& aCandidate, TUTF32Iterator& aSearchTerm,
1.1161 +    TInt aMaxLevel, TInt aWildChar, TInt aEscapeChar, TInt& aLengthFound) const
1.1162 +    {
1.1163 +    // Offsets are measured from the position aCandidate has on entry.
1.1164 +    TInt offset = 0;
1.1165 +    const TText* searchOrigin = aCandidate.CurrentPosition();
1.1166 +    // Snapshots used to rewind both iterators after a failed attempt.
1.1167 +    TUTF32Iterator attemptStart(aCandidate);
1.1168 +    TUTF32Iterator termStart(aSearchTerm);
1.1169 +    aLengthFound = KErrNotFound;
1.1170 +    for (;;)
1.1171 +        {
1.1172 +        switch (CompareKeySequences(aCandidate, aSearchTerm, aMaxLevel, aWildChar, aEscapeChar))
1.1173 +            {
1.1174 +            case ELeftIsPrefixOfRight:
1.1175 +                // The candidate ran out before the search term did.
1.1176 +                return KErrNotFound;
1.1177 +            case ERightIsPrefixOfLeft:
1.1178 +            case EStringsIdentical:
1.1179 +                // Match: its length is the distance covered beyond the start offset.
1.1180 +                aLengthFound = (aCandidate.CurrentPosition() - searchOrigin) - offset;
1.1181 +                return offset;
1.1182 +            default:
1.1183 +                break;
1.1184 +            }
1.1185 +        // No match here: go back to the start of this attempt and step on
1.1186 +        // by one character plus any combining characters that follow it.
1.1187 +        aCandidate = attemptStart;
1.1188 +        aCandidate.Next();
1.1189 +        ::SkipCombiningCharacters(aCandidate);
1.1190 +        attemptStart = aCandidate;
1.1191 +        offset = aCandidate.CurrentPosition() - searchOrigin;
1.1192 +        aSearchTerm = termStart;
1.1193 +        }}