sl@0
|
1 |
// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
|
sl@0
|
2 |
// All rights reserved.
|
sl@0
|
3 |
// This component and the accompanying materials are made available
|
sl@0
|
4 |
// under the terms of the License "Eclipse Public License v1.0"
|
sl@0
|
5 |
// which accompanies this distribution, and is available
|
sl@0
|
6 |
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
|
sl@0
|
7 |
//
|
sl@0
|
8 |
// Initial Contributors:
|
sl@0
|
9 |
// Nokia Corporation - initial contribution.
|
sl@0
|
10 |
//
|
sl@0
|
11 |
// Contributors:
|
sl@0
|
12 |
//
|
sl@0
|
13 |
// Description:
|
sl@0
|
14 |
// The Unicode collation system.
|
sl@0
|
15 |
//
|
sl@0
|
16 |
//
|
sl@0
|
17 |
|
sl@0
|
18 |
#include "collateimp.h"
|
sl@0
|
19 |
#include "foldtable.inl"
|
sl@0
|
20 |
|
sl@0
|
21 |
#include <collate.h>
|
sl@0
|
22 |
#include <unicode.h>
|
sl@0
|
23 |
#include "u32std.h"
|
sl@0
|
24 |
|
sl@0
|
25 |
// maximum size of string which has its own sort key
|
sl@0
|
26 |
// 16 instead of 8, in case all the characters are supplementary characters
// (each of which takes two UTF-16 code units)
|
sl@0
|
27 |
static const TInt KKeyedStringBufferSize = 16;
|
sl@0
|
28 |
|
sl@0
|
29 |
|
sl@0
|
30 |
inline TText16 GetHighSurrogate(TUint aChar)
/**
Compute the high (leading) UTF-16 surrogate for a code point.

The value returned is 0xD7C0 + (aChar >> 10), truncated to 16 bits. For a
supplementary character (0x10000..0x10FFFF) this is its high surrogate. No
range check is made: the same formula is applied to any other input, so the
result is only meaningful when aChar is a supplementary character.

@param aChar The 32-bit code point value of a Unicode character.

@return The high surrogate of aChar when aChar is a supplementary character.
*/
	{
	const TUint leadingUnit = 0xD7C0 + (aChar >> 10);
	return STATIC_CAST(TText16, leadingUnit);
	}
|
sl@0
|
42 |
|
sl@0
|
43 |
inline TText16 GetLowSurrogate(TUint aChar)
/**
Compute the low (trailing) UTF-16 surrogate for a code point.

The value returned is 0xDC00 combined with the low ten bits of aChar. No range
check is made, so the result is only meaningful when aChar is a supplementary
character.

@param aChar The 32-bit code point value of a Unicode character.

@return The low surrogate of aChar when aChar is a supplementary character.
*/
	{
	const TUint lowTenBits = aChar & 0x3FF;
	return STATIC_CAST(TText16, 0xDC00 | lowTenBits);
	}
|
sl@0
|
55 |
|
sl@0
|
56 |
inline TUint JoinSurrogate(TText16 aHighSurrogate, TText16 aLowSurrogate)
/**
Recombine a UTF-16 surrogate pair into the supplementary character it encodes.

The single constant 0xD7F7 folds together the three adjustments of the usual
formula: (0xD7F7 << 10) equals (0xD800 << 10) - 0x10000 + 0xDC00. The arguments
are not validated.

@param aHighSurrogate The high (leading) surrogate of the pair.
@param aLowSurrogate  The low (trailing) surrogate of the pair.

@return The 32-bit code point value of the Unicode supplementary character.
*/
	{
	const TUint shiftedHigh = (aHighSurrogate - 0xD7F7) << 10;
	return shiftedHigh + aLowSurrogate;
	}
|
sl@0
|
66 |
|
sl@0
|
67 |
// Creates a one or two collation keys sequence corresponding to the input character.
|
sl@0
|
68 |
// Returns the number of keys output.
|
sl@0
|
69 |
static TInt CreateDefaultCollationKeySequence(TInt aChar, TCollationKey* aBuffer)
|
sl@0
|
70 |
{
|
sl@0
|
71 |
if (aChar >= 0x3400 && aChar <= 0x9FFF) // CJK
|
sl@0
|
72 |
{
|
sl@0
|
73 |
aBuffer[0].iLow = (TUint32)aChar << 16 | 0x0105;
|
sl@0
|
74 |
aBuffer[0].iHigh = aChar;
|
sl@0
|
75 |
return 1;//Collation key sequence consists of 1 key
|
sl@0
|
76 |
}
|
sl@0
|
77 |
aBuffer[0].iLow = 0xFF800000 | ((aChar << 1) & 0x3F0000) | 0x0104; // no stop bit
|
sl@0
|
78 |
aBuffer[0].iHigh = 1;
|
sl@0
|
79 |
aBuffer[1].iLow = (aChar << 16) | 0x80000105; // stop bit
|
sl@0
|
80 |
aBuffer[1].iHigh = 0;
|
sl@0
|
81 |
return 2;//Collation key sequence consists of 2 keys
|
sl@0
|
82 |
}
|
sl@0
|
83 |
|
sl@0
|
84 |
// Finds a character's key in the main index, or returns -1 if it is not there
|
sl@0
|
85 |
static TInt FindCollationKeyIndex(TInt aChar, const TCollationKeyTable& aTable)
|
sl@0
|
86 |
{
|
sl@0
|
87 |
TInt n = aTable.iIndices;
|
sl@0
|
88 |
const TUint32 *base = aTable.iIndex;
|
sl@0
|
89 |
const TUint32 *start = base;
|
sl@0
|
90 |
const TUint32 *end = aTable.iIndex + n - 1;
|
sl@0
|
91 |
const TUint32 *p = base;
|
sl@0
|
92 |
TInt currentCharLength = 0;
|
sl@0
|
93 |
|
sl@0
|
94 |
while (n > 0)
|
sl@0
|
95 |
{
|
sl@0
|
96 |
TInt pivot = n / 2;
|
sl@0
|
97 |
p += pivot;
|
sl@0
|
98 |
if ((p < start) || (p > end))
|
sl@0
|
99 |
{
|
sl@0
|
100 |
break;
|
sl@0
|
101 |
}
|
sl@0
|
102 |
TInt c = *p >> 16;
|
sl@0
|
103 |
if (IsHighSurrogate( (TText16)c ))
|
sl@0
|
104 |
{
|
sl@0
|
105 |
if ((p < end) && (IsLowSurrogate( (TText16)((*(p+1))>>16) )))
|
sl@0
|
106 |
{
|
sl@0
|
107 |
currentCharLength = 2;
|
sl@0
|
108 |
c = JoinSurrogate( (TText16)(*p>>16), (TText16)((*(p+1))>>16) );
|
sl@0
|
109 |
}
|
sl@0
|
110 |
}
|
sl@0
|
111 |
else if (IsLowSurrogate( (TText16)c ))
|
sl@0
|
112 |
{
|
sl@0
|
113 |
if ((p > start) && (IsHighSurrogate( (TText16)((*(p-1))>>16) )))
|
sl@0
|
114 |
{
|
sl@0
|
115 |
p--;
|
sl@0
|
116 |
pivot = pivot - 1;
|
sl@0
|
117 |
currentCharLength = 2;
|
sl@0
|
118 |
c = JoinSurrogate( (TText16)(*p>>16), (TText16)((*(p+1))>>16) );
|
sl@0
|
119 |
}
|
sl@0
|
120 |
}
|
sl@0
|
121 |
else
|
sl@0
|
122 |
{
|
sl@0
|
123 |
currentCharLength = 1;
|
sl@0
|
124 |
}
|
sl@0
|
125 |
if (aChar == c) // found it
|
sl@0
|
126 |
{
|
sl@0
|
127 |
return *p & 0xFFFF;
|
sl@0
|
128 |
}
|
sl@0
|
129 |
if (aChar < c) // it's before
|
sl@0
|
130 |
{
|
sl@0
|
131 |
n = pivot;
|
sl@0
|
132 |
}
|
sl@0
|
133 |
else // it's after
|
sl@0
|
134 |
{
|
sl@0
|
135 |
ASSERT(currentCharLength != 0);
|
sl@0
|
136 |
base = p + currentCharLength;
|
sl@0
|
137 |
n -= pivot + currentCharLength;
|
sl@0
|
138 |
}
|
sl@0
|
139 |
p = base;
|
sl@0
|
140 |
}
|
sl@0
|
141 |
return -1;
|
sl@0
|
142 |
}
|
sl@0
|
143 |
|
sl@0
|
144 |
static void ProcessKeys(TUint32& aKey1, TUint32& aKey2, TUint aFlags)
|
sl@0
|
145 |
{
|
sl@0
|
146 |
if (aFlags & TCollationMethod::EFoldCase)
|
sl@0
|
147 |
{
|
sl@0
|
148 |
static const TUint case_fold_table[21] =
|
sl@0
|
149 |
{ 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x2, 0x3, 0x4, 0x5, 0x6,
|
sl@0
|
150 |
0xD, 0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14 };
|
sl@0
|
151 |
aKey1 = case_fold_table[aKey1];
|
sl@0
|
152 |
aKey2 = case_fold_table[aKey2];
|
sl@0
|
153 |
}
|
sl@0
|
154 |
if (aFlags & TCollationMethod::ESwapCase)
|
sl@0
|
155 |
{
|
sl@0
|
156 |
static const TUint case_swap_table[21] =
|
sl@0
|
157 |
{ 0, 0x1, 0x8, 0x9, 0xA, 0xB, 0xC, 0x7, 0x2, 0x3, 0x4, 0x5, 0x6,
|
sl@0
|
158 |
0xD, 0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14 };
|
sl@0
|
159 |
aKey1 = case_swap_table[aKey1];
|
sl@0
|
160 |
aKey2 = case_swap_table[aKey2];
|
sl@0
|
161 |
}
|
sl@0
|
162 |
if (aFlags & TCollationMethod::ESwapKana)
|
sl@0
|
163 |
{
|
sl@0
|
164 |
static const TUint kana_swap_table[21] =
|
sl@0
|
165 |
{ 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC,
|
sl@0
|
166 |
0x13, 0x14, 0xD, 0xE, 0xF, 0x10, 0x11, 0x12 };
|
sl@0
|
167 |
aKey1 = kana_swap_table[aKey1];
|
sl@0
|
168 |
aKey2 = kana_swap_table[aKey2];
|
sl@0
|
169 |
}
|
sl@0
|
170 |
}
|
sl@0
|
171 |
|
sl@0
|
172 |
// Returns the position of the character in the string, or aLength if it is not present.
|
sl@0
|
173 |
// If aChar is found but it is preceded by aEscapeChar (aEscapeChar != 0), then the search continues.
|
sl@0
|
174 |
static TInt FindCharacter(TInt aChar, TInt aEscapeChar, const TUint16* aString, TInt aLength)
|
sl@0
|
175 |
{
|
sl@0
|
176 |
TBool isEscaped = EFalse;
|
sl@0
|
177 |
for(TInt pos=0;pos!=aLength;++pos,++aString)
|
sl@0
|
178 |
{
|
sl@0
|
179 |
if(isEscaped)
|
sl@0
|
180 |
{
|
sl@0
|
181 |
isEscaped = EFalse;
|
sl@0
|
182 |
}
|
sl@0
|
183 |
else if(*aString == aEscapeChar)
|
sl@0
|
184 |
{
|
sl@0
|
185 |
isEscaped = ETrue;
|
sl@0
|
186 |
}
|
sl@0
|
187 |
else if(*aString == aChar)
|
sl@0
|
188 |
{
|
sl@0
|
189 |
if(!isEscaped)
|
sl@0
|
190 |
{
|
sl@0
|
191 |
return pos;
|
sl@0
|
192 |
}
|
sl@0
|
193 |
}
|
sl@0
|
194 |
}
|
sl@0
|
195 |
return aLength;
|
sl@0
|
196 |
}
|
sl@0
|
197 |
|
sl@0
|
198 |
/*
|
sl@0
|
199 |
The standard collation data, containing keys for all the WGL4 characters, plus
|
sl@0
|
200 |
commonly-used control characters and spaces. Generated by COLTAB.
|
sl@0
|
201 |
*/
|
sl@0
|
202 |
static const TUint32 TheKey[] =
|
sl@0
|
203 |
{
|
sl@0
|
204 |
0x21e0112,0x21e0113,0x2260112,0x2260112,0x2260113,0x2740112,0x2740113,0x6c60178,
|
sl@0
|
205 |
0x266017a,0x6c70179,0x6c60178,0x266017a,0x6c90179,0x6c60178,0x266017a,0x6cd0179,
|
sl@0
|
206 |
0x6c80178,0x266017a,0x6c90179,0x6c80178,0x266017a,0x6cd0179,0x6ca0178,0x266017a,
|
sl@0
|
207 |
0x6cd0179,0x6cc0178,0x266017a,0x6cd0179,0x6f70110,0x2650112,0x8050111,0x74b0110,
|
sl@0
|
208 |
0x78d0111,0x74b0110,0x7bd0111,0x78d0110,0x7a10111,0x78d0128,0x7a10129,0x7bd0110,
|
sl@0
|
209 |
0x2290113,0x7bd0128,0x2290113,0x7ed0128,0x8050111,0x805dd10,0x71f0111,0x805dd28,
|
sl@0
|
210 |
0x71f0129,0x85ddd10,0x85d0111,0x8750150,0x7e50151,0x9060110,0x7ed0111,0x3,
|
sl@0
|
211 |
0x201010b,0x202010b,0x203010b,0x204010b,0x205010b,0x206010b,0x207010b,0x208010b,
|
sl@0
|
212 |
0x209010b,0x2090113,0x209016f,0x209020b,0x209130b,0x209160b,0x209180b,0x2091d0b,
|
sl@0
|
213 |
0x209240b,0x209280b,0x2092a0b,0x2092f0b,0x209330b,0x209360b,0x209390b,0x2093b0b,
|
sl@0
|
214 |
0x2093f0b,0x2096b0b,0x20b010b,0x20c010b,0x20d010b,0x20d016f,0x20e010b,0x20f010b,
|
sl@0
|
215 |
0x210010b,0x211010b,0x214010b,0x21a010b,0x21c010b,0x21e010b,0x21f010b,0x221010b,
|
sl@0
|
216 |
0x222010b,0x226010b,0x229010b,0x22d010b,0x22e010b,0x22f010b,0x230010b,0x231010b,
|
sl@0
|
217 |
0x232010b,0x233010b,0x234010b,0x235010b,0x236010b,0x237010b,0x23c010b,0x23d010b,
|
sl@0
|
218 |
0x23e010b,0x23f010b,0x240010b,0x241010b,0x242010b,0x243010b,0x25e010b,0x25f010b,
|
sl@0
|
219 |
0x260010b,0x261010b,0x262010b,0x263010b,0x265010b,0x266010b,0x267010b,0x268010b,
|
sl@0
|
220 |
0x269010b,0x26a010b,0x26c010b,0x26e010b,0x26f010b,0x270010b,0x274010b,0x2ac010b,
|
sl@0
|
221 |
0x2ad010b,0x2af010b,0x2d6010b,0x2ff010b,0x300010b,0x301010b,0x302010b,0x303010b,
|
sl@0
|
222 |
0x304010b,0x317010b,0x35c010b,0x35f010b,0x366010b,0x368010b,0x369010b,0x36a010b,
|
sl@0
|
223 |
0x36b010b,0x36c010b,0x36d010b,0x36e010b,0x36f010b,0x370010b,0x371010b,0x372010b,
|
sl@0
|
224 |
0x374010b,0x375010b,0x378010b,0x37c010b,0x37d010b,0x381010b,0x382010b,0x38a010b,
|
sl@0
|
225 |
0x38c010b,0x3a2010b,0x3b9010b,0x3bb010b,0x3bc010b,0x42f010b,0x43d010b,0x44d010b,
|
sl@0
|
226 |
0x44e010b,0x4d6010b,0x4d8010b,0x4e2010b,0x4e6010b,0x4ea010b,0x4ee010b,0x4f2010b,
|
sl@0
|
227 |
0x4fa010b,0x502010b,0x50a010b,0x512010b,0x526010b,0x527010b,0x528010b,0x529010b,
|
sl@0
|
228 |
0x52a010b,0x52b010b,0x52c010b,0x52d010b,0x52e010b,0x52f010b,0x530010b,0x531010b,
|
sl@0
|
229 |
0x532010b,0x533010b,0x534010b,0x535010b,0x536010b,0x537010b,0x538010b,0x539010b,
|
sl@0
|
230 |
0x53a010b,0x53b010b,0x53c010b,0x53d010b,0x53e010b,0x53f010b,0x540010b,0x541010b,
|
sl@0
|
231 |
0x542010b,0x556010b,0x55a010b,0x55e010b,0x562010b,0x566010b,0x567010b,0x568010b,
|
sl@0
|
232 |
0x569010b,0x56c010b,0x56d010b,0x576010b,0x577010b,0x578010b,0x57e010b,0x586010b,
|
sl@0
|
233 |
0x588010b,0x590010b,0x596010b,0x597010b,0x59b010b,0x5a4010b,0x5a5010b,0x5b2010b,
|
sl@0
|
234 |
0x5f0010b,0x5f1010b,0x5f2010b,0x5f6010b,0x5f8010b,0x616010b,0x619010b,0x61b010b,
|
sl@0
|
235 |
0x61c010b,0x620010b,0x621010b,0x6b4010b,0x6b5010b,0x1309,0x1609,0x1809,
|
sl@0
|
236 |
0x1d09,0x2209,0x2409,0x2809,0x2f09,0x3009,0x3309,0x3609,
|
sl@0
|
237 |
0x3909,0x3b09,0x4109,0x2c20109,0x2c30109,0x2c40109,0x2c50109,0x2c60109,
|
sl@0
|
238 |
0x2cd0109,0x2ce0109,0x2d10109,0x2d50109,0x2fa0109,0x6c50109,0x6c60109,0x6c60151,
|
sl@0
|
239 |
0x6c70109,0x6c70151,0x6c80109,0x6c80151,0x6c90109,0x6ca0109,0x6cb0109,0x6cc0109,
|
sl@0
|
240 |
0x6cd0109,0x6ce0109,0x6cf0109,0x6cf0121,0x6cf0151,0x6d30109,0x6d30121,0x6e30109,
|
sl@0
|
241 |
0x6e30121,0x6f70109,0x6f70121,0x7030109,0x7030121,0x7070109,0x7070121,0x7170109,
|
sl@0
|
242 |
0x7170121,0x71f0109,0x71f0121,0x74b0109,0x74b0121,0x74f0109,0x7530109,0x7530121,
|
sl@0
|
243 |
0x7730109,0x7730121,0x77f0109,0x77f0121,0x78d0109,0x78d0121,0x7910109,0x7a10109,
|
sl@0
|
244 |
0x7a10121,0x7b10109,0x7b10121,0x7bd0109,0x7bd0115,0x7bd0121,0x7c50109,0x7c50121,
|
sl@0
|
245 |
0x7e50109,0x7e50121,0x7ed0109,0x7ed0121,0x7ed0151,0x8010109,0x8010121,0x8050109,
|
sl@0
|
246 |
0x8050121,0x8050151,0x80d0109,0x80d0121,0x81d0109,0x81d0121,0x8290109,0x8290121,
|
sl@0
|
247 |
0x8310109,0x8350109,0x8350121,0x85d0109,0x85d0121,0x85dde11,0x8750109,0x8750121,
|
sl@0
|
248 |
0x8790109,0x8790121,0x88d0109,0x88d0121,0x8a50109,0x8a50121,0x8b10109,0x8b10121,
|
sl@0
|
249 |
0x8b90109,0x8b90121,0x8bd0109,0x8bd0121,0x8c90109,0x8c90121,0x8e90109,0x8e90121,
|
sl@0
|
250 |
0x9360109,0x9360121,0x9370109,0x9370121,0x9380109,0x9380121,0x9390109,0x9390121,
|
sl@0
|
251 |
0x93a0109,0x93a0121,0x93d0109,0x93d0121,0x93e0109,0x93e0121,0x93f0109,0x93f0121,
|
sl@0
|
252 |
0x9400109,0x9400121,0x9420109,0x9420121,0x9430109,0x9430121,0x9440109,0x9440111,
|
sl@0
|
253 |
0x9440121,0x9450109,0x9450121,0x9460109,0x9460121,0x9470109,0x9470121,0x9480109,
|
sl@0
|
254 |
0x9480121,0x94a0109,0x94a0121,0x94b0109,0x94b0121,0x94c0109,0x94c0121,0x94d0109,
|
sl@0
|
255 |
0x94d0121,0x94e0109,0x94e0121,0x94f0109,0x94f0121,0x9500109,0x9500121,0x9510109,
|
sl@0
|
256 |
0x9510121,0x95a0109,0x95a0121,0x9660109,0x9660121,0x96a0109,0x96a0121,0x96e0109,
|
sl@0
|
257 |
0x96e0121,0x9720109,0x9720121,0x97e0109,0x97e0121,0x9820109,0x9820121,0x98a0109,
|
sl@0
|
258 |
0x98a0121,0x98e0109,0x98e0121,0x9920109,0x9920121,0x99a0109,0x99a0121,0x99e0109,
|
sl@0
|
259 |
0x99e0121,0x9a60109,0x9a60121,0x9aa0109,0x9aa0121,0x9ae0109,0x9ae0121,0x9b20109,
|
sl@0
|
260 |
0x9b20121,0x9ca0109,0x9ca0121,0x9ce0109,0x9ce0121,0x9d20109,0x9d20121,0x9d60109,
|
sl@0
|
261 |
0x9d60121,0x9e60109,0x9e60121,0x9ea0109,0x9ea0121,0x9f20109,0x9f20121,0x9fe0109,
|
sl@0
|
262 |
0x9fe0121,0xa020109,0xa020121,0xa0a0109,0xa0a0121,0xa120109,0xa120121,0xa160109,
|
sl@0
|
263 |
0xa160121,0xa260109,0xa260121,0xa2a0109,0xa2a0121,0xa460109,0xa460121,0xa4e0109,
|
sl@0
|
264 |
0xa4e0121,0xa660109,0xa660121,0xa6a0109,0xa6a0121,0xa6e0109,0xa6e0121,0xa720109,
|
sl@0
|
265 |
0xa720121,0xa760109,0xa760121,0xa7a0109,0xa7a0121,0xa820109,0xa820121,0xa860109,
|
sl@0
|
266 |
0xa860121,0xa8a0109,0xa8a0121,
|
sl@0
|
267 |
};
|
sl@0
|
268 |
|
sl@0
|
269 |
static const TUint32 TheIndex[] =
|
sl@0
|
270 |
{
|
sl@0
|
271 |
0x37,0x10037,0x20037,0x30037,0x40037,0x50037,0x60037,0x70037,
|
sl@0
|
272 |
0x80037,0x90038,0xa0039,0xb003a,0xc003b,0xd003c,0xe0037,0xf0037,
|
sl@0
|
273 |
0x100037,0x110037,0x120037,0x130037,0x140037,0x150037,0x160037,0x170037,
|
sl@0
|
274 |
0x180037,0x190037,0x1a0037,0x1b0037,0x1c0037,0x1d0037,0x1e0037,0x1f0037,
|
sl@0
|
275 |
0x200040,0x21005d,0x22006a,0x230080,0x24010d,0x250081,0x26007f,0x270063,
|
sl@0
|
276 |
0x280070,0x290071,0x2a007b,0x2b0096,0x2c005a,0x2d0053,0x2e0061,0x2f007c,
|
sl@0
|
277 |
0x300115,0x310116,0x320118,0x33011a,0x34011c,0x35011d,0x36011e,0x37011f,
|
sl@0
|
278 |
0x380120,0x390121,0x3a005c,0x3b005b,0x3c009a,0x3d009b,0x3e009c,0x3f005f,
|
sl@0
|
279 |
0x40007a,0x410123,0x420128,0x43012a,0x44012c,0x450132,0x460134,0x470137,
|
sl@0
|
280 |
0x480139,0x49013d,0x4a0140,0x4b0142,0x4c0145,0x4d0149,0x4e014b,0x4f0150,
|
sl@0
|
281 |
0x500155,0x510157,0x52015a,0x53015c,0x54015f,0x550163,0x560165,0x570167,
|
sl@0
|
282 |
0x580169,0x59016b,0x5a016d,0x5b0072,0x5c007e,0x5d0073,0x5e0047,0x5f0043,
|
sl@0
|
283 |
0x600045,0x610122,0x620127,0x630129,0x64012b,0x650131,0x660133,0x670136,
|
sl@0
|
284 |
0x680138,0x69013c,0x6a013f,0x6b0141,0x6c0143,0x6d0148,0x6e014a,0x6f014f,
|
sl@0
|
285 |
0x700154,0x710156,0x720159,0x73015b,0x74015e,0x750162,0x760164,0x770166,
|
sl@0
|
286 |
0x780168,0x79016a,0x7a016c,0x7b0074,0x7c009e,0x7d0075,0x7e00a0,0xa00042,
|
sl@0
|
287 |
0xa1005e,0xa2010c,0xa3010e,0xa4010b,0xa5010f,0xa6009f,0xa70076,0xa80049,
|
sl@0
|
288 |
0xa90078,0xaa0124,0xab006e,0xac009d,0xad0052,0xae0079,0xaf004f,0xb0008a,
|
sl@0
|
289 |
0xb10097,0xb20119,0xb3011b,0xb40044,0xb50187,0xb60077,0xb70062,0xb8004d,
|
sl@0
|
290 |
0xb90117,0xba0151,0xbb006f,0xbc000a,0xbd0007,0xbe0010,0xbf0060,0xc60126,
|
sl@0
|
291 |
0xd00130,0xd70099,0xd80153,0xde016f,0xdf0031,0xe60125,0xf0012f,0xf70098,
|
sl@0
|
292 |
0xf80152,0xfe016e,0x110012e,0x111012d,0x126013b,0x127013a,0x131013e,0x1320025,
|
sl@0
|
293 |
0x1330023,0x1380158,0x13f0029,0x1400027,0x1410147,0x1420146,0x1490035,0x14a014e,
|
sl@0
|
294 |
0x14b014d,0x152002f,0x153002d,0x1660161,0x1670160,0x17f015d,0x1920135,0x2c60087,
|
sl@0
|
295 |
0x2c70088,0x2c90089,0x2d80046,0x2d9004c,0x2da0048,0x2db004e,0x2dd004b,0x30000fe,
|
sl@0
|
296 |
0x30100fd,0x3020100,0x3030105,0x3040109,0x30600ff,0x3070106,0x3080103,0x30a0102,
|
sl@0
|
297 |
0x30b0104,0x30c0101,0x30d010a,0x3270107,0x3280108,0x3840044,0x385004a,0x3870062,
|
sl@0
|
298 |
0x3910171,0x3920173,0x3930175,0x3940177,0x3950179,0x396017b,0x397017d,0x398017f,
|
sl@0
|
299 |
0x3990181,0x39a0183,0x39b0185,0x39c0188,0x39d018a,0x39e018c,0x39f018e,0x3a00190,
|
sl@0
|
300 |
0x3a10192,0x3a30194,0x3a40196,0x3a50198,0x3a6019a,0x3a7019c,0x3a8019e,0x3a901a0,
|
sl@0
|
301 |
0x3b10170,0x3b20172,0x3b30174,0x3b40176,0x3b50178,0x3b6017a,0x3b7017c,0x3b8017e,
|
sl@0
|
302 |
0x3b90180,0x3ba0182,0x3bb0184,0x3bc0186,0x3bd0189,0x3be018b,0x3bf018d,0x3c0018f,
|
sl@0
|
303 |
0x3c10191,0x3c20193,0x3c30193,0x3c40195,0x3c50197,0x3c60199,0x3c7019b,0x3c8019d,
|
sl@0
|
304 |
0x3c9019f,0x40201ae,0x40401b2,0x40501b8,0x40601bc,0x40801be,0x40901c4,0x40a01ca,
|
sl@0
|
305 |
0x40b01d6,0x40f01e2,0x41001a2,0x41101a4,0x41201a6,0x41301a8,0x41401ac,0x41501b0,
|
sl@0
|
306 |
0x41601b4,0x41701b6,0x41801ba,0x41a01c0,0x41b01c2,0x41c01c6,0x41d01c8,0x41e01cc,
|
sl@0
|
307 |
0x41f01ce,0x42001d0,0x42101d2,0x42201d4,0x42301d8,0x42401da,0x42501dc,0x42601de,
|
sl@0
|
308 |
0x42701e0,0x42801e4,0x42901e6,0x42a01e8,0x42b01ea,0x42c01ec,0x42d01ee,0x42e01f0,
|
sl@0
|
309 |
0x42f01f2,0x43001a1,0x43101a3,0x43201a5,0x43301a7,0x43401ab,0x43501af,0x43601b3,
|
sl@0
|
310 |
0x43701b5,0x43801b9,0x43a01bf,0x43b01c1,0x43c01c5,0x43d01c7,0x43e01cb,0x43f01cd,
|
sl@0
|
311 |
0x44001cf,0x44101d1,0x44201d3,0x44301d7,0x44401d9,0x44501db,0x44601dd,0x44701df,
|
sl@0
|
312 |
0x44801e3,0x44901e5,0x44a01e7,0x44b01e9,0x44c01eb,0x44d01ed,0x44e01ef,0x44f01f1,
|
sl@0
|
313 |
0x45201ad,0x45401b1,0x45501b7,0x45601bb,0x45801bd,0x45901c3,0x45a01c9,0x45b01d5,
|
sl@0
|
314 |
0x45f01e1,0x49001aa,0x49101a9,0x20000041,0x20010041,0x20020041,0x20030041,0x20040041,
|
sl@0
|
315 |
0x20050041,0x20060041,0x20070042,0x20080041,0x20090041,0x200a0041,0x200b003f,0x200c0037,
|
sl@0
|
316 |
0x200d0037,0x200e0037,0x200f0037,0x20100054,0x20110055,0x20120056,0x20130057,0x20140058,
|
sl@0
|
317 |
0x20150059,0x20170051,0x20180064,0x20190065,0x201a0066,0x201b0067,0x201c006b,0x201d006c,
|
sl@0
|
318 |
0x201e006d,0x20200083,0x20210084,0x20220085,0x20260002,0x2028003d,0x2029003e,0x202a0037,
|
sl@0
|
319 |
0x202b0037,0x202c0037,0x202d0037,0x202e0037,0x20300082,0x20320086,0x20330005,0x20390068,
|
sl@0
|
320 |
0x203a0069,0x203c0000,0x203e0050,0x2044007d,0x207f014c,0x20a30110,0x20a40111,0x20a70112,
|
sl@0
|
321 |
0x20ac0113,0x2105001c,0x21130144,0x2116002b,0x21220033,0x212601a0,0x212e0114,0x215b000d,
|
sl@0
|
322 |
0x215c0013,0x215d0016,0x215e0019,0x2190008b,0x2191008d,0x2192008c,0x2193008e,0x2194008f,
|
sl@0
|
323 |
0x21950090,0x21a80091,0x22020092,0x22060093,0x220f0094,0x22110095,0x221200a1,0x221500a2,
|
sl@0
|
324 |
0x221900a3,0x221a00a4,0x221e00a5,0x221f00a6,0x222900a7,0x222b00a8,0x224800a9,0x226100aa,
|
sl@0
|
325 |
0x226400ab,0x226500ac,0x230200ad,0x231000ae,0x232000af,0x232100b0,0x250000b1,0x250200b2,
|
sl@0
|
326 |
0x250c00b3,0x251000b4,0x251400b5,0x251800b6,0x251c00b7,0x252400b8,0x252c00b9,0x253400ba,
|
sl@0
|
327 |
0x253c00bb,0x255000bc,0x255100bd,0x255200be,0x255300bf,0x255400c0,0x255500c1,0x255600c2,
|
sl@0
|
328 |
0x255700c3,0x255800c4,0x255900c5,0x255a00c6,0x255b00c7,0x255c00c8,0x255d00c9,0x255e00ca,
|
sl@0
|
329 |
0x255f00cb,0x256000cc,0x256100cd,0x256200ce,0x256300cf,0x256400d0,0x256500d1,0x256600d2,
|
sl@0
|
330 |
0x256700d3,0x256800d4,0x256900d5,0x256a00d6,0x256b00d7,0x256c00d8,0x258000d9,0x258400da,
|
sl@0
|
331 |
0x258800db,0x258c00dc,0x259000dd,0x259100de,0x259200df,0x259300e0,0x25a000e1,0x25a100e2,
|
sl@0
|
332 |
0x25aa00e3,0x25ab00e4,0x25ac00e5,0x25b200e6,0x25ba00e7,0x25bc00e8,0x25c400e9,0x25ca00ea,
|
sl@0
|
333 |
0x25cb00eb,0x25cf00ec,0x25d800ed,0x25d900ee,0x25e600ef,0x263a00f0,0x263b00f1,0x263c00f2,
|
sl@0
|
334 |
0x264000f3,0x264200f4,0x266000f5,0x266300f6,0x266500f7,0x266600f8,0x266a00f9,0x266b00fa,
|
sl@0
|
335 |
0xfb01001f,0xfb020021,0xfeff0037,0xfffc00fb,0xfffd00fc,
|
sl@0
|
336 |
};
|
sl@0
|
337 |
|
sl@0
|
338 |
// The standard collation key table, built from the generated TheKey and
// TheIndex arrays. 517 matches the number of entries in TheIndex.
// NOTE(review): the per-member comments below assume the member order of
// TCollationKeyTable, which is declared elsewhere - confirm against its
// declaration.
static const TCollationKeyTable TheStandardTable =
	{
	TheKey,		// key data
	TheIndex,	// character index
	517,		// presumably the entry count of the character index
	NULL,		// presumably the string data (none)
	NULL,		// presumably the string index (none)
	0		// presumably the entry count of the string index
	};
|
sl@0
|
340 |
|
sl@0
|
341 |
// Returns a pointer to the standard collation key table (TheStandardTable).
// The table is a file-scope constant, so the caller must not delete it.
const TCollationKeyTable* StandardCollationMethod()
	{
	const TCollationKeyTable* const standardTable = &TheStandardTable;
	return standardTable;
	}
|
sl@0
|
345 |
|
sl@0
|
346 |
// Advances aPointer by one character: one byte when aNarrow is true, otherwise
// one 16-bit unit.
inline void Increment(TUint16 const *& aPointer,TBool aNarrow)
	{
	if (aNarrow)
		{
		const TUint8* bytePointer = (const TUint8*)aPointer;
		aPointer = (const TUint16*)(bytePointer + 1);
		}
	else
		{
		aPointer = aPointer + 1;
		}
	}
|
sl@0
|
350 |
|
sl@0
|
351 |
/////////////////////////////////////////////////////////////////////////////////////////////////
|
sl@0
|
352 |
// TCollationValueIterator
|
sl@0
|
353 |
/////////////////////////////////////////////////////////////////////////////////////////////////
|
sl@0
|
354 |
|
sl@0
|
355 |
/**
|
sl@0
|
356 |
Initializes TCollationValueIterator object with a new character sequence.
|
sl@0
|
357 |
@param aSourceIt An iterator used to access the input character (non-normalized or
|
sl@0
|
358 |
normalized) sequence.
|
sl@0
|
359 |
@internalComponent
|
sl@0
|
360 |
*/
|
sl@0
|
361 |
void TCollationValueIterator::SetSourceIt(TUTF32Iterator& aSourceIt)
|
sl@0
|
362 |
{
|
sl@0
|
363 |
iCurrentKeyPos = 0;
|
sl@0
|
364 |
iKey.iKeys = 0;
|
sl@0
|
365 |
iDecompStrIt.Set(aSourceIt);
|
sl@0
|
366 |
}
|
sl@0
|
367 |
|
sl@0
|
368 |
/**
|
sl@0
|
369 |
Gets current raw key.
|
sl@0
|
370 |
Note: the method may move the iterator one or more positions forward if there are no produced
|
sl@0
|
371 |
collation keys.
|
sl@0
|
372 |
@param aKey A reference to a TCollationKey object, initialized with the
|
sl@0
|
373 |
current collation key after the call, if there is available key.
|
sl@0
|
374 |
@return ETrue Successfull call, aKey initialized with the current collation key,
|
sl@0
|
375 |
EFalse - the iteration has come to the end.
|
sl@0
|
376 |
@internalComponent
|
sl@0
|
377 |
*/
|
sl@0
|
378 |
TBool TCollationValueIterator::GetCurrentKey(TCollationKey& aKey)
|
sl@0
|
379 |
{
|
sl@0
|
380 |
ASSERT(iCurrentKeyPos <= iKey.iKeys);
|
sl@0
|
381 |
if(!ProduceCollationKeys())
|
sl@0
|
382 |
{
|
sl@0
|
383 |
return EFalse;
|
sl@0
|
384 |
}
|
sl@0
|
385 |
aKey = iKey.iKey[iCurrentKeyPos];
|
sl@0
|
386 |
return ETrue;
|
sl@0
|
387 |
}
|
sl@0
|
388 |
|
sl@0
|
389 |
/**
|
sl@0
|
390 |
Gets current key at the specified level.
|
sl@0
|
391 |
Note: the method may move the iterator one or more positions forward if there are no produced
|
sl@0
|
392 |
collation keys.
|
sl@0
|
393 |
@param aLevel Desired level of the collation key: 0..3
|
sl@0
|
394 |
@param aKey A reference to TUint32 where the retrieved key will be stored.
|
sl@0
|
395 |
@return ETrue Success, EFalse - end of the iteration.
|
sl@0
|
396 |
@internalComponent
|
sl@0
|
397 |
*/
|
sl@0
|
398 |
TBool TCollationValueIterator::GetCurrentKey(TInt aLevel, TUint32& aKey)
|
sl@0
|
399 |
{
|
sl@0
|
400 |
TCollationKey rawKey;
|
sl@0
|
401 |
if(GetCurrentKey(rawKey))
|
sl@0
|
402 |
{
|
sl@0
|
403 |
//Key values are ignored if their ignore bit is set and the level is less than 3: in other words, the
|
sl@0
|
404 |
//actual Unicode value is never ignored. This does NOT conform to the system of alternate weightings
|
sl@0
|
405 |
//described in Unicode Technical Report 10, and will probably have to be changed.
|
sl@0
|
406 |
aKey = (aLevel < 3 && (rawKey.iLow & TCollationKeyTable::EIgnoreFlag) && !IgnoringNone()) ? 0 : rawKey.Level(aLevel);
|
sl@0
|
407 |
return ETrue;
|
sl@0
|
408 |
}
|
sl@0
|
409 |
return EFalse;
|
sl@0
|
410 |
}
|
sl@0
|
411 |
|
sl@0
|
412 |
/**
|
sl@0
|
413 |
The method iterates through the controlled character sequence and tries to find first non-zero
|
sl@0
|
414 |
corresponding collation key at the specified level.
|
sl@0
|
415 |
@param aLevel Desired level of the collation key: 0..3
|
sl@0
|
416 |
@return Non-zero collation key value or 0 if the iteration has come to the end.
|
sl@0
|
417 |
@internalComponent
|
sl@0
|
418 |
*/
|
sl@0
|
419 |
TUint32 TCollationValueIterator::GetNextNonZeroKey(TInt aLevel)
|
sl@0
|
420 |
{
|
sl@0
|
421 |
TUint32 key = 0;
|
sl@0
|
422 |
while(GetCurrentKey(aLevel, key) && key == 0)
|
sl@0
|
423 |
{
|
sl@0
|
424 |
Increment();
|
sl@0
|
425 |
}
|
sl@0
|
426 |
return key;
|
sl@0
|
427 |
}
|
sl@0
|
428 |
|
sl@0
|
429 |
/**
|
sl@0
|
430 |
The method determines wheter the specified as a parameter character matches current iterator's
|
sl@0
|
431 |
character.
|
sl@0
|
432 |
If there is a match, the iterator will be moved one position forward.
|
sl@0
|
433 |
Note: the method may move the iterator one or more positions forward if there are no produced
|
sl@0
|
434 |
collation keys.
|
sl@0
|
435 |
@param aMatch The character to compare with the current iterator's character.
|
sl@0
|
436 |
@return ETrue The characters match, EFalse otherwise (or the iteration has come to the end).
|
sl@0
|
437 |
@internalComponent
|
sl@0
|
438 |
*/
|
sl@0
|
439 |
TBool TCollationValueIterator::MatchChar(TChar aMatch)
|
sl@0
|
440 |
{
|
sl@0
|
441 |
TUint32 key;
|
sl@0
|
442 |
if(GetCurrentKey(3, key))
|
sl@0
|
443 |
{
|
sl@0
|
444 |
// Find a match for the quaternary key.. will probably be the unicode value
|
sl@0
|
445 |
// This is a bit poor.
|
sl@0
|
446 |
if(aMatch == key)
|
sl@0
|
447 |
{
|
sl@0
|
448 |
Increment();
|
sl@0
|
449 |
return ETrue;
|
sl@0
|
450 |
}
|
sl@0
|
451 |
}
|
sl@0
|
452 |
return EFalse;
|
sl@0
|
453 |
}
|
sl@0
|
454 |
|
sl@0
|
455 |
/**
|
sl@0
|
456 |
Note: the method may move the iterator one or more positions forward if there are no produced
|
sl@0
|
457 |
collation keys.
|
sl@0
|
458 |
@return The method returns ETrue if the iterator is at a combining character, EFalse otherwise
|
sl@0
|
459 |
(or the iterator has come to the end)
|
sl@0
|
460 |
@internalComponent
|
sl@0
|
461 |
*/
|
sl@0
|
462 |
TBool TCollationValueIterator::AtCombiningCharacter()
|
sl@0
|
463 |
{
|
sl@0
|
464 |
TCollationKey rawKey;
|
sl@0
|
465 |
if(!GetCurrentKey(rawKey))
|
sl@0
|
466 |
{
|
sl@0
|
467 |
return EFalse; // iteration ended
|
sl@0
|
468 |
}
|
sl@0
|
469 |
return rawKey.IsStarter() ? (TBool)EFalse : (TBool)ETrue;
|
sl@0
|
470 |
}
|
sl@0
|
471 |
|
sl@0
|
472 |
/**
|
sl@0
|
473 |
Skips the following combining characters if they are.
|
sl@0
|
474 |
Note: the method may move the iterator one or more positions forward.
|
sl@0
|
475 |
@return The number of skipped combining characters.
|
sl@0
|
476 |
@internalComponent
|
sl@0
|
477 |
*/
|
sl@0
|
478 |
TInt TCollationValueIterator::SkipCombiningCharacters()
|
sl@0
|
479 |
{
|
sl@0
|
480 |
TInt count;
|
sl@0
|
481 |
for(count=0;AtCombiningCharacter();++count)
|
sl@0
|
482 |
{
|
sl@0
|
483 |
Increment();
|
sl@0
|
484 |
}
|
sl@0
|
485 |
return count;
|
sl@0
|
486 |
}
|
sl@0
|
487 |
|
sl@0
|
488 |
/**
|
sl@0
|
489 |
Moves the iterator one step forward making the next collation key available for getting
|
sl@0
|
490 |
using GetCurrentKey().
|
sl@0
|
491 |
@return ETrue Successfull call, there is a collation key available.
|
sl@0
|
492 |
EFalse - the iteration has come to the end.
|
sl@0
|
493 |
@internalComponent
|
sl@0
|
494 |
@see TCollationValueIterator::GetCurrentKey()
|
sl@0
|
495 |
*/
|
sl@0
|
496 |
TBool TCollationValueIterator::Increment()
|
sl@0
|
497 |
{
|
sl@0
|
498 |
ASSERT(iCurrentKeyPos <= iKey.iKeys);
|
sl@0
|
499 |
if(!ProduceCollationKeys())
|
sl@0
|
500 |
{
|
sl@0
|
501 |
return EFalse;
|
sl@0
|
502 |
}
|
sl@0
|
503 |
++iCurrentKeyPos;
|
sl@0
|
504 |
return ETrue;
|
sl@0
|
505 |
}
|
sl@0
|
506 |
|
sl@0
|
507 |
/**
|
sl@0
|
508 |
Returns the position in the underlying string of the iteration,
|
sl@0
|
509 |
if this is well defined. It is not well defined if either we are
|
sl@0
|
510 |
half way through keys defined as a string in the collation table
|
sl@0
|
511 |
or if we are half way through a canonically reordered sequence.
|
sl@0
|
512 |
@return The position in the underlying string if this is well
|
sl@0
|
513 |
defined, or 0 if it is not.
|
sl@0
|
514 |
*/
|
sl@0
|
515 |
const TText16* TCollationValueIterator::CurrentPositionIfAtCharacter()
|
sl@0
|
516 |
{
|
sl@0
|
517 |
if (!ProduceCollationKeys())
|
sl@0
|
518 |
return iCurrentPosition;
|
sl@0
|
519 |
return iCurrentKeyPos == 0? iCurrentPosition : 0;
|
sl@0
|
520 |
}
|
sl@0
|
521 |
|
sl@0
|
522 |
/**
|
sl@0
|
523 |
Produces the longest possible collation keys sequence using the decomposed character sequence,
|
sl@0
|
524 |
pointed by iDecompStrIt iterator. But this will happen only if all keys from iKey array are
|
sl@0
|
525 |
consumed.
|
sl@0
|
526 |
@return ETrue Successfull call, iKey initialized with the produced collation keys sequence,
|
sl@0
|
527 |
EFalse - the iteration has come to the end.
|
sl@0
|
528 |
@internalComponent
|
sl@0
|
529 |
*/
|
sl@0
|
530 |
TBool TCollationValueIterator::ProduceCollationKeys()
|
sl@0
|
531 |
{
|
sl@0
|
532 |
//iKey.iKeys represents the keys count in iKey array, so load more keys, only if all
|
sl@0
|
533 |
//collation keys are already consumed.
|
sl@0
|
534 |
if(iCurrentKeyPos == iKey.iKeys)
|
sl@0
|
535 |
{
|
sl@0
|
536 |
iCurrentPosition = iDecompStrIt.CurrentPositionIfAtCharacter();
|
sl@0
|
537 |
if(iDecompStrIt.AtEnd())
|
sl@0
|
538 |
{//No more characters in the input decomposed canonical string
|
sl@0
|
539 |
return EFalse;
|
sl@0
|
540 |
}
|
sl@0
|
541 |
//Try to get the next collation key sequence. There should be at least one key.
|
sl@0
|
542 |
GetNextRawKeySequence();
|
sl@0
|
543 |
ASSERT(iKey.iKeys > 0);
|
sl@0
|
544 |
iCurrentKeyPos = 0;
|
sl@0
|
545 |
}
|
sl@0
|
546 |
return ETrue;
|
sl@0
|
547 |
}
|
sl@0
|
548 |
|
sl@0
|
549 |
/**
|
sl@0
|
550 |
Consume zero or more characters from the input and convert them into zero or more collation keys.
|
sl@0
|
551 |
@internalComponent
|
sl@0
|
552 |
*/
|
sl@0
|
553 |
void TCollationValueIterator::GetNextRawKeySequence()
|
sl@0
|
554 |
{
|
sl@0
|
555 |
//Store the first character combining class type for later use.
|
sl@0
|
556 |
TChar firstChar = iDecompStrIt.Get(0);
|
sl@0
|
557 |
TBool combining = !::IsBaseCharacter(firstChar);
|
sl@0
|
558 |
// Initialise.
|
sl@0
|
559 |
iKey.iCharactersConsumed = 0;
|
sl@0
|
560 |
iKey.iKeys = 0;
|
sl@0
|
561 |
// See if the override table has a key for the current collation unit.
|
sl@0
|
562 |
if(iMethod.iOverrideTable)
|
sl@0
|
563 |
{
|
sl@0
|
564 |
GetKeyFromTable(iMethod.iOverrideTable);
|
sl@0
|
565 |
}
|
sl@0
|
566 |
// If not, try the main table.
|
sl@0
|
567 |
if(iKey.iCharactersConsumed == 0)
|
sl@0
|
568 |
{
|
sl@0
|
569 |
GetKeyFromTable(iMethod.iMainTable);
|
sl@0
|
570 |
}
|
sl@0
|
571 |
//If no key was found use a default value depending on the current character.
|
sl@0
|
572 |
//For CJK characters:
|
sl@0
|
573 |
//the Unicode value itself as the primary key and 1 as the secondary and tertiary keys;
|
sl@0
|
574 |
//the lower 16 bits end up as 0x0105 because the bottom two bits are used for the ignorable bit,
|
sl@0
|
575 |
//which is clear, and the stop bit, which is set.
|
sl@0
|
576 |
//For other characters:
|
sl@0
|
577 |
//Return two keys containing the 21 bits of the character code (anything from 0 to 0x10FFFF), as
|
sl@0
|
578 |
//explained in Unicode Technical Report 10.
|
sl@0
|
579 |
if(iKey.iCharactersConsumed == 0)
|
sl@0
|
580 |
{
|
sl@0
|
581 |
iKey.iCharactersConsumed = 1;
|
sl@0
|
582 |
iDecompStrIt.Next(1);
|
sl@0
|
583 |
iKey.iKeys = ::CreateDefaultCollationKeySequence(firstChar, iKey.iKey);
|
sl@0
|
584 |
}
|
sl@0
|
585 |
if(!combining)
|
sl@0
|
586 |
{
|
sl@0
|
587 |
iKey.iKey[0].iHigh |= (TUint32)TCollationKey::KFlagIsStarter;
|
sl@0
|
588 |
}
|
sl@0
|
589 |
}
|
sl@0
|
590 |
|
sl@0
|
591 |
/**
|
sl@0
|
592 |
Search for the string aText.
|
sl@0
|
593 |
Put the key index in aIndex if found, otherwise set aIndex to -1.
|
sl@0
|
594 |
If the sought string might be a prefix to a key in the table set aPossiblePrefix to TRUE.
|
sl@0
|
595 |
@internalComponent
|
sl@0
|
596 |
*/
|
sl@0
|
597 |
static void GetStringKey(const TCollationKeyTable* aTable,const TText* aText,TInt aLength,
|
sl@0
|
598 |
TInt& aIndex,TBool& aPossiblePrefix)
|
sl@0
|
599 |
{
|
sl@0
|
600 |
aIndex = -1;
|
sl@0
|
601 |
aPossiblePrefix = EFalse;
|
sl@0
|
602 |
TInt n = aTable->iStringIndices;
|
sl@0
|
603 |
const TUint32* base = aTable->iStringIndex;
|
sl@0
|
604 |
const TUint32* p = base;
|
sl@0
|
605 |
TInt pivot;
|
sl@0
|
606 |
while (n > 0)
|
sl@0
|
607 |
{
|
sl@0
|
608 |
pivot = n / 2;
|
sl@0
|
609 |
p += pivot;
|
sl@0
|
610 |
TUint16 string_index = (TUint16)(*p >> 16);
|
sl@0
|
611 |
const TText* cur_text = aTable->iString + string_index + 1;
|
sl@0
|
612 |
TInt cur_length = aTable->iString[string_index];
|
sl@0
|
613 |
TInt order = TUnicode::Compare(aText,aLength,cur_text,cur_length);
|
sl@0
|
614 |
if (order == 0) // found it
|
sl@0
|
615 |
{
|
sl@0
|
616 |
aIndex = *p & 0xFFFF;
|
sl@0
|
617 |
aPossiblePrefix = ETrue;
|
sl@0
|
618 |
break;
|
sl@0
|
619 |
}
|
sl@0
|
620 |
if (order < 1 && !aPossiblePrefix)
|
sl@0
|
621 |
{
|
sl@0
|
622 |
if (aLength < cur_length && TUnicode::Compare(aText,aLength,cur_text,aLength) == 0)
|
sl@0
|
623 |
aPossiblePrefix = ETrue;
|
sl@0
|
624 |
n = pivot;
|
sl@0
|
625 |
}
|
sl@0
|
626 |
else
|
sl@0
|
627 |
{
|
sl@0
|
628 |
base = p + 1;
|
sl@0
|
629 |
n -= pivot + 1;
|
sl@0
|
630 |
}
|
sl@0
|
631 |
p = base;
|
sl@0
|
632 |
}
|
sl@0
|
633 |
}
|
sl@0
|
634 |
|
sl@0
|
635 |
/**
|
sl@0
|
636 |
Consumes output from iDecompStrIt, produces list of keys in iKey.
|
sl@0
|
637 |
@param aTable A const pointer to the collation key table used by the method.
|
sl@0
|
638 |
@internalComponent
|
sl@0
|
639 |
*/
|
sl@0
|
640 |
void TCollationValueIterator::GetKeyFromTable(const TCollationKeyTable* aTable)
|
sl@0
|
641 |
{
|
sl@0
|
642 |
ASSERT(aTable != NULL);
|
sl@0
|
643 |
iKey.iCharactersConsumed = 0;
|
sl@0
|
644 |
iKey.iKeys = 0;
|
sl@0
|
645 |
|
sl@0
|
646 |
TInt cur_char = iDecompStrIt.Get(0);
|
sl@0
|
647 |
|
sl@0
|
648 |
// Find the longest matching string.
|
sl@0
|
649 |
TInt index = -1;
|
sl@0
|
650 |
if(aTable->iStringIndices > 0)
|
sl@0
|
651 |
{
|
sl@0
|
652 |
TInt moved = 0;
|
sl@0
|
653 |
TText text[KKeyedStringBufferSize];
|
sl@0
|
654 |
TInt textLen = 0;
|
sl@0
|
655 |
if (cur_char <= 0xFFFF)
|
sl@0
|
656 |
{
|
sl@0
|
657 |
text[textLen++] = static_cast <TText> (cur_char);
|
sl@0
|
658 |
}
|
sl@0
|
659 |
else
|
sl@0
|
660 |
{
|
sl@0
|
661 |
text[textLen++] = GetHighSurrogate(cur_char);
|
sl@0
|
662 |
text[textLen++] = GetLowSurrogate(cur_char);
|
sl@0
|
663 |
}
|
sl@0
|
664 |
TBool possible_prefix = ETrue;
|
sl@0
|
665 |
for(TInt i = 1; (i < KKeyedStringBufferSize) && possible_prefix; i++)
|
sl@0
|
666 |
{
|
sl@0
|
667 |
++moved;
|
sl@0
|
668 |
TInt c = iDecompStrIt.Get(i);//get the next character
|
sl@0
|
669 |
if(c == -1)
|
sl@0
|
670 |
{
|
sl@0
|
671 |
break;
|
sl@0
|
672 |
}
|
sl@0
|
673 |
if (c <= 0xFFFF)
|
sl@0
|
674 |
{
|
sl@0
|
675 |
text[textLen++] = static_cast <TText> (c);
|
sl@0
|
676 |
}
|
sl@0
|
677 |
else
|
sl@0
|
678 |
{
|
sl@0
|
679 |
text[textLen++] = GetHighSurrogate(c);
|
sl@0
|
680 |
text[textLen++] = GetLowSurrogate(c);
|
sl@0
|
681 |
}
|
sl@0
|
682 |
TInt cur_index = -1;
|
sl@0
|
683 |
::GetStringKey(aTable, text, textLen, cur_index, possible_prefix);
|
sl@0
|
684 |
if(cur_index != -1)
|
sl@0
|
685 |
{
|
sl@0
|
686 |
index = cur_index;
|
sl@0
|
687 |
iKey.iCharactersConsumed = i + 1;
|
sl@0
|
688 |
}
|
sl@0
|
689 |
}
|
sl@0
|
690 |
if (iKey.iCharactersConsumed < moved)
|
sl@0
|
691 |
{
|
sl@0
|
692 |
moved = 0;
|
sl@0
|
693 |
}
|
sl@0
|
694 |
while (moved != iKey.iCharactersConsumed)
|
sl@0
|
695 |
{
|
sl@0
|
696 |
++moved;
|
sl@0
|
697 |
}
|
sl@0
|
698 |
if(moved > 0)
|
sl@0
|
699 |
{
|
sl@0
|
700 |
iDecompStrIt.Next(moved);//adjust the iterator start position
|
sl@0
|
701 |
}
|
sl@0
|
702 |
}
|
sl@0
|
703 |
|
sl@0
|
704 |
// Now search the main index.
|
sl@0
|
705 |
if(index == -1)
|
sl@0
|
706 |
{
|
sl@0
|
707 |
index = ::FindCollationKeyIndex(cur_char, *aTable);
|
sl@0
|
708 |
if(0 <= index)
|
sl@0
|
709 |
{
|
sl@0
|
710 |
iKey.iCharactersConsumed = 1;
|
sl@0
|
711 |
iDecompStrIt.Next(1);//adjust the iterator start position
|
sl@0
|
712 |
}
|
sl@0
|
713 |
}
|
sl@0
|
714 |
|
sl@0
|
715 |
// Fill in the key or keys.
|
sl@0
|
716 |
if(index != -1)
|
sl@0
|
717 |
{
|
sl@0
|
718 |
const TUint32* p = &aTable->iKey[index];
|
sl@0
|
719 |
TCollationKey* q = iKey.iKey;
|
sl@0
|
720 |
iKey.iKeys = 0;
|
sl@0
|
721 |
while(iKey.iKeys < TKeyInfo::EMaxKeys)
|
sl@0
|
722 |
{
|
sl@0
|
723 |
q->iLow = *p;
|
sl@0
|
724 |
q->iHigh = cur_char;
|
sl@0
|
725 |
iKey.iKeys++;
|
sl@0
|
726 |
if(*p & 1)
|
sl@0
|
727 |
{
|
sl@0
|
728 |
break;
|
sl@0
|
729 |
}
|
sl@0
|
730 |
q++;
|
sl@0
|
731 |
p++;
|
sl@0
|
732 |
}
|
sl@0
|
733 |
}
|
sl@0
|
734 |
}
|
sl@0
|
735 |
|
sl@0
|
736 |
/////////////////////////////////////////////////////////////////////////////////////////////////
|
sl@0
|
737 |
// TCollate
|
sl@0
|
738 |
/////////////////////////////////////////////////////////////////////////////////////////////////
|
sl@0
|
739 |
|
sl@0
|
740 |
/**
|
sl@0
|
741 |
Construct a TCollate object based on the collation method specified
|
sl@0
|
742 |
within aCharSet, if any. If there is none, or aCharSet is null, the
|
sl@0
|
743 |
standard collation method will be used.
|
sl@0
|
744 |
aMask and aFlags provide a method for overriding the flags in the collation method:
|
sl@0
|
745 |
Each flag set to 1 in aMask is a flag that will be overridden and set to the
|
sl@0
|
746 |
corresponding flag value in aFlags.
|
sl@0
|
747 |
Ownership of aCharSet is not passed.
|
sl@0
|
748 |
@param aCharSet Locale-specific character attribute and collation data
|
sl@0
|
749 |
@param aMask Provides a method for overriding the flags in the collation method
|
sl@0
|
750 |
@param aFlags Provides a method for overriding the flags in the collation method
|
sl@0
|
751 |
@internalComponent
|
sl@0
|
752 |
*/
|
sl@0
|
753 |
TCollate::TCollate(const LCharSet* aCharSet, TUint aMask, TUint aFlags)
|
sl@0
|
754 |
{
|
sl@0
|
755 |
iMethod.iMainTable = NULL;
|
sl@0
|
756 |
iMethod.iOverrideTable = NULL;
|
sl@0
|
757 |
iMethod.iFlags = 0;
|
sl@0
|
758 |
if (aCharSet && aCharSet->iCollationDataSet && aCharSet->iCollationDataSet->iMethod)
|
sl@0
|
759 |
{
|
sl@0
|
760 |
iMethod = aCharSet->iCollationDataSet->iMethod[0];
|
sl@0
|
761 |
}
|
sl@0
|
762 |
if (iMethod.iMainTable == NULL)
|
sl@0
|
763 |
{
|
sl@0
|
764 |
iMethod.iMainTable = &TheStandardTable;
|
sl@0
|
765 |
}
|
sl@0
|
766 |
if (aMask)
|
sl@0
|
767 |
{
|
sl@0
|
768 |
iMethod.iFlags &= ~aMask;
|
sl@0
|
769 |
iMethod.iFlags |= (aMask & aFlags);
|
sl@0
|
770 |
}
|
sl@0
|
771 |
}
|
sl@0
|
772 |
|
sl@0
|
773 |
/**
|
sl@0
|
774 |
Construct a TCollate object based on an already constructed
|
sl@0
|
775 |
TCollationMethod specified in aMethod.
|
sl@0
|
776 |
Ownership is not passed.
|
sl@0
|
777 |
@param aMethod Collation keys table
|
sl@0
|
778 |
@internalComponent
|
sl@0
|
779 |
*/
|
sl@0
|
780 |
TCollate::TCollate(const TCollationMethod& aMethod) :
|
sl@0
|
781 |
iMethod(aMethod)
|
sl@0
|
782 |
{
|
sl@0
|
783 |
if(!iMethod.iMainTable)
|
sl@0
|
784 |
{
|
sl@0
|
785 |
iMethod.iMainTable = &TheStandardTable;
|
sl@0
|
786 |
}
|
sl@0
|
787 |
}
|
sl@0
|
788 |
|
sl@0
|
789 |
/**
|
sl@0
|
790 |
Compare the string beginning at aString1 of length aLength1 against the
|
sl@0
|
791 |
string beginning at aString2 of length aLength2.
|
sl@0
|
792 |
|
sl@0
|
793 |
@param aString1 First string to compare
|
sl@0
|
794 |
@param aLength1 Length of aString1
|
sl@0
|
795 |
@param aString2 Second string to compare
|
sl@0
|
796 |
@param aLength2 Length of aString2
|
sl@0
|
797 |
@param aMaxLevel Determines the tightness of the collation. At level 0, only
|
sl@0
|
798 |
character identities are distinguished. At level 1 accents are
|
sl@0
|
799 |
distinguished as well. At level 2 case is distinguished as well. At
|
sl@0
|
800 |
level 3 all non canonically equivalent Unicode characters are considered
|
sl@0
|
801 |
different. By default aMaxLevel is 3.
|
sl@0
|
802 |
@return EStringsIdentical The strings are identical.
|
sl@0
|
803 |
ELeftComparesLessAndIsNotPrefix For example: aString1 = "aaa", aString2 = "zzzz".
|
sl@0
|
804 |
ELeftIsPrefixOfRight For example: aString1 = "abc", aString2 = "abcd".
|
sl@0
|
805 |
ERightIsPrefixOfLeft For example: aString1 = "abcd", aString2 = "abc".
|
sl@0
|
806 |
ERightComparesLessAndIsNotPrefix For example: aString1 = "zzzz", aString2 = "aaa".
|
sl@0
|
807 |
@internalComponent
|
sl@0
|
808 |
*/
|
sl@0
|
809 |
TCollate::TComparisonResult TCollate::Compare(const TUint16 *aString1, TInt aLength1,
|
sl@0
|
810 |
const TUint16 *aString2, TInt aLength2,
|
sl@0
|
811 |
TInt aMaxLevel) const
|
sl@0
|
812 |
{
|
sl@0
|
813 |
TUTF32Iterator itL(aString1, aString1 + aLength1);
|
sl@0
|
814 |
TUTF32Iterator itR(aString2, aString2 + aLength2);
|
sl@0
|
815 |
return CompareKeySequences(itL, itR, aMaxLevel, 0, 0);
|
sl@0
|
816 |
}
|
sl@0
|
817 |
|
sl@0
|
818 |
/**
|
sl@0
|
819 |
Find the string beginning at aString2 of length aLength2 in the string
|
sl@0
|
820 |
beginning at aString1 of length aLength1.
|
sl@0
|
821 |
|
sl@0
|
822 |
@param aString1 String to search
|
sl@0
|
823 |
@param aLength1 Length of aString1
|
sl@0
|
824 |
@param aString2 String to search for
|
sl@0
|
825 |
@param aLength2 Length of aString2
|
sl@0
|
826 |
@param aMaxLevel Determines the tightness of the collation. At level 0, only
|
sl@0
|
827 |
character identities are distinguished. At level 1 accents are
|
sl@0
|
828 |
distinguished as well. At level 2 case is distinguishes as well. At
|
sl@0
|
829 |
level 3 all valid different Unicode characters are considered different.
|
sl@0
|
830 |
@param aString2WildChar Wild card character which may be specified for aString2. By default
|
sl@0
|
831 |
wild card character is not specified and not used.
|
sl@0
|
832 |
@return KErrNotFound aString2 not found in aString1.
|
sl@0
|
833 |
Non-negative value telling the position in aString1 where the first occurrence of
|
sl@0
|
834 |
aString2 was found.
|
sl@0
|
835 |
@internalComponent
|
sl@0
|
836 |
*/
|
sl@0
|
837 |
TInt TCollate::Find(const TUint16 *aString1, TInt aLength1,
|
sl@0
|
838 |
const TUint16 *aString2, TInt aLength2,
|
sl@0
|
839 |
TInt aMaxLevel, TUint aString2WildChar) const
|
sl@0
|
840 |
{
|
sl@0
|
841 |
TInt dummy(0);
|
sl@0
|
842 |
return Find(aString1, aLength1, aString2,aLength2, dummy, aMaxLevel,aString2WildChar );
|
sl@0
|
843 |
}
|
sl@0
|
844 |
|
sl@0
|
845 |
/**
|
sl@0
|
846 |
Find the string beginning at aString2 of length aLength2 in the string
|
sl@0
|
847 |
beginning at aString1 of length aLength1.
|
sl@0
|
848 |
|
sl@0
|
849 |
@param aString1 String to search
|
sl@0
|
850 |
@param aLength1 Length of aString1
|
sl@0
|
851 |
@param aString2 String to search for
|
sl@0
|
852 |
@param aLength2 Length of aString2
|
sl@0
|
853 |
@param aLengthFound A refernce to the length of the match found in the candidate string
|
sl@0
|
854 |
@param aMaxLevel Determines the tightness of the collation. At level 0, only
|
sl@0
|
855 |
character identities are distinguished. At level 1 accents are
|
sl@0
|
856 |
distinguished as well. At level 2 case is distinguishes as well. At
|
sl@0
|
857 |
level 3 all valid different Unicode characters are considered different.
|
sl@0
|
858 |
@param aString2WildChar Wild card character which may be specified for aString2. By default
|
sl@0
|
859 |
wild card character is not specified and not used.
|
sl@0
|
860 |
@return KErrNotFound aString2 not found in aString1.
|
sl@0
|
861 |
Non-negative value telling the position in aString1 where the first occurrence of
|
sl@0
|
862 |
aString2 was found.
|
sl@0
|
863 |
@internalComponent
|
sl@0
|
864 |
*/
|
sl@0
|
865 |
TInt TCollate::Find(const TUint16 *aString1, TInt aLength1,
|
sl@0
|
866 |
const TUint16 *aString2, TInt aLength2,
|
sl@0
|
867 |
TInt &aLengthFound, TInt aMaxLevel, TUint aString2WildChar) const
|
sl@0
|
868 |
{
|
sl@0
|
869 |
TUTF32Iterator itL(aString1, aString1 + aLength1);
|
sl@0
|
870 |
TUTF32Iterator itR(aString2, aString2 + aLength2);
|
sl@0
|
871 |
return FindKeySequence(itL, itR, aMaxLevel, aString2WildChar, 0, aLengthFound);
|
sl@0
|
872 |
}
|
sl@0
|
873 |
|
sl@0
|
874 |
/**
|
sl@0
|
875 |
Match the pattern defined by aSearchTerm with aCandidate.
|
sl@0
|
876 |
Return the index in aCandidate of the start of the first pattern matched -
|
sl@0
|
877 |
that is, the first character in aSearchTerm after all wild-sequence characters
|
sl@0
|
878 |
have been matched. Return KErrNotFound if there is no match.
|
sl@0
|
879 |
|
sl@0
|
880 |
For example, if aCandidate is "abcdefghijkl", the following values of aSearchTerm yield the
|
sl@0
|
881 |
following results:
|
sl@0
|
882 |
"abc*" gives 0
|
sl@0
|
883 |
"abc" gives KErrNotFound
|
sl@0
|
884 |
"xyz" gives KErrNotFound
|
sl@0
|
885 |
"*def" gives KErrNotFound
|
sl@0
|
886 |
"*def*" gives 3
|
sl@0
|
887 |
"*d?f*" gives 3
|
sl@0
|
888 |
"a*kl" gives 0
|
sl@0
|
889 |
"*d*kl" gives 4
|
sl@0
|
890 |
|
sl@0
|
891 |
To match a pattern anywhere in aCandidate, aSearchTerm must both start and end
|
sl@0
|
892 |
with aString2WildSequenceChar
|
sl@0
|
893 |
|
sl@0
|
894 |
@param aCandidate String to search
|
sl@0
|
895 |
@param aCandidateLength Length of aCandidate
|
sl@0
|
896 |
@param aSearchTerm String to search for
|
sl@0
|
897 |
@param aSearchTermLength Length of aSearchTerm
|
sl@0
|
898 |
@param aMaxLevel Determines the tightness of the collation. At level 0, only
|
sl@0
|
899 |
character identities are distinguished. At level 1 accents are
|
sl@0
|
900 |
distinguished as well. At level 2 case is distinguishes as well. At
|
sl@0
|
901 |
level 3 all valid different Unicode characters are considered different.
|
sl@0
|
902 |
@param aWildChar Wild card character which may be specified for aSearchTerm. By default
|
sl@0
|
903 |
the wild card character used is '?'.
|
sl@0
|
904 |
@param aWildSequenceChar Wild card sequence character which may be specified for aSearchTerm.
|
sl@0
|
905 |
Its default value is '*'.
|
sl@0
|
906 |
@param aEscapeChar Escape character. If it is non-zero and precdes aWildChar and aWildSequenceChar characters in
|
sl@0
|
907 |
aCandidate string, then these characters should be treated as normal characters.
|
sl@0
|
908 |
@return The index in aCandidate of the start of the first pattern matched.
|
sl@0
|
909 |
|
sl@0
|
910 |
@internalComponent.
|
sl@0
|
911 |
*/
|
sl@0
|
912 |
TInt TCollate::Match(const TUint16 *aCandidate, TInt aCandidateLength,
|
sl@0
|
913 |
const TUint16 *aSearchTerm,TInt aSearchTermLength,
|
sl@0
|
914 |
TInt aMaxLevel, TUint aWildChar, TUint aWildSequenceChar,
|
sl@0
|
915 |
TUint aEscapeChar) const
|
sl@0
|
916 |
{
|
sl@0
|
917 |
ASSERT(0 <= aSearchTermLength);
|
sl@0
|
918 |
ASSERT(0 <= aCandidateLength);
|
sl@0
|
919 |
|
sl@0
|
920 |
if(aMaxLevel == 3 && (iMethod.iFlags & TCollationMethod::EFoldCase))
|
sl@0
|
921 |
{
|
sl@0
|
922 |
aMaxLevel = 2;
|
sl@0
|
923 |
}
|
sl@0
|
924 |
|
sl@0
|
925 |
TUTF32Iterator candidate(aCandidate, aCandidate + aCandidateLength);
|
sl@0
|
926 |
TUTF32Iterator searchTerm(aSearchTerm, aSearchTerm + aSearchTermLength);
|
sl@0
|
927 |
|
sl@0
|
928 |
TInt firstMatch = KErrNotFound;
|
sl@0
|
929 |
TInt segEnd = ::FindCharacter(aWildSequenceChar, aEscapeChar, aSearchTerm, aSearchTermLength);
|
sl@0
|
930 |
|
sl@0
|
931 |
// Is there any prefix that the candidate string must have?
|
sl@0
|
932 |
// aSearchTerm looks like "abc*...". Then segEnd will be 3 (the position of '*').
|
sl@0
|
933 |
// Check that aCandidate begins with "abc" too.
|
sl@0
|
934 |
if(segEnd != 0 || aSearchTermLength == 0)
|
sl@0
|
935 |
{
|
sl@0
|
936 |
searchTerm = TUTF32Iterator(aSearchTerm, aSearchTerm + segEnd);
|
sl@0
|
937 |
TComparisonResult order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar);
|
sl@0
|
938 |
if(order != ERightIsPrefixOfLeft && order != EStringsIdentical)
|
sl@0
|
939 |
{
|
sl@0
|
940 |
return KErrNotFound;
|
sl@0
|
941 |
}
|
sl@0
|
942 |
if(aSearchTermLength == segEnd)
|
sl@0
|
943 |
{
|
sl@0
|
944 |
return order == EStringsIdentical ? 0 : KErrNotFound;
|
sl@0
|
945 |
}
|
sl@0
|
946 |
firstMatch = 0;
|
sl@0
|
947 |
}
|
sl@0
|
948 |
|
sl@0
|
949 |
// search for all remaining segments
|
sl@0
|
950 |
// For example: aSearchTerm = "abc*def*ghi", aCandidate = "abc...".
|
sl@0
|
951 |
// aCandidate was already searched for "abc" and segEnd = 3.
|
sl@0
|
952 |
// Search aCandidate for the remaining segments: "def" and "ghi".
|
sl@0
|
953 |
while(aSearchTermLength != (segEnd + 1))
|
sl@0
|
954 |
{
|
sl@0
|
955 |
++segEnd;
|
sl@0
|
956 |
aSearchTermLength -= segEnd;
|
sl@0
|
957 |
aSearchTerm += segEnd;
|
sl@0
|
958 |
segEnd = ::FindCharacter(aWildSequenceChar, aEscapeChar, aSearchTerm, aSearchTermLength);
|
sl@0
|
959 |
searchTerm = TUTF32Iterator(aSearchTerm, aSearchTerm + segEnd);//searchTerm holds the next aSearchTerm segment
|
sl@0
|
960 |
//We will store here the current position of candidate string.
|
sl@0
|
961 |
const TUint16* candidateCurrentPos = candidate.CurrentPosition();
|
sl@0
|
962 |
TInt dummy(0);
|
sl@0
|
963 |
TInt match = FindKeySequence(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar, dummy);
|
sl@0
|
964 |
if (match < 0)
|
sl@0
|
965 |
{
|
sl@0
|
966 |
return KErrNotFound;
|
sl@0
|
967 |
}
|
sl@0
|
968 |
if (aSearchTermLength == segEnd)
|
sl@0
|
969 |
{
|
sl@0
|
970 |
candidate.SetStart(candidateCurrentPos + match);
|
sl@0
|
971 |
TComparisonResult order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar);
|
sl@0
|
972 |
if (order == EStringsIdentical)
|
sl@0
|
973 |
return firstMatch < 0 ? (match + candidateCurrentPos - aCandidate): firstMatch;
|
sl@0
|
974 |
while (match >= 0)
|
sl@0
|
975 |
{
|
sl@0
|
976 |
// We are at the very end of the search term, so this segment must
|
sl@0
|
977 |
// match the end of the candidate string.
|
sl@0
|
978 |
candidate.SetStart(candidateCurrentPos + match + 1);
|
sl@0
|
979 |
candidateCurrentPos = candidate.CurrentPosition();
|
sl@0
|
980 |
match = FindKeySequence(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar, dummy);
|
sl@0
|
981 |
candidate.SetStart(candidateCurrentPos + match);
|
sl@0
|
982 |
order = CompareKeySequences(candidate, searchTerm, aMaxLevel, aWildChar, aEscapeChar);
|
sl@0
|
983 |
if (order == EStringsIdentical)
|
sl@0
|
984 |
return firstMatch < 0 ? (match + candidateCurrentPos - aCandidate): firstMatch;
|
sl@0
|
985 |
}
|
sl@0
|
986 |
return KErrNotFound;
|
sl@0
|
987 |
}
|
sl@0
|
988 |
//Initialize the first match position, if not initialized yet
|
sl@0
|
989 |
if (firstMatch < 0 && segEnd != 0)
|
sl@0
|
990 |
{
|
sl@0
|
991 |
firstMatch = match;
|
sl@0
|
992 |
}
|
sl@0
|
993 |
}
|
sl@0
|
994 |
return firstMatch < 0 ? aCandidateLength : firstMatch;
|
sl@0
|
995 |
}
|
sl@0
|
996 |
|
sl@0
|
997 |
/**
|
sl@0
|
998 |
Compare values output from the iterators. After the comparison, if
|
sl@0
|
999 |
ERightIsPrefixOfLeft or EStringsIdentical is returned, then aLeft
|
sl@0
|
1000 |
will be pointing at the next character (at MaxLevel) after the match.
|
sl@0
|
1001 |
If right is shown to be a prefix of left, this means that it has been
|
sl@0
|
1002 |
checked at all requested levels. If it is reported that the right is a
|
sl@0
|
1003 |
prefix of the left, then this will mean also that there are no unmatched
|
sl@0
|
1004 |
combining characters on the left.
|
sl@0
|
1005 |
|
sl@0
|
1006 |
@internalComponent
|
sl@0
|
1007 |
*/
|
sl@0
|
1008 |
TCollate::TComparisonResult TCollate::CompareKeySequences(TUTF32Iterator& aLeft, TUTF32Iterator& aRight,
|
sl@0
|
1009 |
TInt aMaxLevel, TInt aRightStringWildChar, TInt aEscapeChar) const
|
sl@0
|
1010 |
{
|
sl@0
|
1011 |
// Clamp the maximum level of the comparison.
|
sl@0
|
1012 |
if(aMaxLevel < 0)
|
sl@0
|
1013 |
{
|
sl@0
|
1014 |
aMaxLevel = 0;
|
sl@0
|
1015 |
}
|
sl@0
|
1016 |
if(aMaxLevel > 3)
|
sl@0
|
1017 |
{
|
sl@0
|
1018 |
aMaxLevel = 3;
|
sl@0
|
1019 |
}
|
sl@0
|
1020 |
//Case folding forces the maximum level to 2. Case folding could only be done at level 3, which
|
sl@0
|
1021 |
//makes use of the actual Unicode values, if we had access to a case conversion table appropriate for
|
sl@0
|
1022 |
//the collation method.
|
sl@0
|
1023 |
if(aMaxLevel == 3 && (iMethod.iFlags & TCollationMethod::EFoldCase))
|
sl@0
|
1024 |
{
|
sl@0
|
1025 |
aMaxLevel = 2;
|
sl@0
|
1026 |
}
|
sl@0
|
1027 |
TCollationValueIterator itL(iMethod);
|
sl@0
|
1028 |
TCollationValueIterator itR(iMethod);
|
sl@0
|
1029 |
// Perform the comparison.
|
sl@0
|
1030 |
TComparisonResult order = EStringsIdentical;
|
sl@0
|
1031 |
TComparisonResult accumulatedOrder = EStringsIdentical;
|
sl@0
|
1032 |
const TText16* endOfLeft = 0;
|
sl@0
|
1033 |
for (int cur_level = 0; cur_level <= aMaxLevel; cur_level++)
|
sl@0
|
1034 |
{
|
sl@0
|
1035 |
itL.SetSourceIt(aLeft);
|
sl@0
|
1036 |
itR.SetSourceIt(aRight);
|
sl@0
|
1037 |
|
sl@0
|
1038 |
for (;;)
|
sl@0
|
1039 |
{
|
sl@0
|
1040 |
TUint32 c2 = itR.GetNextNonZeroKey(cur_level);
|
sl@0
|
1041 |
if (c2 == 0)
|
sl@0
|
1042 |
{
|
sl@0
|
1043 |
TUint32 more = itL.GetNextNonZeroKey(cur_level);
|
sl@0
|
1044 |
if (cur_level == 0)
|
sl@0
|
1045 |
endOfLeft = itL.CurrentPositionIfAtCharacter();
|
sl@0
|
1046 |
if (more == 0)
|
sl@0
|
1047 |
{//No non-zero keys at all
|
sl@0
|
1048 |
order = EStringsIdentical;
|
sl@0
|
1049 |
}
|
sl@0
|
1050 |
else if (!(TCollationMethod::EIgnoreCombining & iMethod.iFlags)
|
sl@0
|
1051 |
&& itL.AtCombiningCharacter())
|
sl@0
|
1052 |
{
|
sl@0
|
1053 |
order = ERightComparesLessAndIsNotPrefix;
|
sl@0
|
1054 |
}
|
sl@0
|
1055 |
else
|
sl@0
|
1056 |
{
|
sl@0
|
1057 |
order = ERightIsPrefixOfLeft;
|
sl@0
|
1058 |
}
|
sl@0
|
1059 |
break;
|
sl@0
|
1060 |
}
|
sl@0
|
1061 |
TUint32 c1 = itL.GetNextNonZeroKey(cur_level);
|
sl@0
|
1062 |
if (c1 == 0)
|
sl@0
|
1063 |
{
|
sl@0
|
1064 |
order = ELeftIsPrefixOfRight;
|
sl@0
|
1065 |
break;
|
sl@0
|
1066 |
}
|
sl@0
|
1067 |
|
sl@0
|
1068 |
itL.Increment();
|
sl@0
|
1069 |
if(cur_level == 0 && aEscapeChar != 0 && itR.MatchChar(aEscapeChar))
|
sl@0
|
1070 |
{//Escape character found. Get the next key.
|
sl@0
|
1071 |
c2 = itR.GetNextNonZeroKey(cur_level);
|
sl@0
|
1072 |
itR.Increment();
|
sl@0
|
1073 |
}
|
sl@0
|
1074 |
else
|
sl@0
|
1075 |
{
|
sl@0
|
1076 |
if(aRightStringWildChar && itR.MatchChar(aRightStringWildChar))
|
sl@0
|
1077 |
{
|
sl@0
|
1078 |
itL.SkipCombiningCharacters();
|
sl@0
|
1079 |
itR.SkipCombiningCharacters();
|
sl@0
|
1080 |
c1 = c2;
|
sl@0
|
1081 |
}
|
sl@0
|
1082 |
else
|
sl@0
|
1083 |
{
|
sl@0
|
1084 |
itR.Increment();
|
sl@0
|
1085 |
}
|
sl@0
|
1086 |
}
|
sl@0
|
1087 |
|
sl@0
|
1088 |
// Has an order been determined by key difference?
|
sl@0
|
1089 |
if (c1 != c2)
|
sl@0
|
1090 |
{
|
sl@0
|
1091 |
// Fold to lower case, or switch ordering for case or kana syllabary if necessary.
|
sl@0
|
1092 |
if (cur_level == 2 && (c1 <= (0x14 * 4) && c2 <= (0x14 * 4)))
|
sl@0
|
1093 |
{
|
sl@0
|
1094 |
// Divide keys by 4 to get them back into the range 0..63
|
sl@0
|
1095 |
// because keys returned by GetKey are masked but not shifted.
|
sl@0
|
1096 |
c1 /= 4;
|
sl@0
|
1097 |
c2 /= 4;
|
sl@0
|
1098 |
ProcessKeys(c1, c2, iMethod.iFlags);
|
sl@0
|
1099 |
}
|
sl@0
|
1100 |
if (c1 != c2) // test equality again because case folding might have made them equal
|
sl@0
|
1101 |
{
|
sl@0
|
1102 |
order = c1 > c2 ? ERightComparesLessAndIsNotPrefix : ELeftComparesLessAndIsNotPrefix;
|
sl@0
|
1103 |
TBool backwards = cur_level == 1 && (iMethod.iFlags & TCollationMethod::EAccentsBackwards);
|
sl@0
|
1104 |
if (order && !backwards)
|
sl@0
|
1105 |
{
|
sl@0
|
1106 |
break;
|
sl@0
|
1107 |
}
|
sl@0
|
1108 |
}
|
sl@0
|
1109 |
}
|
sl@0
|
1110 |
}
|
sl@0
|
1111 |
if (accumulatedOrder != order && order != EStringsIdentical)
|
sl@0
|
1112 |
{
|
sl@0
|
1113 |
if (accumulatedOrder == ERightIsPrefixOfLeft)
|
sl@0
|
1114 |
{
|
sl@0
|
1115 |
return ERightComparesLessAndIsNotPrefix;
|
sl@0
|
1116 |
}
|
sl@0
|
1117 |
else if (accumulatedOrder == ELeftIsPrefixOfRight)
|
sl@0
|
1118 |
{
|
sl@0
|
1119 |
return ELeftComparesLessAndIsNotPrefix;
|
sl@0
|
1120 |
}
|
sl@0
|
1121 |
else
|
sl@0
|
1122 |
{
|
sl@0
|
1123 |
// accumulatedOrder == EStringsIdentical
|
sl@0
|
1124 |
if (order == ELeftComparesLessAndIsNotPrefix || order == ERightComparesLessAndIsNotPrefix)
|
sl@0
|
1125 |
{
|
sl@0
|
1126 |
return order;
|
sl@0
|
1127 |
}
|
sl@0
|
1128 |
}
|
sl@0
|
1129 |
accumulatedOrder = order;
|
sl@0
|
1130 |
}
|
sl@0
|
1131 |
}
|
sl@0
|
1132 |
|
sl@0
|
1133 |
if (accumulatedOrder == EStringsIdentical || accumulatedOrder == ERightIsPrefixOfLeft)
|
sl@0
|
1134 |
{
|
sl@0
|
1135 |
if (endOfLeft)
|
sl@0
|
1136 |
{
|
sl@0
|
1137 |
aLeft.SetStart(endOfLeft);
|
sl@0
|
1138 |
}
|
sl@0
|
1139 |
else if (accumulatedOrder == ERightIsPrefixOfLeft)
|
sl@0
|
1140 |
{
|
sl@0
|
1141 |
accumulatedOrder = ERightComparesLessAndIsNotPrefix;
|
sl@0
|
1142 |
}
|
sl@0
|
1143 |
}
|
sl@0
|
1144 |
return accumulatedOrder;
|
sl@0
|
1145 |
}
|
sl@0
|
1146 |
|
sl@0
|
1147 |
/**
|
sl@0
|
1148 |
Finds search term inside candidate string. Returns KErrNotFound if there
|
sl@0
|
1149 |
is no match, returns the offset into the candidate string at which the
|
sl@0
|
1150 |
search term was found. If a string was found, the search term iterator is left
|
sl@0
|
1151 |
pointing at the end of the search term, and the candidate iterator is
|
sl@0
|
1152 |
left pointing just after the matched keys. aMatchPos returns where in
|
sl@0
|
1153 |
the candidate string the match was found.
|
sl@0
|
1154 |
|
sl@0
|
1155 |
@internalComponent
|
sl@0
|
1156 |
*/
|
sl@0
|
1157 |
TInt TCollate::FindKeySequence(TUTF32Iterator& aCandidate, TUTF32Iterator& aSearchTerm,
|
sl@0
|
1158 |
TInt aMaxLevel, TInt aWildChar, TInt aEscapeChar, TInt& aLengthFound) const
|
sl@0
|
1159 |
{
|
sl@0
|
1160 |
TInt matchOffset = 0;
|
sl@0
|
1161 |
//Save the start of the candidate string
|
sl@0
|
1162 |
const TText* candidateStart = aCandidate.CurrentPosition();
|
sl@0
|
1163 |
//Create copies of aCandidate and aSearchTerm
|
sl@0
|
1164 |
TUTF32Iterator candidateCopy(aCandidate);
|
sl@0
|
1165 |
TUTF32Iterator searchTermCopy(aSearchTerm);
|
sl@0
|
1166 |
aLengthFound = KErrNotFound;
|
sl@0
|
1167 |
//Do the search
|
sl@0
|
1168 |
for(;;)
|
sl@0
|
1169 |
{
|
sl@0
|
1170 |
TComparisonResult order = CompareKeySequences(aCandidate, aSearchTerm, aMaxLevel, aWildChar, aEscapeChar);
|
sl@0
|
1171 |
if(order == ELeftIsPrefixOfRight)
|
sl@0
|
1172 |
{
|
sl@0
|
1173 |
return KErrNotFound;
|
sl@0
|
1174 |
}
|
sl@0
|
1175 |
if(order == ERightIsPrefixOfLeft || order == EStringsIdentical)
|
sl@0
|
1176 |
{
|
sl@0
|
1177 |
aLengthFound = (aCandidate.CurrentPosition() - candidateStart) - matchOffset;
|
sl@0
|
1178 |
return matchOffset;
|
sl@0
|
1179 |
}
|
sl@0
|
1180 |
|
sl@0
|
1181 |
aCandidate = candidateCopy;
|
sl@0
|
1182 |
aCandidate.Next();
|
sl@0
|
1183 |
::SkipCombiningCharacters(aCandidate);
|
sl@0
|
1184 |
candidateCopy = aCandidate;
|
sl@0
|
1185 |
|
sl@0
|
1186 |
matchOffset = aCandidate.CurrentPosition() - candidateStart;
|
sl@0
|
1187 |
|
sl@0
|
1188 |
aSearchTerm = searchTermCopy;
|
sl@0
|
1189 |
}
|
sl@0
|
1190 |
}
|