os/kernelhwsrv/kernel/eka/euser/unicode/Compare.cpp
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     2
// All rights reserved.
sl@0
     3
// This component and the accompanying materials are made available
sl@0
     4
// under the terms of the License "Eclipse Public License v1.0"
sl@0
     5
// which accompanies this distribution, and is available
sl@0
     6
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     7
//
sl@0
     8
// Initial Contributors:
sl@0
     9
// Nokia Corporation - initial contribution.
sl@0
    10
//
sl@0
    11
// Contributors:
sl@0
    12
//
sl@0
    13
// Description:
sl@0
    14
// Folding and decomposition implementation
sl@0
    15
// 
sl@0
    16
//
sl@0
    17
sl@0
    18
#include "FoldDecomp.inl"
sl@0
    19
#include "CompareImp.h"
sl@0
    20
#include "u32std.h"
sl@0
    21
sl@0
    22
#define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0])))
sl@0
    23
sl@0
    24
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
    25
// Global functions
sl@0
    26
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
    27
sl@0
    28
/**
sl@0
    29
@internalComponent
sl@0
    30
*/
sl@0
    31
TChar UTF16ToChar(const TText16* a)
sl@0
    32
	{
sl@0
    33
	if (0xD800 <= a[0])
sl@0
    34
		{
sl@0
    35
		if (a[0] < 0xE000)
sl@0
    36
			{
sl@0
    37
            if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1]))
sl@0
    38
				{
sl@0
    39
                TChar c = ::PairSurrogates(a[0], a[1]);
sl@0
    40
				if ((c & 0xFFFE) != 0xFFFE)
sl@0
    41
					return c;
sl@0
    42
				}
sl@0
    43
			return 0xFFFF;
sl@0
    44
			}
sl@0
    45
		if (a[0] == 0xFFFE)
sl@0
    46
			return 0xFFFF;
sl@0
    47
		}
sl@0
    48
	return a[0];
sl@0
    49
	}
sl@0
    50
sl@0
    51
/**
sl@0
    52
Is a character a base character (ETrue) or a combiner (EFalse)?
sl@0
    53
For now, we will treat all control characters as base characters.
sl@0
    54
@internalComponent
sl@0
    55
*/
sl@0
    56
TBool IsBaseCharacter(TChar a)
sl@0
    57
	{
sl@0
    58
	if(a < 0x220)
sl@0
    59
        {
sl@0
    60
		// These Unicode characters are all assigned
sl@0
    61
		// and none of them is a combining character
sl@0
    62
		return ETrue;
sl@0
    63
        }
sl@0
    64
	return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup;
sl@0
    65
	}
sl@0
    66
sl@0
    67
/**
sl@0
    68
@internalComponent
sl@0
    69
*/
sl@0
    70
inline TInt GetDecompositionIndex(TChar a)
sl@0
    71
	{
sl@0
    72
    TInt i = DecompositionHashStart(a);
sl@0
    73
	TUint32 v = KUnicodeToIndexHash[i];
sl@0
    74
	if (!v)
sl@0
    75
		return -1;
sl@0
    76
	if ((v & 0xFFFFF) != a)
sl@0
    77
		{
sl@0
    78
        TInt step = DecompositionHashStep(a);
sl@0
    79
		do	{
sl@0
    80
			i = (i + step) & KDecompositionHashBitmask;
sl@0
    81
			v = KUnicodeToIndexHash[i];
sl@0
    82
			if (!v)
sl@0
    83
				return -1;
sl@0
    84
			} while ((v & 0xFFFFF) != a);
sl@0
    85
		}
sl@0
    86
// it is important that this is an unsigned shift
sl@0
    87
	return static_cast<TInt>(v >> 20);
sl@0
    88
	}
sl@0
    89
sl@0
    90
/**
sl@0
    91
Will not work if an invalid index is passed.
sl@0
    92
@internalComponent
sl@0
    93
*/
sl@0
    94
static TUTF32Iterator GetFoldedDecomposition(TInt aIndex)
sl@0
    95
	{
sl@0
    96
	TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
sl@0
    97
	if (0 <= singletonIndex)
sl@0
    98
		return TUTF32Iterator(KSingletonFolds + singletonIndex);
sl@0
    99
	const TText* start = KNonSingletonFolds + aIndex * 2;
sl@0
   100
	if (*start != KLongD)
sl@0
   101
		return TUTF32Iterator(start, start + 2,
sl@0
   102
			TUTF32Iterator::EStartsWithValidCharacter);
sl@0
   103
	TInt longDecompIndex = start[1];
sl@0
   104
	start = KLongDecompositions + (longDecompIndex & 0xFFF);
sl@0
   105
	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3,
sl@0
   106
		TUTF32Iterator::EStartsWithValidCharacter);
sl@0
   107
	}
sl@0
   108
sl@0
   109
/**
sl@0
   110
@internalComponent
sl@0
   111
*/
sl@0
   112
static TChar GetFirstFoldedChar(TChar a)
sl@0
   113
	{
sl@0
   114
    TInt index = ::GetDecompositionIndex(a);
sl@0
   115
    return index < 0? a : ::GetFoldedDecomposition(index).Current();
sl@0
   116
	}
sl@0
   117
sl@0
   118
/**
sl@0
   119
@internalComponent
sl@0
   120
*/
sl@0
   121
static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate)
sl@0
   122
	{
sl@0
   123
    TInt index = ::GetDecompositionIndex(aNonSurrogate);
sl@0
   124
	if (index < 0)
sl@0
   125
		return aNonSurrogate - aFoldedNonSurrogate;
sl@0
   126
	TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
sl@0
   127
	if (0 < singletonIndex)
sl@0
   128
		return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate;
sl@0
   129
	const TText* start = KNonSingletonFolds + index * 2;
sl@0
   130
	if (start[0] == KLongD)
sl@0
   131
		start = KLongDecompositions + (start[1] & 0xFFF);
sl@0
   132
	return *start - aFoldedNonSurrogate;
sl@0
   133
	}
sl@0
   134
sl@0
   135
/**
sl@0
   136
@internalComponent
sl@0
   137
*/
sl@0
   138
static TInt GetClass(TFoldedDecompIterator& a)
sl@0
   139
	{
sl@0
   140
	ASSERT(!a.AtEnd());
sl@0
   141
	a.EnterFoldedSequence();
sl@0
   142
	TChar ch = a.Current();
sl@0
   143
	TInt cl = ch.GetCombiningClass();
sl@0
   144
	if (cl == 0)
sl@0
   145
		// Assume starters have been folded from ypogegrammeni
sl@0
   146
		cl = 240;
sl@0
   147
	return cl;
sl@0
   148
	}
sl@0
   149
sl@0
   150
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   151
// TUTF32Iterator
sl@0
   152
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   153
sl@0
   154
/**
sl@0
   155
@internalComponent
sl@0
   156
*/
sl@0
   157
void TUTF32Iterator::Next()
sl@0
   158
	{
sl@0
   159
	ASSERT(iStart != iEnd);
sl@0
   160
	while (++iStart != iEnd)
sl@0
   161
		{
sl@0
   162
        iCurrent = ::UTF16ToChar(iStart);
sl@0
   163
		if (iCurrent != 0xFFFF)
sl@0
   164
			return;
sl@0
   165
		}
sl@0
   166
	}
sl@0
   167
sl@0
   168
/**
sl@0
   169
Locates a base character in a string using a folded comparision. Will not find combining 
sl@0
   170
characters, nor will it consider Korean combining Jamo to be equivalent to Hangul.
sl@0
   171
@internalComponent
sl@0
   172
*/
sl@0
   173
TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar)
sl@0
   174
	{
sl@0
   175
	// A quick shortcut for simple rejections
sl@0
   176
	if (aChar < 0xFFFF)
sl@0
   177
		{
sl@0
   178
        while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar))
sl@0
   179
			++iStart;
sl@0
   180
		if (iStart != iEnd)
sl@0
   181
			{
sl@0
   182
            iCurrent = ::UTF16ToChar(iStart);
sl@0
   183
			if (iCurrent == 0xFFFF)
sl@0
   184
				Next();
sl@0
   185
			}
sl@0
   186
		}
sl@0
   187
	while (!AtEnd())
sl@0
   188
		{
sl@0
   189
        TChar foldedChar = ::GetFirstFoldedChar(iCurrent);
sl@0
   190
		if (aChar == foldedChar)
sl@0
   191
			return ETrue;
sl@0
   192
		Next();
sl@0
   193
		}
sl@0
   194
	return EFalse;
sl@0
   195
	}
sl@0
   196
sl@0
   197
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   198
// TFoldedDecompIterator
sl@0
   199
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   200
sl@0
   201
/**
sl@0
   202
@internalComponent
sl@0
   203
*/
sl@0
   204
TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a)
sl@0
   205
	{
sl@0
   206
	Set(a);
sl@0
   207
	}
sl@0
   208
sl@0
   209
/**
sl@0
   210
@internalComponent
sl@0
   211
*/
sl@0
   212
TBool TFoldedDecompIterator::AtEnd() const
sl@0
   213
	{
sl@0
   214
	return iOriginal.AtEnd();
sl@0
   215
	}
sl@0
   216
sl@0
   217
/**
sl@0
   218
@internalComponent
sl@0
   219
*/
sl@0
   220
TBool TFoldedDecompIterator::AtEndOrWildcard() const
sl@0
   221
	{
sl@0
   222
	// neither '?' nor '*' have decomposition sequences, so we can assume that
sl@0
   223
	// if we are pointing at one or the other, we don't need to check if we
sl@0
   224
	// are in a decomposition sequence or not.
sl@0
   225
	return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*';
sl@0
   226
	}
sl@0
   227
sl@0
   228
/**
sl@0
   229
@internalComponent
sl@0
   230
*/
sl@0
   231
TBool TFoldedDecompIterator::EnterFoldedSequence()
sl@0
   232
	{
sl@0
   233
	ASSERT(!AtEnd());
sl@0
   234
	return !IsInFoldedSequence() && StrictEnterFoldedSequence();
sl@0
   235
	}
sl@0
   236
sl@0
   237
/**
sl@0
   238
Enter folded sequence, assuming that we are not already in one
sl@0
   239
@internalComponent
sl@0
   240
*/
sl@0
   241
TBool TFoldedDecompIterator::StrictEnterFoldedSequence()
sl@0
   242
	{
sl@0
   243
	ASSERT(!AtEnd());
sl@0
   244
	ASSERT(!IsInFoldedSequence());
sl@0
   245
    TInt index = ::GetDecompositionIndex(iOriginal.Current());
sl@0
   246
	if (index < 0)
sl@0
   247
		return EFalse;
sl@0
   248
	iFolded = ::GetFoldedDecomposition(index);
sl@0
   249
	return ETrue;
sl@0
   250
	}
sl@0
   251
sl@0
   252
/**
sl@0
   253
An iota might have folded from a combining ypogegrammeni.
sl@0
   254
If the current character is a base character, this function will
sl@0
   255
detect whether it was folded from a combining character (and
sl@0
   256
therefore does not constitute a grapheme boundary).
sl@0
   257
@internalComponent
sl@0
   258
*/
sl@0
   259
TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const
sl@0
   260
	{
sl@0
   261
	if (!IsInFoldedSequence())
sl@0
   262
		return EFalse;
sl@0
   263
	// The only character that does this is Ypogerammeni, which folds to iota
sl@0
   264
	if (iFolded.Current() != 0x3B9)
sl@0
   265
		return EFalse;
sl@0
   266
	// If the unfolded character is a combiner then it cannot contain an iota,
sl@0
   267
	// so it must be an ypogegrammeni that has been folded to one.
sl@0
   268
	// This will catch 0x345, the ypogegrammeni itself.
sl@0
   269
    if (!::IsBaseCharacter(iOriginal.Current()))
sl@0
   270
		return ETrue;
sl@0
   271
	// Otherwise the base character will be at the start of the decomposition
sl@0
   272
	// sequence. We will assume that it is folded from a genuine iota if and
sl@0
   273
	// only if there is an iota at the start of the folded decomposition
sl@0
   274
	// sequence. (In theory there might be an iota with a combining
sl@0
   275
	// ypogegrammeni, but in practice this will not occur).
sl@0
   276
	TInt index = ::GetDecompositionIndex(iOriginal.Current());
sl@0
   277
	ASSERT(0 <= index);
sl@0
   278
	TUTF32Iterator folded = ::GetFoldedDecomposition(index);
sl@0
   279
	return folded.Current() != 0x3B9;
sl@0
   280
	}
sl@0
   281
sl@0
   282
/**
sl@0
   283
@internalComponent
sl@0
   284
*/
sl@0
   285
TChar TFoldedDecompIterator::Current() const
sl@0
   286
	{
sl@0
   287
	ASSERT(!AtEnd());
sl@0
   288
	return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current();
sl@0
   289
	}
sl@0
   290
sl@0
   291
/** 
sl@0
   292
Move past this code if it matches unfolded or folded 
sl@0
   293
@internalComponent
sl@0
   294
*/
sl@0
   295
TBool TFoldedDecompIterator::Match(TChar aCode)
sl@0
   296
	{
sl@0
   297
	ASSERT(!AtEnd());
sl@0
   298
	if (!IsInFoldedSequence())
sl@0
   299
		{
sl@0
   300
		if (aCode == iOriginal.Current())
sl@0
   301
			{
sl@0
   302
			iOriginal.Next();
sl@0
   303
			return ETrue;
sl@0
   304
			}
sl@0
   305
		if (!StrictEnterFoldedSequence())
sl@0
   306
			return EFalse;
sl@0
   307
		}
sl@0
   308
	ASSERT(IsInFoldedSequence());
sl@0
   309
	if (aCode == iFolded.Current())
sl@0
   310
		{
sl@0
   311
		iFolded.Next();
sl@0
   312
		if (iFolded.AtEnd())
sl@0
   313
			iOriginal.Next();
sl@0
   314
		return ETrue;
sl@0
   315
		}
sl@0
   316
	return EFalse;
sl@0
   317
	}
sl@0
   318
sl@0
   319
/** 
sl@0
   320
Move this and argument past matching character. 
sl@0
   321
@internalComponent
sl@0
   322
*/
sl@0
   323
TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat)
sl@0
   324
	{
sl@0
   325
	ASSERT(!AtEnd());
sl@0
   326
	if (!IsInFoldedSequence())
sl@0
   327
		{
sl@0
   328
		if ( aThat.Match(iOriginal.Current()) )
sl@0
   329
			{
sl@0
   330
			iOriginal.Next();
sl@0
   331
			return ETrue;
sl@0
   332
			}
sl@0
   333
		if (!StrictEnterFoldedSequence())
sl@0
   334
			return EFalse;
sl@0
   335
		}
sl@0
   336
	ASSERT(IsInFoldedSequence());
sl@0
   337
	if ( aThat.Match(iFolded.Current()) )
sl@0
   338
		{
sl@0
   339
		iFolded.Next();
sl@0
   340
		if (iFolded.AtEnd())
sl@0
   341
			iOriginal.Next();
sl@0
   342
		return ETrue;
sl@0
   343
		}
sl@0
   344
	return EFalse;
sl@0
   345
	}
sl@0
   346
sl@0
   347
/** 
sl@0
   348
@internalComponent
sl@0
   349
*/
sl@0
   350
void TFoldedDecompIterator::Next()
sl@0
   351
	{
sl@0
   352
	ASSERT(!AtEnd());
sl@0
   353
	if (IsInFoldedSequence())
sl@0
   354
		{
sl@0
   355
		iFolded.Next();
sl@0
   356
		if (IsInFoldedSequence())
sl@0
   357
			return;
sl@0
   358
		}
sl@0
   359
	iOriginal.Next();
sl@0
   360
	}
sl@0
   361
sl@0
   362
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   363
// TFoldedSortedDecompIterator
sl@0
   364
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   365
sl@0
   366
/**
sl@0
   367
Set this iterator to iterate over the next combining characters.
sl@0
   368
Iotas in folded sequences are assumed to be character class 240
sl@0
   369
(combining ypogegrammeni). Next() must be used once before
sl@0
   370
the first call to Current(), as long as AtEnd() returns false.
sl@0
   371
@param aBase Sets the start of the iteration. This value is advanced to the
sl@0
   372
             end of the iteration.
sl@0
   373
@return The number of codes in the iteration.
sl@0
   374
@internalComponent
sl@0
   375
*/
sl@0
   376
TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase)
sl@0
   377
	{
sl@0
   378
	iStart = aBase;
sl@0
   379
	TInt length = 0;
sl@0
   380
	iCurrentClass = 256;
sl@0
   381
sl@0
   382
	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
sl@0
   383
sl@0
   384
	// Find the next starter (i.e. character with combining class 0).
sl@0
   385
	// We must not count iota folded from ypogegrammeni.
sl@0
   386
	// Ypogegrammeni has combining class 240.
sl@0
   387
	// Iota has combining class 0.
sl@0
   388
	// Also we will be searching for the character with the smallest
sl@0
   389
	// combining class to start at.
sl@0
   390
	while (!aBase.AtEnd())
sl@0
   391
		{
sl@0
   392
		aBase.EnterFoldedSequence();
sl@0
   393
		TChar ch = aBase.Current();
sl@0
   394
		TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet);
sl@0
   395
		if (cl == 0)
sl@0
   396
			{
sl@0
   397
			if (aBase.CurrentIsBaseFoldedFromCombiner())
sl@0
   398
				cl = 240;
sl@0
   399
			else
sl@0
   400
				break;
sl@0
   401
			}
sl@0
   402
		if (cl < iCurrentClass)
sl@0
   403
			{
sl@0
   404
			iCurrentClass = cl;
sl@0
   405
			iCurrent = aBase;
sl@0
   406
			iCurrentCount = length;
sl@0
   407
			}
sl@0
   408
		++length;
sl@0
   409
		aBase.Next();
sl@0
   410
		}
sl@0
   411
	iRemaining = length;
sl@0
   412
	iLength = length;
sl@0
   413
	ASSERT(iLength == 0 || iCurrentClass < 256);
sl@0
   414
	return length;
sl@0
   415
	}
sl@0
   416
sl@0
   417
/** 
sl@0
   418
Set this iterator so that AtEnd() returns ETrue. 
sl@0
   419
@internalComponent
sl@0
   420
*/
sl@0
   421
void TFoldedSortedDecompIterator::Set()
sl@0
   422
	{
sl@0
   423
	iRemaining = 0;
sl@0
   424
	}
sl@0
   425
sl@0
   426
/** 
sl@0
   427
@internalComponent
sl@0
   428
*/
sl@0
   429
TBool TFoldedSortedDecompIterator::AtEnd() const
sl@0
   430
	{
sl@0
   431
	return iRemaining == 0;
sl@0
   432
	}
sl@0
   433
sl@0
   434
/** 
sl@0
   435
@internalComponent
sl@0
   436
*/
sl@0
   437
TChar TFoldedSortedDecompIterator::Current() const
sl@0
   438
	{
sl@0
   439
	ASSERT(!AtEnd());
sl@0
   440
	return iCurrent.Current();
sl@0
   441
	}
sl@0
   442
sl@0
   443
/** 
sl@0
   444
@internalComponent
sl@0
   445
*/
sl@0
   446
void TFoldedSortedDecompIterator::Next()
sl@0
   447
	{
sl@0
   448
	ASSERT(!AtEnd());
sl@0
   449
	--iRemaining;
sl@0
   450
	while (++iCurrentCount != iLength)
sl@0
   451
		{
sl@0
   452
		iCurrent.Next();
sl@0
   453
        if (::GetClass(iCurrent) == iCurrentClass)
sl@0
   454
			return;
sl@0
   455
		}
sl@0
   456
	// We have not found any more of the same class, so we will look
sl@0
   457
	// for the smallest one larger.
sl@0
   458
	TInt minClass = 256;
sl@0
   459
	TFoldedDecompIterator searcher(iStart);
sl@0
   460
	TInt searchCount = 0;
sl@0
   461
	while (searchCount != iLength)
sl@0
   462
		{
sl@0
   463
        TInt cl = ::GetClass(searcher);
sl@0
   464
		if (iCurrentClass < cl && cl < minClass)
sl@0
   465
			{
sl@0
   466
			minClass = cl;
sl@0
   467
			iCurrentCount = searchCount;
sl@0
   468
			iCurrent = searcher;
sl@0
   469
			}
sl@0
   470
		++searchCount;
sl@0
   471
		searcher.Next();
sl@0
   472
		}
sl@0
   473
	iCurrentClass = minClass;
sl@0
   474
	ASSERT(minClass < 256 || AtEnd());
sl@0
   475
	}
sl@0
   476
sl@0
   477
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   478
// TFoldedCanonicalIterator
sl@0
   479
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   480
sl@0
   481
/** 
sl@0
   482
@internalComponent
sl@0
   483
*/
sl@0
   484
TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a)
sl@0
   485
	{
sl@0
   486
	iBase.Set(a);
sl@0
   487
	iSorted.Set();
sl@0
   488
    if(!iBase.AtEnd())
sl@0
   489
        {
sl@0
   490
	    iBase.EnterFoldedSequence();
sl@0
   491
	    if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
sl@0
   492
            {
sl@0
   493
		    iSorted.Set(iBase);
sl@0
   494
            }
sl@0
   495
        }
sl@0
   496
	}
sl@0
   497
sl@0
   498
/** 
sl@0
   499
@internalComponent
sl@0
   500
*/
sl@0
   501
TBool TFoldedCanonicalIterator::AtEnd() const
sl@0
   502
	{
sl@0
   503
	return iSorted.AtEnd() && iBase.AtEnd();
sl@0
   504
	}
sl@0
   505
sl@0
   506
/** 
sl@0
   507
@internalComponent
sl@0
   508
*/
sl@0
   509
TChar TFoldedCanonicalIterator::Current() const
sl@0
   510
	{
sl@0
   511
	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
sl@0
   512
	return iSorted.AtEnd()? iBase.Current() : iSorted.Current();
sl@0
   513
	}
sl@0
   514
sl@0
   515
/** 
sl@0
   516
@internalComponent
sl@0
   517
*/
sl@0
   518
void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet)
sl@0
   519
	{
sl@0
   520
	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
sl@0
   521
	if (!iSorted.AtEnd())
sl@0
   522
		{
sl@0
   523
		iSorted.Next();
sl@0
   524
		return;
sl@0
   525
		}
sl@0
   526
	iBase.Next();
sl@0
   527
    if(!iBase.AtEnd())
sl@0
   528
        {
sl@0
   529
	    iBase.EnterFoldedSequence();
sl@0
   530
 	    if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
sl@0
   531
           {
sl@0
   532
		    iSorted.Set(iBase);
sl@0
   533
            }
sl@0
   534
        }
sl@0
   535
	}
sl@0
   536
sl@0
   537
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   538
// Folding - Global functions and structures
sl@0
   539
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   540
sl@0
   541
/** 
sl@0
   542
@internalComponent
sl@0
   543
*/
sl@0
   544
struct TEndTester 
sl@0
   545
    { 
sl@0
   546
    typedef enum {EGenuine, EWildcard} TType;
sl@0
   547
sl@0
   548
    inline TEndTester(TType aType) :
sl@0
   549
        iType(aType)
sl@0
   550
        {
sl@0
   551
        }
sl@0
   552
sl@0
   553
    inline TBool operator()(const TFoldedDecompIterator& a) const
sl@0
   554
        {
sl@0
   555
        return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard();
sl@0
   556
        } 
sl@0
   557
sl@0
   558
private:
sl@0
   559
    TType iType;
sl@0
   560
sl@0
   561
    };
sl@0
   562
sl@0
   563
/**
sl@0
   564
Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or
sl@0
   565
'*' wildcard or the end of aSearchTerm.
sl@0
   566
It is assumed that the search term begins with a base character.
sl@0
   567
Returns true if and only if the whole search term was matched
sl@0
   568
with a whole number of UTF16s in the candidate string.
sl@0
   569
On return of ETrue the candidate string iterator will have consumed the
sl@0
   570
matching characters the search term will have had all its matching characters
sl@0
   571
consumed.
sl@0
   572
@internalComponent
sl@0
   573
*/
sl@0
   574
TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm,
sl@0
   575
                         const TEndTester& aEndTester)
sl@0
   576
	{
sl@0
   577
	TBool assumeBase = ETrue;
sl@0
   578
	TFoldedDecompIterator st(aSearchTerm);
sl@0
   579
	TFoldedDecompIterator cs(aCandidateString);
sl@0
   580
	while (!aEndTester(st))
sl@0
   581
		{
sl@0
   582
		if (cs.AtEnd())
sl@0
   583
			return EFalse;
sl@0
   584
		if (st.Match(cs))
sl@0
   585
			{
sl@0
   586
			assumeBase = EFalse;
sl@0
   587
			if (aEndTester(st))
sl@0
   588
				{
sl@0
   589
                // We have a match...
sl@0
   590
                if (cs.IsInFoldedSequence())
sl@0
   591
                    // but it was against only part of a character
sl@0
   592
                    // in the original string, so we fail.
sl@0
   593
                    return EFalse;
sl@0
   594
				aCandidateString = cs.BaseIterator();
sl@0
   595
				aSearchTerm = st.BaseIterator();
sl@0
   596
				return ETrue;
sl@0
   597
				}
sl@0
   598
			continue;
sl@0
   599
			}
sl@0
   600
		// did not match. We need to re-order canonically.
sl@0
   601
		if (assumeBase)
sl@0
   602
			// The first characters did not match.. do not bother
sl@0
   603
			// to re-order.
sl@0
   604
			return EFalse;
sl@0
   605
		TFoldedSortedDecompIterator csc;
sl@0
   606
		TInt cscLength = csc.Set(cs);
sl@0
   607
		if (cscLength < 2)
sl@0
   608
			// If there are less than two characters to be reordered,
sl@0
   609
			// there is no hope of getting a match by re-ordering
sl@0
   610
			return EFalse;
sl@0
   611
		TFoldedSortedDecompIterator stc;
sl@0
   612
		if (cscLength != stc.Set(st))
sl@0
   613
			// if the strings have differing numbers of characters,
sl@0
   614
			// there can be no match
sl@0
   615
			return EFalse;
sl@0
   616
		while (!stc.AtEnd())
sl@0
   617
			{
sl@0
   618
			ASSERT(!csc.AtEnd());
sl@0
   619
			if (stc.Current() != csc.Current())
sl@0
   620
				// well, we tried.
sl@0
   621
				return EFalse;
sl@0
   622
			stc.Next();
sl@0
   623
			csc.Next();
sl@0
   624
			}
sl@0
   625
		}
sl@0
   626
	// If the candidate string is in a folded sequence, then
sl@0
   627
	// we should not accept the match, as we require all matches
sl@0
   628
	// to be for a whole number of characters in the original string.
sl@0
   629
	if (cs.IsInFoldedSequence())
sl@0
   630
		return EFalse;
sl@0
   631
	aCandidateString = cs.BaseIterator();
sl@0
   632
	aSearchTerm = st.BaseIterator();
sl@0
   633
	return ETrue;
sl@0
   634
	}
sl@0
   635
sl@0
   636
/** 
sl@0
   637
@internalComponent
sl@0
   638
*/
sl@0
   639
TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, 
sl@0
   640
                                       const TEndTester& aEndTester)
sl@0
   641
	{
sl@0
   642
    if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester))
sl@0
   643
		return EFalse;
sl@0
   644
    return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current());
sl@0
   645
	}
sl@0
   646
sl@0
   647
/** 
sl@0
   648
@internalComponent
sl@0
   649
*/
sl@0
   650
static TBool ConsumeGrapheme(TUTF32Iterator& a)
sl@0
   651
	{
sl@0
   652
	if (a.AtEnd())
sl@0
   653
		return EFalse;
sl@0
   654
	a.Next();
sl@0
   655
    while (!a.AtEnd() && !::IsBaseCharacter(a.Current()))
sl@0
   656
		a.Next();
sl@0
   657
	return ETrue;
sl@0
   658
	}
sl@0
   659
sl@0
   660
/** 
sl@0
   661
@internalComponent
sl@0
   662
*/
sl@0
   663
TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
sl@0
   664
	{
sl@0
   665
    TEndTester endTester(TEndTester::EWildcard);
sl@0
   666
    while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
sl@0
   667
		{
sl@0
   668
		if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
sl@0
   669
			return ETrue;
sl@0
   670
		ASSERT(aSearchTerm.Current() == '?');
sl@0
   671
		aSearchTerm.Next();
sl@0
   672
        if (!::ConsumeGrapheme(aCandidateString))
sl@0
   673
			return EFalse;
sl@0
   674
		}
sl@0
   675
	return EFalse;
sl@0
   676
	}
sl@0
   677
sl@0
   678
/** 
sl@0
   679
@internalComponent
sl@0
   680
*/
sl@0
   681
TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
sl@0
   682
	{
sl@0
   683
	// match as many graphemes as there are leading ?s
sl@0
   684
	while (!aSearchTerm.AtEnd()
sl@0
   685
		&& aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?')
sl@0
   686
		{
sl@0
   687
        if (!::ConsumeGrapheme(aCandidateString))
sl@0
   688
			return EFalse;
sl@0
   689
		aSearchTerm.Next();
sl@0
   690
		}
sl@0
   691
	if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
sl@0
   692
		return ETrue;
sl@0
   693
    TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
sl@0
   694
	TUTF32Iterator savedST(aSearchTerm);
sl@0
   695
	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
sl@0
   696
		{
sl@0
   697
		TUTF32Iterator savedCS = aCandidateString;
sl@0
   698
		if (::MatchSectionFolded(aCandidateString, aSearchTerm))
sl@0
   699
			return ETrue;
sl@0
   700
		aSearchTerm = savedST;
sl@0
   701
		aCandidateString = savedCS;
sl@0
   702
		aCandidateString.Next();
sl@0
   703
		}
sl@0
   704
	return EFalse;
sl@0
   705
	}
sl@0
   706
sl@0
   707
/** 
sl@0
   708
@internalComponent
sl@0
   709
*/
sl@0
   710
TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
sl@0
   711
                        const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
sl@0
   712
	{
sl@0
   713
	TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd);
sl@0
   714
	TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd);
sl@0
   715
	// First section of search term must match exactly at the start of the
sl@0
   716
	// candidate string.
sl@0
   717
	if (!::MatchSectionFolded(candidate, searchTerm))
sl@0
   718
		return EFalse;
sl@0
   719
sl@0
   720
	// If there was only one section, it must match the whole candidate string.
sl@0
   721
	if (searchTerm.AtEnd())
sl@0
   722
		return candidate.AtEnd();
sl@0
   723
sl@0
   724
	while (!searchTerm.AtEnd())
sl@0
   725
		{
sl@0
   726
		searchTerm.Next();
sl@0
   727
		if (!::FindMatchSectionFolded(candidate, searchTerm))
sl@0
   728
			return EFalse;
sl@0
   729
		}
sl@0
   730
sl@0
   731
	// The last section must match exactly at the end of the candidate string.
sl@0
   732
	if (candidate.AtEnd())	// shortcut
sl@0
   733
		return ETrue;
sl@0
   734
	const TText16* searchTermLastSection = aSearchTermEnd;
sl@0
   735
	while (searchTermLastSection != aSearchTermStart
sl@0
   736
		&& searchTermLastSection[-1] != '*')
sl@0
   737
		--searchTermLastSection;
sl@0
   738
	if (searchTermLastSection == aSearchTermEnd)
sl@0
   739
		// last section is null, so trivially matches
sl@0
   740
		return ETrue;
sl@0
   741
	// Count graphemes by counting the number of base characters.
sl@0
   742
	// The first one is assumed to start a grapheme.
sl@0
   743
	TInt graphemeCount = 1;
sl@0
   744
	for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s)
sl@0
   745
		{
sl@0
   746
        if (::IsBaseCharacter(*s))
sl@0
   747
			++graphemeCount;
sl@0
   748
		}
sl@0
   749
	// Count this many graphemes back in the candidate string
sl@0
   750
	const TText16* candidateLastSection = aCandidateStringEnd;
sl@0
   751
	while (graphemeCount != 0
sl@0
   752
		&& candidateLastSection != aCandidateStringStart)
sl@0
   753
		{
sl@0
   754
        if (::IsBaseCharacter(*--candidateLastSection))
sl@0
   755
			--graphemeCount;
sl@0
   756
		}
sl@0
   757
	TUTF32Iterator last(candidateLastSection, aCandidateStringEnd);
sl@0
   758
	TUTF32Iterator st(searchTermLastSection, aSearchTermEnd);
sl@0
   759
	return ::MatchSectionFolded(last, st);
sl@0
   760
	}
sl@0
   761
sl@0
   762
/** 
sl@0
   763
Implementation of MatchF
sl@0
   764
(slow if there is a match: MatchStringFolded is better, but does not return
sl@0
   765
the position of the match)
sl@0
   766
@internalComponent
sl@0
   767
*/
sl@0
   768
TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
sl@0
   769
                             const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
sl@0
   770
	{
sl@0
   771
	// Quick shortcut for simple non-match
sl@0
   772
	if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*')
sl@0
   773
		{
sl@0
   774
		if (aCandidateStringStart == aCandidateStringEnd)
sl@0
   775
			return KErrNotFound;
sl@0
   776
		// We won't bother with non-characters and surrogate pairs.
sl@0
   777
		if (*aSearchTermStart != '?'
sl@0
   778
			&& *aSearchTermStart < 0xD800
sl@0
   779
			&& *aCandidateStringStart < 0xD800
sl@0
   780
            && ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart))
sl@0
   781
			return KErrNotFound;
sl@0
   782
		}
sl@0
   783
    if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd,
sl@0
   784
		aSearchTermStart, aSearchTermEnd))
sl@0
   785
		return KErrNotFound;
sl@0
   786
	// find where it matches
sl@0
   787
	while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*')
sl@0
   788
		++aSearchTermStart;
sl@0
   789
	const TText16* end = aSearchTermStart;
sl@0
   790
	while (end != aSearchTermEnd && *end != '*')
sl@0
   791
		++end;
sl@0
   792
	// To preserve existing behaviour, a search term consisting of nothing
sl@0
   793
	// but stars is considered to match at 0.
sl@0
   794
	if (end == aSearchTermStart)
sl@0
   795
		return 0;
sl@0
   796
	for (const TText16* csSection = aCandidateStringStart;
sl@0
   797
		csSection <= aCandidateStringEnd; ++csSection)
sl@0
   798
		{
sl@0
   799
		TUTF32Iterator cs(csSection, aCandidateStringEnd);
sl@0
   800
		TUTF32Iterator st(aSearchTermStart, end);
sl@0
   801
        if (::MatchSectionFolded(cs, st))
sl@0
   802
			{
sl@0
   803
			// If this match must match exactly at the end, we must keep
sl@0
   804
			// going.
sl@0
   805
			// This could be a lot faster, with some optimizations.
sl@0
   806
			if (end == aSearchTermEnd)
sl@0
   807
				{
sl@0
   808
				if (!cs.AtEnd())
sl@0
   809
					continue;
sl@0
   810
				}
sl@0
   811
			return csSection - aCandidateStringStart;
sl@0
   812
			}
sl@0
   813
        // Because this function is using TUTF32Iterator, which means the
sl@0
   814
        // original author want to support surrogate. Take it as a defect and
sl@0
   815
        // fix it, while do not define a new LocateMatchStringFoldedSurrogate().
sl@0
   816
        if (IsSurrogate(*csSection))
sl@0
   817
        	++csSection;
sl@0
   818
		}
sl@0
   819
	// this should never happen!
sl@0
   820
	ASSERT(0);
sl@0
   821
	return KErrNotFound;
sl@0
   822
	}
sl@0
   823
sl@0
   824
/** 
sl@0
   825
Implementation of FindF
sl@0
   826
@internalComponent
sl@0
   827
*/
sl@0
   828
TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
sl@0
   829
	{
sl@0
   830
    //Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally 
sl@0
   831
    //compatible with the old one.
sl@0
   832
    if(aSearchTerm.Length() == 0)
sl@0
   833
        {
sl@0
   834
        return 0;
sl@0
   835
        }
sl@0
   836
	const TText16* candidateStartPosition = aCandidateString.CurrentPosition();
sl@0
   837
    TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
sl@0
   838
	TUTF32Iterator savedST(aSearchTerm);
sl@0
   839
	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
sl@0
   840
		{
sl@0
   841
		TUTF32Iterator savedCS = aCandidateString;
sl@0
   842
        TEndTester endTester(TEndTester::EGenuine);
sl@0
   843
        if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
sl@0
   844
			return savedCS.CurrentPosition() - candidateStartPosition;
sl@0
   845
		aSearchTerm = savedST;
sl@0
   846
		aCandidateString = savedCS;
sl@0
   847
		aCandidateString.Next();
sl@0
   848
		}
sl@0
   849
	return KErrNotFound;
sl@0
   850
	}
sl@0
   851
sl@0
   852
/** 
sl@0
   853
Implementation of boolean CompareF
sl@0
   854
@internalComponent
sl@0
   855
*/
sl@0
   856
TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight)
sl@0
   857
	{
sl@0
   858
    TEndTester endTester(TEndTester::EGenuine);
sl@0
   859
    if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester))
sl@0
   860
		return aLeft.AtEnd();
sl@0
   861
	return EFalse;
sl@0
   862
	}
sl@0
   863
sl@0
   864
/** 
sl@0
   865
Implementation of tri-state CompareF
sl@0
   866
@internalComponent
sl@0
   867
*/
sl@0
   868
TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight)
sl@0
   869
	{
sl@0
   870
	TFoldedCanonicalIterator left(aLeft);
sl@0
   871
	TFoldedCanonicalIterator right(aRight);
sl@0
   872
sl@0
   873
	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
sl@0
   874
sl@0
   875
	while (!left.AtEnd())
sl@0
   876
		{
sl@0
   877
		if (right.AtEnd())
sl@0
   878
			return 1;
sl@0
   879
		TChar cr = right.Current();
sl@0
   880
		TChar cl = left.Current();
sl@0
   881
		if (cr != cl)
sl@0
   882
			return cl - cr;
sl@0
   883
        right.Next(charDataSet);
sl@0
   884
        left.Next(charDataSet);
sl@0
   885
		}
sl@0
   886
	return right.AtEnd()? 0 : -1;
sl@0
   887
	}
sl@0
   888
sl@0
   889
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   890
// Composition/Decomposition - Global functions and structures
sl@0
   891
////////////////////////////////////////////////////////////////////////////////////////////
sl@0
   892
sl@0
   893
/** 
sl@0
   894
@internalComponent
sl@0
   895
*/
sl@0
   896
static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex)
sl@0
   897
	{
sl@0
   898
	const TText* start = KNonSingletonDecompositions + aIndex * 2;
sl@0
   899
	if (*start != KLongD)
sl@0
   900
		return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter);
sl@0
   901
	TInt longDecompIndex = start[1];
sl@0
   902
	start = KLongDecompositions + (longDecompIndex & 0xFFF);
sl@0
   903
	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter);
sl@0
   904
	}
sl@0
   905
sl@0
   906
/** 
sl@0
   907
Come up with a decomposition for the current value pointed at by the iterator
sl@0
   908
@internalComponent
sl@0
   909
*/
sl@0
   910
static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt)
sl@0
   911
	{
sl@0
   912
    TInt index = ::GetDecompositionIndex(aUTF32It.Current());
sl@0
   913
	TInt singletonIndex = index - (sizeof(KNonSingletonDecompositions)/sizeof(KNonSingletonDecompositions[0])/2);
sl@0
   914
	const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0]);
sl@0
   915
	if(index < 0 || SizeOfSingletonTable <= singletonIndex)
sl@0
   916
        {
sl@0
   917
        aOutIt = aUTF32It.CurrentAsIterator();
sl@0
   918
		return EFalse;//There is not any valid decomposition
sl@0
   919
        }
sl@0
   920
	if(0 <= singletonIndex)
sl@0
   921
        {
sl@0
   922
        // KSingletonDecompositions contains some items that come from "ShortDecompsLongFolds".
sl@0
   923
        // "ShortDecompsLongFolds" contains some characters that have no composition, but have "long fold".
sl@0
   924
        // This basically specific to non-BMP characters with fold also outside BMP.
sl@0
   925
        // For example:
sl@0
   926
        // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
sl@0
   927
        // In Unicode 5.0.0, totally, there are 40 similar characters, which are U+10400-U+10427.
sl@0
   928
        if (KSingletonDecompositions[singletonIndex] == 0xFFFF)
sl@0
   929
        	return EFalse;
sl@0
   930
		aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex);
sl@0
   931
        }
sl@0
   932
    else
sl@0
   933
        {
sl@0
   934
        aOutIt = ::IndexToNonSingletonDecomposition(index);
sl@0
   935
        }
sl@0
   936
    return ETrue;//Valid decomposition
sl@0
   937
	}
sl@0
   938
sl@0
   939
/** 
sl@0
   940
@internalComponent
sl@0
   941
*/
sl@0
   942
static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf)
sl@0
   943
	{
sl@0
   944
	aBuf.Zero();
sl@0
   945
	if (aChar < 0x10000)
sl@0
   946
		aBuf.Append(aChar);
sl@0
   947
	else
sl@0
   948
		{
sl@0
   949
		aBuf.Append((aChar >> 10) + 0xD7C0);
sl@0
   950
		aBuf.Append((aChar & 0x3FF) + 0xDC00);
sl@0
   951
		}
sl@0
   952
	const TText16* t = aBuf.Ptr();
sl@0
   953
	return TUTF32Iterator(t, t + aBuf.Length());
sl@0
   954
	}
sl@0
   955
sl@0
   956
/** 
sl@0
   957
@internalComponent
sl@0
   958
*/
sl@0
   959
TBool DecomposeChar(TChar aCh, TPtrC16& aResult)
sl@0
   960
    {
sl@0
   961
    aResult.Set(NULL, 0);
sl@0
   962
    TBuf16<2> srcBuf;
sl@0
   963
    TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf);
sl@0
   964
    TBool res = ::Decompose(it, it);
sl@0
   965
    if(res)
sl@0
   966
        {
sl@0
   967
        aResult.Set(it.CurrentPosition(), it.Length());
sl@0
   968
        }
sl@0
   969
    return res;
sl@0
   970
    }
sl@0
   971
sl@0
   972
/** 
sl@0
   973
Turn an index into the hash table known to point to a non-singleton
sl@0
   974
decomposition into that decomposition.
sl@0
   975
@internalComponent
sl@0
   976
*/
sl@0
   977
static TUTF32Iterator HashIndexToDecomposition(TInt aIndex)
sl@0
   978
	{
sl@0
   979
	TUint32 v = KUnicodeToIndexHash[aIndex];
sl@0
   980
	ASSERT(v != 0);
sl@0
   981
	TInt index = static_cast<TInt>(v >> 20);
sl@0
   982
	ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2);
sl@0
   983
    return ::IndexToNonSingletonDecomposition(index);
sl@0
   984
	}
sl@0
   985
sl@0
   986
/**
sl@0
   987
Takes a start and (one past the) end index into KCompostionMapping
sl@0
   988
and a number of UTF16 characters (aLengthSoFar). All of the compositions
sl@0
   989
within the range must have their first aLengthSoFar UTF16 characters
sl@0
   990
matching.
sl@0
   991
sl@0
   992
On entry, if aStart == aEnd then there is no possibility of a match so return
sl@0
   993
immediately with EFalse. To continue, aStart must be strictly less than aEnd.
sl@0
   994
sl@0
   995
Afterwards, aStart and aEnd will be narrowed to all those compositions
sl@0
   996
where the aLengthSoFar'th UTF16 character matches aNextCharacter.
sl@0
   997
No further compositions existing is indicated by aStart == aEnd.
sl@0
   998
sl@0
   999
@return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1.
sl@0
  1000
@internalComponent
sl@0
  1001
*/
sl@0
  1002
static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter)
sl@0
  1003
	{
sl@0
  1004
	if (aStart == aEnd)
sl@0
  1005
		return EFalse;
sl@0
  1006
	ASSERT((TUint)aStart < (TUint)aEnd);
sl@0
  1007
 	ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping));
sl@0
  1008
    TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart]));
sl@0
  1009
	if (startIterator.Length() == aLengthSoFar)
sl@0
  1010
		++aStart;
sl@0
  1011
sl@0
  1012
	// Find a single example of a decomposition that is suitable
sl@0
  1013
	TInt mid;
sl@0
  1014
	TUTF32Iterator midIt;
sl@0
  1015
	for (;;)
sl@0
  1016
		{
sl@0
  1017
		if (aStart == aEnd)
sl@0
  1018
			return EFalse;
sl@0
  1019
		mid = aStart + ((aEnd - aStart) >> 1);
sl@0
  1020
        midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]);
sl@0
  1021
		ASSERT(aLengthSoFar < midIt.Length());
sl@0
  1022
		TInt midItChar = midIt[aLengthSoFar];
sl@0
  1023
		if (midItChar < aNextCharacter)
sl@0
  1024
			aStart = mid + 1;
sl@0
  1025
		else if (aNextCharacter < midItChar)
sl@0
  1026
			aEnd = mid;
sl@0
  1027
		else
sl@0
  1028
			{
sl@0
  1029
			startIterator = midIt;
sl@0
  1030
			break;
sl@0
  1031
			}
sl@0
  1032
		}
sl@0
  1033
sl@0
  1034
	// FInd the first decomposition that does not match
sl@0
  1035
	TInt start2 = mid + 1;
sl@0
  1036
	while (start2 != aEnd)
sl@0
  1037
		{
sl@0
  1038
		ASSERT(start2 < aEnd);
sl@0
  1039
		TInt mid2 = start2 + ((aEnd - start2) >> 1);
sl@0
  1040
        midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
sl@0
  1041
		ASSERT(aLengthSoFar < midIt.Length());
sl@0
  1042
		TInt midItChar = midIt[aLengthSoFar];
sl@0
  1043
		ASSERT(aNextCharacter <= midItChar);
sl@0
  1044
		if (aNextCharacter < midItChar)
sl@0
  1045
			aEnd = mid2;
sl@0
  1046
		else
sl@0
  1047
			start2 = mid2 + 1;
sl@0
  1048
		}
sl@0
  1049
sl@0
  1050
	// Find the first decomposition that matches
sl@0
  1051
	while (aStart != mid)
sl@0
  1052
		{
sl@0
  1053
		ASSERT(aStart < mid);
sl@0
  1054
		TInt mid2 = aStart + ((mid - aStart) >> 1);
sl@0
  1055
        midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
sl@0
  1056
		ASSERT(aLengthSoFar < midIt.Length());
sl@0
  1057
		TInt midItChar = midIt[aLengthSoFar];
sl@0
  1058
		ASSERT(midItChar <= aNextCharacter);
sl@0
  1059
		if (midItChar < aNextCharacter)
sl@0
  1060
			aStart = mid2 + 1;
sl@0
  1061
		else
sl@0
  1062
			{
sl@0
  1063
			startIterator = midIt;
sl@0
  1064
			mid = mid2;
sl@0
  1065
			}
sl@0
  1066
		}
sl@0
  1067
sl@0
  1068
	return startIterator.Length() == (aLengthSoFar + 1);
sl@0
  1069
	}
sl@0
  1070
sl@0
  1071
/** 
sl@0
  1072
@internalComponent
sl@0
  1073
*/
sl@0
  1074
static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar)
sl@0
  1075
	{
sl@0
  1076
	if (aChar < 0x10000)
sl@0
  1077
        return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar);
sl@0
  1078
    ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0);
sl@0
  1079
	if (aStart == aEnd)
sl@0
  1080
		return EFalse;
sl@0
  1081
    return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00);
sl@0
  1082
	}
sl@0
  1083
sl@0
  1084
/**
sl@0
  1085
Combine as many of the characters presented as possible into a single
sl@0
  1086
character.
sl@0
  1087
@return The number of characters successfully combined.
sl@0
  1088
@param aCombined If a nonzero value is returned, this contains
sl@0
  1089
            the character that is that number of characters from the start of
sl@0
  1090
            aDes combined.
sl@0
  1091
@internalComponent
sl@0
  1092
*/
sl@0
  1093
TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined)
sl@0
  1094
	{
sl@0
  1095
	TInt start = 0;
sl@0
  1096
	TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]);
sl@0
  1097
	TInt length = 0;
sl@0
  1098
	TInt bestIndex = 0;
sl@0
  1099
	TInt bestLength = 0;
sl@0
  1100
	const TText16* ptr = aDes.Ptr();
sl@0
  1101
	TUTF32Iterator input(ptr, ptr + aDes.Length());
sl@0
  1102
	while (!input.AtEnd())
sl@0
  1103
		{
sl@0
  1104
        if (::RefineCompositionUTF32(start, end, length, input.Current()))
sl@0
  1105
			{
sl@0
  1106
			bestIndex = start;
sl@0
  1107
			bestLength = length;
sl@0
  1108
			}
sl@0
  1109
		input.Next();
sl@0
  1110
		}
sl@0
  1111
	if (bestLength == 0)
sl@0
  1112
		return 0;
sl@0
  1113
	aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF;
sl@0
  1114
	return bestLength;
sl@0
  1115
	}
sl@0
  1116
sl@0
  1117
//////////////////////////////////////////////////////////////////////////////////////////////
sl@0
  1118
// COLLATION
sl@0
  1119
//////////////////////////////////////////////////////////////////////////////////////////////
sl@0
  1120
sl@0
  1121
//////////////////////////////////////////////////////////////////////////////////////////////
sl@0
  1122
// TDecompositionIterator class
sl@0
  1123
sl@0
  1124
/** 
sl@0
  1125
@internalComponent
sl@0
  1126
*/
sl@0
  1127
void TDecompositionIterator::Set(const TUTF32Iterator& a)
sl@0
  1128
	{
sl@0
  1129
	iBase = a;
sl@0
  1130
	if (!iBase.AtEnd())
sl@0
  1131
        {
sl@0
  1132
        (void)::Decompose(iBase, iDecomposition);
sl@0
  1133
        }
sl@0
  1134
	}
sl@0
  1135
sl@0
  1136
/** 
sl@0
  1137
@internalComponent
sl@0
  1138
*/
sl@0
  1139
TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a)
sl@0
  1140
	{
sl@0
  1141
	Set(a);
sl@0
  1142
	}
sl@0
  1143
sl@0
  1144
/** 
sl@0
  1145
@internalComponent
sl@0
  1146
*/
sl@0
  1147
TBool TDecompositionIterator::AtEnd() const
sl@0
  1148
	{
sl@0
  1149
	return iBase.AtEnd();
sl@0
  1150
	}
sl@0
  1151
sl@0
  1152
/** 
sl@0
  1153
@internalComponent
sl@0
  1154
*/
sl@0
  1155
TChar TDecompositionIterator::Current() const
sl@0
  1156
	{
sl@0
  1157
	return iDecomposition.Current();
sl@0
  1158
	}
sl@0
  1159
sl@0
  1160
/** 
sl@0
  1161
@internalComponent
sl@0
  1162
*/
sl@0
  1163
void TDecompositionIterator::Next()
sl@0
  1164
	{
sl@0
  1165
	ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd());
sl@0
  1166
	iDecomposition.Next();
sl@0
  1167
	if (!iDecomposition.AtEnd())
sl@0
  1168
		return;
sl@0
  1169
	iBase.Next();
sl@0
  1170
	if (!iBase.AtEnd())
sl@0
  1171
        {
sl@0
  1172
        (void)::Decompose(iBase, iDecomposition);
sl@0
  1173
        }
sl@0
  1174
	}
sl@0
  1175
sl@0
  1176
/** 
sl@0
  1177
@internalComponent
sl@0
  1178
*/
sl@0
  1179
const TText16* TDecompositionIterator::CurrentPosition() const
sl@0
  1180
	{
sl@0
  1181
	return iBase.CurrentPosition();
sl@0
  1182
	}
sl@0
  1183
sl@0
  1184
/** 
sl@0
  1185
Find out the length and minimum combining class of
sl@0
  1186
the current run of characters of nonzero combining class.
sl@0
  1187
aMinClass and aMinClassPos are not written to if the return
sl@0
  1188
value is 0.
sl@0
  1189
aEndOfRun is written to with the final position of the iteration
sl@0
  1190
if 0 is returned and aEndOfRun is non-null
sl@0
  1191
@internalComponent
sl@0
  1192
*/
sl@0
  1193
static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos,
sl@0
  1194
                          const TDecompositionIterator& aStart, TBool* aOpenSequence, 
sl@0
  1195
                          TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0)
sl@0
  1196
	{
sl@0
  1197
	TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass();
sl@0
  1198
	if (comclass == 0)
sl@0
  1199
		{
sl@0
  1200
		if (aEndOfRun)
sl@0
  1201
			*aEndOfRun = aStart;
sl@0
  1202
		if (aOpenSequence)
sl@0
  1203
			*aOpenSequence = aStart.AtEnd();
sl@0
  1204
		return 0;
sl@0
  1205
		}
sl@0
  1206
	aMinClass = 256;
sl@0
  1207
	TDecompositionIterator i = aStart;
sl@0
  1208
	TInt count = 0;
sl@0
  1209
	while (comclass != 0)
sl@0
  1210
		{
sl@0
  1211
		if (aMaxDisallowedClass < comclass)
sl@0
  1212
			{
sl@0
  1213
			if (comclass < aMinClass)
sl@0
  1214
				{
sl@0
  1215
				aMinClass = comclass;
sl@0
  1216
				aMinClassPos = i;
sl@0
  1217
				}
sl@0
  1218
			++count;
sl@0
  1219
			}
sl@0
  1220
		i.Next();
sl@0
  1221
		comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass();
sl@0
  1222
		}
sl@0
  1223
	if (count == 0 && aEndOfRun)
sl@0
  1224
		*aEndOfRun = i;
sl@0
  1225
	if (aOpenSequence)
sl@0
  1226
		*aOpenSequence = i.AtEnd();
sl@0
  1227
	return count;
sl@0
  1228
	}
sl@0
  1229
sl@0
  1230
//////////////////////////////////////////////////////////////////////////////////////////////
sl@0
  1231
// TCanonicalDecompositionIterator class
sl@0
  1232
sl@0
  1233
/** 
sl@0
  1234
@internalComponent
sl@0
  1235
*/
sl@0
  1236
void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a)
sl@0
  1237
	{
sl@0
  1238
	iBase.Set(a);
sl@0
  1239
	iLastPosition = 0;
sl@0
  1240
	if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
sl@0
  1241
		iCurrentCombiningClass = 0;
sl@0
  1242
	}
sl@0
  1243
sl@0
  1244
/** 
sl@0
  1245
@internalComponent
sl@0
  1246
*/
sl@0
  1247
TBool TCanonicalDecompositionIterator::AtEnd() const
sl@0
  1248
	{
sl@0
  1249
	return iBase.AtEnd();
sl@0
  1250
	}
sl@0
  1251
sl@0
  1252
/** 
sl@0
  1253
@internalComponent
sl@0
  1254
*/
sl@0
  1255
TChar TCanonicalDecompositionIterator::Current() const
sl@0
  1256
	{
sl@0
  1257
	return iCurrentCombiningClass? iCurrent.Current() : iBase.Current();
sl@0
  1258
	}
sl@0
  1259
sl@0
  1260
/** 
sl@0
  1261
@internalComponent
sl@0
  1262
*/
sl@0
  1263
void TCanonicalDecompositionIterator::Next()
sl@0
  1264
	{
sl@0
  1265
	iLastPosition = iBase.CurrentPosition();
sl@0
  1266
	if (iCurrentCombiningClass == 0)
sl@0
  1267
		{
sl@0
  1268
		iBase.Next();
sl@0
  1269
		if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
sl@0
  1270
			iCurrentCombiningClass = 0;
sl@0
  1271
		return;
sl@0
  1272
		}
sl@0
  1273
	// Find the next character in the run with the same combining class
sl@0
  1274
	iCurrent.Next();
sl@0
  1275
	TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
sl@0
  1276
	while (curclass != 0)
sl@0
  1277
		{
sl@0
  1278
		if (curclass == iCurrentCombiningClass)
sl@0
  1279
			// success
sl@0
  1280
			return;
sl@0
  1281
		iCurrent.Next();
sl@0
  1282
		curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
sl@0
  1283
		}
sl@0
  1284
	// There are none left in the current class. Find out what the next one is.
sl@0
  1285
	if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase))
sl@0
  1286
		iCurrentCombiningClass = 0;
sl@0
  1287
	}
sl@0
  1288
sl@0
  1289
/** 
sl@0
  1290
@internalComponent
sl@0
  1291
*/
sl@0
  1292
const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const
sl@0
  1293
	{
sl@0
  1294
	if (iCurrentCombiningClass != 0)
sl@0
  1295
		return 0;
sl@0
  1296
	const TText16* p = iBase.CurrentPosition();
sl@0
  1297
	return iLastPosition == p? 0 : p;
sl@0
  1298
	}
sl@0
  1299
sl@0
  1300
/** 
sl@0
  1301
@internalComponent
sl@0
  1302
*/
sl@0
  1303
TBool TCanonicalDecompositionIterator::IsInOpenSequence() const
sl@0
  1304
	{
sl@0
  1305
	return iInOpenSequence;
sl@0
  1306
	}
sl@0
  1307
sl@0
  1308
//////////////////////////////////////////////////////////////////////////////////////////////
sl@0
  1309
// TCanonicalDecompositionIteratorCached class
sl@0
  1310
sl@0
  1311
/** 
sl@0
  1312
@internalComponent
sl@0
  1313
*/
sl@0
  1314
void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a)
sl@0
  1315
	{
sl@0
  1316
	iBase.Set(a);
sl@0
  1317
	iCacheStart = 0;
sl@0
  1318
	iCacheSize = 0;
sl@0
  1319
	}
sl@0
  1320
sl@0
  1321
/** 
sl@0
  1322
@internalComponent
sl@0
  1323
*/
sl@0
  1324
TBool TCanonicalDecompositionIteratorCached::AtEnd() const
sl@0
  1325
	{
sl@0
  1326
	return iCacheSize == 0 && iBase.AtEnd();
sl@0
  1327
	}
sl@0
  1328
sl@0
  1329
/** 
sl@0
  1330
@internalComponent
sl@0
  1331
*/
sl@0
  1332
void TCanonicalDecompositionIteratorCached::Next(TInt aOffset)
sl@0
  1333
	{
sl@0
  1334
	ASSERT(0 <= aOffset);
sl@0
  1335
	ASSERT(0 <= iCacheSize);
sl@0
  1336
	ASSERT(0 != iCacheSize || !iBase.AtEnd());
sl@0
  1337
	if (aOffset <= iCacheSize)
sl@0
  1338
		{
sl@0
  1339
		iCacheSize -= aOffset;
sl@0
  1340
		iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1);
sl@0
  1341
		return;
sl@0
  1342
		}
sl@0
  1343
	aOffset -= iCacheSize;
sl@0
  1344
	iCacheSize = 0;
sl@0
  1345
	while (aOffset != 0)
sl@0
  1346
		{
sl@0
  1347
		iBase.Next();
sl@0
  1348
		--aOffset;
sl@0
  1349
		}
sl@0
  1350
	}
sl@0
  1351
sl@0
  1352
/** 
sl@0
  1353
Get the character at the position of the iterator plus aOffset steps.
sl@0
  1354
Returns -1 if we are looking too far ahead.
sl@0
  1355
@internalComponent
sl@0
  1356
*/
sl@0
  1357
TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset)
sl@0
  1358
	{
sl@0
  1359
	// should be assert debug: there is a chance this could go off with
sl@0
  1360
	// bad collation tables
sl@0
  1361
	ASSERT(aOffset <= KMaxLookAhead);
sl@0
  1362
	while (iCacheSize <= aOffset)
sl@0
  1363
		{
sl@0
  1364
		if (iBase.AtEnd())
sl@0
  1365
			return TChar(static_cast <TUint> (-1));
sl@0
  1366
		TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1);
sl@0
  1367
		iCache[cachePos].iChar = iBase.Current();
sl@0
  1368
		iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter();
sl@0
  1369
		++iCacheSize;
sl@0
  1370
		iBase.Next();
sl@0
  1371
		}
sl@0
  1372
	return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar;
sl@0
  1373
	}
sl@0
  1374
sl@0
  1375
/** 
sl@0
  1376
If the current position in the original string is representable
sl@0
  1377
as a pointer into it and we know what it is, return it.
sl@0
  1378
@internalComponent
sl@0
  1379
*/
sl@0
  1380
const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const
sl@0
  1381
	{
sl@0
  1382
	if(iCacheSize == 0)
sl@0
  1383
		{
sl@0
  1384
		return iBase.CurrentPositionIfAtCharacter();
sl@0
  1385
		}
sl@0
  1386
	return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos;
sl@0
  1387
	}
sl@0
  1388