os/kernelhwsrv/kernel/eka/euser/unicode/Compare.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
     2 // All rights reserved.
     3 // This component and the accompanying materials are made available
     4 // under the terms of the License "Eclipse Public License v1.0"
     5 // which accompanies this distribution, and is available
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
     7 //
     8 // Initial Contributors:
     9 // Nokia Corporation - initial contribution.
    10 //
    11 // Contributors:
    12 //
    13 // Description:
    14 // Folding and decomposition implementation
    15 // 
    16 //
    17 
    18 #include "FoldDecomp.inl"
    19 #include "CompareImp.h"
    20 #include "u32std.h"
    21 
    22 #define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0])))
    23 
    24 ////////////////////////////////////////////////////////////////////////////////////////////
    25 // Global functions
    26 ////////////////////////////////////////////////////////////////////////////////////////////
    27 
    28 /**
    29 @internalComponent
    30 */
    31 TChar UTF16ToChar(const TText16* a)
    32 	{
    33 	if (0xD800 <= a[0])
    34 		{
    35 		if (a[0] < 0xE000)
    36 			{
    37             if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1]))
    38 				{
    39                 TChar c = ::PairSurrogates(a[0], a[1]);
    40 				if ((c & 0xFFFE) != 0xFFFE)
    41 					return c;
    42 				}
    43 			return 0xFFFF;
    44 			}
    45 		if (a[0] == 0xFFFE)
    46 			return 0xFFFF;
    47 		}
    48 	return a[0];
    49 	}
    50 
    51 /**
    52 Is a character a base character (ETrue) or a combiner (EFalse)?
    53 For now, we will treat all control characters as base characters.
    54 @internalComponent
    55 */
    56 TBool IsBaseCharacter(TChar a)
    57 	{
    58 	if(a < 0x220)
    59         {
    60 		// These Unicode characters are all assigned
    61 		// and none of them is a combining character
    62 		return ETrue;
    63         }
    64 	return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup;
    65 	}
    66 
    67 /**
    68 @internalComponent
    69 */
    70 inline TInt GetDecompositionIndex(TChar a)
    71 	{
    72     TInt i = DecompositionHashStart(a);
    73 	TUint32 v = KUnicodeToIndexHash[i];
    74 	if (!v)
    75 		return -1;
    76 	if ((v & 0xFFFFF) != a)
    77 		{
    78         TInt step = DecompositionHashStep(a);
    79 		do	{
    80 			i = (i + step) & KDecompositionHashBitmask;
    81 			v = KUnicodeToIndexHash[i];
    82 			if (!v)
    83 				return -1;
    84 			} while ((v & 0xFFFFF) != a);
    85 		}
    86 // it is important that this is an unsigned shift
    87 	return static_cast<TInt>(v >> 20);
    88 	}
    89 
    90 /**
    91 Will not work if an invalid index is passed.
    92 @internalComponent
    93 */
    94 static TUTF32Iterator GetFoldedDecomposition(TInt aIndex)
    95 	{
    96 	TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
    97 	if (0 <= singletonIndex)
    98 		return TUTF32Iterator(KSingletonFolds + singletonIndex);
    99 	const TText* start = KNonSingletonFolds + aIndex * 2;
   100 	if (*start != KLongD)
   101 		return TUTF32Iterator(start, start + 2,
   102 			TUTF32Iterator::EStartsWithValidCharacter);
   103 	TInt longDecompIndex = start[1];
   104 	start = KLongDecompositions + (longDecompIndex & 0xFFF);
   105 	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3,
   106 		TUTF32Iterator::EStartsWithValidCharacter);
   107 	}
   108 
   109 /**
   110 @internalComponent
   111 */
   112 static TChar GetFirstFoldedChar(TChar a)
   113 	{
   114     TInt index = ::GetDecompositionIndex(a);
   115     return index < 0? a : ::GetFoldedDecomposition(index).Current();
   116 	}
   117 
   118 /**
   119 @internalComponent
   120 */
   121 static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate)
   122 	{
   123     TInt index = ::GetDecompositionIndex(aNonSurrogate);
   124 	if (index < 0)
   125 		return aNonSurrogate - aFoldedNonSurrogate;
   126 	TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
   127 	if (0 < singletonIndex)
   128 		return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate;
   129 	const TText* start = KNonSingletonFolds + index * 2;
   130 	if (start[0] == KLongD)
   131 		start = KLongDecompositions + (start[1] & 0xFFF);
   132 	return *start - aFoldedNonSurrogate;
   133 	}
   134 
   135 /**
   136 @internalComponent
   137 */
   138 static TInt GetClass(TFoldedDecompIterator& a)
   139 	{
   140 	ASSERT(!a.AtEnd());
   141 	a.EnterFoldedSequence();
   142 	TChar ch = a.Current();
   143 	TInt cl = ch.GetCombiningClass();
   144 	if (cl == 0)
   145 		// Assume starters have been folded from ypogegrammeni
   146 		cl = 240;
   147 	return cl;
   148 	}
   149 
   150 ////////////////////////////////////////////////////////////////////////////////////////////
   151 // TUTF32Iterator
   152 ////////////////////////////////////////////////////////////////////////////////////////////
   153 
   154 /**
   155 @internalComponent
   156 */
   157 void TUTF32Iterator::Next()
   158 	{
   159 	ASSERT(iStart != iEnd);
   160 	while (++iStart != iEnd)
   161 		{
   162         iCurrent = ::UTF16ToChar(iStart);
   163 		if (iCurrent != 0xFFFF)
   164 			return;
   165 		}
   166 	}
   167 
   168 /**
   169 Locates a base character in a string using a folded comparision. Will not find combining 
   170 characters, nor will it consider Korean combining Jamo to be equivalent to Hangul.
   171 @internalComponent
   172 */
   173 TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar)
   174 	{
   175 	// A quick shortcut for simple rejections
   176 	if (aChar < 0xFFFF)
   177 		{
   178         while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar))
   179 			++iStart;
   180 		if (iStart != iEnd)
   181 			{
   182             iCurrent = ::UTF16ToChar(iStart);
   183 			if (iCurrent == 0xFFFF)
   184 				Next();
   185 			}
   186 		}
   187 	while (!AtEnd())
   188 		{
   189         TChar foldedChar = ::GetFirstFoldedChar(iCurrent);
   190 		if (aChar == foldedChar)
   191 			return ETrue;
   192 		Next();
   193 		}
   194 	return EFalse;
   195 	}
   196 
   197 ////////////////////////////////////////////////////////////////////////////////////////////
   198 // TFoldedDecompIterator
   199 ////////////////////////////////////////////////////////////////////////////////////////////
   200 
   201 /**
   202 @internalComponent
   203 */
   204 TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a)
   205 	{
   206 	Set(a);
   207 	}
   208 
   209 /**
   210 @internalComponent
   211 */
   212 TBool TFoldedDecompIterator::AtEnd() const
   213 	{
   214 	return iOriginal.AtEnd();
   215 	}
   216 
   217 /**
   218 @internalComponent
   219 */
   220 TBool TFoldedDecompIterator::AtEndOrWildcard() const
   221 	{
   222 	// neither '?' nor '*' have decomposition sequences, so we can assume that
   223 	// if we are pointing at one or the other, we don't need to check if we
   224 	// are in a decomposition sequence or not.
   225 	return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*';
   226 	}
   227 
   228 /**
   229 @internalComponent
   230 */
   231 TBool TFoldedDecompIterator::EnterFoldedSequence()
   232 	{
   233 	ASSERT(!AtEnd());
   234 	return !IsInFoldedSequence() && StrictEnterFoldedSequence();
   235 	}
   236 
   237 /**
   238 Enter folded sequence, assuming that we are not already in one
   239 @internalComponent
   240 */
   241 TBool TFoldedDecompIterator::StrictEnterFoldedSequence()
   242 	{
   243 	ASSERT(!AtEnd());
   244 	ASSERT(!IsInFoldedSequence());
   245     TInt index = ::GetDecompositionIndex(iOriginal.Current());
   246 	if (index < 0)
   247 		return EFalse;
   248 	iFolded = ::GetFoldedDecomposition(index);
   249 	return ETrue;
   250 	}
   251 
   252 /**
   253 An iota might have folded from a combining ypogegrammeni.
   254 If the current character is a base character, this function will
   255 detect whether it was folded from a combining character (and
   256 therefore does not constitute a grapheme boundary).
   257 @internalComponent
   258 */
   259 TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const
   260 	{
   261 	if (!IsInFoldedSequence())
   262 		return EFalse;
   263 	// The only character that does this is Ypogerammeni, which folds to iota
   264 	if (iFolded.Current() != 0x3B9)
   265 		return EFalse;
   266 	// If the unfolded character is a combiner then it cannot contain an iota,
   267 	// so it must be an ypogegrammeni that has been folded to one.
   268 	// This will catch 0x345, the ypogegrammeni itself.
   269     if (!::IsBaseCharacter(iOriginal.Current()))
   270 		return ETrue;
   271 	// Otherwise the base character will be at the start of the decomposition
   272 	// sequence. We will assume that it is folded from a genuine iota if and
   273 	// only if there is an iota at the start of the folded decomposition
   274 	// sequence. (In theory there might be an iota with a combining
   275 	// ypogegrammeni, but in practice this will not occur).
   276 	TInt index = ::GetDecompositionIndex(iOriginal.Current());
   277 	ASSERT(0 <= index);
   278 	TUTF32Iterator folded = ::GetFoldedDecomposition(index);
   279 	return folded.Current() != 0x3B9;
   280 	}
   281 
   282 /**
   283 @internalComponent
   284 */
   285 TChar TFoldedDecompIterator::Current() const
   286 	{
   287 	ASSERT(!AtEnd());
   288 	return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current();
   289 	}
   290 
   291 /** 
   292 Move past this code if it matches unfolded or folded 
   293 @internalComponent
   294 */
   295 TBool TFoldedDecompIterator::Match(TChar aCode)
   296 	{
   297 	ASSERT(!AtEnd());
   298 	if (!IsInFoldedSequence())
   299 		{
   300 		if (aCode == iOriginal.Current())
   301 			{
   302 			iOriginal.Next();
   303 			return ETrue;
   304 			}
   305 		if (!StrictEnterFoldedSequence())
   306 			return EFalse;
   307 		}
   308 	ASSERT(IsInFoldedSequence());
   309 	if (aCode == iFolded.Current())
   310 		{
   311 		iFolded.Next();
   312 		if (iFolded.AtEnd())
   313 			iOriginal.Next();
   314 		return ETrue;
   315 		}
   316 	return EFalse;
   317 	}
   318 
   319 /** 
   320 Move this and argument past matching character. 
   321 @internalComponent
   322 */
   323 TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat)
   324 	{
   325 	ASSERT(!AtEnd());
   326 	if (!IsInFoldedSequence())
   327 		{
   328 		if ( aThat.Match(iOriginal.Current()) )
   329 			{
   330 			iOriginal.Next();
   331 			return ETrue;
   332 			}
   333 		if (!StrictEnterFoldedSequence())
   334 			return EFalse;
   335 		}
   336 	ASSERT(IsInFoldedSequence());
   337 	if ( aThat.Match(iFolded.Current()) )
   338 		{
   339 		iFolded.Next();
   340 		if (iFolded.AtEnd())
   341 			iOriginal.Next();
   342 		return ETrue;
   343 		}
   344 	return EFalse;
   345 	}
   346 
   347 /** 
   348 @internalComponent
   349 */
   350 void TFoldedDecompIterator::Next()
   351 	{
   352 	ASSERT(!AtEnd());
   353 	if (IsInFoldedSequence())
   354 		{
   355 		iFolded.Next();
   356 		if (IsInFoldedSequence())
   357 			return;
   358 		}
   359 	iOriginal.Next();
   360 	}
   361 
   362 ////////////////////////////////////////////////////////////////////////////////////////////
   363 // TFoldedSortedDecompIterator
   364 ////////////////////////////////////////////////////////////////////////////////////////////
   365 
   366 /**
   367 Set this iterator to iterate over the next combining characters.
   368 Iotas in folded sequences are assumed to be character class 240
   369 (combining ypogegrammeni). Next() must be used once before
   370 the first call to Current(), as long as AtEnd() returns false.
   371 @param aBase Sets the start of the iteration. This value is advanced to the
   372              end of the iteration.
   373 @return The number of codes in the iteration.
   374 @internalComponent
   375 */
   376 TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase)
   377 	{
   378 	iStart = aBase;
   379 	TInt length = 0;
   380 	iCurrentClass = 256;
   381 
   382 	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
   383 
   384 	// Find the next starter (i.e. character with combining class 0).
   385 	// We must not count iota folded from ypogegrammeni.
   386 	// Ypogegrammeni has combining class 240.
   387 	// Iota has combining class 0.
   388 	// Also we will be searching for the character with the smallest
   389 	// combining class to start at.
   390 	while (!aBase.AtEnd())
   391 		{
   392 		aBase.EnterFoldedSequence();
   393 		TChar ch = aBase.Current();
   394 		TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet);
   395 		if (cl == 0)
   396 			{
   397 			if (aBase.CurrentIsBaseFoldedFromCombiner())
   398 				cl = 240;
   399 			else
   400 				break;
   401 			}
   402 		if (cl < iCurrentClass)
   403 			{
   404 			iCurrentClass = cl;
   405 			iCurrent = aBase;
   406 			iCurrentCount = length;
   407 			}
   408 		++length;
   409 		aBase.Next();
   410 		}
   411 	iRemaining = length;
   412 	iLength = length;
   413 	ASSERT(iLength == 0 || iCurrentClass < 256);
   414 	return length;
   415 	}
   416 
   417 /** 
   418 Set this iterator so that AtEnd() returns ETrue. 
   419 @internalComponent
   420 */
   421 void TFoldedSortedDecompIterator::Set()
   422 	{
   423 	iRemaining = 0;
   424 	}
   425 
   426 /** 
   427 @internalComponent
   428 */
   429 TBool TFoldedSortedDecompIterator::AtEnd() const
   430 	{
   431 	return iRemaining == 0;
   432 	}
   433 
   434 /** 
   435 @internalComponent
   436 */
   437 TChar TFoldedSortedDecompIterator::Current() const
   438 	{
   439 	ASSERT(!AtEnd());
   440 	return iCurrent.Current();
   441 	}
   442 
   443 /** 
   444 @internalComponent
   445 */
   446 void TFoldedSortedDecompIterator::Next()
   447 	{
   448 	ASSERT(!AtEnd());
   449 	--iRemaining;
   450 	while (++iCurrentCount != iLength)
   451 		{
   452 		iCurrent.Next();
   453         if (::GetClass(iCurrent) == iCurrentClass)
   454 			return;
   455 		}
   456 	// We have not found any more of the same class, so we will look
   457 	// for the smallest one larger.
   458 	TInt minClass = 256;
   459 	TFoldedDecompIterator searcher(iStart);
   460 	TInt searchCount = 0;
   461 	while (searchCount != iLength)
   462 		{
   463         TInt cl = ::GetClass(searcher);
   464 		if (iCurrentClass < cl && cl < minClass)
   465 			{
   466 			minClass = cl;
   467 			iCurrentCount = searchCount;
   468 			iCurrent = searcher;
   469 			}
   470 		++searchCount;
   471 		searcher.Next();
   472 		}
   473 	iCurrentClass = minClass;
   474 	ASSERT(minClass < 256 || AtEnd());
   475 	}
   476 
   477 ////////////////////////////////////////////////////////////////////////////////////////////
   478 // TFoldedCanonicalIterator
   479 ////////////////////////////////////////////////////////////////////////////////////////////
   480 
   481 /** 
   482 @internalComponent
   483 */
   484 TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a)
   485 	{
   486 	iBase.Set(a);
   487 	iSorted.Set();
   488     if(!iBase.AtEnd())
   489         {
   490 	    iBase.EnterFoldedSequence();
   491 	    if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
   492             {
   493 		    iSorted.Set(iBase);
   494             }
   495         }
   496 	}
   497 
   498 /** 
   499 @internalComponent
   500 */
   501 TBool TFoldedCanonicalIterator::AtEnd() const
   502 	{
   503 	return iSorted.AtEnd() && iBase.AtEnd();
   504 	}
   505 
   506 /** 
   507 @internalComponent
   508 */
   509 TChar TFoldedCanonicalIterator::Current() const
   510 	{
   511 	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
   512 	return iSorted.AtEnd()? iBase.Current() : iSorted.Current();
   513 	}
   514 
   515 /** 
   516 @internalComponent
   517 */
   518 void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet)
   519 	{
   520 	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
   521 	if (!iSorted.AtEnd())
   522 		{
   523 		iSorted.Next();
   524 		return;
   525 		}
   526 	iBase.Next();
   527     if(!iBase.AtEnd())
   528         {
   529 	    iBase.EnterFoldedSequence();
   530  	    if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
   531            {
   532 		    iSorted.Set(iBase);
   533             }
   534         }
   535 	}
   536 
   537 ////////////////////////////////////////////////////////////////////////////////////////////
   538 // Folding - Global functions and structures
   539 ////////////////////////////////////////////////////////////////////////////////////////////
   540 
   541 /** 
   542 @internalComponent
   543 */
   544 struct TEndTester 
   545     { 
   546     typedef enum {EGenuine, EWildcard} TType;
   547 
   548     inline TEndTester(TType aType) :
   549         iType(aType)
   550         {
   551         }
   552 
   553     inline TBool operator()(const TFoldedDecompIterator& a) const
   554         {
   555         return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard();
   556         } 
   557 
   558 private:
   559     TType iType;
   560 
   561     };
   562 
   563 /**
   564 Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or
   565 '*' wildcard or the end of aSearchTerm.
   566 It is assumed that the search term begins with a base character.
   567 Returns true if and only if the whole search term was matched
   568 with a whole number of UTF16s in the candidate string.
   569 On return of ETrue the candidate string iterator will have consumed the
   570 matching characters the search term will have had all its matching characters
   571 consumed.
   572 @internalComponent
   573 */
   574 TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm,
   575                          const TEndTester& aEndTester)
   576 	{
   577 	TBool assumeBase = ETrue;
   578 	TFoldedDecompIterator st(aSearchTerm);
   579 	TFoldedDecompIterator cs(aCandidateString);
   580 	while (!aEndTester(st))
   581 		{
   582 		if (cs.AtEnd())
   583 			return EFalse;
   584 		if (st.Match(cs))
   585 			{
   586 			assumeBase = EFalse;
   587 			if (aEndTester(st))
   588 				{
   589                 // We have a match...
   590                 if (cs.IsInFoldedSequence())
   591                     // but it was against only part of a character
   592                     // in the original string, so we fail.
   593                     return EFalse;
   594 				aCandidateString = cs.BaseIterator();
   595 				aSearchTerm = st.BaseIterator();
   596 				return ETrue;
   597 				}
   598 			continue;
   599 			}
   600 		// did not match. We need to re-order canonically.
   601 		if (assumeBase)
   602 			// The first characters did not match.. do not bother
   603 			// to re-order.
   604 			return EFalse;
   605 		TFoldedSortedDecompIterator csc;
   606 		TInt cscLength = csc.Set(cs);
   607 		if (cscLength < 2)
   608 			// If there are less than two characters to be reordered,
   609 			// there is no hope of getting a match by re-ordering
   610 			return EFalse;
   611 		TFoldedSortedDecompIterator stc;
   612 		if (cscLength != stc.Set(st))
   613 			// if the strings have differing numbers of characters,
   614 			// there can be no match
   615 			return EFalse;
   616 		while (!stc.AtEnd())
   617 			{
   618 			ASSERT(!csc.AtEnd());
   619 			if (stc.Current() != csc.Current())
   620 				// well, we tried.
   621 				return EFalse;
   622 			stc.Next();
   623 			csc.Next();
   624 			}
   625 		}
   626 	// If the candidate string is in a folded sequence, then
   627 	// we should not accept the match, as we require all matches
   628 	// to be for a whole number of characters in the original string.
   629 	if (cs.IsInFoldedSequence())
   630 		return EFalse;
   631 	aCandidateString = cs.BaseIterator();
   632 	aSearchTerm = st.BaseIterator();
   633 	return ETrue;
   634 	}
   635 
   636 /** 
   637 @internalComponent
   638 */
   639 TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, 
   640                                        const TEndTester& aEndTester)
   641 	{
   642     if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester))
   643 		return EFalse;
   644     return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current());
   645 	}
   646 
   647 /** 
   648 @internalComponent
   649 */
   650 static TBool ConsumeGrapheme(TUTF32Iterator& a)
   651 	{
   652 	if (a.AtEnd())
   653 		return EFalse;
   654 	a.Next();
   655     while (!a.AtEnd() && !::IsBaseCharacter(a.Current()))
   656 		a.Next();
   657 	return ETrue;
   658 	}
   659 
   660 /** 
   661 @internalComponent
   662 */
   663 TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
   664 	{
   665     TEndTester endTester(TEndTester::EWildcard);
   666     while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
   667 		{
   668 		if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
   669 			return ETrue;
   670 		ASSERT(aSearchTerm.Current() == '?');
   671 		aSearchTerm.Next();
   672         if (!::ConsumeGrapheme(aCandidateString))
   673 			return EFalse;
   674 		}
   675 	return EFalse;
   676 	}
   677 
   678 /** 
   679 @internalComponent
   680 */
   681 TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
   682 	{
   683 	// match as many graphemes as there are leading ?s
   684 	while (!aSearchTerm.AtEnd()
   685 		&& aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?')
   686 		{
   687         if (!::ConsumeGrapheme(aCandidateString))
   688 			return EFalse;
   689 		aSearchTerm.Next();
   690 		}
   691 	if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
   692 		return ETrue;
   693     TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
   694 	TUTF32Iterator savedST(aSearchTerm);
   695 	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
   696 		{
   697 		TUTF32Iterator savedCS = aCandidateString;
   698 		if (::MatchSectionFolded(aCandidateString, aSearchTerm))
   699 			return ETrue;
   700 		aSearchTerm = savedST;
   701 		aCandidateString = savedCS;
   702 		aCandidateString.Next();
   703 		}
   704 	return EFalse;
   705 	}
   706 
   707 /** 
   708 @internalComponent
   709 */
   710 TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
   711                         const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
   712 	{
   713 	TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd);
   714 	TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd);
   715 	// First section of search term must match exactly at the start of the
   716 	// candidate string.
   717 	if (!::MatchSectionFolded(candidate, searchTerm))
   718 		return EFalse;
   719 
   720 	// If there was only one section, it must match the whole candidate string.
   721 	if (searchTerm.AtEnd())
   722 		return candidate.AtEnd();
   723 
   724 	while (!searchTerm.AtEnd())
   725 		{
   726 		searchTerm.Next();
   727 		if (!::FindMatchSectionFolded(candidate, searchTerm))
   728 			return EFalse;
   729 		}
   730 
   731 	// The last section must match exactly at the end of the candidate string.
   732 	if (candidate.AtEnd())	// shortcut
   733 		return ETrue;
   734 	const TText16* searchTermLastSection = aSearchTermEnd;
   735 	while (searchTermLastSection != aSearchTermStart
   736 		&& searchTermLastSection[-1] != '*')
   737 		--searchTermLastSection;
   738 	if (searchTermLastSection == aSearchTermEnd)
   739 		// last section is null, so trivially matches
   740 		return ETrue;
   741 	// Count graphemes by counting the number of base characters.
   742 	// The first one is assumed to start a grapheme.
   743 	TInt graphemeCount = 1;
   744 	for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s)
   745 		{
   746         if (::IsBaseCharacter(*s))
   747 			++graphemeCount;
   748 		}
   749 	// Count this many graphemes back in the candidate string
   750 	const TText16* candidateLastSection = aCandidateStringEnd;
   751 	while (graphemeCount != 0
   752 		&& candidateLastSection != aCandidateStringStart)
   753 		{
   754         if (::IsBaseCharacter(*--candidateLastSection))
   755 			--graphemeCount;
   756 		}
   757 	TUTF32Iterator last(candidateLastSection, aCandidateStringEnd);
   758 	TUTF32Iterator st(searchTermLastSection, aSearchTermEnd);
   759 	return ::MatchSectionFolded(last, st);
   760 	}
   761 
   762 /** 
   763 Implementation of MatchF
   764 (slow if there is a match: MatchStringFolded is better, but does not return
   765 the position of the match)
   766 @internalComponent
   767 */
   768 TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
   769                              const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
   770 	{
   771 	// Quick shortcut for simple non-match
   772 	if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*')
   773 		{
   774 		if (aCandidateStringStart == aCandidateStringEnd)
   775 			return KErrNotFound;
   776 		// We won't bother with non-characters and surrogate pairs.
   777 		if (*aSearchTermStart != '?'
   778 			&& *aSearchTermStart < 0xD800
   779 			&& *aCandidateStringStart < 0xD800
   780             && ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart))
   781 			return KErrNotFound;
   782 		}
   783     if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd,
   784 		aSearchTermStart, aSearchTermEnd))
   785 		return KErrNotFound;
   786 	// find where it matches
   787 	while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*')
   788 		++aSearchTermStart;
   789 	const TText16* end = aSearchTermStart;
   790 	while (end != aSearchTermEnd && *end != '*')
   791 		++end;
   792 	// To preserve existing behaviour, a search term consisting of nothing
   793 	// but stars is considered to match at 0.
   794 	if (end == aSearchTermStart)
   795 		return 0;
   796 	for (const TText16* csSection = aCandidateStringStart;
   797 		csSection <= aCandidateStringEnd; ++csSection)
   798 		{
   799 		TUTF32Iterator cs(csSection, aCandidateStringEnd);
   800 		TUTF32Iterator st(aSearchTermStart, end);
   801         if (::MatchSectionFolded(cs, st))
   802 			{
   803 			// If this match must match exactly at the end, we must keep
   804 			// going.
   805 			// This could be a lot faster, with some optimizations.
   806 			if (end == aSearchTermEnd)
   807 				{
   808 				if (!cs.AtEnd())
   809 					continue;
   810 				}
   811 			return csSection - aCandidateStringStart;
   812 			}
   813         // Because this function is using TUTF32Iterator, which means the
   814         // original author want to support surrogate. Take it as a defect and
   815         // fix it, while do not define a new LocateMatchStringFoldedSurrogate().
   816         if (IsSurrogate(*csSection))
   817         	++csSection;
   818 		}
   819 	// this should never happen!
   820 	ASSERT(0);
   821 	return KErrNotFound;
   822 	}
   823 
   824 /** 
   825 Implementation of FindF
   826 @internalComponent
   827 */
   828 TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
   829 	{
   830     //Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally 
   831     //compatible with the old one.
   832     if(aSearchTerm.Length() == 0)
   833         {
   834         return 0;
   835         }
   836 	const TText16* candidateStartPosition = aCandidateString.CurrentPosition();
   837     TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
   838 	TUTF32Iterator savedST(aSearchTerm);
   839 	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
   840 		{
   841 		TUTF32Iterator savedCS = aCandidateString;
   842         TEndTester endTester(TEndTester::EGenuine);
   843         if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
   844 			return savedCS.CurrentPosition() - candidateStartPosition;
   845 		aSearchTerm = savedST;
   846 		aCandidateString = savedCS;
   847 		aCandidateString.Next();
   848 		}
   849 	return KErrNotFound;
   850 	}
   851 
   852 /** 
   853 Implementation of boolean CompareF
   854 @internalComponent
   855 */
   856 TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight)
   857 	{
   858     TEndTester endTester(TEndTester::EGenuine);
   859     if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester))
   860 		return aLeft.AtEnd();
   861 	return EFalse;
   862 	}
   863 
   864 /** 
   865 Implementation of tri-state CompareF
   866 @internalComponent
   867 */
   868 TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight)
   869 	{
   870 	TFoldedCanonicalIterator left(aLeft);
   871 	TFoldedCanonicalIterator right(aRight);
   872 
   873 	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
   874 
   875 	while (!left.AtEnd())
   876 		{
   877 		if (right.AtEnd())
   878 			return 1;
   879 		TChar cr = right.Current();
   880 		TChar cl = left.Current();
   881 		if (cr != cl)
   882 			return cl - cr;
   883         right.Next(charDataSet);
   884         left.Next(charDataSet);
   885 		}
   886 	return right.AtEnd()? 0 : -1;
   887 	}
   888 
   889 ////////////////////////////////////////////////////////////////////////////////////////////
   890 // Composition/Decomposition - Global functions and structures
   891 ////////////////////////////////////////////////////////////////////////////////////////////
   892 
   893 /** 
   894 @internalComponent
   895 */
   896 static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex)
   897 	{
   898 	const TText* start = KNonSingletonDecompositions + aIndex * 2;
   899 	if (*start != KLongD)
   900 		return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter);
   901 	TInt longDecompIndex = start[1];
   902 	start = KLongDecompositions + (longDecompIndex & 0xFFF);
   903 	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter);
   904 	}
   905 
   906 /** 
   907 Come up with a decomposition for the current value pointed at by the iterator
   908 @internalComponent
   909 */
   910 static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt)
   911 	{
   912     TInt index = ::GetDecompositionIndex(aUTF32It.Current());
   913 	TInt singletonIndex = index - (sizeof(KNonSingletonDecompositions)/sizeof(KNonSingletonDecompositions[0])/2);
   914 	const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0]);
   915 	if(index < 0 || SizeOfSingletonTable <= singletonIndex)
   916         {
   917         aOutIt = aUTF32It.CurrentAsIterator();
   918 		return EFalse;//There is not any valid decomposition
   919         }
   920 	if(0 <= singletonIndex)
   921         {
   922         // KSingletonDecompositions contains some items that come from "ShortDecompsLongFolds".
   923         // "ShortDecompsLongFolds" contains some characters that have no composition, but have "long fold".
   924         // This basically specific to non-BMP characters with fold also outside BMP.
   925         // For example:
   926         // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
   927         // In Unicode 5.0.0, totally, there are 40 similar characters, which are U+10400-U+10427.
   928         if (KSingletonDecompositions[singletonIndex] == 0xFFFF)
   929         	return EFalse;
   930 		aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex);
   931         }
   932     else
   933         {
   934         aOutIt = ::IndexToNonSingletonDecomposition(index);
   935         }
   936     return ETrue;//Valid decomposition
   937 	}
   938 
   939 /** 
   940 @internalComponent
   941 */
   942 static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf)
   943 	{
   944 	aBuf.Zero();
   945 	if (aChar < 0x10000)
   946 		aBuf.Append(aChar);
   947 	else
   948 		{
   949 		aBuf.Append((aChar >> 10) + 0xD7C0);
   950 		aBuf.Append((aChar & 0x3FF) + 0xDC00);
   951 		}
   952 	const TText16* t = aBuf.Ptr();
   953 	return TUTF32Iterator(t, t + aBuf.Length());
   954 	}
   955 
   956 /** 
   957 @internalComponent
   958 */
   959 TBool DecomposeChar(TChar aCh, TPtrC16& aResult)
   960     {
   961     aResult.Set(NULL, 0);
   962     TBuf16<2> srcBuf;
   963     TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf);
   964     TBool res = ::Decompose(it, it);
   965     if(res)
   966         {
   967         aResult.Set(it.CurrentPosition(), it.Length());
   968         }
   969     return res;
   970     }
   971 
   972 /** 
   973 Turn an index into the hash table known to point to a non-singleton
   974 decomposition into that decomposition.
   975 @internalComponent
   976 */
   977 static TUTF32Iterator HashIndexToDecomposition(TInt aIndex)
   978 	{
   979 	TUint32 v = KUnicodeToIndexHash[aIndex];
   980 	ASSERT(v != 0);
   981 	TInt index = static_cast<TInt>(v >> 20);
   982 	ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2);
   983     return ::IndexToNonSingletonDecomposition(index);
   984 	}
   985 
   986 /**
   987 Takes a start and (one past the) end index into KCompostionMapping
   988 and a number of UTF16 characters (aLengthSoFar). All of the compositions
   989 within the range must have their first aLengthSoFar UTF16 characters
   990 matching.
   991 
   992 On entry, if aStart == aEnd then there is no possibility of a match so return
   993 immediately with EFalse. To continue, aStart must be strictly less than aEnd.
   994 
   995 Afterwards, aStart and aEnd will be narrowed to all those compositions
   996 where the aLengthSoFar'th UTF16 character matches aNextCharacter.
   997 No further compositions existing is indicated by aStart == aEnd.
   998 
   999 @return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1.
  1000 @internalComponent
  1001 */
  1002 static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter)
  1003 	{
  1004 	if (aStart == aEnd)
  1005 		return EFalse;
  1006 	ASSERT((TUint)aStart < (TUint)aEnd);
  1007  	ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping));
  1008     TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart]));
  1009 	if (startIterator.Length() == aLengthSoFar)
  1010 		++aStart;
  1011 
  1012 	// Find a single example of a decomposition that is suitable
  1013 	TInt mid;
  1014 	TUTF32Iterator midIt;
  1015 	for (;;)
  1016 		{
  1017 		if (aStart == aEnd)
  1018 			return EFalse;
  1019 		mid = aStart + ((aEnd - aStart) >> 1);
  1020         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]);
  1021 		ASSERT(aLengthSoFar < midIt.Length());
  1022 		TInt midItChar = midIt[aLengthSoFar];
  1023 		if (midItChar < aNextCharacter)
  1024 			aStart = mid + 1;
  1025 		else if (aNextCharacter < midItChar)
  1026 			aEnd = mid;
  1027 		else
  1028 			{
  1029 			startIterator = midIt;
  1030 			break;
  1031 			}
  1032 		}
  1033 
  1034 	// FInd the first decomposition that does not match
  1035 	TInt start2 = mid + 1;
  1036 	while (start2 != aEnd)
  1037 		{
  1038 		ASSERT(start2 < aEnd);
  1039 		TInt mid2 = start2 + ((aEnd - start2) >> 1);
  1040         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
  1041 		ASSERT(aLengthSoFar < midIt.Length());
  1042 		TInt midItChar = midIt[aLengthSoFar];
  1043 		ASSERT(aNextCharacter <= midItChar);
  1044 		if (aNextCharacter < midItChar)
  1045 			aEnd = mid2;
  1046 		else
  1047 			start2 = mid2 + 1;
  1048 		}
  1049 
  1050 	// Find the first decomposition that matches
  1051 	while (aStart != mid)
  1052 		{
  1053 		ASSERT(aStart < mid);
  1054 		TInt mid2 = aStart + ((mid - aStart) >> 1);
  1055         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
  1056 		ASSERT(aLengthSoFar < midIt.Length());
  1057 		TInt midItChar = midIt[aLengthSoFar];
  1058 		ASSERT(midItChar <= aNextCharacter);
  1059 		if (midItChar < aNextCharacter)
  1060 			aStart = mid2 + 1;
  1061 		else
  1062 			{
  1063 			startIterator = midIt;
  1064 			mid = mid2;
  1065 			}
  1066 		}
  1067 
  1068 	return startIterator.Length() == (aLengthSoFar + 1);
  1069 	}
  1070 
  1071 /** 
  1072 @internalComponent
  1073 */
  1074 static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar)
  1075 	{
  1076 	if (aChar < 0x10000)
  1077         return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar);
  1078     ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0);
  1079 	if (aStart == aEnd)
  1080 		return EFalse;
  1081     return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00);
  1082 	}
  1083 
  1084 /**
  1085 Combine as many of the characters presented as possible into a single
  1086 character.
  1087 @return The number of characters successfully combined.
  1088 @param aCombined If a nonzero value is returned, this contains
  1089             the character that is that number of characters from the start of
  1090             aDes combined.
  1091 @internalComponent
  1092 */
  1093 TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined)
  1094 	{
  1095 	TInt start = 0;
  1096 	TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]);
  1097 	TInt length = 0;
  1098 	TInt bestIndex = 0;
  1099 	TInt bestLength = 0;
  1100 	const TText16* ptr = aDes.Ptr();
  1101 	TUTF32Iterator input(ptr, ptr + aDes.Length());
  1102 	while (!input.AtEnd())
  1103 		{
  1104         if (::RefineCompositionUTF32(start, end, length, input.Current()))
  1105 			{
  1106 			bestIndex = start;
  1107 			bestLength = length;
  1108 			}
  1109 		input.Next();
  1110 		}
  1111 	if (bestLength == 0)
  1112 		return 0;
  1113 	aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF;
  1114 	return bestLength;
  1115 	}
  1116 
  1117 //////////////////////////////////////////////////////////////////////////////////////////////
  1118 // COLLATION
  1119 //////////////////////////////////////////////////////////////////////////////////////////////
  1120 
  1121 //////////////////////////////////////////////////////////////////////////////////////////////
  1122 // TDecompositionIterator class
  1123 
  1124 /** 
  1125 @internalComponent
  1126 */
  1127 void TDecompositionIterator::Set(const TUTF32Iterator& a)
  1128 	{
  1129 	iBase = a;
  1130 	if (!iBase.AtEnd())
  1131         {
  1132         (void)::Decompose(iBase, iDecomposition);
  1133         }
  1134 	}
  1135 
  1136 /** 
  1137 @internalComponent
  1138 */
  1139 TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a)
  1140 	{
  1141 	Set(a);
  1142 	}
  1143 
  1144 /** 
  1145 @internalComponent
  1146 */
  1147 TBool TDecompositionIterator::AtEnd() const
  1148 	{
  1149 	return iBase.AtEnd();
  1150 	}
  1151 
  1152 /** 
  1153 @internalComponent
  1154 */
  1155 TChar TDecompositionIterator::Current() const
  1156 	{
  1157 	return iDecomposition.Current();
  1158 	}
  1159 
  1160 /** 
  1161 @internalComponent
  1162 */
  1163 void TDecompositionIterator::Next()
  1164 	{
  1165 	ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd());
  1166 	iDecomposition.Next();
  1167 	if (!iDecomposition.AtEnd())
  1168 		return;
  1169 	iBase.Next();
  1170 	if (!iBase.AtEnd())
  1171         {
  1172         (void)::Decompose(iBase, iDecomposition);
  1173         }
  1174 	}
  1175 
  1176 /** 
  1177 @internalComponent
  1178 */
  1179 const TText16* TDecompositionIterator::CurrentPosition() const
  1180 	{
  1181 	return iBase.CurrentPosition();
  1182 	}
  1183 
  1184 /** 
  1185 Find out the length and minimum combining class of
  1186 the current run of characters of nonzero combining class.
  1187 aMinClass and aMinClassPos are not written to if the return
  1188 value is 0.
  1189 aEndOfRun is written to with the final position of the iteration
  1190 if 0 is returned and aEndOfRun is non-null
  1191 @internalComponent
  1192 */
  1193 static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos,
  1194                           const TDecompositionIterator& aStart, TBool* aOpenSequence, 
  1195                           TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0)
  1196 	{
  1197 	TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass();
  1198 	if (comclass == 0)
  1199 		{
  1200 		if (aEndOfRun)
  1201 			*aEndOfRun = aStart;
  1202 		if (aOpenSequence)
  1203 			*aOpenSequence = aStart.AtEnd();
  1204 		return 0;
  1205 		}
  1206 	aMinClass = 256;
  1207 	TDecompositionIterator i = aStart;
  1208 	TInt count = 0;
  1209 	while (comclass != 0)
  1210 		{
  1211 		if (aMaxDisallowedClass < comclass)
  1212 			{
  1213 			if (comclass < aMinClass)
  1214 				{
  1215 				aMinClass = comclass;
  1216 				aMinClassPos = i;
  1217 				}
  1218 			++count;
  1219 			}
  1220 		i.Next();
  1221 		comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass();
  1222 		}
  1223 	if (count == 0 && aEndOfRun)
  1224 		*aEndOfRun = i;
  1225 	if (aOpenSequence)
  1226 		*aOpenSequence = i.AtEnd();
  1227 	return count;
  1228 	}
  1229 
  1230 //////////////////////////////////////////////////////////////////////////////////////////////
  1231 // TCanonicalDecompositionIterator class
  1232 
  1233 /** 
  1234 @internalComponent
  1235 */
  1236 void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a)
  1237 	{
  1238 	iBase.Set(a);
  1239 	iLastPosition = 0;
  1240 	if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
  1241 		iCurrentCombiningClass = 0;
  1242 	}
  1243 
  1244 /** 
  1245 @internalComponent
  1246 */
  1247 TBool TCanonicalDecompositionIterator::AtEnd() const
  1248 	{
  1249 	return iBase.AtEnd();
  1250 	}
  1251 
  1252 /** 
  1253 @internalComponent
  1254 */
  1255 TChar TCanonicalDecompositionIterator::Current() const
  1256 	{
  1257 	return iCurrentCombiningClass? iCurrent.Current() : iBase.Current();
  1258 	}
  1259 
  1260 /** 
  1261 @internalComponent
  1262 */
  1263 void TCanonicalDecompositionIterator::Next()
  1264 	{
  1265 	iLastPosition = iBase.CurrentPosition();
  1266 	if (iCurrentCombiningClass == 0)
  1267 		{
  1268 		iBase.Next();
  1269 		if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
  1270 			iCurrentCombiningClass = 0;
  1271 		return;
  1272 		}
  1273 	// Find the next character in the run with the same combining class
  1274 	iCurrent.Next();
  1275 	TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
  1276 	while (curclass != 0)
  1277 		{
  1278 		if (curclass == iCurrentCombiningClass)
  1279 			// success
  1280 			return;
  1281 		iCurrent.Next();
  1282 		curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
  1283 		}
  1284 	// There are none left in the current class. Find out what the next one is.
  1285 	if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase))
  1286 		iCurrentCombiningClass = 0;
  1287 	}
  1288 
  1289 /** 
  1290 @internalComponent
  1291 */
  1292 const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const
  1293 	{
  1294 	if (iCurrentCombiningClass != 0)
  1295 		return 0;
  1296 	const TText16* p = iBase.CurrentPosition();
  1297 	return iLastPosition == p? 0 : p;
  1298 	}
  1299 
  1300 /** 
  1301 @internalComponent
  1302 */
  1303 TBool TCanonicalDecompositionIterator::IsInOpenSequence() const
  1304 	{
  1305 	return iInOpenSequence;
  1306 	}
  1307 
  1308 //////////////////////////////////////////////////////////////////////////////////////////////
  1309 // TCanonicalDecompositionIteratorCached class
  1310 
  1311 /** 
  1312 @internalComponent
  1313 */
  1314 void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a)
  1315 	{
  1316 	iBase.Set(a);
  1317 	iCacheStart = 0;
  1318 	iCacheSize = 0;
  1319 	}
  1320 
  1321 /** 
  1322 @internalComponent
  1323 */
  1324 TBool TCanonicalDecompositionIteratorCached::AtEnd() const
  1325 	{
  1326 	return iCacheSize == 0 && iBase.AtEnd();
  1327 	}
  1328 
  1329 /** 
  1330 @internalComponent
  1331 */
  1332 void TCanonicalDecompositionIteratorCached::Next(TInt aOffset)
  1333 	{
  1334 	ASSERT(0 <= aOffset);
  1335 	ASSERT(0 <= iCacheSize);
  1336 	ASSERT(0 != iCacheSize || !iBase.AtEnd());
  1337 	if (aOffset <= iCacheSize)
  1338 		{
  1339 		iCacheSize -= aOffset;
  1340 		iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1);
  1341 		return;
  1342 		}
  1343 	aOffset -= iCacheSize;
  1344 	iCacheSize = 0;
  1345 	while (aOffset != 0)
  1346 		{
  1347 		iBase.Next();
  1348 		--aOffset;
  1349 		}
  1350 	}
  1351 
  1352 /** 
  1353 Get the character at the position of the iterator plus aOffset steps.
  1354 Returns -1 if we are looking too far ahead.
  1355 @internalComponent
  1356 */
  1357 TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset)
  1358 	{
  1359 	// should be assert debug: there is a chance this could go off with
  1360 	// bad collation tables
  1361 	ASSERT(aOffset <= KMaxLookAhead);
  1362 	while (iCacheSize <= aOffset)
  1363 		{
  1364 		if (iBase.AtEnd())
  1365 			return TChar(static_cast <TUint> (-1));
  1366 		TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1);
  1367 		iCache[cachePos].iChar = iBase.Current();
  1368 		iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter();
  1369 		++iCacheSize;
  1370 		iBase.Next();
  1371 		}
  1372 	return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar;
  1373 	}
  1374 
  1375 /** 
  1376 If the current position in the original string is representable
  1377 as a pointer into it and we know what it is, return it.
  1378 @internalComponent
  1379 */
  1380 const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const
  1381 	{
  1382 	if(iCacheSize == 0)
  1383 		{
  1384 		return iBase.CurrentPositionIfAtCharacter();
  1385 		}
  1386 	return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos;
  1387 	}
  1388