Update contrib.
1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
2 // All rights reserved.
3 // This component and the accompanying materials are made available
4 // under the terms of the License "Eclipse Public License v1.0"
5 // which accompanies this distribution, and is available
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
8 // Initial Contributors:
9 // Nokia Corporation - initial contribution.
14 // Folding and decomposition implementation
18 #include "FoldDecomp.inl"
19 #include "CompareImp.h"
22 #define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0])))
24 ////////////////////////////////////////////////////////////////////////////////////////////
26 ////////////////////////////////////////////////////////////////////////////////////////////
31 TChar UTF16ToChar(const TText16* a)
37 if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1]))
39 TChar c = ::PairSurrogates(a[0], a[1]);
40 if ((c & 0xFFFE) != 0xFFFE)
52 Is a character a base character (ETrue) or a combiner (EFalse)?
53 For now, we will treat all control characters as base characters.
56 TBool IsBaseCharacter(TChar a)
60 // These Unicode characters are all assigned
61 // and none of them is a combining character
64 return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup;
70 inline TInt GetDecompositionIndex(TChar a)
72 TInt i = DecompositionHashStart(a);
73 TUint32 v = KUnicodeToIndexHash[i];
76 if ((v & 0xFFFFF) != a)
78 TInt step = DecompositionHashStep(a);
80 i = (i + step) & KDecompositionHashBitmask;
81 v = KUnicodeToIndexHash[i];
84 } while ((v & 0xFFFFF) != a);
86 // it is important that this is an unsigned shift
87 return static_cast<TInt>(v >> 20);
91 Will not work if an invalid index is passed.
94 static TUTF32Iterator GetFoldedDecomposition(TInt aIndex)
96 TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
97 if (0 <= singletonIndex)
98 return TUTF32Iterator(KSingletonFolds + singletonIndex);
99 const TText* start = KNonSingletonFolds + aIndex * 2;
100 if (*start != KLongD)
101 return TUTF32Iterator(start, start + 2,
102 TUTF32Iterator::EStartsWithValidCharacter);
103 TInt longDecompIndex = start[1];
104 start = KLongDecompositions + (longDecompIndex & 0xFFF);
105 return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3,
106 TUTF32Iterator::EStartsWithValidCharacter);
112 static TChar GetFirstFoldedChar(TChar a)
114 TInt index = ::GetDecompositionIndex(a);
115 return index < 0? a : ::GetFoldedDecomposition(index).Current();
121 static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate)
123 TInt index = ::GetDecompositionIndex(aNonSurrogate);
125 return aNonSurrogate - aFoldedNonSurrogate;
126 TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
127 if (0 < singletonIndex)
128 return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate;
129 const TText* start = KNonSingletonFolds + index * 2;
130 if (start[0] == KLongD)
131 start = KLongDecompositions + (start[1] & 0xFFF);
132 return *start - aFoldedNonSurrogate;
138 static TInt GetClass(TFoldedDecompIterator& a)
141 a.EnterFoldedSequence();
142 TChar ch = a.Current();
143 TInt cl = ch.GetCombiningClass();
145 // Assume starters have been folded from ypogegrammeni
150 ////////////////////////////////////////////////////////////////////////////////////////////
152 ////////////////////////////////////////////////////////////////////////////////////////////
157 void TUTF32Iterator::Next()
159 ASSERT(iStart != iEnd);
160 while (++iStart != iEnd)
162 iCurrent = ::UTF16ToChar(iStart);
163 if (iCurrent != 0xFFFF)
169 Locates a base character in a string using a folded comparision. Will not find combining
170 characters, nor will it consider Korean combining Jamo to be equivalent to Hangul.
173 TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar)
175 // A quick shortcut for simple rejections
178 while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar))
182 iCurrent = ::UTF16ToChar(iStart);
183 if (iCurrent == 0xFFFF)
189 TChar foldedChar = ::GetFirstFoldedChar(iCurrent);
190 if (aChar == foldedChar)
197 ////////////////////////////////////////////////////////////////////////////////////////////
198 // TFoldedDecompIterator
199 ////////////////////////////////////////////////////////////////////////////////////////////
204 TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a)
212 TBool TFoldedDecompIterator::AtEnd() const
214 return iOriginal.AtEnd();
220 TBool TFoldedDecompIterator::AtEndOrWildcard() const
222 // neither '?' nor '*' have decomposition sequences, so we can assume that
223 // if we are pointing at one or the other, we don't need to check if we
224 // are in a decomposition sequence or not.
225 return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*';
231 TBool TFoldedDecompIterator::EnterFoldedSequence()
234 return !IsInFoldedSequence() && StrictEnterFoldedSequence();
238 Enter folded sequence, assuming that we are not already in one
241 TBool TFoldedDecompIterator::StrictEnterFoldedSequence()
244 ASSERT(!IsInFoldedSequence());
245 TInt index = ::GetDecompositionIndex(iOriginal.Current());
248 iFolded = ::GetFoldedDecomposition(index);
253 An iota might have folded from a combining ypogegrammeni.
254 If the current character is a base character, this function will
255 detect whether it was folded from a combining character (and
256 therefore does not constitute a grapheme boundary).
259 TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const
261 if (!IsInFoldedSequence())
263 // The only character that does this is Ypogerammeni, which folds to iota
264 if (iFolded.Current() != 0x3B9)
266 // If the unfolded character is a combiner then it cannot contain an iota,
267 // so it must be an ypogegrammeni that has been folded to one.
268 // This will catch 0x345, the ypogegrammeni itself.
269 if (!::IsBaseCharacter(iOriginal.Current()))
271 // Otherwise the base character will be at the start of the decomposition
272 // sequence. We will assume that it is folded from a genuine iota if and
273 // only if there is an iota at the start of the folded decomposition
274 // sequence. (In theory there might be an iota with a combining
275 // ypogegrammeni, but in practice this will not occur).
276 TInt index = ::GetDecompositionIndex(iOriginal.Current());
278 TUTF32Iterator folded = ::GetFoldedDecomposition(index);
279 return folded.Current() != 0x3B9;
285 TChar TFoldedDecompIterator::Current() const
288 return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current();
292 Move past this code if it matches unfolded or folded
295 TBool TFoldedDecompIterator::Match(TChar aCode)
298 if (!IsInFoldedSequence())
300 if (aCode == iOriginal.Current())
305 if (!StrictEnterFoldedSequence())
308 ASSERT(IsInFoldedSequence());
309 if (aCode == iFolded.Current())
320 Move this and argument past matching character.
323 TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat)
326 if (!IsInFoldedSequence())
328 if ( aThat.Match(iOriginal.Current()) )
333 if (!StrictEnterFoldedSequence())
336 ASSERT(IsInFoldedSequence());
337 if ( aThat.Match(iFolded.Current()) )
350 void TFoldedDecompIterator::Next()
353 if (IsInFoldedSequence())
356 if (IsInFoldedSequence())
362 ////////////////////////////////////////////////////////////////////////////////////////////
363 // TFoldedSortedDecompIterator
364 ////////////////////////////////////////////////////////////////////////////////////////////
367 Set this iterator to iterate over the next combining characters.
368 Iotas in folded sequences are assumed to be character class 240
369 (combining ypogegrammeni). Next() must be used once before
370 the first call to Current(), as long as AtEnd() returns false.
371 @param aBase Sets the start of the iteration. This value is advanced to the
372 end of the iteration.
373 @return The number of codes in the iteration.
376 TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase)
382 const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
384 // Find the next starter (i.e. character with combining class 0).
385 // We must not count iota folded from ypogegrammeni.
386 // Ypogegrammeni has combining class 240.
387 // Iota has combining class 0.
388 // Also we will be searching for the character with the smallest
389 // combining class to start at.
390 while (!aBase.AtEnd())
392 aBase.EnterFoldedSequence();
393 TChar ch = aBase.Current();
394 TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet);
397 if (aBase.CurrentIsBaseFoldedFromCombiner())
402 if (cl < iCurrentClass)
406 iCurrentCount = length;
413 ASSERT(iLength == 0 || iCurrentClass < 256);
418 Set this iterator so that AtEnd() returns ETrue.
421 void TFoldedSortedDecompIterator::Set()
429 TBool TFoldedSortedDecompIterator::AtEnd() const
431 return iRemaining == 0;
437 TChar TFoldedSortedDecompIterator::Current() const
440 return iCurrent.Current();
446 void TFoldedSortedDecompIterator::Next()
450 while (++iCurrentCount != iLength)
453 if (::GetClass(iCurrent) == iCurrentClass)
456 // We have not found any more of the same class, so we will look
457 // for the smallest one larger.
459 TFoldedDecompIterator searcher(iStart);
460 TInt searchCount = 0;
461 while (searchCount != iLength)
463 TInt cl = ::GetClass(searcher);
464 if (iCurrentClass < cl && cl < minClass)
467 iCurrentCount = searchCount;
473 iCurrentClass = minClass;
474 ASSERT(minClass < 256 || AtEnd());
477 ////////////////////////////////////////////////////////////////////////////////////////////
478 // TFoldedCanonicalIterator
479 ////////////////////////////////////////////////////////////////////////////////////////////
484 TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a)
490 iBase.EnterFoldedSequence();
491 if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
501 TBool TFoldedCanonicalIterator::AtEnd() const
503 return iSorted.AtEnd() && iBase.AtEnd();
509 TChar TFoldedCanonicalIterator::Current() const
511 ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
512 return iSorted.AtEnd()? iBase.Current() : iSorted.Current();
518 void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet)
520 ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
521 if (!iSorted.AtEnd())
529 iBase.EnterFoldedSequence();
530 if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
537 ////////////////////////////////////////////////////////////////////////////////////////////
538 // Folding - Global functions and structures
539 ////////////////////////////////////////////////////////////////////////////////////////////
546 typedef enum {EGenuine, EWildcard} TType;
548 inline TEndTester(TType aType) :
553 inline TBool operator()(const TFoldedDecompIterator& a) const
555 return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard();
564 Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or
565 '*' wildcard or the end of aSearchTerm.
566 It is assumed that the search term begins with a base character.
567 Returns true if and only if the whole search term was matched
568 with a whole number of UTF16s in the candidate string.
569 On return of ETrue the candidate string iterator will have consumed the
570 matching characters the search term will have had all its matching characters
574 TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm,
575 const TEndTester& aEndTester)
577 TBool assumeBase = ETrue;
578 TFoldedDecompIterator st(aSearchTerm);
579 TFoldedDecompIterator cs(aCandidateString);
580 while (!aEndTester(st))
589 // We have a match...
590 if (cs.IsInFoldedSequence())
591 // but it was against only part of a character
592 // in the original string, so we fail.
594 aCandidateString = cs.BaseIterator();
595 aSearchTerm = st.BaseIterator();
600 // did not match. We need to re-order canonically.
602 // The first characters did not match.. do not bother
605 TFoldedSortedDecompIterator csc;
606 TInt cscLength = csc.Set(cs);
608 // If there are less than two characters to be reordered,
609 // there is no hope of getting a match by re-ordering
611 TFoldedSortedDecompIterator stc;
612 if (cscLength != stc.Set(st))
613 // if the strings have differing numbers of characters,
614 // there can be no match
618 ASSERT(!csc.AtEnd());
619 if (stc.Current() != csc.Current())
626 // If the candidate string is in a folded sequence, then
627 // we should not accept the match, as we require all matches
628 // to be for a whole number of characters in the original string.
629 if (cs.IsInFoldedSequence())
631 aCandidateString = cs.BaseIterator();
632 aSearchTerm = st.BaseIterator();
639 TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm,
640 const TEndTester& aEndTester)
642 if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester))
644 return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current());
650 static TBool ConsumeGrapheme(TUTF32Iterator& a)
655 while (!a.AtEnd() && !::IsBaseCharacter(a.Current()))
663 TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
665 TEndTester endTester(TEndTester::EWildcard);
666 while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
668 if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
670 ASSERT(aSearchTerm.Current() == '?');
672 if (!::ConsumeGrapheme(aCandidateString))
681 TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
683 // match as many graphemes as there are leading ?s
684 while (!aSearchTerm.AtEnd()
685 && aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?')
687 if (!::ConsumeGrapheme(aCandidateString))
691 if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
693 TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
694 TUTF32Iterator savedST(aSearchTerm);
695 while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
697 TUTF32Iterator savedCS = aCandidateString;
698 if (::MatchSectionFolded(aCandidateString, aSearchTerm))
700 aSearchTerm = savedST;
701 aCandidateString = savedCS;
702 aCandidateString.Next();
710 TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
711 const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
713 TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd);
714 TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd);
715 // First section of search term must match exactly at the start of the
717 if (!::MatchSectionFolded(candidate, searchTerm))
720 // If there was only one section, it must match the whole candidate string.
721 if (searchTerm.AtEnd())
722 return candidate.AtEnd();
724 while (!searchTerm.AtEnd())
727 if (!::FindMatchSectionFolded(candidate, searchTerm))
731 // The last section must match exactly at the end of the candidate string.
732 if (candidate.AtEnd()) // shortcut
734 const TText16* searchTermLastSection = aSearchTermEnd;
735 while (searchTermLastSection != aSearchTermStart
736 && searchTermLastSection[-1] != '*')
737 --searchTermLastSection;
738 if (searchTermLastSection == aSearchTermEnd)
739 // last section is null, so trivially matches
741 // Count graphemes by counting the number of base characters.
742 // The first one is assumed to start a grapheme.
743 TInt graphemeCount = 1;
744 for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s)
746 if (::IsBaseCharacter(*s))
749 // Count this many graphemes back in the candidate string
750 const TText16* candidateLastSection = aCandidateStringEnd;
751 while (graphemeCount != 0
752 && candidateLastSection != aCandidateStringStart)
754 if (::IsBaseCharacter(*--candidateLastSection))
757 TUTF32Iterator last(candidateLastSection, aCandidateStringEnd);
758 TUTF32Iterator st(searchTermLastSection, aSearchTermEnd);
759 return ::MatchSectionFolded(last, st);
763 Implementation of MatchF
764 (slow if there is a match: MatchStringFolded is better, but does not return
765 the position of the match)
768 TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
769 const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
771 // Quick shortcut for simple non-match
772 if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*')
774 if (aCandidateStringStart == aCandidateStringEnd)
776 // We won't bother with non-characters and surrogate pairs.
777 if (*aSearchTermStart != '?'
778 && *aSearchTermStart < 0xD800
779 && *aCandidateStringStart < 0xD800
780 && ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart))
783 if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd,
784 aSearchTermStart, aSearchTermEnd))
786 // find where it matches
787 while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*')
789 const TText16* end = aSearchTermStart;
790 while (end != aSearchTermEnd && *end != '*')
792 // To preserve existing behaviour, a search term consisting of nothing
793 // but stars is considered to match at 0.
794 if (end == aSearchTermStart)
796 for (const TText16* csSection = aCandidateStringStart;
797 csSection <= aCandidateStringEnd; ++csSection)
799 TUTF32Iterator cs(csSection, aCandidateStringEnd);
800 TUTF32Iterator st(aSearchTermStart, end);
801 if (::MatchSectionFolded(cs, st))
803 // If this match must match exactly at the end, we must keep
805 // This could be a lot faster, with some optimizations.
806 if (end == aSearchTermEnd)
811 return csSection - aCandidateStringStart;
813 // Because this function is using TUTF32Iterator, which means the
814 // original author want to support surrogate. Take it as a defect and
815 // fix it, while do not define a new LocateMatchStringFoldedSurrogate().
816 if (IsSurrogate(*csSection))
819 // this should never happen!
825 Implementation of FindF
828 TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
830 //Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally
831 //compatible with the old one.
832 if(aSearchTerm.Length() == 0)
836 const TText16* candidateStartPosition = aCandidateString.CurrentPosition();
837 TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
838 TUTF32Iterator savedST(aSearchTerm);
839 while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
841 TUTF32Iterator savedCS = aCandidateString;
842 TEndTester endTester(TEndTester::EGenuine);
843 if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
844 return savedCS.CurrentPosition() - candidateStartPosition;
845 aSearchTerm = savedST;
846 aCandidateString = savedCS;
847 aCandidateString.Next();
853 Implementation of boolean CompareF
856 TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight)
858 TEndTester endTester(TEndTester::EGenuine);
859 if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester))
860 return aLeft.AtEnd();
865 Implementation of tri-state CompareF
868 TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight)
870 TFoldedCanonicalIterator left(aLeft);
871 TFoldedCanonicalIterator right(aRight);
873 const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
875 while (!left.AtEnd())
879 TChar cr = right.Current();
880 TChar cl = left.Current();
883 right.Next(charDataSet);
884 left.Next(charDataSet);
886 return right.AtEnd()? 0 : -1;
889 ////////////////////////////////////////////////////////////////////////////////////////////
890 // Composition/Decomposition - Global functions and structures
891 ////////////////////////////////////////////////////////////////////////////////////////////
896 static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex)
898 const TText* start = KNonSingletonDecompositions + aIndex * 2;
899 if (*start != KLongD)
900 return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter);
901 TInt longDecompIndex = start[1];
902 start = KLongDecompositions + (longDecompIndex & 0xFFF);
903 return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter);
907 Come up with a decomposition for the current value pointed at by the iterator
910 static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt)
912 TInt index = ::GetDecompositionIndex(aUTF32It.Current());
913 TInt singletonIndex = index - (sizeof(KNonSingletonDecompositions)/sizeof(KNonSingletonDecompositions[0])/2);
914 const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0]);
915 if(index < 0 || SizeOfSingletonTable <= singletonIndex)
917 aOutIt = aUTF32It.CurrentAsIterator();
918 return EFalse;//There is not any valid decomposition
920 if(0 <= singletonIndex)
922 // KSingletonDecompositions contains some items that come from "ShortDecompsLongFolds".
923 // "ShortDecompsLongFolds" contains some characters that have no composition, but have "long fold".
924 // This basically specific to non-BMP characters with fold also outside BMP.
926 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
927 // In Unicode 5.0.0, totally, there are 40 similar characters, which are U+10400-U+10427.
928 if (KSingletonDecompositions[singletonIndex] == 0xFFFF)
930 aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex);
934 aOutIt = ::IndexToNonSingletonDecomposition(index);
936 return ETrue;//Valid decomposition
942 static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf)
949 aBuf.Append((aChar >> 10) + 0xD7C0);
950 aBuf.Append((aChar & 0x3FF) + 0xDC00);
952 const TText16* t = aBuf.Ptr();
953 return TUTF32Iterator(t, t + aBuf.Length());
959 TBool DecomposeChar(TChar aCh, TPtrC16& aResult)
961 aResult.Set(NULL, 0);
963 TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf);
964 TBool res = ::Decompose(it, it);
967 aResult.Set(it.CurrentPosition(), it.Length());
973 Turn an index into the hash table known to point to a non-singleton
974 decomposition into that decomposition.
977 static TUTF32Iterator HashIndexToDecomposition(TInt aIndex)
979 TUint32 v = KUnicodeToIndexHash[aIndex];
981 TInt index = static_cast<TInt>(v >> 20);
982 ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2);
983 return ::IndexToNonSingletonDecomposition(index);
987 Takes a start and (one past the) end index into KCompostionMapping
988 and a number of UTF16 characters (aLengthSoFar). All of the compositions
989 within the range must have their first aLengthSoFar UTF16 characters
992 On entry, if aStart == aEnd then there is no possibility of a match so return
993 immediately with EFalse. To continue, aStart must be strictly less than aEnd.
995 Afterwards, aStart and aEnd will be narrowed to all those compositions
996 where the aLengthSoFar'th UTF16 character matches aNextCharacter.
997 No further compositions existing is indicated by aStart == aEnd.
999 @return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1.
1002 static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter)
1006 ASSERT((TUint)aStart < (TUint)aEnd);
1007 ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping));
1008 TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart]));
1009 if (startIterator.Length() == aLengthSoFar)
1012 // Find a single example of a decomposition that is suitable
1014 TUTF32Iterator midIt;
1019 mid = aStart + ((aEnd - aStart) >> 1);
1020 midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]);
1021 ASSERT(aLengthSoFar < midIt.Length());
1022 TInt midItChar = midIt[aLengthSoFar];
1023 if (midItChar < aNextCharacter)
1025 else if (aNextCharacter < midItChar)
1029 startIterator = midIt;
1034 // FInd the first decomposition that does not match
1035 TInt start2 = mid + 1;
1036 while (start2 != aEnd)
1038 ASSERT(start2 < aEnd);
1039 TInt mid2 = start2 + ((aEnd - start2) >> 1);
1040 midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
1041 ASSERT(aLengthSoFar < midIt.Length());
1042 TInt midItChar = midIt[aLengthSoFar];
1043 ASSERT(aNextCharacter <= midItChar);
1044 if (aNextCharacter < midItChar)
1050 // Find the first decomposition that matches
1051 while (aStart != mid)
1053 ASSERT(aStart < mid);
1054 TInt mid2 = aStart + ((mid - aStart) >> 1);
1055 midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
1056 ASSERT(aLengthSoFar < midIt.Length());
1057 TInt midItChar = midIt[aLengthSoFar];
1058 ASSERT(midItChar <= aNextCharacter);
1059 if (midItChar < aNextCharacter)
1063 startIterator = midIt;
1068 return startIterator.Length() == (aLengthSoFar + 1);
1074 static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar)
1076 if (aChar < 0x10000)
1077 return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar);
1078 ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0);
1081 return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00);
1085 Combine as many of the characters presented as possible into a single
1087 @return The number of characters successfully combined.
1088 @param aCombined If a nonzero value is returned, this contains
1089 the character that is that number of characters from the start of
1093 TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined)
1096 TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]);
1099 TInt bestLength = 0;
1100 const TText16* ptr = aDes.Ptr();
1101 TUTF32Iterator input(ptr, ptr + aDes.Length());
1102 while (!input.AtEnd())
1104 if (::RefineCompositionUTF32(start, end, length, input.Current()))
1107 bestLength = length;
1111 if (bestLength == 0)
1113 aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF;
1117 //////////////////////////////////////////////////////////////////////////////////////////////
1119 //////////////////////////////////////////////////////////////////////////////////////////////
1121 //////////////////////////////////////////////////////////////////////////////////////////////
1122 // TDecompositionIterator class
1127 void TDecompositionIterator::Set(const TUTF32Iterator& a)
1132 (void)::Decompose(iBase, iDecomposition);
1139 TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a)
1147 TBool TDecompositionIterator::AtEnd() const
1149 return iBase.AtEnd();
1155 TChar TDecompositionIterator::Current() const
1157 return iDecomposition.Current();
1163 void TDecompositionIterator::Next()
1165 ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd());
1166 iDecomposition.Next();
1167 if (!iDecomposition.AtEnd())
1172 (void)::Decompose(iBase, iDecomposition);
1179 const TText16* TDecompositionIterator::CurrentPosition() const
1181 return iBase.CurrentPosition();
1185 Find out the length and minimum combining class of
1186 the current run of characters of nonzero combining class.
1187 aMinClass and aMinClassPos are not written to if the return
1189 aEndOfRun is written to with the final position of the iteration
1190 if 0 is returned and aEndOfRun is non-null
1193 static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos,
1194 const TDecompositionIterator& aStart, TBool* aOpenSequence,
1195 TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0)
1197 TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass();
1201 *aEndOfRun = aStart;
1203 *aOpenSequence = aStart.AtEnd();
1207 TDecompositionIterator i = aStart;
1209 while (comclass != 0)
1211 if (aMaxDisallowedClass < comclass)
1213 if (comclass < aMinClass)
1215 aMinClass = comclass;
1221 comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass();
1223 if (count == 0 && aEndOfRun)
1226 *aOpenSequence = i.AtEnd();
1230 //////////////////////////////////////////////////////////////////////////////////////////////
1231 // TCanonicalDecompositionIterator class
1236 void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a)
1240 if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
1241 iCurrentCombiningClass = 0;
1247 TBool TCanonicalDecompositionIterator::AtEnd() const
1249 return iBase.AtEnd();
1255 TChar TCanonicalDecompositionIterator::Current() const
1257 return iCurrentCombiningClass? iCurrent.Current() : iBase.Current();
1263 void TCanonicalDecompositionIterator::Next()
1265 iLastPosition = iBase.CurrentPosition();
1266 if (iCurrentCombiningClass == 0)
1269 if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
1270 iCurrentCombiningClass = 0;
1273 // Find the next character in the run with the same combining class
1275 TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
1276 while (curclass != 0)
1278 if (curclass == iCurrentCombiningClass)
1282 curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
1284 // There are none left in the current class. Find out what the next one is.
1285 if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase))
1286 iCurrentCombiningClass = 0;
1292 const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const
1294 if (iCurrentCombiningClass != 0)
1296 const TText16* p = iBase.CurrentPosition();
1297 return iLastPosition == p? 0 : p;
1303 TBool TCanonicalDecompositionIterator::IsInOpenSequence() const
1305 return iInOpenSequence;
1308 //////////////////////////////////////////////////////////////////////////////////////////////
1309 // TCanonicalDecompositionIteratorCached class
1314 void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a)
1324 TBool TCanonicalDecompositionIteratorCached::AtEnd() const
1326 return iCacheSize == 0 && iBase.AtEnd();
1332 void TCanonicalDecompositionIteratorCached::Next(TInt aOffset)
1334 ASSERT(0 <= aOffset);
1335 ASSERT(0 <= iCacheSize);
1336 ASSERT(0 != iCacheSize || !iBase.AtEnd());
1337 if (aOffset <= iCacheSize)
1339 iCacheSize -= aOffset;
1340 iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1);
1343 aOffset -= iCacheSize;
1345 while (aOffset != 0)
1353 Get the character at the position of the iterator plus aOffset steps.
1354 Returns -1 if we are looking too far ahead.
1357 TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset)
1359 // should be assert debug: there is a chance this could go off with
1360 // bad collation tables
1361 ASSERT(aOffset <= KMaxLookAhead);
1362 while (iCacheSize <= aOffset)
1365 return TChar(static_cast <TUint> (-1));
1366 TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1);
1367 iCache[cachePos].iChar = iBase.Current();
1368 iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter();
1372 return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar;
1376 If the current position in the original string is representable
1377 as a pointer into it and we know what it is, return it.
1380 const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const
1384 return iBase.CurrentPositionIfAtCharacter();
1386 return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos;