| sl@0 |      1 | // Copyright (c) 2002-2009 Nokia Corporation and/or its subsidiary(-ies).
 | 
| sl@0 |      2 | // All rights reserved.
 | 
| sl@0 |      3 | // This component and the accompanying materials are made available
 | 
| sl@0 |      4 | // under the terms of the License "Eclipse Public License v1.0"
 | 
| sl@0 |      5 | // which accompanies this distribution, and is available
 | 
| sl@0 |      6 | // at the URL "http://www.eclipse.org/legal/epl-v10.html".
 | 
| sl@0 |      7 | //
 | 
| sl@0 |      8 | // Initial Contributors:
 | 
| sl@0 |      9 | // Nokia Corporation - initial contribution.
 | 
| sl@0 |     10 | //
 | 
| sl@0 |     11 | // Contributors:
 | 
| sl@0 |     12 | //
 | 
| sl@0 |     13 | // Description:
 | 
| sl@0 |     14 | // Folding and decomposition implementation
 | 
| sl@0 |     15 | // 
 | 
| sl@0 |     16 | //
 | 
| sl@0 |     17 | 
 | 
| sl@0 |     18 | #ifndef __COMPAREIMP_H__
 | 
| sl@0 |     19 | #define __COMPAREIMP_H__
 | 
| sl@0 |     20 | 
 | 
| sl@0 |     21 | #include <e32std.h>
 | 
| sl@0 |     22 | #include <unicode.h>
 | 
| sl@0 |     23 | 
 | 
| sl@0 |     24 | //Forward declarations
 | 
| sl@0 |     25 | class TUTF32Iterator;
 | 
| sl@0 |     26 | 
 | 
| sl@0 |     27 | //////////////////////////////////////////////////////////////////////////////////////////////
 | 
| sl@0 |     28 | // Global functions (declarations only; notes marked NOTE(review) are inferred from names and signatures)
 | 
| sl@0 |     29 | //////////////////////////////////////////////////////////////////////////////////////////////
 | 
| sl@0 |     30 | 
 | 
| sl@0 |     31 | TChar UTF16ToChar(const TText16* a); // Takes a pointer to UTF-16 text, returns a TChar. NOTE(review): presumably combines a surrogate pair at a into one character -- confirm
 | 
| sl@0 |     32 | TBool IsBaseCharacter(TChar); // NOTE(review): presumably true when the character is not a combining character -- confirm
 | 
| sl@0 |     33 | 
 | 
| sl@0 |     34 | TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); // Both iterators are non-const references, so the callee can advance them. NOTE(review): exact match semantics are not visible here
 | 
| sl@0 |     35 | 
 | 
| sl@0 |     36 | TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); // Same parameter shape as MatchSectionFolded. NOTE(review): presumably searches for the section instead of matching in place -- confirm
 | 
| sl@0 |     37 | 
 | 
| sl@0 |     38 | TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, // Candidate text is given as a start/end pointer pair
 | 
| sl@0 |     39 |                         const TText16* aSearchTermStart, const TText16* aSearchTermEnd); // Search term likewise. NOTE(review): presumably returns true on a match -- confirm
 | 
| sl@0 |     40 | 
 | 
| sl@0 |     41 | TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, // Same arguments as MatchStringFolded, but the result is a TInt
 | 
| sl@0 |     42 |                              const TText16* aSearchTermStart, const TText16* aSearchTermEnd); // NOTE(review): presumably the match position, or a negative value when there is no match -- confirm
 | 
| sl@0 |     43 | 
 | 
| sl@0 |     44 | TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); // Iterators are non-const references. NOTE(review): presumably returns the position of aSearchTerm, or a negative value -- confirm
 | 
| sl@0 |     45 | 
 | 
| sl@0 |     46 | TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight); // Iterators are taken by const reference. NOTE(review): presumably returns <0, 0 or >0 for ordering -- confirm
 | 
| sl@0 |     47 | 
 | 
| sl@0 |     48 | TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined); // aCombined is a non-const reference, so the callee can write it. NOTE(review): meaning of the TInt result is not visible here
 | 
| sl@0 |     49 | 
 | 
| sl@0 |     50 | TBool DecomposeChar(TChar aCh, TPtrC16& aResult); // aResult is a non-const reference, so the callee can write it. NOTE(review): presumably returns whether aCh has a decomposition -- confirm
 | 
| sl@0 |     51 | 
 | 
| sl@0 |     52 | inline void SkipCombiningCharacters(TUTF32Iterator& aUTF32It); // Declared inline. NOTE(review): definition is presumably in CompareImp.inl, which this header includes at its end
 | 
| sl@0 |     53 | 
 | 
| sl@0 |     54 | /**
 | 
| sl@0 |     55 | Converts UTF16 into UTF32, ignoring non-characters and
 | 
| sl@0 |     56 | unpaired surrogates and combining paired surrogates.
 | 
| sl@0 |     57 | State is a start/end pointer pair into the UTF-16 text plus one TChar (see the private members). @internalComponent
 | 
| sl@0 |     58 | */
 | 
| sl@0 |     59 | class TUTF32Iterator
 | 
| sl@0 |     60 | 	{
 | 
| sl@0 |     61 | public:
 | 
| sl@0 |     62 | 	enum TStartsWithValidCharacter { EStartsWithValidCharacter }; // Tag type; its only use in this header is to select the three-argument constructor below
 | 
| sl@0 |     63 | 	inline TUTF32Iterator(); // Default constructor. NOTE(review): initial state is not visible here
 | 
| sl@0 |     64 | 	inline explicit TUTF32Iterator(const TText16* aSingleton); // explicit, so a bare TText16* never converts implicitly. NOTE(review): presumably iterates over the single character at aSingleton -- confirm
 | 
| sl@0 |     65 | 	inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd); // Constructed from a start/end pointer pair
 | 
| sl@0 |     66 | 	inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd, TStartsWithValidCharacter); // Same pair plus the tag. NOTE(review): presumably skips validating the first character -- confirm
 | 
| sl@0 |     67 | 
 | 
| sl@0 |     68 | 	inline TUTF32Iterator CurrentAsIterator() const; // Returns a new iterator by value. NOTE(review): presumably one covering only the current character -- confirm
 | 
| sl@0 |     69 | 	inline TBool AtEnd() const; // NOTE(review): presumably true when no characters remain
 | 
| sl@0 |     70 | 	void Next(); // Non-const and not declared inline here. NOTE(review): presumably advances to the next valid character
 | 
| sl@0 |     71 | 	inline TChar Current() const; // NOTE(review): presumably returns iCurrent
 | 
| sl@0 |     72 | 	TBool LocateFoldedBaseCharacter(TChar aChar); // Non-const, so it may move the iterator. NOTE(review): presumably advances to a base character that folds to aChar -- confirm
 | 
| sl@0 |     73 | 	inline const TText16* CurrentPosition() const; // Returns a pointer to UTF-16 text
 | 
| sl@0 |     74 | 	inline TInt Length() const; // NOTE(review): unit of the length (code units or characters) is not visible here
 | 
| sl@0 |     75 | 	inline TInt operator[](TInt) const; // Returns a TInt. NOTE(review): presumably the UTF-16 code unit at the given offset -- confirm
 | 
| sl@0 |     76 | 	inline void SetStart(const TText16*); // NOTE(review): presumably repositions iStart -- confirm
 | 
| sl@0 |     77 | private:
 | 
| sl@0 |     78 | 	const TText16* iStart; // NOTE(review): presumably the current read position in the UTF-16 text -- confirm
 | 
| sl@0 |     79 | 	const TText16* iEnd; // NOTE(review): presumably the end of the UTF-16 text -- confirm
 | 
| sl@0 |     80 | 	TChar iCurrent; // NOTE(review): presumably the decoded character handed out by Current()
 | 
| sl@0 |     81 | 	};
 | 
| sl@0 |     82 | 
 | 
| sl@0 |     83 | //////////////////////////////////////////////////////////////////////////////////////////////
 | 
| sl@0 |     84 | // FOLDING
 | 
| sl@0 |     85 | //////////////////////////////////////////////////////////////////////////////////////////////
 | 
| sl@0 |     86 | 
 | 
| sl@0 |     87 | /** Iterator built from two TUTF32Iterator members, iOriginal and iFolded. NOTE(review): judging by the names it walks the original text and, where a character folds, the folded sequence -- confirm against the implementation.
 | 
| sl@0 |     88 | @internalComponent
 | 
| sl@0 |     89 | */
 | 
| sl@0 |     90 | class TFoldedDecompIterator
 | 
| sl@0 |     91 | 	{
 | 
| sl@0 |     92 | public:
 | 
| sl@0 |     93 | 	inline TFoldedDecompIterator(); // Default constructor. NOTE(review): initial state is not visible here
 | 
| sl@0 |     94 | 	explicit TFoldedDecompIterator(const TUTF32Iterator&); // explicit, so a TUTF32Iterator never converts implicitly
 | 
| sl@0 |     95 | 	inline void Set(const TUTF32Iterator&); // Takes the source iterator by const reference
 | 
| sl@0 |     96 | 	TBool AtEnd() const; // NOTE(review): presumably true when nothing is left to iterate
 | 
| sl@0 |     97 | 	TBool AtEndOrWildcard() const; // NOTE(review): which characters count as wildcards is not visible here
 | 
| sl@0 |     98 | 	TBool EnterFoldedSequence(); // Non-const. NOTE(review): presumably switches to iterating iFolded -- confirm
 | 
| sl@0 |     99 | 	TBool StrictEnterFoldedSequence(); // Non-const. NOTE(review): how "strict" differs from EnterFoldedSequence is not visible here
 | 
| sl@0 |    100 | 	inline TBool IsInFoldedSequence() const; // Const query paired by name with the two Enter...FoldedSequence functions
 | 
| sl@0 |    101 | 	TBool CurrentIsBaseFoldedFromCombiner() const; // NOTE(review): semantics are not visible here; name suggests a base character produced by folding a combining character
 | 
| sl@0 |    102 | 	TChar Current() const; // Returns a TChar by value
 | 
| sl@0 |    103 | 	TBool Match(TChar aCode); // Non-const, so it may move this iterator
 | 
| sl@0 |    104 | 	TBool Match(TFoldedDecompIterator& aThat); // aThat is a non-const reference, so it may be moved as well
 | 
| sl@0 |    105 | 	void Next(); // NOTE(review): presumably advances by one folded, decomposed character
 | 
| sl@0 |    106 | 	inline TUTF32Iterator BaseIterator() const; // Returns a TUTF32Iterator by value. NOTE(review): presumably a copy of iOriginal -- confirm
 | 
| sl@0 |    107 | private:
 | 
| sl@0 |    108 | 	TUTF32Iterator iOriginal; // NOTE(review): presumably the position in the unfolded input
 | 
| sl@0 |    109 | 	TUTF32Iterator iFolded; // NOTE(review): presumably the position inside the current folded sequence, if any
 | 
| sl@0 |    110 | 	};
 | 
| sl@0 |    111 | 
 | 
| sl@0 |    112 | /**
 | 
| sl@0 |    113 | Sorts sequences of combining characters with non-zero combining classes into
 | 
| sl@0 |    114 | order of their combining classes.
 | 
| sl@0 |    115 | @internalComponent
 | 
| sl@0 |    116 | */
 | 
| sl@0 |    117 | class TFoldedSortedDecompIterator
 | 
| sl@0 |    118 | 	{
 | 
| sl@0 |    119 | public:
 | 
| sl@0 |    120 | 	inline TFoldedSortedDecompIterator(); // Default constructor. NOTE(review): initial state is not visible here
 | 
| sl@0 |    121 | 	TInt Set(TFoldedDecompIterator &aBase); // aBase is a non-const reference, so it may be advanced. NOTE(review): meaning of the TInt result is not visible here
 | 
| sl@0 |    122 | 	void Set(); // No-argument overload. NOTE(review): presumably resets to an empty state -- confirm
 | 
| sl@0 |    123 | 	TBool AtEnd() const; // NOTE(review): presumably true when iRemaining reaches zero -- confirm
 | 
| sl@0 |    124 | 	TChar Current() const; // Returns a TChar by value
 | 
| sl@0 |    125 | 	void Next(); // Non-const step function; see iRemaining below
 | 
| sl@0 |    126 | 
 | 
| sl@0 |    127 | private:
 | 
| sl@0 |    128 | 	TFoldedDecompIterator iStart; // Starting code.	
 | 
| sl@0 |    129 | 	TInt iLength; // Length in decomposed codes.
 | 
| sl@0 |    130 | 	TFoldedDecompIterator iCurrent; // Current code.
 | 
| sl@0 |    131 | 	TInt iCurrentCount; // Number of decomposed codes iCurrent is past iStart
 | 
| sl@0 |    132 | 	TInt iCurrentClass; // Current class being searched for.
 | 
| sl@0 |    133 | 	TInt iRemaining; // Number of Next()s left
 | 
| sl@0 |    134 | 	};
 | 
| sl@0 |    135 | 
 | 
| sl@0 |    136 | /**
 | 
| sl@0 |    137 | Iterator that outputs canonically decomposed folded strings.
 | 
| sl@0 |    138 | This is much slower than using the matching functions, so should only
 | 
| sl@0 |    139 | be used where an ordering is required.
 | 
| sl@0 |    140 | @internalComponent
 | 
| sl@0 |    141 | */
 | 
| sl@0 |    142 | class TFoldedCanonicalIterator
 | 
| sl@0 |    143 | 	{
 | 
| sl@0 |    144 | public:
 | 
| sl@0 |    145 | 	TFoldedCanonicalIterator(const TUTF32Iterator&); // Not declared explicit, so a TUTF32Iterator converts implicitly to this type
 | 
| sl@0 |    146 | 	TBool AtEnd() const; // NOTE(review): presumably true when both iBase and iSorted are exhausted -- confirm
 | 
| sl@0 |    147 | 	TChar Current() const; // Returns a TChar by value
 | 
| sl@0 |    148 | 	void Next(const TUnicodeDataSet* aCharDataSet); // Unlike the other Next() functions in this header, takes a data set pointer. NOTE(review): whether NULL is accepted is not visible here
 | 
| sl@0 |    149 | private:
 | 
| sl@0 |    150 | 	TFoldedDecompIterator iBase; // Underlying folded decomposition iterator
 | 
| sl@0 |    151 | 	TFoldedSortedDecompIterator iSorted; // NOTE(review): presumably active while a run of combining characters is being reordered -- confirm
 | 
| sl@0 |    152 | 	};
 | 
| sl@0 |    153 | 
 | 
| sl@0 |    154 | 
 | 
| sl@0 |    155 | //////////////////////////////////////////////////////////////////////////////////////////////
 | 
| sl@0 |    156 | // COLLATION
 | 
| sl@0 |    157 | //////////////////////////////////////////////////////////////////////////////////////////////
 | 
| sl@0 |    158 | 
 | 
| sl@0 |    159 | /** Iterator built from two TUTF32Iterator members, iBase and iDecomposition. NOTE(review): judging by the names it yields the decomposition of each input character, without folding -- confirm against the implementation.
 | 
| sl@0 |    160 | @internalComponent
 | 
| sl@0 |    161 | */
 | 
| sl@0 |    162 | class TDecompositionIterator
 | 
| sl@0 |    163 | 	{
 | 
| sl@0 |    164 | public:
 | 
| sl@0 |    165 | 	inline TDecompositionIterator(); // Default constructor. NOTE(review): initial state is not visible here
 | 
| sl@0 |    166 | 	void Set(const TUTF32Iterator&); // Takes the source iterator by const reference
 | 
| sl@0 |    167 | 	explicit TDecompositionIterator(const TUTF32Iterator&); // explicit, so a TUTF32Iterator never converts implicitly
 | 
| sl@0 |    168 | 	TBool AtEnd() const; // NOTE(review): presumably true when nothing is left to iterate
 | 
| sl@0 |    169 | 	TChar Current() const; // Returns a TChar by value
 | 
| sl@0 |    170 | 	void Next(); // NOTE(review): presumably advances by one decomposed character
 | 
| sl@0 |    171 | 	const TText16* CurrentPosition() const; // Returns a pointer to UTF-16 text. NOTE(review): presumably the position of iBase in the input -- confirm
 | 
| sl@0 |    172 | private:
 | 
| sl@0 |    173 | 	TUTF32Iterator iBase; // NOTE(review): presumably the position in the undecomposed input
 | 
| sl@0 |    174 | 	TUTF32Iterator iDecomposition; // NOTE(review): presumably the position inside the current character's decomposition, if any
 | 
| sl@0 |    175 | 	};
 | 
| sl@0 |    176 | 
 | 
| sl@0 |    177 | /** Iterator layered on TDecompositionIterator (members iBase and iCurrent) that also tracks a combining class. NOTE(review): presumably reorders combining characters into canonical order -- confirm against the implementation.
 | 
| sl@0 |    178 | @internalComponent
 | 
| sl@0 |    179 | */
 | 
| sl@0 |    180 | class TCanonicalDecompositionIterator
 | 
| sl@0 |    181 | 	{
 | 
| sl@0 |    182 | public:
 | 
| sl@0 |    183 | 	inline TCanonicalDecompositionIterator(); // Default constructor. NOTE(review): initial state is not visible here
 | 
| sl@0 |    184 | 	void Set(const TUTF32Iterator&); // Takes the source iterator by const reference
 | 
| sl@0 |    185 | 	TBool AtEnd() const; // NOTE(review): presumably true when nothing is left to iterate
 | 
| sl@0 |    186 | 	TChar Current() const; // Returns a TChar by value
 | 
| sl@0 |    187 | 	void Next(); // NOTE(review): presumably advances by one canonically ordered character
 | 
| sl@0 |    188 | 	const TText16* CurrentPositionIfAtCharacter() const; // Returns a pointer to UTF-16 text. NOTE(review): presumably NULL when the position does not fall on a character boundary -- confirm
 | 
| sl@0 |    189 | 	TBool IsInOpenSequence() const; // NOTE(review): presumably returns iInOpenSequence
 | 
| sl@0 |    190 | private:
 | 
| sl@0 |    191 | 	TDecompositionIterator iBase;
 | 
| sl@0 |    192 | 	// iBase.CurrentPosition() before the last move
 | 
| sl@0 |    193 | 	const TText16* iLastPosition;
 | 
| sl@0 |    194 | 	// If iCurrent is active, iCurrentCombiningClass
 | 
| sl@0 |    195 | 	// is nonzero, and represents the combining class
 | 
| sl@0 |    196 | 	// of the character it points to.
 | 
| sl@0 |    197 | 	TInt iCurrentCombiningClass;
 | 
| sl@0 |    198 | 	// contains true if more characters added to the end may change
 | 
| sl@0 |    199 | 	// the characters currently being output
 | 
| sl@0 |    200 | 	TBool iInOpenSequence;
 | 
| sl@0 |    201 | 	// Iterator that looks for characters to be sorted.
 | 
| sl@0 |    202 | 	TDecompositionIterator iCurrent;
 | 
| sl@0 |    203 | 	};
 | 
| sl@0 |    204 | 
 | 
| sl@0 |    205 | /**
 | 
| sl@0 |    206 | Iterator that gives the canonically decomposed form of
 | 
| sl@0 |    207 | its input, and allows a limited amount of look-ahead (i.e.
 | 
| sl@0 |    208 | peeking further into the decomposition without moving
 | 
| sl@0 |    209 | the iterator)
 | 
| sl@0 |    210 | @internalComponent
 | 
| sl@0 |    211 | */
 | 
| sl@0 |    212 | class TCanonicalDecompositionIteratorCached
 | 
| sl@0 |    213 | 	{
 | 
| sl@0 |    214 | public:
 | 
| sl@0 |    215 | 	void Set(const TUTF32Iterator&); // No constructor is declared in this class. NOTE(review): callers presumably must call Set() before any other member -- confirm
 | 
| sl@0 |    216 | 	TBool AtEnd() const; // NOTE(review): presumably true when the cache is empty and iBase is exhausted -- confirm
 | 
| sl@0 |    217 | 	// Advance aOffset characters.
 | 
| sl@0 |    218 | 	void Next(TInt aOffset);
 | 
| sl@0 |    219 | 	// Get the character at the position of the iterator plus aOffset steps.
 | 
| sl@0 |    220 | 	// Returns FFFF if we are looking too far ahead.
 | 
| sl@0 |    221 | 	TChar Get(TInt aOffset); // Non-const, so a look-ahead may change the cache state
 | 
| sl@0 |    222 | 	// If the current position in the original string is representable
 | 
| sl@0 |    223 | 	// as a pointer into it and we know what it is, return it.
 | 
| sl@0 |    224 | 	const TText16* CurrentPositionIfAtCharacter() const; // NOTE(review): result when the position is not known is presumably NULL -- confirm
 | 
| sl@0 |    225 | private:
 | 
| sl@0 |    226 | 	// KMaxLookAhead must be a power of 2
 | 
| sl@0 |    227 | 	enum { KMaxLookAhead = 8 };
 | 
| sl@0 |    228 | 	TCanonicalDecompositionIterator iBase; // Underlying uncached iterator
 | 
| sl@0 |    229 | 	struct TCache // One cached entry: a character and an associated pointer to UTF-16 text
 | 
| sl@0 |    230 | 		{
 | 
| sl@0 |    231 | 		TChar iChar;
 | 
| sl@0 |    232 | 		const TText16* iPos; // NOTE(review): presumably the CurrentPositionIfAtCharacter() value recorded for iChar -- confirm
 | 
| sl@0 |    233 | 		};
 | 
| sl@0 |    234 | 	TCache iCache[KMaxLookAhead + 1]; // Holds KMaxLookAhead + 1 entries. NOTE(review): how indices wrap, given the power-of-2 requirement on KMaxLookAhead, is not visible here
 | 
| sl@0 |    235 | 	TInt iCacheStart; // NOTE(review): presumably the index of the first valid entry in iCache
 | 
| sl@0 |    236 | 	TInt iCacheSize; // NOTE(review): presumably the number of valid entries in iCache
 | 
| sl@0 |    237 | 	};
 | 
| sl@0 |    238 | 
 | 
| sl@0 |    239 | #include "CompareImp.inl"
 | 
| sl@0 |    240 | 
 | 
| sl@0 |    241 | #endif //__COMPAREIMP_H__
 |