epoc32/include/collate.h
branchSymbian3
changeset 4 837f303aceeb
parent 3 e1b950c65cb4
     1.1 --- a/epoc32/include/collate.h	Wed Mar 31 12:27:01 2010 +0100
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,362 +0,0 @@
     1.4 -// Copyright (c) 1996-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.5 -// All rights reserved.
     1.6 -// This component and the accompanying materials are made available
     1.7 -// under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
     1.8 -// which accompanies this distribution, and is available
     1.9 -// at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
    1.10 -//
    1.11 -// Initial Contributors:
    1.12 -// Nokia Corporation - initial contribution.
    1.13 -//
    1.14 -// Contributors:
    1.15 -//
    1.16 -// Description:
    1.17 -// e32\include\collate.h
    1.18 -// Definitions needed for Unicode collation.
    1.19 -// Collation is the comparison of two Unicode strings to produce an ordering
    1.20 -// that may be used in a dictionary or other list.
    1.21 -// Collation is implemented using the Standard Unicode Collation algorithm. There
    1.22 -// are four levels of comparison:
    1.23 -// primary: basic character identity
    1.24 -// secondary: accents and diacritics
    1.25 -// tertiary: upper and lower case, and other minor attributes
    1.26 -// quaternary: Unicode character value
    1.27 -// Punctuation is normally ignored but can optionally be taken into account.
    1.28 -// Strings are fully expanded using the standard Unicode canonical expansions before
    1.29 -// they are compared. Thai and Lao vowels are swapped with the following character
    1.30 -// if any.
    1.31 -// EUSER contains the 'basic collation method'. This method assigns the standard Unicode collation key values
    1.32 -// to the characters in the WGL4 repertoire, plus commonly used control characters and fixed-width spaces, plus
    1.33 -// the CJK ideograms (for which the keys can be generated algorithmically). Other characters are collated after
    1.34 -// all the characters for which keys are defined, and ordered by their Unicode values.
    1.35 -// Locales can supply any number of other collation methods. They will usually supply a 'tailoring' of the standard
    1.36 -// method. This is done by using the standard table as the main key table (signalled by placing NULL in
    1.37 -// TCollationMethod::iMainTable) and specifying an override table (TCollationMethod::iOverrideTable).
    1.38 -// Locale-specific collation data resides in ELOCL.
    1.39 -// 
    1.40 -//
    1.41 -
    1.42 -
    1.43 -
    1.44 -#ifndef __COLLATE_H__
    1.45 -#define __COLLATE_H__
    1.46 -
    1.47 -#ifdef __KERNEL_MODE__
    1.48 -#include <e32cmn.h>
    1.49 -#else
    1.50 -#include <e32std.h>
    1.51 -#endif
    1.52 -
    1.53 -//This material is used in the Unicode build only.
    1.54 -#ifdef _UNICODE
    1.55 -
    1.56 -/**
    1.57 -Collation key table structure.
    1.58 -@publishedPartner
    1.59 -*/
    1.60 -struct TCollationKeyTable
    1.61 -	{
    1.62 -public:
    1.63 -	/**
    1.64 -	Masks for the various parts of the elements of the iKey array.
    1.65 -	*/
    1.66 -	enum
    1.67 -		{
    1.68 -		ELevel0Mask = 0xFFFF0000,	// primary key - basic character identity
    1.69 -		ELevel1Mask = 0x0000FF00,	// secondary key - accents and diacritics
    1.70 -		ELevel2Mask = 0x000000FC,	// tertiary key - case, etc.
    1.71 -		EIgnoreFlag = 0x2,			// if set, this key is normally ignored
    1.72 -		EStopFlag = 0x1				// if set, this key is the last in a sequence representing a Unicode value or values
    1.73 -		};
    1.74 -
    1.75 -	/**
    1.76 -	An array containing all of the keys and strings of keys concatenated
    1.77 -	together. Each key has EStopFlag set only if it is the last key in its
    1.78 -	string. Each key contains the keys for levels 0, 1 and 2, and a flag
    1.79 -	EIgnoreFlag if the key is usually ignored (for punctuation & spaces
    1.80 -	etc.).
    1.81 -	*/
    1.82 -	const TUint32* iKey;
    1.83 -	/**
    1.84 -	An array of indices into the iKey array. Each element has its high 16
    1.85 -	bits indicating a Unicode value and its low 16 bits indicating an index
    1.86 -	into the iKey array at which its key starts. The elements are sorted by
    1.87 -	Unicode value.
    1.88 -	*/
    1.89 -	const TUint32* iIndex;
    1.90 -	/**
    1.91 -	The size of the iIndex array.
    1.92 -	*/
    1.93 -	TInt iIndices;
    1.94 -	/**
    1.95 -	Concatenated Unicode strings. Each is a string that is to be converted
    1.96 -	to keys differently from how it would be if each letter were converted
    1.97 -	independently. An example is "ch" in Spanish, which sorts as though it
    1.98 -	were a single letter. Each Unicode string is preceded by a 16-bit value
    1.99 -	indicating the string's length. The end of the string is not delimited.
   1.100 -	*/
   1.101 -	const TUint16* iString;
   1.102 -	/**
   1.103 -	An array of elements mapping elements of iString to elements of iIndex.
   1.104 -	Each element has its high 16 bits indicating the index of the start of
   1.105 -	an element of iString, and its low 16 bits indicating the corresponding
   1.106 -	element in iIndex. This array is sorted on the string index.
   1.107 -	*/
   1.108 -	const TUint32* iStringIndex;
   1.109 -	/**
   1.110 -	The size of the iStringIndex array.
   1.111 -	*/
   1.112 -	TInt iStringIndices;
   1.113 -	};
   1.114 -
   1.115 -/**
   1.116 -Defines a collation method. 
   1.117 -
   1.118 -Collation means sorting pieces of text. It needs to take into account characters, 
   1.119 -accents and case; spaces and punctuation are usually ignored. It differs from 
   1.120 -ordinary methods of sorting in that it is locale-dependent - different 
   1.121 -languages use different ordering methods. Additionally, multiple collation 
   1.122 -methods may exist within the same locale.
   1.123 -
   1.124 -A collation method provides the collation keys and other data needed to customise 
   1.125 -collation; the Mem and TDesC16 collation functions (e.g. Mem::CompareC()) 
   1.126 -perform the collation. Note that these functions use the standard collation 
   1.127 -method for the current locale - you only need to specify an object of class 
   1.128 -TCollationMethod to customise this collation scheme. Collation methods can 
   1.129 -be retrieved using member functions of the Mem class. Each one has a unique 
   1.130 -identifier.
   1.131 -
   1.132 -A collation method specifies a main table of collation keys, and optionally 
   1.133 -an overriding table that contains keys for which the values in the main table 
   1.134 -are overridden. A collation key table (TCollationKeyTable) is the set of collation 
   1.135 -keys: primary (basic character identity), secondary (accents and diacritics) 
   1.136 -and tertiary (case). The quaternary key is the Unicode character values themselves.
   1.137 -
   1.138 -The simplest way to customise a collation method is to create a local copy 
   1.139 -of the standard collation method and change it. For example, you could use 
   1.140 -the standard method, but not ignore punctuation and spaces:
   1.141 -
   1.142 -@code
   1.143 -TCollationMethod m = *Mem::CollationMethodByIndex(0); // get the standard method
   1.144 -m.iFlags |= TCollationMethod::EIgnoreNone; // don't ignore punctuation and spaces
   1.145 -@endcode
   1.146 -
   1.147 -@publishedPartner
   1.148 -*/
   1.149 -struct TCollationMethod
   1.150 -	{
   1.151 -	public:
   1.152 -	/**
   1.153 -	The UID of this collation method.
   1.154 -	*/
   1.155 -	TUint iId;
   1.156 -	
   1.157 -	/**
   1.158 -	The main collation key table; if NULL, use the standard table.
   1.159 -	*/
   1.160 -	const TCollationKeyTable* iMainTable;
   1.161 -	
   1.162 -	/**
   1.163 -	If non-NULL, tailoring for collation keys.
   1.164 -	*/
   1.165 -	const TCollationKeyTable* iOverrideTable;
   1.166 -	enum
   1.167 -		{
   1.168 -		/**
   1.169 -		Don't ignore any keys (punctuation, etc. is normally ignored).
   1.170 -		*/
   1.171 -		EIgnoreNone = 1,
   1.172 -		
   1.173 -		/**
   1.174 -		Reverse the normal order for characters differing only in case
   1.175 -		*/
   1.176 -		ESwapCase = 2,
   1.177 -		
   1.178 -		/**
   1.179 -		Compare secondary keys which represent accents in reverse
   1.180 -		order (from right to left); this is needed for French when comparing
   1.181 -		words that differ only in accents.
   1.182 -		*/
   1.183 -		EAccentsBackwards = 4,	
   1.184 -		
   1.185 -		/**
   1.186 -		Reverse the normal order for characters differing only in whether they
   1.187 -		are katakana or hiragana.
   1.188 -		*/
   1.189 -		ESwapKana = 8,
   1.190 -		
   1.191 -		/**
   1.192 -		Fold all characters to lower case before extracting keys; needed for
   1.193 -		comparison of filenames, for which case is ignored but other
   1.194 -		tertiary (level-2) distinctions are not.
   1.195 -		*/
   1.196 -		EFoldCase = 16,
   1.197 -		
   1.198 -		/** Flag to indicate a collation method for matching purposes.
   1.199 -		This flag is only needed if we wish to specify a particular collation method
   1.200 -		to be used for matching purposes.
   1.201 -		*/
   1.202 -		EMatchingTable = 32,
   1.203 -		
   1.204 -		/** Ignore the check for adjacent combining characters.  A combining
   1.205 -		character effectively changes the character it combines with to something
   1.206 -		else and so a match doesn't occur.  Setting this flag will allow character
   1.207 -		matching regardless of any combining characters.
   1.208 -		*/
   1.209 -		EIgnoreCombining = 64
   1.210 -		};
   1.211 -		
   1.212 -	/**
   1.213 -	Flags.
   1.214 -	
   1.215 -	@see TCollationMethod::EIgnoreNone
   1.216 -	@see TCollationMethod::ESwapCase
   1.217 -	@see TCollationMethod::EAccentsBackwards
   1.218 -	@see TCollationMethod::ESwapKana
   1.219 -	@see TCollationMethod::EFoldCase
   1.220 -	*/
   1.221 -	TUint iFlags;
   1.222 -	};
   1.223 -
   1.224 -/**
   1.225 -A collation data set provides any collation methods needed by a locale.
   1.226 -@publishedPartner
   1.227 -*/
   1.228 -struct TCollationDataSet
   1.229 -	{
   1.230 -	public:
   1.231 -	const TCollationMethod* iMethod;
   1.232 -	TInt iMethods;
   1.233 -	};
   1.234 -
   1.235 -// Collation method IDs
   1.236 -
   1.237 -/**
   1.238 -UID of the basic collation method.
   1.239 -@internalTechnology
   1.240 -@released
   1.241 -*/
   1.242 -const TUint KUidBasicCollationMethod = 0x10004F4E;
   1.243 -
   1.244 -/**
   1.245 -UID of the standard Unicode collation method.
   1.246 -@internalTechnology
   1.247 -@released
   1.248 -*/
   1.249 -const TUint KUidStandardUnicodeCollationMethod = 0x10004E96;
   1.250 -
   1.251 -#ifndef __KERNEL_MODE__
   1.252 -
   1.253 -//Forward declarations
   1.254 -class TUTF32Iterator;
   1.255 -struct LCharSet;
   1.256 -
   1.257 -/**
   1.258 -Provides low-level collation functions.
   1.259 -@internalComponent
   1.260 -*/
   1.261 -class TCollate
   1.262 -	{
   1.263 -public:
   1.264 -	/**
   1.265 -	Construct a TCollate object based on the collation method specified
   1.266 -	within aCharSet, if any. If there is none, or aCharSet is null, the
   1.267 -	standard collation method will be used. aMask and aFlags provide a
   1.268 -	method for overriding the flags in the collation method: Each flag set
   1.269 -	to 1 in aMask is a flag that will be overridden and set to the
   1.270 -	corresponding flag value in aFlags. Ownership of aCharSet is not passed.
   1.271 -	*/
   1.272 -	TCollate(const LCharSet* aCharSet,TUint aMask = 0,TUint aFlags = 0xFFFFFFFF);
   1.273 -	/**
   1.274 -	Construct a TCollate object based on an already constructed
   1.275 -	TCollationMethod specified in aMethod. Ownership is not passed.
   1.276 -	*/
   1.277 -	TCollate(const TCollationMethod& aMethod);
   1.278 -
   1.279 -	enum TComparisonResult
   1.280 -		{
   1.281 -		ELeftComparesLessAndIsNotPrefix = -2,
   1.282 -		ELeftIsPrefixOfRight = -1,
   1.283 -		EStringsIdentical = 0,
   1.284 -		ERightIsPrefixOfLeft = 1,
   1.285 -		ERightComparesLessAndIsNotPrefix = 2
   1.286 -		};
   1.287 -
   1.288 -	/**
   1.289 -	Compare the string beginning at aString1 of length aLength1 against the
   1.290 -	string beginning at aString2 of length aLength2.
   1.291 -	aMaxLevel determines the tightness of the collation. At level 0, only
   1.292 -	character identities are distinguished. At level 1 accents are
   1.293 -	distinguished as well. At level 2 case is distinguished as well. At
   1.294 -	level 3 all valid different Unicode characters are considered different.
   1.295 -	*/
   1.296 -	TComparisonResult Compare(const TUint16* aString1,TInt aLength1,
   1.297 -							  const TUint16* aString2,TInt aLength2,
   1.298 -							  TInt aMaxLevel = 3) const;
   1.299 -	/**
   1.300 -	Find the string beginning at aString2 of length aLength2 in the string
   1.301 -	beginning at aString1 of length aLength1. aMaxLevel determines
   1.302 -	the tightness of the collation, see Compare for details.
   1.303 -	*/
   1.304 -	TInt Find(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2,
   1.305 -			  TInt aMaxLevel,TUint aString2WildChar = 0) const;
   1.306 -			  
   1.307 -	TInt Find(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2,
   1.308 -		      TInt &aLengthFound,TInt aMaxLevel,TUint aString2WildChar = 0) const;
   1.309 -		      
   1.310 -	/**
   1.311 -	Test if the string beginning at aSearchTerm of length aSearchTermLength
   1.312 -	matches the string beginning at aCandidate of length aCandidateLength.
   1.313 -	aMaxLevel determines the tightness of the collation, see
   1.314 -	Compare for details. The search term may have wild card characters as
   1.315 -	specified by aWildChar (for matching a single grapheme, i.e. a character
   1.316 -	and any characters that combine with it, such as accents) and
   1.317 -	aWildSequenceChar (for matching any sequence of whole graphemes). The
   1.318 -	return value is KErrNotFound iff the search term does not match the
   1.319 -	candidate string exactly. To find a match within the candidate string,
   1.320 -	the search term must begin and end with a wild sequence character. If
   1.321 -	the search term does match the candidate string, 0 will be returned,
   1.322 -	unless the first character of the search term is a wild sequence
   1.323 -	character in which case the value returned will be the index into
   1.324 -	aCandidate at which the first non-wild sequence character matched.
   1.325 -	aWildSequenceChar must be a valid (non-surrogate) Unicode character
   1.326 -	below FFFE.
   1.327 -	*/
   1.328 -	TInt Match(const TUint16 *aCandidate, TInt aCandidateLength,
   1.329 -			   const TUint16 *aSearchTerm,TInt aSearchTermLength,
   1.330 -			   TInt aMaxLevel, TUint aWildChar = '?', TUint aWildSequenceChar = '*', TUint aEscapeChar = 0) const;
   1.331 -
   1.332 -private:
   1.333 -	/**
   1.334 -	Compare values output from the iterators. After the comparison, if
   1.335 -	ERightIsPrefixOfLeft or EStringsIdentical is returned, then aLeft and
   1.336 -	aRight will be pointing at the next key (at MaxLevel) after the match.
   1.337 -	If right is shown to be a prefix of left, this means that it has been
   1.338 -	checked at all requested levels. If it is reported that the right is a
   1.339 -	prefix of the left, then this will mean also that there are no unmatched
   1.340 -	combining characters on the left.
   1.341 -	*/
   1.342 -	TComparisonResult CompareKeySequences(TUTF32Iterator& aLeft, TUTF32Iterator& aRight,
   1.343 -										  TInt aMaxLevel, TInt aRightStringWildChar, TInt aEscapeChar) const;
   1.344 -	/**
   1.345 -	Finds search term inside candidate string. Returns KErrNotFound if there
   1.346 -	is no match, returns the offset into the candidate string at which the
   1.347 -	search term was found (note that this is the offset from the start of
   1.348 -	the iteration, not from where the iteration was when the function was
   1.349 -	called). If a string was found, the search term iterator is left
   1.350 -	pointing at the end of the search term, and the candidate iterator is
   1.351 -	left pointing just after the matched keys. aMatchPos returns where in
   1.352 -	the candidate string the match was found.
   1.353 -	*/
   1.354 -	TInt FindKeySequence(TUTF32Iterator& aCandidate, TUTF32Iterator& aSearchTerm,
   1.355 -						 TInt aMaxLevel, TInt aWildChar, TInt aEscapeChar, TInt& aLengthFound) const;
   1.356 -
   1.357 -private:
   1.358 -	TCollationMethod iMethod;
   1.359 -	};
   1.360 -
   1.361 -#endif	// __KERNEL_MODE__
   1.362 -
   1.363 -#endif // _UNICODE
   1.364 -
   1.365 -#endif // __COLLATE_H__