1.1 --- a/epoc32/include/collate.h Wed Mar 31 12:27:01 2010 +0100
1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
1.3 @@ -1,362 +0,0 @@
1.4 -// Copyright (c) 1996-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 -// All rights reserved.
1.6 -// This component and the accompanying materials are made available
1.7 -// under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
1.8 -// which accompanies this distribution, and is available
1.9 -// at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
1.10 -//
1.11 -// Initial Contributors:
1.12 -// Nokia Corporation - initial contribution.
1.13 -//
1.14 -// Contributors:
1.15 -//
1.16 -// Description:
1.17 -// e32\include\collate.h
1.18 -// Definitions needed for Unicode collation.
1.19 -// Collation is the comparison of two Unicode strings to produce an ordering
1.20 -// that may be used in a dictionary or other list.
1.21 -// Collation is implemented using the Standard Unicode Collation algorithm. There
1.22 -// are four levels of comparison:
1.23 -// primary: basic character identity
1.24 -// secondary: accents and diacritics
1.25 -// tertiary: upper and lower case, and other minor attributes
1.26 -// quaternary: Unicode character value
1.27 -// Punctuation is normally ignored but can optionally be taken into account.
1.28 -// Strings are fully expanded using the standard Unicode canonical expansions before
1.29 -// they are compared. Thai and Lao vowels are swapped with the following character
1.30 -// if any.
1.31 -// EUSER contains the 'basic collation method'. This method assigns the standard Unicode collation key values
1.32 -// to the characters in the WGL4 repertoire, plus commonly used control characters and fixed-width spaces, plus
1.33 -// the CJK ideograms (for which the keys can be generated algorithmically). Other characters are collated after
1.34 -// all the characters for which keys are defined, and ordered by their Unicode values.
1.35 -// Locales can supply any number of other collation methods. They will usually supply a 'tailoring' of the standard
1.36 -// method. This is done by using the standard table as the main key table (signalled by placing NULL in
1.37 -// TCollationMethod::iMainTable) and specifying an override table (TCollationMethod::iOverrideTable).
1.38 -// Locale-specific collation data resides in ELOCL.
1.39 -//
1.40 -//
1.41 -
1.42 -
1.43 -
1.44 -#ifndef __COLLATE_H__
1.45 -#define __COLLATE_H__
1.46 -
1.47 -#ifdef __KERNEL_MODE__
1.48 -#include <e32cmn.h>
1.49 -#else
1.50 -#include <e32std.h>
1.51 -#endif
1.52 -
1.53 -//This material is used in the Unicode build only.
1.54 -#ifdef _UNICODE
1.55 -
1.56 -/**
1.57 -Collation key table structure.
1.58 -@publishedPartner
1.59 -*/
1.60 -struct TCollationKeyTable
1.61 - {
1.62 -public:
1.63 - /**
1.64 - Masks for the various parts of the elements of the iKey array.
1.65 - */
1.66 - enum
1.67 - {
1.68 - ELevel0Mask = 0xFFFF0000, // primary key - basic character identity
1.69 - ELevel1Mask = 0x0000FF00, // secondary key - accents and diacritics
1.70 - ELevel2Mask = 0x000000FC, // tertiary key - case, etc.
1.71 - EIgnoreFlag = 0x2, // if set, this key is normally ignored
1.72 - EStopFlag = 0x1 // if set, this key is the last in a sequence representing a Unicode value or values
1.73 - };
1.74 -
1.75 - /**
1.76 - An array containing all of the keys and strings of keys concatenated
1.77 - together. Each key has EStopFlag set only if it is the last key in its
1.78 - string. Each key contains the keys for levels 0, 1 and 2, and a flag
1.79 - EIgnoreFlag if the key is usually ignored (for punctuation & spaces
1.80 - etc.).
1.81 - */
1.82 - const TUint32* iKey;
1.83 - /**
1.84 - An array of indices into the iKey array. Each element has its high 16
1.85 - bits indicating a Unicode value and its low 16 bits indicating an index
1.86 - into the iKey array at which its key starts. The elements are sorted by
1.87 - Unicode value.
1.88 - */
1.89 - const TUint32* iIndex;
1.90 - /**
1.91 - The size of the iIndex array.
1.92 - */
1.93 - TInt iIndices;
1.94 - /**
1.95 - Concatenated Unicode strings. Each is a string that is to be converted
1.96 - to keys differently from how it would be if each letter were converted
1.97 - independently. An example is "ch" in Spanish, which sorts as though it
1.98 - were a single letter. Each Unicode string is preceded by a 16-bit value
1.99 - indicating the string's length. The end of the string is not delimited.
1.100 - */
1.101 - const TUint16* iString;
1.102 - /**
1.103 - An array of elements mapping elements of iString to elements of iIndex.
1.104 - Each element has its high 16 bits indicating the index of the start of
1.105 - an element of iString, and its low 16 bits indicating the corresponding
1.106 - element in iIndex. This array is sorted on the string index.
1.107 - */
1.108 - const TUint32* iStringIndex;
1.109 - /**
1.110 - The size of the iStringIndex array.
1.111 - */
1.112 - TInt iStringIndices;
1.113 - };
1.114 -
1.115 -/**
1.116 -Defines a collation method.
1.117 -
1.118 -Collation means sorting pieces of text. It needs to take into account characters,
1.119 -accents and case; spaces and punctuation are usually ignored. It differs from
1.120 -ordinary methods of sorting in that it is locale-dependent - different
1.121 -languages use different ordering methods. Additionally, multiple collation
1.122 -methods may exist within the same locale.
1.123 -
1.124 -A collation method provides the collation keys and other data needed to customise
1.125 -collation; the Mem and TDesC16 collation functions (e.g. Mem::CompareC())
1.126 -perform the collation. Note that these functions use the standard collation
1.127 -method for the current locale - you only need to specify an object of class
1.128 -TCollationMethod to customise this collation scheme. Collation methods can
1.129 -be retrieved using member functions of the Mem class. Each one has a unique
1.130 -identifier.
1.131 -
1.132 -A collation method specifies a main table of collation keys, and optionally
1.133 -an overriding table that contains keys for which the values in the main table
1.134 -are overridden. A collation key table (TCollationKeyTable) is the set of collation
1.135 -keys: primary (basic character identity), secondary (accents and diacritics)
1.136 -and tertiary (case). The quaternary key is the Unicode character values themselves.
1.137 -
1.138 -The simplest way to customise a collation method is to create a local copy
1.139 -of the standard collation method and change it. For example, you could use
1.140 -the standard method, but not ignore punctuation and spaces:
1.141 -
1.142 -@code
1.143 -TCollationMethod m = *Mem::CollationMethodByIndex(0); // get the standard method
1.144 -m.iFlags |= TCollationMethod::EIgnoreNone; // don't ignore punctuation and spaces
1.145 -@endcode
1.146 -
1.147 -@publishedPartner
1.148 -*/
1.149 -struct TCollationMethod
1.150 - {
1.151 - public:
1.152 - /**
1.153 - The UID of this collation method.
1.154 - */
1.155 - TUint iId;
1.156 -
1.157 - /**
1.158 - The main collation key table; if NULL, use the standard table.
1.159 - */
1.160 - const TCollationKeyTable* iMainTable;
1.161 -
1.162 - /**
1.163 - If non-NULL, tailoring for collation keys.
1.164 - */
1.165 - const TCollationKeyTable* iOverrideTable;
1.166 - enum
1.167 - {
1.168 - /**
1.169 - Don't ignore any keys (punctuation, etc. is normally ignored).
1.170 - */
1.171 - EIgnoreNone = 1,
1.172 -
1.173 - /**
1.174 - Reverse the normal order for characters differing only in case
1.175 - */
1.176 - ESwapCase = 2,
1.177 -
1.178 - /**
1.179 - Compare secondary keys which represent accents in reverse
1.180 - order (from right to left); this is needed for French when comparing
1.181 - words that differ only in accents.
1.182 - */
1.183 - EAccentsBackwards = 4,
1.184 -
1.185 - /**
1.186 - Reverse the normal order for characters differing only in whether they
1.187 - are katakana or hiragana.
1.188 - */
1.189 - ESwapKana = 8,
1.190 -
1.191 - /**
1.192 - Fold all characters to lower case before extracting keys; needed for
1.193 - comparison of filenames, for which case is ignored but other
1.194 - tertiary (level-2) distinctions are not.
1.195 - */
1.196 - EFoldCase = 16,
1.197 -
1.198 - /** Flag to indicate a collation method for matching purposes.
1.199 - This flag is only needed if we wish to specify a particular collation method
1.200 - to be used for matching purposes.
1.201 - */
1.202 - EMatchingTable = 32,
1.203 -
1.204 - /** Ignore the check for adjacent combining characters. A combining
1.205 - character effectively changes the character it combines with to something
1.206 - else and so a match doesn't occur. Setting this flag will allow character
1.207 - matching regardless of any combining characters.
1.208 - */
1.209 - EIgnoreCombining = 64
1.210 - };
1.211 -
1.212 - /**
1.213 - Bit flags: a combination of the enumerators declared above (EMatchingTable and EIgnoreCombining included).
1.214 -
1.215 - @see TCollationMethod::EIgnoreNone
1.216 - @see TCollationMethod::ESwapCase
1.217 - @see TCollationMethod::EAccentsBackwards
1.218 - @see TCollationMethod::ESwapKana
1.219 - @see TCollationMethod::EFoldCase
1.220 - */
1.221 - TUint iFlags;
1.222 - };
1.223 -
1.224 -/**
1.225 -A collation data set provides any collation methods needed by a locale.
1.226 -@publishedPartner
1.227 -*/
1.228 -struct TCollationDataSet
1.229 - {
1.230 - public:
1.231 - const TCollationMethod* iMethod; // presumably points to the first of iMethods collation methods - TODO confirm
1.232 - TInt iMethods; // presumably the number of methods reachable via iMethod - TODO confirm
1.233 - };
1.234 -
1.235 -// Collation method IDs
1.236 -
1.237 -/**
1.238 -Collation method ID: the UID of the basic collation method.
1.239 -@internalTechnology
1.240 -@released
1.241 -*/
1.242 -const TUint KUidBasicCollationMethod = 0x10004F4E;
1.243 -
1.244 -/**
1.245 -Collation method ID: the UID of the standard Unicode collation method.
1.246 -@internalTechnology
1.247 -@released
1.248 -*/
1.249 -const TUint KUidStandardUnicodeCollationMethod = 0x10004E96;
1.250 -
1.251 -#ifndef __KERNEL_MODE__
1.252 -
1.253 -//Forward declarations
1.254 -class TUTF32Iterator;
1.255 -struct LCharSet;
1.256 -
1.257 -/**
1.258 -Provides low-level collation functions.
1.259 -@internalComponent
1.260 -*/
1.261 -class TCollate
1.262 - {
1.263 -public:
1.264 - /**
1.265 - Construct a TCollate object based on the collation method specified
1.266 - within aCharSet, if any. If there is none, or aCharSet is null, the
1.267 - standard collation method will be used. aMask and aFlags provide a
1.268 - method for overriding the flags in the collation method: Each flag set
1.269 - to 1 in aMask is a flag that will be overridden and set to the
1.270 - corresponding flag value in aFlags. Ownership of aCharSet is not passed.
1.271 - */
1.272 - TCollate(const LCharSet* aCharSet,TUint aMask = 0,TUint aFlags = 0xFFFFFFFF);
1.273 - /**
1.274 - Construct a TCollate object based on an already constructed
1.275 - TCollationMethod specified in aMethod. Ownership is not passed.
1.276 - */
1.277 - TCollate(const TCollationMethod& aMethod);
1.278 -
1.279 - enum TComparisonResult
1.280 - {
1.281 - ELeftComparesLessAndIsNotPrefix = -2,
1.282 - ELeftIsPrefixOfRight = -1,
1.283 - EStringsIdentical = 0,
1.284 - ERightIsPrefixOfLeft = 1,
1.285 - ERightComparesLessAndIsNotPrefix = 2
1.286 - };
1.287 -
1.288 - /**
1.289 - Compare the string beginning at aString1 of length aLength1 against the
1.290 - string beginning at aString2 of length aLength2.
1.291 - aMaxLevel determines the tightness of the collation. At level 0, only
1.292 - character identities are distinguished. At level 1 accents are
1.293 - distinguished as well. At level 2 case is distinguished as well. At
1.294 - level 3 all valid different Unicode characters are considered different.
1.295 - */
1.296 - TComparisonResult Compare(const TUint16* aString1,TInt aLength1,
1.297 - const TUint16* aString2,TInt aLength2,
1.298 - TInt aMaxLevel = 3) const;
1.299 - /**
1.300 - Find the string beginning at aString2 of length aLength2 in the string
1.301 - beginning at aString1 of length aLength1. aMaxLevel determines
1.302 - the tightness of the collation, see Compare for details.
1.303 - */
1.304 - TInt Find(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2,
1.305 - TInt aMaxLevel,TUint aString2WildChar = 0) const;
1.306 -
1.307 - TInt Find(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2,
1.308 - TInt &aLengthFound,TInt aMaxLevel,TUint aString2WildChar = 0) const; // overload of Find with an extra out-parameter; presumably aLengthFound receives the matched length - TODO confirm
1.309 -
1.310 - /**
1.311 - Test if the string beginning at aSearchTerm of length aSearchTermLength
1.312 - matches the string beginning at aCandidate of length aCandidateLength.
1.313 - aMaxLevel determines the tightness of the collation, see
1.314 - Compare for details. The search term may have wild card characters as
1.315 - specified by aWildChar (for matching a single grapheme - i.e. a character
1.316 - and any characters that combine with it, such as accents) and
1.317 - aWildSequenceChar (for matching any sequence of whole graphemes). The
1.318 - return value is KErrNotFound iff the search term does not match the
1.319 - candidate string exactly. To find a match within the candidate string,
1.320 - the search term must begin and end with a wild sequence character. If
1.321 - the search term does match the candidate string, 0 will be returned,
1.322 - unless the first character of the search term is a wild sequence
1.323 - character in which case the value returned will be the index into
1.324 - aCandidate at which the first non-wild sequence character matched.
1.325 - aWildSequenceChar must be a valid (non-surrogate) Unicode character
1.326 - below FFFE.
1.327 - */
1.328 - TInt Match(const TUint16 *aCandidate, TInt aCandidateLength,
1.329 - const TUint16 *aSearchTerm,TInt aSearchTermLength,
1.330 - TInt aMaxLevel, TUint aWildChar = '?', TUint aWildSequenceChar = '*', TUint aEscapeChar = 0) const; // NOTE(review): aEscapeChar is not described above - confirm its meaning
1.331 -
1.332 -private:
1.333 - /**
1.334 - Compare values output from the iterators. After the comparison, if
1.335 - ERightIsPrefixOfLeft or EStringsIdentical is returned, then aLeft and
1.336 - aRight will be pointing at the next key (at aMaxLevel) after the match.
1.337 - If right is shown to be a prefix of left, this means that it has been
1.338 - checked at all requested levels. If it is reported that the right is a
1.339 - prefix of the left, then this will mean also that there are no unmatched
1.340 - combining characters on the left.
1.341 - */
1.342 - TComparisonResult CompareKeySequences(TUTF32Iterator& aLeft, TUTF32Iterator& aRight,
1.343 - TInt aMaxLevel, TInt aRightStringWildChar, TInt aEscapeChar) const;
1.344 - /**
1.345 - Finds search term inside candidate string. Returns KErrNotFound if there
1.346 - is no match, returns the offset into the candidate string at which the
1.347 - search term was found (note that this is the offset from the start of
1.348 - the iteration, not from where the iteration was when the function was
1.349 - called). If a string was found, the search term iterator is left
1.350 - pointing at the end of the search term, and the candidate iterator is
1.351 - left pointing just after the matched keys. aMatchPos returns where in
1.352 - the candidate string the match was found. NOTE(review): the declared out-parameter is aLengthFound, not aMatchPos - confirm what it reports.
1.353 - */
1.354 - TInt FindKeySequence(TUTF32Iterator& aCandidate, TUTF32Iterator& aSearchTerm,
1.355 - TInt aMaxLevel, TInt aWildChar, TInt aEscapeChar, TInt& aLengthFound) const;
1.356 -
1.357 -private:
1.358 - TCollationMethod iMethod;
1.359 - };
1.360 -
1.361 -#endif // __KERNEL_MODE__
1.362 -
1.363 -#endif // _UNICODE
1.364 -
1.365 -#endif // __COLLATE_H__