1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/localisation/localesupport/coltab/COLTAB.CPP Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1278 @@
1.4 +// Copyright (c) 1999-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// Reads and parses the Unicode collation value table and writes out a C++ source file
1.18 +// containing the data in a form that can be used by the EPOC collation system.
1.19 +//
1.20 +// The program reads either three files or one combined file:
1.21 +//
1.22 +// Three files (by default):
1.23 +// 1. Base keys (maps single Unicode values to single collation key values): must be in the same format as
1.24 +// basekeys.txt, supplied with the Standard Unicode Collation system
1.25 +//
1.26 +// 2. Composite keys (maps single Unicode values to strings of collation keys): must be in the same format as
1.27 +// compkeys.txt, supplied with the Standard Unicode Collation system
1.28 +//
1.29 +// 3. Strings (maps strings of Unicode values to single collation keys OR strings of collation keys): must be in the
1.30 +// same format as compkeys.txt, except that there can be any number of Unicode characters at the start of the line,
1.31 +// space-separated and each exactly 4 hex digits.
1.32 +//
1.33 +// One combined file (with option /a):
1.34 +// 1. All Keys (combine above three files into one file): must be in the same format as allkeys.txt, supplied with the Standard Unicode Collation system (after Unicode 3.0).
1.35 +//
1.36 +//
1.37 +
1.38 +
1.39 +#include <assert.h>
1.40 +#include <ctype.h>
1.41 +
1.42 +#ifdef __MSVCDOTNET__
1.43 +#include <fstream>
1.44 +#include <iostream>
1.45 +using namespace std;
1.46 +#else //!__MSVCDOTNET__
1.47 +#include <fstream.h>
1.48 +#include <iostream.h>
1.49 +#endif //__MSVCDOTNET__
1.50 +
1.51 +#include <stdlib.h>
1.52 +#include <string.h>
1.53 +#include <stdio.h>
1.54 +
/*
Limits on level-1 and level-2 key values, chosen so that the values can be packed
into KLevel1Bits and KLevel2Bits bits respectively.
A non-zero value is stored reduced by (minimum - 1); zero stays zero, which is why
each maximum is minimum + 2^bits - 2 rather than minimum + 2^bits - 1.
*/
const unsigned int KLevel1Bits = 8;
const unsigned int KLevel1Min = 0x20;
const unsigned int KLevel1Max = KLevel1Min + ((1u << KLevel1Bits) - 2u);

const unsigned int KLevel2Bits = 6;
const unsigned int KLevel2Min = 1;
const unsigned int KLevel2Max = KLevel2Min + ((1u << KLevel2Bits) - 2u);
1.65 +
1.66 +/*
1.67 +Table of characters in the WGL4 set, plus characters in canonical decompositions of
1.68 +those characters, plus commonly used control characters and space characters,
1.69 +given as ranges of Unicode characters. In each pair, the first code is the first in the range,
1.70 +and the second is the first code NOT in the range.
1.71 +
1.72 +The extra characters are added mainly to ensure that control characters and spaces are
1.73 +normally ignored. The extra characters are:
1.74 +
1.75 +0x0000-0x001F: ASCII control characters
1.76 +0x2000-0x2012: spaces, hyphen variants, figure dash
1.77 +0x2028-0x202E: line and paragraph separator, bidirectional control characters
1.78 +0xFEFF : byte-order mark
1.79 +0xFFFC-0xFFFD: object replacement character, replacement character
1.80 +*/
1.81 +const unsigned int Wgl4Range[] =
1.82 + {
1.83 + 0x00, 0x7f, // All ASCII except DEL (0x7f is the first code NOT in the range)
1.84 + 0xa0, 0x180, // Non-breaking space, Latin-1, Latin Extended-A
1.85 + 0x192,0x193, // Latin f with hook
1.86 + 0x1fa,0x200, // A-ring, a-ring, AE, ae, O slash, o slash all with acute accent
1.87 + 0x2c6,0x2c8, // non-combining circumflex and caron
1.88 + 0x2c9,0x2ca, // non-combining macron
1.89 + 0x2d8,0x2dc, // non-combining breve, dot above, ring above, ogonek
1.90 + 0x2dd,0x2de, // non-combining double acute
1.91 + 0x300,0x305, // combining grave, acute, circumflex, tilde, macron
1.92 + 0x306,0x309, // combining breve, dot above, double dot above
1.93 + 0x30a,0x30e, // combining ring above, double acute, caron, vertical line above
1.94 + 0x327,0x329, // combining cedilla, ogonek
1.95 + 0x384,0x38b, // Greek
1.96 + 0x38c,0x38d, // Greek
1.97 + 0x38e,0x3a2, // Greek
1.98 + 0x3a3,0x3cf, // Greek
1.99 + 0x401,0x40d, // Cyrillic
1.100 + 0x40e,0x450, // Cyrillic
1.101 + 0x451,0x45d, // Cyrillic
1.102 + 0x45e,0x460, // Cyrillic
1.103 + 0x490,0x492, // Cyrillic
1.104 + 0x1e80,0x1e86, // Both W and w with each of grave, acute and diaeresis
1.105 + 0x1ef2,0x1ef4, // Y with grave, y with grave
1.106 + 0x2000,0x2016, // various space and horizontal lines
1.107 + 0x2017,0x201f, // double low line, various quotation marks (U+2016 double vertical line is NOT included)
1.108 + 0x2020,0x2023, // dagger, double dagger, bullet
1.109 + 0x2026,0x2027, //ellipsis
1.110 + 0x2028,0x202F, // line & paragraph separators and directional formatting
1.111 + 0x2030,0x2031, // per mille
1.112 + 0x2032,0x2034, // prime
1.113 + 0x2039,0x203b, // single angle quotation marks
1.114 + 0x203c,0x203d, // double exclamation mark
1.115 + 0x203e,0x203f, // non-combining overscore
1.116 + 0x2044,0x2045, // fraction slash
1.117 + 0x207f,0x2080, // superscript n
1.118 + 0x20a3,0x20a5, // French Franc, Italian/Turkish Lira
1.119 + 0x20a7,0x20a8, // Spanish Peseta
1.120 + 0x20ac,0x20ad, // Euro symbol
1.121 + 0x2105,0x2106, // care of
1.122 + 0x2113,0x2114, // script l
1.123 + 0x2116,0x2117, // numero
1.124 + 0x2122,0x2123, // trade mark
1.125 + 0x2126,0x2127, // ohm
1.126 + 0x212e,0x212f, // estimated (net weight)
1.127 + 0x215b,0x215f, // 1/8, 3/8, 5/8, 7/8
1.128 + 0x2190,0x2196, // horizontal and vertical arrows
1.129 + 0x21a8,0x21a9, // up down arrow with base
1.130 + 0x2202,0x2203, // partial differential
1.131 + 0x2206,0x2207, // increment (delta)
1.132 + 0x220f,0x2210, // n-ary product (pi)
1.133 + 0x2211,0x2213, // n-ary sum (sigma), minus
1.134 + 0x2215,0x2216, // division (slash)
1.135 + 0x2219,0x221b, // bullet operator, square root
1.136 + 0x221e,0x2220, // infinity, right angle
1.137 + 0x2229,0x222a, // intersection
1.138 + 0x222b,0x222c, // integral (U+222B; union is U+222A, which is not included)
1.139 + 0x2248,0x2249, // almost equal to
1.140 + 0x2260,0x2262, // not equal to, identical to
1.141 + 0x2264,0x2266, // less-than-or-equal-to, greater-than-or-equal-to
1.142 + 0x2302,0x2303, // house
1.143 + 0x2310,0x2311, // reversed not sign
1.144 + 0x2320,0x2322, // top and bottom of integral
1.145 + 0x2500,0x2501, // box drawing
1.146 + 0x2502,0x2503, // box drawing
1.147 + 0x250c,0x250d, // box drawing
1.148 + 0x2510,0x2511, // box drawing
1.149 + 0x2514,0x2515, // box drawing
1.150 + 0x2518,0x2519, // box drawing
1.151 + 0x251c,0x251d, // box drawing
1.152 + 0x2524,0x2525, // box drawing
1.153 + 0x252c,0x252d, // box drawing
1.154 + 0x2534,0x2535, // box drawing
1.155 + 0x253c,0x253d, // box drawing
1.156 + 0x2550,0x256d, // box drawing
1.157 + 0x2580,0x2581, // block element
1.158 + 0x2584,0x2585, // block element
1.159 + 0x2588,0x2589, // block element
1.160 + 0x258c,0x258d, // block element
1.161 + 0x2590,0x2594, // block element
1.162 + 0x25a0,0x25a2, // geometric shapes
1.163 + 0x25aa,0x25ad, // geometric shapes
1.164 + 0x25b2,0x25b3, // geometric shapes
1.165 + 0x25ba,0x25bb, // geometric shapes
1.166 + 0x25bc,0x25bd, // geometric shapes
1.167 + 0x25c4,0x25c5, // geometric shapes
1.168 + 0x25ca,0x25cc, // geometric shapes
1.169 + 0x25cf,0x25d0, // geometric shapes
1.170 + 0x25d8,0x25da, // geometric shapes
1.171 + 0x25e6,0x25e7, // geometric shapes
1.172 + 0x263a,0x263d, // smilies, sun
1.173 + 0x2640,0x2641, // female
1.174 + 0x2642,0x2643, // male
1.175 + 0x2660,0x2661, // spade
1.176 + 0x2663,0x2664, // club
1.177 + 0x2665,0x2667, // heart
1.178 + 0x266a,0x266c, // quaver, beamed quavers
1.179 + 0xfb01,0xfb03, // fi, fl ligatures
1.180 + 0xfeff,0xff00, // zero-width non-breaking space
1.181 + 0xfffc, 0xfffe // object replacement character and replacement character
1.182 + };
1.183 +const int Wgl4Ranges = sizeof(Wgl4Range) / sizeof(Wgl4Range[0]) / 2;
1.184 +
/*
Comparison callback for bsearch() over a table of half-open ranges stored as
pairs of unsigned int { first, limit }.

One argument is the search key, stored as a degenerate pair with first == limit;
the other is a table element. The key may be passed in either position: it is
recognised by its two members being equal.

Returns a value describing aRange1 relative to aRange2:
	-1 if aRange1 sorts before aRange2,
	 1 if aRange1 sorts after aRange2,
	 0 if the key lies inside the range.
*/
int CompareWgl4Ranges(const void* aRange1,const void* aRange2)
	{
	const unsigned int* key = static_cast<const unsigned int*>(aRange1);
	const unsigned int* range = static_cast<const unsigned int*>(aRange2);

	// 'sign' converts "key relative to range" into "aRange1 relative to aRange2".
	int sign = 1;
	if (range[0] == range[1])
		{
		// The arguments arrived as (range, key): swap them and remember to invert
		// the answer so that it still describes aRange1 relative to aRange2.
		const unsigned int* temp = key;
		key = range;
		range = temp;
		sign = -1;
		}

	if (key[0] < range[0])
		return -sign;	// key is below the range
	if (key[0] >= range[1])
		return sign;	// key is at or beyond the first code NOT in the range
	return 0;
	}
1.202 +
1.203 +// Determine if a character is in the WGL4 character repertoire.
1.204 +static bool InWgl4(unsigned int aChar)
1.205 + {
1.206 + unsigned int key[2];
1.207 + key[0] = key[1] = aChar;
1.208 + return bsearch(key,Wgl4Range,Wgl4Ranges,sizeof(Wgl4Range[0]) * 2,CompareWgl4Ranges) != NULL;
1.209 + }
1.210 +
// A single collation key: one value per level plus two flags.
class CollationKey
	{
public:
	enum
		{
		ELevels = 3
		};

	// Two keys are equal when every level value and both flags match.
	bool operator==(const CollationKey& k) const
		{
		for (int level = 0; level < ELevels; ++level)
			{
			if (iLevel[level] != k.iLevel[level])
				return false;
			}
		return iIgnorable == k.iIgnorable && iStop == k.iStop;
		}

	int iLevel[ELevels];	// the key value at each level
	bool iIgnorable;		// true if this key can normally be ignored
	bool iStop;				// true if this is the last key in a string of keys
	};
1.227 +
// Associates one Unicode value with the position of its key in the key table.
class CollationIndex
	{
public:
	// Comparison callback taking two untyped pointers; defined elsewhere in this file.
	static int Compare(const void* aIndex1,const void* aIndex2);

	int iCode;	// the Unicode value
	int iIndex;	// index into the key table
	};
1.237 +
1.238 +class Reader
1.239 + {
1.240 +public:
1.241 + Reader(bool aWgl4,bool aStandard,const char* aLocaleName, const char* aUidString);
1.242 + ~Reader();
1.243 + void ReadBaseKeys(const char* aFileName);
1.244 + void ReadCompKeys(const char* aFileName);
1.245 + void ReadStrings(const char* aFileName);
1.246 + void ReadAllKeys(const char* aFileName);
1.247 + void WriteOutput(const char* aFileName, bool aCopyrightMessage);
1.248 + int CompareStringIndices(int aIndex1,int aIndex2) const;
1.249 +
1.250 +private:
1.251 + Reader(const Reader&);
1.252 + int Hex(const char *aString, int &aCharConsumed, bool aTolerate = false);
1.253 + void GetCollationKey(const char* aString, int& aCharConsumed, CollationKey* aKey=NULL);
1.254 + void GetMultipleCollationKeys(const char* aString);
1.255 + unsigned int PackKey(const CollationKey& aValue);
1.256 + int PackIndex(const CollationIndex& aValue, unsigned int result[2]);
1.257 + bool ParseLine(const char* aLine, int aCode[16], int& aCodeCount, int& aKeyStart, int& aKeyCount);
1.258 + void AddKeyOneToOne(const char* aLine, const int aCode, const int aKeyStart);
1.259 + void AddKeyOneToMuch(const char* aLine, const int aCode, const int aKeyStart);
1.260 + void AddKeyMuchToMuch(const char* aLine, const int aCode[16], const int aCodeCount, const int aKeyStart);
1.261 +
1.262 + enum
1.263 + {
1.264 + EMaxCollationKeys = 0x110000 * 2, /*more elements considering composite keys */
1.265 + EMaxCollationIndices = 0x110000,
1.266 + EMaxStringElements = 65536,
1.267 + EMaxStringIndices = 65536
1.268 + };
1.269 + CollationKey iCollationKey[EMaxCollationKeys];
1.270 + int iKeys;
1.271 + CollationIndex iCollationIndex[EMaxCollationIndices];
1.272 + int iIndices;
1.273 + int iStringElement[EMaxStringElements];
1.274 + int iStringElements;
1.275 + unsigned int iStringIndex[EMaxStringIndices];
1.276 + int iStringIndices;
1.277 + const char* iInputFileName;
1.278 + int iLineNumber;
1.279 + bool iSuppressCanonseqWarning; // have we issued the canonseq warning yet?
1.280 + bool iWgl4; // true if writing keys for wgl4 characters only
1.281 + bool iStandard; // true if reading standard files, not tailoring files
1.282 + const char* iLocaleName;
1.283 + const char* iUidString;
1.284 + char* iCPlusPlusIdentifier; // iLocaleName in title case with difficult characters removed
1.285 + };
1.286 +
// Return true if c is one of 0-9, a-f or A-F.
bool isValidHexDigit(char c)
	{
	const bool decimal = c >= '0' && c <= '9';
	const bool lowerCase = c >= 'a' && c <= 'f';
	const bool upperCase = c >= 'A' && c <= 'F';
	return decimal || lowerCase || upperCase;
	}
1.297 +
// Write the command-line summary to standard output.
// The final line is deliberately left without a trailing newline.
void PrintUsage()
	{
	static const char* const KUsageText[] =
		{
		"Usage: coltab [/u<uid>] [/c] [/a] [/h<topic>] <locale>\n",
		"By Default (without /a option), for the locales 'standard' and 'wgl4' coltab reads basekeys.txt & compkeys.txt\n",
		"For any other locale name <name> coltab reads <name>_basekeys.txt,\n",
		"<name>_compkeys.txt and <name>_strings.txt.\n",
		"Use the /a option, for the locales 'standard' and 'wgl4' coltab reads allkeys.txt\n",
		"For any other locale name <name> coltab reads <name>_allkeys.txt.\n",
		"The output file is always ls_<name>.cpp.\n",
		"Use the /u option to specify the UID that the collation table should have.\n",
		"A hex number must follow /u immediately, for example /u800ACBDE\n",
		"this hex number must not exceed eight digits. If this is not specified,\n",
		"the output file will have to be edited to make it compilable.\n",
		"Specify /c to prefix the output with a Nokia copyright message.\n",
		"Specify /h for in-depth help."
		};
	const int lineCount = sizeof(KUsageText) / sizeof(KUsageText[0]);
	for (int line = 0; line < lineCount; ++line)
		cout << KUsageText[line];
	}
1.314 +
1.315 +void UsageError()
1.316 + {
1.317 + PrintUsage();
1.318 + exit(1);
1.319 + }
1.320 +
1.321 +void PrintHelp(char* aTopic)
1.322 + {
1.323 + int topic = 0;
1.324 + while ('0' <= *aTopic && *aTopic <= '9')
1.325 + {
1.326 + topic = topic * 10 + (*aTopic - '0');
1.327 + ++aTopic;
1.328 + }
1.329 + switch(topic)
1.330 + {
1.331 + case 1:
1.332 + cout << "How Coltab interprets CANONSEQ:\n\n"\
1.333 + "If the CANONSEQ specifier is used in a line, Coltab will ignore the mapping.\n"\
1.334 + "This because, on the Symbian platform, any canonically composed character is\n"\
1.335 + "decomposed before the key mapping is applied, so characters with canonical\n"\
1.336 + "decompositions do not need keys. In files supplied by the Unicode Consortium,\n"\
1.337 + "all mappings for composed characters are flagged by CANONSEQ, so it is useful\n"\
1.338 + "if Coltab can just ignore these so that Unicode Consortium files can be used\n"\
1.339 + "unedited.\n\n"\
1.340 + "This can cause problems if a localizer copies a line from a Unicode file into,\n"\
1.341 + "say, the <lang>_strings.txt file, in order to give a mapping for an accented\n"\
1.342 + "character. The localizer replaces the composed character code with the\n"\
1.343 + "decomposition and changes the keys but forgets to remove the CANONSEQ\n"\
1.344 + "specifier. In this case the key would be ignored. Coltab provides a warning so\n"\
1.345 + "that this can be put right.\n\n"\
1.346 + "Coltab will only warn about the first CANONSEQ in each file, and does not warn\n"\
1.347 + "if the 'standard' or 'wgl4' options are used.";
1.348 + exit(1);
1.349 + break;
1.350 + case 2:
1.351 + cout << "How to ensure coltab's output files are compilable.\n\n"\
1.352 + "By default, Coltab's files for locales need to be edited before they are\n"\
1.353 + "compilable. The UID for the collation method needs to be filled in. This UID\n"\
1.354 + "is added so that the collation table can be searched for later. At present,\n"\
1.355 + "this UID is not necessary for the correct functioning of the Symbian platform\n"\
1.356 + "and so a value of 0 can be safely used.\n\n"\
1.357 + "To insert this value into the file directly, use the /u option, for example\n"\
1.358 + "coltab /u0 french\n"\
1.359 + "If the /u option is used, the file should be compilable as is. If it is not,\n"\
1.360 + "please raise it as a defect with Symbian's internationalization team,\n"\
1.361 + "supplying the files that caused the problem if this is possible.\n"\
1.362 + "If the 'standard' or 'wgl4' options are used, no UID is output, so the /u\n"\
1.363 + "option is not required.";
1.364 + exit(1);
1.365 + break;
1.366 + case 3:
1.367 + cout << "How to ensure collation key values are inside the supported range. \n\n"\
1.368 + "According to Unicode Standard, the range suppored by tool COLTAB:\n"\
1.369 + " Level 0 (primary): 0000 - FFFF, \n"\
1.370 + " Level 1 (Secondary): 0020 - 011E, \n"\
1.371 + " Level 2 (Tertiary): 0001 - 003F. \n"\
1.372 + "Please edit your collation files and make sure key values are inside the above range";
1.373 + exit(1);
1.374 + break;
1.375 + default:
1.376 + PrintUsage();
1.377 + cout << "\n\nSpecify /h1 for help on the use of CANONSEQ\n";
1.378 + cout << "Specify /h2 for help on making compilable files that do not need editing\n";
1.379 + exit(1);
1.380 + break;
1.381 + }
1.382 + }
1.383 +
// Return the first (high) UTF-16 surrogate for aCode, computed as
// 0xD7C0 + (aCode >> 10); 0xD7C0 is 0xD800 minus (0x10000 >> 10).
// No range check is made on aCode.
short HighSurrogate(int aCode)
	{
	const int KLeadOffset = 0xD800 - (0x10000 >> 10);
	const int lead = KLeadOffset + (aCode >> 10);
	return static_cast<short>(lead);
	}
1.388 +
// Return the second (low) UTF-16 surrogate for aCode: 0xDC00 combined with the
// low ten bits of aCode. No range check is made on aCode.
short LowSurrogate(int aCode)
	{
	const int KTrailBase = 0xDC00;
	const int KTenBitMask = 0x3FF;
	const int trail = KTrailBase | (aCode & KTenBitMask);
	return static_cast<short>(trail);
	}
1.393 +
1.394 +int main(int argc,char** argv)
1.395 + {
1.396 + bool copyright = false;
1.397 + bool wgl4 = false;
1.398 + bool allKeys = false;
1.399 + const char* prefix = "";
1.400 + const char* infix = "";
1.401 + const char* locale = "";
1.402 + char* localeArg = 0;
1.403 + char* uidArg = 0;
1.404 + for (int i = 1; i < argc; ++i)
1.405 + {
1.406 + if (argv[i][0] == '/' || argv[i][0] == '-')
1.407 + {
1.408 + switch (argv[i][1])
1.409 + {
1.410 + case 'u':
1.411 + case 'U':
1.412 + {
1.413 + uidArg = argv[i] + 2;
1.414 + const char* uidCheck = uidArg;
1.415 + while (*uidCheck)
1.416 + {
1.417 + if (!isValidHexDigit(*uidCheck))
1.418 + UsageError();
1.419 + ++uidCheck;
1.420 + }
1.421 + if (uidCheck == uidArg || 8 < uidCheck - uidArg)
1.422 + UsageError();
1.423 + break;
1.424 + }
1.425 + case 'c':
1.426 + case 'C':
1.427 + copyright = true;
1.428 + break;
1.429 + case 'a':
1.430 + allKeys = true;
1.431 + break;
1.432 + case 'h':
1.433 + case 'H':
1.434 + PrintHelp(argv[i] + 2);
1.435 + break;
1.436 + default:
1.437 + UsageError();
1.438 + break;
1.439 + }
1.440 + }
1.441 + else if (!localeArg)
1.442 + localeArg = argv[i];
1.443 + else
1.444 + UsageError();
1.445 + }
1.446 + if (!localeArg)
1.447 + UsageError();
1.448 + bool standard = false;
1.449 + if (!_stricmp(localeArg, "standard"))
1.450 + {
1.451 + locale = "Standard";
1.452 + standard = true;
1.453 + }
1.454 + else if (!_stricmp(localeArg, "wgl4"))
1.455 + {
1.456 + locale = "Wgl4";
1.457 + wgl4 = true;
1.458 + standard = true;
1.459 + }
1.460 + else
1.461 + {
1.462 + locale = prefix = localeArg;
1.463 + infix = "_";
1.464 + }
1.465 +
1.466 + Reader* reader = new Reader(wgl4, standard, locale, uidArg);
1.467 + if (!reader)
1.468 + {
1.469 + cout << "out of memory\n";
1.470 + exit(1);
1.471 + }
1.472 + char* filename = new char[strlen(prefix) + strlen(infix) + 64];
1.473 + if (allKeys == false)
1.474 + {
1.475 + sprintf(filename,"%s%scompkeys.txt",prefix,infix);
1.476 + reader->ReadCompKeys(filename);
1.477 + if (!standard)
1.478 + {
1.479 + sprintf(filename,"%s%sstrings.txt",prefix,infix);
1.480 + reader->ReadStrings(filename);
1.481 + }
1.482 + sprintf(filename,"%s%sbasekeys.txt",prefix,infix);
1.483 + reader->ReadBaseKeys(filename);
1.484 + }
1.485 + else
1.486 + {
1.487 + sprintf(filename,"%s%sAllKeys.txt",prefix,infix);
1.488 + reader->ReadAllKeys(filename);
1.489 + }
1.490 + sprintf(filename,"ls_%s.cpp", localeArg);
1.491 + reader->WriteOutput(filename, copyright);
1.492 +
1.493 + delete reader;
1.494 + delete [] filename;
1.495 + return 0;
1.496 + }
1.497 +
1.498 +Reader::Reader(bool aWgl4, bool aStandard,
1.499 + const char* aLocaleName, const char* aUidString):
1.500 + iKeys(0),
1.501 + iIndices(0),
1.502 + iStringElements(0),
1.503 + iStringIndices(0),
1.504 + iInputFileName(NULL),
1.505 + iLineNumber(0),
1.506 + iSuppressCanonseqWarning(false),
1.507 + iWgl4(aWgl4),
1.508 + iStandard(aStandard),
1.509 + iLocaleName(aLocaleName),
1.510 + iUidString(aUidString)
1.511 + {
1.512 + if (iStandard)
1.513 + {
1.514 + iCPlusPlusIdentifier = new char[9];
1.515 + strcpy(iCPlusPlusIdentifier, "Standard");
1.516 + return;
1.517 + }
1.518 + char* p = iCPlusPlusIdentifier = new char[strlen(aLocaleName) + 2];
1.519 + int current = toupper(aLocaleName[0]);
1.520 + if (current < 'A' || 'Z' < current)
1.521 + *p++ = 'C';
1.522 + else
1.523 + {
1.524 + *p++ = static_cast<char>(current);
1.525 + ++aLocaleName;
1.526 + }
1.527 + bool inUnderScore = false;
1.528 + while (*aLocaleName)
1.529 + {
1.530 + current = tolower(*aLocaleName++);
1.531 + if (current < 'a' || 'z' < current)
1.532 + {
1.533 + if (!inUnderScore)
1.534 + {
1.535 + inUnderScore = true;
1.536 + *p++ = '_';
1.537 + }
1.538 + }
1.539 + else
1.540 + {
1.541 + inUnderScore = false;
1.542 + *p++ = static_cast<char>(current);
1.543 + }
1.544 + }
1.545 + *p = 0;
1.546 + }
1.547 +
1.548 +Reader::~Reader()
1.549 + {
1.550 + delete [] iCPlusPlusIdentifier;
1.551 + }
1.552 +
1.553 +// Get a hex number of exactly four digits from aString. Return -1 if none is found and aTolerate is true.
1.554 +int Reader::Hex(const char *aString, int &aCharConsumed, bool aTolerate)
1.555 + {
1.556 + char *end;
1.557 + unsigned long x = strtoul(aString,&end,16);
1.558 + aCharConsumed = end - aString;
1.559 + if ((aCharConsumed != 4) && (aCharConsumed != 5) && (aCharConsumed != 6))
1.560 + {
1.561 + if (!aTolerate)
1.562 + {
1.563 + cout << "bad hex number on line " << iLineNumber << " of file " << iInputFileName << '\n';
1.564 + exit(1);
1.565 + }
1.566 + return -1;
1.567 + }
1.568 + return x;
1.569 + }
1.570 +
1.571 +// Get a collation value from a string of the form [.xxxx.xxxx.xxxx.xxxx]
1.572 +void Reader::GetCollationKey(const char* aString, int& aCharConsumed, CollationKey* aKey)
1.573 + {
1.574 + aCharConsumed = 0;
1.575 + const char *end = strchr(aString, ']');
1.576 + if (end != NULL){
1.577 + aCharConsumed = end - aString;
1.578 + }
1.579 +
1.580 + if (aString[0] != '[' || (aCharConsumed != 21 && aCharConsumed != 22 && aCharConsumed != 23))
1.581 + {
1.582 + cout << "syntax error on line " << iLineNumber << " of file " << iInputFileName << '\n';
1.583 + exit(1);
1.584 + }
1.585 + if (aKey == NULL)
1.586 + {
1.587 + if (iKeys >= EMaxCollationKeys)
1.588 + {
1.589 + cout << "too many keys";
1.590 + exit(1);
1.591 + }
1.592 + aKey = &iCollationKey[iKeys++];
1.593 + }
1.594 + aKey->iIgnorable = aString[1] == '*'; // asterisk means that this character is normally ignored
1.595 + int charConsumed = 0;
1.596 + for (int i = 0; i < CollationKey::ELevels; i++)
1.597 + aKey->iLevel[i] = Hex(aString + 2 + i * 5, charConsumed);
1.598 +
1.599 + if (aKey->iLevel[1] > 0 && (aKey->iLevel[1] < KLevel1Min || aKey->iLevel[1] > KLevel1Max))
1.600 + {
1.601 + aKey->iLevel[1] = KLevel1Max;
1.602 + cout << "illegal level-1 key value on line " << iLineNumber << "; outside the range " << KLevel1Min << ".." << KLevel1Max << "\n";
1.603 + cout << "Error: illegal key value in file, please see coltab /h3 for details.\n";
1.604 + exit(1);
1.605 + }
1.606 +
1.607 + if (aKey->iLevel[2] > 0 && (aKey->iLevel[2] < KLevel2Min || aKey->iLevel[2] > KLevel2Max))
1.608 + {
1.609 + cout << "illegal level-2 key value on line " << iLineNumber << "; outside the range " << KLevel2Min << ".." << KLevel2Max << "\n";
1.610 + cout << "Error: illegal key value in file, please see coltab /h3 for details.\n";
1.611 + exit(1);
1.612 + }
1.613 +
1.614 + aKey->iStop = true;
1.615 + }
1.616 +
1.617 +void Reader::GetMultipleCollationKeys(const char* aString)
1.618 + {
1.619 + int keyCount = 0;
1.620 + int charConsumed =0;
1.621 + while (aString[0] == '[')
1.622 + {
1.623 + GetCollationKey(aString, charConsumed);
1.624 +
1.625 + keyCount++;
1.626 + iCollationKey[iKeys - 1].iStop = false;
1.627 + int length = strlen(aString);
1.628 + if (length <= charConsumed + 1)
1.629 + break;
1.630 + aString += charConsumed + 1;
1.631 +
1.632 + if (aString[0] == ' ') //a space is put between collation keys in keys files provided by previous Unicode Standard (i.e 3.1)
1.633 + aString++;
1.634 +
1.635 + }
1.636 + iCollationKey[iKeys - 1].iStop = true;
1.637 + }
1.638 +
1.639 +/*
1.640 +Partially parse a line, returning its key code and the start of its first block of key data.
1.641 +Return false if it is not a data line, or not relevant.
1.642 +*/
1.643 +bool Reader::ParseLine(const char* aLine, int aCode[16], int& aCodeCount, int& aKeyStart, int& aKeyCount)
1.644 + {
1.645 + int lineLength = strlen(aLine);
1.646 + int charConsumed = 0;
1.647 + aCodeCount = 0;
1.648 + aCode[0] = Hex(aLine,charConsumed,true);
1.649 +
1.650 + /*
1.651 + A data line must start with a hex number and be at least 27 characters long.
1.652 + Canonically decomposable Unicode characters are skipped.
1.653 + Skip non-WGL4 characters if doing WGL4 only.
1.654 + */
1.655 + if (aCode[0] != -1)
1.656 + {
1.657 + aCodeCount = 1;
1.658 + if (!strcmp(aLine + lineLength - 8,"CANONSEQ"))
1.659 + {
1.660 + if (!iSuppressCanonseqWarning)
1.661 + {
1.662 + cout << "Warning: CANONSEQ used in file " << iInputFileName
1.663 + << " on line " << iLineNumber << ".\nWarning: All mappings specifying CANONSEQ are ignored.\n"
1.664 + << "Warning: Use coltab /h1 for more details.";
1.665 + iSuppressCanonseqWarning = true;
1.666 + }
1.667 + aCodeCount = 0;
1.668 + }
1.669 + else if (lineLength < 27 ||
1.670 + (iWgl4 && !InWgl4((unsigned int)aCode)))
1.671 + aCodeCount = 0;
1.672 + }
1.673 +
1.674 + if (aCode[0] != -1)
1.675 + {
1.676 + // find '['
1.677 + aKeyStart = charConsumed;
1.678 + while (aKeyStart < lineLength && aLine[aKeyStart] != '[')
1.679 + aKeyStart++;
1.680 +
1.681 + // read all hex before '['
1.682 + int index = charConsumed + 1;
1.683 + while (index < aKeyStart)
1.684 + {
1.685 + aCode[aCodeCount] = Hex(aLine+index, charConsumed, true);
1.686 + if (aCode[aCodeCount] == -1)
1.687 + break;
1.688 +
1.689 + index += charConsumed + 1;
1.690 + aCodeCount++;
1.691 + }
1.692 +
1.693 + // find number of collation keys
1.694 + aKeyCount = 0;
1.695 + index = aKeyStart;
1.696 + while (index < lineLength && aLine[index] != '%' && aLine[index] != '#')
1.697 + {
1.698 + if (aLine[index] == '[')
1.699 + aKeyCount++;
1.700 + index++;
1.701 + }
1.702 + }
1.703 +
1.704 + return aCodeCount > 0;
1.705 + }
1.706 +
1.707 +void Reader::AddKeyOneToOne(const char* aLine, const int aCode, const int aKeyStart)
1.708 + {
1.709 + if (iIndices >= EMaxCollationIndices)
1.710 + {
1.711 + cout << "too many Unicode values";
1.712 + exit(1);
1.713 + }
1.714 + CollationIndex& index = iCollationIndex[iIndices++];
1.715 + index.iCode = aCode;
1.716 + index.iIndex = -1;
1.717 +
1.718 + /*
1.719 + First try to find the key in the array of keys found so far.
1.720 + Search backwards to use the fact that runs of the same key occur together.
1.721 + */
1.722 + CollationKey key;
1.723 + int charConsumed = 0;
1.724 + GetCollationKey(aLine + aKeyStart, charConsumed, &key);
1.725 + for (int i = iKeys - 1; i >= 0 && index.iIndex == -1; i--)
1.726 + if (iCollationKey[i] == key)
1.727 + index.iIndex = i;
1.728 +
1.729 + // If that fails, add a new key.
1.730 + if (index.iIndex == -1)
1.731 + {
1.732 + index.iIndex = iKeys++;
1.733 + if (iKeys > EMaxCollationKeys)
1.734 + {
1.735 + cout << "too many keys";
1.736 + exit(1);
1.737 + }
1.738 + iCollationKey[index.iIndex] = key;
1.739 + }
1.740 + }
1.741 +/*
1.742 +Read 1-to-1 mapping. Sample:
1.743 +02B9 ; [*02A5.0020.0002.02B9] % MODIFIER LETTER PRIME
1.744 +
1.745 +aCombinedFile = true: aFileName is combined file, which contains base keys, comp keys, and string keys.
1.746 +*/
1.747 +void Reader::ReadBaseKeys(const char* aFileName)
1.748 + {
1.749 + iSuppressCanonseqWarning = iStandard || iWgl4;
1.750 + iLineNumber = 0;
1.751 + iInputFileName = aFileName;
1.752 + ifstream input_file;
1.753 +
1.754 +#ifdef __MSVCDOTNET__
1.755 + input_file.open(iInputFileName, ios::in);
1.756 +#else //!__MSVCDOTNET__
1.757 + input_file.open(iInputFileName, ios::in | ios::nocreate);
1.758 +#endif //__MSVCDOTNET__
1.759 +
1.760 + if (input_file.fail())
1.761 + {
1.762 + cout << "cannot open input file '" << iInputFileName << "'\n";
1.763 + exit(1);
1.764 + }
1.765 + cout << "reading base keys from '" << iInputFileName << "'\n";
1.766 +
1.767 + char line[1024];
1.768 + for (;;)
1.769 + {
1.770 + input_file.getline(line,sizeof(line));
1.771 + if (input_file.eof())
1.772 + break;
1.773 + iLineNumber++;
1.774 + // line number counting
1.775 + if (iLineNumber % 100 == 0)
1.776 + {
1.777 + cout << "line " << iLineNumber << '\n';
1.778 + cout.flush();
1.779 + }
1.780 + int code[16];
1.781 + int codeCount = 0;
1.782 + int key_start = 0;
1.783 + int keyCount = 0;
1.784 + if (ParseLine(line, code, codeCount, key_start, keyCount))
1.785 + {
1.786 + if (codeCount != 1 || keyCount != 1)
1.787 + continue; // goto next line
1.788 + AddKeyOneToOne(line, code[0], key_start);
1.789 + }
1.790 + }
1.791 +
1.792 + input_file.close();
1.793 + }
1.794 +
1.795 +void Reader::AddKeyOneToMuch(const char* aLine, const int aCode, const int aKeyStart)
1.796 + {
1.797 + if (iIndices >= EMaxCollationIndices)
1.798 + {
1.799 + cout << "too many Unicode values";
1.800 + exit(1);
1.801 + }
1.802 + CollationIndex& index = iCollationIndex[iIndices++];
1.803 + index.iCode = aCode;
1.804 + index.iIndex = iKeys;
1.805 + GetMultipleCollationKeys(aLine + aKeyStart);
1.806 + }
1.807 +/*
1.808 +Read 1-to-much mapping.
1.809 +3303 ; [.279F.0020.001C.3303][.1114.0020.001C.3303][.27C7.0020.001F.3303] # SQUARE AARU; QQKN
1.810 +*/
1.811 +void Reader::ReadCompKeys(const char* aFileName)
1.812 + {
1.813 + iSuppressCanonseqWarning = iStandard || iWgl4;
1.814 + iLineNumber = 0;
1.815 + iInputFileName = aFileName;
1.816 + ifstream input_file;
1.817 +
1.818 +#ifdef __MSVCDOTNET__
1.819 + input_file.open(iInputFileName, ios::in);
1.820 +#else //!__MSVCDOTNET__
1.821 + input_file.open(iInputFileName, ios::in | ios::nocreate);
1.822 +#endif //__MSVCDOTNET__
1.823 +
1.824 + if (input_file.fail())
1.825 + {
1.826 + cout << "there are no composite keys; '" << iInputFileName << "' not found\n";
1.827 + return;
1.828 + }
1.829 + cout << "reading composite keys from '" << iInputFileName << "'\n";
1.830 +
1.831 + char line[1024];
1.832 + for (;;)
1.833 + {
1.834 + input_file.getline(line,sizeof(line));
1.835 + if (input_file.eof())
1.836 + break;
1.837 + iLineNumber++;
1.838 + // line number counting
1.839 + if (iLineNumber % 100 == 0)
1.840 + {
1.841 + cout << "line " << iLineNumber << '\n';
1.842 + cout.flush();
1.843 + }
1.844 + int code[16];
1.845 + int codeCount = 0;
1.846 + int key_start = 0;
1.847 + int keyCount = 0;
1.848 + if (ParseLine(line, code, codeCount, key_start, keyCount))
1.849 + {
1.850 + if (codeCount != 1 || keyCount < 2)
1.851 + continue; // goto next line
1.852 + AddKeyOneToMuch(line, code[0], key_start);
1.853 + }
1.854 + }
1.855 +
1.856 + input_file.close();
1.857 + }
1.858 +
1.859 +
1.860 +void Reader::AddKeyMuchToMuch(const char* aLine, const int aCode[16], const int aCodeCount, const int aKeyStart)
1.861 + {
1.862 +
1.863 + // Store the index to the Unicode string and the key sequence.
1.864 + if (iStringIndices > EMaxStringIndices)
1.865 + {
1.866 + cout << "too many string indices";
1.867 + exit(1);
1.868 + }
1.869 + iStringIndex[iStringIndices++] = (iStringElements << 16) | iKeys;
1.870 +
1.871 + // Reserve space for the length.
1.872 + if (iStringElements >= EMaxStringElements)
1.873 + {
1.874 + cout << "too many string elements";
1.875 + exit(1);
1.876 + }
1.877 + iStringElements++;
1.878 +
1.879 + // Read the Unicode string.
1.880 + int length = 0; // in unit of int16
1.881 + int charCount = 0; // in unit of char. for debug.
1.882 +
1.883 + for (int i=0; i<aCodeCount; i++)
1.884 + {
1.885 + if (iStringElements >= EMaxStringElements)
1.886 + {
1.887 + cout << "too many string elements";
1.888 + exit(1);
1.889 + }
1.890 +
1.891 + if (aCode[i] > 0xFFFF)
1.892 + {
1.893 + // UCS4 --> UTF-16
1.894 + iStringElement[iStringElements++] = 0xD7C0 + (aCode[i] >> 10);
1.895 + iStringElement[iStringElements++] = 0xDC00 | (aCode[i] & 0x3FF);
1.896 + length += 2;
1.897 + }
1.898 + else
1.899 + {
1.900 + iStringElement[iStringElements++] = aCode[i];
1.901 + length++;
1.902 + }
1.903 + charCount++;
1.904 + }
1.905 +
1.906 + iStringElement[iStringElements - length - 1] = (unsigned int)length;
1.907 +
1.908 + // Read the key sequence.
1.909 + GetMultipleCollationKeys(aLine + aKeyStart);
1.910 + }
1.911 +/*
1.912 +Read much-to-much mapping. Sample:
1.913 +004F 0338 [.08EA.0020.0008.00D8] % capital O-stroke
1.914 +0E40 0E08 ; [.1E2B.0020.0002.0E08][.1E5E.0020.001F.0E40] # <THAI CHARACTER SARA E, THAI CHARACTER CHO CHAN>
1.915 +*/
1.916 +void Reader::ReadStrings(const char* aFileName)
1.917 + {
1.918 + iSuppressCanonseqWarning = iStandard || iWgl4;
1.919 + iLineNumber = 0;
1.920 + iInputFileName = aFileName;
1.921 + ifstream input_file;
1.922 +
1.923 +#ifdef __MSVCDOTNET__
1.924 + input_file.open(iInputFileName, ios::in);
1.925 +#else //!__MSVCDOTNET__
1.926 + input_file.open(iInputFileName, ios::in | ios::nocreate);
1.927 +#endif //__MSVCDOTNET__
1.928 +
1.929 + if (input_file.fail())
1.930 + {
1.931 + cout << "there are no strings; '" << iInputFileName << "' not found\n";
1.932 + return;
1.933 + }
1.934 + cout << "reading strings from '" << iInputFileName << "'\n";
1.935 +
1.936 + char line[1024];
1.937 + for (;;)
1.938 + {
1.939 + input_file.getline(line,sizeof(line));
1.940 + if (input_file.eof())
1.941 + break;
1.942 + iLineNumber++;
1.943 + // line number counting
1.944 + if (iLineNumber % 100 == 0)
1.945 + {
1.946 + cout << "line " << iLineNumber << '\n';
1.947 + cout.flush();
1.948 + }
1.949 + int code[16];
1.950 + int codeCount = 0;
1.951 + int key_start = 0;
1.952 + int keyCount = 0;
1.953 + if (ParseLine(line, code, codeCount, key_start, keyCount))
1.954 + {
1.955 + if (codeCount < 2 || keyCount < 1)
1.956 + continue; // goto next line
1.957 + AddKeyMuchToMuch(line, code, codeCount, key_start);
1.958 + }
1.959 + }
1.960 +
1.961 + input_file.close();
1.962 + }
1.963 +
1.964 +/*
1.965 +Read combined key table. Sample:
1.966 +1-to-1 mapping:
1.967 +02B9 ; [*02A5.0020.0002.02B9] % MODIFIER LETTER PRIME
1.968 +
1.969 +1-to-much mapping:
1.970 +3303 ; [.279F.0020.001C.3303][.1114.0020.001C.3303][.27C7.0020.001F.3303] # SQUARE AARU; QQKN
1.971 +
1.972 +much-to-much mapping:
1.973 +004F 0338 [.08EA.0020.0008.00D8] % capital O-stroke
1.974 +0E40 0E08 ; [.1E2B.0020.0002.0E08][.1E5E.0020.001F.0E40] # <THAI CHARACTER SARA E, THAI CHARACTER CHO CHAN>
1.975 +*/
1.976 +void Reader::ReadAllKeys(const char* aFileName)
1.977 + {
1.978 + iSuppressCanonseqWarning = iStandard || iWgl4;
1.979 + iLineNumber = 0;
1.980 + iInputFileName = aFileName;
1.981 + ifstream input_file;
1.982 +
1.983 +#ifdef __MSVCDOTNET__
1.984 + input_file.open(iInputFileName, ios::in);
1.985 +#else //!__MSVCDOTNET__
1.986 + input_file.open(iInputFileName, ios::in | ios::nocreate);
1.987 +#endif //__MSVCDOTNET__
1.988 +
1.989 + if (input_file.fail())
1.990 + {
1.991 + cout << "there are no keys; '" << iInputFileName << "' not found\n";
1.992 + return;
1.993 + }
1.994 + cout << "reading all keys from '" << iInputFileName << "'\n";
1.995 +
1.996 + char line[1024];
1.997 + for (;;)
1.998 + {
1.999 + if (input_file.eof())
1.1000 + break;
1.1001 + input_file.getline(line,sizeof(line));
1.1002 + iLineNumber++;
1.1003 +
1.1004 + int code[16];
1.1005 + int codeCount = 0;
1.1006 + int key_start = 0;
1.1007 + int keyCount = 0;
1.1008 + if (ParseLine(line, code, codeCount, key_start, keyCount))
1.1009 + {
1.1010 + if (codeCount == 1 && keyCount == 1)
1.1011 + AddKeyOneToOne(line, code[0], key_start);
1.1012 + else if (codeCount == 1 && keyCount > 1)
1.1013 + AddKeyOneToMuch(line, code[0], key_start);
1.1014 + else if (codeCount > 1 && keyCount > 0)
1.1015 + AddKeyMuchToMuch(line, code, codeCount, key_start);
1.1016 + else
1.1017 + cout << "ignore line: " << line << "\n";
1.1018 + }
1.1019 + }
1.1020 +
1.1021 + input_file.close();
1.1022 + }
1.1023 +
1.1024 +
1.1025 +// Pack the 3 collation key levels into a single 32-bit integer.
1.1026 +unsigned int Reader::PackKey(const CollationKey& aValue)
1.1027 + {
1.1028 + unsigned int level0 = aValue.iLevel[0];
1.1029 + unsigned int level1 = aValue.iLevel[1];
1.1030 + if (level1 > 0)
1.1031 + level1 -= (KLevel1Min - 1);
1.1032 + unsigned int level2 = aValue.iLevel[2];
1.1033 + if (level2 > 0)
1.1034 + level2 -= (KLevel2Min - 1);
1.1035 + unsigned int key = level0 << 16 | level1 << 8 | level2 << 2;
1.1036 + if (aValue.iIgnorable)
1.1037 + key |= 2;
1.1038 + if (aValue.iStop)
1.1039 + key |= 1;
1.1040 + return key;
1.1041 + }
1.1042 +
1.1043 +// Pack a collation index value into a single 32-bit integer.
1.1044 +int Reader::PackIndex(const CollationIndex& aValue, unsigned int result[2])
1.1045 + {
1.1046 + unsigned int code = aValue.iCode;
1.1047 + unsigned int index = aValue.iIndex;
1.1048 + if (code <= 0xFFFF)
1.1049 + {
1.1050 + result[0] = (code << 16 | index);
1.1051 + return 1;
1.1052 + }
1.1053 + else
1.1054 + {
1.1055 + result[0] = (::HighSurrogate(code) << 16 | index);
1.1056 + result[1] = (::LowSurrogate(code) << 16 | index);
1.1057 + return 2;
1.1058 + }
1.1059 + }
1.1060 +
1.1061 +const Reader* TheReader;
1.1062 +static int CompareStringIndices(const void* aIndex1,const void* aIndex2)
1.1063 + {
1.1064 + return TheReader->CompareStringIndices(*(unsigned int*)aIndex1 >> 16,*(unsigned int*)aIndex2 >> 16);
1.1065 + }
1.1066 +
/*
Compare two strings of Unicode values element by element.

Returns 0 if the strings are identical; otherwise the difference between the first pair
of elements that differ (negative if aString1 sorts first). Where one string has run out
its missing element counts as -1, so a string sorts before any longer string it is a
prefix of.
*/
int CompareUnicodeStrings(const int *aString1,int aLength1,const int *aString2,int aLength2)
	{
	const int longest = aLength1 > aLength2 ? aLength1 : aLength2;
	for (int pos = 0; pos < longest; ++pos)
		{
		const int left = pos < aLength1 ? aString1[pos] : -1;
		const int right = pos < aLength2 ? aString2[pos] : -1;
		if (left != right)
			return left - right;
		}
	return 0;
	}
1.1078 +
1.1079 +int Reader::CompareStringIndices(int aIndex1,int aIndex2) const
1.1080 + {
1.1081 + return CompareUnicodeStrings(iStringElement + aIndex1 + 1,iStringElement[aIndex1],
1.1082 + iStringElement + aIndex2 + 1,iStringElement[aIndex2]);
1.1083 + }
1.1084 +
1.1085 +void Reader::WriteOutput(const char* aFileName, bool aCopyright)
1.1086 + {
1.1087 + int i;
1.1088 + ofstream output_file;
1.1089 + output_file.open(aFileName);
1.1090 + if (output_file.fail())
1.1091 + {
1.1092 + cout << "cannot open output file '" << aFileName << "'\n";
1.1093 + exit(1);
1.1094 + }
1.1095 + cout << "writing output to '" << aFileName << "'\n";
1.1096 +
1.1097 + char *locale = NULL;
1.1098 + if (iStandard)
1.1099 + locale = _strdup("Standard");
1.1100 + else
1.1101 + locale = _strdup(iLocaleName);
1.1102 +
1.1103 + if (!iStandard)
1.1104 + {
1.1105 + _strlwr(locale);
1.1106 + locale[0] = (char)toupper(locale[0]);
1.1107 + if (aCopyright)
1.1108 + {
1.1109 + char* capsFileName = new char[strlen(aFileName) + 1];
1.1110 + strcpy(capsFileName, aFileName);
1.1111 + _strupr(capsFileName);
1.1112 + output_file << "/*\n" << capsFileName << "\n\nCopyright (C) 2000-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.\n*/\n";
1.1113 + delete [] capsFileName;
1.1114 + output_file << "\n/*\nThe LCharSet object used by the " << locale << " locale.\n";
1.1115 + output_file << "Generated by COLTAB.\n*/\n";
1.1116 + }
1.1117 +
1.1118 + output_file << "\n#include \"ls_std.h\"\n#include <collate.h>\n";
1.1119 + output_file << "\nconst TUint KUid" << iCPlusPlusIdentifier << "CollationMethod = ";
1.1120 + if (iUidString)
1.1121 + output_file << "0x" << iUidString << ";\n";
1.1122 + else
1.1123 + {
1.1124 + output_file << "/* FILL THIS IN */;\n";
1.1125 + cout << "Warning: File will need editing\nWarning: see coltab /h2 for details.\n";
1.1126 + }
1.1127 + }
1.1128 +
1.1129 + /*
1.1130 + Write the unique collation keys.
1.1131 + Each one has the format, going from highest to lowest bit:
1.1132 +
1.1133 + 16 bits: level-0 key
1.1134 + 8 bits: level-1 key
1.1135 + 6 bits: level-2 key
1.1136 + 1 bit: set if this key is optionally ignorable
1.1137 + 1 bit: set if this is the last key in the string of keys for a single Unicode value
1.1138 +
1.1139 + */
1.1140 + if (iKeys != 0)
1.1141 + {
1.1142 + output_file << "\nstatic const TUint32 The" << iCPlusPlusIdentifier << "Key[] = \n\t{";
1.1143 + CollationKey* ck = iCollationKey;
1.1144 + output_file << "\t // " << iKeys << " keys";
1.1145 + output_file << hex;
1.1146 + for (i = 0; i < iKeys; i++, ck++)
1.1147 + {
1.1148 + unsigned int key = PackKey(*ck);
1.1149 + if (i % 8 == 0)
1.1150 + output_file << "\n\t";
1.1151 + output_file << "0x";
1.1152 + output_file << key << ",";
1.1153 + }
1.1154 + output_file << dec;
1.1155 + output_file << "\n\t};\n\n";
1.1156 + }
1.1157 +
1.1158 + if (iIndices != 0)
1.1159 + {
1.1160 + // Sort then write the collation index values - these relate Unicode values to collation keys.
1.1161 + qsort(iCollationIndex,iIndices,sizeof(CollationIndex),CollationIndex::Compare);
1.1162 + output_file << "static const TUint32 The" << iCPlusPlusIdentifier << "Index[] = \n\t{";
1.1163 + CollationIndex* ci = iCollationIndex;
1.1164 + int entry=0;
1.1165 + output_file << "\t // " << iIndices << " indices";
1.1166 + output_file << hex;
1.1167 + for (i = 0; i < iIndices; i++, ci++, entry++)
1.1168 + {
1.1169 + unsigned int key[2];
1.1170 + int bytecount = PackIndex(*ci, key);
1.1171 +
1.1172 + if (entry % 8 == 0)
1.1173 + output_file << "\n\t";
1.1174 + output_file << "0x";
1.1175 + output_file << key[0] << ",";
1.1176 +
1.1177 + if (bytecount == 2)
1.1178 + {
1.1179 + entry++;
1.1180 + if (entry % 8 == 0)
1.1181 + output_file << "\n\t";
1.1182 + output_file << "0x";
1.1183 + output_file << key[1] << ",";
1.1184 + }
1.1185 + }
1.1186 + output_file << dec;
1.1187 + output_file << "\n\t};";
1.1188 + output_file << "\t // " << entry << " entries";
1.1189 + output_file << "\n\n";
1.1190 + iIndices = entry; //One surrogate pair occupies 2 entries
1.1191 + }
1.1192 +
1.1193 + if (iStringElements)
1.1194 + {
1.1195 + // Write the Unicode strings; these are preceded by their lengths.
1.1196 + output_file << "static const TUint16 The" << iCPlusPlusIdentifier << "StringElement[] = \n\t{";
1.1197 + output_file << hex;
1.1198 + for (i = 0; i < iStringElements; i++)
1.1199 + {
1.1200 + if (i % 8 == 0)
1.1201 + output_file << "\n\t";
1.1202 + output_file << "0x" << iStringElement[i] << ",";
1.1203 + }
1.1204 + output_file << dec;
1.1205 + if (iStringElements==0)
1.1206 + output_file << "0";
1.1207 + output_file << "\n\t};\n\n";
1.1208 +
1.1209 + /*
1.1210 + Sort then write the string index values - these relate Unicode strings to collation keys.
1.1211 + Each one has the string index in the upper word and the key index in the lower word.
1.1212 + */
1.1213 + TheReader = this;
1.1214 + qsort(iStringIndex,iStringIndices,sizeof(iStringIndex[0]),::CompareStringIndices);
1.1215 + output_file << "static const TUint32 The" << iCPlusPlusIdentifier << "StringIndex[] = \n\t{";
1.1216 + output_file << hex;
1.1217 + for (i = 0; i < iStringIndices; i++)
1.1218 + {
1.1219 + if (i % 8 == 0)
1.1220 + output_file << "\n\t";
1.1221 + output_file << "0x" << iStringIndex[i] << ",";
1.1222 + }
1.1223 + output_file << dec;
1.1224 + if (iStringIndices ==0)
1.1225 + output_file << "0";
1.1226 + output_file << "\n\t};\n\n";
1.1227 + }
1.1228 +
1.1229 + // Write the collation table structure.
1.1230 + output_file << "static const TCollationKeyTable The" << iCPlusPlusIdentifier << "Table = \n\t{ ";
1.1231 + if (iKeys)
1.1232 + output_file << "The" << iCPlusPlusIdentifier << "Key";
1.1233 + else
1.1234 + output_file << "0";
1.1235 + if (iIndices)
1.1236 + output_file << ", The" << iCPlusPlusIdentifier << "Index, " << iIndices;
1.1237 + else
1.1238 + output_file << ", 0, 0";
1.1239 + if (iStringElements)
1.1240 + output_file << ", The" << iCPlusPlusIdentifier << "StringElement, The" << iCPlusPlusIdentifier << "StringIndex, " << iStringIndices << " };\n";
1.1241 + else
1.1242 + output_file << ", 0, 0, 0 };\n";
1.1243 +
1.1244 + if (!iStandard)
1.1245 + output_file << "\nstatic const TCollationMethod TheCollationMethod[] = \n"\
1.1246 + " {\n"\
1.1247 + " {\n"\
1.1248 + " KUid" << iCPlusPlusIdentifier << "CollationMethod, // the method for the locale\n"\
1.1249 + " NULL, // use the standard table as the main table\n"\
1.1250 + " &The" << iCPlusPlusIdentifier << "Table, // the locale values override the standard values\n"\
1.1251 + " 0 // the flags are standard\n"\
1.1252 + " },\n"\
1.1253 + " {\n"\
1.1254 + " KUidBasicCollationMethod, // the standard unlocalised method\n"\
1.1255 + " NULL, // null means use the standard table\n"\
1.1256 + " NULL, // there's no override table\n"\
1.1257 + " 0 // the flags are standard\n"\
1.1258 + " }\n"\
1.1259 + " };\n"\
1.1260 + "\n"\
1.1261 + "static const TCollationDataSet TheCollationDataSet =\n"\
1.1262 + " {\n"\
1.1263 + " TheCollationMethod,\n"\
1.1264 + " 2\n"\
1.1265 + " };"\
1.1266 + "\n\n"\
1.1267 + "// The one and only locale character set object.\n"\
1.1268 + "const LCharSet TheCharSet =\n"\
1.1269 + " {\n"\
1.1270 + " NULL,\n"\
1.1271 + " &TheCollationDataSet\n"\
1.1272 + " };\n";
1.1273 +
1.1274 + output_file.close();
1.1275 + delete [] locale;
1.1276 + }
1.1277 +
1.1278 +int CollationIndex::Compare(const void* aIndex1,const void* aIndex2)
1.1279 + {
1.1280 + return ((CollationIndex*)aIndex1)->iCode - ((CollationIndex*)aIndex2)->iCode;
1.1281 + }