// Copyright (c) 1999-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// Reads and parses the Unicode collation value table and writes out a C++ source file
// containing the data in a form that can be used by the EPOC collation system.
//
// The program reads either three separate files or one combined file:
//
// Three files (by default):
// 1. Base keys (maps single Unicode values to single collation key values): must be in the same format as
// basekeys.txt, supplied with the Standard Unicode Collation system
//
// 2. Composite keys (maps single Unicode values to strings of collation keys): must be in the same format as
// compkeys.txt, supplied with the Standard Unicode Collation system
//
// 3. Strings (maps strings of Unicode values to single collation keys OR strings of collation keys): must be in the
// same format as compkeys.txt, except that there can be any number of Unicode characters at the start of the line,
// space-separated and each exactly 4 hex digits.
//
// One combined file (with option /a):
// 1. All Keys (combines the above three files into one file): must be in the same format as allkeys.txt, supplied with the Standard Unicode Collation system (after Unicode 3.0).
//
//

// NOTE(review): the header names below were missing from the reviewed text and have
// been restored from the library calls this file makes; confirm against the original.
#include <assert.h>
#include <ctype.h>

#ifdef __MSVCDOTNET__
#include <fstream>
#include <iostream>
using namespace std;
#else //!__MSVCDOTNET__
#include <fstream.h>
#include <iostream.h>
#endif //__MSVCDOTNET__

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/*
Constants constraining the range of level-1 and level-2 keys so that they can be packed.
Non-zero values are reduced by one less than the minimum value.
*/
const unsigned int KLevel1Bits = 8;
const unsigned int KLevel1Min = 0x20;
const unsigned int KLevel1Max = KLevel1Min + (1 << KLevel1Bits) - 2;
const unsigned int KLevel2Bits = 6;
const unsigned int KLevel2Min = 1;
const unsigned int KLevel2Max = KLevel2Min + (1 << KLevel2Bits) - 2;

/*
Table of characters in the WGL4 set, plus characters in canonical decompositions of
those characters, plus commonly used control characters and space characters,
given as ranges of Unicode characters. In each pair, the first code is the first in the range,
and the second is the first code NOT in the range.

The pairs must stay sorted in ascending order and must not overlap, because InWgl4
searches the table with bsearch.

The extra characters are added mainly to ensure that control characters and spaces are
normally ignored. The extra characters are:

0x0000-0x001F: ASCII control characters
0x2000-0x2012: spaces, hyphen variants, figure dash
0x2028-0x202E: line and paragraph separator, bidirectional control characters
0xFEFF       : byte-order mark
0xFFFC-0xFFFD: object replacement character, replacement character
*/
const unsigned int Wgl4Range[] =
{
    0x00, 0x7f,     // All ASCII (0x7F itself is outside the range)
    0xa0, 0x180,    // Non-breaking space, Latin-1, Latin Extended-A
    0x192,0x193,    // Latin f with hook
    0x1fa,0x200,    // A-ring, a-ring, AE, ae, O slash, o slash all with acute accent
    0x2c6,0x2c8,    // non-combining circumflex and caron
    0x2c9,0x2ca,    // non-combining macron
    0x2d8,0x2dc,    // non-combining breve, dot above, ring above, ogonek
    0x2dd,0x2de,    // non-combining double acute
    0x300,0x305,    // combining grave, acute, circumflex, tilde, macron
    0x306,0x309,    // combining breve, dot above, double dot above
    0x30a,0x30e,    // combining ring above, double acute, caron, vertical line above
    0x327,0x329,    // combining cedilla, ogonek
    0x384,0x38b,    // Greek
    0x38c,0x38d,    // Greek
    0x38e,0x3a2,    // Greek
    0x3a3,0x3cf,    // Greek
    0x401,0x40d,    // Cyrillic
    0x40e,0x450,    // Cyrillic
    0x451,0x45d,    // Cyrillic
    0x45e,0x460,    // Cyrillic
    0x490,0x492,    // Cyrillic
    0x1e80,0x1e86,  // Both W and w with each of grave, acute and diaeresis
    0x1ef2,0x1ef4,  // Y with grave, y with grave
    0x2000,0x2016,  // various space and horizontal lines
    0x2017,0x201f,  // double low line, various quotation marks
    0x2020,0x2023,  // dagger, double dagger, bullet
    0x2026,0x2027,  // ellipsis
    0x2028,0x202F,  // line & paragraph separators and directional formatting
    0x2030,0x2031,  // per mille
    0x2032,0x2034,  // prime
    0x2039,0x203b,  // single angle quotation marks
    0x203c,0x203d,  // double exclamation mark
    0x203e,0x203f,  // non-combining overscore
    0x2044,0x2045,  // fraction slash
    0x207f,0x2080,  // superscript n
    0x20a3,0x20a5,  // French Franc, Italian/Turkish Lira
    0x20a7,0x20a8,  // Spanish Peseta
    0x20ac,0x20ad,  // Euro symbol
    0x2105,0x2106,  // care of
    0x2113,0x2114,  // script l
    0x2116,0x2117,  // numero
    0x2122,0x2123,  // trade mark
    0x2126,0x2127,  // ohm
    0x212e,0x212f,  // estimated (net weight)
    0x215b,0x215f,  // 1/8, 3/8, 5/8, 7/8
    0x2190,0x2196,  // horizontal and vertical arrows
    0x21a8,0x21a9,  // up down arrow with base
    0x2202,0x2203,  // partial differential
    0x2206,0x2207,  // increment (delta)
    0x220f,0x2210,  // n-ary product (pi)
    0x2211,0x2213,  // n-ary sum (sigma), minus
    0x2215,0x2216,  // division (slash)
    0x2219,0x221b,  // bullet operator, square root
    0x221e,0x2220,  // infinity, right angle
    0x2229,0x222a,  // intersection
    0x222b,0x222c,  // integral
    0x2248,0x2249,  // almost equal to
    0x2260,0x2262,  // not equal to, identical to
    0x2264,0x2266,  // less-than-or-equal-to, greater-than-or-equal-to
    0x2302,0x2303,  // house
    0x2310,0x2311,  // reversed not sign
    0x2320,0x2322,  // top and bottom of integral
    0x2500,0x2501,  // box drawing
    0x2502,0x2503,  // box drawing
    0x250c,0x250d,  // box drawing
    0x2510,0x2511,  // box drawing
    0x2514,0x2515,  // box drawing
    0x2518,0x2519,  // box drawing
    0x251c,0x251d,  // box drawing
    0x2524,0x2525,  // box drawing
    0x252c,0x252d,  // box drawing
    0x2534,0x2535,  // box drawing
    0x253c,0x253d,  // box drawing
    0x2550,0x256d,  // box drawing
    0x2580,0x2581,  // block element
    0x2584,0x2585,  // block element
    0x2588,0x2589,  // block element
    0x258c,0x258d,  // block element
    0x2590,0x2594,  // block element
    0x25a0,0x25a2,  // geometric shapes
    0x25aa,0x25ad,  // geometric shapes
    0x25b2,0x25b3,  // geometric shapes
    0x25ba,0x25bb,  // geometric shapes
    0x25bc,0x25bd,  // geometric shapes
    0x25c4,0x25c5,  // geometric shapes
    0x25ca,0x25cc,  // geometric shapes
    0x25cf,0x25d0,  // geometric shapes
    0x25d8,0x25da,  // geometric shapes
    0x25e6,0x25e7,  // geometric shapes
    0x263a,0x263d,  // smilies, sun
    0x2640,0x2641,  // female
    0x2642,0x2643,  // male
    0x2660,0x2661,  // spade
    0x2663,0x2664,  // club
    0x2665,0x2667,  // heart
    0x266a,0x266c,  // quaver, beamed quavers
    0xfb01,0xfb03,  // fi, fl ligatures
    0xfeff,0xff00,  // zero-width non-breaking space
    0xfffc, 0xfffe  // object replacement character and replacement character
};
const int Wgl4Ranges = sizeof(Wgl4Range) / sizeof(Wgl4Range[0]) / 2;

/*
bsearch comparator for Wgl4Range.

Each argument points at a pair of unsigned ints. One of them is a probe, recognised by
its two elements being equal (no table range is empty); the other is a [first, limit)
range from the table. The result orders the first argument relative to the second:
negative if it sorts before, positive if it sorts after, zero if the probe lies inside
the range. The sign is now also correct when the probe is passed as the second argument.
*/
int CompareWgl4Ranges(const void* aRange1,const void* aRange2)
{
    const unsigned int* first = static_cast<const unsigned int*>(aRange1);
    const unsigned int* second = static_cast<const unsigned int*>(aRange2);

    const bool probeIsSecond = (second[0] == second[1]);
    const unsigned int* probe = probeIsSecond ? second : first;
    const unsigned int* range = probeIsSecond ? first : second;

    // Position of the probe relative to the range.
    int order = 0;
    if (probe[0] < range[0])
        order = -1;
    else if (probe[0] >= range[1])
        order = 1;

    // When the arguments arrived swapped, the range is the first argument, so invert.
    return probeIsSecond ? -order : order;
}

// Determine if a character is in the WGL4 character repertoire.
static bool InWgl4(unsigned int aChar)
{
    unsigned int key[2];
    key[0] = key[1] = aChar;    // equal bounds mark this pair as the probe
    return bsearch(key,Wgl4Range,Wgl4Ranges,sizeof(Wgl4Range[0]) * 2,CompareWgl4Ranges) != NULL;
}

// A collation key.
sl@0: class CollationKey sl@0: { sl@0: public: sl@0: bool operator==(const CollationKey& k) const sl@0: { return iLevel[0] == k.iLevel[0] && iLevel[1] == k.iLevel[1] && iLevel[2] == k.iLevel[2] && sl@0: iIgnorable == k.iIgnorable && iStop == k.iStop; } sl@0: sl@0: enum sl@0: { sl@0: ELevels = 3 sl@0: }; sl@0: int iLevel[ELevels];// the keys at the various levels sl@0: bool iIgnorable; // TRUE if this key can normally be ignored sl@0: bool iStop; // TRUE if this is the last key in a string of keys sl@0: }; sl@0: sl@0: // The collation index for a single Unicode value. sl@0: class CollationIndex sl@0: { sl@0: public: sl@0: static int Compare(const void* aIndex1,const void* aIndex2); sl@0: sl@0: int iCode; // Unicode value sl@0: int iIndex; // index into the key table sl@0: }; sl@0: sl@0: class Reader sl@0: { sl@0: public: sl@0: Reader(bool aWgl4,bool aStandard,const char* aLocaleName, const char* aUidString); sl@0: ~Reader(); sl@0: void ReadBaseKeys(const char* aFileName); sl@0: void ReadCompKeys(const char* aFileName); sl@0: void ReadStrings(const char* aFileName); sl@0: void ReadAllKeys(const char* aFileName); sl@0: void WriteOutput(const char* aFileName, bool aCopyrightMessage); sl@0: int CompareStringIndices(int aIndex1,int aIndex2) const; sl@0: sl@0: private: sl@0: Reader(const Reader&); sl@0: int Hex(const char *aString, int &aCharConsumed, bool aTolerate = false); sl@0: void GetCollationKey(const char* aString, int& aCharConsumed, CollationKey* aKey=NULL); sl@0: void GetMultipleCollationKeys(const char* aString); sl@0: unsigned int PackKey(const CollationKey& aValue); sl@0: int PackIndex(const CollationIndex& aValue, unsigned int result[2]); sl@0: bool ParseLine(const char* aLine, int aCode[16], int& aCodeCount, int& aKeyStart, int& aKeyCount); sl@0: void AddKeyOneToOne(const char* aLine, const int aCode, const int aKeyStart); sl@0: void AddKeyOneToMuch(const char* aLine, const int aCode, const int aKeyStart); sl@0: void AddKeyMuchToMuch(const char* aLine, 
const int aCode[16], const int aCodeCount, const int aKeyStart); sl@0: sl@0: enum sl@0: { sl@0: EMaxCollationKeys = 0x110000 * 2, /*more elements considering composite keys */ sl@0: EMaxCollationIndices = 0x110000, sl@0: EMaxStringElements = 65536, sl@0: EMaxStringIndices = 65536 sl@0: }; sl@0: CollationKey iCollationKey[EMaxCollationKeys]; sl@0: int iKeys; sl@0: CollationIndex iCollationIndex[EMaxCollationIndices]; sl@0: int iIndices; sl@0: int iStringElement[EMaxStringElements]; sl@0: int iStringElements; sl@0: unsigned int iStringIndex[EMaxStringIndices]; sl@0: int iStringIndices; sl@0: const char* iInputFileName; sl@0: int iLineNumber; sl@0: bool iSuppressCanonseqWarning; // have we issued the canonseq warning yet? sl@0: bool iWgl4; // true if writing keys for wgl4 characters only sl@0: bool iStandard; // true if reading standard files, not tailoring files sl@0: const char* iLocaleName; sl@0: const char* iUidString; sl@0: char* iCPlusPlusIdentifier; // iLocaleName in title case with difficult characters removed sl@0: }; sl@0: sl@0: bool isValidHexDigit(char c) sl@0: { sl@0: if ('0' <= c && c <= '9') sl@0: return true; sl@0: if ('a' <= c && c <= 'f') sl@0: return true; sl@0: if ('A' <= c && c <= 'F') sl@0: return true; sl@0: return false; sl@0: } sl@0: sl@0: void PrintUsage() sl@0: { sl@0: cout << "Usage: coltab [/u] [/c] [/a] [/h] \n"; sl@0: cout << "By Default (without /a option), for the locales 'standard' and 'wgl4' coltab reads basekeys.txt & compkeys.txt\n"; sl@0: cout << "For any other locale name coltab reads _basekeys.txt,\n"; sl@0: cout << "_compkeys.txt and _strings.txt.\n"; sl@0: cout << "Use the /a option, for the locales 'standard' and 'wgl4' coltab reads allkeys.txt\n"; sl@0: cout << "For any other locale name coltab reads _allkeys.txt.\n"; sl@0: cout << "The output file is always ls_.cpp.\n"; sl@0: cout << "Use the /u option to specify the UID that the collation table should have.\n"; sl@0: cout << "A hex number must follow /u immediately, for 
example /u800ACBDE\n"; sl@0: cout << "this hex number must not exceed eight digits. If this is not specified,\n"; sl@0: cout << "the output file will have to be edited to make it compilable.\n"; sl@0: cout << "Specify /c to prefix the output with a Nokia copyright message.\n"; sl@0: cout << "Specify /h for in-depth help."; sl@0: } sl@0: sl@0: void UsageError() sl@0: { sl@0: PrintUsage(); sl@0: exit(1); sl@0: } sl@0: sl@0: void PrintHelp(char* aTopic) sl@0: { sl@0: int topic = 0; sl@0: while ('0' <= *aTopic && *aTopic <= '9') sl@0: { sl@0: topic = topic * 10 + (*aTopic - '0'); sl@0: ++aTopic; sl@0: } sl@0: switch(topic) sl@0: { sl@0: case 1: sl@0: cout << "How Coltab interprets CANONSEQ:\n\n"\ sl@0: "If the CANONSEQ specifier is used in a line, Coltab will ignore the mapping.\n"\ sl@0: "This because, on the Symbian platform, any canonically composed character is\n"\ sl@0: "decomposed before the key mapping is applied, so characters with canonical\n"\ sl@0: "decompositions do not need keys. In files supplied by the Unicode Consortium,\n"\ sl@0: "all mappings for composed characters are flagged by CANONSEQ, so it is useful\n"\ sl@0: "if Coltab can just ignore these so that Unicode Consortium files can be used\n"\ sl@0: "unedited.\n\n"\ sl@0: "This can cause problems if a localizer copies a line from a Unicode file into,\n"\ sl@0: "say, the _strings.txt file, in order to give a mapping for an accented\n"\ sl@0: "character. The localizer replaces the composed character code with the\n"\ sl@0: "decomposition and changes the keys but forgets to remove the CANONSEQ\n"\ sl@0: "specifier. In this case the key would be ignored. 
Coltab provides a warning so\n"\ sl@0: "that this can be put right.\n\n"\ sl@0: "Coltab will only warn about the first CANONSEQ in each file, and does not warn\n"\ sl@0: "if the 'standard' or 'wgl4' options are used."; sl@0: exit(1); sl@0: break; sl@0: case 2: sl@0: cout << "How to ensure coltab's output files are compilable.\n\n"\ sl@0: "By default, Coltab's files for locales need to be edited before they are\n"\ sl@0: "compilable. The UID for the collation method needs to be filled in. This UID\n"\ sl@0: "is added so that the collation table can be searched for later. At present,\n"\ sl@0: "this UID is not necessary for the correct functioning of the Symbian platform\n"\ sl@0: "and so a value of 0 can be safely used.\n\n"\ sl@0: "To insert this value into the file directly, use the /u option, for example\n"\ sl@0: "coltab /u0 french\n"\ sl@0: "If the /u option is used, the file should be compilable as is. If it is not,\n"\ sl@0: "please raise it as a defect with Symbian's internationalization team,\n"\ sl@0: "supplying the files that caused the problem if this is possible.\n"\ sl@0: "If the 'standard' or 'wgl4' options are used, no UID is output, so the /u\n"\ sl@0: "option is not required."; sl@0: exit(1); sl@0: break; sl@0: case 3: sl@0: cout << "How to ensure collation key values are inside the supported range. \n\n"\ sl@0: "According to Unicode Standard, the range suppored by tool COLTAB:\n"\ sl@0: " Level 0 (primary): 0000 - FFFF, \n"\ sl@0: " Level 1 (Secondary): 0020 - 011E, \n"\ sl@0: " Level 2 (Tertiary): 0001 - 003F. 
\n"\ sl@0: "Please edit your collation files and make sure key values are inside the above range"; sl@0: exit(1); sl@0: break; sl@0: default: sl@0: PrintUsage(); sl@0: cout << "\n\nSpecify /h1 for help on the use of CANONSEQ\n"; sl@0: cout << "Specify /h2 for help on making compilable files that do not need editing\n"; sl@0: exit(1); sl@0: break; sl@0: } sl@0: } sl@0: sl@0: short HighSurrogate(int aCode) sl@0: { sl@0: return static_cast(0xD7C0 + (aCode >> 10)); sl@0: } sl@0: sl@0: short LowSurrogate(int aCode) sl@0: { sl@0: return static_cast(0xDC00 | (aCode & 0x3FF)); sl@0: } sl@0: sl@0: int main(int argc,char** argv) sl@0: { sl@0: bool copyright = false; sl@0: bool wgl4 = false; sl@0: bool allKeys = false; sl@0: const char* prefix = ""; sl@0: const char* infix = ""; sl@0: const char* locale = ""; sl@0: char* localeArg = 0; sl@0: char* uidArg = 0; sl@0: for (int i = 1; i < argc; ++i) sl@0: { sl@0: if (argv[i][0] == '/' || argv[i][0] == '-') sl@0: { sl@0: switch (argv[i][1]) sl@0: { sl@0: case 'u': sl@0: case 'U': sl@0: { sl@0: uidArg = argv[i] + 2; sl@0: const char* uidCheck = uidArg; sl@0: while (*uidCheck) sl@0: { sl@0: if (!isValidHexDigit(*uidCheck)) sl@0: UsageError(); sl@0: ++uidCheck; sl@0: } sl@0: if (uidCheck == uidArg || 8 < uidCheck - uidArg) sl@0: UsageError(); sl@0: break; sl@0: } sl@0: case 'c': sl@0: case 'C': sl@0: copyright = true; sl@0: break; sl@0: case 'a': sl@0: allKeys = true; sl@0: break; sl@0: case 'h': sl@0: case 'H': sl@0: PrintHelp(argv[i] + 2); sl@0: break; sl@0: default: sl@0: UsageError(); sl@0: break; sl@0: } sl@0: } sl@0: else if (!localeArg) sl@0: localeArg = argv[i]; sl@0: else sl@0: UsageError(); sl@0: } sl@0: if (!localeArg) sl@0: UsageError(); sl@0: bool standard = false; sl@0: if (!_stricmp(localeArg, "standard")) sl@0: { sl@0: locale = "Standard"; sl@0: standard = true; sl@0: } sl@0: else if (!_stricmp(localeArg, "wgl4")) sl@0: { sl@0: locale = "Wgl4"; sl@0: wgl4 = true; sl@0: standard = true; sl@0: } sl@0: else sl@0: { sl@0: 
locale = prefix = localeArg; sl@0: infix = "_"; sl@0: } sl@0: sl@0: Reader* reader = new Reader(wgl4, standard, locale, uidArg); sl@0: if (!reader) sl@0: { sl@0: cout << "out of memory\n"; sl@0: exit(1); sl@0: } sl@0: char* filename = new char[strlen(prefix) + strlen(infix) + 64]; sl@0: if (allKeys == false) sl@0: { sl@0: sprintf(filename,"%s%scompkeys.txt",prefix,infix); sl@0: reader->ReadCompKeys(filename); sl@0: if (!standard) sl@0: { sl@0: sprintf(filename,"%s%sstrings.txt",prefix,infix); sl@0: reader->ReadStrings(filename); sl@0: } sl@0: sprintf(filename,"%s%sbasekeys.txt",prefix,infix); sl@0: reader->ReadBaseKeys(filename); sl@0: } sl@0: else sl@0: { sl@0: sprintf(filename,"%s%sAllKeys.txt",prefix,infix); sl@0: reader->ReadAllKeys(filename); sl@0: } sl@0: sprintf(filename,"ls_%s.cpp", localeArg); sl@0: reader->WriteOutput(filename, copyright); sl@0: sl@0: delete reader; sl@0: delete [] filename; sl@0: return 0; sl@0: } sl@0: sl@0: Reader::Reader(bool aWgl4, bool aStandard, sl@0: const char* aLocaleName, const char* aUidString): sl@0: iKeys(0), sl@0: iIndices(0), sl@0: iStringElements(0), sl@0: iStringIndices(0), sl@0: iInputFileName(NULL), sl@0: iLineNumber(0), sl@0: iSuppressCanonseqWarning(false), sl@0: iWgl4(aWgl4), sl@0: iStandard(aStandard), sl@0: iLocaleName(aLocaleName), sl@0: iUidString(aUidString) sl@0: { sl@0: if (iStandard) sl@0: { sl@0: iCPlusPlusIdentifier = new char[9]; sl@0: strcpy(iCPlusPlusIdentifier, "Standard"); sl@0: return; sl@0: } sl@0: char* p = iCPlusPlusIdentifier = new char[strlen(aLocaleName) + 2]; sl@0: int current = toupper(aLocaleName[0]); sl@0: if (current < 'A' || 'Z' < current) sl@0: *p++ = 'C'; sl@0: else sl@0: { sl@0: *p++ = static_cast(current); sl@0: ++aLocaleName; sl@0: } sl@0: bool inUnderScore = false; sl@0: while (*aLocaleName) sl@0: { sl@0: current = tolower(*aLocaleName++); sl@0: if (current < 'a' || 'z' < current) sl@0: { sl@0: if (!inUnderScore) sl@0: { sl@0: inUnderScore = true; sl@0: *p++ = '_'; sl@0: } sl@0: } 
sl@0: else sl@0: { sl@0: inUnderScore = false; sl@0: *p++ = static_cast(current); sl@0: } sl@0: } sl@0: *p = 0; sl@0: } sl@0: sl@0: Reader::~Reader() sl@0: { sl@0: delete [] iCPlusPlusIdentifier; sl@0: } sl@0: sl@0: // Get a hex number of exactly four digits from aString. Return -1 if none is found and aTolerate is true. sl@0: int Reader::Hex(const char *aString, int &aCharConsumed, bool aTolerate) sl@0: { sl@0: char *end; sl@0: unsigned long x = strtoul(aString,&end,16); sl@0: aCharConsumed = end - aString; sl@0: if ((aCharConsumed != 4) && (aCharConsumed != 5) && (aCharConsumed != 6)) sl@0: { sl@0: if (!aTolerate) sl@0: { sl@0: cout << "bad hex number on line " << iLineNumber << " of file " << iInputFileName << '\n'; sl@0: exit(1); sl@0: } sl@0: return -1; sl@0: } sl@0: return x; sl@0: } sl@0: sl@0: // Get a collation value from a string of the form [.xxxx.xxxx.xxxx.xxxx] sl@0: void Reader::GetCollationKey(const char* aString, int& aCharConsumed, CollationKey* aKey) sl@0: { sl@0: aCharConsumed = 0; sl@0: const char *end = strchr(aString, ']'); sl@0: if (end != NULL){ sl@0: aCharConsumed = end - aString; sl@0: } sl@0: sl@0: if (aString[0] != '[' || (aCharConsumed != 21 && aCharConsumed != 22 && aCharConsumed != 23)) sl@0: { sl@0: cout << "syntax error on line " << iLineNumber << " of file " << iInputFileName << '\n'; sl@0: exit(1); sl@0: } sl@0: if (aKey == NULL) sl@0: { sl@0: if (iKeys >= EMaxCollationKeys) sl@0: { sl@0: cout << "too many keys"; sl@0: exit(1); sl@0: } sl@0: aKey = &iCollationKey[iKeys++]; sl@0: } sl@0: aKey->iIgnorable = aString[1] == '*'; // asterisk means that this character is normally ignored sl@0: int charConsumed = 0; sl@0: for (int i = 0; i < CollationKey::ELevels; i++) sl@0: aKey->iLevel[i] = Hex(aString + 2 + i * 5, charConsumed); sl@0: sl@0: if (aKey->iLevel[1] > 0 && (aKey->iLevel[1] < KLevel1Min || aKey->iLevel[1] > KLevel1Max)) sl@0: { sl@0: aKey->iLevel[1] = KLevel1Max; sl@0: cout << "illegal level-1 key value on line " << 
iLineNumber << "; outside the range " << KLevel1Min << ".." << KLevel1Max << "\n"; sl@0: cout << "Error: illegal key value in file, please see coltab /h3 for details.\n"; sl@0: exit(1); sl@0: } sl@0: sl@0: if (aKey->iLevel[2] > 0 && (aKey->iLevel[2] < KLevel2Min || aKey->iLevel[2] > KLevel2Max)) sl@0: { sl@0: cout << "illegal level-2 key value on line " << iLineNumber << "; outside the range " << KLevel2Min << ".." << KLevel2Max << "\n"; sl@0: cout << "Error: illegal key value in file, please see coltab /h3 for details.\n"; sl@0: exit(1); sl@0: } sl@0: sl@0: aKey->iStop = true; sl@0: } sl@0: sl@0: void Reader::GetMultipleCollationKeys(const char* aString) sl@0: { sl@0: int keyCount = 0; sl@0: int charConsumed =0; sl@0: while (aString[0] == '[') sl@0: { sl@0: GetCollationKey(aString, charConsumed); sl@0: sl@0: keyCount++; sl@0: iCollationKey[iKeys - 1].iStop = false; sl@0: int length = strlen(aString); sl@0: if (length <= charConsumed + 1) sl@0: break; sl@0: aString += charConsumed + 1; sl@0: sl@0: if (aString[0] == ' ') //a space is put between collation keys in keys files provided by previous Unicode Standard (i.e 3.1) sl@0: aString++; sl@0: sl@0: } sl@0: iCollationKey[iKeys - 1].iStop = true; sl@0: } sl@0: sl@0: /* sl@0: Partially parse a line, returning its key code and the start of its first block of key data. sl@0: Return false if it is not a data line, or not relevant. sl@0: */ sl@0: bool Reader::ParseLine(const char* aLine, int aCode[16], int& aCodeCount, int& aKeyStart, int& aKeyCount) sl@0: { sl@0: int lineLength = strlen(aLine); sl@0: int charConsumed = 0; sl@0: aCodeCount = 0; sl@0: aCode[0] = Hex(aLine,charConsumed,true); sl@0: sl@0: /* sl@0: A data line must start with a hex number and be at least 27 characters long. sl@0: Canonically decomposable Unicode characters are skipped. sl@0: Skip non-WGL4 characters if doing WGL4 only. 
sl@0: */ sl@0: if (aCode[0] != -1) sl@0: { sl@0: aCodeCount = 1; sl@0: if (!strcmp(aLine + lineLength - 8,"CANONSEQ")) sl@0: { sl@0: if (!iSuppressCanonseqWarning) sl@0: { sl@0: cout << "Warning: CANONSEQ used in file " << iInputFileName sl@0: << " on line " << iLineNumber << ".\nWarning: All mappings specifying CANONSEQ are ignored.\n" sl@0: << "Warning: Use coltab /h1 for more details."; sl@0: iSuppressCanonseqWarning = true; sl@0: } sl@0: aCodeCount = 0; sl@0: } sl@0: else if (lineLength < 27 || sl@0: (iWgl4 && !InWgl4((unsigned int)aCode))) sl@0: aCodeCount = 0; sl@0: } sl@0: sl@0: if (aCode[0] != -1) sl@0: { sl@0: // find '[' sl@0: aKeyStart = charConsumed; sl@0: while (aKeyStart < lineLength && aLine[aKeyStart] != '[') sl@0: aKeyStart++; sl@0: sl@0: // read all hex before '[' sl@0: int index = charConsumed + 1; sl@0: while (index < aKeyStart) sl@0: { sl@0: aCode[aCodeCount] = Hex(aLine+index, charConsumed, true); sl@0: if (aCode[aCodeCount] == -1) sl@0: break; sl@0: sl@0: index += charConsumed + 1; sl@0: aCodeCount++; sl@0: } sl@0: sl@0: // find number of collation keys sl@0: aKeyCount = 0; sl@0: index = aKeyStart; sl@0: while (index < lineLength && aLine[index] != '%' && aLine[index] != '#') sl@0: { sl@0: if (aLine[index] == '[') sl@0: aKeyCount++; sl@0: index++; sl@0: } sl@0: } sl@0: sl@0: return aCodeCount > 0; sl@0: } sl@0: sl@0: void Reader::AddKeyOneToOne(const char* aLine, const int aCode, const int aKeyStart) sl@0: { sl@0: if (iIndices >= EMaxCollationIndices) sl@0: { sl@0: cout << "too many Unicode values"; sl@0: exit(1); sl@0: } sl@0: CollationIndex& index = iCollationIndex[iIndices++]; sl@0: index.iCode = aCode; sl@0: index.iIndex = -1; sl@0: sl@0: /* sl@0: First try to find the key in the array of keys found so far. sl@0: Search backwards to use the fact that runs of the same key occur together. 
sl@0: */ sl@0: CollationKey key; sl@0: int charConsumed = 0; sl@0: GetCollationKey(aLine + aKeyStart, charConsumed, &key); sl@0: for (int i = iKeys - 1; i >= 0 && index.iIndex == -1; i--) sl@0: if (iCollationKey[i] == key) sl@0: index.iIndex = i; sl@0: sl@0: // If that fails, add a new key. sl@0: if (index.iIndex == -1) sl@0: { sl@0: index.iIndex = iKeys++; sl@0: if (iKeys > EMaxCollationKeys) sl@0: { sl@0: cout << "too many keys"; sl@0: exit(1); sl@0: } sl@0: iCollationKey[index.iIndex] = key; sl@0: } sl@0: } sl@0: /* sl@0: Read 1-to-1 mapping. Sample: sl@0: 02B9 ; [*02A5.0020.0002.02B9] % MODIFIER LETTER PRIME sl@0: sl@0: aCombinedFile = true: aFileName is combined file, which contains base keys, comp keys, and string keys. sl@0: */ sl@0: void Reader::ReadBaseKeys(const char* aFileName) sl@0: { sl@0: iSuppressCanonseqWarning = iStandard || iWgl4; sl@0: iLineNumber = 0; sl@0: iInputFileName = aFileName; sl@0: ifstream input_file; sl@0: sl@0: #ifdef __MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in); sl@0: #else //!__MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in | ios::nocreate); sl@0: #endif //__MSVCDOTNET__ sl@0: sl@0: if (input_file.fail()) sl@0: { sl@0: cout << "cannot open input file '" << iInputFileName << "'\n"; sl@0: exit(1); sl@0: } sl@0: cout << "reading base keys from '" << iInputFileName << "'\n"; sl@0: sl@0: char line[1024]; sl@0: for (;;) sl@0: { sl@0: input_file.getline(line,sizeof(line)); sl@0: if (input_file.eof()) sl@0: break; sl@0: iLineNumber++; sl@0: // line number counting sl@0: if (iLineNumber % 100 == 0) sl@0: { sl@0: cout << "line " << iLineNumber << '\n'; sl@0: cout.flush(); sl@0: } sl@0: int code[16]; sl@0: int codeCount = 0; sl@0: int key_start = 0; sl@0: int keyCount = 0; sl@0: if (ParseLine(line, code, codeCount, key_start, keyCount)) sl@0: { sl@0: if (codeCount != 1 || keyCount != 1) sl@0: continue; // goto next line sl@0: AddKeyOneToOne(line, code[0], key_start); sl@0: } sl@0: } sl@0: sl@0: input_file.close(); 
sl@0: } sl@0: sl@0: void Reader::AddKeyOneToMuch(const char* aLine, const int aCode, const int aKeyStart) sl@0: { sl@0: if (iIndices >= EMaxCollationIndices) sl@0: { sl@0: cout << "too many Unicode values"; sl@0: exit(1); sl@0: } sl@0: CollationIndex& index = iCollationIndex[iIndices++]; sl@0: index.iCode = aCode; sl@0: index.iIndex = iKeys; sl@0: GetMultipleCollationKeys(aLine + aKeyStart); sl@0: } sl@0: /* sl@0: Read 1-to-much mapping. sl@0: 3303 ; [.279F.0020.001C.3303][.1114.0020.001C.3303][.27C7.0020.001F.3303] # SQUARE AARU; QQKN sl@0: */ sl@0: void Reader::ReadCompKeys(const char* aFileName) sl@0: { sl@0: iSuppressCanonseqWarning = iStandard || iWgl4; sl@0: iLineNumber = 0; sl@0: iInputFileName = aFileName; sl@0: ifstream input_file; sl@0: sl@0: #ifdef __MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in); sl@0: #else //!__MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in | ios::nocreate); sl@0: #endif //__MSVCDOTNET__ sl@0: sl@0: if (input_file.fail()) sl@0: { sl@0: cout << "there are no composite keys; '" << iInputFileName << "' not found\n"; sl@0: return; sl@0: } sl@0: cout << "reading composite keys from '" << iInputFileName << "'\n"; sl@0: sl@0: char line[1024]; sl@0: for (;;) sl@0: { sl@0: input_file.getline(line,sizeof(line)); sl@0: if (input_file.eof()) sl@0: break; sl@0: iLineNumber++; sl@0: // line number counting sl@0: if (iLineNumber % 100 == 0) sl@0: { sl@0: cout << "line " << iLineNumber << '\n'; sl@0: cout.flush(); sl@0: } sl@0: int code[16]; sl@0: int codeCount = 0; sl@0: int key_start = 0; sl@0: int keyCount = 0; sl@0: if (ParseLine(line, code, codeCount, key_start, keyCount)) sl@0: { sl@0: if (codeCount != 1 || keyCount < 2) sl@0: continue; // goto next line sl@0: AddKeyOneToMuch(line, code[0], key_start); sl@0: } sl@0: } sl@0: sl@0: input_file.close(); sl@0: } sl@0: sl@0: sl@0: void Reader::AddKeyMuchToMuch(const char* aLine, const int aCode[16], const int aCodeCount, const int aKeyStart) sl@0: { sl@0: sl@0: // Store the 
index to the Unicode string and the key sequence. sl@0: if (iStringIndices > EMaxStringIndices) sl@0: { sl@0: cout << "too many string indices"; sl@0: exit(1); sl@0: } sl@0: iStringIndex[iStringIndices++] = (iStringElements << 16) | iKeys; sl@0: sl@0: // Reserve space for the length. sl@0: if (iStringElements >= EMaxStringElements) sl@0: { sl@0: cout << "too many string elements"; sl@0: exit(1); sl@0: } sl@0: iStringElements++; sl@0: sl@0: // Read the Unicode string. sl@0: int length = 0; // in unit of int16 sl@0: int charCount = 0; // in unit of char. for debug. sl@0: sl@0: for (int i=0; i= EMaxStringElements) sl@0: { sl@0: cout << "too many string elements"; sl@0: exit(1); sl@0: } sl@0: sl@0: if (aCode[i] > 0xFFFF) sl@0: { sl@0: // UCS4 --> UTF-16 sl@0: iStringElement[iStringElements++] = 0xD7C0 + (aCode[i] >> 10); sl@0: iStringElement[iStringElements++] = 0xDC00 | (aCode[i] & 0x3FF); sl@0: length += 2; sl@0: } sl@0: else sl@0: { sl@0: iStringElement[iStringElements++] = aCode[i]; sl@0: length++; sl@0: } sl@0: charCount++; sl@0: } sl@0: sl@0: iStringElement[iStringElements - length - 1] = (unsigned int)length; sl@0: sl@0: // Read the key sequence. sl@0: GetMultipleCollationKeys(aLine + aKeyStart); sl@0: } sl@0: /* sl@0: Read much-to-much mapping. 
Sample: sl@0: 004F 0338 [.08EA.0020.0008.00D8] % capital O-stroke sl@0: 0E40 0E08 ; [.1E2B.0020.0002.0E08][.1E5E.0020.001F.0E40] # sl@0: */ sl@0: void Reader::ReadStrings(const char* aFileName) sl@0: { sl@0: iSuppressCanonseqWarning = iStandard || iWgl4; sl@0: iLineNumber = 0; sl@0: iInputFileName = aFileName; sl@0: ifstream input_file; sl@0: sl@0: #ifdef __MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in); sl@0: #else //!__MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in | ios::nocreate); sl@0: #endif //__MSVCDOTNET__ sl@0: sl@0: if (input_file.fail()) sl@0: { sl@0: cout << "there are no strings; '" << iInputFileName << "' not found\n"; sl@0: return; sl@0: } sl@0: cout << "reading strings from '" << iInputFileName << "'\n"; sl@0: sl@0: char line[1024]; sl@0: for (;;) sl@0: { sl@0: input_file.getline(line,sizeof(line)); sl@0: if (input_file.eof()) sl@0: break; sl@0: iLineNumber++; sl@0: // line number counting sl@0: if (iLineNumber % 100 == 0) sl@0: { sl@0: cout << "line " << iLineNumber << '\n'; sl@0: cout.flush(); sl@0: } sl@0: int code[16]; sl@0: int codeCount = 0; sl@0: int key_start = 0; sl@0: int keyCount = 0; sl@0: if (ParseLine(line, code, codeCount, key_start, keyCount)) sl@0: { sl@0: if (codeCount < 2 || keyCount < 1) sl@0: continue; // goto next line sl@0: AddKeyMuchToMuch(line, code, codeCount, key_start); sl@0: } sl@0: } sl@0: sl@0: input_file.close(); sl@0: } sl@0: sl@0: /* sl@0: Read combined key table. 
Sample: sl@0: 1-to-1 mapping: sl@0: 02B9 ; [*02A5.0020.0002.02B9] % MODIFIER LETTER PRIME sl@0: sl@0: 1-to-much mapping: sl@0: 3303 ; [.279F.0020.001C.3303][.1114.0020.001C.3303][.27C7.0020.001F.3303] # SQUARE AARU; QQKN sl@0: sl@0: much-to-much mapping: sl@0: 004F 0338 [.08EA.0020.0008.00D8] % capital O-stroke sl@0: 0E40 0E08 ; [.1E2B.0020.0002.0E08][.1E5E.0020.001F.0E40] # sl@0: */ sl@0: void Reader::ReadAllKeys(const char* aFileName) sl@0: { sl@0: iSuppressCanonseqWarning = iStandard || iWgl4; sl@0: iLineNumber = 0; sl@0: iInputFileName = aFileName; sl@0: ifstream input_file; sl@0: sl@0: #ifdef __MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in); sl@0: #else //!__MSVCDOTNET__ sl@0: input_file.open(iInputFileName, ios::in | ios::nocreate); sl@0: #endif //__MSVCDOTNET__ sl@0: sl@0: if (input_file.fail()) sl@0: { sl@0: cout << "there are no keys; '" << iInputFileName << "' not found\n"; sl@0: return; sl@0: } sl@0: cout << "reading all keys from '" << iInputFileName << "'\n"; sl@0: sl@0: char line[1024]; sl@0: for (;;) sl@0: { sl@0: if (input_file.eof()) sl@0: break; sl@0: input_file.getline(line,sizeof(line)); sl@0: iLineNumber++; sl@0: sl@0: int code[16]; sl@0: int codeCount = 0; sl@0: int key_start = 0; sl@0: int keyCount = 0; sl@0: if (ParseLine(line, code, codeCount, key_start, keyCount)) sl@0: { sl@0: if (codeCount == 1 && keyCount == 1) sl@0: AddKeyOneToOne(line, code[0], key_start); sl@0: else if (codeCount == 1 && keyCount > 1) sl@0: AddKeyOneToMuch(line, code[0], key_start); sl@0: else if (codeCount > 1 && keyCount > 0) sl@0: AddKeyMuchToMuch(line, code, codeCount, key_start); sl@0: else sl@0: cout << "ignore line: " << line << "\n"; sl@0: } sl@0: } sl@0: sl@0: input_file.close(); sl@0: } sl@0: sl@0: sl@0: // Pack the 3 collation key levels into a single 32-bit integer. 
sl@0: unsigned int Reader::PackKey(const CollationKey& aValue) sl@0: { sl@0: unsigned int level0 = aValue.iLevel[0]; sl@0: unsigned int level1 = aValue.iLevel[1]; sl@0: if (level1 > 0) sl@0: level1 -= (KLevel1Min - 1); sl@0: unsigned int level2 = aValue.iLevel[2]; sl@0: if (level2 > 0) sl@0: level2 -= (KLevel2Min - 1); sl@0: unsigned int key = level0 << 16 | level1 << 8 | level2 << 2; sl@0: if (aValue.iIgnorable) sl@0: key |= 2; sl@0: if (aValue.iStop) sl@0: key |= 1; sl@0: return key; sl@0: } sl@0: sl@0: // Pack a collation index value into a single 32-bit integer. sl@0: int Reader::PackIndex(const CollationIndex& aValue, unsigned int result[2]) sl@0: { sl@0: unsigned int code = aValue.iCode; sl@0: unsigned int index = aValue.iIndex; sl@0: if (code <= 0xFFFF) sl@0: { sl@0: result[0] = (code << 16 | index); sl@0: return 1; sl@0: } sl@0: else sl@0: { sl@0: result[0] = (::HighSurrogate(code) << 16 | index); sl@0: result[1] = (::LowSurrogate(code) << 16 | index); sl@0: return 2; sl@0: } sl@0: } sl@0: sl@0: const Reader* TheReader; sl@0: static int CompareStringIndices(const void* aIndex1,const void* aIndex2) sl@0: { sl@0: return TheReader->CompareStringIndices(*(unsigned int*)aIndex1 >> 16,*(unsigned int*)aIndex2 >> 16); sl@0: } sl@0: sl@0: int CompareUnicodeStrings(const int *aString1,int aLength1,const int *aString2,int aLength2) sl@0: { sl@0: for (int i = 0; i < aLength1 || i < aLength2; i++, aString1++, aString2++) sl@0: { sl@0: int x = i < aLength1 ? *aString1 : -1; sl@0: int y = i < aLength2 ? 
*aString2 : -1; sl@0: if (x != y) sl@0: return x - y; sl@0: } sl@0: return 0; sl@0: } sl@0: sl@0: int Reader::CompareStringIndices(int aIndex1,int aIndex2) const sl@0: { sl@0: return CompareUnicodeStrings(iStringElement + aIndex1 + 1,iStringElement[aIndex1], sl@0: iStringElement + aIndex2 + 1,iStringElement[aIndex2]); sl@0: } sl@0: sl@0: void Reader::WriteOutput(const char* aFileName, bool aCopyright) sl@0: { sl@0: int i; sl@0: ofstream output_file; sl@0: output_file.open(aFileName); sl@0: if (output_file.fail()) sl@0: { sl@0: cout << "cannot open output file '" << aFileName << "'\n"; sl@0: exit(1); sl@0: } sl@0: cout << "writing output to '" << aFileName << "'\n"; sl@0: sl@0: char *locale = NULL; sl@0: if (iStandard) sl@0: locale = _strdup("Standard"); sl@0: else sl@0: locale = _strdup(iLocaleName); sl@0: sl@0: if (!iStandard) sl@0: { sl@0: _strlwr(locale); sl@0: locale[0] = (char)toupper(locale[0]); sl@0: if (aCopyright) sl@0: { sl@0: char* capsFileName = new char[strlen(aFileName) + 1]; sl@0: strcpy(capsFileName, aFileName); sl@0: _strupr(capsFileName); sl@0: output_file << "/*\n" << capsFileName << "\n\nCopyright (C) 2000-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.\n*/\n"; sl@0: delete [] capsFileName; sl@0: output_file << "\n/*\nThe LCharSet object used by the " << locale << " locale.\n"; sl@0: output_file << "Generated by COLTAB.\n*/\n"; sl@0: } sl@0: sl@0: output_file << "\n#include \"ls_std.h\"\n#include \n"; sl@0: output_file << "\nconst TUint KUid" << iCPlusPlusIdentifier << "CollationMethod = "; sl@0: if (iUidString) sl@0: output_file << "0x" << iUidString << ";\n"; sl@0: else sl@0: { sl@0: output_file << "/* FILL THIS IN */;\n"; sl@0: cout << "Warning: File will need editing\nWarning: see coltab /h2 for details.\n"; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: Write the unique collation keys. 
sl@0: Each one has the format, going from highest to lowest bit: sl@0: sl@0: 16 bits: level-0 key sl@0: 8 bits: level-1 key sl@0: 6 bits: level-2 key sl@0: 1 bit: set if this key is optionally ignorable sl@0: 1 bit: set if this is the last key in the string of keys for a single Unicode value sl@0: sl@0: */ sl@0: if (iKeys != 0) sl@0: { sl@0: output_file << "\nstatic const TUint32 The" << iCPlusPlusIdentifier << "Key[] = \n\t{"; sl@0: CollationKey* ck = iCollationKey; sl@0: output_file << "\t // " << iKeys << " keys"; sl@0: output_file << hex; sl@0: for (i = 0; i < iKeys; i++, ck++) sl@0: { sl@0: unsigned int key = PackKey(*ck); sl@0: if (i % 8 == 0) sl@0: output_file << "\n\t"; sl@0: output_file << "0x"; sl@0: output_file << key << ","; sl@0: } sl@0: output_file << dec; sl@0: output_file << "\n\t};\n\n"; sl@0: } sl@0: sl@0: if (iIndices != 0) sl@0: { sl@0: // Sort then write the collation index values - these relate Unicode values to collation keys. sl@0: qsort(iCollationIndex,iIndices,sizeof(CollationIndex),CollationIndex::Compare); sl@0: output_file << "static const TUint32 The" << iCPlusPlusIdentifier << "Index[] = \n\t{"; sl@0: CollationIndex* ci = iCollationIndex; sl@0: int entry=0; sl@0: output_file << "\t // " << iIndices << " indices"; sl@0: output_file << hex; sl@0: for (i = 0; i < iIndices; i++, ci++, entry++) sl@0: { sl@0: unsigned int key[2]; sl@0: int bytecount = PackIndex(*ci, key); sl@0: sl@0: if (entry % 8 == 0) sl@0: output_file << "\n\t"; sl@0: output_file << "0x"; sl@0: output_file << key[0] << ","; sl@0: sl@0: if (bytecount == 2) sl@0: { sl@0: entry++; sl@0: if (entry % 8 == 0) sl@0: output_file << "\n\t"; sl@0: output_file << "0x"; sl@0: output_file << key[1] << ","; sl@0: } sl@0: } sl@0: output_file << dec; sl@0: output_file << "\n\t};"; sl@0: output_file << "\t // " << entry << " entries"; sl@0: output_file << "\n\n"; sl@0: iIndices = entry; //One surrogate pair occupies 2 entries sl@0: } sl@0: sl@0: if (iStringElements) sl@0: { sl@0: // 
Write the Unicode strings; these are preceded by their lengths. sl@0: output_file << "static const TUint16 The" << iCPlusPlusIdentifier << "StringElement[] = \n\t{"; sl@0: output_file << hex; sl@0: for (i = 0; i < iStringElements; i++) sl@0: { sl@0: if (i % 8 == 0) sl@0: output_file << "\n\t"; sl@0: output_file << "0x" << iStringElement[i] << ","; sl@0: } sl@0: output_file << dec; sl@0: if (iStringElements==0) sl@0: output_file << "0"; sl@0: output_file << "\n\t};\n\n"; sl@0: sl@0: /* sl@0: Sort then write the string index values - these relate Unicode strings to collation keys. sl@0: Each one has the string index in the upper word and the key index in the lower word. sl@0: */ sl@0: TheReader = this; sl@0: qsort(iStringIndex,iStringIndices,sizeof(iStringIndex[0]),::CompareStringIndices); sl@0: output_file << "static const TUint32 The" << iCPlusPlusIdentifier << "StringIndex[] = \n\t{"; sl@0: output_file << hex; sl@0: for (i = 0; i < iStringIndices; i++) sl@0: { sl@0: if (i % 8 == 0) sl@0: output_file << "\n\t"; sl@0: output_file << "0x" << iStringIndex[i] << ","; sl@0: } sl@0: output_file << dec; sl@0: if (iStringIndices ==0) sl@0: output_file << "0"; sl@0: output_file << "\n\t};\n\n"; sl@0: } sl@0: sl@0: // Write the collation table structure. 
sl@0: output_file << "static const TCollationKeyTable The" << iCPlusPlusIdentifier << "Table = \n\t{ "; sl@0: if (iKeys) sl@0: output_file << "The" << iCPlusPlusIdentifier << "Key"; sl@0: else sl@0: output_file << "0"; sl@0: if (iIndices) sl@0: output_file << ", The" << iCPlusPlusIdentifier << "Index, " << iIndices; sl@0: else sl@0: output_file << ", 0, 0"; sl@0: if (iStringElements) sl@0: output_file << ", The" << iCPlusPlusIdentifier << "StringElement, The" << iCPlusPlusIdentifier << "StringIndex, " << iStringIndices << " };\n"; sl@0: else sl@0: output_file << ", 0, 0, 0 };\n"; sl@0: sl@0: if (!iStandard) sl@0: output_file << "\nstatic const TCollationMethod TheCollationMethod[] = \n"\ sl@0: " {\n"\ sl@0: " {\n"\ sl@0: " KUid" << iCPlusPlusIdentifier << "CollationMethod, // the method for the locale\n"\ sl@0: " NULL, // use the standard table as the main table\n"\ sl@0: " &The" << iCPlusPlusIdentifier << "Table, // the locale values override the standard values\n"\ sl@0: " 0 // the flags are standard\n"\ sl@0: " },\n"\ sl@0: " {\n"\ sl@0: " KUidBasicCollationMethod, // the standard unlocalised method\n"\ sl@0: " NULL, // null means use the standard table\n"\ sl@0: " NULL, // there's no override table\n"\ sl@0: " 0 // the flags are standard\n"\ sl@0: " }\n"\ sl@0: " };\n"\ sl@0: "\n"\ sl@0: "static const TCollationDataSet TheCollationDataSet =\n"\ sl@0: " {\n"\ sl@0: " TheCollationMethod,\n"\ sl@0: " 2\n"\ sl@0: " };"\ sl@0: "\n\n"\ sl@0: "// The one and only locale character set object.\n"\ sl@0: "const LCharSet TheCharSet =\n"\ sl@0: " {\n"\ sl@0: " NULL,\n"\ sl@0: " &TheCollationDataSet\n"\ sl@0: " };\n"; sl@0: sl@0: output_file.close(); sl@0: delete [] locale; sl@0: } sl@0: sl@0: int CollationIndex::Compare(const void* aIndex1,const void* aIndex2) sl@0: { sl@0: return ((CollationIndex*)aIndex1)->iCode - ((CollationIndex*)aIndex2)->iCode; sl@0: }