sl@0: /* sl@0: * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include sl@0: #include sl@0: sl@0: #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0])) sl@0: sl@0: LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1[]= sl@0: { sl@0: 0x201a, sl@0: 0x0192, sl@0: 0x201e, sl@0: 0x2026, sl@0: 0x2020, sl@0: 0x2021, sl@0: 0x02c6, sl@0: 0x2030, sl@0: 0x0160, sl@0: 0x2039, sl@0: 0x0152 sl@0: }; sl@0: sl@0: LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2[]= sl@0: { sl@0: 0x2018, sl@0: 0x2019, sl@0: 0x201c, sl@0: 0x201d, sl@0: 0x2022, sl@0: 0x2013, sl@0: 0x2014, sl@0: 0x02dc, sl@0: 0x2122, sl@0: 0x0161, sl@0: 0x203a, sl@0: 0x0153 sl@0: }; sl@0: sl@0: LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3[]= sl@0: { sl@0: 0x017e, sl@0: 0x0178 sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1[]= sl@0: { sl@0: { sl@0: 0x82, sl@0: 0x8c, sl@0: keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1 sl@0: }, sl@0: { sl@0: 0x91, sl@0: 0x9c, sl@0: keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2 sl@0: }, sl@0: { sl@0: 0x9e, sl@0: 0x9f, sl@0: keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3 sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToCodePage1252_1[]= sl@0: { sl@0: { sl@0: 0x0152, sl@0: 0x8c sl@0: }, sl@0: { sl@0: 0x0153, sl@0: 0x9c sl@0: }, sl@0: { sl@0: 0x0160, sl@0: 0x8a sl@0: }, sl@0: { sl@0: 0x0161, sl@0: 0x9a sl@0: }, sl@0: { sl@0: 0x0178, sl@0: 0x9f sl@0: }, sl@0: { sl@0: 0x017d, sl@0: 0x8e sl@0: }, sl@0: { sl@0: 0x017e, sl@0: 0x9e sl@0: }, sl@0: { sl@0: 0x0192, sl@0: 0x83 sl@0: }, sl@0: { sl@0: 0x02c6, sl@0: 0x88 sl@0: }, sl@0: { sl@0: 0x02dc, sl@0: 0x98 sl@0: }, sl@0: { sl@0: 0x2013, sl@0: 0x96 sl@0: }, sl@0: { sl@0: 0x2014, sl@0: 0x97 sl@0: }, sl@0: { sl@0: 0x2018, sl@0: 0x91 sl@0: }, sl@0: { sl@0: 0x2019, sl@0: 0x92 sl@0: }, sl@0: { sl@0: 0x201a, sl@0: 0x82 sl@0: }, sl@0: { sl@0: 0x201c, sl@0: 0x93 sl@0: }, sl@0: { sl@0: 0x201d, sl@0: 0x94 sl@0: }, sl@0: { sl@0: 0x201e, sl@0: 0x84 sl@0: }, sl@0: { sl@0: 0x2020, sl@0: 0x86 sl@0: }, sl@0: { sl@0: 0x2021, sl@0: 0x87 sl@0: }, sl@0: { sl@0: 0x2022, sl@0: 0x95 sl@0: }, sl@0: { sl@0: 0x2026, sl@0: 0x85 sl@0: }, sl@0: { sl@0: 0x2030, sl@0: 0x89 sl@0: }, sl@0: { sl@0: 0x2039, sl@0: 0x8b sl@0: }, sl@0: { sl@0: 0x203a, sl@0: 0x9b sl@0: }, sl@0: { sl@0: 0x20ac, sl@0: 0x80 sl@0: }, sl@0: { sl@0: 0x2122, sl@0: 0x99 sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SVariableByteData::SRange codePage1252VariableByteDataRanges[]= sl@0: { sl@0: { sl@0: 0x00, sl@0: 0xff, sl@0: 0, sl@0: 0 sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange codePage1252ToUnicodeDataRanges[]= sl@0: { sl@0: { sl@0: 0x00, sl@0: 0x7f, sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 0, sl@0: 0, sl@0: { sl@0: 0, sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0xa0, sl@0: 0xff, sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 0, sl@0: 0, sl@0: { sl@0: 0, sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0x80, sl@0: 0x80, sl@0: SCnvConversionData::SOneDirectionData::SRange::EOffset, sl@0: 0, sl@0: 0, sl@0: { sl@0: STATIC_CAST(TUint, 8236), sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0x8e, sl@0: 0x8e, sl@0: SCnvConversionData::SOneDirectionData::SRange::EOffset, sl@0: 0, sl@0: 0, sl@0: { sl@0: STATIC_CAST(TUint, 239), sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0x82, sl@0: 0x9f, sl@0: SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16, sl@0: 0, sl@0: 0, sl@0: { sl@0: UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1) sl@0: } sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToCodePage1252DataRanges[]= sl@0: { sl@0: { sl@0: 0x0000, sl@0: 0x007f, sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 1, sl@0: 0, sl@0: { sl@0: 0, sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0x00a0, sl@0: 0x00ff, sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 1, sl@0: 0, sl@0: { sl@0: 0, sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0x0152, sl@0: 0x2122, sl@0: SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, sl@0: 1, sl@0: 0, sl@0: { sl@0: UData_SKeyedTable1616(keyedTable1616_unicodeToCodePage1252_1) sl@0: } sl@0: } sl@0: }; sl@0: sl@0: GLREF_D const SCnvConversionData codePage1252ConversionData= sl@0: { sl@0: SCnvConversionData::EUnspecified, sl@0: { sl@0: ARRAY_LENGTH(codePage1252VariableByteDataRanges), sl@0: codePage1252VariableByteDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(codePage1252ToUnicodeDataRanges), sl@0: codePage1252ToUnicodeDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(unicodeToCodePage1252DataRanges), sl@0: unicodeToCodePage1252DataRanges sl@0: }, sl@0: NULL, sl@0: NULL sl@0: }; sl@0: sl@0: GLREF_C void IsCharacterSetCP1252(TInt& aConfidenceLevel, const TDesC8& aSample) sl@0: { sl@0: aConfidenceLevel = 60; sl@0: TInt sampleLength = aSample.Length(); sl@0: sl@0: for (TInt i=0; i= 0x80) && (aSample[i] <= 0x9f)) sl@0: { sl@0: if((aSample[i]==0x81)||(aSample[i]==0x8D)||(aSample[i]==0x8f)|| sl@0: (aSample[i]==0x90)||(aSample[i]==0x9d)) sl@0: { sl@0: // These code values are not supported by the Codepage CP1252 sl@0: aConfidenceLevel = 0; sl@0: break; sl@0: } sl@0: else sl@0: { sl@0: // problem: UTF8 uses the values 0x80-0x9f in more than 50% of it's multibyte representation sl@0: // so if the text was UTF8 .... the confidence here would hit the roof. Could check to make sl@0: // sure that this is not UTF8 sl@0: aConfidenceLevel+=1; sl@0: } sl@0: } sl@0: TInt increment1 = i+1; sl@0: TInt decrement1 = i-1; sl@0: // 0xf7 is the division symbol in CP1252. sl@0: // 0xd7 is the division symbol in CP1252.If char on either side of the division sl@0: // symbol is a number then the confidence that it's ISO88591 increases sl@0: if( decrement1>= 0 && ((aSample[i]==0xf7) || (aSample[i]==0xd7)) && increment1= sampleLength) sl@0: break; sl@0: if ( (aSample[decrement1] >= 0x30) && (aSample[decrement1] <= 0x39) && // char before is a number sl@0: (aSample[increment1] >= 0x30) && (aSample[increment1] <= 0x39) ) // char after is a number sl@0: { sl@0: aConfidenceLevel+=5; sl@0: } sl@0: } sl@0: // Can also use the currency symbol to increase confidence if the char after a sl@0: // currency symbol is numeric sl@0: if((aSample[i]>=0xa2) && (aSample[i] <= 0xa5) && increment1= 0x30) && (aSample[increment1] <= 0x39)) sl@0: { sl@0: aConfidenceLevel+=5; sl@0: } sl@0: } sl@0: } // for loop sl@0: aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0; sl@0: }