1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/charconv/cp1252.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,369 @@
1.4 +/*
1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <e32std.h>
1.23 +#include <convdata.h>
1.24 +
1.25 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0])) // Element count of an array: its total size divided by the size of its first element.
1.26 +
1.27 +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1[]= // Unicode values for CP1252 bytes 0x82..0x8c, one per byte in ascending byte order.
1.28 + {
1.29 + 0x201a, // byte 0x82
1.30 + 0x0192, // byte 0x83
1.31 + 0x201e, // byte 0x84
1.32 + 0x2026, // byte 0x85
1.33 + 0x2020, // byte 0x86
1.34 + 0x2021, // byte 0x87
1.35 + 0x02c6, // byte 0x88
1.36 + 0x2030, // byte 0x89
1.37 + 0x0160, // byte 0x8a
1.38 + 0x2039, // byte 0x8b
1.39 + 0x0152 // byte 0x8c
1.40 + };
1.41 +
1.42 +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2[]= // Unicode values for CP1252 bytes 0x91..0x9c, one per byte in ascending byte order.
1.43 + {
1.44 + 0x2018, // byte 0x91
1.45 + 0x2019, // byte 0x92
1.46 + 0x201c, // byte 0x93
1.47 + 0x201d, // byte 0x94
1.48 + 0x2022, // byte 0x95
1.49 + 0x2013, // byte 0x96
1.50 + 0x2014, // byte 0x97
1.51 + 0x02dc, // byte 0x98
1.52 + 0x2122, // byte 0x99
1.53 + 0x0161, // byte 0x9a
1.54 + 0x203a, // byte 0x9b
1.55 + 0x0153 // byte 0x9c
1.56 + };
1.57 +
1.58 +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3[]= // Unicode values for CP1252 bytes 0x9e..0x9f, one per byte in ascending byte order.
1.59 + {
1.60 + 0x017e, // byte 0x9e
1.61 + 0x0178 // byte 0x9f
1.62 + };
1.63 +
1.64 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1[]= // Keyed entries for the CP1252->Unicode lookup: each gives a first byte, a last byte and the indexed table holding the Unicode values for that span.
1.65 + {
1.66 + { // bytes 0x82..0x8c (11 values)
1.67 + 0x82,
1.68 + 0x8c,
1.69 + keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1
1.70 + },
1.71 + { // bytes 0x91..0x9c (12 values); 0x8d..0x90 fall between the first two entries and have no entry here
1.72 + 0x91,
1.73 + 0x9c,
1.74 + keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2
1.75 + },
1.76 + { // bytes 0x9e..0x9f (2 values); 0x9d has no entry here
1.77 + 0x9e,
1.78 + 0x9f,
1.79 + keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3
1.80 + }
1.81 + };
1.82 +
1.83 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToCodePage1252_1[]= // Unicode->CP1252 pairs: first value is the Unicode code point, second is the CP1252 byte.
1.84 + { // Entries are listed in ascending order of the Unicode value. NOTE(review): presumably the lookup in the conversion engine relies on that order - keep it sorted when editing.
1.85 + {
1.86 + 0x0152,
1.87 + 0x8c
1.88 + },
1.89 + {
1.90 + 0x0153,
1.91 + 0x9c
1.92 + },
1.93 + {
1.94 + 0x0160,
1.95 + 0x8a
1.96 + },
1.97 + {
1.98 + 0x0161,
1.99 + 0x9a
1.100 + },
1.101 + {
1.102 + 0x0178,
1.103 + 0x9f
1.104 + },
1.105 + { // 0x8e is converted in the other direction by an EOffset range (0x8e + 239 = 0x017d), not by the indexed tables
1.106 + 0x017d,
1.107 + 0x8e
1.108 + },
1.109 + {
1.110 + 0x017e,
1.111 + 0x9e
1.112 + },
1.113 + {
1.114 + 0x0192,
1.115 + 0x83
1.116 + },
1.117 + {
1.118 + 0x02c6,
1.119 + 0x88
1.120 + },
1.121 + {
1.122 + 0x02dc,
1.123 + 0x98
1.124 + },
1.125 + {
1.126 + 0x2013,
1.127 + 0x96
1.128 + },
1.129 + {
1.130 + 0x2014,
1.131 + 0x97
1.132 + },
1.133 + {
1.134 + 0x2018,
1.135 + 0x91
1.136 + },
1.137 + {
1.138 + 0x2019,
1.139 + 0x92
1.140 + },
1.141 + {
1.142 + 0x201a,
1.143 + 0x82
1.144 + },
1.145 + {
1.146 + 0x201c,
1.147 + 0x93
1.148 + },
1.149 + {
1.150 + 0x201d,
1.151 + 0x94
1.152 + },
1.153 + {
1.154 + 0x201e,
1.155 + 0x84
1.156 + },
1.157 + {
1.158 + 0x2020,
1.159 + 0x86
1.160 + },
1.161 + {
1.162 + 0x2021,
1.163 + 0x87
1.164 + },
1.165 + {
1.166 + 0x2022,
1.167 + 0x95
1.168 + },
1.169 + {
1.170 + 0x2026,
1.171 + 0x85
1.172 + },
1.173 + {
1.174 + 0x2030,
1.175 + 0x89
1.176 + },
1.177 + {
1.178 + 0x2039,
1.179 + 0x8b
1.180 + },
1.181 + {
1.182 + 0x203a,
1.183 + 0x9b
1.184 + },
1.185 + { // 0x80 is converted in the other direction by an EOffset range (0x80 + 8236 = 0x20ac), not by the indexed tables
1.186 + 0x20ac,
1.187 + 0x80
1.188 + },
1.189 + {
1.190 + 0x2122,
1.191 + 0x99
1.192 + }
1.193 + };
1.194 +
1.195 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange codePage1252VariableByteDataRanges[]= // A single range covering every byte value 0x00..0xff.
1.196 + {
1.197 + {
1.198 + 0x00, // first byte value in the range
1.199 + 0xff, // last byte value in the range
1.200 + 0, // NOTE(review): presumably the count of bytes following the initial byte (0 => single-byte encoding) - confirm field order in convdata.h
1.201 + 0
1.202 + }
1.203 + };
1.204 +
1.205 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange codePage1252ToUnicodeDataRanges[]= // CP1252->Unicode ranges: each entry gives first byte, last byte, algorithm, two zero fields and the algorithm's data.
1.206 + {
1.207 + { // 0x00..0x7f: EDirect, no table data
1.208 + 0x00,
1.209 + 0x7f,
1.210 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.211 + 0,
1.212 + 0,
1.213 + {
1.214 + 0,
1.215 + 0
1.216 + }
1.217 + },
1.218 + { // 0xa0..0xff: EDirect, no table data
1.219 + 0xa0,
1.220 + 0xff,
1.221 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.222 + 0,
1.223 + 0,
1.224 + {
1.225 + 0,
1.226 + 0
1.227 + }
1.228 + },
1.229 + { // 0x80 only: EOffset with data 8236
1.230 + 0x80,
1.231 + 0x80,
1.232 + SCnvConversionData::SOneDirectionData::SRange::EOffset,
1.233 + 0,
1.234 + 0,
1.235 + {
1.236 + STATIC_CAST(TUint, 8236), // 0x80 + 8236 = 0x20ac, matching the {0x20ac, 0x80} pair in keyedTable1616_unicodeToCodePage1252_1 (assumes the offset is added to the input code - confirm)
1.237 + 0
1.238 + }
1.239 + },
1.240 + { // 0x8e only: EOffset with data 239
1.241 + 0x8e,
1.242 + 0x8e,
1.243 + SCnvConversionData::SOneDirectionData::SRange::EOffset,
1.244 + 0,
1.245 + 0,
1.246 + {
1.247 + STATIC_CAST(TUint, 239), // 0x8e + 239 = 0x017d, matching the {0x017d, 0x8e} pair in keyedTable1616_unicodeToCodePage1252_1 (same assumption as for 0x80)
1.248 + 0
1.249 + }
1.250 + },
1.251 + { // 0x82..0x9f: resolved through the keyed entries; this span also contains 0x8e, which has its own entry just above. NOTE(review): presumably earlier ranges take precedence - confirm before reordering
1.252 + 0x82,
1.253 + 0x9f,
1.254 + SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16,
1.255 + 0,
1.256 + 0,
1.257 + {
1.258 + UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1)
1.259 + }
1.260 + }
1.261 + };
1.262 +
1.263 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToCodePage1252DataRanges[]= // Unicode->CP1252 ranges: each entry gives first code point, last code point, algorithm, two further fields and the algorithm's data.
1.264 + {
1.265 + { // U+0000..U+007f: EDirect, no table data
1.266 + 0x0000,
1.267 + 0x007f,
1.268 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.269 + 1, // NOTE(review): 1 in every entry here versus 0 in the CP1252->Unicode table; presumably the output size in bytes per character - confirm in convdata.h
1.270 + 0,
1.271 + {
1.272 + 0,
1.273 + 0
1.274 + }
1.275 + },
1.276 + { // U+00a0..U+00ff: EDirect, no table data
1.277 + 0x00a0,
1.278 + 0x00ff,
1.279 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.280 + 1,
1.281 + 0,
1.282 + {
1.283 + 0,
1.284 + 0
1.285 + }
1.286 + },
1.287 + { // U+0152..U+2122: the first and last keys of keyedTable1616_unicodeToCodePage1252_1, which supplies the individual pairs
1.288 + 0x0152,
1.289 + 0x2122,
1.290 + SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
1.291 + 1,
1.292 + 0,
1.293 + {
1.294 + UData_SKeyedTable1616(keyedTable1616_unicodeToCodePage1252_1)
1.295 + }
1.296 + }
1.297 + };
1.298 +
1.299 +GLREF_D const SCnvConversionData codePage1252ConversionData= // Top-level CP1252 conversion data: bundles the three range tables defined in this file.
1.300 + {
1.301 + SCnvConversionData::EUnspecified, // NOTE(review): presumably the byte order of foreign characters, left unspecified for this single-byte code page - confirm in convdata.h
1.302 + { // variable-byte ranges: entry count, then the table
1.303 + ARRAY_LENGTH(codePage1252VariableByteDataRanges),
1.304 + codePage1252VariableByteDataRanges
1.305 + },
1.306 + { // CP1252->Unicode ranges: entry count, then the table
1.307 + ARRAY_LENGTH(codePage1252ToUnicodeDataRanges),
1.308 + codePage1252ToUnicodeDataRanges
1.309 + },
1.310 + { // Unicode->CP1252 ranges: entry count, then the table
1.311 + ARRAY_LENGTH(unicodeToCodePage1252DataRanges),
1.312 + unicodeToCodePage1252DataRanges
1.313 + },
1.314 + NULL, // NOTE(review): the two trailing members are left NULL; their meaning is declared in convdata.h and is not visible from this file
1.315 + NULL
1.316 + };
1.317 +
1.318 +GLREF_C void IsCharacterSetCP1252(TInt& aConfidenceLevel, const TDesC8& aSample)
1.319 +	{
1.320 +	// Scores how likely aSample is CP1252 text and returns the score in aConfidenceLevel
1.321 +	// (0..100). Starts at 60; any byte that CP1252 does not support forces the score to 0.
1.322 +	const TInt KMaxConfidence = 100;
1.323 +	const TInt sampleLength = aSample.Length();
1.324 +	aConfidenceLevel = 60;
1.325 +
1.326 +	for (TInt i=0; i<sampleLength; ++i)
1.327 +		{
1.328 +		const TUint current = aSample[i]; // index the descriptor once per byte
1.329 +		const TBool hasPrevious = (i > 0);
1.330 +		const TBool hasNext = (i+1 < sampleLength);
1.331 +
1.332 +		// CP1252 includes ASCII. Bytes 0x80-0x9f are control codes in ISO 8859-1,
1.333 +		// so a supported byte in that range makes CP1252 slightly more likely.
1.334 +		if ((current >= 0x80) && (current <= 0x9f))
1.335 +			{
1.336 +			if ((current==0x81)||(current==0x8d)||(current==0x8f)||
1.337 +				(current==0x90)||(current==0x9d))
1.338 +				{
1.339 +				// These byte values are not supported by CP1252
1.340 +				aConfidenceLevel = 0;
1.341 +				break;
1.342 +				}
1.343 +			// Caveat: UTF-8 multibyte sequences frequently contain bytes in 0x80-0x9f,
1.344 +			// so UTF-8 input inflates this score; nothing here rules UTF-8 out.
1.345 +			aConfidenceLevel+=1;
1.346 +			}
1.347 +
1.348 +		// 0xf7 is the division sign and 0xd7 the multiplication sign in CP1252.
1.349 +		// A digit on both sides of either one raises the confidence.
1.350 +		if (hasPrevious && hasNext && ((current==0xf7) || (current==0xd7)))
1.351 +			{
1.352 +			const TUint before = aSample[i-1];
1.353 +			const TUint after = aSample[i+1];
1.354 +			if ((before >= 0x30) && (before <= 0x39) && (after >= 0x30) && (after <= 0x39))
1.355 +				{
1.356 +				aConfidenceLevel+=5;
1.357 +				}
1.358 +			}
1.359 +
1.360 +		// 0xa2-0xa5 are currency symbols; a digit right after one raises the confidence.
1.361 +		if (hasNext && (current >= 0xa2) && (current <= 0xa5))
1.362 +			{
1.363 +			const TUint after = aSample[i+1];
1.364 +			if ((after >= 0x30) && (after <= 0x39))
1.365 +				{
1.366 +				aConfidenceLevel+=5;
1.367 +				}
1.368 +			}
1.369 +		} // for loop
1.370 +	// Clamp the accumulated score to 0..KMaxConfidence.
1.371 +	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > KMaxConfidence)? KMaxConfidence: aConfidenceLevel): 0;
1.372 +	}