os/textandloc/charconvfw/charconv_fw/src/charconv/cp1252.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/charconv/cp1252.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,369 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <convdata.h>
    1.24 +
// Number of elements in aArray, computed as total size divided by the size of
// element 0. Only meaningful when aArray is a real array whose definition is
// visible at the point of use; applied to a pointer it would silently divide
// the pointer size by the element size instead.
#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    1.26 +
    1.27 +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1[]=
    1.28 +	{
    1.29 +	0x201a,
    1.30 +	0x0192,
    1.31 +	0x201e,
    1.32 +	0x2026,
    1.33 +	0x2020,
    1.34 +	0x2021,
    1.35 +	0x02c6,
    1.36 +	0x2030,
    1.37 +	0x0160,
    1.38 +	0x2039,
    1.39 +	0x0152
    1.40 +	};
    1.41 +
    1.42 +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2[]=
    1.43 +	{
    1.44 +	0x2018,
    1.45 +	0x2019,
    1.46 +	0x201c,
    1.47 +	0x201d,
    1.48 +	0x2022,
    1.49 +	0x2013,
    1.50 +	0x2014,
    1.51 +	0x02dc,
    1.52 +	0x2122,
    1.53 +	0x0161,
    1.54 +	0x203a,
    1.55 +	0x0153
    1.56 +	};
    1.57 +
    1.58 +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3[]=
    1.59 +	{
    1.60 +	0x017e,
    1.61 +	0x0178
    1.62 +	};
    1.63 +
    1.64 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1[]=
    1.65 +	{
    1.66 +		{
    1.67 +		0x82,
    1.68 +		0x8c,
    1.69 +		keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1
    1.70 +		},
    1.71 +		{
    1.72 +		0x91,
    1.73 +		0x9c,
    1.74 +		keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2
    1.75 +		},
    1.76 +		{
    1.77 +		0x9e,
    1.78 +		0x9f,
    1.79 +		keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3
    1.80 +		}
    1.81 +	};
    1.82 +
    1.83 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToCodePage1252_1[]=
    1.84 +	{
    1.85 +		{
    1.86 +		0x0152,
    1.87 +		0x8c
    1.88 +		},
    1.89 +		{
    1.90 +		0x0153,
    1.91 +		0x9c
    1.92 +		},
    1.93 +		{
    1.94 +		0x0160,
    1.95 +		0x8a
    1.96 +		},
    1.97 +		{
    1.98 +		0x0161,
    1.99 +		0x9a
   1.100 +		},
   1.101 +		{
   1.102 +		0x0178,
   1.103 +		0x9f
   1.104 +		},
   1.105 +		{
   1.106 +		0x017d,
   1.107 +		0x8e
   1.108 +		},
   1.109 +		{
   1.110 +		0x017e,
   1.111 +		0x9e
   1.112 +		},
   1.113 +		{
   1.114 +		0x0192,
   1.115 +		0x83
   1.116 +		},
   1.117 +		{
   1.118 +		0x02c6,
   1.119 +		0x88
   1.120 +		},
   1.121 +		{
   1.122 +		0x02dc,
   1.123 +		0x98
   1.124 +		},
   1.125 +		{
   1.126 +		0x2013,
   1.127 +		0x96
   1.128 +		},
   1.129 +		{
   1.130 +		0x2014,
   1.131 +		0x97
   1.132 +		},
   1.133 +		{
   1.134 +		0x2018,
   1.135 +		0x91
   1.136 +		},
   1.137 +		{
   1.138 +		0x2019,
   1.139 +		0x92
   1.140 +		},
   1.141 +		{
   1.142 +		0x201a,
   1.143 +		0x82
   1.144 +		},
   1.145 +		{
   1.146 +		0x201c,
   1.147 +		0x93
   1.148 +		},
   1.149 +		{
   1.150 +		0x201d,
   1.151 +		0x94
   1.152 +		},
   1.153 +		{
   1.154 +		0x201e,
   1.155 +		0x84
   1.156 +		},
   1.157 +		{
   1.158 +		0x2020,
   1.159 +		0x86
   1.160 +		},
   1.161 +		{
   1.162 +		0x2021,
   1.163 +		0x87
   1.164 +		},
   1.165 +		{
   1.166 +		0x2022,
   1.167 +		0x95
   1.168 +		},
   1.169 +		{
   1.170 +		0x2026,
   1.171 +		0x85
   1.172 +		},
   1.173 +		{
   1.174 +		0x2030,
   1.175 +		0x89
   1.176 +		},
   1.177 +		{
   1.178 +		0x2039,
   1.179 +		0x8b
   1.180 +		},
   1.181 +		{
   1.182 +		0x203a,
   1.183 +		0x9b
   1.184 +		},
   1.185 +		{
   1.186 +		0x20ac,
   1.187 +		0x80
   1.188 +		},
   1.189 +		{
   1.190 +		0x2122,
   1.191 +		0x99
   1.192 +		}
   1.193 +	};
   1.194 +
   1.195 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange codePage1252VariableByteDataRanges[]=
   1.196 +	{
   1.197 +		{
   1.198 +		0x00,
   1.199 +		0xff,
   1.200 +		0,
   1.201 +		0
   1.202 +		}
   1.203 +	};
   1.204 +
   1.205 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange codePage1252ToUnicodeDataRanges[]=
   1.206 +	{
   1.207 +		{
   1.208 +		0x00,
   1.209 +		0x7f,
   1.210 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
   1.211 +		0,
   1.212 +		0,
   1.213 +			{
   1.214 +			0,
   1.215 +			0
   1.216 +			}
   1.217 +		},
   1.218 +		{
   1.219 +		0xa0,
   1.220 +		0xff,
   1.221 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
   1.222 +		0,
   1.223 +		0,
   1.224 +			{
   1.225 +			0,
   1.226 +			0
   1.227 +			}
   1.228 +		},
   1.229 +		{
   1.230 +		0x80,
   1.231 +		0x80,
   1.232 +		SCnvConversionData::SOneDirectionData::SRange::EOffset,
   1.233 +		0,
   1.234 +		0,
   1.235 +			{
   1.236 +			STATIC_CAST(TUint, 8236),
   1.237 +			0
   1.238 +			}
   1.239 +		},
   1.240 +		{
   1.241 +		0x8e,
   1.242 +		0x8e,
   1.243 +		SCnvConversionData::SOneDirectionData::SRange::EOffset,
   1.244 +		0,
   1.245 +		0,
   1.246 +			{
   1.247 +			STATIC_CAST(TUint, 239),
   1.248 +			0
   1.249 +			}
   1.250 +		},
   1.251 +		{
   1.252 +		0x82,
   1.253 +		0x9f,
   1.254 +		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16,
   1.255 +		0,
   1.256 +		0,
   1.257 +			{
   1.258 +			UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1)
   1.259 +			}
   1.260 +		}
   1.261 +	};
   1.262 +
   1.263 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToCodePage1252DataRanges[]=
   1.264 +	{
   1.265 +		{
   1.266 +		0x0000,
   1.267 +		0x007f,
   1.268 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
   1.269 +		1,
   1.270 +		0,
   1.271 +			{
   1.272 +			0,
   1.273 +			0
   1.274 +			}
   1.275 +		},
   1.276 +		{
   1.277 +		0x00a0,
   1.278 +		0x00ff,
   1.279 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
   1.280 +		1,
   1.281 +		0,
   1.282 +			{
   1.283 +			0,
   1.284 +			0
   1.285 +			}
   1.286 +		},
   1.287 +		{
   1.288 +		0x0152,
   1.289 +		0x2122,
   1.290 +		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
   1.291 +		1,
   1.292 +		0,
   1.293 +			{
   1.294 +			UData_SKeyedTable1616(keyedTable1616_unicodeToCodePage1252_1)
   1.295 +			}
   1.296 +		}
   1.297 +	};
   1.298 +
   1.299 +GLREF_D const SCnvConversionData codePage1252ConversionData=
   1.300 +	{
   1.301 +	SCnvConversionData::EUnspecified,
   1.302 +		{
   1.303 +		ARRAY_LENGTH(codePage1252VariableByteDataRanges),
   1.304 +		codePage1252VariableByteDataRanges
   1.305 +		},
   1.306 +		{
   1.307 +		ARRAY_LENGTH(codePage1252ToUnicodeDataRanges),
   1.308 +		codePage1252ToUnicodeDataRanges
   1.309 +		},
   1.310 +		{
   1.311 +		ARRAY_LENGTH(unicodeToCodePage1252DataRanges),
   1.312 +		unicodeToCodePage1252DataRanges
   1.313 +		},
   1.314 +	NULL,
   1.315 +	NULL
   1.316 +	};
   1.317 +
   1.318 +GLREF_C void IsCharacterSetCP1252(TInt& aConfidenceLevel, const TDesC8& aSample)
   1.319 +	{
   1.320 +	aConfidenceLevel = 60;
   1.321 +	TInt sampleLength = aSample.Length();
   1.322 +
   1.323 +	for (TInt i=0; i<sampleLength; ++i)
   1.324 +		{
   1.325 +		// CP1252 includes ASCII as well
   1.326 +		// first check if the char is in the range 0x80 - 0x9f (controls codes in ISO88591)
   1.327 +		// If it is in that range then the likelihood that it's CP1252 is a bit higher
   1.328 +		if ((aSample[i] >= 0x80) && (aSample[i] <= 0x9f))
   1.329 +			{
   1.330 +			if((aSample[i]==0x81)||(aSample[i]==0x8D)||(aSample[i]==0x8f)||
   1.331 +				(aSample[i]==0x90)||(aSample[i]==0x9d))
   1.332 +				{
   1.333 +				// These code values are not supported by the Codepage CP1252
   1.334 +				aConfidenceLevel = 0;
   1.335 +				break;
   1.336 +				}
   1.337 +			else
   1.338 +				{
   1.339 +				// problem: UTF8 uses the values 0x80-0x9f in more than 50% of it's multibyte representation
   1.340 +				// so if the text was UTF8 .... the confidence here would hit the roof. Could check to make 
   1.341 +				// sure that this is not UTF8
   1.342 +				aConfidenceLevel+=1;
   1.343 +				}
   1.344 +			}
   1.345 +		TInt increment1 = i+1;
   1.346 +		TInt decrement1 = i-1;
   1.347 +		// 0xf7 is the division symbol in CP1252.
   1.348 +		// 0xd7 is the division symbol in CP1252.If char on either side of the division
   1.349 +		// symbol is a number then the confidence that it's ISO88591 increases
   1.350 +		if( decrement1>= 0 && ((aSample[i]==0xf7) || (aSample[i]==0xd7)) && increment1<sampleLength)
   1.351 +			{
   1.352 +			
   1.353 +			if (increment1 >= sampleLength)
   1.354 +				break;
   1.355 +			if ( (aSample[decrement1] >= 0x30) && (aSample[decrement1] <= 0x39) &&  // char before is a number
   1.356 +				 (aSample[increment1] >= 0x30) && (aSample[increment1] <= 0x39) )   // char after is a number
   1.357 +				{
   1.358 +				aConfidenceLevel+=5;
   1.359 +				}
   1.360 +			}
   1.361 +		// Can also use the currency symbol to increase confidence if the char after a 
   1.362 +		// currency symbol is numeric
   1.363 +		if((aSample[i]>=0xa2) && (aSample[i] <= 0xa5) && increment1<sampleLength)
   1.364 +			{
   1.365 +			if ((aSample[increment1] >= 0x30) && (aSample[increment1] <= 0x39))
   1.366 +				{
   1.367 +				aConfidenceLevel+=5; 
   1.368 +				}
   1.369 +			}
   1.370 +		} // for loop
   1.371 +	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
   1.372 +	}