os/textandloc/charconvfw/charconv_fw/src/charconv/ascii.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/charconv/ascii.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,247 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <convdata.h>
    1.24 +
    1.25 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    1.26 +
    1.27 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange asciiVariableByteDataRanges[]=
    1.28 +	{
    1.29 +		{
    1.30 +		0x00,
    1.31 +		0xff,
    1.32 +		0,
    1.33 +		0
    1.34 +		}
    1.35 +	};
    1.36 +
    1.37 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange asciiToUnicodeDataRanges[]=
    1.38 +	{
    1.39 +		{
    1.40 +		0x00,
    1.41 +		0x7f,
    1.42 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    1.43 +		0,
    1.44 +		0,
    1.45 +			{
    1.46 +			0,
    1.47 +			0
    1.48 +			}
    1.49 +		}
    1.50 +	};
    1.51 +
    1.52 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToAsciiDataRanges[]=
    1.53 +	{
    1.54 +		{
    1.55 +		0x0000,
    1.56 +		0x007f,
    1.57 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    1.58 +		1,
    1.59 +		0,
    1.60 +			{
    1.61 +			0,
    1.62 +			0
    1.63 +			}
    1.64 +		}
    1.65 +	};
    1.66 +
    1.67 +GLREF_D const SCnvConversionData asciiConversionData=
    1.68 +	{
    1.69 +	SCnvConversionData::EUnspecified,
    1.70 +		{
    1.71 +		ARRAY_LENGTH(asciiVariableByteDataRanges),
    1.72 +		asciiVariableByteDataRanges
    1.73 +		},
    1.74 +		{
    1.75 +		ARRAY_LENGTH(asciiToUnicodeDataRanges),
    1.76 +		asciiToUnicodeDataRanges
    1.77 +		},
    1.78 +		{
    1.79 +		ARRAY_LENGTH(unicodeToAsciiDataRanges),
    1.80 +		unicodeToAsciiDataRanges
    1.81 +		},
    1.82 +	NULL,
    1.83 +	NULL
    1.84 +	};
    1.85 +
    1.86 +GLREF_C void IsCharacterSetAscii(TInt& aConfidenceLevel, const TDesC8& aSample)
    1.87 +	{
    1.88 +	// loop through the aSample text checking the range of the character
    1.89 +	// If greater than 127 then it's not ASCII (gotta be harsh!)  ... 
    1.90 +	TInt sampleLength = aSample.Length();
    1.91 +	if (sampleLength == 0)
    1.92 +		{
    1.93 +		aConfidenceLevel = 91;
    1.94 +		return;
    1.95 +		}
    1.96 +	aConfidenceLevel = 100;
    1.97 +	
    1.98 +	
    1.99 +	_LIT8(KAsciiEsc,"\x28\x42");
   1.100 +	_LIT8(KJisRomanEsc,"\x28\x4a");
   1.101 +	_LIT8(KJisCEsc,"\x24\x40");
   1.102 +	_LIT8(KJisX0208Esc,"\x24\x42");
   1.103 +	_LIT8(KJisX0212Esc,"\x24\x28\x44");
   1.104 +	_LIT8(KHz1Esc,"\x7e\x7e");
   1.105 +	_LIT8(KHz2Esc,"\x7e\x7b");
   1.106 +	_LIT8(KHz3Esc,"\x7e\x7b");
   1.107 +	
   1.108 +	TInt asciiResult = 0;
   1.109 +	TInt jisRomanResult = 0; 
   1.110 +	TInt jisCResult = 0;
   1.111 +	TInt jisX0208Result =0;
   1.112 +	TInt jisX0212Result =0;
   1.113 +	TInt hz1Result=0;
   1.114 +	TInt hz2Result=0;
   1.115 +	TInt hz3Result=0;
   1.116 +	
   1.117 +	TInt escSequences = 0; 
   1.118 +	TInt controls = 0;
   1.119 +
   1.120 +
   1.121 +	for (TInt i = 0; i < sampleLength; ++i)
   1.122 +		{
   1.123 +		if ((aSample[i]&0x80)!=0x00)
   1.124 +			{
   1.125 +			aConfidenceLevel = 0;
   1.126 +			break;
   1.127 +			}
   1.128 +
   1.129 +		if (i > asciiResult)
   1.130 +			{
   1.131 +			asciiResult=(aSample.Right(sampleLength-i)).Find(KAsciiEsc);
   1.132 +			if (asciiResult!=KErrNotFound) //aConfidenceLevel-=2;
   1.133 +				escSequences += 2;
   1.134 +			}
   1.135 +
   1.136 +		if (i > jisRomanResult)
   1.137 +			{
   1.138 +			jisRomanResult=(aSample.Right(sampleLength-i)).Find(KJisRomanEsc);
   1.139 +			if (jisRomanResult!=KErrNotFound) //aConfidenceLevel-=2;
   1.140 +				escSequences += 2;
   1.141 +			}
   1.142 +
   1.143 +		if (i > jisCResult)
   1.144 +			{
   1.145 +			jisCResult=(aSample.Right(sampleLength-i)).Find(KJisCEsc);
   1.146 +			if (jisCResult!=KErrNotFound) //aConfidenceLevel-=2;
   1.147 +				escSequences += 2;
   1.148 +			}
   1.149 +
   1.150 +		if (i > jisX0208Result)
   1.151 +			{
   1.152 +			jisX0208Result=(aSample.Right(sampleLength-i)).Find(KJisX0208Esc);
   1.153 +			if (jisX0208Result!=KErrNotFound) //aConfidenceLevel-=2;
   1.154 +				escSequences += 2;
   1.155 +			}
   1.156 +
   1.157 +		if (i > jisX0212Result)
   1.158 +			{
   1.159 +			jisX0212Result=(aSample.Right(sampleLength-i)).Find(KJisX0212Esc);
   1.160 +			if (jisX0212Result!=KErrNotFound) //aConfidenceLevel-=2;
   1.161 +				escSequences += 2;
   1.162 +			}
   1.163 +
   1.164 +		if (i > hz1Result)
   1.165 +			{
   1.166 +			hz1Result=(aSample.Right(sampleLength-i)).Find(KHz1Esc);
   1.167 +			if (hz1Result!=KErrNotFound) //aConfidenceLevel-=2;
   1.168 +				escSequences += 2;
   1.169 +			}
   1.170 +
   1.171 +		if (i > hz2Result)
   1.172 +			{
   1.173 +			hz2Result=(aSample.Right(sampleLength-i)).Find(KHz2Esc);
   1.174 +			if (hz2Result!=KErrNotFound) //aConfidenceLevel-=2;
   1.175 +				escSequences += 2;
   1.176 +			}
   1.177 +
   1.178 +		if (i > hz3Result)
   1.179 +			{
   1.180 +			hz3Result=(aSample.Right(sampleLength-i)).Find(KHz3Esc);
   1.181 +			if (hz3Result!=KErrNotFound) //aConfidenceLevel-=2;
   1.182 +				escSequences += 2;
   1.183 +			}
   1.184 +
   1.185 +		if (aSample[i]==0x7f)
   1.186 +			// 0x7f is the control code for delete ... 
   1.187 +			{
   1.188 +			aConfidenceLevel = 0;
   1.189 +			break;
   1.190 +			}
   1.191 +
   1.192 +		 if (aSample[i]==0x1b)
   1.193 +			{
   1.194 +			static const TInt smsExtensionTable[12] = 
   1.195 +				{0x0a, 0x14, 0x1b, 0x24, 0x28, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65};
   1.196 +			for (TInt j=0; j < 12; ++j) // change the hard coded number to the Array length 
   1.197 +				{
   1.198 +				TInt increment1 = i+1;
   1.199 +				if (increment1 >= sampleLength)
   1.200 +					break;
   1.201 +				if (aSample[increment1] == smsExtensionTable[j])
   1.202 +					{
   1.203 +					aConfidenceLevel-=5;
   1.204 +					// /x1b/x24 & /x1b/x28 are the first two characters of a few
   1.205 +					// of  JIS & ISO2022JP escape sequence (That's why 0x24 was added in smsExtensionTable)
   1.206 +					// So if what's up next matches any JIS & IS02022JP escape sequence..... more deduction 
   1.207 +					// of the confidence Level 
   1.208 +					TInt increment2 = i+2;
   1.209 +					TInt increment3 = i+3;
   1.210 +					if((increment2 >= sampleLength)||((increment3) >= sampleLength))
   1.211 +						break;
   1.212 +					if (smsExtensionTable[j]==0x24)
   1.213 +						{
   1.214 +						// 24 -> 40,42 (28,44)
   1.215 +						if ((aSample[increment2]==0x40) || (aSample[increment2]==0x42) ||
   1.216 +							((aSample[increment2]==0x28)&&(aSample[increment3]==0x44)))
   1.217 +							{
   1.218 +							aConfidenceLevel=0;
   1.219 +							break;
   1.220 +							}
   1.221 +						}
   1.222 +					else if (smsExtensionTable[j]==0x28)
   1.223 +						{
   1.224 +						// 28 -> 42, 49, 4a
   1.225 +						if ((aSample[increment2]==0x42) || (aSample[increment2]==0x49) || (aSample[increment2]==0x4a))
   1.226 +							{
   1.227 +							aConfidenceLevel=0;
   1.228 +							break;
   1.229 +							}
   1.230 +						}
   1.231 +					}
   1.232 +				}
   1.233 +			if(aConfidenceLevel==0)
   1.234 +				break;
   1.235 +			}
   1.236 +			if (controls < 100 && aSample[i] < 0x20 && 	aSample[i] != '\r' && aSample[i] != '\n' && aSample[i] != '\t')
   1.237 +			// a few more control codes besides LF, CR, TAB 
   1.238 +			{
   1.239 +			controls ? controls *= 3 : controls = 3; 
   1.240 +			}
   1.241 +		}
   1.242 +		
   1.243 +	aConfidenceLevel -= controls;
   1.244 +	aConfidenceLevel = aConfidenceLevel - ((escSequences*100)/sampleLength);
   1.245 +	aConfidenceLevel =(aConfidenceLevel >0)? aConfidenceLevel: 0;
   1.246 +
   1.247 +
   1.248 +	}
   1.249 +
   1.250 +