1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/charconv/ascii.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,247 @@
1.4 +/*
1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <e32std.h>
1.23 +#include <convdata.h>
1.24 +
// Element count of a statically sized array. Only meaningful for real arrays:
// applied to a pointer it would divide the pointer size by the element size.
#define ARRAY_LENGTH(aArray) (sizeof(aArray) / sizeof(*(aArray)))
1.26 +
1.27 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange asciiVariableByteDataRanges[]=
1.28 + {
1.29 + {
1.30 + 0x00,
1.31 + 0xff,
1.32 + 0,
1.33 + 0
1.34 + }
1.35 + };
1.36 +
1.37 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange asciiToUnicodeDataRanges[]=
1.38 + {
1.39 + {
1.40 + 0x00,
1.41 + 0x7f,
1.42 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.43 + 0,
1.44 + 0,
1.45 + {
1.46 + 0,
1.47 + 0
1.48 + }
1.49 + }
1.50 + };
1.51 +
1.52 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToAsciiDataRanges[]=
1.53 + {
1.54 + {
1.55 + 0x0000,
1.56 + 0x007f,
1.57 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.58 + 1,
1.59 + 0,
1.60 + {
1.61 + 0,
1.62 + 0
1.63 + }
1.64 + }
1.65 + };
1.66 +
1.67 +GLREF_D const SCnvConversionData asciiConversionData=
1.68 + {
1.69 + SCnvConversionData::EUnspecified,
1.70 + {
1.71 + ARRAY_LENGTH(asciiVariableByteDataRanges),
1.72 + asciiVariableByteDataRanges
1.73 + },
1.74 + {
1.75 + ARRAY_LENGTH(asciiToUnicodeDataRanges),
1.76 + asciiToUnicodeDataRanges
1.77 + },
1.78 + {
1.79 + ARRAY_LENGTH(unicodeToAsciiDataRanges),
1.80 + unicodeToAsciiDataRanges
1.81 + },
1.82 + NULL,
1.83 + NULL
1.84 + };
1.85 +
1.86 +GLREF_C void IsCharacterSetAscii(TInt& aConfidenceLevel, const TDesC8& aSample)
1.87 + {
1.88 + // loop through the aSample text checking the range of the character
1.89 + // If greater than 127 then it's not ASCII (gotta be harsh!) ...
1.90 + TInt sampleLength = aSample.Length();
1.91 + if (sampleLength == 0)
1.92 + {
1.93 + aConfidenceLevel = 91;
1.94 + return;
1.95 + }
1.96 + aConfidenceLevel = 100;
1.97 +
1.98 +
1.99 + _LIT8(KAsciiEsc,"\x28\x42");
1.100 + _LIT8(KJisRomanEsc,"\x28\x4a");
1.101 + _LIT8(KJisCEsc,"\x24\x40");
1.102 + _LIT8(KJisX0208Esc,"\x24\x42");
1.103 + _LIT8(KJisX0212Esc,"\x24\x28\x44");
1.104 + _LIT8(KHz1Esc,"\x7e\x7e");
1.105 + _LIT8(KHz2Esc,"\x7e\x7b");
1.106 + _LIT8(KHz3Esc,"\x7e\x7b");
1.107 +
1.108 + TInt asciiResult = 0;
1.109 + TInt jisRomanResult = 0;
1.110 + TInt jisCResult = 0;
1.111 + TInt jisX0208Result =0;
1.112 + TInt jisX0212Result =0;
1.113 + TInt hz1Result=0;
1.114 + TInt hz2Result=0;
1.115 + TInt hz3Result=0;
1.116 +
1.117 + TInt escSequences = 0;
1.118 + TInt controls = 0;
1.119 +
1.120 +
1.121 + for (TInt i = 0; i < sampleLength; ++i)
1.122 + {
1.123 + if ((aSample[i]&0x80)!=0x00)
1.124 + {
1.125 + aConfidenceLevel = 0;
1.126 + break;
1.127 + }
1.128 +
1.129 + if (i > asciiResult)
1.130 + {
1.131 + asciiResult=(aSample.Right(sampleLength-i)).Find(KAsciiEsc);
1.132 + if (asciiResult!=KErrNotFound) //aConfidenceLevel-=2;
1.133 + escSequences += 2;
1.134 + }
1.135 +
1.136 + if (i > jisRomanResult)
1.137 + {
1.138 + jisRomanResult=(aSample.Right(sampleLength-i)).Find(KJisRomanEsc);
1.139 + if (jisRomanResult!=KErrNotFound) //aConfidenceLevel-=2;
1.140 + escSequences += 2;
1.141 + }
1.142 +
1.143 + if (i > jisCResult)
1.144 + {
1.145 + jisCResult=(aSample.Right(sampleLength-i)).Find(KJisCEsc);
1.146 + if (jisCResult!=KErrNotFound) //aConfidenceLevel-=2;
1.147 + escSequences += 2;
1.148 + }
1.149 +
1.150 + if (i > jisX0208Result)
1.151 + {
1.152 + jisX0208Result=(aSample.Right(sampleLength-i)).Find(KJisX0208Esc);
1.153 + if (jisX0208Result!=KErrNotFound) //aConfidenceLevel-=2;
1.154 + escSequences += 2;
1.155 + }
1.156 +
1.157 + if (i > jisX0212Result)
1.158 + {
1.159 + jisX0212Result=(aSample.Right(sampleLength-i)).Find(KJisX0212Esc);
1.160 + if (jisX0212Result!=KErrNotFound) //aConfidenceLevel-=2;
1.161 + escSequences += 2;
1.162 + }
1.163 +
1.164 + if (i > hz1Result)
1.165 + {
1.166 + hz1Result=(aSample.Right(sampleLength-i)).Find(KHz1Esc);
1.167 + if (hz1Result!=KErrNotFound) //aConfidenceLevel-=2;
1.168 + escSequences += 2;
1.169 + }
1.170 +
1.171 + if (i > hz2Result)
1.172 + {
1.173 + hz2Result=(aSample.Right(sampleLength-i)).Find(KHz2Esc);
1.174 + if (hz2Result!=KErrNotFound) //aConfidenceLevel-=2;
1.175 + escSequences += 2;
1.176 + }
1.177 +
1.178 + if (i > hz3Result)
1.179 + {
1.180 + hz3Result=(aSample.Right(sampleLength-i)).Find(KHz3Esc);
1.181 + if (hz3Result!=KErrNotFound) //aConfidenceLevel-=2;
1.182 + escSequences += 2;
1.183 + }
1.184 +
1.185 + if (aSample[i]==0x7f)
1.186 + // 0x7f is the control code for delete ...
1.187 + {
1.188 + aConfidenceLevel = 0;
1.189 + break;
1.190 + }
1.191 +
1.192 + if (aSample[i]==0x1b)
1.193 + {
1.194 + static const TInt smsExtensionTable[12] =
1.195 + {0x0a, 0x14, 0x1b, 0x24, 0x28, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65};
1.196 + for (TInt j=0; j < 12; ++j) // change the hard coded number to the Array length
1.197 + {
1.198 + TInt increment1 = i+1;
1.199 + if (increment1 >= sampleLength)
1.200 + break;
1.201 + if (aSample[increment1] == smsExtensionTable[j])
1.202 + {
1.203 + aConfidenceLevel-=5;
1.204 + // /x1b/x24 & /x1b/x28 are the first two characters of a few
1.205 + // of JIS & ISO2022JP escape sequence (That's why 0x24 was added in smsExtensionTable)
1.206 + // So if what's up next matches any JIS & IS02022JP escape sequence..... more deduction
1.207 + // of the confidence Level
1.208 + TInt increment2 = i+2;
1.209 + TInt increment3 = i+3;
1.210 + if((increment2 >= sampleLength)||((increment3) >= sampleLength))
1.211 + break;
1.212 + if (smsExtensionTable[j]==0x24)
1.213 + {
1.214 + // 24 -> 40,42 (28,44)
1.215 + if ((aSample[increment2]==0x40) || (aSample[increment2]==0x42) ||
1.216 + ((aSample[increment2]==0x28)&&(aSample[increment3]==0x44)))
1.217 + {
1.218 + aConfidenceLevel=0;
1.219 + break;
1.220 + }
1.221 + }
1.222 + else if (smsExtensionTable[j]==0x28)
1.223 + {
1.224 + // 28 -> 42, 49, 4a
1.225 + if ((aSample[increment2]==0x42) || (aSample[increment2]==0x49) || (aSample[increment2]==0x4a))
1.226 + {
1.227 + aConfidenceLevel=0;
1.228 + break;
1.229 + }
1.230 + }
1.231 + }
1.232 + }
1.233 + if(aConfidenceLevel==0)
1.234 + break;
1.235 + }
1.236 + if (controls < 100 && aSample[i] < 0x20 && aSample[i] != '\r' && aSample[i] != '\n' && aSample[i] != '\t')
1.237 + // a few more control codes besides LF, CR, TAB
1.238 + {
1.239 + controls ? controls *= 3 : controls = 3;
1.240 + }
1.241 + }
1.242 +
1.243 + aConfidenceLevel -= controls;
1.244 + aConfidenceLevel = aConfidenceLevel - ((escSequences*100)/sampleLength);
1.245 + aConfidenceLevel =(aConfidenceLevel >0)? aConfidenceLevel: 0;
1.246 +
1.247 +
1.248 + }
1.249 +
1.250 +