os/textandloc/charconvfw/charconv_fw/src/charconv/ascii.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 *
    16 */
    17 
    18 
    19 #include <e32std.h>
    20 #include <convdata.h>
    21 
    22 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    23 
    24 LOCAL_D const SCnvConversionData::SVariableByteData::SRange asciiVariableByteDataRanges[]=
    25 	{
    26 		{
    27 		0x00,
    28 		0xff,
    29 		0,
    30 		0
    31 		}
    32 	};
    33 
    34 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange asciiToUnicodeDataRanges[]=
    35 	{
    36 		{
    37 		0x00,
    38 		0x7f,
    39 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    40 		0,
    41 		0,
    42 			{
    43 			0,
    44 			0
    45 			}
    46 		}
    47 	};
    48 
    49 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToAsciiDataRanges[]=
    50 	{
    51 		{
    52 		0x0000,
    53 		0x007f,
    54 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    55 		1,
    56 		0,
    57 			{
    58 			0,
    59 			0
    60 			}
    61 		}
    62 	};
    63 
    64 GLREF_D const SCnvConversionData asciiConversionData=
    65 	{
    66 	SCnvConversionData::EUnspecified,
    67 		{
    68 		ARRAY_LENGTH(asciiVariableByteDataRanges),
    69 		asciiVariableByteDataRanges
    70 		},
    71 		{
    72 		ARRAY_LENGTH(asciiToUnicodeDataRanges),
    73 		asciiToUnicodeDataRanges
    74 		},
    75 		{
    76 		ARRAY_LENGTH(unicodeToAsciiDataRanges),
    77 		unicodeToAsciiDataRanges
    78 		},
    79 	NULL,
    80 	NULL
    81 	};
    82 
    83 GLREF_C void IsCharacterSetAscii(TInt& aConfidenceLevel, const TDesC8& aSample)
    84 	{
    85 	// loop through the aSample text checking the range of the character
    86 	// If greater than 127 then it's not ASCII (gotta be harsh!)  ... 
    87 	TInt sampleLength = aSample.Length();
    88 	if (sampleLength == 0)
    89 		{
    90 		aConfidenceLevel = 91;
    91 		return;
    92 		}
    93 	aConfidenceLevel = 100;
    94 	
    95 	
    96 	_LIT8(KAsciiEsc,"\x28\x42");
    97 	_LIT8(KJisRomanEsc,"\x28\x4a");
    98 	_LIT8(KJisCEsc,"\x24\x40");
    99 	_LIT8(KJisX0208Esc,"\x24\x42");
   100 	_LIT8(KJisX0212Esc,"\x24\x28\x44");
   101 	_LIT8(KHz1Esc,"\x7e\x7e");
   102 	_LIT8(KHz2Esc,"\x7e\x7b");
   103 	_LIT8(KHz3Esc,"\x7e\x7b");
   104 	
   105 	TInt asciiResult = 0;
   106 	TInt jisRomanResult = 0; 
   107 	TInt jisCResult = 0;
   108 	TInt jisX0208Result =0;
   109 	TInt jisX0212Result =0;
   110 	TInt hz1Result=0;
   111 	TInt hz2Result=0;
   112 	TInt hz3Result=0;
   113 	
   114 	TInt escSequences = 0; 
   115 	TInt controls = 0;
   116 
   117 
   118 	for (TInt i = 0; i < sampleLength; ++i)
   119 		{
   120 		if ((aSample[i]&0x80)!=0x00)
   121 			{
   122 			aConfidenceLevel = 0;
   123 			break;
   124 			}
   125 
   126 		if (i > asciiResult)
   127 			{
   128 			asciiResult=(aSample.Right(sampleLength-i)).Find(KAsciiEsc);
   129 			if (asciiResult!=KErrNotFound) //aConfidenceLevel-=2;
   130 				escSequences += 2;
   131 			}
   132 
   133 		if (i > jisRomanResult)
   134 			{
   135 			jisRomanResult=(aSample.Right(sampleLength-i)).Find(KJisRomanEsc);
   136 			if (jisRomanResult!=KErrNotFound) //aConfidenceLevel-=2;
   137 				escSequences += 2;
   138 			}
   139 
   140 		if (i > jisCResult)
   141 			{
   142 			jisCResult=(aSample.Right(sampleLength-i)).Find(KJisCEsc);
   143 			if (jisCResult!=KErrNotFound) //aConfidenceLevel-=2;
   144 				escSequences += 2;
   145 			}
   146 
   147 		if (i > jisX0208Result)
   148 			{
   149 			jisX0208Result=(aSample.Right(sampleLength-i)).Find(KJisX0208Esc);
   150 			if (jisX0208Result!=KErrNotFound) //aConfidenceLevel-=2;
   151 				escSequences += 2;
   152 			}
   153 
   154 		if (i > jisX0212Result)
   155 			{
   156 			jisX0212Result=(aSample.Right(sampleLength-i)).Find(KJisX0212Esc);
   157 			if (jisX0212Result!=KErrNotFound) //aConfidenceLevel-=2;
   158 				escSequences += 2;
   159 			}
   160 
   161 		if (i > hz1Result)
   162 			{
   163 			hz1Result=(aSample.Right(sampleLength-i)).Find(KHz1Esc);
   164 			if (hz1Result!=KErrNotFound) //aConfidenceLevel-=2;
   165 				escSequences += 2;
   166 			}
   167 
   168 		if (i > hz2Result)
   169 			{
   170 			hz2Result=(aSample.Right(sampleLength-i)).Find(KHz2Esc);
   171 			if (hz2Result!=KErrNotFound) //aConfidenceLevel-=2;
   172 				escSequences += 2;
   173 			}
   174 
   175 		if (i > hz3Result)
   176 			{
   177 			hz3Result=(aSample.Right(sampleLength-i)).Find(KHz3Esc);
   178 			if (hz3Result!=KErrNotFound) //aConfidenceLevel-=2;
   179 				escSequences += 2;
   180 			}
   181 
   182 		if (aSample[i]==0x7f)
   183 			// 0x7f is the control code for delete ... 
   184 			{
   185 			aConfidenceLevel = 0;
   186 			break;
   187 			}
   188 
   189 		 if (aSample[i]==0x1b)
   190 			{
   191 			static const TInt smsExtensionTable[12] = 
   192 				{0x0a, 0x14, 0x1b, 0x24, 0x28, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65};
   193 			for (TInt j=0; j < 12; ++j) // change the hard coded number to the Array length 
   194 				{
   195 				TInt increment1 = i+1;
   196 				if (increment1 >= sampleLength)
   197 					break;
   198 				if (aSample[increment1] == smsExtensionTable[j])
   199 					{
   200 					aConfidenceLevel-=5;
   201 					// /x1b/x24 & /x1b/x28 are the first two characters of a few
   202 					// of  JIS & ISO2022JP escape sequence (That's why 0x24 was added in smsExtensionTable)
   203 					// So if what's up next matches any JIS & IS02022JP escape sequence..... more deduction 
   204 					// of the confidence Level 
   205 					TInt increment2 = i+2;
   206 					TInt increment3 = i+3;
   207 					if((increment2 >= sampleLength)||((increment3) >= sampleLength))
   208 						break;
   209 					if (smsExtensionTable[j]==0x24)
   210 						{
   211 						// 24 -> 40,42 (28,44)
   212 						if ((aSample[increment2]==0x40) || (aSample[increment2]==0x42) ||
   213 							((aSample[increment2]==0x28)&&(aSample[increment3]==0x44)))
   214 							{
   215 							aConfidenceLevel=0;
   216 							break;
   217 							}
   218 						}
   219 					else if (smsExtensionTable[j]==0x28)
   220 						{
   221 						// 28 -> 42, 49, 4a
   222 						if ((aSample[increment2]==0x42) || (aSample[increment2]==0x49) || (aSample[increment2]==0x4a))
   223 							{
   224 							aConfidenceLevel=0;
   225 							break;
   226 							}
   227 						}
   228 					}
   229 				}
   230 			if(aConfidenceLevel==0)
   231 				break;
   232 			}
   233 			if (controls < 100 && aSample[i] < 0x20 && 	aSample[i] != '\r' && aSample[i] != '\n' && aSample[i] != '\t')
   234 			// a few more control codes besides LF, CR, TAB 
   235 			{
   236 			controls ? controls *= 3 : controls = 3; 
   237 			}
   238 		}
   239 		
   240 	aConfidenceLevel -= controls;
   241 	aConfidenceLevel = aConfidenceLevel - ((escSequences*100)/sampleLength);
   242 	aConfidenceLevel =(aConfidenceLevel >0)? aConfidenceLevel: 0;
   243 
   244 
   245 	}
   246 
   247