os/textandloc/charconvfw/charconv_fw/src/charconv/ascii.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
/*
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: ASCII conversion data tables and the ASCII character-set
*              detection heuristic (IsCharacterSetAscii).
*
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <convdata.h>
sl@0
    21
sl@0
    22
#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
sl@0
    23
sl@0
    24
LOCAL_D const SCnvConversionData::SVariableByteData::SRange asciiVariableByteDataRanges[]=
sl@0
    25
	{
sl@0
    26
		{
sl@0
    27
		0x00,
sl@0
    28
		0xff,
sl@0
    29
		0,
sl@0
    30
		0
sl@0
    31
		}
sl@0
    32
	};
sl@0
    33
sl@0
    34
LOCAL_D const SCnvConversionData::SOneDirectionData::SRange asciiToUnicodeDataRanges[]=
sl@0
    35
	{
sl@0
    36
		{
sl@0
    37
		0x00,
sl@0
    38
		0x7f,
sl@0
    39
		SCnvConversionData::SOneDirectionData::SRange::EDirect,
sl@0
    40
		0,
sl@0
    41
		0,
sl@0
    42
			{
sl@0
    43
			0,
sl@0
    44
			0
sl@0
    45
			}
sl@0
    46
		}
sl@0
    47
	};
sl@0
    48
sl@0
    49
LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToAsciiDataRanges[]=
sl@0
    50
	{
sl@0
    51
		{
sl@0
    52
		0x0000,
sl@0
    53
		0x007f,
sl@0
    54
		SCnvConversionData::SOneDirectionData::SRange::EDirect,
sl@0
    55
		1,
sl@0
    56
		0,
sl@0
    57
			{
sl@0
    58
			0,
sl@0
    59
			0
sl@0
    60
			}
sl@0
    61
		}
sl@0
    62
	};
sl@0
    63
sl@0
    64
GLREF_D const SCnvConversionData asciiConversionData=
sl@0
    65
	{
sl@0
    66
	SCnvConversionData::EUnspecified,
sl@0
    67
		{
sl@0
    68
		ARRAY_LENGTH(asciiVariableByteDataRanges),
sl@0
    69
		asciiVariableByteDataRanges
sl@0
    70
		},
sl@0
    71
		{
sl@0
    72
		ARRAY_LENGTH(asciiToUnicodeDataRanges),
sl@0
    73
		asciiToUnicodeDataRanges
sl@0
    74
		},
sl@0
    75
		{
sl@0
    76
		ARRAY_LENGTH(unicodeToAsciiDataRanges),
sl@0
    77
		unicodeToAsciiDataRanges
sl@0
    78
		},
sl@0
    79
	NULL,
sl@0
    80
	NULL
sl@0
    81
	};
sl@0
    82
sl@0
    83
GLREF_C void IsCharacterSetAscii(TInt& aConfidenceLevel, const TDesC8& aSample)
sl@0
    84
	{
sl@0
    85
	// loop through the aSample text checking the range of the character
sl@0
    86
	// If greater than 127 then it's not ASCII (gotta be harsh!)  ... 
sl@0
    87
	TInt sampleLength = aSample.Length();
sl@0
    88
	if (sampleLength == 0)
sl@0
    89
		{
sl@0
    90
		aConfidenceLevel = 91;
sl@0
    91
		return;
sl@0
    92
		}
sl@0
    93
	aConfidenceLevel = 100;
sl@0
    94
	
sl@0
    95
	
sl@0
    96
	_LIT8(KAsciiEsc,"\x28\x42");
sl@0
    97
	_LIT8(KJisRomanEsc,"\x28\x4a");
sl@0
    98
	_LIT8(KJisCEsc,"\x24\x40");
sl@0
    99
	_LIT8(KJisX0208Esc,"\x24\x42");
sl@0
   100
	_LIT8(KJisX0212Esc,"\x24\x28\x44");
sl@0
   101
	_LIT8(KHz1Esc,"\x7e\x7e");
sl@0
   102
	_LIT8(KHz2Esc,"\x7e\x7b");
sl@0
   103
	_LIT8(KHz3Esc,"\x7e\x7b");
sl@0
   104
	
sl@0
   105
	TInt asciiResult = 0;
sl@0
   106
	TInt jisRomanResult = 0; 
sl@0
   107
	TInt jisCResult = 0;
sl@0
   108
	TInt jisX0208Result =0;
sl@0
   109
	TInt jisX0212Result =0;
sl@0
   110
	TInt hz1Result=0;
sl@0
   111
	TInt hz2Result=0;
sl@0
   112
	TInt hz3Result=0;
sl@0
   113
	
sl@0
   114
	TInt escSequences = 0; 
sl@0
   115
	TInt controls = 0;
sl@0
   116
sl@0
   117
sl@0
   118
	for (TInt i = 0; i < sampleLength; ++i)
sl@0
   119
		{
sl@0
   120
		if ((aSample[i]&0x80)!=0x00)
sl@0
   121
			{
sl@0
   122
			aConfidenceLevel = 0;
sl@0
   123
			break;
sl@0
   124
			}
sl@0
   125
sl@0
   126
		if (i > asciiResult)
sl@0
   127
			{
sl@0
   128
			asciiResult=(aSample.Right(sampleLength-i)).Find(KAsciiEsc);
sl@0
   129
			if (asciiResult!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   130
				escSequences += 2;
sl@0
   131
			}
sl@0
   132
sl@0
   133
		if (i > jisRomanResult)
sl@0
   134
			{
sl@0
   135
			jisRomanResult=(aSample.Right(sampleLength-i)).Find(KJisRomanEsc);
sl@0
   136
			if (jisRomanResult!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   137
				escSequences += 2;
sl@0
   138
			}
sl@0
   139
sl@0
   140
		if (i > jisCResult)
sl@0
   141
			{
sl@0
   142
			jisCResult=(aSample.Right(sampleLength-i)).Find(KJisCEsc);
sl@0
   143
			if (jisCResult!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   144
				escSequences += 2;
sl@0
   145
			}
sl@0
   146
sl@0
   147
		if (i > jisX0208Result)
sl@0
   148
			{
sl@0
   149
			jisX0208Result=(aSample.Right(sampleLength-i)).Find(KJisX0208Esc);
sl@0
   150
			if (jisX0208Result!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   151
				escSequences += 2;
sl@0
   152
			}
sl@0
   153
sl@0
   154
		if (i > jisX0212Result)
sl@0
   155
			{
sl@0
   156
			jisX0212Result=(aSample.Right(sampleLength-i)).Find(KJisX0212Esc);
sl@0
   157
			if (jisX0212Result!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   158
				escSequences += 2;
sl@0
   159
			}
sl@0
   160
sl@0
   161
		if (i > hz1Result)
sl@0
   162
			{
sl@0
   163
			hz1Result=(aSample.Right(sampleLength-i)).Find(KHz1Esc);
sl@0
   164
			if (hz1Result!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   165
				escSequences += 2;
sl@0
   166
			}
sl@0
   167
sl@0
   168
		if (i > hz2Result)
sl@0
   169
			{
sl@0
   170
			hz2Result=(aSample.Right(sampleLength-i)).Find(KHz2Esc);
sl@0
   171
			if (hz2Result!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   172
				escSequences += 2;
sl@0
   173
			}
sl@0
   174
sl@0
   175
		if (i > hz3Result)
sl@0
   176
			{
sl@0
   177
			hz3Result=(aSample.Right(sampleLength-i)).Find(KHz3Esc);
sl@0
   178
			if (hz3Result!=KErrNotFound) //aConfidenceLevel-=2;
sl@0
   179
				escSequences += 2;
sl@0
   180
			}
sl@0
   181
sl@0
   182
		if (aSample[i]==0x7f)
sl@0
   183
			// 0x7f is the control code for delete ... 
sl@0
   184
			{
sl@0
   185
			aConfidenceLevel = 0;
sl@0
   186
			break;
sl@0
   187
			}
sl@0
   188
sl@0
   189
		 if (aSample[i]==0x1b)
sl@0
   190
			{
sl@0
   191
			static const TInt smsExtensionTable[12] = 
sl@0
   192
				{0x0a, 0x14, 0x1b, 0x24, 0x28, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65};
sl@0
   193
			for (TInt j=0; j < 12; ++j) // change the hard coded number to the Array length 
sl@0
   194
				{
sl@0
   195
				TInt increment1 = i+1;
sl@0
   196
				if (increment1 >= sampleLength)
sl@0
   197
					break;
sl@0
   198
				if (aSample[increment1] == smsExtensionTable[j])
sl@0
   199
					{
sl@0
   200
					aConfidenceLevel-=5;
sl@0
   201
					// /x1b/x24 & /x1b/x28 are the first two characters of a few
sl@0
   202
					// of  JIS & ISO2022JP escape sequence (That's why 0x24 was added in smsExtensionTable)
sl@0
   203
					// So if what's up next matches any JIS & IS02022JP escape sequence..... more deduction 
sl@0
   204
					// of the confidence Level 
sl@0
   205
					TInt increment2 = i+2;
sl@0
   206
					TInt increment3 = i+3;
sl@0
   207
					if((increment2 >= sampleLength)||((increment3) >= sampleLength))
sl@0
   208
						break;
sl@0
   209
					if (smsExtensionTable[j]==0x24)
sl@0
   210
						{
sl@0
   211
						// 24 -> 40,42 (28,44)
sl@0
   212
						if ((aSample[increment2]==0x40) || (aSample[increment2]==0x42) ||
sl@0
   213
							((aSample[increment2]==0x28)&&(aSample[increment3]==0x44)))
sl@0
   214
							{
sl@0
   215
							aConfidenceLevel=0;
sl@0
   216
							break;
sl@0
   217
							}
sl@0
   218
						}
sl@0
   219
					else if (smsExtensionTable[j]==0x28)
sl@0
   220
						{
sl@0
   221
						// 28 -> 42, 49, 4a
sl@0
   222
						if ((aSample[increment2]==0x42) || (aSample[increment2]==0x49) || (aSample[increment2]==0x4a))
sl@0
   223
							{
sl@0
   224
							aConfidenceLevel=0;
sl@0
   225
							break;
sl@0
   226
							}
sl@0
   227
						}
sl@0
   228
					}
sl@0
   229
				}
sl@0
   230
			if(aConfidenceLevel==0)
sl@0
   231
				break;
sl@0
   232
			}
sl@0
   233
			if (controls < 100 && aSample[i] < 0x20 && 	aSample[i] != '\r' && aSample[i] != '\n' && aSample[i] != '\t')
sl@0
   234
			// a few more control codes besides LF, CR, TAB 
sl@0
   235
			{
sl@0
   236
			controls ? controls *= 3 : controls = 3; 
sl@0
   237
			}
sl@0
   238
		}
sl@0
   239
		
sl@0
   240
	aConfidenceLevel -= controls;
sl@0
   241
	aConfidenceLevel = aConfidenceLevel - ((escSequences*100)/sampleLength);
sl@0
   242
	aConfidenceLevel =(aConfidenceLevel >0)? aConfidenceLevel: 0;
sl@0
   243
sl@0
   244
sl@0
   245
	}
sl@0
   246
sl@0
   247