os/textandloc/charconvfw/charconv_fw/src/charconv/iso88591.cpp
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <convdata.h>
sl@0
    21
sl@0
    22
// Element count of a statically sized array: total byte size divided by the
// byte size of its first element. Only meaningful when aArray is a real array;
// a pointer argument would yield sizeof(pointer)/sizeof(element) instead.
#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
sl@0
    23
sl@0
    24
/**
Byte-length table for the foreign (ISO 8859-1) side of the conversion.

A single range spans every byte value 0x00..0xff.
NOTE(review): the two trailing zeros are presumably "number of subsequent
bytes" and a spare/padding field - confirm against the declaration of
SCnvConversionData::SVariableByteData::SRange in convdata.h.
*/
LOCAL_D const SCnvConversionData::SVariableByteData::SRange iso88591VariableByteDataRanges[]=
	{
	{ 0x00, 0xff, 0, 0 }
	};
sl@0
    33
sl@0
    34
/**
Range table used for the ISO 8859-1 -> Unicode direction.

Each entry is { first input code, last input code, algorithm, two small
integer fields, algorithm-specific data }.
NOTE(review): the two integer fields after the algorithm are presumably the
output size in bytes (only relevant when the output is the foreign encoding)
and a spare - confirm against SOneDirectionData::SRange in convdata.h.

The order of the entries is kept exactly as in the original table.
*/
LOCAL_D const SCnvConversionData::SOneDirectionData::SRange iso88591ToUnicodeDataRanges[]=
	{
	// 0x00..0x7f use the EDirect algorithm.
	{ 0x00, 0x7f, SCnvConversionData::SOneDirectionData::SRange::EDirect, 0, 0, { 0, 0 } },

	// 0x81..0xff use the EDirect algorithm.
	{ 0x81, 0xff, SCnvConversionData::SOneDirectionData::SRange::EDirect, 0, 0, { 0, 0 } },

	// 0x80 alone uses EOffset with 8236 (0x202c). 0x80 + 0x202c == 0x20ac, which
	// is the single code handled by the matching entry in the reverse table.
	{ 0x80, 0x80, SCnvConversionData::SOneDirectionData::SRange::EOffset, 0, 0, { STATIC_CAST(TUint, 8236) } }
	};
sl@0
    69
sl@0
    70
/**
Range table used for the Unicode -> ISO 8859-1 direction.

Each entry is { first input code, last input code, algorithm, two small
integer fields, algorithm-specific data }.
NOTE(review): the value 1 after the algorithm is presumably the size in bytes
of each output (foreign) character and the following 0 a spare - confirm
against SOneDirectionData::SRange in convdata.h.

The order of the entries is kept exactly as in the original table.
*/
LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToIso88591DataRanges[]=
	{
	// U+0000..U+007F use the EDirect algorithm.
	{ 0x0000, 0x007f, SCnvConversionData::SOneDirectionData::SRange::EDirect, 1, 0, { 0, 0 } },

	// U+0081..U+00FF use the EDirect algorithm.
	{ 0x0081, 0x00ff, SCnvConversionData::SOneDirectionData::SRange::EDirect, 1, 0, { 0, 0 } },

	// U+20AC alone uses EOffset with -8236 (0x20ac - 0x202c == 0x80), the mirror
	// image of the 0x80 entry in the ISO 8859-1 -> Unicode table.
	{ 0x20ac, 0x20ac, SCnvConversionData::SOneDirectionData::SRange::EOffset, 1, 0, { STATIC_CAST(TUint, -8236) } }
	};
sl@0
   105
sl@0
   106
/**
Top-level conversion description for ISO 8859-1, tying together the three
range tables defined in this file. Each table is passed as an
{ element count, array } pair.

NOTE(review): the first member (EUnspecified) is presumably the endianness of
the foreign characters, and the two trailing NULLs are presumably optional
pointers that this character set does not need - confirm against the
declaration of SCnvConversionData in convdata.h.
*/
GLREF_D const SCnvConversionData iso88591ConversionData=
	{
	SCnvConversionData::EUnspecified,
	// Foreign variable-byte description.
	{ ARRAY_LENGTH(iso88591VariableByteDataRanges), iso88591VariableByteDataRanges },
	// ISO 8859-1 -> Unicode ranges.
	{ ARRAY_LENGTH(iso88591ToUnicodeDataRanges), iso88591ToUnicodeDataRanges },
	// Unicode -> ISO 8859-1 ranges.
	{ ARRAY_LENGTH(unicodeToIso88591DataRanges), unicodeToIso88591DataRanges },
	NULL,
	NULL
	};
sl@0
   124
sl@0
   125
GLREF_C void IsCharacterSetISO88591(TInt& aConfidenceLevel, const TDesC8& aSample)
sl@0
   126
	{
sl@0
   127
	TInt sampleLength = aSample.Length();
sl@0
   128
	aConfidenceLevel = 75;
sl@0
   129
sl@0
   130
	for (TInt i=0; i<sampleLength; ++i)
sl@0
   131
		{
sl@0
   132
		// ISO88591 includes ASCII as well
sl@0
   133
		// first check if the char is in the range 0x80 - 0x9f (controls codes)
sl@0
   134
		// If it is in that range then it's not ISO88591
sl@0
   135
		if ((aSample[i] >= 0x80) && (aSample[i] <= 0x9f))
sl@0
   136
			{
sl@0
   137
			aConfidenceLevel=0;
sl@0
   138
			break;
sl@0
   139
			}
sl@0
   140
		// 0xf7 is the division symbol in ISO88591.
sl@0
   141
		// 0xd7 is the division symbol in ISO88591.If char on either side of the division
sl@0
   142
		// symbol is a number then the confidence that it's ISO88591 increases
sl@0
   143
		if( i>0 && ((aSample[i]==0xf7) || (aSample[i]==0xd7)) && ((i+1)<sampleLength) )
sl@0
   144
			{
sl@0
   145
			if ( (aSample[i-1] >= 0x30) && (aSample[i-1] <= 0x39) &&  // char before is a number
sl@0
   146
				 (aSample[i+1] >= 0x30) && (aSample[i+1] <= 0x39) )   // char after is a number
sl@0
   147
				{
sl@0
   148
				aConfidenceLevel+=5;
sl@0
   149
				}
sl@0
   150
			}
sl@0
   151
		// Can also use the currency symbol to increase confidence if the char after a 
sl@0
   152
		// currency symbol is numeric
sl@0
   153
		if((aSample[i]>=0xa2) && (aSample[i] <= 0xa5) && ((i+1)<sampleLength))
sl@0
   154
			{
sl@0
   155
			if ((aSample[i+1] >= 0x30) && (aSample[i+1] <= 0x39))
sl@0
   156
				{
sl@0
   157
				aConfidenceLevel+=5; 
sl@0
   158
				}
sl@0
   159
			}
sl@0
   160
		} // for loop
sl@0
   161
	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
sl@0
   162
	}