os/textandloc/charconvfw/charconv_fw/src/charconv/iso88591.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/charconv/iso88591.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,162 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <convdata.h>
    1.24 +
    1.25 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    1.26 +
    1.27 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange iso88591VariableByteDataRanges[]=
    1.28 +	{
    1.29 +		{
    1.30 +		0x00,
    1.31 +		0xff,
    1.32 +		0,
    1.33 +		0
    1.34 +		}
    1.35 +	};
    1.36 +
    1.37 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange iso88591ToUnicodeDataRanges[]=
    1.38 +	{
    1.39 +		{
    1.40 +		0x00,
    1.41 +		0x7f,
    1.42 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    1.43 +		0,
    1.44 +		0,
    1.45 +			{
    1.46 +			0,
    1.47 +			0
    1.48 +			}
    1.49 +		},
    1.50 +		{
    1.51 +		0x81,
    1.52 +		0xff,
    1.53 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    1.54 +		0,
    1.55 +		0,
    1.56 +			{
    1.57 +			0,
    1.58 +			0
    1.59 +			}
    1.60 +		},
    1.61 +		{
    1.62 +		0x80,
    1.63 +		0x80,
    1.64 +		SCnvConversionData::SOneDirectionData::SRange::EOffset,
    1.65 +		0,
    1.66 +		0,
    1.67 +			{
    1.68 +			STATIC_CAST(TUint, 8236)
    1.69 +			}
    1.70 +		}
    1.71 +	};
    1.72 +
    1.73 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToIso88591DataRanges[]=
    1.74 +	{
    1.75 +		{
    1.76 +		0x0000,
    1.77 +		0x007f,
    1.78 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    1.79 +		1,
    1.80 +		0,
    1.81 +			{
    1.82 +			0,
    1.83 +			0
    1.84 +			}
    1.85 +		},
    1.86 +		{
    1.87 +		0x0081,
    1.88 +		0x00ff,
    1.89 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    1.90 +		1,
    1.91 +		0,
    1.92 +			{
    1.93 +			0,
    1.94 +			0
    1.95 +			}
    1.96 +		},
    1.97 +		{
    1.98 +		0x20ac,
    1.99 +		0x20ac,
   1.100 +		SCnvConversionData::SOneDirectionData::SRange::EOffset,
   1.101 +		1,
   1.102 +		0,
   1.103 +			{
   1.104 +			STATIC_CAST(TUint, -8236)
   1.105 +			}
   1.106 +		}
   1.107 +	};
   1.108 +
   1.109 +GLREF_D const SCnvConversionData iso88591ConversionData=
   1.110 +	{
   1.111 +	SCnvConversionData::EUnspecified,
   1.112 +		{
   1.113 +		ARRAY_LENGTH(iso88591VariableByteDataRanges),
   1.114 +		iso88591VariableByteDataRanges
   1.115 +		},
   1.116 +		{
   1.117 +		ARRAY_LENGTH(iso88591ToUnicodeDataRanges),
   1.118 +		iso88591ToUnicodeDataRanges
   1.119 +		},
   1.120 +		{
   1.121 +		ARRAY_LENGTH(unicodeToIso88591DataRanges),
   1.122 +		unicodeToIso88591DataRanges
   1.123 +		},
   1.124 +	NULL,
   1.125 +	NULL
   1.126 +	};
   1.127 +
   1.128 +GLREF_C void IsCharacterSetISO88591(TInt& aConfidenceLevel, const TDesC8& aSample)
   1.129 +	{
   1.130 +	TInt sampleLength = aSample.Length();
   1.131 +	aConfidenceLevel = 75;
   1.132 +
   1.133 +	for (TInt i=0; i<sampleLength; ++i)
   1.134 +		{
   1.135 +		// ISO88591 includes ASCII as well
   1.136 +		// first check if the char is in the range 0x80 - 0x9f (controls codes)
   1.137 +		// If it is in that range then it's not ISO88591
   1.138 +		if ((aSample[i] >= 0x80) && (aSample[i] <= 0x9f))
   1.139 +			{
   1.140 +			aConfidenceLevel=0;
   1.141 +			break;
   1.142 +			}
   1.143 +		// 0xf7 is the division symbol in ISO88591.
   1.144 +		// 0xd7 is the division symbol in ISO88591.If char on either side of the division
   1.145 +		// symbol is a number then the confidence that it's ISO88591 increases
   1.146 +		if( i>0 && ((aSample[i]==0xf7) || (aSample[i]==0xd7)) && ((i+1)<sampleLength) )
   1.147 +			{
   1.148 +			if ( (aSample[i-1] >= 0x30) && (aSample[i-1] <= 0x39) &&  // char before is a number
   1.149 +				 (aSample[i+1] >= 0x30) && (aSample[i+1] <= 0x39) )   // char after is a number
   1.150 +				{
   1.151 +				aConfidenceLevel+=5;
   1.152 +				}
   1.153 +			}
   1.154 +		// Can also use the currency symbol to increase confidence if the char after a 
   1.155 +		// currency symbol is numeric
   1.156 +		if((aSample[i]>=0xa2) && (aSample[i] <= 0xa5) && ((i+1)<sampleLength))
   1.157 +			{
   1.158 +			if ((aSample[i+1] >= 0x30) && (aSample[i+1] <= 0x39))
   1.159 +				{
   1.160 +				aConfidenceLevel+=5; 
   1.161 +				}
   1.162 +			}
   1.163 +		} // for loop
   1.164 +	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
   1.165 +	}