os/textandloc/charconvfw/charconv_fw/src/charconv/iso88591.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 *
    16 */
    17 
    18 
    19 #include <e32std.h>
    20 #include <convdata.h>
    21 
    22 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    23 
    24 LOCAL_D const SCnvConversionData::SVariableByteData::SRange iso88591VariableByteDataRanges[]=
    25 	{
    26 		{
    27 		0x00,
    28 		0xff,
    29 		0,
    30 		0
    31 		}
    32 	};
    33 
    34 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange iso88591ToUnicodeDataRanges[]=
    35 	{
    36 		{
    37 		0x00,
    38 		0x7f,
    39 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    40 		0,
    41 		0,
    42 			{
    43 			0,
    44 			0
    45 			}
    46 		},
    47 		{
    48 		0x81,
    49 		0xff,
    50 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    51 		0,
    52 		0,
    53 			{
    54 			0,
    55 			0
    56 			}
    57 		},
    58 		{
    59 		0x80,
    60 		0x80,
    61 		SCnvConversionData::SOneDirectionData::SRange::EOffset,
    62 		0,
    63 		0,
    64 			{
    65 			STATIC_CAST(TUint, 8236)
    66 			}
    67 		}
    68 	};
    69 
    70 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToIso88591DataRanges[]=
    71 	{
    72 		{
    73 		0x0000,
    74 		0x007f,
    75 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    76 		1,
    77 		0,
    78 			{
    79 			0,
    80 			0
    81 			}
    82 		},
    83 		{
    84 		0x0081,
    85 		0x00ff,
    86 		SCnvConversionData::SOneDirectionData::SRange::EDirect,
    87 		1,
    88 		0,
    89 			{
    90 			0,
    91 			0
    92 			}
    93 		},
    94 		{
    95 		0x20ac,
    96 		0x20ac,
    97 		SCnvConversionData::SOneDirectionData::SRange::EOffset,
    98 		1,
    99 		0,
   100 			{
   101 			STATIC_CAST(TUint, -8236)
   102 			}
   103 		}
   104 	};
   105 
   106 GLREF_D const SCnvConversionData iso88591ConversionData=
   107 	{
   108 	SCnvConversionData::EUnspecified,
   109 		{
   110 		ARRAY_LENGTH(iso88591VariableByteDataRanges),
   111 		iso88591VariableByteDataRanges
   112 		},
   113 		{
   114 		ARRAY_LENGTH(iso88591ToUnicodeDataRanges),
   115 		iso88591ToUnicodeDataRanges
   116 		},
   117 		{
   118 		ARRAY_LENGTH(unicodeToIso88591DataRanges),
   119 		unicodeToIso88591DataRanges
   120 		},
   121 	NULL,
   122 	NULL
   123 	};
   124 
   125 GLREF_C void IsCharacterSetISO88591(TInt& aConfidenceLevel, const TDesC8& aSample)
   126 	{
   127 	TInt sampleLength = aSample.Length();
   128 	aConfidenceLevel = 75;
   129 
   130 	for (TInt i=0; i<sampleLength; ++i)
   131 		{
   132 		// ISO88591 includes ASCII as well
   133 		// first check if the char is in the range 0x80 - 0x9f (controls codes)
   134 		// If it is in that range then it's not ISO88591
   135 		if ((aSample[i] >= 0x80) && (aSample[i] <= 0x9f))
   136 			{
   137 			aConfidenceLevel=0;
   138 			break;
   139 			}
   140 		// 0xf7 is the division symbol in ISO88591.
   141 		// 0xd7 is the division symbol in ISO88591.If char on either side of the division
   142 		// symbol is a number then the confidence that it's ISO88591 increases
   143 		if( i>0 && ((aSample[i]==0xf7) || (aSample[i]==0xd7)) && ((i+1)<sampleLength) )
   144 			{
   145 			if ( (aSample[i-1] >= 0x30) && (aSample[i-1] <= 0x39) &&  // char before is a number
   146 				 (aSample[i+1] >= 0x30) && (aSample[i+1] <= 0x39) )   // char after is a number
   147 				{
   148 				aConfidenceLevel+=5;
   149 				}
   150 			}
   151 		// Can also use the currency symbol to increase confidence if the char after a 
   152 		// currency symbol is numeric
   153 		if((aSample[i]>=0xa2) && (aSample[i] <= 0xa5) && ((i+1)<sampleLength))
   154 			{
   155 			if ((aSample[i+1] >= 0x30) && (aSample[i+1] <= 0x39))
   156 				{
   157 				aConfidenceLevel+=5; 
   158 				}
   159 			}
   160 		} // for loop
   161 	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
   162 	}