os/textandloc/charconvfw/charconv_fw/src/charconv/unicodelittle.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/charconv/unicodelittle.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,101 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +* Little-Endian converter
    1.19 +*
    1.20 +*/
    1.21 +
    1.22 +
    1.23 +#include <e32std.h>
    1.24 +#include <convdata.h>
    1.25 +#include "unicode.h"
    1.26 +
    1.27 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    1.28 +
    1.29 +
    1.30 +GLREF_D const SCnvConversionData unicodeConversionDataLittle=
    1.31 +	{
    1.32 +	SCnvConversionData::EFixedLittleEndian,
    1.33 +		{
    1.34 +		ARRAY_LENGTH(unicodeVariableByteDataRanges),
    1.35 +		unicodeVariableByteDataRanges
    1.36 +		},
    1.37 +		{
    1.38 +		ARRAY_LENGTH(unicodeTounicodeDataRanges),
    1.39 +		unicodeTounicodeDataRanges
    1.40 +		},
    1.41 +		{
    1.42 +		ARRAY_LENGTH(unicodeTounicodeDataRanges),
    1.43 +		unicodeTounicodeDataRanges
    1.44 +		},
    1.45 +	NULL,
    1.46 +	NULL
    1.47 +	};
    1.48 +
    1.49 +GLREF_C void IsCharacterSetUnicodeLittle(TInt& aConfidenceLevel, const TDesC8& aSample)
    1.50 +	{
    1.51 +	
    1.52 +	TInt sampleLength = aSample.Length();
    1.53 +	aConfidenceLevel =70;
    1.54 +	if (sampleLength < 2)
    1.55 +		return;
    1.56 +
    1.57 +	if (aSample[0]==0xff)
    1.58 +		{
    1.59 +		// The first byte is a possible ByteOrderMark
    1.60 +		// Try matching the next character 
    1.61 +		if(aSample[1]==0xfe)
    1.62 +			{
    1.63 +			// the byte order mark could be 0xFEFF or 0xFFFE depending on 
    1.64 +			// endianness of the sample text.
    1.65 +			aConfidenceLevel=100;
    1.66 +			}
    1.67 +		}
    1.68 +
    1.69 +	for (TInt i = 0; i < sampleLength-1; ++i)
    1.70 +		{
    1.71 +		if (aSample[i] == 0x0d)
    1.72 + 			{
    1.73 + 			if (aSample[i+1] == 0x0a)
    1.74 + 				{
    1.75 + 				// reduce the confidence level
    1.76 + 				aConfidenceLevel -= 25;
    1.77 + 				}
    1.78 + 			}
    1.79 +		}
    1.80 +
    1.81 +	// if not 100% confident already, check if most odd bytes zero 
    1.82 +	#define MAX_SAMPLE_LENGTH 2048
    1.83 +	if ( aConfidenceLevel < 100 )
    1.84 +		{	
    1.85 +		TInt repeat=0;
    1.86 +
    1.87 +		// only check the first MAX_SAMPLE_LENGTH if big sample
    1.88 +		TInt length =( sampleLength > MAX_SAMPLE_LENGTH ? MAX_SAMPLE_LENGTH : sampleLength);
    1.89 +
    1.90 +		// start from 1 and check the odd bytes
    1.91 +		for (TInt i = 1; i < length-1; i+=2)
    1.92 +			{
    1.93 +			if (aSample[i] == 0x0) 
    1.94 +				repeat ++;
    1.95 +			}
    1.96 +
    1.97 +		// if more than 80% odd bytes zero, then this IS little Endian
    1.98 +		if ( (repeat * 100) /  (length * 5) >= 8)
    1.99 +			aConfidenceLevel  = 100;
   1.100 +		}
   1.101 +
   1.102 +	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
   1.103 +	}
   1.104 +