1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/charconv/unicodelittle.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,101 @@
1.4 +/*
1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +* Little-Endian converter
1.19 +*
1.20 +*/
1.21 +
1.22 +
1.23 +#include <e32std.h>
1.24 +#include <convdata.h>
1.25 +#include "unicode.h"
1.26 +
// Element count of a true array (not a pointer): total size divided by the
// size of one element.
#define ARRAY_LENGTH(aArray) ( sizeof(aArray) / sizeof((aArray)[0]) )
1.28 +
1.29 +
1.30 +GLREF_D const SCnvConversionData unicodeConversionDataLittle=
1.31 + {
1.32 + SCnvConversionData::EFixedLittleEndian,
1.33 + {
1.34 + ARRAY_LENGTH(unicodeVariableByteDataRanges),
1.35 + unicodeVariableByteDataRanges
1.36 + },
1.37 + {
1.38 + ARRAY_LENGTH(unicodeTounicodeDataRanges),
1.39 + unicodeTounicodeDataRanges
1.40 + },
1.41 + {
1.42 + ARRAY_LENGTH(unicodeTounicodeDataRanges),
1.43 + unicodeTounicodeDataRanges
1.44 + },
1.45 + NULL,
1.46 + NULL
1.47 + };
1.48 +
1.49 +GLREF_C void IsCharacterSetUnicodeLittle(TInt& aConfidenceLevel, const TDesC8& aSample)
1.50 + {
1.51 +
1.52 + TInt sampleLength = aSample.Length();
1.53 + aConfidenceLevel =70;
1.54 + if (sampleLength < 2)
1.55 + return;
1.56 +
1.57 + if (aSample[0]==0xff)
1.58 + {
1.59 + // The first byte is a possible ByteOrderMark
1.60 + // Try matching the next character
1.61 + if(aSample[1]==0xfe)
1.62 + {
1.63 + // the byte order mark could be 0xFEFF or 0xFFFE depending on
1.64 + // endianness of the sample text.
1.65 + aConfidenceLevel=100;
1.66 + }
1.67 + }
1.68 +
1.69 + for (TInt i = 0; i < sampleLength-1; ++i)
1.70 + {
1.71 + if (aSample[i] == 0x0d)
1.72 + {
1.73 + if (aSample[i+1] == 0x0a)
1.74 + {
1.75 + // reduce the confidence level
1.76 + aConfidenceLevel -= 25;
1.77 + }
1.78 + }
1.79 + }
1.80 +
1.81 + // if not 100% confident already, check if most odd bytes zero
1.82 + #define MAX_SAMPLE_LENGTH 2048
1.83 + if ( aConfidenceLevel < 100 )
1.84 + {
1.85 + TInt repeat=0;
1.86 +
1.87 + // only check the first MAX_SAMPLE_LENGTH if big sample
1.88 + TInt length =( sampleLength > MAX_SAMPLE_LENGTH ? MAX_SAMPLE_LENGTH : sampleLength);
1.89 +
1.90 + // start from 1 and check the odd bytes
1.91 + for (TInt i = 1; i < length-1; i+=2)
1.92 + {
1.93 + if (aSample[i] == 0x0)
1.94 + repeat ++;
1.95 + }
1.96 +
1.97 + // if more than 80% odd bytes zero, then this IS little Endian
1.98 + if ( (repeat * 100) / (length * 5) >= 8)
1.99 + aConfidenceLevel = 100;
1.100 + }
1.101 +
1.102 + aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
1.103 + }
1.104 +