os/textandloc/charconvfw/charconv_fw/src/charconv/unicodelittle.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 * Little-Endian converter
    16 *
    17 */
    18 
    19 
    20 #include <e32std.h>
    21 #include <convdata.h>
    22 #include "unicode.h"
    23 
    24 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    25 
    26 
    27 GLREF_D const SCnvConversionData unicodeConversionDataLittle=
    28 	{
    29 	SCnvConversionData::EFixedLittleEndian,
    30 		{
    31 		ARRAY_LENGTH(unicodeVariableByteDataRanges),
    32 		unicodeVariableByteDataRanges
    33 		},
    34 		{
    35 		ARRAY_LENGTH(unicodeTounicodeDataRanges),
    36 		unicodeTounicodeDataRanges
    37 		},
    38 		{
    39 		ARRAY_LENGTH(unicodeTounicodeDataRanges),
    40 		unicodeTounicodeDataRanges
    41 		},
    42 	NULL,
    43 	NULL
    44 	};
    45 
    46 GLREF_C void IsCharacterSetUnicodeLittle(TInt& aConfidenceLevel, const TDesC8& aSample)
    47 	{
    48 	
    49 	TInt sampleLength = aSample.Length();
    50 	aConfidenceLevel =70;
    51 	if (sampleLength < 2)
    52 		return;
    53 
    54 	if (aSample[0]==0xff)
    55 		{
    56 		// The first byte is a possible ByteOrderMark
    57 		// Try matching the next character 
    58 		if(aSample[1]==0xfe)
    59 			{
    60 			// the byte order mark could be 0xFEFF or 0xFFFE depending on 
    61 			// endianness of the sample text.
    62 			aConfidenceLevel=100;
    63 			}
    64 		}
    65 
    66 	for (TInt i = 0; i < sampleLength-1; ++i)
    67 		{
    68 		if (aSample[i] == 0x0d)
    69  			{
    70  			if (aSample[i+1] == 0x0a)
    71  				{
    72  				// reduce the confidence level
    73  				aConfidenceLevel -= 25;
    74  				}
    75  			}
    76 		}
    77 
    78 	// if not 100% confident already, check if most odd bytes zero 
    79 	#define MAX_SAMPLE_LENGTH 2048
    80 	if ( aConfidenceLevel < 100 )
    81 		{	
    82 		TInt repeat=0;
    83 
    84 		// only check the first MAX_SAMPLE_LENGTH if big sample
    85 		TInt length =( sampleLength > MAX_SAMPLE_LENGTH ? MAX_SAMPLE_LENGTH : sampleLength);
    86 
    87 		// start from 1 and check the odd bytes
    88 		for (TInt i = 1; i < length-1; i+=2)
    89 			{
    90 			if (aSample[i] == 0x0) 
    91 				repeat ++;
    92 			}
    93 
    94 		// if more than 80% odd bytes zero, then this IS little Endian
    95 		if ( (repeat * 100) /  (length * 5) >= 8)
    96 			aConfidenceLevel  = 100;
    97 		}
    98 
    99 	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
   100 	}
   101