sl@0
|
1 |
/*
|
sl@0
|
2 |
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
|
sl@0
|
3 |
* All rights reserved.
|
sl@0
|
4 |
* This component and the accompanying materials are made available
|
sl@0
|
5 |
* under the terms of "Eclipse Public License v1.0"
|
sl@0
|
6 |
* which accompanies this distribution, and is available
|
sl@0
|
7 |
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
|
sl@0
|
8 |
*
|
sl@0
|
9 |
* Initial Contributors:
|
sl@0
|
10 |
* Nokia Corporation - initial contribution.
|
sl@0
|
11 |
*
|
sl@0
|
12 |
* Contributors:
|
sl@0
|
13 |
*
|
sl@0
|
14 |
* Description:
|
sl@0
|
15 |
* Little-Endian converter
|
sl@0
|
16 |
*
|
sl@0
|
17 |
*/
|
sl@0
|
18 |
|
sl@0
|
19 |
|
sl@0
|
20 |
#include <e32std.h>
|
sl@0
|
21 |
#include <convdata.h>
|
sl@0
|
22 |
#include "unicode.h"
|
sl@0
|
23 |
|
sl@0
|
24 |
#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
|
sl@0
|
25 |
|
sl@0
|
26 |
|
sl@0
|
27 |
/**
Conversion data table for the little-endian Unicode converter.

The initializer supplies, in order: the fixed little-endian endianness
setting, one range table built from unicodeVariableByteDataRanges, two range
tables that both use unicodeTounicodeDataRanges, and two trailing NULL
members.

NOTE(review): the field names and exact meaning of each member are defined by
SCnvConversionData in convdata.h, which is not visible here - presumably the
two identical tables are the foreign-to-Unicode and Unicode-to-foreign
directions; confirm against that header.
*/
GLREF_D const SCnvConversionData unicodeConversionDataLittle =
	{
	// endianness of the converted characters
	SCnvConversionData::EFixedLittleEndian,

	// range table: variable-byte data
	{ ARRAY_LENGTH(unicodeVariableByteDataRanges), unicodeVariableByteDataRanges },

	// range table: first use of the unicode-to-unicode ranges
	{ ARRAY_LENGTH(unicodeTounicodeDataRanges), unicodeTounicodeDataRanges },

	// range table: second use of the same unicode-to-unicode ranges
	{ ARRAY_LENGTH(unicodeTounicodeDataRanges), unicodeTounicodeDataRanges },

	// trailing pointer members, left unset
	NULL,
	NULL
	};
|
sl@0
|
45 |
|
sl@0
|
46 |
/**
Estimates how likely it is that a byte sample is little-endian 16-bit Unicode
text.

The heuristic works in four steps:
 1. Start from a default confidence of 70.
 2. If the sample begins with the bytes 0xFF 0xFE (the byte order mark as it
    appears in little-endian order), raise the confidence to 100.
 3. Subtract 25 for every 0x0D byte that is immediately followed by 0x0A.
 4. If the confidence is still below 100, count the zero bytes at odd offsets
    in (at most) the first 2048 bytes; if they make up at least 80% of the
    odd-offset positions, set the confidence to 100.

Samples shorter than two bytes are not analysed and keep the default of 70.

@param aConfidenceLevel On return, the confidence as a percentage, clamped to
                        the range 0..100.
@param aSample          The raw bytes to examine.
*/
GLREF_C void IsCharacterSetUnicodeLittle(TInt& aConfidenceLevel, const TDesC8& aSample)
	{
	// Upper bound on how many bytes the zero-byte heuristic examines.
	// (A scoped constant rather than a #define, so it cannot leak into the
	// rest of the translation unit.)
	const TInt KMaxSampleLength = 2048;

	const TInt sampleLength = aSample.Length();
	aConfidenceLevel = 70;
	if (sampleLength < 2)
		{
		// fewer than two bytes: nothing to analyse, keep the default
		return;
		}

	// A leading 0xFF 0xFE pair is the byte order mark in little-endian order.
	if (aSample[0] == 0xff && aSample[1] == 0xfe)
		{
		aConfidenceLevel = 100;
		}

	// A 0x0D byte directly followed by 0x0A suggests single-byte CR LF text;
	// in little-endian 16-bit text a zero high byte would sit between them.
	// Each such pair reduces the confidence.
	for (TInt i = 0; i < sampleLength - 1; ++i)
		{
		if (aSample[i] == 0x0d && aSample[i + 1] == 0x0a)
			{
			aConfidenceLevel -= 25;
			}
		}

	// if not 100% confident already, check whether most odd bytes are zero
	if (aConfidenceLevel < 100)
		{
		// only check the first KMaxSampleLength bytes of a big sample
		const TInt length = (sampleLength > KMaxSampleLength) ? KMaxSampleLength : sampleLength;

		// Visit every odd offset, including the final byte of an even-length
		// sample. (Stopping at length-1 would skip that byte and under-count
		// exactly when the sample consists of whole 16-bit units.)
		TInt zeroOddBytes = 0;
		for (TInt i = 1; i < length; i += 2)
			{
			if (aSample[i] == 0x0)
				{
				++zeroOddBytes;
				}
			}

		// There are about length/2 odd offsets, so "at least 80% of them are
		// zero" is zeroOddBytes >= 0.8 * length / 2, i.e.
		// 5 * zeroOddBytes >= 2 * length. This is the same threshold as
		// (zeroOddBytes * 100) / (length * 5) >= 8.
		if (zeroOddBytes * 5 >= length * 2)
			{
			aConfidenceLevel = 100;
			}
		}

	// clamp the result to 0..100 (the CR LF penalty can drive it negative)
	if (aConfidenceLevel < 0)
		{
		aConfidenceLevel = 0;
		}
	else if (aConfidenceLevel > 100)
		{
		aConfidenceLevel = 100;
		}
	}
|
sl@0
|
101 |
|