Update contrib.
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
22 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
24 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_1[]=
55 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_2[]=
68 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_3[]=
78 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_4[]=
87 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_5[]=
99 0x000c, // meant to be a "Page Break" - a FORM FEED character is the nearest thing in Unicode to this
118 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_6[]=
159 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_7[]=
171 0x20ac // Euro symbol
174 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_sms7BitToUnicode_1[]=
179 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_1
184 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_2
189 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_3
194 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_4
198 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_sms7BitToUnicode_2[]=
203 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_5
208 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_6
213 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_7
218 keyedTables16OfIndexedTables16_indexedEntries_sms7BitToUnicode_4
223 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_sms7BitToUnicode_1[]=
268 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_1[]=
289 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_2[]=
299 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_3[]=
309 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_4[]=
330 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_5[]=
340 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_6[]=
350 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_7[]=
370 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_8[]=
387 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_9[]=
407 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_10[]=
426 LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_11[]=
436 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_unicodeToSms7Bit_1[]=
441 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_1
446 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_2
451 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_3
456 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_4
461 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_5
466 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_6
471 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_7
476 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_8
481 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_9
486 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_10
491 keyedTables16OfIndexedTables16_indexedEntries_unicodeToSms7Bit_11
495 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToSms7Bit_1[]=
611 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToSms7Bit_2[]=
655 LOCAL_D const SCnvConversionData::SVariableByteData::SRange sms7BitVariableByteDataRanges[]=
677 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange sms7BitToUnicodeDataRanges[]=
682 SCnvConversionData::SOneDirectionData::SRange::EDirect,
693 SCnvConversionData::SOneDirectionData::SRange::EDirect,
704 SCnvConversionData::SOneDirectionData::SRange::EDirect,
715 SCnvConversionData::SOneDirectionData::SRange::EOffset,
719 STATIC_CAST(TUint, 97),
726 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16,
730 UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_sms7BitToUnicode_1)
737 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
741 UData_KeyedTable(keyedTable1616_sms7BitToUnicode_1)
748 SCnvConversionData::SOneDirectionData::SRange::EOffset,
759 SCnvConversionData::SOneDirectionData::SRange::EOffset,
770 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16,
774 UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_sms7BitToUnicode_2)
780 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToSms7BitDataRanges[]=
785 SCnvConversionData::SOneDirectionData::SRange::EDirect,
796 SCnvConversionData::SOneDirectionData::SRange::EDirect,
807 SCnvConversionData::SOneDirectionData::SRange::EDirect,
818 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16,
822 UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_unicodeToSms7Bit_1)
828 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
832 UData_SKeyedTable1616(keyedTable1616_unicodeToSms7Bit_1)
838 SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
842 UData_SKeyedTable1616(keyedTable1616_unicodeToSms7Bit_2)
847 GLREF_D const SCnvConversionData sms7BitConversionData=
849 SCnvConversionData::EFixedBigEndian,
851 ARRAY_LENGTH(sms7BitVariableByteDataRanges),
852 sms7BitVariableByteDataRanges
855 ARRAY_LENGTH(sms7BitToUnicodeDataRanges),
856 sms7BitToUnicodeDataRanges
859 ARRAY_LENGTH(unicodeToSms7BitDataRanges),
860 unicodeToSms7BitDataRanges
866 GLREF_C void IsCharacterSetSMS7Bit(TInt& aConfidenceLevel, const TDesC8& aSample)
868 _LIT8(KHz1Esc,"\x7e\x7e");
869 _LIT8(KHz2Esc,"\x7e\x7b");
870 _LIT8(KHz3Esc,"\x7e\x7d");
871 _LIT8(KLit8EscapeSequenceForJisRoman, "\x1b\x28\x4a");
872 _LIT8(KLit8EscapeSequenceForAscii, "\x1b\x28\x42");
873 // _LIT8(KLit8EscapeSequenceForHalfWidthKatakana, "\x1b\x28\x49");
874 _LIT8(KLit8EscapeSequenceForJisC6226_1978, "\x1b\x24\x40");
875 _LIT8(KLit8EscapeSequenceForJisX0208_1983, "\x1b\x24\x42");
876 _LIT8(KLit8EscapeSequenceForJisX0208_199x, "\x1b\x26\x40\x1b\x24\x42");
877 _LIT8(KLit8EscapeSequenceForJisX0212_1990, "\x1b\x24\x28\x44");
882 TInt JisRomanResult = 0;
883 TInt AsciiResult = 0;
885 TInt JisX0208Result = 0;
886 TInt JisX0212Result = 0;
887 TInt JisX0208XResult = 0;
891 TInt sampleLength = aSample.Length();
892 aConfidenceLevel =70;
893 if (sampleLength == 0)
896 TInt HzEscSequences = 0;
897 TInt JISEscSequences = 0;
899 for (TInt i = 0; i < sampleLength; ++i)
904 hz1Result=(aSample.Right(sampleLength-i)).Find(KHz1Esc);
905 if (hz1Result!=KErrNotFound) //aConfidenceLevel-=2;
911 hz2Result=(aSample.Right(sampleLength-i)).Find(KHz2Esc);
912 if (hz2Result!=KErrNotFound) //aConfidenceLevel-=2;
918 hz3Result=(aSample.Right(sampleLength-i)).Find(KHz3Esc);
919 if (hz3Result!=KErrNotFound) //aConfidenceLevel-=2;
923 if (i > JisRomanResult)
925 JisRomanResult = (aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisRoman);
926 if (JisRomanResult!=KErrNotFound)
927 JISEscSequences +=10;
932 AsciiResult = (aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForAscii);
933 if (AsciiResult!=KErrNotFound)
934 JISEscSequences +=10;
939 JisCResult = (aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisC6226_1978);
940 if (JisCResult!=KErrNotFound)
944 if (i > JisX0208Result)
946 JisX0208Result = (aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisX0208_1983);
947 if (JisX0208Result!=KErrNotFound)
951 if (i > JisX0212Result)
953 JisX0212Result = (aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisX0212_1990);
954 if (JisX0212Result!=KErrNotFound)
958 if (i > JisX0208XResult)
960 JisX0208XResult = (aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisX0208_199x);
961 if (JisX0208XResult!=KErrNotFound)
965 if ((aSample[i]&0x80)!=0x00)
967 aConfidenceLevel = 0;
972 TInt increment1 = i+1;
973 if (aSample[i] == 0x1b)
975 static const TInt defaultExtensionTable[10] =
976 {0x0a, 0x14, 0x1b, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65};
977 if (increment1 >= sampleLength)
979 for (TInt j =0; j < 10; j++)
981 if (aSample[increment1] == defaultExtensionTable[j])
988 else if (currency<100 && (aSample[i]==0x01 || aSample[i]==0x02 || aSample[i]==0x03))
990 if (increment1 >= sampleLength)
992 if ((aSample[increment1] >= 0x30) && (aSample[increment1] <= 0x39))
994 currency ? currency *= 3 : currency = 3;
997 else if (email<100 && aSample[i]==0x00) // @
1001 while(valid && pos>=0) // check before @
1003 TInt8 act = aSample[pos];
1004 if(act==' ' || act=='\n' || act=='\t')
1006 if( !( ( act >= 'a' && act <= 'z' ) ||
1007 ( act >= 'A' && act <= 'Z' ) ||
1008 ( act >= '0' && act <= '9' ) ||
1011 act == /*'_'*/0x11 ) )
1018 TInt space = aSample.Mid(i+1).Find((const unsigned char*)" ",1);
1019 TInt lf = aSample.Mid(i+1).Find((const unsigned char*)"\n",1);
1020 TInt end = aSample.Length() - (i+1);
1021 if(space != KErrNotFound)
1025 else if(lf != KErrNotFound)
1029 TPtrC8 string = aSample.Mid(i+1,end);
1030 TInt commat = string.Find((const unsigned char*)"\x0",1);
1031 TInt period = string.Find((const unsigned char*)".",1);
1032 if(commat!=KErrNotFound || period==KErrNotFound)
1038 email ? email *= 5 : email = 5;
1042 aConfidenceLevel += 5; // not valid address but still valid '@'
1045 // Next, test for the occurrence of values 0x00-0x1f (except 0x0a and 0x0d) next to another character.
1046 // In ASCII these are control codes, so there is a low chance of these values occurring
1047 // next to a printable ASCII value
1048 else if (aSample[i] < 0x20 && aSample[i] != '\r' && aSample[i] != '\n')
1050 if (increment1 >= sampleLength)
1052 if (((aSample[increment1] >= 0x20) && (aSample[increment1] <= 0x5a)) ||
1053 ((aSample[increment1] >= 0x61) && (aSample[increment1] <= 0x7a)))
1055 aConfidenceLevel+=2;
1058 // other possible tests to include... Greek matching...
1061 aConfidenceLevel += currency + email;
1062 aConfidenceLevel = aConfidenceLevel - ((HzEscSequences*100)/sampleLength);
1063 aConfidenceLevel = aConfidenceLevel - ((JISEscSequences*100)/sampleLength);
1064 aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;