sl@0: /* sl@0: * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include "big5.h" sl@0: #include sl@0: #include sl@0: sl@0: class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface sl@0: { sl@0: sl@0: public: sl@0: virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); sl@0: sl@0: virtual TInt ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); sl@0: sl@0: virtual TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: sl@0: virtual TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample); sl@0: sl@0: static CBIG5ConverterImpl* NewL(); sl@0: virtual ~CBIG5ConverterImpl(); sl@0: sl@0: private: sl@0: CBIG5ConverterImpl(); sl@0: sl@0: }; sl@0: sl@0: sl@0: const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() sl@0: { sl@0: return CnvBig5::ReplacementForUnconvertibleUnicodeCharacters(); sl@0: } sl@0: sl@0: TInt CBIG5ConverterImpl::ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) sl@0: { sl@0: return CCnvCharacterSetConverter::DoConvertFromUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); sl@0: } sl@0: sl@0: TInt CBIG5ConverterImpl::ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& /*aState*/, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) sl@0: { sl@0: return CCnvCharacterSetConverter::DoConvertToUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: } sl@0: sl@0: TBool CBIG5ConverterImpl::IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample) sl@0: { sl@0: aSetToTrue=ETrue; sl@0: TInt sampleLength = aSample.Length(); sl@0: aConfidenceLevel = 0; sl@0: //WBB the following is for distiguish between big5 and GBK sl@0: TInt totalWeight=0; //sum of the weights of 20 most frequent chars sl@0: TInt sumOfGoodChar=0; //the number of chars whose first byte and second are both in the range sl@0: TInt sumOfWeight=0; //sum of the weights of the chars which are included in the sample sl@0: TInt sumOutChar=0; //the number of chars which are not common sl@0: TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second sl@0: TInt sumOfBadSingleByte=0; //the number of bad single byte, which is not in valid range sl@0: struct referenceChar sl@0: { sl@0: TUint charBig5; sl@0: TInt weight; sl@0: }; sl@0: sl@0: referenceChar refBig5[20]; sl@0: static const TInt iniWeight[20]= sl@0: { sl@0: //occurence per 1000 chars sl@0: 30,20,20,10,10,10,10,10,5,5, sl@0: 5,5,5,5,5,5,5,5,5,5 sl@0: }; sl@0: sl@0: static const TUint iniChar[20]= sl@0: { sl@0: 0xa141,0xaaba,0xa446,0xadd3,0xa4a3,0xa7e2,0xa440,0xac4f,0xad6e,0xa45d, sl@0: 0xa4d1,0xa457,0xa457,0xa94d,0xa4a4,0xa569,0xa662,0xa470,0xa448,0xa455 sl@0: }; sl@0: sl@0: for (TInt k=0; k<20; k++) sl@0: { sl@0: refBig5[k].charBig5=iniChar[k]; sl@0: refBig5[k].weight=iniWeight[k]; sl@0: totalWeight=totalWeight+iniWeight[k]; sl@0: } sl@0: //WBB sl@0: for (TInt i = 0; i < sampleLength; ++i) sl@0: { sl@0: // Big 5 encoding first byte range 0xA1-0xFE sl@0: // second byte range 0x40-0x7E 0xA1-0xFE sl@0: if((aSample[i] >= 0xa1) && (aSample[i] <= 0xfe)) sl@0: { sl@0: TInt increment1 = i+1; sl@0: if (increment1 >= sampleLength) sl@0: break; sl@0: if(((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) || sl@0: ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe))) sl@0: { sl@0: TUint charBig5=(aSample[i]<<8)|(aSample[increment1]); sl@0: if (charBig5>=0xc6a1)//Kanas start and rare chars follow after sl@0: sumOutChar++; sl@0: TInt j; sl@0: for (j=0; j<20; j++) sl@0: { sl@0: if (charBig5==refBig5[j].charBig5) sl@0: { sl@0: sumOfWeight=sumOfWeight+refBig5[j].weight; sl@0: break; sl@0: } sl@0: } sl@0: sumOfGoodChar++; sl@0: i++; sl@0: } sl@0: else sl@0: { sl@0: sumOfBadSecondByte++; sl@0: } sl@0: } sl@0: // if seldom used characters sl@0: else if (aSample[i] < 0x20 || aSample[i] > 0x7F ) sl@0: { sl@0: if (aSample[i]!=0x09 && aSample[i]!=0x0A && aSample[i]!=0x0D) sl@0: sumOfBadSingleByte++; sl@0: } sl@0: } // for sl@0: sl@0: if (sumOfGoodChar) sl@0: { sl@0: aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar+sumOfBadSingleByte); sl@0: aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars sl@0: aConfidenceLevel=aConfidenceLevel-sumOutChar*100/sumOfGoodChar;//against gap sl@0: aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; sl@0: } sl@0: else sl@0: aConfidenceLevel=0; sl@0: return ETrue; sl@0: } sl@0: sl@0: CBIG5ConverterImpl* CBIG5ConverterImpl::NewL() sl@0: { sl@0: CBIG5ConverterImpl* self = new(ELeave) CBIG5ConverterImpl(); sl@0: return self; sl@0: } sl@0: sl@0: CBIG5ConverterImpl::~CBIG5ConverterImpl() sl@0: { sl@0: } sl@0: sl@0: CBIG5ConverterImpl::CBIG5ConverterImpl() sl@0: { sl@0: } sl@0: sl@0: const TImplementationProxy ImplementationTable[] = sl@0: { sl@0: IMPLEMENTATION_PROXY_ENTRY(0x10000FBF,CBIG5ConverterImpl::NewL) sl@0: }; sl@0: sl@0: EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) sl@0: { sl@0: aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); sl@0: sl@0: return ImplementationTable; sl@0: } sl@0: