sl@0: /* sl@0: * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include "SHIFTJIS_2.H" sl@0: #include sl@0: #include "charactersetconverter.h" sl@0: #include "featmgr/featmgr.h" sl@0: sl@0: /** sl@0: Shift-JIS character converter wrapper sl@0: sl@0: @internalTechnology sl@0: @released 9.1 sl@0: */ sl@0: class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface sl@0: { sl@0: sl@0: public: sl@0: virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); sl@0: sl@0: virtual TInt ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); sl@0: sl@0: virtual TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: sl@0: virtual TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample); sl@0: sl@0: static CShiftJisConverterImpl* NewL(); sl@0: virtual ~CShiftJisConverterImpl(); sl@0: sl@0: private: sl@0: CShiftJisConverterImpl(); sl@0: void ConstructL(); sl@0: sl@0: }; sl@0: sl@0: /** sl@0: Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted. sl@0: sl@0: @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted. sl@0: @internalTechnology sl@0: */ sl@0: const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() sl@0: { sl@0: return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters(); sl@0: } sl@0: sl@0: TInt CShiftJisConverterImpl::ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) sl@0: { sl@0: return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); sl@0: } sl@0: sl@0: sl@0: /** sl@0: Converts Shift-JIS encoded input text to Unicode sl@0: sl@0: NOTE: For debugging the selected character set is returned in the state. sl@0: sl@0: @released 9.1 sl@0: @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters sl@0: in the foreign character set. sl@0: @param aUnicode On return, contains the text converted into Unicode. sl@0: @param aForeign The non-Unicode source text to be converted. sl@0: @param aState Used to save state information across multiple calls sl@0: to ConvertToUnicode(). sl@0: @param aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not sl@0: converted. sl@0: @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the sl@0: input text that could not be converted. A negative sl@0: value indicates that all the characters were sl@0: converted. sl@0: @return The number of unconverted bytes left at the end of the input descriptor sl@0: (e.g. because the output descriptor is not long enough to hold all the text), sl@0: or one of the error values defined in TError. sl@0: @internalTechnology sl@0: */ sl@0: TInt CShiftJisConverterImpl::ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& /*aState*/, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) sl@0: { sl@0: return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: } sl@0: sl@0: sl@0: /** sl@0: This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). sl@0: This method returns a value between 0 and 100, indicating how likely it sl@0: is that this is the correct converter, for the text supplied. sl@0: @internalTechnology sl@0: */ sl@0: TBool CShiftJisConverterImpl::IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample) sl@0: { sl@0: aSetToTrue=ETrue; sl@0: TInt sampleLength = aSample.Length(); sl@0: aConfidenceLevel = 0; sl@0: TInt numberOfShiftJis=0; sl@0: TInt occurrence=0; sl@0: for (TInt i = 0; i < sampleLength; ++i) sl@0: { sl@0: // Check for JISX 0208:1997 Charset sl@0: // First Byte in range 0x81-0x9f, 0xe0-0xef sl@0: if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) || sl@0: ((aSample[i] >= 0xe0) && (aSample[i] <= 0xef))) sl@0: { sl@0: // check that the second byte is in range as well sl@0: TInt increment1 = i+1; sl@0: if(increment1 >= sampleLength) sl@0: break; sl@0: if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) || sl@0: ((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc))) sl@0: { sl@0: // increase the confidence of this sample as ShiftJis sl@0: aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60; sl@0: sl@0: TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]); sl@0: if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)|| sl@0: (charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range sl@0: occurrence++; sl@0: numberOfShiftJis++; sl@0: i++; sl@0: } sl@0: } sl@0: // Check That no other Japanese escape sequence occur... if they do, cancel this and return 0 sl@0: // eg EUC-JP's SS(Single shift) characters followed by the sl@0: if(aSample[i]==0x8e) sl@0: { sl@0: TInt increment1 = i+1; sl@0: if(increment1 >= sampleLength) sl@0: break; sl@0: if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf)) sl@0: { sl@0: // This could be EUC-JP format.. sl@0: aConfidenceLevel=0; sl@0: i++; sl@0: } sl@0: } sl@0: if(aSample[i]==0x8f) sl@0: { sl@0: TInt increment1 = i+1; sl@0: TInt increment2 = i+2; sl@0: if((increment1 >= sampleLength) || (increment2 >= sampleLength)) sl@0: break; sl@0: if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && sl@0: ((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe))) sl@0: { sl@0: // This is definitely EUC-JP format. sl@0: aConfidenceLevel=0; sl@0: break; sl@0: } sl@0: } sl@0: // Check the half width Katakana sl@0: if (aSample[i]>=0xa1 && aSample[i]<=0xdf) sl@0: { sl@0: // increase the confidence of this sample as ShiftJis sl@0: aConfidenceLevel=(aConfidenceLevel > 0) ? aConfidenceLevel+5 : 75; sl@0: occurrence++; sl@0: numberOfShiftJis++; sl@0: } sl@0: else if (aSample[i]>=0xf0) sl@0: { sl@0: aConfidenceLevel=0; sl@0: } sl@0: } // for sl@0: sl@0: if(numberOfShiftJis) sl@0: { sl@0: aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel); sl@0: aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis)); sl@0: } sl@0: aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; sl@0: return ETrue; sl@0: } sl@0: sl@0: sl@0: CShiftJisConverterImpl* CShiftJisConverterImpl::NewL() sl@0: { sl@0: CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl(); sl@0: CleanupStack::PushL(self); sl@0: self->ConstructL(); sl@0: CleanupStack::Pop(self); sl@0: return self; sl@0: } sl@0: sl@0: sl@0: CShiftJisConverterImpl::~CShiftJisConverterImpl() sl@0: { sl@0: FeatureManager::UnInitializeLib(); sl@0: } sl@0: sl@0: CShiftJisConverterImpl::CShiftJisConverterImpl() sl@0: { sl@0: } sl@0: sl@0: sl@0: void CShiftJisConverterImpl::ConstructL() sl@0: { sl@0: FeatureManager::InitializeLibL(); sl@0: } sl@0: sl@0: const TImplementationProxy ImplementationTable[] = sl@0: { sl@0: IMPLEMENTATION_PROXY_ENTRY(0x10000FBD, CShiftJisConverterImpl::NewL) sl@0: }; sl@0: sl@0: sl@0: EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) sl@0: { sl@0: aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); sl@0: sl@0: return ImplementationTable; sl@0: }