os/textandloc/charconvfw/charconvplugins/src/plugins/SHIFTJIS_2.CPP
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/SHIFTJIS_2.CPP	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,253 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description:       
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <charconv.h>
    1.24 +#include "SHIFTJIS_2.H"
    1.25 +#include <ecom/implementationproxy.h>
    1.26 +#include "charactersetconverter.h"
    1.27 +#include "featmgr/featmgr.h"
    1.28 +
    1.29 +/**
    1.30 +Shift-JIS character converter wrapper
    1.31 +
    1.32 +@internalTechnology 
    1.33 +@released 9.1
    1.34 +*/
    1.35 +class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
    1.36 +	{
    1.37 +
    1.38 +public:
    1.39 +	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    1.40 +
    1.41 +	virtual TInt ConvertFromUnicode(
    1.42 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.43 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.44 +		TDes8& aForeign, 
    1.45 +		const TDesC16& aUnicode, 
    1.46 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    1.47 +
    1.48 +	virtual TInt ConvertToUnicode(
    1.49 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.50 +		TDes16& aUnicode, 
    1.51 +		const TDesC8& aForeign, 
    1.52 +		TInt& aState, 
    1.53 +		TInt& aNumberOfUnconvertibleCharacters, 
    1.54 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    1.55 +
    1.56 +	virtual TBool IsInThisCharacterSetL(
    1.57 +		TBool& aSetToTrue, 
    1.58 +		TInt& aConfidenceLevel, 
    1.59 +		const TDesC8& aSample);
    1.60 +
    1.61 +	static CShiftJisConverterImpl* NewL();
    1.62 +	virtual ~CShiftJisConverterImpl();
    1.63 +
    1.64 +private:
    1.65 +	CShiftJisConverterImpl();
    1.66 +	void ConstructL();
    1.67 +
    1.68 +	};
    1.69 +
    1.70 +/**
    1.71 +Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
    1.72 +
    1.73 +@return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
    1.74 +@internalTechnology 
    1.75 +*/
    1.76 +const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    1.77 +	{
    1.78 +	return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
    1.79 +	}
    1.80 +
    1.81 +TInt CShiftJisConverterImpl::ConvertFromUnicode(
    1.82 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.83 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.84 +		TDes8& aForeign, 
    1.85 +		const TDesC16& aUnicode, 
    1.86 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    1.87 +	{
    1.88 +	return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
    1.89 +	}
    1.90 +
    1.91 +
    1.92 +/**
    1.93 + Converts Shift-JIS encoded input text to Unicode
    1.94 + 
    1.95 + NOTE: For debugging the selected character set is returned in the state.
    1.96 + 
    1.97 +  @released  9.1
    1.98 +  @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
    1.99 +             in the foreign character set.
   1.100 +  @param     aUnicode On return, contains the text converted into Unicode.
   1.101 +  @param     aForeign The non-Unicode source text to be converted.
   1.102 +  @param     aState Used to save state information across multiple calls
   1.103 +             to <code>ConvertToUnicode()</code>.
   1.104 +  @param     aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
   1.105 +             converted.
   1.106 +  @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the
   1.107 +             input text that could not be converted. A negative
   1.108 +             value indicates that all the characters were
   1.109 +             converted.
   1.110 +  @return 	 The number of unconverted bytes left at the end of the input descriptor 
   1.111 + 		     (e.g. because the output descriptor is not long enough to hold all the text), 
   1.112 + 		     or one of the error values defined in TError. 
   1.113 +  @internalTechnology 
   1.114 +*/
   1.115 +TInt CShiftJisConverterImpl::ConvertToUnicode(
   1.116 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.117 +		TDes16& aUnicode, 
   1.118 +		const TDesC8& aForeign, 
   1.119 +		TInt& /*aState*/, 
   1.120 +		TInt& aNumberOfUnconvertibleCharacters, 
   1.121 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   1.122 +	{
   1.123 +	return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
   1.124 +	}
   1.125 +
   1.126 +
   1.127 +/**
   1.128 + This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
   1.129 + This method returns a value between 0 and 100, indicating how likely it 
   1.130 + is that this is the correct converter, for the text supplied.  
   1.131 + @internalTechnology 
   1.132 + */
   1.133 +TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
   1.134 +		TBool& aSetToTrue, 
   1.135 +		TInt& aConfidenceLevel, 
   1.136 +		const TDesC8& aSample)
   1.137 +	{
   1.138 +	aSetToTrue=ETrue;
   1.139 +	TInt sampleLength = aSample.Length();
   1.140 +	aConfidenceLevel = 0;
   1.141 +	TInt numberOfShiftJis=0;
   1.142 +	TInt occurrence=0;
   1.143 +	for (TInt i = 0; i < sampleLength; ++i)
   1.144 +		{
   1.145 +		// Check for JISX 0208:1997 Charset
   1.146 +		// First Byte in range 0x81-0x9f, 0xe0-0xef
   1.147 +		if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
   1.148 +			((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
   1.149 +			{
   1.150 +			// check that the second byte is in range as well 
   1.151 +			TInt increment1 = i+1;
   1.152 +			if(increment1 >= sampleLength)
   1.153 +				break;
   1.154 +			if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
   1.155 +				((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
   1.156 +				{
   1.157 +				// increase the confidence of this sample as ShiftJis
   1.158 +				aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
   1.159 +	
   1.160 +				TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
   1.161 +				if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
   1.162 +					(charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range
   1.163 +					occurrence++;
   1.164 +				numberOfShiftJis++;
   1.165 +				i++;
   1.166 +				}
   1.167 +			}
   1.168 +		// Check That no other Japanese escape sequence occur... if they do, cancel this and return 0
   1.169 +		// eg EUC-JP's SS(Single shift) characters followed by the
   1.170 +		if(aSample[i]==0x8e)
   1.171 +			{
   1.172 +			TInt increment1 = i+1;
   1.173 +			if(increment1 >= sampleLength)
   1.174 +				break;
   1.175 +			if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
   1.176 +				{
   1.177 +				// This could be EUC-JP format..
   1.178 +				aConfidenceLevel=0;
   1.179 +				i++;
   1.180 +				}
   1.181 +			}
   1.182 +		if(aSample[i]==0x8f)
   1.183 +			{
   1.184 +			TInt increment1 = i+1;
   1.185 +			TInt increment2 = i+2;
   1.186 +			if((increment1 >= sampleLength) || (increment2 >= sampleLength))
   1.187 +				break;
   1.188 +			if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && 
   1.189 +				((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
   1.190 +				{
   1.191 +				// 	This is definitely EUC-JP format. 
   1.192 +				aConfidenceLevel=0;
   1.193 +				break;
   1.194 +				}
   1.195 +			}
   1.196 +        // Check the half width Katakana
   1.197 +        if (aSample[i]>=0xa1 && aSample[i]<=0xdf)
   1.198 +            {
   1.199 +            // increase the confidence of this sample as ShiftJis
   1.200 +            aConfidenceLevel=(aConfidenceLevel > 0) ? aConfidenceLevel+5 : 75;
   1.201 +            occurrence++;
   1.202 +            numberOfShiftJis++;
   1.203 +            }
   1.204 +        else if (aSample[i]>=0xf0)
   1.205 +            {
   1.206 +            aConfidenceLevel=0;
   1.207 +            }
   1.208 +		} // for 
   1.209 +
   1.210 +	if(numberOfShiftJis)
   1.211 +		{
   1.212 +		aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel);
   1.213 +		aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis));
   1.214 +		}
   1.215 +	aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
   1.216 +	return ETrue;
   1.217 +	}
   1.218 +
   1.219 +
   1.220 +CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
   1.221 +	{
   1.222 +	CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl();
   1.223 +	CleanupStack::PushL(self);
   1.224 +	self->ConstructL();
   1.225 +	CleanupStack::Pop(self);
   1.226 +	return self;
   1.227 +	}
   1.228 +
   1.229 +
   1.230 +CShiftJisConverterImpl::~CShiftJisConverterImpl()
   1.231 +	{
   1.232 +    FeatureManager::UnInitializeLib();	
   1.233 +	}
   1.234 +
   1.235 +CShiftJisConverterImpl::CShiftJisConverterImpl()
   1.236 +	{
   1.237 +	}
   1.238 +
   1.239 +
   1.240 +void CShiftJisConverterImpl::ConstructL()
   1.241 +	{
   1.242 +    FeatureManager::InitializeLibL();	
   1.243 +	}
   1.244 +
   1.245 +const TImplementationProxy ImplementationTable[] = 
   1.246 +	{
   1.247 +		IMPLEMENTATION_PROXY_ENTRY(0x10000FBD,	CShiftJisConverterImpl::NewL)
   1.248 +	};
   1.249 +
   1.250 +
   1.251 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   1.252 +	{
   1.253 +	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   1.254 +
   1.255 +	return ImplementationTable;
   1.256 +	}