1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/SHIFTJIS_2.CPP Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,253 @@
1.4 +/*
1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <e32std.h>
1.23 +#include <charconv.h>
1.24 +#include "SHIFTJIS_2.H"
1.25 +#include <ecom/implementationproxy.h>
1.26 +#include "charactersetconverter.h"
1.27 +#include "featmgr/featmgr.h"
1.28 +
1.29 +/**
1.30 +Shift-JIS character converter wrapper: implements CCharacterSetConverterPluginInterface
1.31 +by forwarding the conversion calls to the static CnvShiftJis functions.
1.32 +@internalTechnology
1.33 +@released 9.1
1.34 +*/
1.35 +class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
1.36 + {
1.37 +
1.38 +public:
1.39 + virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
1.40 + // Forwarded unchanged to CnvShiftJis::ConvertFromUnicode().
1.41 + virtual TInt ConvertFromUnicode(
1.42 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.43 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.44 + TDes8& aForeign,
1.45 + const TDesC16& aUnicode,
1.46 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
1.47 + // Forwarded to CnvShiftJis::ConvertToUnicode(); aState is not passed on.
1.48 + virtual TInt ConvertToUnicode(
1.49 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.50 + TDes16& aUnicode,
1.51 + const TDesC8& aForeign,
1.52 + TInt& aState,
1.53 + TInt& aNumberOfUnconvertibleCharacters,
1.54 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
1.55 + // Auto-detection hook: reports a 0..100 confidence through aConfidenceLevel.
1.56 + virtual TBool IsInThisCharacterSetL(
1.57 + TBool& aSetToTrue,
1.58 + TInt& aConfidenceLevel,
1.59 + const TDesC8& aSample);
1.60 + // Two-phase construction entry point (NewL) and destructor.
1.61 + static CShiftJisConverterImpl* NewL();
1.62 + virtual ~CShiftJisConverterImpl();
1.63 +
1.64 +private:
1.65 + CShiftJisConverterImpl();
1.66 + void ConstructL();
1.67 +
1.68 + };
1.69 +
1.70 +/**
1.71 +Get the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
1.72 +Delegates to CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters() and returns its result.
1.73 +@return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
1.74 +@internalTechnology
1.75 +*/
1.76 +const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
1.77 + {
1.78 + return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
1.79 + }
1.80 +// Forwards all five arguments unchanged to CnvShiftJis::ConvertFromUnicode() and returns its result.
1.81 +TInt CShiftJisConverterImpl::ConvertFromUnicode(
1.82 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.83 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.84 + TDes8& aForeign,
1.85 + const TDesC16& aUnicode,
1.86 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
1.87 + {
1.88 + return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
1.89 + }
1.90 +
1.91 +
1.92 +/**
1.93 + Converts Shift-JIS encoded input text to Unicode by delegating to CnvShiftJis::ConvertToUnicode().
1.94 +
1.95 + NOTE: aState is ignored by this implementation; it is neither read nor forwarded to CnvShiftJis.
1.96 +
1.97 + @released 9.1
1.98 + @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
1.99 + in the foreign character set.
1.100 + @param aUnicode On return, contains the text converted into Unicode.
1.101 + @param aForeign The non-Unicode source text to be converted.
1.102 + @param aState Part of the plug-in interface signature; unused here
1.103 + (the parameter name is commented out in the definition).
1.104 + @param aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
1.105 + converted.
1.106 + @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first byte in the
1.107 + input text that could not be converted. A negative
1.108 + value indicates that all the characters were
1.109 + converted.
1.110 + @return The number of unconverted bytes left at the end of the input descriptor
1.111 + (e.g. because the output descriptor is not long enough to hold all the text),
1.112 + or one of the error values defined in TError.
1.113 + @internalTechnology
1.114 +*/
1.115 +TInt CShiftJisConverterImpl::ConvertToUnicode(
1.116 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.117 + TDes16& aUnicode,
1.118 + const TDesC8& aForeign,
1.119 + TInt& /*aState*/,
1.120 + TInt& aNumberOfUnconvertibleCharacters,
1.121 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
1.122 + {
1.123 + return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
1.124 + }
1.125 +
1.126 +
1.127 +/**
1.128 + This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL().
1.129 + It scans aSample and sets aConfidenceLevel to a value between 0 and 100, indicating how likely it
1.130 + is that the text is Shift-JIS. aSetToTrue is set to ETrue and the method itself always returns ETrue.
1.131 + @internalTechnology
1.132 + */
1.133 +TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
1.134 + TBool& aSetToTrue,
1.135 + TInt& aConfidenceLevel,
1.136 + const TDesC8& aSample)
1.137 + {
1.138 + aSetToTrue=ETrue;
1.139 + TInt sampleLength = aSample.Length();
1.140 + aConfidenceLevel = 0;
1.141 + TInt numberOfShiftJis=0; // characters recognised as Shift-JIS (double-byte pairs + half-width katakana)
1.142 + TInt occurrence=0; // how many of those fall in the kana ranges tested below
1.143 + for (TInt i = 0; i < sampleLength; ++i)
1.144 + {
1.145 + // Check for JISX 0208:1997 Charset
1.146 + // First Byte in range 0x81-0x9f, 0xe0-0xef
1.147 + if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
1.148 + ((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
1.149 + {
1.150 + // check that the second byte is in range as well
1.151 + TInt increment1 = i+1;
1.152 + if(increment1 >= sampleLength)
1.153 + break; // lead byte is the last byte of the sample: stop scanning
1.154 + if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
1.155 + ((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
1.156 + {
1.157 + // increase the confidence of this sample as ShiftJis (first hit sets 60, later hits add 5)
1.158 + aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
1.159 + // combine lead and trail byte into one 16-bit code for the kana range test
1.160 + TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
1.161 + if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
1.162 + (charShiftJis>=0x8340)&&(charShiftJis<=0x8396)) // these are the kana ranges
1.163 + occurrence++;
1.164 + numberOfShiftJis++;
1.165 + i++; // step onto the trail byte. NOTE(review): the checks below then test that trail byte - confirm intended
1.166 + }
1.167 + }
1.168 + // Check that no other Japanese encoding's sequences occur; if they do, reset the confidence to 0
1.169 + // e.g. EUC-JP's single-shift (SS) byte 0x8e followed by a byte in 0xa1-0xdf
1.170 + if(aSample[i]==0x8e) // NOTE(review): 0x8e/0x8f also lie inside the lead-byte range handled above
1.171 + {
1.172 + TInt increment1 = i+1;
1.173 + if(increment1 >= sampleLength)
1.174 + break;
1.175 + if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
1.176 + {
1.177 + // This could be EUC-JP format..
1.178 + aConfidenceLevel=0; // scanning continues, so later matches can raise the confidence again
1.179 + i++;
1.180 + }
1.181 + }
1.182 + if(aSample[i]==0x8f)
1.183 + {
1.184 + TInt increment1 = i+1;
1.185 + TInt increment2 = i+2;
1.186 + if((increment1 >= sampleLength) || (increment2 >= sampleLength))
1.187 + break; // fewer than two bytes follow: stop scanning
1.188 + if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) &&
1.189 + ((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
1.190 + {
1.191 + // This is definitely EUC-JP format.
1.192 + aConfidenceLevel=0;
1.193 + break; // unlike the 0x8e case, stop scanning with the confidence left at 0
1.194 + }
1.195 + }
1.196 + // Check for half-width katakana (single byte in 0xa1-0xdf)
1.197 + if (aSample[i]>=0xa1 && aSample[i]<=0xdf)
1.198 + {
1.199 + // increase the confidence of this sample as ShiftJis (first hit sets 75, later hits add 5)
1.200 + aConfidenceLevel=(aConfidenceLevel > 0) ? aConfidenceLevel+5 : 75;
1.201 + occurrence++;
1.202 + numberOfShiftJis++;
1.203 + }
1.204 + else if (aSample[i]>=0xf0) // NOTE(review): also fires for a trail byte 0xf0-0xfc accepted above - confirm intended
1.205 + {
1.206 + aConfidenceLevel=0;
1.207 + }
1.208 + } // for
1.209 +
1.210 + if(numberOfShiftJis)
1.211 + {
1.212 + aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel); // clamp to 0..100
1.213 + aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis)); // penalty of (30 - kana %) when kana are under 30% of recognised characters
1.214 + }
1.215 + aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel; // the penalty may have gone negative: floor at 0
1.216 + return ETrue;
1.217 + }
1.218 +
1.219 +// Factory: allocates with new(ELeave), keeps the object on the cleanup stack while ConstructL() runs, then pops and returns it.
1.220 +CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
1.221 + {
1.222 + CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl();
1.223 + CleanupStack::PushL(self);
1.224 + self->ConstructL();
1.225 + CleanupStack::Pop(self);
1.226 + return self;
1.227 + }
1.228 +
1.229 +// Destructor: calls FeatureManager::UnInitializeLib(), the counterpart of the InitializeLibL() call in ConstructL().
1.230 +CShiftJisConverterImpl::~CShiftJisConverterImpl()
1.231 + {
1.232 + FeatureManager::UnInitializeLib();
1.233 + }
1.234 +// Empty first-phase constructor (declared private; instances are created through NewL()).
1.235 +CShiftJisConverterImpl::CShiftJisConverterImpl()
1.236 + {
1.237 + }
1.238 +
1.239 +// Second-phase construction: calls FeatureManager::InitializeLibL() (presumably may leave, per the trailing-L naming).
1.240 +void CShiftJisConverterImpl::ConstructL()
1.241 + {
1.242 + FeatureManager::InitializeLibL();
1.243 + }
1.244 +// ECOM proxy table: pairs implementation UID 0x10000FBD with the factory CShiftJisConverterImpl::NewL.
1.245 +const TImplementationProxy ImplementationTable[] =
1.246 + {
1.247 + IMPLEMENTATION_PROXY_ENTRY(0x10000FBD, CShiftJisConverterImpl::NewL)
1.248 + };
1.249 +
1.250 +// Exported entry point: writes the number of table entries to aTableCount and returns ImplementationTable.
1.251 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
1.252 + {
1.253 + aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
1.254 +
1.255 + return ImplementationTable;
1.256 + }