os/textandloc/charconvfw/charconvplugins/src/plugins/shiftjis.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 *
    16 */
    17 
    18 
    19 #include <e32std.h>
    20 #include <charconv.h>
    21 #include "shiftjis.h"
    22 #include <ecom/implementationproxy.h>
    23 #include <charactersetconverter.h>
    24 
    25 
    26 /**
    27 Shift-JIS character converter wrapper
    28 
    29 @internalTechnology 
    30 @released 9.1
    31 */
    32 class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
    33 	{
    34 
    35 public:
    36 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    37 
    38 	virtual TInt ConvertFromUnicode(
    39 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    40 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    41 		TDes8& aForeign, 
    42 		const TDesC16& aUnicode, 
    43 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    44 
    45 	virtual TInt ConvertToUnicode(
    46 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    47 		TDes16& aUnicode, 
    48 		const TDesC8& aForeign, 
    49 		TInt& aState, 
    50 		TInt& aNumberOfUnconvertibleCharacters, 
    51 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    52 
    53 	virtual TBool IsInThisCharacterSetL(
    54 		TBool& aSetToTrue, 
    55 		TInt& aConfidenceLevel, 
    56 		const TDesC8& aSample);
    57 
    58 	static CShiftJisConverterImpl* NewL();
    59 	virtual ~CShiftJisConverterImpl();
    60 
    61 private:
    62 	CShiftJisConverterImpl();
    63 
    64 	};
    65 
    66 /**
    67 Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
    68 
    69 @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
    70 @internalTechnology 
    71 */
    72 const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    73 	{
    74 	return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
    75 	}
    76 
    77 TInt CShiftJisConverterImpl::ConvertFromUnicode(
    78 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    79 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    80 		TDes8& aForeign, 
    81 		const TDesC16& aUnicode, 
    82 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    83 	{
    84 	return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
    85 	}
    86 
    87 
    88 /**
    89  Converts Shift-JIS encoded input text to Unicode
    90  
    91  NOTE: For debugging the selected character set is returned in the state.
    92  
    93   @released  9.1
    94   @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
    95              in the foreign character set.
    96   @param     aUnicode On return, contains the text converted into Unicode.
    97   @param     aForeign The non-Unicode source text to be converted.
    98   @param     aState Used to save state information across multiple calls
    99              to <code>ConvertToUnicode()</code>.
   100   @param     aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
   101              converted.
   102   @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the
   103              input text that could not be converted. A negative
   104              value indicates that all the characters were
   105              converted.
   106   @return 	 The number of unconverted bytes left at the end of the input descriptor 
   107  		     (e.g. because the output descriptor is not long enough to hold all the text), 
   108  		     or one of the error values defined in TError. 
   109   @internalTechnology 
   110 */
   111 TInt CShiftJisConverterImpl::ConvertToUnicode(
   112 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   113 		TDes16& aUnicode, 
   114 		const TDesC8& aForeign, 
   115 		TInt& /*aState*/, 
   116 		TInt& aNumberOfUnconvertibleCharacters, 
   117 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   118 	{
   119 	return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
   120 	}
   121 
   122 
   123 /**
   124  This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
   125  This method returns a value between 0 and 100, indicating how likely it 
   126  is that this is the correct converter, for the text supplied.  
   127  @internalTechnology 
   128  */
   129 TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
   130 		TBool& aSetToTrue, 
   131 		TInt& aConfidenceLevel, 
   132 		const TDesC8& aSample)
   133 	{
   134 	aSetToTrue=ETrue;
   135 	TInt sampleLength = aSample.Length();
   136 	aConfidenceLevel = 0;
   137 	TInt numberOfShiftJis=0;
   138 	TInt occurrence=0;
   139 	for (TInt i = 0; i < sampleLength; ++i)
   140 		{
   141 		// Check for JISX 0208:1997 Charset
   142 		// First Byte in range 0x81-0x9f, 0xe0-0xef
   143 		if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
   144 			((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
   145 			{
   146 			// check that the second byte is in range as well 
   147 			TInt increment1 = i+1;
   148 			if(increment1 >= sampleLength)
   149 				break;
   150 			if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
   151 				((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
   152 				{
   153 				// increase the confidence of this sample as ShiftJis
   154 				aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
   155 	
   156 				TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
   157 				if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
   158 					(charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range
   159 					occurrence++;
   160 				numberOfShiftJis++;
   161 				i++;
   162 				}
   163 			}
   164 		// Check That no other Japanese escape sequence occur... if they do, cancel this and return 0
   165 		// eg EUC-JP's SS(Single shift) characters followed by the
   166 		if(aSample[i]==0x8e)
   167 			{
   168 			TInt increment1 = i+1;
   169 			if(increment1 >= sampleLength)
   170 				break;
   171 			if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
   172 				{
   173 				// This could be EUC-JP format..
   174 				aConfidenceLevel=0;
   175 				i++;
   176 				}
   177 			}
   178 		if(aSample[i]==0x8f)
   179 			{
   180 			TInt increment1 = i+1;
   181 			TInt increment2 = i+2;
   182 			if((increment1 >= sampleLength) || (increment2 >= sampleLength))
   183 				break;
   184 			if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && 
   185 				((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
   186 				{
   187 				// 	This is definitely EUC-JP format. 
   188 				aConfidenceLevel=0;
   189 				break;
   190 				}
   191 			}
   192 		} // for 
   193 
   194 	if(numberOfShiftJis)
   195 		{
   196 		aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel);
   197 		aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis));
   198 		}
   199 	aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
   200 	return ETrue;
   201 	}
   202 
   203 
   204 CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
   205 	{
   206 	CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl();
   207 	return self;
   208 	}
   209 
   210 
   211 CShiftJisConverterImpl::~CShiftJisConverterImpl()
   212 	{
   213 	}
   214 
   215 CShiftJisConverterImpl::CShiftJisConverterImpl()
   216 	{
   217 	}
   218 
   219 const TImplementationProxy ImplementationTable[] = 
   220 	{
   221 #ifdef KDDIAU_TEST
   222 		// for the test build use a special test UID which is called
   223 		//explicitly from test code
   224 		IMPLEMENTATION_PROXY_ENTRY(0x01000001,	CShiftJisConverterImpl::NewL)
   225 #else
   226 		IMPLEMENTATION_PROXY_ENTRY(0x10000FBD,	CShiftJisConverterImpl::NewL)
   227 #endif
   228 	};
   229 
   230 
   231 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   232 	{
   233 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   234 
   235 	return ImplementationTable;
   236 	}