os/textandloc/charconvfw/charconvplugins/src/plugins/SHIFTJIS_2.CPP
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description:       
    15 *
    16 */
    17 
    18 
    19 #include <e32std.h>
    20 #include <charconv.h>
    21 #include "SHIFTJIS_2.H"
    22 #include <ecom/implementationproxy.h>
    23 #include "charactersetconverter.h"
    24 #include "featmgr/featmgr.h"
    25 
    26 /**
    27 Shift-JIS character converter wrapper
    28 
    29 @internalTechnology 
    30 @released 9.1
    31 */
    32 class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
    33 	{
    34 
    35 public:
    36 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    37 
    38 	virtual TInt ConvertFromUnicode(
    39 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    40 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    41 		TDes8& aForeign, 
    42 		const TDesC16& aUnicode, 
    43 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    44 
    45 	virtual TInt ConvertToUnicode(
    46 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    47 		TDes16& aUnicode, 
    48 		const TDesC8& aForeign, 
    49 		TInt& aState, 
    50 		TInt& aNumberOfUnconvertibleCharacters, 
    51 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    52 
    53 	virtual TBool IsInThisCharacterSetL(
    54 		TBool& aSetToTrue, 
    55 		TInt& aConfidenceLevel, 
    56 		const TDesC8& aSample);
    57 
    58 	static CShiftJisConverterImpl* NewL();
    59 	virtual ~CShiftJisConverterImpl();
    60 
    61 private:
    62 	CShiftJisConverterImpl();
    63 	void ConstructL();
    64 
    65 	};
    66 
    67 /**
    68 Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
    69 
    70 @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
    71 @internalTechnology 
    72 */
    73 const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    74 	{
    75 	return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
    76 	}
    77 
    78 TInt CShiftJisConverterImpl::ConvertFromUnicode(
    79 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    80 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    81 		TDes8& aForeign, 
    82 		const TDesC16& aUnicode, 
    83 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    84 	{
    85 	return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
    86 	}
    87 
    88 
    89 /**
    90  Converts Shift-JIS encoded input text to Unicode
    91  
    92  NOTE: For debugging the selected character set is returned in the state.
    93  
    94   @released  9.1
    95   @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
    96              in the foreign character set.
    97   @param     aUnicode On return, contains the text converted into Unicode.
    98   @param     aForeign The non-Unicode source text to be converted.
    99   @param     aState Used to save state information across multiple calls
   100              to <code>ConvertToUnicode()</code>.
   101   @param     aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
   102              converted.
   103   @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the
   104              input text that could not be converted. A negative
   105              value indicates that all the characters were
   106              converted.
   107   @return 	 The number of unconverted bytes left at the end of the input descriptor 
   108  		     (e.g. because the output descriptor is not long enough to hold all the text), 
   109  		     or one of the error values defined in TError. 
   110   @internalTechnology 
   111 */
   112 TInt CShiftJisConverterImpl::ConvertToUnicode(
   113 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   114 		TDes16& aUnicode, 
   115 		const TDesC8& aForeign, 
   116 		TInt& /*aState*/, 
   117 		TInt& aNumberOfUnconvertibleCharacters, 
   118 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   119 	{
   120 	return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
   121 	}
   122 
   123 
   124 /**
   125  This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
   126  This method returns a value between 0 and 100, indicating how likely it 
   127  is that this is the correct converter, for the text supplied.  
   128  @internalTechnology 
   129  */
   130 TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
   131 		TBool& aSetToTrue, 
   132 		TInt& aConfidenceLevel, 
   133 		const TDesC8& aSample)
   134 	{
   135 	aSetToTrue=ETrue;
   136 	TInt sampleLength = aSample.Length();
   137 	aConfidenceLevel = 0;
   138 	TInt numberOfShiftJis=0;
   139 	TInt occurrence=0;
   140 	for (TInt i = 0; i < sampleLength; ++i)
   141 		{
   142 		// Check for JISX 0208:1997 Charset
   143 		// First Byte in range 0x81-0x9f, 0xe0-0xef
   144 		if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
   145 			((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
   146 			{
   147 			// check that the second byte is in range as well 
   148 			TInt increment1 = i+1;
   149 			if(increment1 >= sampleLength)
   150 				break;
   151 			if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
   152 				((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
   153 				{
   154 				// increase the confidence of this sample as ShiftJis
   155 				aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
   156 	
   157 				TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
   158 				if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
   159 					(charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range
   160 					occurrence++;
   161 				numberOfShiftJis++;
   162 				i++;
   163 				}
   164 			}
   165 		// Check That no other Japanese escape sequence occur... if they do, cancel this and return 0
   166 		// eg EUC-JP's SS(Single shift) characters followed by the
   167 		if(aSample[i]==0x8e)
   168 			{
   169 			TInt increment1 = i+1;
   170 			if(increment1 >= sampleLength)
   171 				break;
   172 			if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
   173 				{
   174 				// This could be EUC-JP format..
   175 				aConfidenceLevel=0;
   176 				i++;
   177 				}
   178 			}
   179 		if(aSample[i]==0x8f)
   180 			{
   181 			TInt increment1 = i+1;
   182 			TInt increment2 = i+2;
   183 			if((increment1 >= sampleLength) || (increment2 >= sampleLength))
   184 				break;
   185 			if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && 
   186 				((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
   187 				{
   188 				// 	This is definitely EUC-JP format. 
   189 				aConfidenceLevel=0;
   190 				break;
   191 				}
   192 			}
   193         // Check the half width Katakana
   194         if (aSample[i]>=0xa1 && aSample[i]<=0xdf)
   195             {
   196             // increase the confidence of this sample as ShiftJis
   197             aConfidenceLevel=(aConfidenceLevel > 0) ? aConfidenceLevel+5 : 75;
   198             occurrence++;
   199             numberOfShiftJis++;
   200             }
   201         else if (aSample[i]>=0xf0)
   202             {
   203             aConfidenceLevel=0;
   204             }
   205 		} // for 
   206 
   207 	if(numberOfShiftJis)
   208 		{
   209 		aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel);
   210 		aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis));
   211 		}
   212 	aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
   213 	return ETrue;
   214 	}
   215 
   216 
   217 CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
   218 	{
   219 	CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl();
   220 	CleanupStack::PushL(self);
   221 	self->ConstructL();
   222 	CleanupStack::Pop(self);
   223 	return self;
   224 	}
   225 
   226 
   227 CShiftJisConverterImpl::~CShiftJisConverterImpl()
   228 	{
   229     FeatureManager::UnInitializeLib();	
   230 	}
   231 
   232 CShiftJisConverterImpl::CShiftJisConverterImpl()
   233 	{
   234 	}
   235 
   236 
   237 void CShiftJisConverterImpl::ConstructL()
   238 	{
   239     FeatureManager::InitializeLibL();	
   240 	}
   241 
   242 const TImplementationProxy ImplementationTable[] = 
   243 	{
   244 		IMPLEMENTATION_PROXY_ENTRY(0x10000FBD,	CShiftJisConverterImpl::NewL)
   245 	};
   246 
   247 
   248 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   249 	{
   250 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   251 
   252 	return ImplementationTable;
   253 	}