os/textandloc/charconvfw/charconvplugins/src/plugins/shiftjis.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <charconv.h>
sl@0
    21
#include "shiftjis.h"
sl@0
    22
#include <ecom/implementationproxy.h>
sl@0
    23
#include <charactersetconverter.h>
sl@0
    24
sl@0
    25
sl@0
    26
/**
sl@0
    27
Shift-JIS character converter wrapper
sl@0
    28
sl@0
    29
@internalTechnology 
sl@0
    30
@released 9.1
sl@0
    31
*/
sl@0
    32
class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
sl@0
    33
	{
sl@0
    34
sl@0
    35
public:
sl@0
    36
	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
sl@0
    37
sl@0
    38
	virtual TInt ConvertFromUnicode(
sl@0
    39
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    40
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    41
		TDes8& aForeign, 
sl@0
    42
		const TDesC16& aUnicode, 
sl@0
    43
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
sl@0
    44
sl@0
    45
	virtual TInt ConvertToUnicode(
sl@0
    46
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    47
		TDes16& aUnicode, 
sl@0
    48
		const TDesC8& aForeign, 
sl@0
    49
		TInt& aState, 
sl@0
    50
		TInt& aNumberOfUnconvertibleCharacters, 
sl@0
    51
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0
    52
sl@0
    53
	virtual TBool IsInThisCharacterSetL(
sl@0
    54
		TBool& aSetToTrue, 
sl@0
    55
		TInt& aConfidenceLevel, 
sl@0
    56
		const TDesC8& aSample);
sl@0
    57
sl@0
    58
	static CShiftJisConverterImpl* NewL();
sl@0
    59
	virtual ~CShiftJisConverterImpl();
sl@0
    60
sl@0
    61
private:
sl@0
    62
	CShiftJisConverterImpl();
sl@0
    63
sl@0
    64
	};
sl@0
    65
sl@0
    66
/**
sl@0
    67
Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
sl@0
    68
sl@0
    69
@return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
sl@0
    70
@internalTechnology 
sl@0
    71
*/
sl@0
    72
const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
sl@0
    73
	{
sl@0
    74
	return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
sl@0
    75
	}
sl@0
    76
sl@0
    77
/**
Converts Unicode text to Shift-JIS.

Every argument is passed through unchanged to CnvShiftJis::ConvertFromUnicode()
and that function's return value is returned to the caller.

@internalTechnology 
*/
TInt CShiftJisConverterImpl::ConvertFromUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
		TDes8& aForeign, 
		const TDesC16& aUnicode, 
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
	{
	return CnvShiftJis::ConvertFromUnicode(
		aDefaultEndiannessOfForeignCharacters,
		aReplacementForUnconvertibleUnicodeCharacters,
		aForeign,
		aUnicode,
		aIndicesOfUnconvertibleCharacters);
	}
sl@0
    86
sl@0
    87
sl@0
    88
/**
sl@0
    89
 Converts Shift-JIS encoded input text to Unicode
sl@0
    90
 
sl@0
    91
 NOTE: For debugging the selected character set is returned in the state.
sl@0
    92
 
sl@0
    93
  @released  9.1
sl@0
    94
  @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
sl@0
    95
             in the foreign character set.
sl@0
    96
  @param     aUnicode On return, contains the text converted into Unicode.
sl@0
    97
  @param     aForeign The non-Unicode source text to be converted.
sl@0
    98
  @param     aState Used to save state information across multiple calls
sl@0
    99
             to <code>ConvertToUnicode()</code>.
sl@0
   100
  @param     aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
sl@0
   101
             converted.
sl@0
   102
  @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the
sl@0
   103
             input text that could not be converted. A negative
sl@0
   104
             value indicates that all the characters were
sl@0
   105
             converted.
sl@0
   106
  @return 	 The number of unconverted bytes left at the end of the input descriptor 
sl@0
   107
 		     (e.g. because the output descriptor is not long enough to hold all the text), 
sl@0
   108
 		     or one of the error values defined in TError. 
sl@0
   109
  @internalTechnology 
sl@0
   110
*/
sl@0
   111
TInt CShiftJisConverterImpl::ConvertToUnicode(
sl@0
   112
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
   113
		TDes16& aUnicode, 
sl@0
   114
		const TDesC8& aForeign, 
sl@0
   115
		TInt& /*aState*/, 
sl@0
   116
		TInt& aNumberOfUnconvertibleCharacters, 
sl@0
   117
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
sl@0
   118
	{
sl@0
   119
	return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0
   120
	}
sl@0
   121
sl@0
   122
sl@0
   123
/**
sl@0
   124
 This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
sl@0
   125
 This method returns a value between 0 and 100, indicating how likely it 
sl@0
   126
 is that this is the correct converter, for the text supplied.  
sl@0
   127
 @internalTechnology 
sl@0
   128
 */
sl@0
   129
TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
sl@0
   130
		TBool& aSetToTrue, 
sl@0
   131
		TInt& aConfidenceLevel, 
sl@0
   132
		const TDesC8& aSample)
sl@0
   133
	{
sl@0
   134
	aSetToTrue=ETrue;
sl@0
   135
	TInt sampleLength = aSample.Length();
sl@0
   136
	aConfidenceLevel = 0;
sl@0
   137
	TInt numberOfShiftJis=0;
sl@0
   138
	TInt occurrence=0;
sl@0
   139
	for (TInt i = 0; i < sampleLength; ++i)
sl@0
   140
		{
sl@0
   141
		// Check for JISX 0208:1997 Charset
sl@0
   142
		// First Byte in range 0x81-0x9f, 0xe0-0xef
sl@0
   143
		if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
sl@0
   144
			((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
sl@0
   145
			{
sl@0
   146
			// check that the second byte is in range as well 
sl@0
   147
			TInt increment1 = i+1;
sl@0
   148
			if(increment1 >= sampleLength)
sl@0
   149
				break;
sl@0
   150
			if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
sl@0
   151
				((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
sl@0
   152
				{
sl@0
   153
				// increase the confidence of this sample as ShiftJis
sl@0
   154
				aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
sl@0
   155
	
sl@0
   156
				TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
sl@0
   157
				if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
sl@0
   158
					(charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range
sl@0
   159
					occurrence++;
sl@0
   160
				numberOfShiftJis++;
sl@0
   161
				i++;
sl@0
   162
				}
sl@0
   163
			}
sl@0
   164
		// Check That no other Japanese escape sequence occur... if they do, cancel this and return 0
sl@0
   165
		// eg EUC-JP's SS(Single shift) characters followed by the
sl@0
   166
		if(aSample[i]==0x8e)
sl@0
   167
			{
sl@0
   168
			TInt increment1 = i+1;
sl@0
   169
			if(increment1 >= sampleLength)
sl@0
   170
				break;
sl@0
   171
			if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
sl@0
   172
				{
sl@0
   173
				// This could be EUC-JP format..
sl@0
   174
				aConfidenceLevel=0;
sl@0
   175
				i++;
sl@0
   176
				}
sl@0
   177
			}
sl@0
   178
		if(aSample[i]==0x8f)
sl@0
   179
			{
sl@0
   180
			TInt increment1 = i+1;
sl@0
   181
			TInt increment2 = i+2;
sl@0
   182
			if((increment1 >= sampleLength) || (increment2 >= sampleLength))
sl@0
   183
				break;
sl@0
   184
			if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && 
sl@0
   185
				((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
sl@0
   186
				{
sl@0
   187
				// 	This is definitely EUC-JP format. 
sl@0
   188
				aConfidenceLevel=0;
sl@0
   189
				break;
sl@0
   190
				}
sl@0
   191
			}
sl@0
   192
		} // for 
sl@0
   193
sl@0
   194
	if(numberOfShiftJis)
sl@0
   195
		{
sl@0
   196
		aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel);
sl@0
   197
		aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis));
sl@0
   198
		}
sl@0
   199
	aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
sl@0
   200
	return ETrue;
sl@0
   201
	}
sl@0
   202
sl@0
   203
sl@0
   204
/**
Factory function registered in ImplementationTable.

@return A new CShiftJisConverterImpl allocated with new(ELeave).
*/
CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
	{
	// No second-phase construction is performed, so the object can be returned directly.
	return new(ELeave) CShiftJisConverterImpl();
	}
sl@0
   209
sl@0
   210
sl@0
   211
/** Destructor. The class declares no data members, so there is nothing to release. */
CShiftJisConverterImpl::~CShiftJisConverterImpl()
	{
	}
sl@0
   214
sl@0
   215
/** Default constructor. Private; called only from NewL(). Performs no initialisation. */
CShiftJisConverterImpl::CShiftJisConverterImpl()
	{
	}
sl@0
   218
sl@0
   219
// Maps the plugin's implementation UID to its factory function.
// Builds with KDDIAU_TEST defined register a special test UID (0x01000001)
// that is called explicitly from test code; all other builds register
// 0x10000FBD.
const TImplementationProxy ImplementationTable[] = 
	{
#ifdef KDDIAU_TEST
		IMPLEMENTATION_PROXY_ENTRY(0x01000001,	CShiftJisConverterImpl::NewL)
#else
		IMPLEMENTATION_PROXY_ENTRY(0x10000FBD,	CShiftJisConverterImpl::NewL)
#endif
	};
sl@0
   229
sl@0
   230
sl@0
   231
/**
Exported accessor for the implementation table (presumably the entry point
the ECOM framework looks up; confirm against the plugin's build files).

@param aTableCount On return, the number of entries in ImplementationTable.
@return Pointer to the first entry of ImplementationTable.
*/
EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
	{
	// Element count = total array size divided by the size of one element.
	aTableCount = sizeof(ImplementationTable) / sizeof(ImplementationTable[0]);
	return ImplementationTable;
	}