os/textandloc/charconvfw/charconvplugins/src/plugins/SHIFTJIS_2.CPP
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description:       
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <charconv.h>
sl@0
    21
#include "SHIFTJIS_2.H"
sl@0
    22
#include <ecom/implementationproxy.h>
sl@0
    23
#include "charactersetconverter.h"
sl@0
    24
#include "featmgr/featmgr.h"
sl@0
    25
sl@0
    26
/**
sl@0
    27
Shift-JIS character converter wrapper
sl@0
    28
sl@0
    29
@internalTechnology 
sl@0
    30
@released 9.1
sl@0
    31
*/
sl@0
    32
class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
sl@0
    33
	{
sl@0
    34
sl@0
    35
public:
sl@0
    36
	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
sl@0
    37
sl@0
    38
	virtual TInt ConvertFromUnicode(
sl@0
    39
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    40
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    41
		TDes8& aForeign, 
sl@0
    42
		const TDesC16& aUnicode, 
sl@0
    43
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
sl@0
    44
sl@0
    45
	virtual TInt ConvertToUnicode(
sl@0
    46
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    47
		TDes16& aUnicode, 
sl@0
    48
		const TDesC8& aForeign, 
sl@0
    49
		TInt& aState, 
sl@0
    50
		TInt& aNumberOfUnconvertibleCharacters, 
sl@0
    51
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0
    52
sl@0
    53
	virtual TBool IsInThisCharacterSetL(
sl@0
    54
		TBool& aSetToTrue, 
sl@0
    55
		TInt& aConfidenceLevel, 
sl@0
    56
		const TDesC8& aSample);
sl@0
    57
sl@0
    58
	static CShiftJisConverterImpl* NewL();
sl@0
    59
	virtual ~CShiftJisConverterImpl();
sl@0
    60
sl@0
    61
private:
sl@0
    62
	CShiftJisConverterImpl();
sl@0
    63
	void ConstructL();
sl@0
    64
sl@0
    65
	};
sl@0
    66
sl@0
    67
/**
sl@0
    68
Get the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
sl@0
    69
sl@0
    70
@return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
sl@0
    71
@internalTechnology 
sl@0
    72
*/
sl@0
    73
const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
	{
	// Pure forwarder: the replacement sequence is owned by CnvShiftJis.
	const TDesC8& replacement = CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
	return replacement;
	}
sl@0
    77
sl@0
    78
TInt CShiftJisConverterImpl::ConvertFromUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
		TDes8& aForeign,
		const TDesC16& aUnicode,
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
	{
	// Pure forwarder: every argument is handed to CnvShiftJis unchanged and
	// its result is returned as-is.
	const TInt result = CnvShiftJis::ConvertFromUnicode(
		aDefaultEndiannessOfForeignCharacters,
		aReplacementForUnconvertibleUnicodeCharacters,
		aForeign,
		aUnicode,
		aIndicesOfUnconvertibleCharacters);
	return result;
	}
sl@0
    87
sl@0
    88
sl@0
    89
/**
sl@0
    90
 Converts Shift-JIS encoded input text to Unicode
sl@0
    91
 
sl@0
    92
 NOTE: aState is accepted for interface compatibility only; this implementation neither reads nor writes it.
sl@0
    93
 
sl@0
    94
  @released  9.1
sl@0
    95
  @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
sl@0
    96
             in the foreign character set.
sl@0
    97
  @param     aUnicode On return, contains the text converted into Unicode.
sl@0
    98
  @param     aForeign The non-Unicode source text to be converted.
sl@0
    99
  @param     aState Used to save state information across multiple calls
sl@0
   100
             to <code>ConvertToUnicode()</code>.
sl@0
   101
  @param     aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
sl@0
   102
             converted.
sl@0
   103
  @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first byte in the
sl@0
   104
             input text that could not be converted. A negative
sl@0
   105
             value indicates that all the characters were
sl@0
   106
             converted.
sl@0
   107
  @return 	 The number of unconverted bytes left at the end of the input descriptor 
sl@0
   108
 		     (e.g. because the output descriptor is not long enough to hold all the text), 
sl@0
   109
 		     or one of the error values defined in TError. 
sl@0
   110
  @internalTechnology 
sl@0
   111
*/
sl@0
   112
TInt CShiftJisConverterImpl::ConvertToUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
		TDes16& aUnicode,
		const TDesC8& aForeign,
		TInt& /*aState*/,
		TInt& aNumberOfUnconvertibleCharacters,
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
	{
	// Pure forwarder. The state argument is deliberately unnamed: it is not
	// passed on to CnvShiftJis::ConvertToUnicode.
	const TInt result = CnvShiftJis::ConvertToUnicode(
		aDefaultEndiannessOfForeignCharacters,
		aUnicode,
		aForeign,
		aNumberOfUnconvertibleCharacters,
		aIndexOfFirstByteOfFirstUnconvertibleCharacter);
	return result;
	}
sl@0
   122
sl@0
   123
sl@0
   124
/**
sl@0
   125
 This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
sl@0
   126
 This method sets aConfidenceLevel to a value between 0 and 100, indicating how likely it 
sl@0
   127
 is that this is the correct converter, for the text supplied.  
sl@0
   128
 @internalTechnology 
sl@0
   129
 */
sl@0
   130
TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
		TBool& aSetToTrue,
		TInt& aConfidenceLevel,
		const TDesC8& aSample)
	{
	// Scans aSample byte by byte and scores how Shift-JIS-like it looks.
	//   aSetToTrue       - always set to ETrue.
	//   aConfidenceLevel - receives the score; it is clamped so that it never
	//                      leaves this function below 0 or above 100.
	//   aSample          - the raw bytes to examine.
	// Always returns ETrue.
	aSetToTrue = ETrue;
	aConfidenceLevel = 0;

	const TInt length = aSample.Length();
	TInt shiftJisCount = 0;	// characters counted as Shift-JIS so far
	TInt kanaCount = 0;		// how many of those were kana

	TInt pos = 0;
	while (pos < length)
		{
		// Check for JISX 0208:1997 Charset
		// First byte in range 0x81-0x9f, 0xe0-0xef
		const TUint lead = aSample[pos];
		if ((lead >= 0x81 && lead <= 0x9f) || (lead >= 0xe0 && lead <= 0xef))
			{
			const TInt next = pos + 1;
			if (next >= length)
				{
				// A lead byte with nothing after it: stop scanning.
				break;
				}
			// The second byte must be in range as well: 0x40-0x7e or 0x80-0xfc.
			const TUint trail = aSample[next];
			if ((trail >= 0x40 && trail <= 0x7e) || (trail >= 0x80 && trail <= 0xfc))
				{
				// First hit starts the score at 60, every later one adds 5.
				if (aConfidenceLevel > 0)
					{
					aConfidenceLevel += 5;
					}
				else
					{
					aConfidenceLevel = 60;
					}

				// Double-byte kana live in 0x829f-0x82f1 and 0x8340-0x8396.
				const TUint pair = (lead << 8) | trail;
				const TBool inFirstKanaRange = (pair >= 0x829f) && (pair <= 0x82f1);
				const TBool inSecondKanaRange = (pair >= 0x8340) && (pair <= 0x8396);
				if (inFirstKanaRange || inSecondKanaRange)
					{
					++kanaCount;
					}
				++shiftJisCount;

				// Step onto the trail byte. NOTE(review): every check below
				// re-reads aSample[pos], so from here on in this iteration
				// they inspect the trail byte, not the lead byte. That
				// ordering affects the score and is kept as-is.
				pos = next;
				}
			}

		// Check that no other Japanese escape sequences occur.
		// EUC-JP: byte 0x8e followed by a byte in 0xa1-0xdf.
		if (aSample[pos] == 0x8e)
			{
			const TInt next = pos + 1;
			if (next >= length)
				{
				break;
				}
			const TUint following = aSample[next];
			if (following >= 0xa1 && following <= 0xdf)
				{
				// This could be EUC-JP format: drop the score and step onto
				// the following byte (which the katakana check below then sees).
				aConfidenceLevel = 0;
				pos = next;
				}
			}

		// EUC-JP: byte 0x8f followed by two bytes, each in 0xa1-0xfe.
		if (aSample[pos] == 0x8f)
			{
			// Two more bytes are required; if pos+2 is out of range the scan
			// stops (this also covers pos+1 being out of range).
			const TInt second = pos + 2;
			if (second >= length)
				{
				break;
				}
			const TUint first = aSample[pos + 1];
			const TUint last = aSample[second];
			if (first >= 0xa1 && first <= 0xfe && last >= 0xa1 && last <= 0xfe)
				{
				// This is definitely EUC-JP format.
				aConfidenceLevel = 0;
				break;
				}
			}

		// Check the half width Katakana (single byte 0xa1-0xdf).
		const TUint current = aSample[pos];
		if (current >= 0xa1 && current <= 0xdf)
			{
			// First hit starts the score at 75, every later one adds 5.
			if (aConfidenceLevel > 0)
				{
				aConfidenceLevel += 5;
				}
			else
				{
				aConfidenceLevel = 75;
				}
			++kanaCount;
			++shiftJisCount;
			}
		else if (current >= 0xf0)
			{
			aConfidenceLevel = 0;
			}

		++pos;
		} // while

	if (shiftJisCount != 0)
		{
		// Clamp to 0..100 first ...
		if (aConfidenceLevel > 100)
			{
			aConfidenceLevel = 100;
			}
		else if (aConfidenceLevel < 0)
			{
			aConfidenceLevel = 0;
			}
		// ... then subtract a penalty of up to 30 points when fewer than
		// 30% of the counted characters were kana.
		const TInt kanaPercent = kanaCount * 100 / shiftJisCount;
		aConfidenceLevel -= Max(0, 30 - kanaPercent);
		}

	// The penalty may have pushed the score below zero.
	if (aConfidenceLevel < 0)
		{
		aConfidenceLevel = 0;
		}
	return ETrue;
	}
sl@0
   215
sl@0
   216
sl@0
   217
CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
	{
	// Two-phase construction: allocate (leaving on failure), then run
	// ConstructL() while the object is held on the cleanup stack.
	CShiftJisConverterImpl* const converter = new(ELeave) CShiftJisConverterImpl();
	CleanupStack::PushL(converter);
	converter->ConstructL();
	CleanupStack::Pop(converter);
	return converter;
	}
sl@0
   225
sl@0
   226
sl@0
   227
CShiftJisConverterImpl::~CShiftJisConverterImpl()
	{
	// Counterpart of the FeatureManager::InitializeLibL() call in ConstructL().
	// NOTE(review): this may also run when ConstructL() left before
	// initialisation succeeded - confirm UnInitializeLib() tolerates that.
	FeatureManager::UnInitializeLib();
	}
sl@0
   231
sl@0
   232
CShiftJisConverterImpl::CShiftJisConverterImpl()
	{
	// Nothing to do here: first-phase construction is intentionally empty,
	// all set-up happens in ConstructL().
	}
sl@0
   235
sl@0
   236
sl@0
   237
void CShiftJisConverterImpl::ConstructL()
sl@0
   238
	{
sl@0
   239
    FeatureManager::InitializeLibL();	
sl@0
   240
	}
sl@0
   241
sl@0
   242
// Proxy table handed out by ImplementationGroupProxy(): pairs the
// implementation UID 0x10000FBD with the CShiftJisConverterImpl factory.
const TImplementationProxy ImplementationTable[] =
	{
	IMPLEMENTATION_PROXY_ENTRY(0x10000FBD, CShiftJisConverterImpl::NewL)
	};
sl@0
   246
sl@0
   247
sl@0
   248
EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
	{
	// Report how many entries ImplementationTable holds and return the table.
	aTableCount = sizeof(ImplementationTable) / sizeof(ImplementationTable[0]);
	return ImplementationTable;
	}