os/textandloc/charconvfw/charconvplugins/src/shared/shiftjis_shared.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200 (2014-06-10)
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <charconv.h>
sl@0
    21
#include <convutils.h>
sl@0
    22
#include "jisx0201.h"
sl@0
    23
#include "jisx0208.h"
sl@0
    24
#include "shiftjis.h"
sl@0
    25
#include <convdata.h>
sl@0
    26
#include "charconv_tls.h"
sl@0
    27
#include "charconv_table_utilities.h"
sl@0
    28
sl@0
    29
// Lead byte ranges for 2-Byte Shift-JIS sequences
sl@0
    30
const TUint KSingleByteRangeFirstBlockEnd=0x80;
sl@0
    31
const TUint KSingleByteRangeSecondBlockStart=0xa0;
sl@0
    32
const TUint KSingleByteRangeSecondBlockEnd=0xdf;
sl@0
    33
const TUint KLeadByteMax = 0xfc;
sl@0
    34
sl@0
    35
//Trail byte ranges for 2-Byte Shift-JIS sequences
sl@0
    36
const TUint KTrailByteMin = 0x40 ;
sl@0
    37
const TUint KTrailByteMax = 0xFC ;
sl@0
    38
const TUint KTrailByteIllegal = 0x7F ;
sl@0
    39
sl@0
    40
const TUint8 KSJISLineFeed = 0x0a ;	// Shift-Jis value for Line Feed
sl@0
    41
const TUint8 KSJISCarriageReturn = 0x0d;	// Shift-Jis value for Carriage Return
sl@0
    42
const TUint16 KUnicodeLineFeed = 0x2028; // Unicode Line Feed
sl@0
    43
const TUint16 KUnicodeParagraphSeperator = 0x2029; // Unicode Paragraph seperator
sl@0
    44
const TUint16 KUnicodeCarriageReturn = KSJISCarriageReturn ;
sl@0
    45
sl@0
    46
sl@0
    47
/** 
sl@0
    48
 * Get the Shift-JIS replacement for Unicode characters which cannot be converted
sl@0
    49
 * 
sl@0
    50
 * @return const TDesC8& 8-bit descriptor containing the Shift-JIS data which will replace any untranslatable Unicode characters.
sl@0
    51
 * @since Internationalization_6.2
sl@0
    52
 * @internalTechnology
sl@0
    53
 */
sl@0
    54
EXPORT_C const TDesC8& CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters()
sl@0
    55
	{
sl@0
    56
	return ReplacementForUnconvertibleUnicodeCharacters_internal();
sl@0
    57
	}
sl@0
    58
sl@0
    59
sl@0
    60
/**
sl@0
    61
 * Converts text from Unicode to Shift-JIS 
sl@0
    62
 * 
sl@0
    63
 * @since Internationalization_6.2
sl@0
    64
 * @internalTechnology 
sl@0
    65
 */
sl@0
    66
EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
sl@0
    67
                                              const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign,
sl@0
    68
                                              const TDesC16& aUnicode,
sl@0
    69
                                              CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
sl@0
    70
	{
sl@0
    71
	return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    72
	                            aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
sl@0
    73
	}
sl@0
    74
sl@0
    75
/**
sl@0
    76
 * Converts text from Unicode to Shift-JIS 
sl@0
    77
 * Note that CnvShiftJis does NOT support extensions through use of additional character sets,
sl@0
    78
 * instead multiple versions are built for the different (operator defined) implementations
sl@0
    79
 * of Shift-JIS which need to be supported and the correct one installed at ROM build time,
sl@0
    80
 * and this method is only supplied for purposes of interface compatibility. 
sl@0
    81
 * @since Internationalization_6.2
sl@0
    82
 * @internalTechnology
sl@0
    83
 */
sl@0
    84
EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, 
sl@0
    85
                                              const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    86
                                              TDes8& aForeign, const TDesC16& aUnicode, 
sl@0
    87
                                              CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
sl@0
    88
                                              const TArray<CnvUtilities::SCharacterSet>& /* aArrayOfAdditionalCharacterSets */)
sl@0
    89
	{
sl@0
    90
	return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    91
	                            aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
sl@0
    92
	}
sl@0
    93
sl@0
    94
/**
 * Converts text from Shift-JIS to Unicode.
 * 
 * The endianness argument is ignored; all the work is delegated to
 * DoConvertToUnicode().
 * 
 * @return Whatever DoConvertToUnicode() returns for the same buffers.
 */
EXPORT_C TInt CnvShiftJis::ConvertToUnicode(
	CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
	TDes16& aUnicode,
	const TDesC8& aForeign,
	TInt& aNumberOfUnconvertibleCharacters,
	TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
	{
	const TInt result = DoConvertToUnicode(aUnicode,
	                                       aForeign,
	                                       aNumberOfUnconvertibleCharacters,
	                                       aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
	return result ;
	}
sl@0
   101
sl@0
   102
sl@0
   103
/**
sl@0
   104
 * Converts text from Shift-JIS to Unicode 
sl@0
   105
 * 
sl@0
   106
 * @since Internationalization_6.2
sl@0
   107
 * @internalTechnology
sl@0
   108
 */
sl@0
   109
EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, 
sl@0
   110
                                            TDes16& aUnicode, const TDesC8& aForeign, 
sl@0
   111
                                            TInt& aNumberOfUnconvertibleCharacters, 
sl@0
   112
                                            TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
sl@0
   113
                                            const TArray<CnvUtilities::SMethod>& /* aArrayOfAdditionalMethods */)
sl@0
   114
	{
sl@0
   115
	return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters, 
sl@0
   116
                              aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
sl@0
   117
sl@0
   118
	}
sl@0
   119
	
sl@0
   120
	
sl@0
   121
sl@0
   122
/**
sl@0
   123
This function actually does the work of converting Shift-JIS input to unicode output.
sl@0
   124
sl@0
   125
@param TDes16& aUnicode The output buffer
sl@0
   126
@param const TDesC8& aForeign The input buffer
sl@0
   127
@param TInt& aNumberOfUnconvertibleCharacters Number of input characters which were processed but couldn't be converted. 
sl@0
   128
@param TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter Locates first untranslatable character in input buffer
sl@0
   129
@return CCnvCharacterSetConverter::EErrorIllFormedInput if the input was invalid, otherwise the number of bytes in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
sl@0
   130
@internalTechnology
sl@0
   131
*/
sl@0
   132
	
sl@0
   133
TInt CnvShiftJis::DoConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, 
sl@0
   134
                                     TInt& aNumberOfUnconvertibleCharacters, 
sl@0
   135
                                     TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
sl@0
   136
	{
sl@0
   137
	TUint foreignBytePointer = 0 ;
sl@0
   138
	TUint row ;
sl@0
   139
	TUint column ;
sl@0
   140
	TUint8 foreignCharByte ;
sl@0
   141
	TUint8 foreignCharStart ;
sl@0
   142
	TChar unicodeChar ;
sl@0
   143
	TBool finished = EFalse ;
sl@0
   144
	TInt charsConverted = 0 ;
sl@0
   145
	TUint16 unicodeUnmappedCharacter = getUnicodeUnmappedCharacter() ;
sl@0
   146
	TInt unicodeBufferLength = aUnicode.MaxLength() ;
sl@0
   147
	TInt foreignDataLength = aForeign.Length() ;
sl@0
   148
		
sl@0
   149
	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1 ;
sl@0
   150
	aNumberOfUnconvertibleCharacters = 0 ;
sl@0
   151
sl@0
   152
	// Check for valid input and output buffers
sl@0
   153
	if (( unicodeBufferLength == 0) || foreignDataLength == 0)
sl@0
   154
		{
sl@0
   155
		finished = ETrue ;		
sl@0
   156
		}
sl@0
   157
		
sl@0
   158
	// Reset output buffer	
sl@0
   159
	aUnicode.Zero() ;
sl@0
   160
sl@0
   161
	// Perform conversion		
sl@0
   162
	while (!finished)
sl@0
   163
		{
sl@0
   164
		foreignCharStart = foreignBytePointer ;
sl@0
   165
		foreignCharByte = aForeign[foreignBytePointer++] ;
sl@0
   166
sl@0
   167
		// Look for (and handle) CR/LF pairs in ShiftJis input stream.
sl@0
   168
		// It is a specific requirement from Symbian KK that CR/LF pairs
sl@0
   169
		// in the input stream be converted to Unicode LF characters 
sl@0
   170
		if((KSJISCarriageReturn == foreignCharByte) && (foreignBytePointer < foreignDataLength))
sl@0
   171
			{
sl@0
   172
			// check next byte
sl@0
   173
			if(KSJISLineFeed == aForeign[foreignBytePointer]) 
sl@0
   174
				{
sl@0
   175
				// CR code
sl@0
   176
				unicodeChar = KUnicodeLineFeed ;
sl@0
   177
				foreignBytePointer++ ;
sl@0
   178
				}
sl@0
   179
			else
sl@0
   180
				{					
sl@0
   181
				unicodeChar = KUnicodeCarriageReturn ;
sl@0
   182
				}
sl@0
   183
			}
sl@0
   184
		else
sl@0
   185
			{
sl@0
   186
			if (((foreignCharByte > KSingleByteRangeFirstBlockEnd) && 
sl@0
   187
			     (foreignCharByte < KSingleByteRangeSecondBlockStart)) ||
sl@0
   188
			    ((foreignCharByte > KSingleByteRangeSecondBlockEnd) &&
sl@0
   189
			     (foreignCharByte <= KLeadByteMax)))
sl@0
   190
				{
sl@0
   191
				if (foreignBytePointer < foreignDataLength)
sl@0
   192
					{
sl@0
   193
					// Potential 2 byte shiftJis character
sl@0
   194
					row = foreignCharByte ;
sl@0
   195
					column = aForeign[foreignBytePointer] ;
sl@0
   196
					if (((column <= KTrailByteMax) && (column >= KTrailByteMin)) && column != KTrailByteIllegal)
sl@0
   197
						{
sl@0
   198
						foreignBytePointer++ ;	
sl@0
   199
						unicodeChar = lookupUnicodeChar(row,column) ;	
sl@0
   200
						}
sl@0
   201
					else
sl@0
   202
						{
sl@0
   203
						unicodeChar = unicodeUnmappedCharacter ;
sl@0
   204
						}
sl@0
   205
					}
sl@0
   206
				else
sl@0
   207
					{
sl@0
   208
					// Only got the first byte of a 2 byte character
sl@0
   209
					// reset "read" pointer to beginning of character
sl@0
   210
					// and bail out!
sl@0
   211
					finished = ETrue ;
sl@0
   212
					foreignBytePointer-- ;
sl@0
   213
					continue ;
sl@0
   214
					}
sl@0
   215
				}
sl@0
   216
			else
sl@0
   217
				{
sl@0
   218
				//Probably a single byte shiftJis chracter
sl@0
   219
				row = 0 ;
sl@0
   220
				column = foreignCharByte ;
sl@0
   221
				unicodeChar = lookupUnicodeChar(row,column) ;
sl@0
   222
				}
sl@0
   223
			}
sl@0
   224
			
sl@0
   225
sl@0
   226
		// Check for unconvertible characters.
sl@0
   227
		if (unicodeChar == unicodeUnmappedCharacter)
sl@0
   228
			{
sl@0
   229
			if (aIndexOfFirstByteOfFirstUnconvertibleCharacter == -1)
sl@0
   230
				{
sl@0
   231
				aIndexOfFirstByteOfFirstUnconvertibleCharacter = foreignCharStart ;
sl@0
   232
				}
sl@0
   233
			aNumberOfUnconvertibleCharacters++ ;
sl@0
   234
			}
sl@0
   235
			
sl@0
   236
		// Append the converted (or not!) character to the output buffer
sl@0
   237
		aUnicode.Append(unicodeChar);
sl@0
   238
		charsConverted++ ;
sl@0
   239
		
sl@0
   240
		// Check for end of input buffer or output buffer full
sl@0
   241
		if ((charsConverted >= unicodeBufferLength) || (foreignBytePointer >= foreignDataLength))
sl@0
   242
			finished = ETrue ;
sl@0
   243
		}
sl@0
   244
	
sl@0
   245
	// Evaluate success of the operation and either return error code (currently just 
sl@0
   246
	// invalid input) or return number of un-processed characters in input buffer in
sl@0
   247
	// case of output buffer being filled before input fully consumed (0 means all
sl@0
   248
	// characters consumed)
sl@0
   249
	TInt returnValue ;
sl@0
   250
	if (foreignDataLength && !charsConverted)
sl@0
   251
		{
sl@0
   252
		// Input must contain at least one complete character to be considered valid Shift-JIS.
sl@0
   253
		returnValue = CCnvCharacterSetConverter::EErrorIllFormedInput ;
sl@0
   254
		}
sl@0
   255
	else
sl@0
   256
		{
sl@0
   257
		returnValue = foreignDataLength - foreignBytePointer;
sl@0
   258
		}
sl@0
   259
	return returnValue ;
sl@0
   260
	}
sl@0
   261
	
sl@0
   262
sl@0
   263
sl@0
   264
	
sl@0
   265
/**
sl@0
   266
This function actually does the work of converting converting unicode input to Shift-JIS  output.
sl@0
   267
sl@0
   268
@param const TDesC8& aReplacementForUnconvertibleUnicodeCharacters byte sequence to be be used as output for unicode characters which have no mapping defined.
sl@0
   269
@param TDes16& aUnicode The input buffer
sl@0
   270
@param const TDesC8& aForeign The output buffer
sl@0
   271
@return The number of unicode characters in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
sl@0
   272
@internalTechnology
sl@0
   273
*/
sl@0
   274
TInt CnvShiftJis::DoConvertFromUnicode(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
   275
	                                   TDes8& aForeign, const TDesC16& aUnicode, 
sl@0
   276
	                                   CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
sl@0
   277
	{
sl@0
   278
	TUint unicodePointer = 0 ;
sl@0
   279
	TUint row ;
sl@0
   280
	TUint column ;
sl@0
   281
	TUint8 foreignCharByte ;
sl@0
   282
	TUint16 foreignChar ;
sl@0
   283
	TUint16 unicodeChar ;
sl@0
   284
	TBool finished = EFalse ;
sl@0
   285
	TInt charsConverted = 0 ;
sl@0
   286
	TInt unicodeLength = aUnicode.Length() ;
sl@0
   287
	TInt foreignMaxLength = aForeign.MaxLength() ;
sl@0
   288
	TUint16 foreignUnmappedCharacter = getForeignUnmappedCharacter() ;
sl@0
   289
	CCnvCharacterSetConverter::TDowngradeForExoticLineTerminatingCharacters downgradeForExoticLineTerminatingCharacters = CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed;
sl@0
   290
	TBool downgradeExoticLineTerminatingCharacters	= EFalse ;
sl@0
   291
	
sl@0
   292
	
sl@0
   293
	// Check for valid input and output buffers
sl@0
   294
	if ((unicodeLength == 0) ||  foreignMaxLength == 0)
sl@0
   295
		{
sl@0
   296
		finished = ETrue ;		
sl@0
   297
		}
sl@0
   298
	
sl@0
   299
	// If we've been called from an instance of CCnvCharacterSetConverter we can retrieve its state
sl@0
   300
	// from Thread Local Storage. This really isn't very nice but there's no other way we can get
sl@0
   301
	// hold of settings like downgrade for unicode line termination characters without breaking
sl@0
   302
	// compatibility with the existing plug-in interface!
sl@0
   303
	CCnvCharacterSetConverter* currentCharacterSetConverter = (CCnvCharacterSetConverter*)TTlsData::CurrentCharacterSetConverter();
sl@0
   304
	if (currentCharacterSetConverter)
sl@0
   305
		{
sl@0
   306
		downgradeForExoticLineTerminatingCharacters = currentCharacterSetConverter->GetDowngradeForExoticLineTerminatingCharacters() ;
sl@0
   307
		downgradeExoticLineTerminatingCharacters = ETrue ;
sl@0
   308
		}
sl@0
   309
		
sl@0
   310
	// Reset output buffer
sl@0
   311
	aForeign.Zero();
sl@0
   312
		
sl@0
   313
	// Process input buffer
sl@0
   314
	while (!finished)
sl@0
   315
		{
sl@0
   316
		
sl@0
   317
		// Look up foreign Char
sl@0
   318
		unicodeChar = aUnicode[unicodePointer] ;
sl@0
   319
		
sl@0
   320
		// Check for any downgrade of Unicode line endings characters required if we've got
sl@0
   321
		// a Unicode Line-Feed or Paragraph-Seperator character to deal with. 
sl@0
   322
		if (downgradeExoticLineTerminatingCharacters && 
sl@0
   323
		    ((unicodeChar==KUnicodeLineFeed) || (unicodeChar==KUnicodeParagraphSeperator)))
sl@0
   324
			{
sl@0
   325
			if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed)
sl@0
   326
				{
sl@0
   327
				if (aForeign.Length() < (foreignMaxLength - 1))
sl@0
   328
					{
sl@0
   329
					aForeign.Append(KSJISCarriageReturn) ;
sl@0
   330
					aForeign.Append(KSJISLineFeed) ;
sl@0
   331
					charsConverted++ ;
sl@0
   332
					}
sl@0
   333
					else
sl@0
   334
					{
sl@0
   335
					// Foreign buffer full!
sl@0
   336
					finished = ETrue;
sl@0
   337
					}
sl@0
   338
				}
sl@0
   339
			else if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToJustLineFeed)
sl@0
   340
				{
sl@0
   341
				if (aForeign.Length() < foreignMaxLength)
sl@0
   342
					{
sl@0
   343
					aForeign.Append(KSJISLineFeed) ;
sl@0
   344
					charsConverted++ ;
sl@0
   345
					}
sl@0
   346
					else
sl@0
   347
					{
sl@0
   348
					// Foreign buffer full!
sl@0
   349
					finished = ETrue;
sl@0
   350
					}
sl@0
   351
				}		
sl@0
   352
			}
sl@0
   353
		else
sl@0
   354
			{	
sl@0
   355
			row = unicodeChar / 256 ;
sl@0
   356
			column = unicodeChar % 256 ;		
sl@0
   357
			foreignChar = lookupForeignChar(row, column) ;		
sl@0
   358
sl@0
   359
			// Check for untranslatable character 
sl@0
   360
			if ((foreignChar == foreignUnmappedCharacter) &&
sl@0
   361
			    (aForeign.Length() < (foreignMaxLength - 1)))
sl@0
   362
				{
sl@0
   363
				aIndicesOfUnconvertibleCharacters.AppendIndex(unicodePointer) ;
sl@0
   364
				aForeign.Append(aReplacementForUnconvertibleUnicodeCharacters) ;
sl@0
   365
				}	
sl@0
   366
			else if ((foreignChar <= 0xFF) && (aForeign.Length() < foreignMaxLength))
sl@0
   367
				{
sl@0
   368
				// Single byte character
sl@0
   369
				foreignCharByte = (TUint8) foreignChar ;
sl@0
   370
				aForeign.Append(foreignCharByte) ;
sl@0
   371
				charsConverted++ ;
sl@0
   372
				}
sl@0
   373
			else if (aForeign.Length() < (foreignMaxLength - 1))
sl@0
   374
				{
sl@0
   375
				// Two byte character
sl@0
   376
				foreignCharByte = (TUint8) (foreignChar >> 8 ) ;
sl@0
   377
				aForeign.Append(foreignCharByte) ;
sl@0
   378
				foreignCharByte = (TUint8) (foreignChar & 0xFF) ;
sl@0
   379
				aForeign.Append(foreignCharByte) ;
sl@0
   380
				charsConverted++ ;
sl@0
   381
				}
sl@0
   382
			else
sl@0
   383
				{
sl@0
   384
				// Foreign buffer full!
sl@0
   385
				finished = ETrue;
sl@0
   386
				}
sl@0
   387
			}
sl@0
   388
		
sl@0
   389
		// Check for terminating condition (input buffer consumed or output buffer full)	
sl@0
   390
		if (!finished && (++unicodePointer >= unicodeLength))
sl@0
   391
			{
sl@0
   392
			finished = ETrue ;
sl@0
   393
			}
sl@0
   394
		}
sl@0
   395
	
sl@0
   396
	// Return number of input characters *not* processsed (will be zero unless output
sl@0
   397
	// buffer has been filled before all input consumed)	
sl@0
   398
	return unicodeLength - unicodePointer;
sl@0
   399
	}
sl@0
   400