sl@0: /* sl@0: * Copyright (c) 2002 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: Implements the characterconversion plug-in sl@0: * for ISCII characterset. sl@0: * sl@0: */ sl@0: sl@0: sl@0: sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: sl@0: //The maximum length of any intermediate buffer allocated for conversion. sl@0: const TInt KMaximumLengthOfIntermediateBuffer=5; sl@0: //The ISCII ATR code point, used for ISCII script switching mechanism. sl@0: const TUint KControlCharacterEscape=0xef; sl@0: //The number of Indic scripts supported by the plug-in. sl@0: //ISCII in general addresses all the Indic character sets. sl@0: const TUint KNumberOfIndicCharactersetsSupported = 1; sl@0: //The common reason for panic for all panics raised by the iscii plug-in sl@0: _LIT16(KPanicReason,"ISCII Plug-in Panic"); sl@0: //The escape sequence for ISCII (ATR) is 0xEF and immidiate byte following sl@0: //that is the script selection code for Devanagari. sl@0: _LIT8(KEscapeSequenceDevanagari,"\xef\x42"); sl@0: //The sequence for Explicit Halant, in unicode it gets converted to VIRAMA+ZWNJ sl@0: _LIT8(KExplicitHalant,"\xe8\xe8"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForExplicitHalant,"\xe8\xfc"); sl@0: //The sequence for Soft Halant, in unicode it gets converted to VIRAMA+ZWJ sl@0: _LIT8(KSoftHalant,"\xe8\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForSoftHalant,"\xe8\xfd"); sl@0: //Devanagari character Om sl@0: _LIT8(KOm,"\xa1\xe9"); sl@0: ////For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForOm,"\xfe"); sl@0: //Devanagari character Avagraha sl@0: _LIT8(KAvagraha,"\xea\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForAvagraha,"\xff"); sl@0: sl@0: //Devanagari character VOCALIC RR sl@0: _LIT8(KVocalicRr,"\xaa\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForVocalicRr,"\x80"); sl@0: //Devanagari character VOCALIC LL sl@0: _LIT8(KVocalicLl,"\xa7\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForVocalicLl,"\x81"); sl@0: //Devanagari character VOCALIC L SIGN sl@0: _LIT8(KVocalicLSign,"\xdb\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForVocalicLSign,"\x82"); sl@0: //Devanagari character VOCALIC LL SIGN sl@0: _LIT8(KVocalicLlSign,"\xdc\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForVocalicLlSign,"\x83"); sl@0: //Devanagari character VOCALIC L sl@0: _LIT8(KVocalicL,"\xa6\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForVocalicL,"\x84"); sl@0: //Devanagari character VOCALIC RR SIGN sl@0: _LIT8(KVocalicRrSign,"\xdf\xe9"); sl@0: //For supportiing ISCII to Unicode conversion of multi byte ISCII sequences, the sl@0: //sequence is converted to intermediate unused iscii code point. sl@0: _LIT8(KReplacementForVocalicRrSign,"\x85"); sl@0: sl@0: //Unconvertible ISCII character sl@0: _LIT8(KIsciiUnconvertibleCharacter,"\xeb"); sl@0: sl@0: enum TPanic sl@0: { sl@0: //The panic raised by ConvertToUnicodeFromModalForeign_Internal() if the input sl@0: //conversion flag is CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter sl@0: EPanicBadInputConversionFlags=1, sl@0: //Panic raised if the buffer does not start with the escape sequence 0xEF sl@0: EPanicBadRemainderOfForeign, sl@0: //Panic is raised if the length of the search buffer is greater than the length of the sl@0: //replacement buffer sl@0: EPanicBadReplacementBuffer, sl@0: //If the offset of start of the escape sequence is not an unsigned number. sl@0: EPanicBadStartOfNextEscapeSequence sl@0: }; sl@0: sl@0: //The dummy datastructure for the dummy conversion data i.e. used for conversion if the sl@0: //script selection code is not supported. sl@0: #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0])) sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_foreignToUnicode_1[]= sl@0: { sl@0: { sl@0: 0xa0, sl@0: 0xfffd sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToForeign_1[]= sl@0: { sl@0: { sl@0: 0xfffd, sl@0: 0xa0 sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SVariableByteData::SRange foreignVariableByteDataRanges[]= sl@0: { sl@0: { sl@0: 0x00, sl@0: 0xff, sl@0: 0, sl@0: 0 sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange foreignToUnicodeDataRanges[]= sl@0: { sl@0: { sl@0: 0x00, sl@0: 0x7f, sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 0, sl@0: 0, sl@0: { sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0xa0, sl@0: 0xff, sl@0: SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, sl@0: 0, sl@0: 0, sl@0: { sl@0: UData_SKeyedTable1616(keyedTable1616_foreignToUnicode_1) sl@0: } sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToForeignDataRanges[]= sl@0: { sl@0: { sl@0: 0x0000, sl@0: 0x007f, sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 1, sl@0: 0, sl@0: { sl@0: 0 sl@0: } sl@0: }, sl@0: { sl@0: 0x00a0, sl@0: 0xffff, sl@0: SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, sl@0: 1, sl@0: 0, sl@0: { sl@0: UData_SKeyedTable1616(keyedTable1616_unicodeToForeign_1) sl@0: } sl@0: } sl@0: }; sl@0: sl@0: //The dummy conversion data to be used for conversion if the iscii code sequence is not sl@0: //Devanagari (i.e. the script selection code is not 0x42 and something else. sl@0: //In this case the ISCII characters are converted to unconvertible characters. sl@0: sl@0: LOCAL_D const SCnvConversionData conversionDataDummy= sl@0: { sl@0: SCnvConversionData::EFixedBigEndian, sl@0: { sl@0: ARRAY_LENGTH(foreignVariableByteDataRanges), sl@0: foreignVariableByteDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(foreignToUnicodeDataRanges), sl@0: foreignToUnicodeDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(unicodeToForeignDataRanges), sl@0: unicodeToForeignDataRanges sl@0: }, sl@0: NULL, sl@0: NULL sl@0: }; sl@0: sl@0: sl@0: sl@0: #ifdef EKA2 sl@0: sl@0: /////////////////////////////////////////////////////////////// sl@0: // 3.1 Code sl@0: sl@0: // INCLUDES sl@0: #include sl@0: #include sl@0: sl@0: sl@0: /** sl@0: * The character conversion plug-in implementation for Iscii. sl@0: * sl@0: * @lib ecom.lib sl@0: * @since Series 60 3.1 sl@0: */ sl@0: sl@0: class CIsciiImplementation : public CCharacterSetConverterPluginInterface sl@0: { sl@0: public: sl@0: //From CCharacterSetConverterPluginInterface sl@0: virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); sl@0: sl@0: //From CCharacterSetConverterPluginInterface sl@0: virtual TInt ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters ); sl@0: sl@0: //From CCharacterSetConverterPluginInterface sl@0: virtual TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt&, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter ); sl@0: sl@0: //From CCharacterSetConverterPluginInterface sl@0: virtual TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& ); sl@0: sl@0: static CIsciiImplementation* NewL(); sl@0: sl@0: virtual ~CIsciiImplementation(); sl@0: private: sl@0: CIsciiImplementation(); sl@0: }; sl@0: sl@0: //Checks if a descriptor starts with another descriptor at the begining. sl@0: LOCAL_C TBool IsStartOf(const TDesC8& aEscapeSequence, const TDesC8& aBuffer) sl@0: { sl@0: const TInt lengthOfStart=aEscapeSequence.Length(); sl@0: return (aBuffer.Length()>=lengthOfStart) && (aBuffer.Left(lengthOfStart)==aEscapeSequence); sl@0: } sl@0: // ----------------------------------------------------------------------------- sl@0: // MatchesEscapeSequence() sl@0: //If the remainder of the foreign text starts with the passed escapesequence, modifies the remainder of the foreign text sl@0: //and sets the homogeneous run buffer that uses the same conversion data. sl@0: //The homogeneous run buffer is the buffer that will use a single conversion data, it doesn't contain the attribute code sl@0: // neither it contains the script switching code. sl@0: //The aRemainderOfForeign buffer sl@0: sl@0: // Returns: ETrue: If the sequence contains the escape sequence sl@0: // EFalse: If the sequence does not contain the escape sequence sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: LOCAL_C TBool MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, sl@0: TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence) sl@0: { sl@0: const TInt lengthOfEscapeSequence=aEscapeSequence.Length(); sl@0: if (IsStartOf(aEscapeSequence, aRemainderOfForeign)) sl@0: { sl@0: aRemainderOfForeign.Set(aRemainderOfForeign.Mid(lengthOfEscapeSequence)); sl@0: const TInt startOfNextEscapeSequence=aRemainderOfForeign.Locate(KControlCharacterEscape); sl@0: if (startOfNextEscapeSequence==KErrNotFound) sl@0: { sl@0: aHomogeneousRun.Set(aRemainderOfForeign); sl@0: aRemainderOfForeign.Set(NULL, 0); sl@0: } sl@0: else sl@0: { sl@0: aHomogeneousRun.Set(aRemainderOfForeign.Left(startOfNextEscapeSequence)); sl@0: aRemainderOfForeign.Set(aRemainderOfForeign.Mid(startOfNextEscapeSequence)); sl@0: } sl@0: aNumberOfForeignBytesConsumed+=lengthOfEscapeSequence; sl@0: return ETrue; sl@0: } sl@0: return EFalse; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // NextHomogeneousForeignRun() sl@0: //Matches the escape sequence of each of the elements of the SState array with the remainder of sl@0: //foreign text and if the escape sequence matches with the start of remainder of the foreign text, sl@0: //then the conversion data is set to the conversion data corresponding to the escape sequence sl@0: //Also the homogeneous foreign text for conversion with the same escape sequence is set. sl@0: sl@0: // Returns: ETrue: If length of the remainder of foreign buffer is nonzero. sl@0: // EFalse: If length of the remainder of foreign buffer is zero. sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: sl@0: LOCAL_C TBool NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray& aArrayOfStates, TUint& aOutputConversionFlags) sl@0: { sl@0: TBool returnValue = EFalse; sl@0: TBool foundState = EFalse; sl@0: __ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), User::Panic(KPanicReason,EPanicBadRemainderOfForeign)); sl@0: if (aRemainderOfForeign.Length()==0) sl@0: { sl@0: return returnValue; sl@0: } sl@0: const TInt numberOfStates=aArrayOfStates.Count(); sl@0: TInt i; sl@0: for (i=0; i0) sl@0: { sl@0: returnValue = ETrue; sl@0: } sl@0: return returnValue; sl@0: } sl@0: // ----------------------------------------------------------------------------- sl@0: // ConvertFromUnicodeIntermediateBufferInPlace() sl@0: //Default implementation for conversion to the intermediate buffer sl@0: //It modifies the unicode buffer before it is converted back to iscii. sl@0: //The current implementation of iscii plug-in doesn't require any sl@0: //modification to the default implementation sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: sl@0: LOCAL_C void ConvertFromUnicodeIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut) sl@0: { sl@0: CnvUtilities::ConvertFromIntermediateBufferInPlace(aStartPositionInDescriptor, aDescriptor, aNumberOfCharactersThatDroppedOut, KEscapeSequenceDevanagari, 1); sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // DoFindAndModifyBuffer() sl@0: //Modifies the iscii buffer by replacing the search buffer with the replacement buffer. sl@0: //Introduced for handling multibyte iscii sequence. sl@0: //Takes the search buffer array and the replacement buffer arrays as input to it and replaces all sl@0: //the occurances of the search buffer with the corresponding replace buffer. sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C void DoFindAndModifyBuffer(TDes8& aModifyBuffer,const TDesC8& aSearchBuffer,const TDesC8& aReplaceBuffer) sl@0: { sl@0: FOREVER sl@0: { sl@0: TInt offset; sl@0: __ASSERT_ALWAYS((aSearchBuffer.Length()>= aReplaceBuffer.Length()),User::Panic(KPanicReason,EPanicBadReplacementBuffer)); sl@0: if((offset = aModifyBuffer.Find(aSearchBuffer)) != KErrNotFound) sl@0: { sl@0: TUint8 *pointerToBuffer = const_cast (aModifyBuffer.Ptr()); sl@0: Mem::Copy(pointerToBuffer+offset,aReplaceBuffer.Ptr(),aReplaceBuffer.Length()); sl@0: Mem::Copy(pointerToBuffer+offset+aReplaceBuffer.Length(),pointerToBuffer+offset+aSearchBuffer.Length(),aModifyBuffer.Length()-aSearchBuffer.Length()-offset); sl@0: aModifyBuffer.SetLength(aModifyBuffer.Length() - aSearchBuffer.Length() + aReplaceBuffer.Length()); sl@0: } sl@0: else sl@0: break; sl@0: } sl@0: sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // FindAndModifyBuffer() sl@0: //Modifies the iscii buffer by replacing the search buffer with the replacement buffer. sl@0: //Calls DoFindAndModifyBuffer() and supplies the search buffer and replacement buffer. sl@0: //Introduced for handling multibyte iscii sequence. sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C void FindAndModifyBuffer(TDes8& aModifyBuffer) sl@0: { sl@0: TInt ret = KErrNone; sl@0: RArray searchBuffer; sl@0: RArray replaceBuffer; sl@0: sl@0: //If the passed buffer contains the replacement buffer, sl@0: //Then it should not get converted to respective Unicode sl@0: //buffer rather it should get converted to replacement for sl@0: //unconvertible character. sl@0: sl@0: ret |= searchBuffer.Append(KReplacementForExplicitHalant().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForSoftHalant().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForOm().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForAvagraha().Right(1)); sl@0: sl@0: ret |= searchBuffer.Append(KReplacementForVocalicRr().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForVocalicLl().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForVocalicLSign().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForVocalicLlSign().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForVocalicL().Right(1)); sl@0: ret |= searchBuffer.Append(KReplacementForVocalicRrSign().Right(1)); sl@0: sl@0: //All normal search buffers sl@0: ret |= searchBuffer.Append(KExplicitHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KSoftHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KOm().Mid(0)); sl@0: ret |= searchBuffer.Append(KAvagraha().Mid(0)); sl@0: sl@0: ret |= searchBuffer.Append(KVocalicRr().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicLl().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicLSign().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicLlSign().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicL().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicRrSign().Mid(0)); sl@0: sl@0: //The replacement buffer for the odd cases to restrict the sl@0: //replacement buffers not to get converted to the corresponding sl@0: //unicode buffer sl@0: sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: ret |= replaceBuffer.Append(KIsciiUnconvertibleCharacter().Mid(0)); sl@0: sl@0: //All normal replace buffers sl@0: ret |= replaceBuffer.Append(KReplacementForExplicitHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForSoftHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForOm().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForAvagraha().Mid(0)); sl@0: sl@0: sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicRr().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicLl().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicLSign().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicLlSign().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicL().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicRrSign().Mid(0)); sl@0: sl@0: __ASSERT_DEBUG(!ret, User::Panic(_L("RArray append failure"), ret)); sl@0: sl@0: for(TInt counter=0;counter& anArrayOfSearches,RArray& anArrayOfReplaces) sl@0: { sl@0: TPtr8 buffer(const_cast(aBuffer.Ptr()),aBuffer.Length(),aBuffer.Length()); sl@0: TUint count = anArrayOfSearches.Count(); sl@0: FOREVER sl@0: { sl@0: TBool flag = EFalse; sl@0: for(TUint i=0;i searchBuffer; sl@0: RArray replaceBuffer; sl@0: sl@0: ret |= searchBuffer.Append(KExplicitHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KSoftHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KOm().Mid(0)); sl@0: ret |= searchBuffer.Append(KAvagraha().Mid(0)); sl@0: sl@0: ret |= searchBuffer.Append(KVocalicRr().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicLl().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicLSign().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicLlSign().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicL().Mid(0)); sl@0: ret |= searchBuffer.Append(KVocalicRrSign().Mid(0)); sl@0: sl@0: ret |= replaceBuffer.Append(KReplacementForExplicitHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForSoftHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForOm().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForAvagraha().Mid(0)); sl@0: sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicRr().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicLl().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicLSign().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicLlSign().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicL().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForVocalicRrSign().Mid(0)); sl@0: sl@0: __ASSERT_DEBUG(!ret, User::Panic(_L("RArray append failure"), ret)); sl@0: sl@0: DoNormalizeReturnValue(aReturnValue,aBuffer,searchBuffer,replaceBuffer); sl@0: searchBuffer.Reset(); sl@0: replaceBuffer.Reset(); sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // HandleHomogeneousRun() sl@0: //Handles a homogeneous foreign buffer and converts the foreign buffer to unicode sl@0: //On return the aUnicode argument contains the converted unicode data. sl@0: //Also it sets the return value, returned from the conversion. The return value also sl@0: //takes into account if there is any buffer modification done before passing it to sl@0: //CCnvCharacterSetConverter::DoConvertToUnicode() sl@0: //buffers. sl@0: sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C void HandleHomogeneousRun(const SCnvConversionData*& aConversionData, sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aHomogeneousForeign, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, sl@0: TUint& aOutputConversionFlags, sl@0: TUint aInputConversionFlags,TInt& aNumberOfForeignBytesConsumed, sl@0: TInt& aReturnValue) sl@0: { sl@0: TInt numberOfUnconvertibleCharacters; sl@0: TInt indexOfFirstByteOfFirstUnconvertibleCharacter; sl@0: TUint noOfConsumedBytes = 0; sl@0: if(aConversionData == NULL) sl@0: { sl@0: aReturnValue = CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: return; sl@0: } sl@0: aReturnValue = CCnvCharacterSetConverter::DoConvertToUnicode(*aConversionData,aDefaultEndiannessOfForeignCharacters, sl@0: aUnicode,aHomogeneousForeign,numberOfUnconvertibleCharacters, sl@0: indexOfFirstByteOfFirstUnconvertibleCharacter,aOutputConversionFlags, sl@0: sl@0: //The numberOfUnconvertibleCharacters and indexOfFirstByteOfFirstUnconvertibleCharacter are the values with sl@0: //respect to the intermediate iscii buffer and original values aIndexOfFirstByteOfFirstUnconvertibleCharacter and sl@0: //aNumberOfUnconvertibleCharacters need to be adjusted accordingly. sl@0: sl@0: aInputConversionFlags); sl@0: if(numberOfUnconvertibleCharacters>0) sl@0: { sl@0: if(aNumberOfUnconvertibleCharacters == 0) sl@0: { sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter = aNumberOfForeignBytesConsumed + indexOfFirstByteOfFirstUnconvertibleCharacter; sl@0: } sl@0: aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters; sl@0: } sl@0: noOfConsumedBytes = aHomogeneousForeign.Length(); sl@0: //To Check whether it is really required. sl@0: NormalizeReturnValue(noOfConsumedBytes,aHomogeneousForeign); sl@0: aNumberOfForeignBytesConsumed+=noOfConsumedBytes; sl@0: if(aReturnValue>0) sl@0: { sl@0: TUint normalizedReturnValue = aReturnValue; sl@0: sl@0: //There original iscii buffer copied to an intermediate iscii buffer and then modified sl@0: //and is then passed for conversion. Now, after conversion, the return value needs to sl@0: //be adjusted according to the original buffer. NormalizeReturnValue() does the sl@0: //same thing. sl@0: sl@0: NormalizeReturnValue(normalizedReturnValue,aHomogeneousForeign); sl@0: aNumberOfForeignBytesConsumed-=normalizedReturnValue; sl@0: aReturnValue=normalizedReturnValue; sl@0: } sl@0: sl@0: //The HandleHomogeneousRun() method is called in a loop and once there is some sl@0: //iscii codes converted to unicode, the ConvertToUnicode() should not return sl@0: //CCnvCharacterSetConverter::EErrorIllFormedInput even though the conversion sl@0: //method does not convert any of the iscii codes ppassed. To ensure that once the sl@0: //first non-zero number of iscii codes are converted, the internal input conversion sl@0: //flag is set to EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable. sl@0: sl@0: if(aNumberOfForeignBytesConsumed>0) sl@0: { sl@0: aInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; sl@0: } sl@0: return; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // IsTruncatedDoubleByteIsciiSequence() sl@0: //Checks if anIsciiBuffer is a part of multi byte iscii sequence truncated in the middle. sl@0: //If it is a truncated sequence, then returns ETrue else returns EFalse. sl@0: // sl@0: // Returns: ETrue: If the intermediate input iscii buffer is truncated sl@0: // EFalse: If the intermediate input iscii buffer is not truncated sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C TBool IsTruncatedDoubleByteIsciiSequence(const TDesC8& anIsciiBuffer) sl@0: { sl@0: RArray searchBuffer; sl@0: if(anIsciiBuffer.Length () == 0) sl@0: return EFalse; sl@0: if(anIsciiBuffer[anIsciiBuffer.Length()-1] == 0xEF) sl@0: return ETrue; sl@0: sl@0: TInt appendret = KErrNone; sl@0: appendret |= searchBuffer.Append(KSoftHalant().Mid(0)); sl@0: appendret |= searchBuffer.Append(KOm().Mid(0)); sl@0: appendret |= searchBuffer.Append(KAvagraha().Mid(0)); sl@0: appendret |= searchBuffer.Append(KExplicitHalant().Mid(0)); sl@0: __ASSERT_DEBUG(!appendret, User::Panic(_L("RArray append failure"), appendret)); sl@0: sl@0: TBool ret = EFalse; sl@0: TBool isNotTruncated =EFalse; sl@0: sl@0: //First check if the intermediate iscii buffer is ending with a complete multi byte sequence. sl@0: //If it ends with a complete multi byte sequence, no need to check if the last character of sl@0: //intermediate iscii is same as first character of multi byte iscii sequence. And return EFalse. sl@0: for(TUint counter = 0;counter aArrayOfCharacterSets; sl@0: aArrayOfCharacterSets[0].iConversionData = &conversionData; sl@0: aArrayOfCharacterSets[0].iConvertFromIntermediateBufferInPlace = ConvertFromUnicodeIntermediateBufferInPlace; sl@0: aArrayOfCharacterSets[0].iEscapeSequence = &KEscapeSequenceDevanagari(); sl@0: sl@0: return CnvUtilities::ConvertFromUnicode( sl@0: aDefaultEndiannessOfForeignCharacters, sl@0: aReplacementForUnconvertibleUnicodeCharacters, sl@0: aForeign, sl@0: aUnicode, sl@0: aIndicesOfUnconvertibleCharacters, sl@0: aArrayOfCharacterSets.Array()); sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // ConvertToUnicode() sl@0: //The main conversion function for converting from iscii to unicode sl@0: //Loaded and called by the character conversion framework. sl@0: //To support some of the iscii characters, the input forign buffer is sl@0: //copied to an intermediate buffer and then is then modified and sl@0: //CCnvCharactersetConverter::DoConvertToUnicode() is called with sl@0: //the modified buffer. For extensibility of iscii to other Indic languages sl@0: //it uses CnvUtilities::SState datastructure. CnvUtilities::SState is a sl@0: //Symbian defined class for modal charactersets. The escape sequence sl@0: //is specified to ATR followed by the script selection code and the conversion sl@0: //data is specified to be the conversion for the particular script. For the time sl@0: //being only Devanagari with script selection code 0x42 is supported. If sl@0: //any of the other script codes are used the conversion leads to unconvertible sl@0: //character i.e. 0xFFFD. sl@0: // Returns: The numbet of iscii codes it could not convert. sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: TInt CIsciiImplementation::ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter ) sl@0: { sl@0: aNumberOfUnconvertibleCharacters = 0; sl@0: TUint aOutputConversionFlags = CCnvCharacterSetConverter::KStateDefault; sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; sl@0: TUint internalInputConversionFlags = 0; sl@0: TInt numberOfForeignBytesConsumed=0; sl@0: TPtrC8 remainderOfForeign(aForeign); sl@0: TInt returnValue; sl@0: TBool flag = EFalse; sl@0: TBool isSkipMatchSequence = EFalse; sl@0: const SCnvConversionData* convData; sl@0: //Set the iscii conversion data and escape sequence for Devanagari. sl@0: TFixedArray modals; sl@0: modals[0].iConversionData = &conversionData; sl@0: modals[0].iEscapeSequence = &KEscapeSequenceDevanagari(); sl@0: sl@0: aUnicode.SetLength(0); sl@0: sl@0: //The internal input conversion flag for conversion is set to CCnvCharacterSetConverter::EInputConversionFlagAppend sl@0: //so that for each conversion in the conversion loop, the generated conversion buffer is appened to the aUnicode buffer. sl@0: internalInputConversionFlags |= CCnvCharacterSetConverter::EInputConversionFlagAppend; sl@0: if (aForeign.Length()==0) sl@0: { sl@0: return 0; sl@0: } sl@0: //Get the conversion data from the previous call else the conversion data is set to the default sl@0: //conversion data, i.e. Devanagari. sl@0: convData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): modals[0].iConversionData; sl@0: FOREVER sl@0: { sl@0: TBuf8 intermediateBuffer; sl@0: TUint numberOfForeignBytesConsumedThisTime = 0; sl@0: if((remainderOfForeign.Length() >=KMaximumLengthOfIntermediateBuffer) && (aUnicode.MaxLength()-aUnicode.Length() >=KMaximumLengthOfIntermediateBuffer)) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = KMaximumLengthOfIntermediateBuffer; sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: //If the intermediate buffer is a part of truncated buffer sequence but the sl@0: //actual input buffer is not truncated then truncated sequence is not converted. sl@0: //The intermediate buffer is modified so as not to contain the truncated sequence. sl@0: sl@0: flag = IsTruncatedDoubleByteIsciiSequence(intermediateBuffer); sl@0: if(flag) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime --; sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: } sl@0: sl@0: } sl@0: else sl@0: { sl@0: flag = IsTruncatedDoubleByteIsciiSequence(remainderOfForeign.Left(aUnicode.MaxLength()-aUnicode.Length())); sl@0: if(!flag) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = aUnicode.MaxLength()-aUnicode.Length(); sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: } sl@0: else sl@0: { sl@0: if(aForeign.Length()>(numberOfForeignBytesConsumed+aUnicode.Length())) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = aUnicode.MaxLength()-aUnicode.Length()-1; sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: break; sl@0: } sl@0: else sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = aUnicode.MaxLength()-aUnicode.Length(); sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: } sl@0: } sl@0: } sl@0: sl@0: //The input intermediate iscii buffer is modified with the search and replace sl@0: //buffers. It is required for supporting multibyte iscii sequences. sl@0: FindAndModifyBuffer(intermediateBuffer); sl@0: TPtrC8 remainderOfForeignInternal(intermediateBuffer); sl@0: TPtrC8 homogeneousRun; sl@0: const TInt startOfNextEscapeSequence=intermediateBuffer.Locate(KControlCharacterEscape); sl@0: if (startOfNextEscapeSequence!=0) sl@0: { sl@0: if (startOfNextEscapeSequence==KErrNotFound) sl@0: { sl@0: homogeneousRun.Set(remainderOfForeignInternal); sl@0: remainderOfForeignInternal.Set(NULL, 0); sl@0: } sl@0: else sl@0: { sl@0: __ASSERT_DEBUG(startOfNextEscapeSequence>0, User::Panic(KPanicReason,EPanicBadStartOfNextEscapeSequence)); sl@0: homogeneousRun.Set(remainderOfForeignInternal.Left(startOfNextEscapeSequence)); sl@0: remainderOfForeignInternal.Set(remainderOfForeignInternal.Mid(startOfNextEscapeSequence)); sl@0: } sl@0: isSkipMatchSequence = ETrue; sl@0: } sl@0: FOREVER sl@0: { sl@0: if(!isSkipMatchSequence) sl@0: { sl@0: if (!NextHomogeneousForeignRun(convData, numberOfForeignBytesConsumed, homogeneousRun, sl@0: remainderOfForeignInternal, modals.Array(), aOutputConversionFlags)) sl@0: { sl@0: break; sl@0: } sl@0: } sl@0: HandleHomogeneousRun( convData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, aNumberOfUnconvertibleCharacters, sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter,aOutputConversionFlags,internalInputConversionFlags, sl@0: numberOfForeignBytesConsumed,returnValue); sl@0: if(returnValue<0) sl@0: { sl@0: return returnValue; sl@0: } sl@0: isSkipMatchSequence = EFalse; sl@0: } sl@0: if(returnValue > 0) sl@0: break; sl@0: if ((!flag && (numberOfForeignBytesConsumedThisTime != KMaximumLengthOfIntermediateBuffer)) || (flag && (numberOfForeignBytesConsumedThisTime != (KMaximumLengthOfIntermediateBuffer-1) ))) sl@0: break; sl@0: remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed)); sl@0: } sl@0: //If the number of iscii bytes consumed by the conversion is zero also the output conversion sl@0: //flag is not set to EOutputConversionFlagInputIsTruncated, then return EErrorIllFormedInput. sl@0: if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated)) sl@0: { sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: } sl@0: //Set the conversion data sothat next time when ConvertToUnicode() is called, sl@0: //will use the previous conversion data. sl@0: aState=REINTERPRET_CAST(TInt, convData); sl@0: return aForeign.Length()-numberOfForeignBytesConsumed; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // IsInThisCharacterSetL() sl@0: //The method tells how probable it is that a sample piece of text is encoded in this character set. sl@0: //On return aConfidenceLevel, indicates how confident the function is about its return value. For sl@0: //iscii it is the default implementation and it does not implement the autodetect functionality. sl@0: //Loaded and called by the character conversion framework. sl@0: // sl@0: // Returns: EFalse: To tell CCnvCharacterSetConverter::AutoDetectCharacterSetL() that the plug-in DLL sl@0: // is not implementing a function of this signature and is therefore empty. sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: sl@0: //Default implementation for IsInThisCharacterSetL() sl@0: sl@0: TBool CIsciiImplementation::IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& ) sl@0: { sl@0: aSetToTrue = EFalse; sl@0: aConfidenceLevel = 0; sl@0: return EFalse; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // NewL() sl@0: //Factory function for CIsciiImplementation(). Instantiates a CIsciiImplementation object on heap sl@0: //and returns the pointer to it. sl@0: // sl@0: // Returns: CIsciiImplementation* sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: CIsciiImplementation* CIsciiImplementation::NewL() sl@0: { sl@0: CIsciiImplementation* self = new(ELeave) CIsciiImplementation; sl@0: return self; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // CIsciiImplementation() sl@0: //default constructor, does nothing sl@0: // sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: CIsciiImplementation::CIsciiImplementation() sl@0: { sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // ~CIsciiImplementation() sl@0: //default desstructor, does nothing sl@0: // sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: CIsciiImplementation::~CIsciiImplementation() sl@0: { sl@0: } sl@0: sl@0: // ECOM CREATION FUNCTION sl@0: const TImplementationProxy ImplementationTable[] = sl@0: { sl@0: // Used also in 0x1027508E.rss ( implementation_uid ) sl@0: IMPLEMENTATION_PROXY_ENTRY( 0x1027508E, CIsciiImplementation::NewL ) sl@0: }; sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // ImplementationGroupProxy() sl@0: //Returns a pointer to TImplementationProxy object which contains the implementation uid vs factory sl@0: //function table. Also on return sets the aTableCount to the number of entries in the table. sl@0: // sl@0: // Returns: TImplementationProxy* sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: EXPORT_C const TImplementationProxy* ImplementationGroupProxy( TInt& aTableCount ) sl@0: { sl@0: aTableCount = sizeof( ImplementationTable ) / sizeof(TImplementationProxy); sl@0: return ImplementationTable; sl@0: } sl@0: #else sl@0: sl@0: #include sl@0: sl@0: #ifndef EKA2 sl@0: // ----------------------------------------------------------------------------- sl@0: // E32Dll() sl@0: //For EKA1 this is the entry point for the DLL. sl@0: // sl@0: // Returns: KErrNone sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: GLDEF_C TInt E32Dll(TDllReason) sl@0: { sl@0: return KErrNone; sl@0: } sl@0: #endif sl@0: sl@0: //Checks if a descriptor starts with another descriptor at the begining. sl@0: LOCAL_C TBool IsStartOf(const TDesC8& aEscapeSequence, const TDesC8& aBuffer) sl@0: { sl@0: const TInt lengthOfStart=aEscapeSequence.Length(); sl@0: return (aBuffer.Length()>=lengthOfStart) && (aBuffer.Left(lengthOfStart)==aEscapeSequence); sl@0: } sl@0: // ----------------------------------------------------------------------------- sl@0: // MatchesEscapeSequence() sl@0: //If the remainder of the foreign text starts with the passed escapesequence, modifies the remainder of the foreign text sl@0: //and sets the homogeneous run buffer that uses the same conversion data. sl@0: //The homogeneous run buffer is the buffer that will use a single conversion data, it doesn't contain the attribute code sl@0: // neither it contains the script switching code. sl@0: //The aRemainderOfForeign buffer sl@0: sl@0: // Returns: ETrue: If the sequence contains the escape sequence sl@0: // EFalse: If the sequence does not contain the escape sequence sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: LOCAL_C TBool MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, sl@0: TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence) sl@0: { sl@0: const TInt lengthOfEscapeSequence=aEscapeSequence.Length(); sl@0: if (IsStartOf(aEscapeSequence, aRemainderOfForeign)) sl@0: { sl@0: aRemainderOfForeign.Set(aRemainderOfForeign.Mid(lengthOfEscapeSequence)); sl@0: const TInt startOfNextEscapeSequence=aRemainderOfForeign.Locate(KControlCharacterEscape); sl@0: if (startOfNextEscapeSequence==KErrNotFound) sl@0: { sl@0: aHomogeneousRun.Set(aRemainderOfForeign); sl@0: aRemainderOfForeign.Set(NULL, 0); sl@0: } sl@0: else sl@0: { sl@0: aHomogeneousRun.Set(aRemainderOfForeign.Left(startOfNextEscapeSequence)); sl@0: aRemainderOfForeign.Set(aRemainderOfForeign.Mid(startOfNextEscapeSequence)); sl@0: } sl@0: aNumberOfForeignBytesConsumed+=lengthOfEscapeSequence; sl@0: return ETrue; sl@0: } sl@0: return EFalse; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // NextHomogeneousForeignRun() sl@0: //Matches the escape sequence of each of the elements of the SState array with the remainder of sl@0: //foreign text and if the escape sequence matches with the start of remainder of the foreign text, sl@0: //then the conversion data is set to the conversion data corresponding to the escape sequence sl@0: //Also the homogeneous foreign text for conversion with the same escape sequence is set. sl@0: sl@0: // Returns: ETrue: If length of the remainder of foreign buffer is nonzero. sl@0: // EFalse: If length of the remainder of foreign buffer is zero. sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: sl@0: LOCAL_C TBool NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray& aArrayOfStates, TUint& aOutputConversionFlags) sl@0: { sl@0: TBool returnValue = EFalse; sl@0: TBool foundState = EFalse; sl@0: __ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), User::Panic(KPanicReason,EPanicBadRemainderOfForeign)); sl@0: if (aRemainderOfForeign.Length()==0) sl@0: { sl@0: return returnValue; sl@0: } sl@0: const TInt numberOfStates=aArrayOfStates.Count(); sl@0: TInt i; sl@0: for (i=0; i0) sl@0: { sl@0: returnValue = ETrue; sl@0: } sl@0: return returnValue; sl@0: } sl@0: // ----------------------------------------------------------------------------- sl@0: // ConvertFromUnicodeIntermediateBufferInPlace() sl@0: //Default implementation for conversion to the intermediate buffer sl@0: //It modifies the unicode buffer before it is converted back to iscii. sl@0: //The current implementation of iscii plug-in doesn't require any sl@0: //modification to the default implementation sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: sl@0: LOCAL_C void ConvertFromUnicodeIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut) sl@0: { sl@0: CnvUtilities::ConvertFromIntermediateBufferInPlace(aStartPositionInDescriptor, aDescriptor, aNumberOfCharactersThatDroppedOut, KEscapeSequenceDevanagari, 1); sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // DoFindAndModifyBuffer() sl@0: //Modifies the iscii buffer by replacing the search buffer with the replacement buffer. sl@0: //Introduced for handling multibyte iscii sequence. sl@0: //Takes the search buffer array and the replacement buffer arrays as input to it and replaces all sl@0: //the occurances of the search buffer with the corresponding replace buffer. sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C void DoFindAndModifyBuffer(TDes8& aModifyBuffer,const TDesC8& aSearchBuffer,const TDesC8& aReplaceBuffer) sl@0: { sl@0: FOREVER sl@0: { sl@0: TInt offset; sl@0: __ASSERT_ALWAYS((aSearchBuffer.Length()>= aReplaceBuffer.Length()),User::Panic(KPanicReason,EPanicBadReplacementBuffer)); sl@0: if((offset = aModifyBuffer.Find(aSearchBuffer)) != KErrNotFound) sl@0: { sl@0: TUint8 *pointerToBuffer = const_cast (aModifyBuffer.Ptr()); sl@0: Mem::Copy(pointerToBuffer+offset,aReplaceBuffer.Ptr(),aReplaceBuffer.Length()); sl@0: Mem::Copy(pointerToBuffer+offset+aReplaceBuffer.Length(),pointerToBuffer+offset+aSearchBuffer.Length(),aModifyBuffer.Length()-aSearchBuffer.Length()-offset); sl@0: aModifyBuffer.SetLength(aModifyBuffer.Length() - aSearchBuffer.Length() + aReplaceBuffer.Length()); sl@0: } sl@0: else sl@0: break; sl@0: } sl@0: sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // FindAndModifyBuffer() sl@0: //Modifies the iscii buffer by replacing the search buffer with the replacement buffer. sl@0: //Calls DoFindAndModifyBuffer() and supplies the search buffer and replacement buffer. sl@0: //Introduced for handling multibyte iscii sequence. sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C void FindAndModifyBuffer(TDes8& aModifyBuffer) sl@0: { sl@0: RArray searchBuffer; sl@0: RArray replaceBuffer; sl@0: sl@0: TInt ret = KErrNone; sl@0: ret |= searchBuffer.Append(KExplicitHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KSoftHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KOm().Mid(0)); sl@0: ret |= searchBuffer.Append(KAvagraha().Mid(0)); sl@0: sl@0: ret |= replaceBuffer.Append(KReplacementForExplicitHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForSoftHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForOm().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForAvagraha().Mid(0)); sl@0: sl@0: __ASSERT_DEBUG(!ret, User::Panic(_L("RArray append failure"), ret)); sl@0: sl@0: for(TInt counter=0;counter& anArrayOfSearches,RArray& anArrayOfReplaces) sl@0: { sl@0: TPtr8 buffer(const_cast(aBuffer.Ptr()),aBuffer.Length(),aBuffer.Length()); sl@0: TUint count = anArrayOfSearches.Count(); sl@0: FOREVER sl@0: { sl@0: TBool flag = EFalse; sl@0: for(TUint i=0;i searchBuffer; sl@0: RArray replaceBuffer; sl@0: TInt ret =KErrNone; sl@0: ret |= searchBuffer.Append(KExplicitHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KSoftHalant().Mid(0)); sl@0: ret |= searchBuffer.Append(KOm().Mid(0)); sl@0: ret |= searchBuffer.Append(KAvagraha().Mid(0)); sl@0: sl@0: ret |= replaceBuffer.Append(KReplacementForExplicitHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForSoftHalant().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForOm().Mid(0)); sl@0: ret |= replaceBuffer.Append(KReplacementForAvagraha().Mid(0)); sl@0: sl@0: __ASSERT_DEBUG(!ret, User::Panic(_L("RArray append failure"), ret)); sl@0: sl@0: DoNormalizeReturnValue(aReturnValue,aBuffer,searchBuffer,replaceBuffer); sl@0: searchBuffer.Reset(); sl@0: replaceBuffer.Reset(); sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // HandleHomogeneousRun() sl@0: //Handles a homogeneous foreign buffer and converts the foreign buffer to unicode sl@0: //On return the aUnicode argument contains the converted unicode data. sl@0: //Also it sets the return value, returned from the conversion. The return value also sl@0: //takes into account if there is any buffer modification done before passing it to sl@0: //CCnvCharacterSetConverter::DoConvertToUnicode() sl@0: //buffers. sl@0: sl@0: // Returns: Nothing sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C void HandleHomogeneousRun(const SCnvConversionData*& aConversionData, sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aHomogeneousForeign, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, sl@0: TUint& aOutputConversionFlags, sl@0: TUint aInputConversionFlags,TInt& aNumberOfForeignBytesConsumed, sl@0: TInt& aReturnValue) sl@0: { sl@0: TInt numberOfUnconvertibleCharacters; sl@0: TInt indexOfFirstByteOfFirstUnconvertibleCharacter; sl@0: TUint noOfConsumedBytes = 0; sl@0: if(aConversionData == NULL) sl@0: { sl@0: aReturnValue = CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: return; sl@0: } sl@0: aReturnValue = CCnvCharacterSetConverter::DoConvertToUnicode(*aConversionData,aDefaultEndiannessOfForeignCharacters, sl@0: aUnicode,aHomogeneousForeign,numberOfUnconvertibleCharacters, sl@0: indexOfFirstByteOfFirstUnconvertibleCharacter,aOutputConversionFlags, sl@0: sl@0: //The numberOfUnconvertibleCharacters and indexOfFirstByteOfFirstUnconvertibleCharacter are the values with sl@0: //respect to the intermediate iscii buffer and original values aIndexOfFirstByteOfFirstUnconvertibleCharacter and sl@0: //aNumberOfUnconvertibleCharacters need to be adjusted accordingly. sl@0: sl@0: aInputConversionFlags); sl@0: if(numberOfUnconvertibleCharacters>0) sl@0: { sl@0: if(aNumberOfUnconvertibleCharacters == 0) sl@0: { sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter = aNumberOfForeignBytesConsumed + indexOfFirstByteOfFirstUnconvertibleCharacter; sl@0: } sl@0: aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters; sl@0: } sl@0: noOfConsumedBytes = aHomogeneousForeign.Length(); sl@0: //To Check whether it is really required. sl@0: NormalizeReturnValue(noOfConsumedBytes,aHomogeneousForeign); sl@0: aNumberOfForeignBytesConsumed+=noOfConsumedBytes; sl@0: if(aReturnValue>0) sl@0: { sl@0: TUint normalizedReturnValue = aReturnValue; sl@0: sl@0: //There original iscii buffer copied to an intermediate iscii buffer and then modified sl@0: //and is then passed for conversion. Now, after conversion, the return value needs to sl@0: //be adjusted according to the original buffer. NormalizeReturnValue() does the sl@0: //same thing. sl@0: sl@0: NormalizeReturnValue(normalizedReturnValue,aHomogeneousForeign); sl@0: aNumberOfForeignBytesConsumed-=normalizedReturnValue; sl@0: aReturnValue=normalizedReturnValue; sl@0: } sl@0: sl@0: //The HandleHomogeneousRun() method is called in a loop and once there is some sl@0: //iscii codes converted to unicode, the ConvertToUnicode() should not return sl@0: //CCnvCharacterSetConverter::EErrorIllFormedInput even though the conversion sl@0: //method does not convert any of the iscii codes ppassed. To ensure that once the sl@0: //first non-zero number of iscii codes are converted, the internal input conversion sl@0: //flag is set to EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable. sl@0: sl@0: if(aNumberOfForeignBytesConsumed>0) sl@0: { sl@0: aInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; sl@0: } sl@0: return; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // IsTruncatedDoubleByteIsciiSequence() sl@0: //Checks if anIsciiBuffer is a part of multi byte iscii sequence truncated in the middle. sl@0: //If it is a truncated sequence, then returns ETrue else returns EFalse. sl@0: // sl@0: // Returns: ETrue: If the intermediate input iscii buffer is truncated sl@0: // EFalse: If the intermediate input iscii buffer is not truncated sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: LOCAL_C TBool IsTruncatedDoubleByteIsciiSequence(const TDesC8& anIsciiBuffer) sl@0: { sl@0: RArray searchBuffer; sl@0: if(anIsciiBuffer.Length () == 0) sl@0: return EFalse; sl@0: if(anIsciiBuffer[anIsciiBuffer.Length()-1] == 0xEF) sl@0: return ETrue; sl@0: sl@0: TInt appendret = KErrNone; sl@0: appendret |= searchBuffer.Append(KSoftHalant().Mid(0)); sl@0: appendret |= searchBuffer.Append(KOm().Mid(0)); sl@0: appendret |= searchBuffer.Append(KAvagraha().Mid(0)); sl@0: appendret |= searchBuffer.Append(KExplicitHalant().Mid(0)); sl@0: __ASSERT_DEBUG(!ret, User::Panic(_L("RArray append failure"), ret)); sl@0: sl@0: TBool ret = EFalse; sl@0: TBool isNotTruncated =EFalse; sl@0: sl@0: //First check if the intermediate iscii buffer is ending with a complete multi byte sequence. sl@0: //If it ends with a complete multi byte sequence, no need to check if the last character of sl@0: //intermediate iscii is same as first character of multi byte iscii sequence. And return EFalse. sl@0: for(TUint counter = 0;counter aArrayOfCharacterSets; sl@0: aArrayOfCharacterSets[0].iConversionData = &conversionData; sl@0: aArrayOfCharacterSets[0].iConvertFromIntermediateBufferInPlace = ConvertFromUnicodeIntermediateBufferInPlace; sl@0: aArrayOfCharacterSets[0].iEscapeSequence = &KEscapeSequenceDevanagari(); sl@0: sl@0: return CnvUtilities::ConvertFromUnicode( sl@0: aDefaultEndiannessOfForeignCharacters, sl@0: aReplacementForUnconvertibleUnicodeCharacters, sl@0: aForeign, sl@0: aUnicode, sl@0: aIndicesOfUnconvertibleCharacters, sl@0: aArrayOfCharacterSets.Array()); sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // ConvertToUnicode() sl@0: //The main conversion function for converting from iscii to unicode sl@0: //Loaded and called by the character conversion framework. sl@0: //To support some of the iscii characters, the input forign buffer is sl@0: //copied to an intermediate buffer and then is then modified and sl@0: //CCnvCharactersetConverter::DoConvertToUnicode() is called with sl@0: //the modified buffer. For extensibility of iscii to other Indic languages sl@0: //it uses CnvUtilities::SState datastructure. CnvUtilities::SState is a sl@0: //Symbian defined class for modal charactersets. The escape sequence sl@0: //is specified to ATR followed by the script selection code and the conversion sl@0: //data is specified to be the conversion for the particular script. For the time sl@0: //being only Devanagari with script selection code 0x42 is supported. If sl@0: //any of the other script codes are used the conversion leads to unconvertible sl@0: //character i.e. 0xFFFD. sl@0: // Returns: The numbet of iscii codes it could not convert. sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: EXPORT_C TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter ) sl@0: { sl@0: aNumberOfUnconvertibleCharacters = 0; sl@0: TUint aOutputConversionFlags = CCnvCharacterSetConverter::KStateDefault; sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; sl@0: TUint internalInputConversionFlags = 0; sl@0: TInt numberOfForeignBytesConsumed=0; sl@0: TPtrC8 remainderOfForeign(aForeign); sl@0: TInt returnValue; sl@0: TBool flag = EFalse; sl@0: TBool isSkipMatchSequence = EFalse; sl@0: const SCnvConversionData* convData; sl@0: //Set the iscii conversion data and escape sequence for Devanagari. sl@0: TFixedArray modals; sl@0: modals[0].iConversionData = &conversionData; sl@0: modals[0].iEscapeSequence = &KEscapeSequenceDevanagari(); sl@0: sl@0: aUnicode.SetLength(0); sl@0: sl@0: //The internal input conversion flag for conversion is set to CCnvCharacterSetConverter::EInputConversionFlagAppend sl@0: //so that for each conversion in the conversion loop, the generated conversion buffer is appened to the aUnicode buffer. sl@0: internalInputConversionFlags |= CCnvCharacterSetConverter::EInputConversionFlagAppend; sl@0: if (aForeign.Length()==0) sl@0: { sl@0: return 0; sl@0: } sl@0: //Get the conversion data from the previous call else the conversion data is set to the default sl@0: //conversion data, i.e. Devanagari. sl@0: convData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): modals[0].iConversionData; sl@0: FOREVER sl@0: { sl@0: TBuf8 intermediateBuffer; sl@0: TUint numberOfForeignBytesConsumedThisTime = 0; sl@0: if((remainderOfForeign.Length() >=KMaximumLengthOfIntermediateBuffer) && (aUnicode.MaxLength()-aUnicode.Length() >=KMaximumLengthOfIntermediateBuffer)) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = KMaximumLengthOfIntermediateBuffer; sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: //If the intermediate buffer is a part of truncated buffer sequence but the sl@0: //actual input buffer is not truncated then truncated sequence is not converted. sl@0: //The intermediate buffer is modified so as not to contain the truncated sequence. sl@0: sl@0: flag = IsTruncatedDoubleByteIsciiSequence(intermediateBuffer); sl@0: if(flag) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime --; sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: } sl@0: sl@0: } sl@0: else sl@0: { sl@0: flag = IsTruncatedDoubleByteIsciiSequence(remainderOfForeign.Left(aUnicode.MaxLength()-aUnicode.Length())); sl@0: if(!flag) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = aUnicode.MaxLength()-aUnicode.Length(); sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: } sl@0: else sl@0: { sl@0: if(aForeign.Length()>(numberOfForeignBytesConsumed+aUnicode.Length())) sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = aUnicode.MaxLength()-aUnicode.Length()-1; sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: break; sl@0: } sl@0: else sl@0: { sl@0: numberOfForeignBytesConsumedThisTime = aUnicode.MaxLength()-aUnicode.Length(); sl@0: intermediateBuffer = remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime); sl@0: } sl@0: } sl@0: } sl@0: sl@0: //The input intermediate iscii buffer is modified with the search and replace sl@0: //buffers. It is required for supporting multibyte iscii sequences. sl@0: FindAndModifyBuffer(intermediateBuffer); sl@0: TPtrC8 remainderOfForeignInternal(intermediateBuffer); sl@0: TPtrC8 homogeneousRun; sl@0: const TInt startOfNextEscapeSequence=intermediateBuffer.Locate(KControlCharacterEscape); sl@0: if (startOfNextEscapeSequence!=0) sl@0: { sl@0: if (startOfNextEscapeSequence==KErrNotFound) sl@0: { sl@0: homogeneousRun.Set(remainderOfForeignInternal); sl@0: remainderOfForeignInternal.Set(NULL, 0); sl@0: } sl@0: else sl@0: { sl@0: __ASSERT_DEBUG(startOfNextEscapeSequence>0, User::Panic(KPanicReason,EPanicBadStartOfNextEscapeSequence)); sl@0: homogeneousRun.Set(remainderOfForeignInternal.Left(startOfNextEscapeSequence)); sl@0: remainderOfForeignInternal.Set(remainderOfForeignInternal.Mid(startOfNextEscapeSequence)); sl@0: } sl@0: isSkipMatchSequence = ETrue; sl@0: } sl@0: FOREVER sl@0: { sl@0: if(!isSkipMatchSequence) sl@0: { sl@0: if (!NextHomogeneousForeignRun(convData, numberOfForeignBytesConsumed, homogeneousRun, sl@0: remainderOfForeignInternal, modals.Array(), aOutputConversionFlags)) sl@0: { sl@0: break; sl@0: } sl@0: } sl@0: HandleHomogeneousRun( convData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, aNumberOfUnconvertibleCharacters, sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter,aOutputConversionFlags,internalInputConversionFlags, sl@0: numberOfForeignBytesConsumed,returnValue); sl@0: if(returnValue<0) sl@0: { sl@0: return returnValue; sl@0: } sl@0: isSkipMatchSequence = EFalse; sl@0: } sl@0: if(returnValue > 0) sl@0: break; sl@0: if ((!flag && (numberOfForeignBytesConsumedThisTime != KMaximumLengthOfIntermediateBuffer)) || (flag && (numberOfForeignBytesConsumedThisTime != (KMaximumLengthOfIntermediateBuffer-1) ))) sl@0: break; sl@0: remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed)); sl@0: } sl@0: //If the number of iscii bytes consumed by the conversion is zero also the output conversion sl@0: //flag is not set to EOutputConversionFlagInputIsTruncated, then return EErrorIllFormedInput. sl@0: if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated)) sl@0: { sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: } sl@0: //Set the conversion data sothat next time when ConvertToUnicode() is called, sl@0: //will use the previous conversion data. sl@0: aState=REINTERPRET_CAST(TInt, convData); sl@0: return aForeign.Length()-numberOfForeignBytesConsumed; sl@0: } sl@0: sl@0: // ----------------------------------------------------------------------------- sl@0: // IsInThisCharacterSetL() sl@0: //The method tells how probable it is that a sample piece of text is encoded in this character set. sl@0: //On return aConfidenceLevel, indicates how confident the function is about its return value. For sl@0: //iscii it is the default implementation and it does not implement the autodetect functionality. sl@0: //Loaded and called by the character conversion framework. sl@0: // sl@0: // Returns: EFalse: To tell CCnvCharacterSetConverter::AutoDetectCharacterSetL() that the plug-in DLL sl@0: // is not implementing a function of this signature and is therefore empty. sl@0: // sl@0: // ----------------------------------------------------------------------------- sl@0: // sl@0: sl@0: sl@0: //Default implementation for IsInThisCharacterSetL() sl@0: sl@0: EXPORT_C TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& ) sl@0: { sl@0: aSetToTrue = EFalse; sl@0: aConfidenceLevel = 0; sl@0: return EFalse; sl@0: } sl@0: sl@0: EXPORT_C void Reserved_2() sl@0: { sl@0: } sl@0: sl@0: EXPORT_C void Reserved_3() sl@0: { sl@0: } sl@0: sl@0: EXPORT_C void Reserved_4() sl@0: { sl@0: } sl@0: sl@0: EXPORT_C void Reserved_5() sl@0: { sl@0: } sl@0: sl@0: EXPORT_C void Reserved_6() sl@0: { sl@0: } sl@0: sl@0: EXPORT_C void Reserved_7() sl@0: { sl@0: } sl@0: sl@0: EXPORT_C void Reserved_8() sl@0: { sl@0: } sl@0: sl@0: #endif //EKA2