sl@0: // Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: // All rights reserved. sl@0: // This component and the accompanying materials are made available sl@0: // under the terms of "Eclipse Public License v1.0" sl@0: // which accompanies this distribution, and is available sl@0: // at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: // sl@0: // Initial Contributors: sl@0: // Nokia Corporation - initial contribution. sl@0: // sl@0: // Contributors: sl@0: // sl@0: // Description: sl@0: // Classes defined in this file are used for Unicode compression and decompression. sl@0: // Their code is borrowed from Symbian, only with some changes such as the "Panic" function sl@0: // is changed to exit the program. The Symbian coding standard will be kept in the code. sl@0: // sl@0: // sl@0: sl@0: sl@0: #if !defined(__UNICODECOMPRESSION_H__) sl@0: #define __UNICODECOMPRESSION_H__ sl@0: sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include "pcstoreconst.h" sl@0: sl@0: namespace PCStore sl@0: { sl@0: /** sl@0: @internalComponent sl@0: */ sl@0: class TUnicodeCompressionState sl@0: { sl@0: public: sl@0: TUnicodeCompressionState(); sl@0: void Reset(); sl@0: static TInt StaticWindowIndex(TUint16 aCode); sl@0: static TInt DynamicWindowOffsetIndex(TUint16 aCode); sl@0: static TUint32 DynamicWindowBase(TInt aOffsetIndex); sl@0: static TBool EncodeAsIs(TUint16 aCode); sl@0: sl@0: enum TPanic sl@0: { sl@0: EUnhandledByte, // expander code fails to handle all possible byte codes sl@0: ENotUnicode, // expander can't handle Unicode values outside range 0x0..0x10FFFF; sl@0: // that is, 16-bit codes plus 32-bit codes that can be expressed using sl@0: // 16-bit surrogates sl@0: EOutputBufferOverflow // output buffer is not big enough sl@0: }; sl@0: sl@0: static void Panic(TPanic aPanic); sl@0: sl@0: protected: sl@0: sl@0: enum sl@0: { sl@0: EStaticWindows = 8, sl@0: EDynamicWindows = 8, sl@0: ESpecialBases = 7 sl@0: }; sl@0: sl@0: TBool iUnicodeMode; // TRUE if in Unicode mode as opposed to single-byte mode sl@0: TUint32 iActiveWindowBase; // base of the active window - bases are 32-bit because they sl@0: // can be set to the surrogate area, which represents codes sl@0: // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646. sl@0: static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows sl@0: static const TUint32 iDynamicWindowDefault[EDynamicWindows]; // default bases of the dynamic windows sl@0: static const TUint16 iSpecialBase[ESpecialBases]; // bases for window offsets F9..FF sl@0: sl@0: TUint32 iDynamicWindow[EDynamicWindows]; // bases of the dynamic windows sl@0: TInt iUnicodeWords; // Unicode words processed; read by compressor, written by expander sl@0: TInt iMaxUnicodeWords; // maximum number of Unicode words to read or write sl@0: TInt iCompressedBytes; // compressed bytes processed: read by expander, written by compressor sl@0: TInt iMaxCompressedBytes; // maximum number of compressed bytes to read or write sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: */ sl@0: class MUnicodeSource sl@0: { sl@0: public: sl@0: virtual TUint16 ReadUnicodeValueL() = 0; sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: sl@0: A class to read Unicode values directly from memory. sl@0: */ sl@0: class TMemoryUnicodeSource: public MUnicodeSource sl@0: { sl@0: public: sl@0: inline TMemoryUnicodeSource(const TUint16* aPtr); sl@0: inline TUint16 ReadUnicodeValueL(); sl@0: sl@0: private: sl@0: const TUint16* iPtr; sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: sl@0: A class to read Unicode values from a stream built on a memory object. sl@0: */ sl@0: class TMemoryStreamUnicodeSource: public MUnicodeSource sl@0: { sl@0: public: sl@0: inline TMemoryStreamUnicodeSource(CStoreReadStream& aStream); sl@0: inline TUint16 ReadUnicodeValueL(); sl@0: sl@0: private: sl@0: CStoreReadStream& iStream; sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: sl@0: */ sl@0: class MUnicodeSink sl@0: { sl@0: public: sl@0: virtual void WriteUnicodeValueL(TUint16 aValue) = 0; sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: sl@0: A class to write Unicode values directly to memory. sl@0: */ sl@0: class TMemoryUnicodeSink: public MUnicodeSink sl@0: { sl@0: public: sl@0: inline TMemoryUnicodeSink(TUint16* aPtr); sl@0: inline void WriteUnicodeValueL(TUint16 aValue); sl@0: sl@0: private: sl@0: TUint16* iPtr; sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: sl@0: A class to write Unicode values to a stream built on a memory object. sl@0: */ sl@0: class TMemoryStreamUnicodeSink: public MUnicodeSink sl@0: { sl@0: public: sl@0: inline TMemoryStreamUnicodeSink(CStoreWriteStream& aStream); sl@0: inline void WriteUnicodeValueL(TUint16 aValue); sl@0: sl@0: private: sl@0: CStoreWriteStream& iStream; sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: sl@0: A class to hold functions to compress text using the Standard Compression Scheme for Unicode. sl@0: */ sl@0: class TUnicodeCompressor: public TUnicodeCompressionState sl@0: { sl@0: public: sl@0: TUnicodeCompressor(); sl@0: void CompressL(CStoreWriteStream& aOutput,MUnicodeSource& aInput, sl@0: TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, sl@0: TInt* aOutputBytes = static_cast(NULL),TInt* aInputWords = static_cast(NULL)); sl@0: void CompressL(TUint8* aOutput,MUnicodeSource& aInput, sl@0: TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt, sl@0: TInt* aOutputBytes = static_cast(NULL),TInt* aInputWords = static_cast(NULL)); sl@0: TInt FlushL(CStoreWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); sl@0: TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes); sl@0: static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords); sl@0: sl@0: private: sl@0: sl@0: // A structure to store a character and its treatment code sl@0: struct TAction sl@0: { sl@0: // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode sl@0: enum sl@0: { sl@0: EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows sl@0: EPlainASCII = -1, // character can be emitted as an ASCII code sl@0: EFirstDynamic = 0, // values 0..255 are for dynamic windows with offsets at these places in the offset table sl@0: ELastDynamic = 255, sl@0: EFirstStatic = 256, // values 256..263 are for static windows 0..7 sl@0: ELastStatic = 263 sl@0: }; sl@0: sl@0: inline TAction(); sl@0: TAction(TUint16 aCode); sl@0: sl@0: TUint16 iCode; // Unicode value of the character sl@0: TInt iTreatment; // treatment code: see above sl@0: }; sl@0: sl@0: void DoCompressL(CStoreWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, sl@0: TInt aMaxCompressedBytes,TInt aMaxUnicodeWords, sl@0: TInt* aCompressedBytes,TInt* aUnicodeWords); sl@0: void FlushInputBufferL(); sl@0: void FlushOutputBufferL(); sl@0: void WriteRunL(); sl@0: void WriteCharacter(const TAction& aAction); sl@0: void WriteSCharacter(const TAction& aAction); sl@0: void WriteUCharacter(TUint16 aCode); sl@0: void WriteByte(TUint aByte); sl@0: void WriteCharacterFromBuffer(); sl@0: void SelectTreatment(TInt aTreatment); sl@0: sl@0: enum sl@0: { sl@0: EMaxInputBufferSize = 4, sl@0: EMaxOutputBufferSize = EMaxInputBufferSize * 3 // no Unicode character can be encoded as more than three bytes sl@0: }; sl@0: TAction iInputBuffer[EMaxInputBufferSize]; // circular buffer; queue of Unicode characters to be processed sl@0: TInt iInputBufferStart; // position of first Unicode character to be processed sl@0: TInt iInputBufferSize; // characters in the input buffer sl@0: TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output sl@0: TInt iOutputBufferStart; // position of first compressed byte to be output sl@0: TInt iOutputBufferSize; // characters in the output buffer sl@0: TInt iDynamicWindowIndex; // index of the current dynamic window sl@0: CStoreWriteStream* iOutputStream; // if non-null, output is to this stream sl@0: TUint8* iOutputPointer; // if non-null, output is to memory sl@0: MUnicodeSource* iInput; // input object sl@0: }; sl@0: sl@0: /** sl@0: @internalComponent sl@0: sl@0: A class to hold functions to expand text using the Standard Compression Scheme for Unicode. sl@0: */ sl@0: class TUnicodeExpander: public TUnicodeCompressionState sl@0: { sl@0: public: sl@0: TUnicodeExpander(); sl@0: void ExpandL(MUnicodeSink& aOutput,CStoreReadStream& aInput, sl@0: TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, sl@0: TInt* aOutputWords = static_cast(NULL),TInt* aInputBytes = static_cast(NULL)); sl@0: void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, sl@0: TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt, sl@0: TInt* aOutputWords = static_cast(NULL),TInt* aInputBytes = static_cast(NULL)); sl@0: TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords); sl@0: static TInt ExpandedSizeL(CStoreReadStream& aInput,TInt aInputBytes); sl@0: static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes); sl@0: sl@0: private: sl@0: void DoExpandL(MUnicodeSink* aOutput,CStoreReadStream* aInputStream,const TUint8* aInputPointer, sl@0: TInt aMaxOutputWords,TInt aMaxInputBytes, sl@0: TInt* aOutputWords,TInt* aInputBytes); sl@0: void HandleByteL(); sl@0: void FlushOutputBufferL(); sl@0: TBool HandleSByteL(TUint8 aByte); sl@0: TBool HandleUByteL(TUint8 aByte); sl@0: TBool ReadByteL(TUint8& aByte); sl@0: TBool QuoteUnicodeL(); sl@0: TBool DefineWindowL(TInt aIndex); sl@0: TBool DefineExpansionWindowL(); sl@0: void WriteChar(TText aChar); sl@0: void WriteChar32(TUint aChar); sl@0: sl@0: enum sl@0: { sl@0: EMaxInputBufferSize = 3, // no Unicode character can be encoded as more than 3 bytes sl@0: EMaxOutputBufferSize = 2 // no byte can be expanded into more than 2 Unicode characters sl@0: }; sl@0: TUint8 iInputBuffer[EMaxInputBufferSize]; // buffer containing a group of compressed bytes representing sl@0: // a single operation; when an input source ends in the sl@0: // middle of an operation, this buffer enables the next sl@0: // expansion to start in the correct state sl@0: TInt iInputBufferStart; // next read position in the input buffer sl@0: TInt iInputBufferSize; // bytes in the input buffer sl@0: TUint16 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of Unicode characters to be output sl@0: TInt iOutputBufferStart; // position of first Unicode character to be output sl@0: TInt iOutputBufferSize; // characters in the output buffer sl@0: MUnicodeSink* iOutput; // output object sl@0: CStoreReadStream* iInputStream; // if non-null, input is from this stream sl@0: const TUint8* iInputPointer; // if non-null, input is from memory sl@0: }; sl@0: sl@0: // inline functions start here sl@0: sl@0: inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr): sl@0: iPtr(aPtr) sl@0: { sl@0: } sl@0: sl@0: inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL() sl@0: { sl@0: return *iPtr++; sl@0: } sl@0: sl@0: inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(CStoreReadStream& aStream): sl@0: iStream(aStream) sl@0: { sl@0: } sl@0: sl@0: inline TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL() sl@0: { sl@0: TUint16 x; sl@0: iStream.Read(reinterpret_cast(&x),sizeof(TUint16)); sl@0: return x; sl@0: } sl@0: sl@0: inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr): sl@0: iPtr(aPtr) sl@0: { sl@0: } sl@0: sl@0: inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue) sl@0: { sl@0: *iPtr++ = aValue; sl@0: } sl@0: sl@0: inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(CStoreWriteStream& aStream): sl@0: iStream(aStream) sl@0: { sl@0: } sl@0: sl@0: inline void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue) sl@0: { sl@0: iStream.Write(reinterpret_cast(&aValue),sizeof(TUint16)); sl@0: } sl@0: sl@0: inline TUnicodeCompressor::TAction::TAction(): sl@0: iCode(0), sl@0: iTreatment(EPlainUnicode) sl@0: { sl@0: } sl@0: } sl@0: #endif // !defined(__UNICODECOMPRESSION_H__)