// Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// Header for the Standard Compression Scheme for Unicode.
// This code is compiled only in the Unicode build.
//
//

#ifndef __S32UCMP_H__
#define __S32UCMP_H__ 1

#ifdef _UNICODE

// NOTE(review): the header names of the two #include directives below were
// lost from the annotated listing this file was recovered from (only the bare
// "#include" survived). They are restored from the types used here
// (TInt/TUint16/TBool, RReadStream/RWriteStream) - confirm against upstream.
#include <e32std.h>
#include <s32mem.h>

/**
 * @publishedAll
 * @released

State common to TUnicodeCompressor and TUnicodeExpander, both of which derive
from this class: the current mode, the active window base, the dynamic window
bases and the counters of words and bytes processed.
 */
class TUnicodeCompressionState
    {
public:
    TUnicodeCompressionState();
    void Reset();
    static TInt StaticWindowIndex(TUint16 aCode);
    static TInt DynamicWindowOffsetIndex(TUint16 aCode);
    static TUint32 DynamicWindowBase(TInt aOffsetIndex);
    static TBool EncodeAsIs(TUint16 aCode);

    enum TPanic
        {
        EUnhandledByte,         // expander code fails to handle all possible byte codes
        ENotUnicode,            // expander can't handle Unicode values outside range 0x0..0x10FFFF;
                                // that is, 16-bit codes plus 32-bit codes that can be expressed using
                                // 16-bit surrogates
        EOutputBufferOverflow   // output buffer is not big enough
        };

    static void Panic(TPanic aPanic);

protected:

    enum
        {
        EStaticWindows = 8,
        EDynamicWindows = 8,
        ESpecialBases = 7
        };

    TBool iUnicodeMode;                 // TRUE if in Unicode mode as opposed to single-byte mode
    TUint32 iActiveWindowBase;          // base of the active window - bases are 32-bit because they
                                        // can be set to the surrogate area, which represents codes
                                        // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646.
    static const TUint32 iStaticWindow[EStaticWindows];             // bases of the static windows
    static const TUint32 iDynamicWindowDefault[EDynamicWindows];    // default bases of the dynamic windows
    static const TUint16 iSpecialBase[ESpecialBases];               // bases for window offsets F9..FF

    TUint32 iDynamicWindow[EDynamicWindows];    // bases of the dynamic windows
    TInt iUnicodeWords;                 // Unicode words processed; read by compressor, written by expander
    TInt iMaxUnicodeWords;              // maximum number of Unicode words to read or write
    TInt iCompressedBytes;              // compressed bytes processed: read by expander, written by compressor
    TInt iMaxCompressedBytes;           // maximum number of compressed bytes to read or write
    };

/**
 * @publishedAll
 * @released

Interface for an object that supplies 16-bit Unicode values one at a time.
 */
class MUnicodeSource
    {
public:
    virtual TUint16 ReadUnicodeValueL() = 0;
    };

/**
 * @publishedAll
 * @released
 A class to read Unicode values directly from memory.
 */
class TMemoryUnicodeSource: public MUnicodeSource
    {
public:
    inline TMemoryUnicodeSource(const TUint16* aPtr);
    inline TUint16 ReadUnicodeValueL();

private:
    const TUint16* iPtr;    // next value to be read
    };

/**
 * @publishedAll
 * @released
 A class to read Unicode values from a stream built on a memory object.
 */
class TMemoryStreamUnicodeSource: public MUnicodeSource
    {
public:
    inline TMemoryStreamUnicodeSource(RReadStream& aStream);
    inline TUint16 ReadUnicodeValueL();

private:
    RReadStream& iStream;   // stream the values are read from
    };

/**
 * @publishedAll
 * @released

Interface for an object that accepts 16-bit Unicode values one at a time.
 */
class MUnicodeSink
    {
public:
    virtual void WriteUnicodeValueL(TUint16 aValue) = 0;
    };

/**
 * @publishedAll
 * @released
 A class to write Unicode values directly to memory.
 */
class TMemoryUnicodeSink: public MUnicodeSink
    {
public:
    inline TMemoryUnicodeSink(TUint16* aPtr);
    inline void WriteUnicodeValueL(TUint16 aValue);

private:
    TUint16* iPtr;          // next position to be written
    };

/**
 * @publishedAll
 * @released
 A class to write Unicode values to a stream built on a memory object.
 */
class TMemoryStreamUnicodeSink: public MUnicodeSink
    {
public:
    inline TMemoryStreamUnicodeSink(RWriteStream& aStream);
    inline void WriteUnicodeValueL(TUint16 aValue);

private:
    RWriteStream& iStream;  // stream the values are written to
    };

/**
 * @publishedAll
 * @released

A class to hold functions to compress text using the Standard Compression Scheme for Unicode.

A note on error handling and leaving.

Although all the public functions except the constructor can leave, it is possible to guarantee success: that is,
guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource
object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a
RWriteStream with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be
found out using CompressedSizeL.

This guarantee of success is particularly useful when compressing from one memory buffer to another.
*/
class TUnicodeCompressor: public TUnicodeCompressionState
    {
public:
    IMPORT_C TUnicodeCompressor();
    IMPORT_C void CompressL(RWriteStream& aOutput,MUnicodeSource& aInput,
                            TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt,
                            TInt* aOutputBytes = NULL,TInt* aInputWords = NULL);
    IMPORT_C void CompressL(TUint8* aOutput,MUnicodeSource& aInput,
                            TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt,
                            TInt* aOutputBytes = NULL,TInt* aInputWords = NULL);
    IMPORT_C TInt FlushL(RWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes);
    IMPORT_C TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes);
    IMPORT_C static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords);

private:

    // A structure to store a character and its treatment code
    struct TAction
        {
        // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode
        enum
            {
            EPlainUnicode = -2,     // character cannot be expressed as ASCII or using static or dynamic windows
            EPlainASCII = -1,       // character can be emitted as an ASCII code
            EFirstDynamic = 0,      // values 0..255 are for dynamic windows with offsets at these places in the offset table
            ELastDynamic = 255,
            EFirstStatic = 256,     // values 256..263 are for static windows 0..7
            ELastStatic = 263
            };

        inline TAction();
        TAction(TUint16 aCode);

        TUint16 iCode;      // Unicode value of the character
        TInt iTreatment;    // treatment code: see above
        };

    void DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
                     TInt aMaxCompressedBytes,TInt aMaxUnicodeWords,
                     TInt* aCompressedBytes,TInt* aUnicodeWords);
    void FlushInputBufferL();
    void FlushOutputBufferL();
    void WriteRunL();
    void WriteCharacter(const TAction& aAction);
    void WriteSCharacter(const TAction& aAction);
    void WriteUCharacter(TUint16 aCode);
    void WriteByte(TUint aByte);
    void WriteCharacterFromBuffer();
    void SelectTreatment(TInt aTreatment);

    enum
        {
        EMaxInputBufferSize = 4,
        EMaxOutputBufferSize = EMaxInputBufferSize * 3  // no Unicode character can be encoded as more than three bytes
        };
    TAction iInputBuffer[EMaxInputBufferSize];  // circular buffer; queue of Unicode characters to be processed
    TInt iInputBufferStart;                     // position of first Unicode character to be processed
    TInt iInputBufferSize;                      // characters in the input buffer
    TUint8 iOutputBuffer[EMaxOutputBufferSize]; // circular buffer; queue of compressed bytes to be output
    TInt iOutputBufferStart;                    // position of first compressed byte to be output
    TInt iOutputBufferSize;                     // characters in the output buffer
    TInt iDynamicWindowIndex;                   // index of the current dynamic window
    RWriteStream* iOutputStream;                // if non-null, output is to this stream
    TUint8* iOutputPointer;                     // if non-null, output is to memory
    MUnicodeSource* iInput;                     // input object
    };

/**
 * @publishedAll
 * @released

A class to hold functions to expand text using the Standard Compression Scheme for Unicode.

A note on error handling and leaving.

Although all the public functions except the constructor can leave, it is possible to guarantee success: that is,
guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink
object with a non-leaving WriteUnicodeValueL function, such as a TMemoryUnicodeSink; (ii) read input from a RReadStream
with a non-leaving ReadL function; (iii) supply a big enough buffer to write the output; you can find out how big by
calling ExpandedSizeL, using methods (i) and (ii) to guarantee success.

This guarantee of success is particularly useful when expanding from one memory buffer to another.
*/
class TUnicodeExpander: public TUnicodeCompressionState
    {
public:
    IMPORT_C TUnicodeExpander();
    IMPORT_C void ExpandL(MUnicodeSink& aOutput,RReadStream& aInput,
                          TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt,
                          TInt* aOutputWords = NULL,TInt* aInputBytes = NULL);
    IMPORT_C void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput,
                          TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt,
                          TInt* aOutputWords = NULL,TInt* aInputBytes = NULL);
    IMPORT_C TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords);
    IMPORT_C static TInt ExpandedSizeL(RReadStream& aInput,TInt aInputBytes);
    IMPORT_C static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes);

private:
    void DoExpandL(MUnicodeSink* aOutput,RReadStream* aInputStream,const TUint8* aInputPointer,
                   TInt aMaxOutputWords,TInt aMaxInputBytes,
                   TInt* aOutputWords,TInt* aInputBytes);
    void HandleByteL();
    void FlushOutputBufferL();
    TBool HandleSByteL(TUint8 aByte);
    TBool HandleUByteL(TUint8 aByte);
    TBool ReadByteL(TUint8& aByte);
    TBool QuoteUnicodeL();
    TBool DefineWindowL(TInt aIndex);
    TBool DefineExpansionWindowL();
    void WriteChar(TText aChar);
    void WriteChar32(TUint aChar);

    enum
        {
        EMaxInputBufferSize = 3,    // no Unicode character can be encoded as more than 3 bytes
        EMaxOutputBufferSize = 2    // no byte can be expanded into more than 2 Unicode characters
        };
    TUint8 iInputBuffer[EMaxInputBufferSize];       // buffer containing a group of compressed bytes representing
                                                    // a single operation; when an input source ends in the
                                                    // middle of an operation, this buffer enables the next
                                                    // expansion to start in the correct state
    TInt iInputBufferStart;                         // next read position in the input buffer
    TInt iInputBufferSize;                          // bytes in the input buffer
    TUint16 iOutputBuffer[EMaxOutputBufferSize];    // circular buffer; queue of Unicode characters to be output
    TInt iOutputBufferStart;                        // position of first Unicode character to be output
    TInt iOutputBufferSize;                         // characters in the output buffer
    MUnicodeSink* iOutput;                          // output object
    RReadStream* iInputStream;                      // if non-null, input is from this stream
    const TUint8* iInputPointer;                    // if non-null, input is from memory
    };

// inline functions start here

// Start reading at aPtr. The pointer is stored as given; nothing is copied.
inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr):
    iPtr(aPtr)
    {
    }

// Return the value at the current position and advance by one word.
// No bounds check is made here; the read position simply moves forward.
inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL()
    {
    return *iPtr++;
    }

// Read from aStream, which is held by reference.
inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(RReadStream& aStream):
    iStream(aStream)
    {
    }

// Read sizeof(TUint16) raw bytes from the stream directly into a TUint16 and
// return it, so the value is taken in the byte order in which it is stored.
inline TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL()
    {
    TUint16 value;
    iStream.ReadL(reinterpret_cast<TUint8*>(&value),sizeof(TUint16));
    return value;
    }

// Start writing at aPtr. The pointer is stored as given.
inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr):
    iPtr(aPtr)
    {
    }

// Store aValue at the current position and advance by one word.
// No bounds check is made here; the write position simply moves forward.
inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue)
    {
    *iPtr++ = aValue;
    }

// Write to aStream, which is held by reference.
inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(RWriteStream& aStream):
    iStream(aStream)
    {
    }

// Write the sizeof(TUint16) raw bytes of aValue to the stream as they are
// laid out in memory.
inline void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue)
    {
    iStream.WriteL(reinterpret_cast<const TUint8*>(&aValue),sizeof(TUint16));
    }

// Default action: character code 0, treated as plain Unicode.
inline TUnicodeCompressor::TAction::TAction():
    iCode(0),
    iTreatment(EPlainUnicode)
    {
    }

#endif // _UNICODE

#endif // __S32UCMP_H__