// Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// Header for the Standard Compression Scheme for Unicode.
// This code is compiled only in the Unicode build.
//
//

#ifndef __S32UCMP_H__
#define __S32UCMP_H__ 1

#ifdef _UNICODE

// NOTE(review): the targets of these two #include directives were missing from the
// copy of the file this was restored from. They are reconstructed from the names the
// header relies on (TInt/TUint16/KMaxTInt and RReadStream/RWriteStream) - confirm
// against the platform headers before release.
#include <e32std.h>
#include <s32strm.h>

/**
 * @publishedAll
 * @released

State shared by the compressor and the expander: the current mode, the base of the
active window, the bases of the dynamic windows and the progress counters.
TUnicodeCompressor and TUnicodeExpander both derive from this class.
 */
class TUnicodeCompressionState
	{
	public:
	TUnicodeCompressionState();
	void Reset();
	static TInt StaticWindowIndex(TUint16 aCode);
	static TInt DynamicWindowOffsetIndex(TUint16 aCode);
	static TUint32 DynamicWindowBase(TInt aOffsetIndex);
	static TBool EncodeAsIs(TUint16 aCode);

	enum TPanic
		{
		EUnhandledByte,			// expander code fails to handle all possible byte codes
		ENotUnicode,			// expander can't handle Unicode values outside range 0x0..0x10FFFF;
								// that is, 16-bit codes plus 32-bit codes that can be expressed using
								// 16-bit surrogates
		EOutputBufferOverflow	// output buffer is not big enough
		};

	static void Panic(TPanic aPanic);

	protected:

	// Sizes of the static tables declared below.
	enum
		{
		EStaticWindows = 8,
		EDynamicWindows = 8,
		ESpecialBases = 7
		};

	TBool iUnicodeMode;									// TRUE if in Unicode mode as opposed to single-byte mode
	TUint32 iActiveWindowBase;							// base of the active window - bases are 32-bit because they
														// can be set to the surrogate area, which represents codes
														// from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646.
	static const TUint32 iStaticWindow[EStaticWindows];	// bases of the static windows
	static const TUint32 iDynamicWindowDefault[EDynamicWindows];	// default bases of the dynamic windows
	static const TUint16 iSpecialBase[ESpecialBases];	// bases for window offsets F9..FF

	TUint32 iDynamicWindow[EDynamicWindows];			// bases of the dynamic windows
	TInt iUnicodeWords;									// Unicode words processed; read by compressor, written by expander
	TInt iMaxUnicodeWords;								// maximum number of Unicode words to read or write
	TInt iCompressedBytes;								// compressed bytes processed: read by expander, written by compressor
	TInt iMaxCompressedBytes;							// maximum number of compressed bytes to read or write
	};

/**
 * @publishedAll
 * @released

Interface for an object that supplies 16-bit Unicode values one at a time.
 */
class MUnicodeSource
	{
	public:
	virtual TUint16 ReadUnicodeValueL() = 0;
	};

/**
 * @publishedAll
 * @released
A class to read Unicode values directly from memory.
 */
class TMemoryUnicodeSource: public MUnicodeSource
	{
	public:
	inline TMemoryUnicodeSource(const TUint16* aPtr);
	inline TUint16 ReadUnicodeValueL();

	private:
	const TUint16* iPtr;	// next value to be read
	};

/**
 * @publishedAll
 * @released
A class to read Unicode values from a stream built on a memory object.
 */
class TMemoryStreamUnicodeSource: public MUnicodeSource
	{
	public:
	inline TMemoryStreamUnicodeSource(RReadStream& aStream);
	inline TUint16 ReadUnicodeValueL();

	private:
	RReadStream& iStream;	// stream the values are read from
	};

/**
 * @publishedAll
 * @released

Interface for an object that accepts 16-bit Unicode values one at a time.
 */
class MUnicodeSink
	{
	public:
	virtual void WriteUnicodeValueL(TUint16 aValue) = 0;
	};

/**
 * @publishedAll
 * @released
A class to write Unicode values directly to memory.
 */
class TMemoryUnicodeSink: public MUnicodeSink
	{
	public:
	inline TMemoryUnicodeSink(TUint16* aPtr);
	inline void WriteUnicodeValueL(TUint16 aValue);

	private:
	TUint16* iPtr;	// next position to be written
	};

/**
 * @publishedAll
 * @released
A class to write Unicode values to a stream built on a memory object.
 */
class TMemoryStreamUnicodeSink: public MUnicodeSink
	{
	public:
	inline TMemoryStreamUnicodeSink(RWriteStream& aStream);
	inline void WriteUnicodeValueL(TUint16 aValue);

	private:
	RWriteStream& iStream;	// stream the values are written to
	};

/**
 * @publishedAll
 * @released

A class to hold functions to compress text using the Standard Compression Scheme for Unicode.

A note on error handling and leaving.

Although all the public functions except the constructor can leave, it is possible to guarantee success: that is,
guarantee that a call will not leave, and that compression will be completed. To do this, (i) supply a MUnicodeSource
object with a non-leaving ReadUnicodeValueL function, such as a TMemoryUnicodeSource; (ii) write output to a
RWriteStream with a non-leaving WriteL function, or to a buffer that you already know to be big enough, which can be
found out using CompressedSizeL.

This guarantee of success is particularly useful when compressing from one memory buffer to another.
 */
class TUnicodeCompressor: public TUnicodeCompressionState
	{
	public:
	IMPORT_C TUnicodeCompressor();
	// Two overloads: compressed output goes either to a stream or to a memory buffer.
	// The limits default to KMaxTInt and the count pointers default to NULL.
	// NOTE(review): presumably aOutputBytes/aInputWords, when non-null, receive the
	// amounts actually written/read - confirm against the implementation.
	IMPORT_C void CompressL(RWriteStream& aOutput,MUnicodeSource& aInput,
							TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt,
							TInt* aOutputBytes = NULL,TInt* aInputWords = NULL);
	IMPORT_C void CompressL(TUint8* aOutput,MUnicodeSource& aInput,
							TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt,
							TInt* aOutputBytes = NULL,TInt* aInputWords = NULL);
	IMPORT_C TInt FlushL(RWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes);
	IMPORT_C TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes);
	IMPORT_C static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords);

	private:

	// A structure to store a character and its treatment code
	struct TAction
		{
		// Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode
		enum
			{
			EPlainUnicode = -2,	// character cannot be expressed as ASCII or using static or dynamic windows
			EPlainASCII = -1,	// character can be emitted as an ASCII code
			EFirstDynamic = 0,	// values 0..255 are for dynamic windows with offsets at these places in the offset table
			ELastDynamic = 255,
			EFirstStatic = 256,	// values 256..263 are for static windows 0..7
			ELastStatic = 263
			};

		inline TAction();
		TAction(TUint16 aCode);

		TUint16 iCode;		// Unicode value of the character
		TInt iTreatment;	// treatment code: see above
		};

	void DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
					 TInt aMaxCompressedBytes,TInt aMaxUnicodeWords,
					 TInt* aCompressedBytes,TInt* aUnicodeWords);
	void FlushInputBufferL();
	void FlushOutputBufferL();
	void WriteRunL();
	void WriteCharacter(const TAction& aAction);
	void WriteSCharacter(const TAction& aAction);
	void WriteUCharacter(TUint16 aCode);
	void WriteByte(TUint aByte);
	void WriteCharacterFromBuffer();
	void SelectTreatment(TInt aTreatment);

	enum
		{
		EMaxInputBufferSize = 4,
		EMaxOutputBufferSize = EMaxInputBufferSize * 3	// no Unicode character can be encoded as more than three bytes
		};
	TAction iInputBuffer[EMaxInputBufferSize];	// circular buffer; queue of Unicode characters to be processed
	TInt iInputBufferStart;						// position of first Unicode character to be processed
	TInt iInputBufferSize;						// characters in the input buffer
	TUint8 iOutputBuffer[EMaxOutputBufferSize];	// circular buffer; queue of compressed bytes to be output
	TInt iOutputBufferStart;					// position of first compressed byte to be output
	TInt iOutputBufferSize;						// characters in the output buffer
	TInt iDynamicWindowIndex;					// index of the current dynamic window
	RWriteStream* iOutputStream;				// if non-null, output is to this stream
	TUint8* iOutputPointer;						// if non-null, output is to memory
	MUnicodeSource* iInput;						// input object
	};

/**
 * @publishedAll
 * @released

A class to hold functions to expand text using the Standard Compression Scheme for Unicode.

A note on error handling and leaving.

Although all the public functions except the constructor can leave, it is possible to guarantee success: that is,
guarantee that a call will not leave, and that expansion will be completed. To do this, (i) supply a MUnicodeSink
object with a non-leaving WriteUnicodeValueL function, such as a TMemoryUnicodeSink; (ii) read input from a RReadStream
with a non-leaving ReadL function; (iii) supply a big enough buffer to write the output; you can find out how big by
calling ExpandedSizeL, using methods (i) and (ii) to guarantee success.

This guarantee of success is particularly useful when expanding from one memory buffer to another.
 */
class TUnicodeExpander: public TUnicodeCompressionState
	{
	public:
	IMPORT_C TUnicodeExpander();
	// Two overloads: compressed input comes either from a stream or from a memory buffer.
	// The limits default to KMaxTInt and the count pointers default to NULL.
	// NOTE(review): presumably aOutputWords/aInputBytes, when non-null, receive the
	// amounts actually written/read - confirm against the implementation.
	IMPORT_C void ExpandL(MUnicodeSink& aOutput,RReadStream& aInput,
						  TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt,
						  TInt* aOutputWords = NULL,TInt* aInputBytes = NULL);
	IMPORT_C void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput,
						  TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt,
						  TInt* aOutputWords = NULL,TInt* aInputBytes = NULL);
	IMPORT_C TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords);
	IMPORT_C static TInt ExpandedSizeL(RReadStream& aInput,TInt aInputBytes);
	IMPORT_C static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes);

	private:
	void DoExpandL(MUnicodeSink* aOutput,RReadStream* aInputStream,const TUint8* aInputPointer,
				   TInt aMaxOutputWords,TInt aMaxInputBytes,
				   TInt* aOutputWords,TInt* aInputBytes);
	void HandleByteL();
	void FlushOutputBufferL();
	TBool HandleSByteL(TUint8 aByte);
	TBool HandleUByteL(TUint8 aByte);
	TBool ReadByteL(TUint8& aByte);
	TBool QuoteUnicodeL();
	TBool DefineWindowL(TInt aIndex);
	TBool DefineExpansionWindowL();
	void WriteChar(TText aChar);
	void WriteChar32(TUint aChar);

	enum
		{
		EMaxInputBufferSize = 3,	// no Unicode character can be encoded as more than 3 bytes
		EMaxOutputBufferSize = 2	// no byte can be expanded into more than 2 Unicode characters
		};
	TUint8 iInputBuffer[EMaxInputBufferSize];	// buffer containing a group of compressed bytes representing
												// a single operation; when an input source ends in the
												// middle of an operation, this buffer enables the next
												// expansion to start in the correct state
	TInt iInputBufferStart;						// next read position in the input buffer
	TInt iInputBufferSize;						// bytes in the input buffer
	TUint16 iOutputBuffer[EMaxOutputBufferSize];	// circular buffer; queue of Unicode characters to be output
	TInt iOutputBufferStart;					// position of first Unicode character to be output
	TInt iOutputBufferSize;						// characters in the output buffer
	MUnicodeSink* iOutput;						// output object
	RReadStream* iInputStream;					// if non-null, input is from this stream
	const TUint8* iInputPointer;				// if non-null, input is from memory
	};

// inline functions start here

// Store the address of the first Unicode value to be read.
inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr):
	iPtr(aPtr)
	{
	}

// Return the value at the current position and advance by one value.
// No bounds check is made here.
inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL()
	{
	return *iPtr++;
	}

// Bind to the stream the values will be read from; the stream is held by reference.
inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(RReadStream& aStream):
	iStream(aStream)
	{
	}

// Read sizeof(TUint16) raw bytes from the stream straight into a TUint16 and return it;
// the bytes are not reordered.
inline TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL()
	{
	TUint16 x;
	iStream.ReadL((TUint8*)&x,sizeof(TUint16));
	return x;
	}

// Store the address at which the first Unicode value will be written.
inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr):
	iPtr(aPtr)
	{
	}

// Store aValue at the current position and advance by one value.
// No bounds check is made here.
inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue)
	{
	*iPtr++ = aValue;
	}

// Bind to the stream the values will be written to; the stream is held by reference.
inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(RWriteStream& aStream):
	iStream(aStream)
	{
	}

// Write the sizeof(TUint16) raw bytes of aValue to the stream; the bytes are not reordered.
inline void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue)
	{
	iStream.WriteL((TUint8*)&aValue,sizeof(TUint16));
	}

// Default action: character code 0, treated as plain Unicode.
inline TUnicodeCompressor::TAction::TAction():
	iCode(0),
	iTreatment(EPlainUnicode)
	{
	}

#endif // _UNICODE

#endif // __S32UCMP_H__