os/persistentdata/persistentstorage/store/pcstore/src/unicodecompression.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 // Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies).
     2 // All rights reserved.
     3 // This component and the accompanying materials are made available
     4 // under the terms of "Eclipse Public License v1.0"
     5 // which accompanies this distribution, and is available
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
     7 //
     8 // Initial Contributors:
     9 // Nokia Corporation - initial contribution.
    10 //
    11 // Contributors:
    12 //
    13 // Description:
    14 // Classes defined in this file are used for Unicode compression and decompression.
    15 // Their code is borrowed from Symbian with only minor changes; for example, the "Panic"
    16 // function now exits the program. The Symbian coding standard is kept in the code.
    17 // 
    18 //
    19 
    20 
    21 #if !defined(__UNICODECOMPRESSION_H__)
    22 #define __UNICODECOMPRESSION_H__
    23 
    24 #include <stdio.h>
    25 #include <pcstore/pcstoredef.h>
    26 #include <pcstore/storereadstream.h>
    27 #include <pcstore/storewritestream.h>
    28 #include "pcstoreconst.h"
    29 
    30 namespace PCStore
    31 {
    32 /**
    33 @internalComponent
    34 */
    35 class TUnicodeCompressionState
    36     {
    37     public:
    38     TUnicodeCompressionState();
    39     void Reset();
    40     static TInt StaticWindowIndex(TUint16 aCode);
    41     static TInt DynamicWindowOffsetIndex(TUint16 aCode);
    42     static TUint32 DynamicWindowBase(TInt aOffsetIndex);
    43     static TBool EncodeAsIs(TUint16 aCode);
    44 
    45     enum TPanic
    46         {
    47         EUnhandledByte,         // expander code fails to handle all possible byte codes
    48         ENotUnicode,            // expander can't handle Unicode values outside range 0x0..0x10FFFF;
    49                                 // that is, 16-bit codes plus 32-bit codes that can be expressed using
    50                                 // 16-bit surrogates
    51         EOutputBufferOverflow   // output buffer is not big enough
    52         };
    53 
    54     static void Panic(TPanic aPanic);
    55 
    56     protected:
    57 
    58     enum
    59         {
    60         EStaticWindows = 8,
    61         EDynamicWindows = 8,
    62         ESpecialBases = 7
    63         };
    64 
    65     TBool iUnicodeMode;                                 // TRUE if in Unicode mode as opposed to single-byte mode
    66     TUint32 iActiveWindowBase;                          // base of the active window - bases are 32-bit because they
    67                                                         // can be set to the surrogate area, which represents codes
    68                                                         // from 0x00010000 to 0x0010FFFF - planes 1-16 of ISO-10646.
    69     static const TUint32 iStaticWindow[EStaticWindows]; // bases of the static windows
    70     static const TUint32 iDynamicWindowDefault[EDynamicWindows];    // default bases of the dynamic windows
    71     static const TUint16 iSpecialBase[ESpecialBases];   // bases for window offsets F9..FF
    72 
    73     TUint32 iDynamicWindow[EDynamicWindows];            // bases of the dynamic windows
    74     TInt iUnicodeWords;                                 // Unicode words processed; read by compressor, written by expander
    75     TInt iMaxUnicodeWords;                              // maximum number of Unicode words to read or write
    76     TInt iCompressedBytes;                              // compressed bytes processed: read by expander, written by compressor
    77     TInt iMaxCompressedBytes;                           // maximum number of compressed bytes to read or write
    78     };
    79 
    80 /**
    81 @internalComponent
    82 */
    83 class MUnicodeSource
    84     {
    85     public:
    86     virtual TUint16 ReadUnicodeValueL() = 0;
    87     };
    88 
    89 /**
    90 @internalComponent
    91 
    92 A class to read Unicode values directly from memory.
    93 */
    94 class TMemoryUnicodeSource: public MUnicodeSource
    95     {
    96     public:
    97     inline TMemoryUnicodeSource(const TUint16* aPtr);
    98     inline TUint16 ReadUnicodeValueL();
    99 
   100     private:
   101     const TUint16* iPtr;
   102     };
   103 
   104 /**
   105 @internalComponent
   106  
   107 A class to read Unicode values from a stream built on a memory object.
   108 */
   109 class TMemoryStreamUnicodeSource: public MUnicodeSource
   110     {
   111     public:
   112     inline TMemoryStreamUnicodeSource(CStoreReadStream& aStream);
   113     inline TUint16 ReadUnicodeValueL();
   114 
   115     private:
   116     CStoreReadStream& iStream;
   117     };
   118 
   119 /**
   120 @internalComponent
   121  
   122 */
   123 class MUnicodeSink
   124     {
   125     public:
   126     virtual void WriteUnicodeValueL(TUint16 aValue) = 0;
   127     };
   128 
   129 /**
   130 @internalComponent
   131 
   132 A class to write Unicode values directly to memory.
   133 */
   134 class TMemoryUnicodeSink: public MUnicodeSink
   135     {
   136     public:
   137     inline TMemoryUnicodeSink(TUint16* aPtr);
   138     inline void WriteUnicodeValueL(TUint16 aValue);
   139 
   140     private:
   141     TUint16* iPtr;
   142     };
   143 
   144 /**
   145 @internalComponent
   146 
   147 A class to write Unicode values to a stream built on a memory object.
   148 */
   149 class TMemoryStreamUnicodeSink: public MUnicodeSink
   150     {
   151     public:
   152     inline TMemoryStreamUnicodeSink(CStoreWriteStream& aStream);
   153     inline void WriteUnicodeValueL(TUint16 aValue);
   154 
   155     private:
   156     CStoreWriteStream& iStream;
   157     };
   158 
   159 /**
   160 @internalComponent
   161  
   162 A class to hold functions to compress text using the Standard Compression Scheme for Unicode.
   163 */
   164 class TUnicodeCompressor: public TUnicodeCompressionState
   165     {
   166     public:
   167     TUnicodeCompressor();
   168     void CompressL(CStoreWriteStream& aOutput,MUnicodeSource& aInput,
   169                             TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt,
   170                             TInt* aOutputBytes = static_cast<TInt*>(NULL),TInt* aInputWords = static_cast<TInt*>(NULL));
   171     void CompressL(TUint8* aOutput,MUnicodeSource& aInput,
   172                             TInt aMaxOutputBytes = KMaxTInt,TInt aMaxInputWords = KMaxTInt,
   173                             TInt* aOutputBytes = static_cast<TInt*>(NULL),TInt* aInputWords = static_cast<TInt*>(NULL));
   174     TInt FlushL(CStoreWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes);
   175     TInt FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes);
   176     static TInt CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords);
   177 
   178     private:
   179 
   180      // A structure to store a character and its treatment code
   181     struct TAction
   182         {
   183         // Treatment codes: static and dynamic window numbers, plain ASCII or plain Unicode
   184         enum
   185             {
   186             EPlainUnicode = -2, // character cannot be expressed as ASCII or using static or dynamic windows
   187             EPlainASCII = -1,   // character can be emitted as an ASCII code
   188             EFirstDynamic = 0,  // values 0..255 are for dynamic windows with offsets at these places in the offset table
   189             ELastDynamic = 255,
   190             EFirstStatic = 256, // values 256..263 are for static windows 0..7
   191             ELastStatic = 263
   192             };
   193 
   194         inline TAction();
   195         TAction(TUint16 aCode);
   196 
   197         TUint16 iCode;      // Unicode value of the character
   198         TInt iTreatment;    // treatment code: see above
   199         };
   200 
   201     void DoCompressL(CStoreWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
   202                      TInt aMaxCompressedBytes,TInt aMaxUnicodeWords,
   203                      TInt* aCompressedBytes,TInt* aUnicodeWords);
   204     void FlushInputBufferL();
   205     void FlushOutputBufferL();
   206     void WriteRunL();
   207     void WriteCharacter(const TAction& aAction);
   208     void WriteSCharacter(const TAction& aAction);
   209     void WriteUCharacter(TUint16 aCode);
   210     void WriteByte(TUint aByte);
   211     void WriteCharacterFromBuffer();
   212     void SelectTreatment(TInt aTreatment);
   213 
   214     enum
   215         {
   216         EMaxInputBufferSize = 4,
   217         EMaxOutputBufferSize = EMaxInputBufferSize * 3  // no Unicode character can be encoded as more than three bytes
   218         };
   219     TAction iInputBuffer[EMaxInputBufferSize];          // circular buffer; queue of Unicode characters to be processed
   220     TInt iInputBufferStart;                             // position of first Unicode character to be processed
   221     TInt iInputBufferSize;                              // characters in the input buffer
   222     TUint8 iOutputBuffer[EMaxOutputBufferSize];         // circular buffer; queue of compressed bytes to be output
   223     TInt iOutputBufferStart;                            // position of first compressed byte to be output
   224     TInt iOutputBufferSize;                             // characters in the output buffer
   225     TInt iDynamicWindowIndex;                           // index of the current dynamic window
   226     CStoreWriteStream* iOutputStream;                        // if non-null, output is to this stream
   227     TUint8* iOutputPointer;                             // if non-null, output is to memory
   228     MUnicodeSource* iInput;                             // input object
   229     };
   230 
   231 /**
   232 @internalComponent
   233 
   234 A class to hold functions to expand text using the Standard Compression Scheme for Unicode.
   235 */
   236 class TUnicodeExpander: public TUnicodeCompressionState
   237     {
   238     public:
   239     TUnicodeExpander();
   240     void ExpandL(MUnicodeSink& aOutput,CStoreReadStream& aInput,
   241                           TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt,
   242                           TInt* aOutputWords = static_cast<TInt*>(NULL),TInt* aInputBytes = static_cast<TInt*>(NULL));
   243     void ExpandL(MUnicodeSink& aOutput,const TUint8* aInput,
   244                           TInt aMaxOutputWords = KMaxTInt,TInt aMaxInputBytes = KMaxTInt,
   245                           TInt* aOutputWords = static_cast<TInt*>(NULL),TInt* aInputBytes = static_cast<TInt*>(NULL));
   246     TInt FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords);
   247     static TInt ExpandedSizeL(CStoreReadStream& aInput,TInt aInputBytes);
   248     static TInt ExpandedSizeL(const TUint8* aInput,TInt aInputBytes);
   249 
   250     private:
   251     void DoExpandL(MUnicodeSink* aOutput,CStoreReadStream* aInputStream,const TUint8* aInputPointer,
   252                    TInt aMaxOutputWords,TInt aMaxInputBytes,
   253                    TInt* aOutputWords,TInt* aInputBytes);
   254     void HandleByteL();
   255     void FlushOutputBufferL();
   256     TBool HandleSByteL(TUint8 aByte);
   257     TBool HandleUByteL(TUint8 aByte);
   258     TBool ReadByteL(TUint8& aByte);
   259     TBool QuoteUnicodeL();
   260     TBool DefineWindowL(TInt aIndex);
   261     TBool DefineExpansionWindowL();
   262     void WriteChar(TText aChar);
   263     void WriteChar32(TUint aChar);
   264 
   265     enum
   266         {
   267         EMaxInputBufferSize = 3,                        // no Unicode character can be encoded as more than 3 bytes
   268         EMaxOutputBufferSize = 2                        // no byte can be expanded into more than 2 Unicode characters
   269         };
   270     TUint8 iInputBuffer[EMaxInputBufferSize];           // buffer containing a group of compressed bytes representing
   271                                                         // a single operation; when an input source ends in the
   272                                                         // middle of an operation, this buffer enables the next
   273                                                         // expansion to start in the correct state
   274     TInt iInputBufferStart;                             // next read position in the input buffer
   275     TInt iInputBufferSize;                              // bytes in the input buffer
   276     TUint16 iOutputBuffer[EMaxOutputBufferSize];        // circular buffer; queue of Unicode characters to be output
   277     TInt iOutputBufferStart;                            // position of first Unicode character to be output
   278     TInt iOutputBufferSize;                             // characters in the output buffer
   279     MUnicodeSink* iOutput;                              // output object
   280     CStoreReadStream* iInputStream;                          // if non-null, input is from this stream
   281     const TUint8* iInputPointer;                        // if non-null, input is from memory
   282     };
   283 
   284 // inline functions start here
   285 
   286 inline TMemoryUnicodeSource::TMemoryUnicodeSource(const TUint16* aPtr):
   287     iPtr(aPtr)
   288     {
   289     }
   290 
   291 inline TUint16 TMemoryUnicodeSource::ReadUnicodeValueL()
   292     {
   293     return *iPtr++;
   294     }
   295 
   296 inline TMemoryStreamUnicodeSource::TMemoryStreamUnicodeSource(CStoreReadStream& aStream):
   297     iStream(aStream)
   298     {
   299     }
   300 
   301 inline TUint16 TMemoryStreamUnicodeSource::ReadUnicodeValueL()
   302     {
   303     TUint16 x;
   304     iStream.Read(reinterpret_cast<TUint8*>(&x),sizeof(TUint16));
   305     return x;
   306     }
   307 
   308 inline TMemoryUnicodeSink::TMemoryUnicodeSink(TUint16* aPtr):
   309     iPtr(aPtr)
   310     {
   311     }
   312 
   313 inline void TMemoryUnicodeSink::WriteUnicodeValueL(TUint16 aValue)
   314     {
   315     *iPtr++ = aValue;
   316     }
   317 
   318 inline TMemoryStreamUnicodeSink::TMemoryStreamUnicodeSink(CStoreWriteStream& aStream):
   319     iStream(aStream)
   320     {
   321     }
   322 
   323 inline void TMemoryStreamUnicodeSink::WriteUnicodeValueL(TUint16 aValue)
   324     {
   325     iStream.Write(reinterpret_cast<TUint8*>(&aValue),sizeof(TUint16));
   326     }
   327 
   328 inline TUnicodeCompressor::TAction::TAction():
   329     iCode(0),
   330     iTreatment(EPlainUnicode)
   331     {
   332     }
   333 }
   334 #endif // !defined(__UNICODECOMPRESSION_H__)