os/persistentdata/persistentstorage/dbms/pcdbms/ustor/US_COMP.CPP
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
     2 // All rights reserved.
     3 // This component and the accompanying materials are made available
     4 // under the terms of "Eclipse Public License v1.0"
     5 // which accompanies this distribution, and is available
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
     7 //
     8 // Initial Contributors:
     9 // Nokia Corporation - initial contribution.
    10 //
    11 // Contributors:
    12 //
    13 // Description:
    14 // Store database compression code
    15 // 
    16 //
    17 
    18 #include "US_STD.H"
    19 #include <s32mem.h>
    20 #include "U32STD_DBMS.H"
    21 
    22 #ifdef __WINS__
    23 #define __EXTRA_DEFLATE
    24 #endif
    25 
    26 // deflation constants
    27 const TInt KDeflateMinLength=3;
    28 const TInt KDeflateMaxLength=258;
    29 const TInt KDeflateMaxDistance=4096;
    30 const TInt KDeflateDistCodeBase=0x200;
    31 // huffman coding/decoding
    32 const TInt KHuffMaxCodeLength=25;
    33 const TInt KHuffTerminate=1;
    34 const TUint KBitsEmpty=0x80000000u;
    35 const TUint KBitsInit=KBitsEmpty>>1;
    36 const TUint KBitsFull=KBitsEmpty>>8;
    37 const TUint KBitsEOF=KBitsEmpty>>9;
    38 const TUint KBitsNext=0x80u;
    39 // encoding storage
    40 const TInt KDeflateMetaCodes=26;
    41 // hashing
    42 const TUint KDeflateHashMultiplier=0xAC4B9B19u;
    43 const TInt KDeflateHashShift=24;
    44 
    45 class Huffman
    46 	{
    47 public:
    48 	static void EncodingL(TUint32* aEncoding,TInt aCodes);
    49 	static void Decoding(TUint32* aDecoding,TInt aCodes,TInt aBase=0);
    50 private:
    51 	typedef TUint16 THuff;
    52 	enum {KLeaf=0x8000};
    53 	struct TNode
    54 		{
    55 		THuff iLeft;
    56 		THuff iRight;
    57 		};
    58 	struct TLeaf
    59 		{
    60 		TUint iCount;
    61 		THuff iVal;
    62 		};
    63 private:
    64 	static void Lengths(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen);
    65 	static TUint32* SubTree(TUint32* aPtr,const TUint32* aValue,TUint32** aLevel);
    66 	};
    67 
    68 class THuffEncoder
    69 	{
    70 public:
    71 	THuffEncoder(RWriteStream& aStream);
    72 //
    73 	void EncodeL(TUint aCode,TInt aLength);
    74 	void EncodeL(TUint32 aHuffCode);
    75 	void CompleteL();
    76 private:
    77 	enum {EBufSize=0x100};
    78 private:
    79 	TUint8 iBuf[EBufSize];
    80 	RWriteStream& iStream;
    81 	TUint32 iCode;		// code in production
    82 	TInt iBits;
    83 	TUint8* iWrite;
    84 	};
    85 
    86 class HDeflateHash
    87 	{
    88 public:
    89 	inline static HDeflateHash& NewLC(TInt aLinks);
    90 //
    91 	inline TInt First(const TUint8* aPtr,TInt aPos);
    92 	inline TInt Next(TInt aPos,TInt aOffset) const;
    93 private:
    94 	inline HDeflateHash();
    95 	inline static TInt Hash(const TUint8* aPtr);
    96 private:
    97 	typedef TUint16 TOffset;
    98 private:
    99 	TInt iHash[256];
   100 	TOffset iOffset[1];	// or more
   101 	};
   102 
   103 class MDeflater
   104 	{
   105 public:
   106 	void DeflateL(const TUint8* aBase,TInt aLength);
   107 private:
   108 	const TUint8* DoDeflateL(const TUint8* aBase,const TUint8* aEnd,HDeflateHash& aHash);
   109 	static TInt Match(const TUint8* aPtr,const TUint8* aEnd,TInt aPos,HDeflateHash& aHas);
   110 	void SegmentL(TInt aLength,TInt aDistance);
   111 	virtual void LitLenL(TInt aCode) =0;
   112 	virtual void OffsetL(TInt aCode) =0;
   113 	virtual void ExtraL(TInt aLen,TUint aBits) =0;
   114 	};
   115 
   116 class TInflater
   117 	{
   118 public:
   119 	TInflater(const TUint8* aIn,const CDbStoreCompression::TEncoding& aDecoding);
   120 	TInt Inflate();
   121 	inline const TUint8* Ptr() const;
   122 	inline static TInt BufferSize();
   123 private:
   124 	const TUint8* iIn;
   125 	TUint iBits;
   126 	const TUint8* iRptr;			// partial segment
   127 	TInt iLen;
   128 	const TUint32* iLitLenTree;
   129 	const TUint32* iDistTree;
   130 	TUint8 iOut[KDeflateMaxDistance];	// circular buffer for distance matches
   131 	};
   132 
   133 NONSHARABLE_CLASS(TDeflateStats) : public MDeflater
   134 	{
   135 public:
   136 	inline TDeflateStats(CDbStoreCompression::TEncoding& aEncoding);
   137 private:
   138 // from MDeflater
   139 	void LitLenL(TInt aCode);
   140 	void OffsetL(TInt aCode);
   141 	void ExtraL(TInt aLen,TUint aBits);
   142 private:
   143 	CDbStoreCompression::TEncoding& iEncoding;
   144 	};
   145 
   146 NONSHARABLE_CLASS(TDeflater) : public MDeflater
   147 	{
   148 public:
   149 	inline TDeflater(THuffEncoder& aEncoder,const CDbStoreCompression::TEncoding& aEncoding);
   150 private:
   151 // from MDeflater
   152 	void LitLenL(TInt aCode);
   153 	void OffsetL(TInt aCode);
   154 	void ExtraL(TInt aLen,TUint aBits);
   155 private:
   156 	THuffEncoder& iEncoder;
   157 	const CDbStoreCompression::TEncoding& iEncoding;
   158 	};
   159 
   160 NONSHARABLE_CLASS(HDeflateBuf) : public TBufBuf
   161 	{
   162 public:
   163 	enum TMode {EAnalysis,EDeflate};	// mirror CDbStoreCompression enum
   164 public:
   165 	static HDeflateBuf* NewL(MStreamBuf* aHost,CDbStoreCompression::TEncoding& aEncoding,TMode aMode);
   166 private:
   167 	inline HDeflateBuf(MStreamBuf* aHost,CDbStoreCompression::TEncoding& aEncoding,CBufBase* aBuf,TMode aMode);
   168 	virtual inline ~HDeflateBuf();
   169 // from MStreamBuf
   170 	void DoSynchL();
   171 	void DoRelease();
   172 private:
   173 	RWriteStream iHost;
   174 	CDbStoreCompression::TEncoding& iEncoding;
   175 	CBufBase* iBuf;
   176 	TMode iMode;
   177 	};
   178 
   179 NONSHARABLE_CLASS(HInflateBuf) : public TBufBuf
   180 	{
   181 public:
   182 	static HInflateBuf* NewL(MStreamBuf* aHost,const CDbStoreCompression::TEncoding& aEncoding);
   183 private:
   184 	inline HInflateBuf(CBufBase* aBuf);
   185 	virtual inline ~HInflateBuf();
   186 // from MStreamBuf
   187 	void DoRelease();
   188 private:
   189 	CBufBase* iBuf;
   190 	};
   191 
   192 NONSHARABLE_CLASS(CDbStoreTable::CCompressor) : public CBase, public CCluster::MAlter
   193 	{
   194 public:
   195 	inline CCompressor();
   196 	~CCompressor();
   197 	void ProcessL(CDbStoreTable* aTable);
   198 private:
   199 	TUint8* AlterRecordL(TUint8* aWPtr,const TUint8* aRPtr,TInt aLength);
   200 private:
   201 	CDbStoreTable* iTable;
   202 	RDbRow iRow;
   203 	};
   204 
   205 // Class Huffman
   206 //
   207 // This class builds a huffman encoding from a frequency table and builds
   208 // a decoding tree from a code-lengths table
   209 //
   210 // the encoding generated is based on the rule that given two symbols s1 and s2, with 
   211 // code length l1 and l2, and huffman codes h1 and h2:
   212 //
   213 // if l1<l2 then h1<h2 when compared lexicographically
   214 // if l1==l2 and s1<s2 then h1<h2 ditto
   215 //
   216 // This allows the encoding to be stored compactly as a table of code lengths
   217 
   218 //
   219 // recursive function to calculate the code lengths from the node tree
   220 //
   221 void Huffman::Lengths(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
   222 	{
   223 	__ASSERT(aLen<KHuffMaxCodeLength);
   224 	++aLen;
   225 	const TNode& node=aNodes[aNode];
   226 	if (node.iLeft&KLeaf)
   227 		aLengths[node.iLeft&~KLeaf]=aLen;
   228 	else
   229 		Lengths(aLengths,aNodes,node.iLeft,aLen);
   230 	if (node.iRight&KLeaf)
   231 		aLengths[node.iRight&~KLeaf]=aLen;
   232 	else
   233 		Lengths(aLengths,aNodes,node.iRight,aLen);
   234 	}
   235 
   236 //
   237 // write the subtree below aPtr and return the head
   238 //
   239 TUint32* Huffman::SubTree(TUint32* aPtr,const TUint32* aValue,TUint32** aLevel)
   240 	{
   241 	TUint32* l=*aLevel++;
   242 	if (l>aValue)
   243 		{
   244 		TUint32* sub1=SubTree(aPtr,aValue,aLevel);	// 0-tree first
   245 		aPtr=SubTree(sub1,aValue-(aPtr-sub1)-1,aLevel);			// 1-tree
   246 		TInt branch=(TUint8*)sub1-(TUint8*)aPtr;
   247 		*--aPtr=branch;
   248 		}
   249 	else if (l==aValue)
   250 		{
   251 		TUint term0=*aValue--;						// 0-term
   252 		aPtr=SubTree(aPtr,aValue,aLevel);			// 1-tree
   253 		*--aPtr=term0>>16;
   254 		}
   255 	else	// l<iNext
   256 		{
   257 		TUint term0=*aValue--;						// 0-term
   258 		TUint term1=*aValue--;
   259 		*--aPtr=(term1>>16<<16)|(term0>>16);
   260 		}
   261 	return aPtr;
   262 	}
   263 
   264 //
   265 // Build a huffman encoding table from a symbol frequency table
   266 // aTable contains frequency data on input for aCodes symbols
   267 // aTable contains the huffman encoding on output
   268 //
   269 void Huffman::EncodingL(TUint32* aTable,TInt aCodes)
   270 	{
   271 //
   272 // step 1. Sort the values into decreasing order of frequency
   273 //
   274 	TLeaf* leaves=new(ELeave) TLeaf[aCodes];
   275 	CleanupArrayDeletePushL(leaves);
   276 	TInt lCount=0;
   277 	TInt ii;
   278 	for (ii=0;ii<aCodes;++ii)
   279 		{
   280 		TUint c=aTable[ii];
   281 		if (c==0)
   282 			continue;	// no coding for ii
   283 		TInt jj;
   284 		for (jj=0;jj<lCount;++jj)
   285 			{
   286 			if (leaves[jj].iCount<=c)
   287 				break;
   288 			}
   289 		Mem::Move(leaves+jj+1,leaves+jj,sizeof(TLeaf)*(lCount-jj));
   290 		leaves[jj].iCount=c;
   291 		leaves[jj].iVal=THuff(ii|KLeaf);
   292 		lCount++;
   293 		}
   294 //
   295 // Huffman algorithm: pair off least frequent nodes and reorder
   296 // result is the code lengths in aTable[]
   297 //
   298 	if (lCount==1)	// special case for a single value (always encode as "0")
   299 		aTable[leaves[0].iVal&~KLeaf]=1;
   300 	else if (lCount>1)
   301 		{	//	don't encode for empty coding: leaves in order now
   302 		TInt max=0;
   303 		TNode* nodes=new(ELeave) TNode[lCount-1];
   304 		while (--lCount>0)
   305 			{
   306 			TNode& node=nodes[max];
   307 			node.iLeft=leaves[lCount-1].iVal;
   308 			node.iRight=leaves[lCount].iVal;
   309 			// re-order the leaves now to reflect new combined frequency
   310 			TUint c=leaves[lCount-1].iCount+leaves[lCount].iCount;
   311 			TInt jj=lCount;
   312 			while (--jj>0)
   313 				{
   314 				if (leaves[jj-1].iCount>=c)
   315 					break;
   316 				}
   317 			Mem::Move(leaves+jj+1,leaves+jj,sizeof(TLeaf)*(lCount-1-jj));
   318 			// update new leaf
   319 			leaves[jj].iCount=c;
   320 			leaves[jj].iVal=THuff(max);
   321 			max++;
   322 			}
   323 		Lengths(aTable,nodes,leaves[0].iVal,0);
   324 		delete[] nodes;
   325 		}
   326 	CleanupStack::PopAndDestroy();		// leaves
   327 //
   328 // step 3: Generate the coding based on the code lengths
   329 //
   330 	TInt lenCount[KHuffMaxCodeLength];
   331 	Mem::FillZ(lenCount,sizeof(lenCount));
   332 
   333 	for (ii=aCodes;--ii>=0;)
   334 		{
   335 		TInt len=aTable[ii]-1;
   336 		if (len>=0)
   337 			++lenCount[len];
   338 		}
   339 
   340 	TUint nextCode[KHuffMaxCodeLength];
   341 	TUint code=0;
   342 	for (ii=0;ii<KHuffMaxCodeLength;++ii)
   343 		{
   344 		nextCode[ii]=code;
   345 		code=(code+lenCount[ii])<<1;
   346 		}
   347 
   348 	for (ii=0;ii<aCodes;++ii)
   349 		{
   350 		TInt len=aTable[ii];
   351 		if (len)
   352 			{
   353 			aTable[ii] = (nextCode[len-1]<<(KHuffMaxCodeLength-len))|(len<<KHuffMaxCodeLength);
   354 			++nextCode[len-1];
   355 			}
   356 		}
   357 	}
   358 
   359 //
   360 // generate the decoding tree from the huffman code length data
   361 // output alphabet is [aBase,aBase+aCodes)
   362 //
   363 void Huffman::Decoding(TUint32* aDecoding,TInt aCodes,TInt aBase)
   364 	{
   365 	TInt counts[KHuffMaxCodeLength];
   366 	Mem::FillZ(counts,sizeof(counts));
   367 	TInt codes=0;
   368 	TInt ii=aCodes;
   369 	while (--ii>=0)
   370 		{
   371 		TUint len=aDecoding[ii];
   372 		__ASSERT(len<=(TUint)KHuffMaxCodeLength);
   373 		if (len)
   374 			{
   375 			++counts[len-1];
   376 			++codes;
   377 			}
   378 		}
   379 //
   380 	TUint32* level[KHuffMaxCodeLength];
   381 	TUint32* lit=aDecoding+codes;
   382 	for (ii=0;ii<KHuffMaxCodeLength;++ii)
   383 		{
   384 		level[ii]=lit;
   385 		lit-=counts[ii];
   386 		}
   387 	aBase=(aBase<<17)+(KHuffTerminate<<16);
   388 	for (ii=0;ii<aCodes;++ii)
   389 		{
   390 		TUint len=TUint8(aDecoding[ii]);
   391 		if (len)
   392 			*--level[len-1]|=(ii<<17)+aBase;
   393 		}
   394 	if (codes==1)	// codes==1 special case: tree is not complete
   395 		*aDecoding>>=16;	// 0-terminate at root
   396 	else if (codes>1)
   397 		{
   398 		__DEBUG(TUint32* p =)SubTree(aDecoding+codes-1,aDecoding+codes-1,level);
   399 		__ASSERT(p==aDecoding);
   400 		}
   401 	}
   402 
   403 // Class HDeflateHash
   404 
   405 inline HDeflateHash::HDeflateHash()
   406 	{TInt* p=iHash+256;do *--p=-KDeflateMaxDistance-1; while (p>iHash);}
   407 
   408 inline HDeflateHash& HDeflateHash::NewLC(TInt aLinks)
   409 	{
   410 	__ASSERT(!(KDeflateMaxDistance&(KDeflateMaxDistance-1)));	// ensure power of two
   411 	return *new(User::AllocLC(_FOFF(HDeflateHash,iOffset[Min(aLinks,KDeflateMaxDistance)]))) HDeflateHash;
   412 	}
   413 
   414 inline TInt HDeflateHash::Hash(const TUint8* aPtr)
   415 	{
   416 	TUint x=aPtr[0]|(aPtr[1]<<8)|(aPtr[2]<<16);
   417 	return (x*KDeflateHashMultiplier)>>KDeflateHashShift;
   418 	}
   419 
   420 inline TInt HDeflateHash::First(const TUint8* aPtr,TInt aPos)
   421 	{
   422 	TInt h=Hash(aPtr);
   423 	TInt offset=Min(aPos-iHash[h],KDeflateMaxDistance<<1);
   424 	iHash[h]=aPos;
   425 	iOffset[aPos&(KDeflateMaxDistance-1)]=TOffset(offset);
   426 	return offset;
   427 	}
   428 
   429 inline TInt HDeflateHash::Next(TInt aPos,TInt aOffset) const
   430 	{return aOffset+iOffset[(aPos-aOffset)&(KDeflateMaxDistance-1)];}
   431 
   432 
   433 // Class TDeflater
   434 //
   435 // generic deflation algorithm, can do either statistics and the encoder
   436 
   437 TInt MDeflater::Match(const TUint8* aPtr,const TUint8* aEnd,TInt aPos,HDeflateHash& aHash)
   438 	{
   439 	TInt offset=aHash.First(aPtr,aPos);
   440 	if (offset>KDeflateMaxDistance)
   441 		return 0;
   442 	TInt match=0;
   443 	aEnd=Min(aEnd,aPtr+KDeflateMaxLength);
   444 	TUint8 c=*aPtr;
   445 	do
   446 		{
   447 		const TUint8* p=aPtr-offset;
   448 		if (p[match>>16]==c)
   449 			{	// might be a better match
   450 			const TUint8* m=aPtr;
   451 			for (;;)
   452 				{
   453 				if (*p++!=*m++)
   454 					break;
   455 				if (m<aEnd)
   456 					continue;
   457 				return ((m-aPtr)<<16)|offset;
   458 				}
   459 			TInt l=m-aPtr-1;
   460 			if (l>match>>16)
   461 				{
   462 				match=(l<<16)|offset;
   463 				c=m[-1];
   464 				}
   465 			}
   466 		offset=aHash.Next(aPos,offset);
   467 		} while (offset<=KDeflateMaxDistance);
   468 	return match;
   469 	}
   470 
   471 //
   472 // Apply the deflation algorithm to the data [aBase,aEnd)
   473 // Return a pointer after the last byte that was deflated (which may not be aEnd)
   474 //
   475 const TUint8* MDeflater::DoDeflateL(const TUint8* aBase,const TUint8* aEnd,HDeflateHash& aHash)
   476 	{
   477 	__ASSERT(aEnd-aBase>KDeflateMinLength);
   478 //
   479 	const TUint8* ptr=aBase;
   480 #ifdef __EXTRA_DEFLATE
   481 	TInt prev=0;		// the previous deflation match
   482 #endif
   483 	do
   484 		{
   485 		TInt match=Match(ptr,aEnd,ptr-aBase,aHash);
   486 #ifdef __EXTRA_DEFLATE
   487 // Extra deflation applies two optimisations which double the time taken
   488 // 1. If we have a match at p, then test for a better match at p+1 before using it
   489 // 2. When we have a match, add the hash links for all the data which will be skipped 
   490 		if (match>>16 < prev>>16)
   491 			{	// use the previous match--it was better
   492 			TInt len=prev>>16;
   493 			SegmentL(len,prev-(len<<16));
   494 			// fill in missing hash entries for better compression
   495 			const TUint8* e=ptr+len-2;
   496 			do
   497 				{
   498 				++ptr;
   499 				aHash.First(ptr,ptr-aBase);
   500 				} while (ptr<e);
   501 			prev=0;
   502 			}
   503 		else if (match<=(KDeflateMinLength<<16))
   504 			LitLenL(*ptr);			// no deflation match here
   505 		else
   506 			{	// save this match and test the next position
   507 			if (prev)	// we had a match at ptr-1, but this is better
   508 				LitLenL(ptr[-1]);
   509 			prev=match;
   510 			}
   511 		++ptr;
   512 #else
   513 // Basic deflation will store any match found, and not update the hash links for the
   514 // data which is skipped
   515 		if (match<=(KDeflateMinLength<<16))		// no match
   516 			LitLenL(*ptr++);
   517 		else
   518 			{	// store the match
   519 			TInt len=match>>16;
   520 			SegmentL(len,match-(len<<16));
   521 			ptr+=len;
   522 			}
   523 #endif
   524 		} while (ptr+KDeflateMinLength-1<aEnd);
   525 #ifdef __EXTRA_DEFLATE
   526 	if (prev)
   527 		{		// emit the stored match
   528 		TInt len=prev>>16;
   529 		SegmentL(len,prev-(len<<16));
   530 		ptr+=len-1;
   531 		}
   532 #endif
   533 	return ptr;
   534 	}
   535 
   536 //
   537 // The generic deflation algorithm
   538 //
   539 void MDeflater::DeflateL(const TUint8* aBase,TInt aLength)
   540 	{
   541 	const TUint8* end=aBase+aLength;
   542 	if (aLength>KDeflateMinLength)
   543 		{	// deflation kicks in if there is enough data
   544 		HDeflateHash& hash=HDeflateHash::NewLC(aLength);
   545 		aBase=DoDeflateL(aBase,end,hash);
   546 		CleanupStack::PopAndDestroy();
   547 		}
   548 	while (aBase<end)					// emit remaining bytes
   549 		LitLenL(*aBase++);
   550 	LitLenL(CDbStoreCompression::TEncoding::EEos);	// eos marker
   551 	}
   552 
   553 //
   554 // Turn a (length,offset) pair into the deflation codes+extra bits before calling
   555 // the specific LitLen(), Offset() and Extra() functions.
   556 //
   557 void MDeflater::SegmentL(TInt aLength,TInt aDistance)
   558 	{
   559 	__ASSERT(aLength>=KDeflateMinLength && aLength<=KDeflateMaxLength);
   560 	aLength-=KDeflateMinLength;
   561 	TInt extralen=0;
   562 	TUint len=aLength;
   563 	while (len>=8)
   564 		{
   565 		++extralen;
   566 		len>>=1;
   567 		}
   568 	__ASSERT((extralen<<2)+len<CDbStoreCompression::TEncoding::ELengths);
   569 	LitLenL((extralen<<2)+len+CDbStoreCompression::TEncoding::ELiterals);
   570 	if (extralen)
   571 		ExtraL(extralen,TUint(aLength)<<(32-extralen));
   572 //
   573 	__ASSERT(aDistance>0 && aDistance<=KDeflateMaxDistance);
   574 	aDistance--;
   575 	extralen=0;
   576 	TUint dist=aDistance;
   577 	while (dist>=8)
   578 		{
   579 		++extralen;
   580 		dist>>=1;
   581 		}
   582 	__ASSERT((extralen<<2)+dist<CDbStoreCompression::TEncoding::EDistances);
   583 	OffsetL((extralen<<2)+dist);
   584 	if (extralen)
   585 		ExtraL(extralen,TUint(aDistance)<<(32-extralen));
   586 	}
   587 
   588 // Class TDeflateStats
   589 //
   590 // This class analyses the data stream to generate the frequency tables 
   591 // for the deflation algorithm
   592 
   593 inline TDeflateStats::TDeflateStats(CDbStoreCompression::TEncoding& aEncoding)
   594 	:iEncoding(aEncoding)
   595 	{}
   596 
   597 void TDeflateStats::LitLenL(TInt aCode)
   598 	{
   599 	++iEncoding.iLitLen[aCode];
   600 	}
   601 
   602 void TDeflateStats::OffsetL(TInt aCode)
   603 	{
   604 	++iEncoding.iDistance[aCode];
   605 	}
   606 
   607 void TDeflateStats::ExtraL(TInt,TUint)
   608 	{}
   609 
   610 // Class THuffEncoder
   611 //
   612 // This class generates the byte stream of huffman codes, writing them out to the stream
   613 
   614 THuffEncoder::THuffEncoder(RWriteStream& aStream)
   615 	:iStream(aStream),iCode(0),iBits(-8),iWrite(iBuf)
   616 	{}
   617 
   618 //
   619 // Store a huffman code generated by Huffman::EncodingL()
   620 //
   621 void THuffEncoder::EncodeL(TUint32 aHuffCode)
   622 	{
   623 	EncodeL(aHuffCode<<(32-KHuffMaxCodeLength),aHuffCode>>KHuffMaxCodeLength);
   624 	}
   625 
   626 //
   627 // Store aLength bits from the most significant bits of aCode
   628 //
   629 void THuffEncoder::EncodeL(TUint aCode,TInt aLength)
   630 	{
   631 	TInt bits=iBits;
   632 	TUint code=iCode|(aCode>>(bits+8));
   633 	bits+=aLength;
   634 	if (bits>=0)
   635 		{
   636 		TUint8* write=iWrite;
   637 		do
   638 			{
   639 			if (write-EBufSize==iBuf)
   640 				{
   641 				iStream.WriteL(iBuf,EBufSize);
   642 				write=iBuf;
   643 				}
   644 			*write++=TUint8(code>>24);
   645 			code<<=8;
   646 			bits-=8;
   647 			} while (bits>=0);
   648 		iWrite=write;
   649 		}
   650 	iCode=code;
   651 	iBits=bits;
   652 	}
   653 
   654 //
   655 // Terminate the huffman coding. The longest code is always 1111111111
   656 //
   657 void THuffEncoder::CompleteL()
   658 	{
   659 	if (iBits>-8)
   660 		EncodeL(0xffffffffu,-iBits);
   661 	if (iWrite>iBuf)
   662 		iStream.WriteL(iBuf,iWrite-iBuf);
   663 	}
   664 
   665 // Class TDeflater
   666 //
   667 // Extends MDeflater to provide huffman encoding of the output
   668 
   669 //
   670 // construct for encoding
   671 //
   672 inline TDeflater::TDeflater(THuffEncoder& aEncoder,const CDbStoreCompression::TEncoding& aEncoding)
   673 	:iEncoder(aEncoder),iEncoding(aEncoding)
   674 	{}
   675 
   676 void TDeflater::LitLenL(TInt aCode)
   677 	{
   678 	iEncoder.EncodeL(iEncoding.iLitLen[aCode]);
   679 	}
   680 
   681 void TDeflater::OffsetL(TInt aCode)
   682 	{
   683 	iEncoder.EncodeL(iEncoding.iDistance[aCode]);
   684 	}
   685 
   686 void TDeflater::ExtraL(TInt aLen,TUint aBits)
   687 	{
   688 	iEncoder.EncodeL(aBits,aLen);
   689 	}
   690 
   691 // Class TInflater
   692 //
   693 // The inflation algorithm, complete with huffman decoding
   694 
   695 TInflater::TInflater(const TUint8* aIn,const CDbStoreCompression::TEncoding& aEncoding)
   696 	:iIn(aIn),iBits(KBitsInit),iLen(0),iLitLenTree(aEncoding.iLitLen),iDistTree(aEncoding.iDistance)
   697 	{}
   698 
   699 //
   700 // consume all data lag in the history buffer, then decode to fill up the output buffer
   701 //
   702 TInt TInflater::Inflate()
   703 	{
   704 // empty the history buffer into the output
   705 	const TUint8* data=iIn;
   706 	TUint bits=iBits;
   707 	const TUint8* from=iRptr;
   708 	TInt len=iLen;
   709 	TUint8* out=iOut;
   710 	TUint8* const end=out+KDeflateMaxDistance;
   711 	const TUint32* node;
   712 	if (len)
   713 		goto useHistory;
   714 //
   715 	if (bits&KBitsEOF)
   716 		return 0;
   717 //
   718 	node=iLitLenTree;
   719 	while (out<end)
   720 		{
   721 		// get a huffman code
   722 		{
   723 		TUint huff;
   724 		for (;;)
   725 			{
   726 			huff=*node++;
   727 			if ((bits<<=1)&KBitsEmpty)
   728 				bits=*data++|KBitsFull;
   729 			if (bits&KBitsNext)
   730 				{
   731 				if (huff&(KHuffTerminate<<16))
   732 					break;
   733 				}
   734 			else
   735 				{
   736 				if (huff&KHuffTerminate)
   737 					{
   738 					huff<<=16;
   739 					break;
   740 					}
   741 				else
   742 					node=PtrAdd(node,huff);
   743 				}
   744 			}
   745 		TInt val=TInt(huff>>17)-CDbStoreCompression::TEncoding::ELiterals;
   746 		if (val<0)
   747 			{
   748 			*out++=TUint8(val);
   749 			node=iLitLenTree;
   750 			continue;			// another literal/length combo
   751 			}
   752 		if (val==CDbStoreCompression::TEncoding::EEos-CDbStoreCompression::TEncoding::ELiterals)
   753 			{	// eos marker. we're done
   754 			bits=KBitsEOF;
   755 			break;
   756 			}
   757 		// get the extra bits for the code
   758 		TInt code=val&0xff;
   759 		if (code>=8)
   760 			{	// xtra bits
   761 			TInt xtra=(code>>2)-1;
   762 			code-=xtra<<2;
   763 			do
   764 				{
   765 				if ((bits<<=1)&KBitsEmpty)
   766 					bits=*data++|KBitsFull;
   767 				code<<=1;
   768 				if (bits&KBitsNext)
   769 					code|=1;
   770 				} while (--xtra!=0);
   771 			}
   772 		if (val<KDeflateDistCodeBase-CDbStoreCompression::TEncoding::ELiterals)
   773 			{	// length code... get the code
   774 			len=code+KDeflateMinLength;
   775 			__ASSERT(len<=KDeflateMaxLength);
   776 			node=iDistTree;
   777 			continue;			// read the huffman code
   778 			}
   779 		// distance code
   780 		__ASSERT(code<KDeflateMaxDistance);
   781 		from=out-(code+1);
   782 		if (from+KDeflateMaxDistance<end)
   783 			from+=KDeflateMaxDistance;
   784 		}
   785 useHistory:
   786 		TInt tfr=Min(end-out,len);
   787 		len-=tfr;
   788 		do
   789 			{
   790 			*out++=*from++;
   791 			if (from==end)
   792 				from-=KDeflateMaxDistance;
   793 			} while (--tfr!=0);
   794 		node=iLitLenTree;
   795 		};
   796 	iIn=data;
   797 	iBits=bits;
   798 	iRptr=from;
   799 	iLen=len;
   800 	return out-iOut;
   801 	}
   802 
   803 inline const TUint8* TInflater::Ptr() const
   804 	{return iOut;}
   805 inline TInt TInflater::BufferSize()
   806 	{return KDeflateMaxDistance;}
   807 
   808 // Class HDeflateBuf
   809 //
   810 // This stream buffer applies the analysis or deflation and huffman coding
   811 // on the entire stream data when it is committed
   812 
   813 inline HDeflateBuf::HDeflateBuf(MStreamBuf* aHost,CDbStoreCompression::TEncoding& aEncoding,CBufBase* aBuf,TMode aMode)
   814 	:iHost(aHost),iEncoding(aEncoding),iBuf(aBuf),iMode(aMode)
   815 	{Set(*aBuf,0);}
   816 
   817 HDeflateBuf* HDeflateBuf::NewL(MStreamBuf* aHost,CDbStoreCompression::TEncoding& aEncoding,TMode aMode)
   818 	{
   819 	CBufBase* buf=CBufFlat::NewL(512);
   820 	CleanupStack::PushL(buf);
   821 	HDeflateBuf* self=new(ELeave) HDeflateBuf(aHost,aEncoding,buf,aMode);
   822 	CleanupStack::Pop();
   823 	return self;
   824 	}
   825 
   826 inline HDeflateBuf::~HDeflateBuf()
   827 	{delete iBuf;iHost.Release();}
   828 
   829 void HDeflateBuf::DoRelease()
   830 	{
   831 	delete this;
   832 	}
   833 
   834 //
   835 // This is where it all happens
   836 //
   837 void HDeflateBuf::DoSynchL()
   838 	{
   839 	if (iMode==EAnalysis)
   840 		{
   841 		TDeflateStats deflater(iEncoding);
   842 		deflater.DeflateL(iBuf->Ptr(0).Ptr(),iBuf->Size());
   843 		}
   844 	else
   845 		{
   846 		THuffEncoder encoder(iHost);
   847 		TDeflater deflater(encoder,iEncoding);
   848 		deflater.DeflateL(iBuf->Ptr(0).Ptr(),iBuf->Size());
   849 		encoder.CompleteL();
   850 		iHost.CommitL();
   851 		}
   852 	}
   853 
   854 // Class HInflateBuf
   855 //
   856 // Inflate the input stream. This is not a filter, it reads all the input, inflates it and
   857 // keeps it in a memory buffer.
   858 
   859 const TInt KInflateBufSize=0x800;	// 2K
   860 
   861 HInflateBuf::HInflateBuf(CBufBase* aBuf)
   862 	:iBuf(aBuf)
   863 	{
   864 	Set(*aBuf,0,ERead);
   865 	}
   866 
   867 inline HInflateBuf::~HInflateBuf()
   868 	{delete iBuf;}
   869 
   870 void HInflateBuf::DoRelease()
   871 	{
   872 	delete this;
   873 	}
   874 
   875 HInflateBuf* HInflateBuf::NewL(MStreamBuf* aHost,const CDbStoreCompression::TEncoding& aEncoding)
   876 	{
   877 	CBufFlat* host=CBufFlat::NewL(256);
   878 	CleanupStack::PushL(host);
   879 	TUint8 buffer[KInflateBufSize];
   880 	for (;;)
   881 		{
   882 		TInt len=aHost->ReadL(buffer,KInflateBufSize);
   883 		if (len)
   884 			host->InsertL(host->Size(),buffer,len);
   885 		if (len<KInflateBufSize)
   886 			break;
   887 		}
   888 	CBufSeg* out=CBufSeg::NewL(256);
   889 	CleanupStack::PushL(out);
   890 	TInflater* inflater=new(ELeave) TInflater(host->Ptr(0).Ptr(),aEncoding);
   891 	CleanupStack::PushL(inflater);
   892 	for (;;)
   893 		{
   894 		TInt len=inflater->Inflate();
   895 		if (len)
   896 			out->InsertL(out->Size(),inflater->Ptr(),len);
   897 		if (len<inflater->BufferSize())
   898 			break;
   899 		}
   900 	HInflateBuf* buf=new(ELeave) HInflateBuf(out);
   901 	CleanupStack::PopAndDestroy();	// inflater
   902 	CleanupStack::Pop();			// out
   903 	CleanupStack::PopAndDestroy();	// host
   904 	aHost->Release();				// don't need this anymore
   905 	return buf;
   906 	}
   907 
   908 // Class CDbStoreTable::Compressor
   909 //
   910 // This class processes an entire table for analysis or compression, using the
   911 // CDbStoreRecords::AlterL() functionality and call back to ensure that all clusters
   912 // and BLOBs are read and written.
   913 
   914 inline CDbStoreTable::CCompressor::CCompressor()
   915 	{}
   916 
   917 CDbStoreTable::CCompressor::~CCompressor()
   918 	{
   919 	if (iTable)
   920 		iTable->Close();
   921 	iRow.Close();
   922 	}
   923 
   924 //
   925 // Walk through every cluster in the table
   926 //
   927 void CDbStoreTable::CCompressor::ProcessL(CDbStoreTable* aTable)
   928 	{
   929 	iTable=aTable;
   930 	CDbStoreRecords& rec=aTable->StoreRecordsL();
   931 	for (TClusterId cluster=rec.Head();cluster!=KNullClusterId;cluster=rec.AlterL(cluster,*this))
   932 		;
   933 	}
   934 
   935 //
   936 // Compress every blob, and transfer the record from aRPtr to aWPtr
   937 //
   938 TUint8* CDbStoreTable::CCompressor::AlterRecordL(TUint8* aWPtr,const TUint8* aRPtr,TInt aLength)
   939 	{
   940 	if (iTable->Def().Columns().HasLongColumns())
   941 		{
   942 		iTable->CopyToRowL(iRow,TPtrC8(aRPtr,aLength));
   943 		CDbBlobSpace* blobs=iTable->BlobsL();
   944 		TDbColNo col=1;
   945 		HDbColumnSet::TIteratorC iter=iTable->Def().Columns().Begin();
   946 		const HDbColumnSet::TIteratorC end=iTable->Def().Columns().End();
   947 		do
   948 			{
   949 			if (!TDbCol::IsLong(iter->Type()))
   950 				continue;
   951 			TDbBlob& blob=CONST_CAST(TDbBlob&,TDbColumnC(iRow,col).Blob());
   952 			if (blob.IsInline())
   953 				continue;
   954 			// do what has to be done...?
   955 			TUint8* data=(TUint8*)User::AllocLC(blob.Size());
   956 			blobs->ReadLC(blob.Id(),iter->Type())->ReadL(data,blob.Size());
   957 			CleanupStack::PopAndDestroy();	// stream buffer
   958 			// re-write the Blob to compress it
   959 			blobs->DeleteL(blob.Id());
   960 			blob.SetId(blobs->CreateL(iter->Type(),data,blob.Size()));
   961 			CleanupStack::PopAndDestroy();	// data
   962 			} while (++col,++iter<end);
   963 		iTable->CopyFromRow(aWPtr,iRow);
   964 		}
   965 	else
   966 		Mem::Copy(aWPtr,aRPtr,aLength);
   967 	return aWPtr+aLength;
   968 	}
   969 
   970 // Class CDbStoreCompression
   971 //
   972 // This class manages the compression for the database, applying filters as appropriate
   973 // It also defines the extrenalisation format for the huffman trees
   974 
   975 const TInt KDeflationCodes=3*(CDbStoreCompression::TEncoding::ELitLens+CDbStoreCompression::TEncoding::EDistances);
   976 
   977 inline CDbStoreCompression::CDbStoreCompression()
   978 //	:iState(EAnalysis)
   979 	{}
   980 
   981 CDbStoreCompression* CDbStoreCompression::NewL()
   982 	{
   983 	return new(ELeave) CDbStoreCompression;
   984 	}
   985 
   986 //
   987 // Build huffman codings from the freqeuncy tables
   988 //
   989 void CDbStoreCompression::EncodeL()
   990 	{
   991 	__ASSERT(iState==EAnalysis);
   992 	TUint32* p=iEncoding[0].iLitLen;
   993 	TUint32* end=p+KDeflationCodes;
   994 	do
   995 		{
   996 		Huffman::EncodingL(p,TEncoding::ELitLens);
   997 		p+=TEncoding::ELitLens;
   998 		Huffman::EncodingL(p,TEncoding::EDistances);
   999 		p+=TEncoding::EDistances;
  1000 		} while (p<end);
  1001 	iState=EEncoding;
  1002 	}
  1003 
  1004 //
  1005 // Store the encoding tables as a sequence of code lengths
  1006 // The code lengths (0-25) are themselves huffman coded, and the meta coding is stored first
  1007 //
  1008 void CDbStoreCompression::ExternalizeL(RWriteStream& aStream) const
  1009 	{
  1010 	__ASSERT(iState==EEncoding);
  1011 	const TUint32* base=iEncoding[0].iLitLen;
  1012 	const TUint32* end=base+KDeflationCodes;
  1013 	TUint32 codes[KDeflateMetaCodes];
  1014 	Mem::FillZ(codes,sizeof(codes));
  1015 	const TUint32* p=base;
  1016 	do ++codes[*p++>>KHuffMaxCodeLength]; while (p<end);
  1017 	Huffman::EncodingL(codes,KDeflateMetaCodes);
  1018 // save the meta encoding
  1019 	p=codes+KDeflateMetaCodes;
  1020 	do
  1021 		{
  1022 		TUint c0=*--p;
  1023 		TUint c1=*--p;
  1024 		c0>>=KHuffMaxCodeLength;
  1025 		c1>>=KHuffMaxCodeLength;
  1026 		aStream.WriteUint8L((c0<<4)|c1);
  1027 		} while (p>codes);
  1028 // write the encoding
  1029 	THuffEncoder encoder(aStream);
  1030 	p=base;
  1031 	do encoder.EncodeL(codes[*p++>>KHuffMaxCodeLength]); while (p<end);
  1032 	encoder.CompleteL();
  1033 	}
  1034 
  1035 //
  1036 // Internalize a previous saved encoding
  1037 //
  1038 void CDbStoreCompression::InternalizeL(RReadStream& aStream)
  1039 	{
  1040 	__ASSERT(iState!=EEncoding);
  1041 //
  1042 // read the meta encoding
  1043 	TUint32 decode[KDeflateMetaCodes];
  1044 	TUint32* p=decode+KDeflateMetaCodes;
  1045 	do
  1046 		{
  1047 		TUint8 c=aStream.ReadUint8L();
  1048 		*--p=c>>4;
  1049 		*--p=c&0xf;
  1050 		} while (p>decode);
  1051 	Huffman::Decoding(decode,KDeflateMetaCodes);
  1052 // decode the encoding
  1053 	p=iEncoding[0].iLitLen;
  1054 	TUint32* end=p+KDeflationCodes;
  1055 	TUint bits=KBitsInit;
  1056 	do
  1057 		{
  1058 		const TUint32* node=decode;
  1059 		TUint huff;
  1060 		for (;;)
  1061 			{
  1062 			huff=*node++;
  1063 			if ((bits<<=1)&KBitsEmpty)
  1064 				bits=aStream.ReadUint8L()|KBitsFull;
  1065 			if (bits&KBitsNext)
  1066 				{
  1067 				if (huff&(KHuffTerminate<<16))
  1068 					break;
  1069 				}
  1070 			else
  1071 				{
  1072 				if (huff&KHuffTerminate)
  1073 					{
  1074 					huff<<=16;
  1075 					break;
  1076 					}
  1077 				else
  1078 					node=PtrAdd(node,huff);
  1079 				}
  1080 			}
  1081 		*p++=huff>>17;
  1082 		} while (p<end);
  1083 // convert the length tables into huffman decoding trees
  1084 	p=iEncoding[0].iLitLen;
  1085 	do
  1086 		{
  1087 		Huffman::Decoding(p,TEncoding::ELitLens);
  1088 		p+=TEncoding::ELitLens;
  1089 		Huffman::Decoding(p,TEncoding::EDistances,KDeflateDistCodeBase);
  1090 		p+=TEncoding::EDistances;
  1091 		} while (p<end);
  1092 	if (iState==EAnalysis)
  1093 		iState=EDecoding;
  1094 	}
  1095 
  1096 //
  1097 // Apply an inflation filter to a read stream
  1098 //
  1099 MStreamBuf* CDbStoreCompression::FilterL(MStreamBuf* aHost,TUint32,RDbStoreReadStream::TType aType)
  1100 	{
  1101 	if (iState==EDecoding || iState==EInflating)
  1102 		return HInflateBuf::NewL(aHost,iEncoding[aType]);
  1103 	return aHost;
  1104 	}
  1105 
  1106 //
  1107 // Apply a statistics or inflation filter to a write stream
  1108 //
  1109 MStreamBuf* CDbStoreCompression::FilterL(MStreamBuf* aHost,TUint32,RDbStoreWriteStream::TType aType)
  1110 	{
  1111 	TState s=iState;
  1112 	if (s==EDecoding)
  1113 		__LEAVE(KErrWrite);		// read-only database
  1114 	else if (s!=EInflating)
  1115 		{
  1116 		__ASSERT(TInt(EAnalysis)==TInt(HDeflateBuf::EAnalysis));
  1117 		__ASSERT(TInt(EEncoding)==TInt(HDeflateBuf::EDeflate));
  1118 		return HDeflateBuf::NewL(aHost,iEncoding[aType],HDeflateBuf::TMode(s));
  1119 		}
  1120 	return aHost;
  1121 	}
  1122 
  1123 // Class CDbStoreDatabase
  1124 //
  1125 // Compression related code is maintained in this source file
  1126 
  1127 //
  1128 // Iterate across all tables applying analysis or compression to them
  1129 //
  1130 void CDbStoreDatabase::CompressTablesL()
  1131 	{
  1132 	TSglQueIterC<CDbStoreDef> iter(SchemaL());
  1133 	const CDbStoreDef* def;
  1134 	while ((def=iter++)!=0)
  1135 		{
  1136 		CDbStoreTable::CCompressor* comp=new(ELeave) CDbStoreTable::CCompressor;
  1137 		CleanupStack::PushL(comp);
  1138 		comp->ProcessL(STATIC_CAST(CDbStoreTable*,TableL(*def)));
  1139 		CleanupStack::PopAndDestroy();	// comp
  1140 		}
  1141 	}
  1142 
  1143 //
  1144 // Compress or decompress the whole database
  1145 //
  1146 void CDbStoreDatabase::CompressL(TStreamId aStreamId,TZipType aZip)
  1147 	{
  1148 	__ASSERT(iStore);
  1149 	iSchemaId=aStreamId;
  1150 // read the databse header for encryption information
  1151 	RStoreReadStream strm;
  1152 	strm.OpenLC(Store(),aStreamId);
  1153 	ReadHeaderL(strm);
  1154 	CleanupStack::PopAndDestroy();	// strm
  1155 	InitPagePoolL();
  1156 //
  1157 	if (iVersion==EDbStoreCompressed)
  1158 		{
  1159 		iCompression->Inflate();
  1160 		if (aZip==EDeflate)
  1161 			__LEAVE(KErrArgument);		// already compressed
  1162 		}
  1163 	else if (aZip==EInflate)
  1164 		__LEAVE(KErrArgument);		// not compressed
  1165 	else
  1166 		{	// deflate pass #1: analyse the database
  1167 		CompressionL();				// construct the compression filter
  1168 		Transaction().DDLBeginLC();
  1169 		CompressTablesL();
  1170 		iClusterCache->FlushL();		// force through the stats buffer
  1171 		ReplaceSchemaL();				// force through the stats buffer
  1172 		CleanupStack::PopAndDestroy();	// rollback after analysis!
  1173 		iCompression->EncodeL();
  1174 		}
  1175 // now inflate or deflate the data
  1176 	Transaction().DDLBeginLC();
  1177 	CompressTablesL();
  1178 	iVersion=TUint8(aZip==EDeflate ? EDbStoreCompressed : EDbStoreVersion2);
  1179 	Transaction().DDLCommitL();
  1180 	CleanupStack::Pop();		// rollback not required
  1181 	}
  1182 
  1183 void CDbStoreDatabase::CompressL(CStreamStore* aStore,TStreamId aStreamId,TZipType aZip)
  1184 	{
  1185 	CDbStoreDatabase* self=NewLC(aStore);
  1186 	CDbObject* db=self->InterfaceL();	// a reference to the database is required
  1187 	CleanupStack::Pop();	// self
  1188 	db->PushL();
  1189 	self->Transaction().DDLPrepareL(*db);
  1190 	self->CompressL(aStreamId,aZip);
  1191 	CleanupStack::PopAndDestroy();	// db
  1192 	}
  1193 
  1194 // Class RDbStoreDatabase
  1195 
  1196 EXPORT_C void RDbStoreDatabase::CompressL(CStreamStore& aStore,TStreamId aId)
  1197 	{
  1198 	CDbStoreDatabase::CompressL(&aStore,aId,CDbStoreDatabase::EDeflate);
  1199 	}
  1200 
  1201 EXPORT_C void RDbStoreDatabase::DecompressL(CStreamStore& aStore,TStreamId aId)
  1202 	{
  1203 	CDbStoreDatabase::CompressL(&aStore,aId,CDbStoreDatabase::EInflate);
  1204 	}