sl@0: // Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0: // All rights reserved.
sl@0: // This component and the accompanying materials are made available
sl@0: // under the terms of the License "Eclipse Public License v1.0"
sl@0: // which accompanies this distribution, and is available
sl@0: // at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0: //
sl@0: // Initial Contributors:
sl@0: // Nokia Corporation - initial contribution.
sl@0: //
sl@0: // Contributors:
sl@0: //
sl@0: // Description:
sl@0: // e32\euser\us_encode.cpp
sl@0: // 
sl@0: //
sl@0: 
sl@0: #include "e32huffman.h"
sl@0: #include <e32base.h>
sl@0: #include <e32base_private.h>
sl@0: #include <e32panic.h>
sl@0: 
sl@0: _LIT(KCat,"Huffman");
sl@0: // local definitions used for Huffman code generation
sl@0: typedef TUint16 THuff;		/** @internal */
sl@0: const THuff KLeaf=0x8000;	/** @internal */
sl@0: struct TNode
sl@0: /** @internal */
sl@0: 	{
sl@0: 	TUint iCount;
sl@0: 	THuff iLeft;
sl@0: 	THuff iRight;
sl@0: 	};
sl@0: 
sl@0: /** recursive function to calculate the code lengths from the node tree
sl@0: 
sl@0: 	@internalComponent
sl@0: */
sl@0: void HuffmanLengthsL(TUint32* aLengths,const TNode* aNodes,TInt aNode,TInt aLen)
sl@0: 	{
sl@0: 	if (++aLen>Huffman::KMaxCodeLength)
sl@0: 		User::Leave(KErrOverflow);
sl@0: 
sl@0: 	const TNode& node=aNodes[aNode];
sl@0: 	TUint x=node.iLeft;
sl@0: 	if (x&KLeaf)
sl@0: 		aLengths[x&~KLeaf]=aLen;
sl@0: 	else
sl@0: 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
sl@0: 	x=node.iRight;
sl@0: 	if (x&KLeaf)
sl@0: 		aLengths[x&~KLeaf]=aLen;
sl@0: 	else
sl@0: 		HuffmanLengthsL(aLengths,aNodes,x,aLen);
sl@0: 	}
sl@0: 
sl@0: /**	Insert the {aCount,aValue} pair into the already sorted array of nodes
sl@0: 
sl@0: 	@internalComponent
sl@0: */
sl@0: void InsertInOrder(TNode* aNodes, TInt aSize, TUint aCount, TInt aVal)
sl@0: 	{
sl@0: 	// Uses Insertion sort following a binary search...
sl@0: 	TInt l=0, r=aSize;
sl@0: 	while (l < r)
sl@0: 		{
sl@0: 		TInt m = (l+r) >> 1;
sl@0: 		if (aNodes[m].iCount<aCount)
sl@0: 			r=m;
sl@0: 		else
sl@0: 			l=m+1;
sl@0: 		}
sl@0: 	Mem::Copy(aNodes+l+1,aNodes+l,sizeof(TNode)*(aSize-l));
sl@0: 	aNodes[l].iCount=aCount;
sl@0: 	aNodes[l].iRight=TUint16(aVal);
sl@0: 	}
sl@0: 
sl@0: /** Generate a Huffman code
sl@0: 
sl@0: 	This generates a Huffman code for a given set of code frequencies. The output
sl@0: 	is a table of code lengths which can be used to build canonincal encoding tables
sl@0: 	or decoding trees for use with the TBitInput and TBitOutput classes.
sl@0: 
sl@0: 	Entries in the table with a frequency of zero will have a zero code length
sl@0: 	and thus no associated huffman encoding. If each such symbol should have a
sl@0: 	maximum length encoding, they must be given at least a frequency of 1.
sl@0: 
sl@0: 	For an alphabet of n symbols, this algorithm has a transient memory overhead
sl@0: 	of 8n, and a time complexity of O(n*log(n)).
sl@0: 
sl@0: 	@param aFrequency The table of code frequencies
sl@0: 	@param aNumCodes The number of codes in the table
sl@0: 	@param aHuffman The table for the output code-length table. This must be
sl@0: 		the same size as the frequency table, and can safely be the same table
sl@0: 
sl@0: 	@leave KErrNoMemory If memory used for code generation cannot be allocated
sl@0: 
sl@0:   	@panic "USER ???" If the number of codes exceeds Huffman::KMaxCodes
sl@0: */
sl@0: EXPORT_C void Huffman::HuffmanL(const TUint32 aFrequency[],TInt aNumCodes,TUint32 aHuffman[])
sl@0: 	{
sl@0: 	__ASSERT_ALWAYS(TUint(aNumCodes)<=TUint(KMaxCodes),User::Panic(KCat,EHuffmanTooManyCodes));
sl@0: 
sl@0: 	// Sort the values into decreasing order of frequency
sl@0: 	//
sl@0: 	TNode* nodes = new(ELeave) TNode[aNumCodes];
sl@0: 	CleanupArrayDeletePushL(nodes);
sl@0: 	TInt lCount=0;
sl@0: 
sl@0: 	for (TInt ii=0;ii<aNumCodes;++ii)
sl@0: 		{
sl@0: 		TInt c=aFrequency[ii];
sl@0: 		if (c!=0)
sl@0: 			InsertInOrder(nodes,lCount++,c,ii|KLeaf);
sl@0: 		}
sl@0: 
sl@0: 	// default code length is zero
sl@0: 	Mem::FillZ(aHuffman,aNumCodes*sizeof(TUint32));
sl@0: 
sl@0: 	if (lCount==0)
sl@0: 		{
sl@0: 		// no codes with frequency>0. No code has a length
sl@0: 		}
sl@0: 	else if (lCount==1)
sl@0: 		{
sl@0: 		// special case for a single value (always encode as "0")
sl@0: 		aHuffman[nodes[0].iRight&~KLeaf]=1;
sl@0: 		}
sl@0: 	else
sl@0: 		{
sl@0: 		// Huffman algorithm: pair off least frequent nodes and reorder
sl@0: 		//
sl@0: 		do
sl@0: 			{
sl@0: 			--lCount;
sl@0: 			TUint c=nodes[lCount].iCount + nodes[lCount-1].iCount;
sl@0: 			nodes[lCount].iLeft=nodes[lCount-1].iRight;
sl@0: 			// re-order the leaves now to reflect new combined frequency 'c'
sl@0: 			InsertInOrder(nodes,lCount-1,c,lCount);
sl@0: 			} while (lCount>1);
sl@0: 		// generate code lengths in aHuffman[]
sl@0: 		HuffmanLengthsL(aHuffman,nodes,1,0);
sl@0: 		}
sl@0: 	CleanupStack::PopAndDestroy(nodes);
sl@0: 
sl@0: 	__ASSERT_DEBUG(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));
sl@0: 	}
sl@0: 
sl@0: /** Validate a Huffman encoding
sl@0: 
sl@0: 	This verifies that a Huffman coding described by the code lengths is valid.
sl@0: 	In particular, it ensures that no code exceeds the maximum length and
sl@0: 	that it is possible to generate a canonical coding for the specified lengths.
sl@0: 	
sl@0: 	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
sl@0: 	@param aNumCodes The number of codes in the table
sl@0: 
sl@0: 	@return True if the code is valid, otherwise false
sl@0: */
sl@0: EXPORT_C TBool Huffman::IsValid(const TUint32 aHuffman[],TInt aNumCodes)
sl@0: 	{
sl@0: 	// The code is valid if one of the following holds:
sl@0: 	// (a) the code exactly fills the 'code space'
sl@0: 	// (b) there is only a single symbol with code length 1
sl@0: 	// (c) there are no encoded symbols
sl@0: 	//
sl@0: 	TUint remain=1<<KMaxCodeLength;
sl@0: 	TInt totlen=0;
sl@0: 	for (const TUint32* p=aHuffman+aNumCodes; p>aHuffman;)
sl@0: 		{
sl@0: 		TInt len=*--p;
sl@0: 		if (len>0)
sl@0: 			{
sl@0: 			totlen+=len;
sl@0: 			if (len>KMaxCodeLength)
sl@0: 				return EFalse;
sl@0: 			TUint c=1<<(KMaxCodeLength-len);
sl@0: 			if (c>remain)
sl@0: 				return EFalse;
sl@0: 			remain-=c;
sl@0: 			}
sl@0: 		}
sl@0: 
sl@0: 	return remain==0 || totlen<=1;
sl@0: 	}
sl@0: 
sl@0: /** Create a canonical Huffman encoding table
sl@0: 
sl@0: 	This generates the huffman codes used by TBitOutput::HuffmanL() to write huffman
sl@0: 	encoded data. The input is table of code lengths, as generated by Huffman::HuffmanL()
sl@0: 	and must represent a valid huffman code.
sl@0: 	
sl@0: 	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
sl@0: 	@param aNumCodes The number of codes in the table
sl@0: 	@param aEncodeTable The table for the output huffman codes. This must be
sl@0: 		the same size as the code-length table, and can safely be the same table
sl@0: 
sl@0: 	@panic "USER ???" If the provided code is not a valid Huffman coding
sl@0: 	
sl@0: 	@see IsValid()
sl@0: 	@see HuffmanL()
sl@0: */
sl@0: EXPORT_C void Huffman::Encoding(const TUint32 aHuffman[],TInt aNumCodes,TUint32 aEncodeTable[])
sl@0: 	{
sl@0: 	__ASSERT_ALWAYS(IsValid(aHuffman,aNumCodes),User::Panic(KCat,EHuffmanInvalidCoding));
sl@0: 
sl@0: 	TFixedArray<TInt,KMaxCodeLength> lenCount;
sl@0: 	lenCount.Reset();
sl@0: 
sl@0: 	TInt ii;
sl@0: 	for (ii=0;ii<aNumCodes;++ii)
sl@0: 		{
sl@0: 		TInt len=aHuffman[ii]-1;
sl@0: 		if (len>=0)
sl@0: 			++lenCount[len];
sl@0: 		}
sl@0: 
sl@0: 	TFixedArray<TUint,KMaxCodeLength> nextCode;
sl@0: 	TUint code=0;
sl@0: 	for (ii=0;ii<KMaxCodeLength;++ii)
sl@0: 		{
sl@0: 		code<<=1;
sl@0: 		nextCode[ii]=code;
sl@0: 		code+=lenCount[ii];
sl@0: 		}
sl@0: 
sl@0: 	for (ii=0;ii<aNumCodes;++ii)
sl@0: 		{
sl@0: 		TInt len=aHuffman[ii];
sl@0: 		if (len==0)
sl@0: 			aEncodeTable[ii]=0;
sl@0: 		else
sl@0: 			{
sl@0: 			aEncodeTable[ii] = (nextCode[len-1]<<(KMaxCodeLength-len))|(len<<KMaxCodeLength);
sl@0: 			++nextCode[len-1];
sl@0: 			}
sl@0: 		}
sl@0: 	}
sl@0: 
sl@0: /** the encoding table for the externalised code
sl@0: 	@internalComponent
sl@0: */
sl@0: const TUint32 HuffmanEncoding[]=
sl@0: 	{
sl@0: 	0x10000000,
sl@0: 	0x1c000000,
sl@0: 	0x12000000,
sl@0: 	0x1d000000,
sl@0: 	0x26000000,
sl@0: 	0x26800000,
sl@0: 	0x2f000000,
sl@0: 	0x37400000,
sl@0: 	0x37600000,
sl@0: 	0x37800000,
sl@0: 	0x3fa00000,
sl@0: 	0x3fb00000,
sl@0: 	0x3fc00000,
sl@0: 	0x3fd00000,
sl@0: 	0x47e00000,
sl@0: 	0x47e80000,
sl@0: 	0x47f00000,
sl@0: 	0x4ff80000,
sl@0: 	0x57fc0000,
sl@0: 	0x5ffe0000,
sl@0: 	0x67ff0000,
sl@0: 	0x77ff8000,
sl@0: 	0x7fffa000,
sl@0: 	0x7fffb000,
sl@0: 	0x7fffc000,
sl@0: 	0x7fffd000,
sl@0: 	0x7fffe000,
sl@0: 	0x87fff000,
sl@0: 	0x87fff800
sl@0: 	};
sl@0: 
sl@0: /** encode 0a as '0' and 0b as '1', return number of symbols created
sl@0: 
sl@0: 	@internalComponent
sl@0: */
sl@0: void EncodeRunLengthL(TBitOutput& aOutput, TInt aLength)
sl@0: 	{
sl@0: 	if (aLength>0)
sl@0: 		{
sl@0: 		EncodeRunLengthL(aOutput,(aLength-1)>>1);
sl@0: 		aOutput.HuffmanL(HuffmanEncoding[1-(aLength&1)]);
sl@0: 		}
sl@0: 	}
sl@0: 
sl@0: /** Store a canonical huffman encoding in compact form
sl@0: 
sl@0: 	As the encoding is canonical, only the code lengths of each code needs to be saved.
sl@0: 
sl@0: 	Due to the nature of code length tables, these can usually be stored very compactly
sl@0: 	by encoding the encoding itself, hence the use of the bit output stream.
sl@0: 	
sl@0: 	@param aOutput The output stream for the encoding
sl@0: 	@param aHuffman The table of code lengths as generated by Huffman::HuffmanL()
sl@0: 	@param aNumCodes The number of huffman codes in the table
sl@0: 
sl@0: 	@leave TBitOutput::HuffmanL()
sl@0: */
sl@0: EXPORT_C void Huffman::ExternalizeL(TBitOutput& aOutput,const TUint32 aHuffman[],TInt aNumCodes)
sl@0: 	{
sl@0: 	// We assume that the code length table is generated by the huffman generator,
sl@0: 	// in which case the maxmimum code length is 27 bits.
sl@0: 	//
sl@0: 	// We apply three transformations to the data:
sl@0: 	// 1. the data goes through a move-to-front coder
sl@0: 	// 2. apply a rle-0 coder which replace runs of '0' with streams of '0a' and '0b'
sl@0: 	// 3. encode the result using a predefined (average) huffman coding
sl@0: 	//
sl@0: 	// This can be done in a single pass over the data, avoiding the need for additional
sl@0: 	// memory.
sl@0: 	//
sl@0: 	// initialise the list for the MTF coder
sl@0: 	TFixedArray<TUint8,Huffman::KMetaCodes> list;
sl@0: 	TInt i;
sl@0: 	for (i=0;i<list.Count();++i)
sl@0: 		list[i]=TUint8(i);
sl@0: 	TInt last=0;
sl@0: 
sl@0: 	TInt rl=0;
sl@0: 	const TUint32* p32=aHuffman;
sl@0: 	const TUint32* e32=p32+aNumCodes;
sl@0: 	while (p32<e32)
sl@0: 		{
sl@0: 		TInt c=*p32++;
sl@0: 		if (c==last)
sl@0: 			++rl;	// repeat of last symbol
sl@0: 		else
sl@0: 			{
sl@0: 			// encode run-length
sl@0: 			EncodeRunLengthL(aOutput,rl);
sl@0: 			rl=0;
sl@0: 			// find code in MTF list
sl@0: 			TInt j;
sl@0: 			for (j=1;list[j]!=c;++j)
sl@0: 				;
sl@0: 			// store this code
sl@0: 			aOutput.HuffmanL(HuffmanEncoding[j+1]);
sl@0: 			// adjust list for MTF algorithm
sl@0: 			while (--j>0)
sl@0: 				list[j+1]=list[j];
sl@0: 			list[1]=TUint8(last);
sl@0: 			last=c;
sl@0: 			}
sl@0: 		}
sl@0: 	// encod any remaining run-length
sl@0: 	EncodeRunLengthL(aOutput,rl);
sl@0: 	}
sl@0: 
sl@0: 
sl@0: /** Construct a bit stream output object
sl@0: 
sl@0: 	Following construction the bit stream is ready for writing bits, but will first call
sl@0: 	OverflowL() as the output buffer is 'full'. A derived class can detect this state as
sl@0: 	Ptr() will return null.
sl@0: */
sl@0: EXPORT_C TBitOutput::TBitOutput()
sl@0: 	:iCode(0),iBits(-8),iPtr(0),iEnd(0)
sl@0: 	{}
sl@0: 
sl@0: /** Construct a bit stream output object over a buffer
sl@0: 
sl@0: 	Data will be written to the buffer until it is full, at which point OverflowL() will
sl@0: 	be called. This should handle the data and then can Set() again to reset the buffer
sl@0: 	for further output.
sl@0: 	
sl@0: 	@param aBuf The buffer for output
sl@0: 	@param aSize The size of the buffer in bytes
sl@0: */
sl@0: EXPORT_C TBitOutput::TBitOutput(TUint8* aBuf,TInt aSize)
sl@0: 	:iCode(0),iBits(-8),iPtr(aBuf),iEnd(aBuf+aSize)
sl@0: 	{}
sl@0: 
sl@0: /** Write a huffman code
sl@0: 
sl@0: 	This expects a huffman code value as generated by Huffman::Encoding()
sl@0: 
sl@0: 	@param aHuffCode The huffman code write to the stream
sl@0: 
sl@0: 	@leave OverflowL() If the output buffer is full, OverflowL() is called
sl@0: */
sl@0: EXPORT_C void TBitOutput::HuffmanL(TUint aHuffCode)
sl@0: 	{
sl@0: 	DoWriteL(aHuffCode<<(32-Huffman::KMaxCodeLength),aHuffCode>>Huffman::KMaxCodeLength);
sl@0: 	}
sl@0: 
sl@0: /** Write an arbitrary integer value
sl@0: 
sl@0: 	Write an unsigned integer using the number of bits specified. Only
sl@0: 	the low order bits of the value are written to the output, most
sl@0: 	significant bit first.
sl@0: 
sl@0: 	@param aValue The value to write to the stream
sl@0: 	@param aLength The number of bits to output
sl@0: 
sl@0: 	@leave OverflowL() If the output buffer is full, OverflowL() is called
sl@0: */
sl@0: EXPORT_C void TBitOutput::WriteL(TUint aValue,TInt aLength)
sl@0: 	{
sl@0: 	if (aLength)
sl@0: 		DoWriteL(aValue<<=32-aLength,aLength);
sl@0: 	}
sl@0: 
sl@0: /** Pad the bitstream to the next byte boundary
sl@0: 
sl@0: 	Terminate the bitstream by padding the last byte with the requested value.
sl@0: 	Following this operation the bitstream can continue to be used, the data will
sl@0: 	start at the next byte.
sl@0: 
sl@0: 	@param aPadding The bit value to pad the final byte with
sl@0: 
sl@0: 	@leave OverflowL() If the output buffer is full, OverflowL() is called
sl@0: */
sl@0: EXPORT_C void TBitOutput::PadL(TUint aPadding)
sl@0: 	{
sl@0: 	if (iBits>-8)
sl@0: 		WriteL(aPadding?0xffffffffu:0,-iBits);
sl@0: 	}
sl@0: 
sl@0: /** Write the higher order bits to the stream
sl@0: 	
sl@0: 	@internalComponent
sl@0: */
sl@0: void TBitOutput::DoWriteL(TUint aBits,TInt aSize)
sl@0: 	{
sl@0: 	if (aSize>25)
sl@0: 		{
sl@0: 		// cannot process >25 bits in a single pass
sl@0: 		// so do the top 8 bits first
sl@0: 		ASSERT(aSize<=32);
sl@0: 		DoWriteL(aBits&0xff000000u,8);
sl@0: 		aBits<<=8;
sl@0: 		aSize-=8;
sl@0: 		}
sl@0: 
sl@0: 	TInt bits=iBits;
sl@0: 	TUint code=iCode|(aBits>>(bits+8));
sl@0: 	bits+=aSize;
sl@0: 	if (bits>=0)
sl@0: 		{
sl@0: 		TUint8* p=iPtr;
sl@0: 		do
sl@0: 			{
sl@0: 			if (p==iEnd)
sl@0: 				{
sl@0: 				// run out of buffer space so invoke the overflow handler
sl@0: 				iPtr=p;
sl@0: 				OverflowL();
sl@0: 				p=iPtr;
sl@0: 				ASSERT(p!=iEnd);
sl@0: 				}
sl@0: 			*p++=TUint8(code>>24);
sl@0: 			code<<=8;
sl@0: 			bits-=8;
sl@0: 			} while (bits>=0);
sl@0: 		iPtr=p;
sl@0: 		}
sl@0: 	iCode=code;
sl@0: 	iBits=bits;
sl@0: 	}
sl@0: 
sl@0: /** Handle a full output buffer
sl@0: 
sl@0: 	This virtual function is called when the output buffer is full. It should deal
sl@0: 	with the data in the buffer before reseting the buffer using Set(), allowing
sl@0: 	further data to be written.
sl@0: 
sl@0: 	A derived class can replace this to write the data to a file (for example)
sl@0: 	before marking the buffer as empty.
sl@0: 
sl@0: 	@leave KErrOverflow The default implementation leaves
sl@0: */
sl@0: void TBitOutput::OverflowL()
sl@0: 	{
sl@0: 	User::Leave(KErrOverflow);
sl@0: 	}