os/kernelhwsrv/kernel/eka/include/byte_pair_compress.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/kernelhwsrv/kernel/eka/include/byte_pair_compress.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,317 @@
     1.4 +// Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.5 +// All rights reserved.
     1.6 +// This component and the accompanying materials are made available
     1.7 +// under the terms of the License "Eclipse Public License v1.0"
     1.8 +// which accompanies this distribution, and is available
     1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.10 +//
    1.11 +// Initial Contributors:
    1.12 +// Nokia Corporation - initial contribution.
    1.13 +//
    1.14 +// Contributors:
    1.15 +//
    1.16 +// Description:
    1.17 +// e32\include\byte_pair_compress.h
    1.18 +// This header file contains both the prototype and implementation for the byte pair compressor.
    1.19 +// This is done to facilitate sharing the code between the e32 test code and the tools.  The
    1.20 +// implementation is only included if the macro BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION is
    1.21 +// defined.
    1.22 +// 
    1.23 +// WARNING: This file contains some APIs which are internal and are subject
    1.24 +//          to change without notice. Such APIs should therefore not be used
    1.25 +//          outside the Kernel and Hardware Services package.
    1.26 +//
    1.27 +
    1.28 +#ifndef __BYTE_PAIR_COMPRESS_H__
    1.29 +#define __BYTE_PAIR_COMPRESS_H__
    1.30 +
    1.31 +#include <e32std.h>
    1.32 +
    1.33 +/**
    1.34 + * @internalTechnology
    1.35 + *
    1.36 + * Compress up to 4096 bytes of data usign the byte-pair compression algorithm.
    1.37 + *
    1.38 + * @param aDest Destination buffer, must be at least four times the size of the source data.
    1.39 + * @param aSrc  Source data to compress.
    1.40 + * @param aSize The size of the source data.
    1.41 + */
    1.42 +extern TInt BytePairCompress(TUint8* aDest, TUint8* aSrc, TInt aSize);
    1.43 +
    1.44 +#ifdef BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION
    1.45 +
    1.46 +const TInt BlockSize = 0x1000;
    1.47 +TInt ExtraSave = 0;
    1.48 +
    1.49 +TUint16 PairCount[0x10000];
    1.50 +TUint16 PairBuffer[BlockSize*2];
    1.51 +
    1.52 +TUint16 GlobalPairs[0x10000] = {0};
    1.53 +TUint16 GlobalTokenCounts[0x100] = {0};
    1.54 +
    1.55 +TUint16 ByteCount[0x100+4];
    1.56 +
    1.57 +void CountBytes(TUint8* data, TInt size)
    1.58 +	{
    1.59 +	memset(ByteCount,0,sizeof(ByteCount));
    1.60 +	TUint8* dataEnd = data+size;
    1.61 +	while(data<dataEnd)
    1.62 +		++ByteCount[*data++];
    1.63 +	}
    1.64 +
    1.65 +inline void ByteUsed(TInt b)
    1.66 +	{
    1.67 +	ByteCount[b] = 0xffff;
    1.68 +	}
    1.69 +
    1.70 +int TieBreak(int b1,int b2)
    1.71 +	{
    1.72 +	return -ByteCount[b1]-ByteCount[b2];
    1.73 +	}
    1.74 +
    1.75 +TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker)
    1.76 +	{
    1.77 +	memset(PairCount,0,sizeof(PairCount));
    1.78 +	TUint8* dataEnd = data+size-1;
    1.79 +	TInt pairsFound = 0;
    1.80 +	TInt lastPair = -1;
    1.81 +	while(data<dataEnd)
    1.82 +		{
    1.83 +		TInt b1 = *data++;
    1.84 +		if(b1==marker)
    1.85 +			{
    1.86 +			// skip marker and following byte
    1.87 +			lastPair = -1;
    1.88 +			++data;
    1.89 +			continue;
    1.90 +			}
    1.91 +		TInt b2 = *data;
    1.92 +		if(b2==marker)
    1.93 +			{
    1.94 +			// skip marker and following byte
    1.95 +			lastPair = -1;
    1.96 +			data+=2;
    1.97 +			continue;
    1.98 +			}
    1.99 +		TInt p = (b2<<8)|b1;
   1.100 +		if(p==lastPair)
   1.101 +			{
   1.102 +			// ensure a pair of identical bytes don't get double counted
   1.103 +			lastPair = -1;
   1.104 +			continue;
   1.105 +			}
   1.106 +		lastPair = p;
   1.107 +		++PairCount[p];
   1.108 +		if(PairCount[p]==minFrequency)
   1.109 +			PairBuffer[pairsFound++] = (TUint16)p;
   1.110 +		}
   1.111 +
   1.112 +	TInt bestCount = -1;
   1.113 +	TInt bestPair = -1;
   1.114 +	TInt bestTieBreak = 0;
   1.115 +	TInt p;
   1.116 +	while(pairsFound--)
   1.117 +		{
   1.118 +		p = PairBuffer[pairsFound];
   1.119 +		TInt f=PairCount[p];
   1.120 +		if(f>bestCount)
   1.121 +			{
   1.122 +			bestCount = f;
   1.123 +			bestPair = p;
   1.124 +			bestTieBreak = TieBreak(p&0xff,p>>8);
   1.125 +			}
   1.126 +		else if(f==bestCount)
   1.127 +			{
   1.128 +			TInt tieBreak = TieBreak(p&0xff,p>>8);
   1.129 +			if(tieBreak>bestTieBreak)
   1.130 +				{
   1.131 +				bestCount = f;
   1.132 +				bestPair = p;
   1.133 +				bestTieBreak = tieBreak;
   1.134 +				}
   1.135 +			}
   1.136 +		}
   1.137 +	pair = bestPair;
   1.138 +	return bestCount;
   1.139 +	}
   1.140 +
   1.141 +TInt LeastCommonByte(TInt& byte)
   1.142 +	{
   1.143 +	TInt bestCount = 0xffff;
   1.144 +	TInt bestByte = -1;
   1.145 +	for(TInt b = 0; b<0x100; b++)
   1.146 +		{
   1.147 +		TInt f = ByteCount[b];
   1.148 +		if(f<bestCount)
   1.149 +			{
   1.150 +			bestCount = f;
   1.151 +			bestByte = b;
   1.152 +			}
   1.153 +		}
   1.154 +	byte = bestByte;
   1.155 +	return bestCount;
   1.156 +	}
   1.157 +
   1.158 +TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size)
   1.159 +	{
   1.160 +	TInt originalSize = size;
   1.161 +	TUint8* dst2 = dst+size*2;
   1.162 +	TUint8* in = src;
   1.163 +	TUint8* out = dst;
   1.164 +
   1.165 +	TUint8 tokens[0x100*3];
   1.166 +	TInt tokenCount = 0;
   1.167 +
   1.168 +	CountBytes(in,size);
   1.169 +
   1.170 +	TInt marker = -1;
   1.171 +	TInt overhead = 1+3+LeastCommonByte(marker);
   1.172 +	ByteUsed(marker);
   1.173 +
   1.174 +	TUint8* inEnd = in+size;
   1.175 +	TUint8* outStart = out;
   1.176 +	while(in<inEnd)
   1.177 +		{
   1.178 +		TInt b=*in++;
   1.179 +		if(b==marker)
   1.180 +			*out++ = (TUint8)b;
   1.181 +		*out++ = (TUint8)b;
   1.182 +		}
   1.183 +	size = out-outStart;
   1.184 +
   1.185 +	TInt outToggle = 1;
   1.186 +	in = dst;
   1.187 +	out = dst2;
   1.188 +
   1.189 +	for(TInt r=256; r>0; --r)
   1.190 +		{
   1.191 +		TInt byte;
   1.192 +		TInt byteCount = LeastCommonByte(byte);
   1.193 +		TInt pair;
   1.194 +		TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker);
   1.195 +		TInt saving = pairCount-byteCount;
   1.196 +		if(saving<=overhead)
   1.197 +			break;
   1.198 +
   1.199 +		overhead = 3;
   1.200 +		if(tokenCount>=32)
   1.201 +			overhead = 2;
   1.202 +
   1.203 +		TUint8* d=tokens+3*tokenCount;
   1.204 +		++tokenCount;
   1.205 +		*d++ = (TUint8)byte;
   1.206 +		ByteUsed(byte);
   1.207 +		*d++ = (TUint8)pair;
   1.208 +		ByteUsed(pair&0xff);
   1.209 +		*d++ = (TUint8)(pair>>8);
   1.210 +		ByteUsed(pair>>8);
   1.211 +		++GlobalPairs[pair];
   1.212 +
   1.213 +		inEnd = in+size;
   1.214 +		outStart = out;
   1.215 +		while(in<inEnd)
   1.216 +			{
   1.217 +			TInt b=*in++;
   1.218 +			if(b==marker)
   1.219 +				{
   1.220 +				*out++ = (TUint8)marker;
   1.221 +				b = *in++;
   1.222 +				}
   1.223 +			else if(b==byte)
   1.224 +				{
   1.225 +				*out++ = (TUint8)marker;
   1.226 +				--byteCount;
   1.227 +				}
   1.228 +			else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
   1.229 +				{
   1.230 +				++in;
   1.231 +				b = byte;
   1.232 +				--pairCount;
   1.233 +				}
   1.234 +			*out++ = (TUint8)b;
   1.235 +			}
   1.236 +		ASSERT(!byteCount);
   1.237 +		ASSERT(!pairCount);
   1.238 +		size = out-outStart;
   1.239 +
   1.240 +		outToggle ^= 1;
   1.241 +		if(outToggle)
   1.242 +			{
   1.243 +			in = dst;
   1.244 +			out = dst2;
   1.245 +			}
   1.246 +		else
   1.247 +			{
   1.248 +			in = dst2;
   1.249 +			out = dst;
   1.250 +			}
   1.251 +		}
   1.252 +
   1.253 +	// sort tokens with a bubble sort...
   1.254 +	for(TInt x=0; x<tokenCount-1; x++)
   1.255 +		for(TInt y=x+1; y<tokenCount; y++)
   1.256 +			if(tokens[x*3]>tokens[y*3])
   1.257 +				{
   1.258 +				TInt z = tokens[x*3];
   1.259 +				tokens[x*3] = tokens[y*3];
   1.260 +				tokens[y*3] = (TUint8)z;
   1.261 +				z = tokens[x*3+1];
   1.262 +				tokens[x*3+1] = tokens[y*3+1];
   1.263 +				tokens[y*3+1] = (TUint8)z;
   1.264 +				z = tokens[x*3+2];
   1.265 +				tokens[x*3+2] = tokens[y*3+2];
   1.266 +				tokens[y*3+2] = (TUint8)z;
   1.267 +				}
   1.268 +
   1.269 +	// check for not being able to compress...
   1.270 +	if(size>originalSize)
   1.271 +		{
   1.272 +		*dst++ = 0; // store zero token count
   1.273 +		memcpy(dst,src,originalSize); // store original data
   1.274 +		return originalSize+1;
   1.275 +		}
   1.276 +
   1.277 +	// make sure data is in second buffer (dst2)
   1.278 +	if(in!=dst2)
   1.279 +		memcpy(dst2,dst,size);
   1.280 +
   1.281 +	// store tokens...
   1.282 +	TUint8* originalDst = dst;
   1.283 +	*dst++ = (TUint8)tokenCount;
   1.284 +	if(tokenCount)
   1.285 +		{
   1.286 +		*dst++ = (TUint8)marker;
   1.287 +		if(tokenCount<32)
   1.288 +			{
   1.289 +			memcpy(dst,tokens,tokenCount*3);
   1.290 +			dst += tokenCount*3;
   1.291 +			}
   1.292 +		else
   1.293 +			{
   1.294 +			TUint8* bitMask = dst;
   1.295 +			memset(bitMask,0,32);
   1.296 +			dst += 32;
   1.297 +			TUint8* d=tokens;
   1.298 +			do
   1.299 +				{
   1.300 +				TInt t=*d++;
   1.301 +				bitMask[t>>3] |= (1<<(t&7));
   1.302 +				*dst++ = *d++;
   1.303 +				*dst++ = *d++;
   1.304 +				}
   1.305 +			while(--tokenCount);
   1.306 +			}
   1.307 +		}
   1.308 +	// store data...
   1.309 +	memcpy(dst,dst2,size);
   1.310 +	dst += size;
   1.311 +
   1.312 +	// get stats...
   1.313 +	++GlobalTokenCounts[tokenCount];
   1.314 +
   1.315 +	// return total size of compressed data...
   1.316 +	return dst-originalDst;
   1.317 +	}
   1.318 +
   1.319 +#endif
   1.320 +#endif