os/kernelhwsrv/kernel/eka/include/byte_pair_compress.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
// Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     2
// All rights reserved.
sl@0
     3
// This component and the accompanying materials are made available
sl@0
     4
// under the terms of the License "Eclipse Public License v1.0"
sl@0
     5
// which accompanies this distribution, and is available
sl@0
     6
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     7
//
sl@0
     8
// Initial Contributors:
sl@0
     9
// Nokia Corporation - initial contribution.
sl@0
    10
//
sl@0
    11
// Contributors:
sl@0
    12
//
sl@0
    13
// Description:
sl@0
    14
// e32\include\byte_pair_compress.h
sl@0
    15
// This header file contains both the prototype and implementation for the byte pair compressor.
sl@0
    16
// This is done to facilitate sharing the code between the e32 test code and the tools.  The
sl@0
    17
// implementation is only included if the macro BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION is
sl@0
    18
// defined.
sl@0
    19
// 
sl@0
    20
// WARNING: This file contains some APIs which are internal and are subject
sl@0
    21
//          to change without notice. Such APIs should therefore not be used
sl@0
    22
//          outside the Kernel and Hardware Services package.
sl@0
    23
//
sl@0
    24
sl@0
    25
#ifndef __BYTE_PAIR_COMPRESS_H__
sl@0
    26
#define __BYTE_PAIR_COMPRESS_H__
sl@0
    27
sl@0
    28
#include <e32std.h>
sl@0
    29
sl@0
    30
/**
sl@0
    31
 * @internalTechnology
sl@0
    32
 *
sl@0
    33
 * Compress up to 4096 bytes of data usign the byte-pair compression algorithm.
sl@0
    34
 *
sl@0
    35
 * @param aDest Destination buffer, must be at least four times the size of the source data.
sl@0
    36
 * @param aSrc  Source data to compress.
sl@0
    37
 * @param aSize The size of the source data.
sl@0
    38
 */
sl@0
    39
extern TInt BytePairCompress(TUint8* aDest, TUint8* aSrc, TInt aSize);
sl@0
    40
sl@0
    41
#ifdef BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION
sl@0
    42
sl@0
    43
const TInt BlockSize = 0x1000;
sl@0
    44
TInt ExtraSave = 0;
sl@0
    45
sl@0
    46
TUint16 PairCount[0x10000];
sl@0
    47
TUint16 PairBuffer[BlockSize*2];
sl@0
    48
sl@0
    49
TUint16 GlobalPairs[0x10000] = {0};
sl@0
    50
TUint16 GlobalTokenCounts[0x100] = {0};
sl@0
    51
sl@0
    52
TUint16 ByteCount[0x100+4];
sl@0
    53
sl@0
    54
void CountBytes(TUint8* data, TInt size)
sl@0
    55
	{
sl@0
    56
	memset(ByteCount,0,sizeof(ByteCount));
sl@0
    57
	TUint8* dataEnd = data+size;
sl@0
    58
	while(data<dataEnd)
sl@0
    59
		++ByteCount[*data++];
sl@0
    60
	}
sl@0
    61
sl@0
    62
inline void ByteUsed(TInt b)
sl@0
    63
	{
sl@0
    64
	ByteCount[b] = 0xffff;
sl@0
    65
	}
sl@0
    66
sl@0
    67
int TieBreak(int b1,int b2)
sl@0
    68
	{
sl@0
    69
	return -ByteCount[b1]-ByteCount[b2];
sl@0
    70
	}
sl@0
    71
sl@0
    72
TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker)
sl@0
    73
	{
sl@0
    74
	memset(PairCount,0,sizeof(PairCount));
sl@0
    75
	TUint8* dataEnd = data+size-1;
sl@0
    76
	TInt pairsFound = 0;
sl@0
    77
	TInt lastPair = -1;
sl@0
    78
	while(data<dataEnd)
sl@0
    79
		{
sl@0
    80
		TInt b1 = *data++;
sl@0
    81
		if(b1==marker)
sl@0
    82
			{
sl@0
    83
			// skip marker and following byte
sl@0
    84
			lastPair = -1;
sl@0
    85
			++data;
sl@0
    86
			continue;
sl@0
    87
			}
sl@0
    88
		TInt b2 = *data;
sl@0
    89
		if(b2==marker)
sl@0
    90
			{
sl@0
    91
			// skip marker and following byte
sl@0
    92
			lastPair = -1;
sl@0
    93
			data+=2;
sl@0
    94
			continue;
sl@0
    95
			}
sl@0
    96
		TInt p = (b2<<8)|b1;
sl@0
    97
		if(p==lastPair)
sl@0
    98
			{
sl@0
    99
			// ensure a pair of identical bytes don't get double counted
sl@0
   100
			lastPair = -1;
sl@0
   101
			continue;
sl@0
   102
			}
sl@0
   103
		lastPair = p;
sl@0
   104
		++PairCount[p];
sl@0
   105
		if(PairCount[p]==minFrequency)
sl@0
   106
			PairBuffer[pairsFound++] = (TUint16)p;
sl@0
   107
		}
sl@0
   108
sl@0
   109
	TInt bestCount = -1;
sl@0
   110
	TInt bestPair = -1;
sl@0
   111
	TInt bestTieBreak = 0;
sl@0
   112
	TInt p;
sl@0
   113
	while(pairsFound--)
sl@0
   114
		{
sl@0
   115
		p = PairBuffer[pairsFound];
sl@0
   116
		TInt f=PairCount[p];
sl@0
   117
		if(f>bestCount)
sl@0
   118
			{
sl@0
   119
			bestCount = f;
sl@0
   120
			bestPair = p;
sl@0
   121
			bestTieBreak = TieBreak(p&0xff,p>>8);
sl@0
   122
			}
sl@0
   123
		else if(f==bestCount)
sl@0
   124
			{
sl@0
   125
			TInt tieBreak = TieBreak(p&0xff,p>>8);
sl@0
   126
			if(tieBreak>bestTieBreak)
sl@0
   127
				{
sl@0
   128
				bestCount = f;
sl@0
   129
				bestPair = p;
sl@0
   130
				bestTieBreak = tieBreak;
sl@0
   131
				}
sl@0
   132
			}
sl@0
   133
		}
sl@0
   134
	pair = bestPair;
sl@0
   135
	return bestCount;
sl@0
   136
	}
sl@0
   137
sl@0
   138
TInt LeastCommonByte(TInt& byte)
sl@0
   139
	{
sl@0
   140
	TInt bestCount = 0xffff;
sl@0
   141
	TInt bestByte = -1;
sl@0
   142
	for(TInt b = 0; b<0x100; b++)
sl@0
   143
		{
sl@0
   144
		TInt f = ByteCount[b];
sl@0
   145
		if(f<bestCount)
sl@0
   146
			{
sl@0
   147
			bestCount = f;
sl@0
   148
			bestByte = b;
sl@0
   149
			}
sl@0
   150
		}
sl@0
   151
	byte = bestByte;
sl@0
   152
	return bestCount;
sl@0
   153
	}
sl@0
   154
sl@0
   155
TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size)
sl@0
   156
	{
sl@0
   157
	TInt originalSize = size;
sl@0
   158
	TUint8* dst2 = dst+size*2;
sl@0
   159
	TUint8* in = src;
sl@0
   160
	TUint8* out = dst;
sl@0
   161
sl@0
   162
	TUint8 tokens[0x100*3];
sl@0
   163
	TInt tokenCount = 0;
sl@0
   164
sl@0
   165
	CountBytes(in,size);
sl@0
   166
sl@0
   167
	TInt marker = -1;
sl@0
   168
	TInt overhead = 1+3+LeastCommonByte(marker);
sl@0
   169
	ByteUsed(marker);
sl@0
   170
sl@0
   171
	TUint8* inEnd = in+size;
sl@0
   172
	TUint8* outStart = out;
sl@0
   173
	while(in<inEnd)
sl@0
   174
		{
sl@0
   175
		TInt b=*in++;
sl@0
   176
		if(b==marker)
sl@0
   177
			*out++ = (TUint8)b;
sl@0
   178
		*out++ = (TUint8)b;
sl@0
   179
		}
sl@0
   180
	size = out-outStart;
sl@0
   181
sl@0
   182
	TInt outToggle = 1;
sl@0
   183
	in = dst;
sl@0
   184
	out = dst2;
sl@0
   185
sl@0
   186
	for(TInt r=256; r>0; --r)
sl@0
   187
		{
sl@0
   188
		TInt byte;
sl@0
   189
		TInt byteCount = LeastCommonByte(byte);
sl@0
   190
		TInt pair;
sl@0
   191
		TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker);
sl@0
   192
		TInt saving = pairCount-byteCount;
sl@0
   193
		if(saving<=overhead)
sl@0
   194
			break;
sl@0
   195
sl@0
   196
		overhead = 3;
sl@0
   197
		if(tokenCount>=32)
sl@0
   198
			overhead = 2;
sl@0
   199
sl@0
   200
		TUint8* d=tokens+3*tokenCount;
sl@0
   201
		++tokenCount;
sl@0
   202
		*d++ = (TUint8)byte;
sl@0
   203
		ByteUsed(byte);
sl@0
   204
		*d++ = (TUint8)pair;
sl@0
   205
		ByteUsed(pair&0xff);
sl@0
   206
		*d++ = (TUint8)(pair>>8);
sl@0
   207
		ByteUsed(pair>>8);
sl@0
   208
		++GlobalPairs[pair];
sl@0
   209
sl@0
   210
		inEnd = in+size;
sl@0
   211
		outStart = out;
sl@0
   212
		while(in<inEnd)
sl@0
   213
			{
sl@0
   214
			TInt b=*in++;
sl@0
   215
			if(b==marker)
sl@0
   216
				{
sl@0
   217
				*out++ = (TUint8)marker;
sl@0
   218
				b = *in++;
sl@0
   219
				}
sl@0
   220
			else if(b==byte)
sl@0
   221
				{
sl@0
   222
				*out++ = (TUint8)marker;
sl@0
   223
				--byteCount;
sl@0
   224
				}
sl@0
   225
			else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
sl@0
   226
				{
sl@0
   227
				++in;
sl@0
   228
				b = byte;
sl@0
   229
				--pairCount;
sl@0
   230
				}
sl@0
   231
			*out++ = (TUint8)b;
sl@0
   232
			}
sl@0
   233
		ASSERT(!byteCount);
sl@0
   234
		ASSERT(!pairCount);
sl@0
   235
		size = out-outStart;
sl@0
   236
sl@0
   237
		outToggle ^= 1;
sl@0
   238
		if(outToggle)
sl@0
   239
			{
sl@0
   240
			in = dst;
sl@0
   241
			out = dst2;
sl@0
   242
			}
sl@0
   243
		else
sl@0
   244
			{
sl@0
   245
			in = dst2;
sl@0
   246
			out = dst;
sl@0
   247
			}
sl@0
   248
		}
sl@0
   249
sl@0
   250
	// sort tokens with a bubble sort...
sl@0
   251
	for(TInt x=0; x<tokenCount-1; x++)
sl@0
   252
		for(TInt y=x+1; y<tokenCount; y++)
sl@0
   253
			if(tokens[x*3]>tokens[y*3])
sl@0
   254
				{
sl@0
   255
				TInt z = tokens[x*3];
sl@0
   256
				tokens[x*3] = tokens[y*3];
sl@0
   257
				tokens[y*3] = (TUint8)z;
sl@0
   258
				z = tokens[x*3+1];
sl@0
   259
				tokens[x*3+1] = tokens[y*3+1];
sl@0
   260
				tokens[y*3+1] = (TUint8)z;
sl@0
   261
				z = tokens[x*3+2];
sl@0
   262
				tokens[x*3+2] = tokens[y*3+2];
sl@0
   263
				tokens[y*3+2] = (TUint8)z;
sl@0
   264
				}
sl@0
   265
sl@0
   266
	// check for not being able to compress...
sl@0
   267
	if(size>originalSize)
sl@0
   268
		{
sl@0
   269
		*dst++ = 0; // store zero token count
sl@0
   270
		memcpy(dst,src,originalSize); // store original data
sl@0
   271
		return originalSize+1;
sl@0
   272
		}
sl@0
   273
sl@0
   274
	// make sure data is in second buffer (dst2)
sl@0
   275
	if(in!=dst2)
sl@0
   276
		memcpy(dst2,dst,size);
sl@0
   277
sl@0
   278
	// store tokens...
sl@0
   279
	TUint8* originalDst = dst;
sl@0
   280
	*dst++ = (TUint8)tokenCount;
sl@0
   281
	if(tokenCount)
sl@0
   282
		{
sl@0
   283
		*dst++ = (TUint8)marker;
sl@0
   284
		if(tokenCount<32)
sl@0
   285
			{
sl@0
   286
			memcpy(dst,tokens,tokenCount*3);
sl@0
   287
			dst += tokenCount*3;
sl@0
   288
			}
sl@0
   289
		else
sl@0
   290
			{
sl@0
   291
			TUint8* bitMask = dst;
sl@0
   292
			memset(bitMask,0,32);
sl@0
   293
			dst += 32;
sl@0
   294
			TUint8* d=tokens;
sl@0
   295
			do
sl@0
   296
				{
sl@0
   297
				TInt t=*d++;
sl@0
   298
				bitMask[t>>3] |= (1<<(t&7));
sl@0
   299
				*dst++ = *d++;
sl@0
   300
				*dst++ = *d++;
sl@0
   301
				}
sl@0
   302
			while(--tokenCount);
sl@0
   303
			}
sl@0
   304
		}
sl@0
   305
	// store data...
sl@0
   306
	memcpy(dst,dst2,size);
sl@0
   307
	dst += size;
sl@0
   308
sl@0
   309
	// get stats...
sl@0
   310
	++GlobalTokenCounts[tokenCount];
sl@0
   311
sl@0
   312
	// return total size of compressed data...
sl@0
   313
	return dst-originalDst;
sl@0
   314
	}
sl@0
   315
sl@0
   316
#endif
sl@0
   317
#endif