1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/kernelhwsrv/kernel/eka/include/byte_pair_compress.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,317 @@
1.4 +// Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of the License "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// e32\include\byte_pair_compress.h
1.18 +// This header file contains both the prototype and implementation for the byte pair compressor.
1.19 +// This is done to facilitate sharing the code between the e32 test code and the tools. The
1.20 +// implementation is only included if the macro BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION is
1.21 +// defined.
1.22 +//
1.23 +// WARNING: This file contains some APIs which are internal and are subject
1.24 +// to change without notice. Such APIs should therefore not be used
1.25 +// outside the Kernel and Hardware Services package.
1.26 +//
1.27 +
1.28 +#ifndef __BYTE_PAIR_COMPRESS_H__
1.29 +#define __BYTE_PAIR_COMPRESS_H__
1.30 +
1.31 +#include <e32std.h>
1.32 +
1.33 +/**
1.34 + * @internalTechnology
1.35 + *
1.36 + * Compress up to 4096 bytes of data usign the byte-pair compression algorithm.
1.37 + *
1.38 + * @param aDest Destination buffer, must be at least four times the size of the source data.
1.39 + * @param aSrc Source data to compress.
1.40 + * @param aSize The size of the source data.
1.41 + */
1.42 +extern TInt BytePairCompress(TUint8* aDest, TUint8* aSrc, TInt aSize);
1.43 +
1.44 +#ifdef BYTE_PAIR_COMPRESS_INCLUDE_IMPLEMENTATION
1.45 +
1.46 +const TInt BlockSize = 0x1000;
1.47 +TInt ExtraSave = 0;
1.48 +
1.49 +TUint16 PairCount[0x10000];
1.50 +TUint16 PairBuffer[BlockSize*2];
1.51 +
1.52 +TUint16 GlobalPairs[0x10000] = {0};
1.53 +TUint16 GlobalTokenCounts[0x100] = {0};
1.54 +
1.55 +TUint16 ByteCount[0x100+4];
1.56 +
1.57 +void CountBytes(TUint8* data, TInt size)
1.58 + {
1.59 + memset(ByteCount,0,sizeof(ByteCount));
1.60 + TUint8* dataEnd = data+size;
1.61 + while(data<dataEnd)
1.62 + ++ByteCount[*data++];
1.63 + }
1.64 +
1.65 +inline void ByteUsed(TInt b)
1.66 + {
1.67 + ByteCount[b] = 0xffff;
1.68 + }
1.69 +
1.70 +int TieBreak(int b1,int b2)
1.71 + {
1.72 + return -ByteCount[b1]-ByteCount[b2];
1.73 + }
1.74 +
1.75 +TInt MostCommonPair(TInt& pair, TUint8* data, TInt size, TInt minFrequency, TInt marker)
1.76 + {
1.77 + memset(PairCount,0,sizeof(PairCount));
1.78 + TUint8* dataEnd = data+size-1;
1.79 + TInt pairsFound = 0;
1.80 + TInt lastPair = -1;
1.81 + while(data<dataEnd)
1.82 + {
1.83 + TInt b1 = *data++;
1.84 + if(b1==marker)
1.85 + {
1.86 + // skip marker and following byte
1.87 + lastPair = -1;
1.88 + ++data;
1.89 + continue;
1.90 + }
1.91 + TInt b2 = *data;
1.92 + if(b2==marker)
1.93 + {
1.94 + // skip marker and following byte
1.95 + lastPair = -1;
1.96 + data+=2;
1.97 + continue;
1.98 + }
1.99 + TInt p = (b2<<8)|b1;
1.100 + if(p==lastPair)
1.101 + {
1.102 + // ensure a pair of identical bytes don't get double counted
1.103 + lastPair = -1;
1.104 + continue;
1.105 + }
1.106 + lastPair = p;
1.107 + ++PairCount[p];
1.108 + if(PairCount[p]==minFrequency)
1.109 + PairBuffer[pairsFound++] = (TUint16)p;
1.110 + }
1.111 +
1.112 + TInt bestCount = -1;
1.113 + TInt bestPair = -1;
1.114 + TInt bestTieBreak = 0;
1.115 + TInt p;
1.116 + while(pairsFound--)
1.117 + {
1.118 + p = PairBuffer[pairsFound];
1.119 + TInt f=PairCount[p];
1.120 + if(f>bestCount)
1.121 + {
1.122 + bestCount = f;
1.123 + bestPair = p;
1.124 + bestTieBreak = TieBreak(p&0xff,p>>8);
1.125 + }
1.126 + else if(f==bestCount)
1.127 + {
1.128 + TInt tieBreak = TieBreak(p&0xff,p>>8);
1.129 + if(tieBreak>bestTieBreak)
1.130 + {
1.131 + bestCount = f;
1.132 + bestPair = p;
1.133 + bestTieBreak = tieBreak;
1.134 + }
1.135 + }
1.136 + }
1.137 + pair = bestPair;
1.138 + return bestCount;
1.139 + }
1.140 +
1.141 +TInt LeastCommonByte(TInt& byte)
1.142 + {
1.143 + TInt bestCount = 0xffff;
1.144 + TInt bestByte = -1;
1.145 + for(TInt b = 0; b<0x100; b++)
1.146 + {
1.147 + TInt f = ByteCount[b];
1.148 + if(f<bestCount)
1.149 + {
1.150 + bestCount = f;
1.151 + bestByte = b;
1.152 + }
1.153 + }
1.154 + byte = bestByte;
1.155 + return bestCount;
1.156 + }
1.157 +
1.158 +TInt BytePairCompress(TUint8* dst, TUint8* src, TInt size)
1.159 + {
1.160 + TInt originalSize = size;
1.161 + TUint8* dst2 = dst+size*2;
1.162 + TUint8* in = src;
1.163 + TUint8* out = dst;
1.164 +
1.165 + TUint8 tokens[0x100*3];
1.166 + TInt tokenCount = 0;
1.167 +
1.168 + CountBytes(in,size);
1.169 +
1.170 + TInt marker = -1;
1.171 + TInt overhead = 1+3+LeastCommonByte(marker);
1.172 + ByteUsed(marker);
1.173 +
1.174 + TUint8* inEnd = in+size;
1.175 + TUint8* outStart = out;
1.176 + while(in<inEnd)
1.177 + {
1.178 + TInt b=*in++;
1.179 + if(b==marker)
1.180 + *out++ = (TUint8)b;
1.181 + *out++ = (TUint8)b;
1.182 + }
1.183 + size = out-outStart;
1.184 +
1.185 + TInt outToggle = 1;
1.186 + in = dst;
1.187 + out = dst2;
1.188 +
1.189 + for(TInt r=256; r>0; --r)
1.190 + {
1.191 + TInt byte;
1.192 + TInt byteCount = LeastCommonByte(byte);
1.193 + TInt pair;
1.194 + TInt pairCount = MostCommonPair(pair,in,size,overhead+1,marker);
1.195 + TInt saving = pairCount-byteCount;
1.196 + if(saving<=overhead)
1.197 + break;
1.198 +
1.199 + overhead = 3;
1.200 + if(tokenCount>=32)
1.201 + overhead = 2;
1.202 +
1.203 + TUint8* d=tokens+3*tokenCount;
1.204 + ++tokenCount;
1.205 + *d++ = (TUint8)byte;
1.206 + ByteUsed(byte);
1.207 + *d++ = (TUint8)pair;
1.208 + ByteUsed(pair&0xff);
1.209 + *d++ = (TUint8)(pair>>8);
1.210 + ByteUsed(pair>>8);
1.211 + ++GlobalPairs[pair];
1.212 +
1.213 + inEnd = in+size;
1.214 + outStart = out;
1.215 + while(in<inEnd)
1.216 + {
1.217 + TInt b=*in++;
1.218 + if(b==marker)
1.219 + {
1.220 + *out++ = (TUint8)marker;
1.221 + b = *in++;
1.222 + }
1.223 + else if(b==byte)
1.224 + {
1.225 + *out++ = (TUint8)marker;
1.226 + --byteCount;
1.227 + }
1.228 + else if(b==(pair&0xff) && in<inEnd && *in==(pair>>8))
1.229 + {
1.230 + ++in;
1.231 + b = byte;
1.232 + --pairCount;
1.233 + }
1.234 + *out++ = (TUint8)b;
1.235 + }
1.236 + ASSERT(!byteCount);
1.237 + ASSERT(!pairCount);
1.238 + size = out-outStart;
1.239 +
1.240 + outToggle ^= 1;
1.241 + if(outToggle)
1.242 + {
1.243 + in = dst;
1.244 + out = dst2;
1.245 + }
1.246 + else
1.247 + {
1.248 + in = dst2;
1.249 + out = dst;
1.250 + }
1.251 + }
1.252 +
1.253 + // sort tokens with a bubble sort...
1.254 + for(TInt x=0; x<tokenCount-1; x++)
1.255 + for(TInt y=x+1; y<tokenCount; y++)
1.256 + if(tokens[x*3]>tokens[y*3])
1.257 + {
1.258 + TInt z = tokens[x*3];
1.259 + tokens[x*3] = tokens[y*3];
1.260 + tokens[y*3] = (TUint8)z;
1.261 + z = tokens[x*3+1];
1.262 + tokens[x*3+1] = tokens[y*3+1];
1.263 + tokens[y*3+1] = (TUint8)z;
1.264 + z = tokens[x*3+2];
1.265 + tokens[x*3+2] = tokens[y*3+2];
1.266 + tokens[y*3+2] = (TUint8)z;
1.267 + }
1.268 +
1.269 + // check for not being able to compress...
1.270 + if(size>originalSize)
1.271 + {
1.272 + *dst++ = 0; // store zero token count
1.273 + memcpy(dst,src,originalSize); // store original data
1.274 + return originalSize+1;
1.275 + }
1.276 +
1.277 + // make sure data is in second buffer (dst2)
1.278 + if(in!=dst2)
1.279 + memcpy(dst2,dst,size);
1.280 +
1.281 + // store tokens...
1.282 + TUint8* originalDst = dst;
1.283 + *dst++ = (TUint8)tokenCount;
1.284 + if(tokenCount)
1.285 + {
1.286 + *dst++ = (TUint8)marker;
1.287 + if(tokenCount<32)
1.288 + {
1.289 + memcpy(dst,tokens,tokenCount*3);
1.290 + dst += tokenCount*3;
1.291 + }
1.292 + else
1.293 + {
1.294 + TUint8* bitMask = dst;
1.295 + memset(bitMask,0,32);
1.296 + dst += 32;
1.297 + TUint8* d=tokens;
1.298 + do
1.299 + {
1.300 + TInt t=*d++;
1.301 + bitMask[t>>3] |= (1<<(t&7));
1.302 + *dst++ = *d++;
1.303 + *dst++ = *d++;
1.304 + }
1.305 + while(--tokenCount);
1.306 + }
1.307 + }
1.308 + // store data...
1.309 + memcpy(dst,dst2,size);
1.310 + dst += size;
1.311 +
1.312 + // get stats...
1.313 + ++GlobalTokenCounts[tokenCount];
1.314 +
1.315 + // return total size of compressed data...
1.316 + return dst-originalDst;
1.317 + }
1.318 +
1.319 +#endif
1.320 +#endif