1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/store/pcstore/src/unicodecompression.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,746 @@
1.4 +// Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// Classes implemented in this file are used for Unicode compression and decompression.
1.18 +// Their code is borrowed from Symbian with only a few changes; for example, the "Panic" function
1.19 +// now exits the program. The Symbian coding standard is kept in the code.
1.20 +//
1.21 +//
1.22 +
1.23 +#include <stdlib.h>
1.24 +#include "unicodecompression.h"
1.25 +
1.26 +namespace PCStore
1.27 +{
1.28 +const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] = // base code point of each static window; StaticWindowIndex treats a window as base..base+127
1.29 + {
1.30 + 0x0000, // tags
1.31 + 0x0080, // Latin-1 supplement
1.32 + 0x0100, // Latin Extended-A
1.33 + 0x0300, // Combining Diacritics
1.34 + 0x2000, // General Punctuation
1.35 + 0x2080, // Currency Symbols
1.36 + 0x2100, // Letterlike Symbols and Number Forms
1.37 + 0x3000 // CJK Symbols and Punctuation
1.38 + };
1.39 +// Initial base code point of each dynamic window; Reset() copies these values into iDynamicWindow.
1.40 +const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] =
1.41 + {
1.42 + 0x0080, // Latin-1 supplement
1.43 + 0x00C0, // parts of Latin-1 supplement and Latin Extended-A
1.44 + 0x0400, // Cyrillic
1.45 + 0x0600, // Arabic
1.46 + 0x0900, // Devanagari
1.47 + 0x3040, // Hiragana
1.48 + 0x30A0, // Katakana
1.49 + 0xFF00 // Fullwidth ASCII
1.50 + };
1.51 +// Window bases selected by offset indices 0xF9..0xFF: entry i corresponds to offset index 0xF9 + i (see DynamicWindowOffsetIndex and DynamicWindowBase).
1.52 +const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] =
1.53 + {
1.54 + 0x00C0, // Latin 1 letters (not symbols) and some of Extended-A
1.55 + 0x0250, // IPA extensions
1.56 + 0x0370, // Greek
1.57 + 0x0530, // Armenian
1.58 + 0x3040, // Hiragana
1.59 + 0x30A0, // Katakana
1.60 + 0xFF60 // Halfwidth katakana
1.61 + };
1.62 +// Tag bytes. SQ0, SC0, SD0, UC0 and UD0 are each the first of eight consecutive tags: the window number 0..7 is added to the value.
1.63 +// Single-byte mode tag values
1.64 +const TUint8 SQ0 = 0x01; // <byte> quote from window 0 (static window if <byte> is 0x00..0x7F, dynamic window if 0x80..0xFF)
1.65 +const TUint8 SDX = 0x0B; // <hbyte> <lbyte> define window in expansion area
1.66 +const TUint8 SQU = 0x0E; // <hbyte> <lbyte> quote Unicode value
1.67 +const TUint8 SCU = 0x0F; // switch to Unicode mode
1.68 +const TUint8 SC0 = 0x10; // select dynamic window 0
1.69 +const TUint8 SD0 = 0x18; // <byte> set dynamic window 0 index to <byte> and select it
1.70 +
1.71 +// Unicode mode tag values
1.72 +const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode
1.73 +const TUint8 UD0 = 0xE8; // <byte> set dynamic window 0 index to <byte>, select it and switch to
1.74 + // single-byte mode
1.75 +const TUint8 UQU = 0xF0; // <hbyte>, <lbyte> quote Unicode value
1.76 +const TUint8 UDX = 0xF1; // <hbyte>, <lbyte> define window in expansion area and switch to single-byte mode
1.77 +// Default constructor: zero the word/byte counters and their limits, then call Reset() to set the mode and window state.
1.78 +TUnicodeCompressionState::TUnicodeCompressionState():
1.79 + iUnicodeWords(0),
1.80 + iMaxUnicodeWords(0),
1.81 + iCompressedBytes(0),
1.82 + iMaxCompressedBytes(0)
1.83 + {
1.84 + Reset();
1.85 + }
1.86 +// Return to the initial state: single-byte mode, active window base 0x0080, dynamic windows restored from iDynamicWindowDefault.
1.87 +void TUnicodeCompressionState::Reset()
1.88 + {
1.89 + iUnicodeMode = false;
1.90 + iActiveWindowBase = 0x0080; // same value as iDynamicWindowDefault[0]
1.91 + for (int i = 0; i < EDynamicWindows; i++)
1.92 + iDynamicWindow[i] = iDynamicWindowDefault[i];
1.93 + }
1.94 +
1.95 +// aCode is tested against each static window (base..base+127) in table order, so the lowest matching index wins.
1.96 +// Return the index of the static window that contains this code, if any, or -1 if there is none.
1.97 +TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode)
1.98 + {
1.99 + for (TInt i = 0; i < EStaticWindows; i++)
1.100 + if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128)
1.101 + return i;
1.102 + return -1;
1.103 + }
1.104 +
1.105 +/*
1.106 +If aCode can be accommodated in one of the legal dynamic windows, return the index of that window
1.107 +in the offset table (a value that DynamicWindowBase maps back to the window's base). If not, return KErrNotFound.
1.108 +*/
1.109 +TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode)
1.110 + {
1.111 + if (aCode < 0x0080) // below the first half block that has an offset index
1.112 + return KErrNotFound;
1.113 + if (aCode >= 0x3400 && aCode <= 0xDFFF) // the gap between the two ranges of half blocks described below
1.114 + return KErrNotFound;
1.115 +
1.116 + /*
1.117 + Prefer sections that cross half-block boundaries. These are better adapted to actual text.
1.118 + They are represented by offset indices 0xf9..0xff.
1.119 + */
1.120 + for (int i = 0; i < ESpecialBases; i++)
1.121 + if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128)
1.122 + return 0xF9 + i;
1.123 +
1.124 + /*
1.125 + Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and
1.126 + 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80.
1.127 + */
1.128 + if (aCode >= 0xE000)
1.129 + aCode -= 0xAC00; // shifts 0xE000 down to 0x3400 so that the division below yields 0x68 for the first upper half block
1.130 + return aCode / 0x80; // index of the 128-code half block containing aCode
1.131 + }
1.132 +// Maps an offset index (as produced by DynamicWindowOffsetIndex, or read from compressed data by the expander) to a window base.
1.133 +// Return the base of the window represented by offset index aOffsetIndex. Return 0 if the offset index is illegal.
1.134 +TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex)
1.135 + {
1.136 + if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF) // special bases taken from the iSpecialBase table
1.137 + {
1.138 + /*
1.139 + WARNING: don't optimise the following two lines by replacing them with
1.140 + 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce an error
1.141 + in ARM builds caused by optimisation and consequent erroneous fixing up
1.142 + of the array base: see defect EDNGASR-4AGJQX in ER5U defects.
1.143 + */
1.144 + int special_base_index = aOffsetIndex - 0xF9;
1.145 + return iSpecialBase[special_base_index];
1.146 + }
1.147 + if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67)
1.148 + return aOffsetIndex * 0x80; // half blocks 0x0080..0x3380
1.149 + if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7)
1.150 + return aOffsetIndex * 0x80 + 0xAC00; // half blocks 0xE000..0xFF80
1.151 + return 0; // 0x00, 0xA8..0xF8 and anything outside 0..0xFF
1.152 + }
1.153 +// Return true for the codes that single-byte mode passes through unchanged: 0x0000, tab (0x0009), LF (0x000A), CR (0x000D) and 0x0020..0x007F.
1.154 +TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode)
1.155 + {
1.156 + return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D ||
1.157 + (aCode >= 0x0020 && aCode <= 0x007F);
1.158 + }
1.159 +// Terminate the program by calling exit() with the panic code aPanic as the exit status.
1.160 +void TUnicodeCompressionState::Panic(TPanic aPanic)
1.161 + {
1.162 + exit(aPanic);
1.163 + }
1.164 +// Default constructor: empty input and output buffers, dynamic window index 0, and no output destination or input source attached.
1.165 +TUnicodeCompressor::TUnicodeCompressor():
1.166 + iInputBufferStart(0),
1.167 + iInputBufferSize(0),
1.168 + iOutputBufferStart(0),
1.169 + iOutputBufferSize(0),
1.170 + iDynamicWindowIndex(0),
1.171 + iOutputStream(NULL),
1.172 + iOutputPointer(NULL),
1.173 + iInput(NULL)
1.174 + {
1.175 + }
1.176 +// Compress words read from aInput into the stream aOutput. At most aMaxOutputBytes bytes are written and aMaxInputWords words read by this call; aOutputBytes / aInputWords, if non-null, receive the counts actually written / read.
1.177 +void TUnicodeCompressor::CompressL(CStoreWriteStream& aOutput,MUnicodeSource& aInput,
1.178 + TInt aMaxOutputBytes,TInt aMaxInputWords,
1.179 + TInt* aOutputBytes,TInt* aInputWords)
1.180 + {
1.181 + DoCompressL(&aOutput,NULL,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords);
1.182 + }
1.183 +// As the stream overload, but bytes are stored consecutively starting at aOutput; aMaxOutputBytes is the only bound applied to that memory, so the caller must provide at least that much room.
1.184 +void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput,
1.185 + TInt aMaxOutputBytes,TInt aMaxInputWords,
1.186 + TInt* aOutputBytes,TInt* aInputWords)
1.187 + {
1.188 + DoCompressL(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords);
1.189 + }
1.190 +// Write buffered data to the stream aOutput without reading new input (at most aMaxOutputBytes bytes); aOutputBytes receives the number written. Returns the number of bytes still held in the output buffer.
1.191 +TInt TUnicodeCompressor::FlushL(CStoreWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes)
1.192 + {
1.193 + DoCompressL(&aOutput,NULL,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL);
1.194 + return iOutputBufferSize;
1.195 + }
1.196 +// Write buffered data to memory at aOutput without reading new input (at most aMaxOutputBytes bytes); aOutputBytes receives the number written. Returns the number of bytes still held in the output buffer.
1.197 +TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes)
1.198 + {
1.199 + DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL);
1.200 + return iOutputBufferSize;
1.201 + }
1.202 +// Return the number of bytes produced by compressing aInputWords words read from aInput. A temporary compressor with no output destination is used, so bytes are counted but not stored anywhere.
1.203 +TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords)
1.204 + {
1.205 + TInt bytes; // always assigned by DoCompressL because a non-null pointer to it is passed
1.206 + TUnicodeCompressor c;
1.207 + c.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL);
1.208 + return bytes;
1.209 + }
1.210 +// Shared worker for CompressL, FlushL and CompressedSizeL. Any of aOutputStream, aOutputPointer, aInput, aOutputBytes and aInputWords may be null.
1.211 +// Compress until input or output is exhausted or an exception occurs.
1.212 +void TUnicodeCompressor::DoCompressL(CStoreWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
1.213 + TInt aMaxOutputBytes,TInt aMaxInputWords,
1.214 + TInt* aOutputBytes,TInt* aInputWords)
1.215 + {
1.216 + iOutputStream = aOutputStream;
1.217 + iOutputPointer = aOutputPointer;
1.218 + iInput = aInput;
1.219 + iMaxCompressedBytes = aMaxOutputBytes;
1.220 + iMaxUnicodeWords = aMaxInputWords;
1.221 + iCompressedBytes = iUnicodeWords = 0; // the counters are per call
1.222 + FlushOutputBufferL(); // first emit bytes left in the output buffer by a previous call
1.223 + if (iInput)
1.224 + {
1.225 + while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes)
1.226 + {
1.227 + TUint16 x = iInput->ReadUnicodeValueL();
1.228 + TAction action(x); // classify the code once, when it enters the buffer
1.229 + iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action; // append to the circular input buffer
1.230 + iInputBufferSize++;
1.231 + iUnicodeWords++;
1.232 + if (iInputBufferSize == EMaxInputBufferSize) // buffer full: encode some of it to make room
1.233 + WriteRunL();
1.234 + }
1.235 + }
1.236 + FlushInputBufferL(); // encode what is still buffered, as far as the output limit allows
1.237 + if (aOutputBytes)
1.238 + *aOutputBytes = iCompressedBytes;
1.239 + if (aInputWords)
1.240 + *aInputWords = iUnicodeWords;
1.241 + }
1.242 +// Store aCode and classify it: iTreatment becomes EPlainASCII, a dynamic window offset index, EFirstStatic + static window index, or EPlainUnicode (tried in that order).
1.243 +TUnicodeCompressor::TAction::TAction(TUint16 aCode):
1.244 + iCode(aCode)
1.245 + {
1.246 + if (TUnicodeCompressionState::EncodeAsIs(aCode))
1.247 + iTreatment = EPlainASCII;
1.248 + else
1.249 + {
1.250 + iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode);
1.251 + if (iTreatment == -1) // NOTE(review): DynamicWindowOffsetIndex returns KErrNotFound; this literal assumes KErrNotFound == -1 - confirm
1.252 + {
1.253 + iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode);
1.254 + if (iTreatment == -1) // StaticWindowIndex returns -1 when no static window contains the code
1.255 + iTreatment = EPlainUnicode;
1.256 + else
1.257 + iTreatment += EFirstStatic;
1.258 + }
1.259 + }
1.260 + }
1.261 +// Remove the oldest entry from the circular input buffer and encode it with WriteCharacter (no mode or window change).
1.262 +void TUnicodeCompressor::WriteCharacterFromBuffer()
1.263 + {
1.264 + const TAction& action = iInputBuffer[iInputBufferStart]; // refers to the slot itself; nothing below stores into iInputBuffer before the reference is used
1.265 + iInputBufferSize--;
1.266 + iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize;
1.267 + WriteCharacter(action);
1.268 + }
1.269 +// Encode buffered input characters run by run until the input buffer is empty or the output byte limit has been reached.
1.270 +void TUnicodeCompressor::FlushInputBufferL()
1.271 + {
1.272 + while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes)
1.273 + WriteRunL();
1.274 + }
1.275 +// Encode the front of the input buffer: first any characters that need no mode/window change (single-byte mode only), then one run of characters sharing a treatment; finally flush the output buffer.
1.276 +void TUnicodeCompressor::WriteRunL()
1.277 + {
1.278 + // Write out any leading characters that can be passed through.
1.279 + if (!iUnicodeMode)
1.280 + while (iInputBufferSize > 0)
1.281 + {
1.282 + const TAction& action = iInputBuffer[iInputBufferStart];
1.283 + if (action.iTreatment == TAction::EPlainASCII ||
1.284 + (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128))
1.285 + WriteCharacterFromBuffer();
1.286 + else
1.287 + break;
1.288 + }
1.289 +
1.290 + // Write a run of characters that cannot be passed through.
1.291 + int i;
1.292 + if (iInputBufferSize > 0)
1.293 + {
1.294 + /*
1.295 + Find a run of characters with the same treatment and select that treatment
1.296 + if the run has more than one character.
1.297 + */
1.298 + int treatment = iInputBuffer[iInputBufferStart].iTreatment;
1.299 + int next_treatment = treatment; // only read by the comparison inside the loop
1.300 + int run_size = 1;
1.301 + for (i = 1; i < iInputBufferSize; i++) // count the buffered characters, starting at the oldest, that share its treatment
1.302 + {
1.303 + int index = (iInputBufferStart + i) % EMaxInputBufferSize;
1.304 + next_treatment = iInputBuffer[index].iTreatment;
1.305 + if (next_treatment != treatment)
1.306 + break;
1.307 + run_size++;
1.308 + }
1.309 + if (run_size > 1) // a lone character is written in the current mode and window instead of switching
1.310 + SelectTreatment(treatment);
1.311 + for (i = 0; i < run_size; i++)
1.312 + WriteCharacterFromBuffer();
1.313 + }
1.314 +
1.315 + FlushOutputBufferL(); // pass the encoded bytes on to the destination, up to the output limit
1.316 + }
1.317 +// Move bytes from the circular output buffer to the destination until the buffer is empty or iMaxCompressedBytes bytes have been emitted in this call.
1.318 +void TUnicodeCompressor::FlushOutputBufferL()
1.319 + {
1.320 + while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes)
1.321 + {
1.322 + TUint8 byte = iOutputBuffer[iOutputBufferStart];
1.323 + if (iOutputPointer)
1.324 + *iOutputPointer++ = byte;
1.325 + else if (iOutputStream)
1.326 + iOutputStream->WriteUint8(byte);
1.327 + iCompressedBytes++; // counted even when there is no destination (as when CompressedSizeL passes neither)
1.328 + iOutputBufferSize--;
1.329 + iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize;
1.330 + }
1.331 + }
1.332 +// Emit whatever tag is needed so that characters with treatment aTreatment can follow, and update the mode/window state to match. Static-window treatments are left alone.
1.333 +void TUnicodeCompressor::SelectTreatment(TInt aTreatment)
1.334 + {
1.335 + if (aTreatment == TAction::EPlainUnicode)
1.336 + {
1.337 + // Switch to Unicode mode if not there already.
1.338 + if (!iUnicodeMode)
1.339 + {
1.340 + WriteByte(SCU);
1.341 + iUnicodeMode = true;
1.342 + }
1.343 + return;
1.344 + }
1.345 +
1.346 + if (aTreatment == TAction::EPlainASCII)
1.347 + {
1.348 + // Switch to single-byte mode, using the current dynamic window, if not there already.
1.349 + if (iUnicodeMode)
1.350 + {
1.351 + WriteByte(UC0 + iDynamicWindowIndex); // UCn for the most recently selected dynamic window
1.352 + iUnicodeMode = false;
1.353 + }
1.354 + return;
1.355 + }
1.356 +
1.357 + if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic)
1.358 + {
1.359 + TUint32 base = DynamicWindowBase(aTreatment); // here the treatment value is itself the offset index
1.360 +
1.361 + // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4.
1.362 + for (int i = 0; i < EDynamicWindows; i++)
1.363 + if (base == iDynamicWindow[i])
1.364 + {
1.365 + if (iUnicodeMode)
1.366 + WriteByte(UC0 + i);
1.367 + else if (i != iDynamicWindowIndex) // already the selected window: no tag needed
1.368 + WriteByte(SC0 + i);
1.369 + iUnicodeMode = false;
1.370 + iDynamicWindowIndex = i;
1.371 + iActiveWindowBase = base;
1.372 + return;
1.373 + }
1.374 + if (iUnicodeMode) // NOTE(review): window 4 is hard-coded as the one to redefine; the reason is not visible in this file
1.375 + WriteByte(UD0 + 4);
1.376 + else
1.377 + WriteByte(SD0 + 4);
1.378 + iDynamicWindowIndex = 4;
1.379 + iUnicodeMode = false;
1.380 + WriteByte(aTreatment); // operand of the define tag: the offset index of the new window
1.381 + iDynamicWindow[4] = base;
1.382 + iActiveWindowBase = base;
1.383 + return;
1.384 + } // any other treatment falls through with nothing emitted and no state change
1.385 + }
1.386 +// Dispatch on the current mode: Unicode mode uses only the code value, single-byte mode also uses the stored treatment.
1.387 +// Write a character without changing mode or window.
1.388 +void TUnicodeCompressor::WriteCharacter(const TAction& aAction)
1.389 + {
1.390 + if (iUnicodeMode)
1.391 + WriteUCharacter(aAction.iCode);
1.392 + else
1.393 + WriteSCharacter(aAction);
1.394 + }
1.395 +// Write aCode in Unicode mode as two bytes, high byte first, preceded by UQU when the high byte is 0xE0..0xF2.
1.396 +void TUnicodeCompressor::WriteUCharacter(TUint16 aCode)
1.397 + {
1.398 + // Emit the 'quote Unicode' tag if the character would conflict with a tag.
1.399 + if (aCode >= 0xE000 && aCode <= 0xF2FF) // high bytes 0xE0..0xF2 are not treated as plain data by the expander's HandleUByteL
1.400 + WriteByte(UQU);
1.401 +
1.402 + // Write the Unicode value big-end first.
1.403 + WriteByte((aCode >> 8) & 0xFF);
1.404 + WriteByte(aCode & 0xFF);
1.405 + }
1.406 +// Append the low 8 bits of aByte to the circular output buffer; calls Panic(EOutputBufferOverflow) if the buffer is already full.
1.407 +void TUnicodeCompressor::WriteByte(TUint aByte)
1.408 + {
1.409 + if (iOutputBufferSize >= EMaxOutputBufferSize)
1.410 + Panic(EOutputBufferOverflow);
1.411 + iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte;
1.412 + iOutputBufferSize++;
1.413 + }
1.414 +// Encode one character in single-byte mode without changing the active window. Tried in order: pass-through byte, static-window quote, active-window byte, dynamic-window quote, full Unicode quote.
1.415 +void TUnicodeCompressor::WriteSCharacter(const TAction& aAction)
1.416 + {
1.417 + // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes.
1.418 + if (aAction.iTreatment == TAction::EPlainASCII)
1.419 + {
1.420 + WriteByte(aAction.iCode);
1.421 + return;
1.422 + }
1.423 +
1.424 + // Characters in a static window can be written using SQ<n> plus a byte in the range 0x00-0x7F
1.425 + if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic)
1.426 + {
1.427 + int window = aAction.iTreatment - TAction::EFirstStatic;
1.428 + WriteByte(SQ0 + window);
1.429 + WriteByte(aAction.iCode - iStaticWindow[window]); // offset within the window: the expander adds iStaticWindow[window] back and reads operands >= 0x80 as dynamic-window quotes
1.430 + return;
1.431 + }
1.432 +
1.433 + // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF.
1.434 + if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128)
1.435 + {
1.436 + WriteByte(aAction.iCode - iActiveWindowBase + 0x80);
1.437 + return;
1.438 + }
1.439 +
1.440 + // Characters in another dynamic window can be written using SQ<n> plus a byte in the range 0x80-0xFF
1.441 + int i;
1.442 + for (i = 0; i < EDynamicWindows; i++)
1.443 + if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128)
1.444 + {
1.445 + WriteByte(SQ0 + i);
1.446 + WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80);
1.447 + return;
1.448 + }
1.449 +
1.450 + // Other characters can be quoted.
1.451 + WriteByte(SQU);
1.452 + WriteByte((aAction.iCode >> 8) & 0xFF);
1.453 + WriteByte(aAction.iCode & 0xFF);
1.454 + return;
1.455 + }
1.456 +
1.457 +// Default constructor: empty input and output buffers, and no output sink or input source attached.
1.458 +TUnicodeExpander::TUnicodeExpander():
1.459 + iInputBufferStart(0),
1.460 + iInputBufferSize(0),
1.461 + iOutputBufferStart(0),
1.462 + iOutputBufferSize(0),
1.463 + iOutput(NULL),
1.464 + iInputStream(NULL),
1.465 + iInputPointer(NULL)
1.466 + {
1.467 + }
1.468 +// Expand bytes read from the stream aInput and deliver the words to aOutput. At most aMaxOutputWords words are delivered and aMaxInputBytes bytes read by this call; aOutputWords / aInputBytes, if non-null, receive the counts actually delivered / read.
1.469 +void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,CStoreReadStream& aInput,
1.470 + TInt aMaxOutputWords,TInt aMaxInputBytes,
1.471 + TInt* aOutputWords,TInt* aInputBytes)
1.472 + {
1.473 + DoExpandL(&aOutput,&aInput,NULL,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes);
1.474 + }
1.475 +// As the stream overload, but bytes are read consecutively starting at aInput; aMaxInputBytes is the only bound applied to that memory, so it must not exceed the data available there.
1.476 +void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,const TUint8* aInput,
1.477 + TInt aMaxOutputWords,TInt aMaxInputBytes,
1.478 + TInt* aOutputWords,TInt* aInputBytes)
1.479 + {
1.480 + DoExpandL(&aOutput,NULL,aInput,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes);
1.481 + }
1.482 +// Deliver words still held in the output buffer to aOutput without reading new input (at most aMaxOutputWords words); aOutputWords receives the number delivered. Returns the number of words still buffered.
1.483 +TInt TUnicodeExpander::FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords)
1.484 + {
1.485 + DoExpandL(&aOutput,NULL,NULL,aMaxOutputWords,0,&aOutputWords,NULL);
1.486 + return iOutputBufferSize;
1.487 + }
1.488 +// Return the number of words produced by expanding aInputBytes bytes read from the stream aInput. A temporary expander with no output sink is used, so words are counted but not delivered.
1.489 +TInt TUnicodeExpander::ExpandedSizeL(CStoreReadStream& aInput,TInt aInputBytes)
1.490 + {
1.491 + TInt words; // always assigned by DoExpandL because a non-null pointer to it is passed
1.492 + TUnicodeExpander e;
1.493 + e.DoExpandL(NULL,&aInput,NULL,KMaxTInt,aInputBytes,&words,NULL);
1.494 + return words;
1.495 + }
1.496 +// Return the number of words produced by expanding aInputBytes bytes starting at aInput. A temporary expander with no output sink is used, so words are counted but not delivered.
1.497 +TInt TUnicodeExpander::ExpandedSizeL(const TUint8* aInput,TInt aInputBytes)
1.498 + {
1.499 + TInt words; // always assigned by DoExpandL because a non-null pointer to it is passed
1.500 + TUnicodeExpander e;
1.501 + e.DoExpandL(NULL,NULL,aInput,KMaxTInt,aInputBytes,&words,NULL);
1.502 + return words;
1.503 + }
1.504 +// Shared worker for ExpandL, FlushL and ExpandedSizeL. Any of aOutput, aInputStream, aInputPointer, aOutputWords and aInputBytes may be null.
1.505 +// Expand until input or output is exhausted or an exception occurs.
1.506 +void TUnicodeExpander::DoExpandL(MUnicodeSink* aOutput,CStoreReadStream* aInputStream,const TUint8* aInputPointer,
1.507 + TInt aMaxOutputWords,TInt aMaxInputBytes,
1.508 + TInt* aOutputWords,TInt* aInputBytes)
1.509 + {
1.510 + iOutput = aOutput;
1.511 + iInputStream = aInputStream;
1.512 + iInputPointer = aInputPointer;
1.513 + iMaxUnicodeWords = aMaxOutputWords;
1.514 + iMaxCompressedBytes = aMaxInputBytes;
1.515 + iUnicodeWords = iCompressedBytes = 0; // the counters are per call
1.516 + iInputBufferStart = 0; // bytes kept in iInputBuffer from an unfinished operation are read again first (see ReadByteL)
1.517 + FlushOutputBufferL(); // first deliver words left in the output buffer by a previous call
1.518 + if (iInputPointer || iInputStream)
1.519 + {
1.520 + while (iUnicodeWords + iOutputBufferSize < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) // words still buffered count towards the output limit
1.521 + HandleByteL();
1.522 + }
1.523 + if (aOutputWords)
1.524 + *aOutputWords = iUnicodeWords;
1.525 + if (aInputBytes)
1.526 + *aInputBytes = iCompressedBytes;
1.527 + }
1.528 +// Read one byte and process the complete operation it starts (the handlers read any operand bytes themselves), then flush the output buffer.
1.529 +void TUnicodeExpander::HandleByteL()
1.530 + {
1.531 + TUint8 byte;
1.532 + TBool handled = false;
1.533 + if (ReadByteL(byte))
1.534 + {
1.535 + if (iUnicodeMode)
1.536 + handled = HandleUByteL(byte);
1.537 + else
1.538 + handled = HandleSByteL(byte);
1.539 + }
1.540 + iInputBufferStart = 0; // rewind, so that bytes of an unfinished operation are read again next time
1.541 + if (handled)
1.542 + iInputBufferSize = 0; // operation complete: discard its bytes
1.543 + FlushOutputBufferL();
1.544 + }
1.545 +// Move words from the circular output buffer to the sink until the buffer is empty or iMaxUnicodeWords words have been delivered in this call.
1.546 +void TUnicodeExpander::FlushOutputBufferL()
1.547 + {
1.548 + while (iOutputBufferSize > 0 && iUnicodeWords < iMaxUnicodeWords)
1.549 + {
1.550 + if (iOutput)
1.551 + iOutput->WriteUnicodeValueL(iOutputBuffer[iOutputBufferStart]);
1.552 + iUnicodeWords++; // counted even when there is no sink (as when ExpandedSizeL passes none)
1.553 + iOutputBufferSize--;
1.554 + iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize;
1.555 + }
1.556 + }
1.557 +// Process aByte in single-byte mode. Returns true when the operation completed, false when an operand byte could not be read. Of all byte values only 0x0C matches none of the cases and reaches Panic(EUnhandledByte).
1.558 +TBool TUnicodeExpander::HandleSByteL(TUint8 aByte)
1.559 + {
1.560 + // 'Pass-through' codes.
1.561 + if (TUnicodeCompressionState::EncodeAsIs(aByte)) // tested first, so 0x09, 0x0A and 0x0D are never taken for tags
1.562 + {
1.563 + WriteChar(aByte);
1.564 + return true;
1.565 + }
1.566 +
1.567 + // Codes 0x80-0xFF select a character from the active window.
1.568 + if (aByte >= 0x80)
1.569 + {
1.570 + WriteChar32(iActiveWindowBase + aByte - 0x80);
1.571 + return true;
1.572 + }
1.573 +
1.574 + // SQU: quote a Unicode character.
1.575 + if (aByte == SQU)
1.576 + return QuoteUnicodeL();
1.577 +
1.578 + // SCU: switch to Unicode mode.
1.579 + if (aByte == SCU)
1.580 + {
1.581 + iUnicodeMode = true;
1.582 + return true;
1.583 + }
1.584 +
1.585 + // SQn: quote from window n.
1.586 + if (aByte >= SQ0 && aByte <= SQ0 + 7)
1.587 + {
1.588 + int window = aByte - SQ0;
1.589 + TUint8 byte;
1.590 + if (ReadByteL(byte))
1.591 + {
1.592 + TUint32 c = byte;
1.593 + if (c <= 0x7F) // operand 0x00..0x7F: offset into static window n
1.594 + c += iStaticWindow[window];
1.595 + else // operand 0x80..0xFF: offset + 0x80 into dynamic window n
1.596 + c += iDynamicWindow[window] - 0x80;
1.597 + WriteChar32(c);
1.598 + return true;
1.599 + }
1.600 + else
1.601 + return false;
1.602 + }
1.603 +
1.604 + // SCn: switch to dynamic window n.
1.605 + if (aByte >= SC0 && aByte <= SC0 + 7)
1.606 + {
1.607 + iActiveWindowBase = iDynamicWindow[aByte - SC0];
1.608 + return true;
1.609 + }
1.610 +
1.611 + // SDn: define dynamic window n and switch to it.
1.612 + if (aByte >= SD0 && aByte <= SD0 + 7)
1.613 + return DefineWindowL(aByte - SD0);
1.614 +
1.615 + // SDX: define window in the expansion space.
1.616 + if (aByte == SDX)
1.617 + return DefineExpansionWindowL();
1.618 +
1.619 + Panic(EUnhandledByte);
1.620 + return false;
1.621 + }
1.622 +// Process aByte in Unicode mode. Returns true when the operation completed, false when an operand byte could not be read. Of all byte values only 0xF2 matches none of the cases and reaches Panic(EUnhandledByte).
1.623 +TBool TUnicodeExpander::HandleUByteL(TUint8 aByte)
1.624 + {
1.625 + // Plain Unicode; get the low byte and emit the Unicode value.
1.626 + if (aByte <= 0xDF || aByte >= 0xF3) // 0xE0..0xF2 are left for the tag checks below
1.627 + {
1.628 + TUint8 lo;
1.629 + if (ReadByteL(lo))
1.630 + {
1.631 + TUint16 c = (TUint16)((aByte << 8) | lo);
1.632 + WriteChar(c);
1.633 + return true;
1.634 + }
1.635 + else
1.636 + return false;
1.637 + }
1.638 +
1.639 + // Quote a Unicode character that would otherwise conflict with a tag.
1.640 + if (aByte == UQU)
1.641 + return QuoteUnicodeL();
1.642 +
1.643 + // UCn: change to single byte mode and select window n.
1.644 + if (aByte >= UC0 && aByte <= UC0 + 7)
1.645 + {
1.646 + iUnicodeMode = false;
1.647 + iActiveWindowBase = iDynamicWindow[aByte - UC0];
1.648 + return true;
1.649 + }
1.650 +
1.651 + // UDn: define dynamic window n and switch to it.
1.652 + if (aByte >= UD0 && aByte <= UD0 + 7)
1.653 + return DefineWindowL(aByte - UD0); // DefineWindowL also leaves Unicode mode
1.654 +
1.655 + // UDX: define window in the expansion space.
1.656 + if (aByte == UDX)
1.657 + return DefineExpansionWindowL(); // DefineExpansionWindowL also leaves Unicode mode
1.658 +
1.659 + Panic(EUnhandledByte);
1.660 + return false;
1.661 + }
1.662 +// Read two operand bytes (high byte first) and emit them as one 16-bit value. Returns false if either byte could not be read.
1.663 +TBool TUnicodeExpander::QuoteUnicodeL()
1.664 + {
1.665 + TUint8 hi, lo;
1.666 + if (ReadByteL(hi) && ReadByteL(lo))
1.667 + {
1.668 + TUint16 c = (TUint16)((hi << 8) | lo);
1.669 + WriteChar(c);
1.670 + return true;
1.671 + }
1.672 + else
1.673 + return false;
1.674 + }
1.675 +// Read an offset-index byte, set dynamic window aIndex to the base it denotes, make that window active and enter single-byte mode. Returns false if the byte could not be read.
1.676 +TBool TUnicodeExpander::DefineWindowL(TInt aIndex)
1.677 + {
1.678 + TUint8 window;
1.679 + if (ReadByteL(window))
1.680 + {
1.681 + iUnicodeMode = false;
1.682 + iActiveWindowBase = DynamicWindowBase(window); // an illegal offset index yields base 0, which is stored without complaint
1.683 + iDynamicWindow[aIndex] = iActiveWindowBase;
1.684 + return true;
1.685 + }
1.686 + else
1.687 + return false;
1.688 + }
1.689 +// Read two operand bytes defining a window at or above 0x10000, make it active and enter single-byte mode. Returns false if either byte could not be read.
1.690 +TBool TUnicodeExpander::DefineExpansionWindowL()
1.691 + {
1.692 + TUint8 hi, lo;
1.693 + if (ReadByteL(hi) && ReadByteL(lo))
1.694 + {
1.695 + iUnicodeMode = false;
1.696 + iActiveWindowBase = 0x10000 + (0x80 * ((hi & 0x1F) * 0x100 + lo)); // low 5 bits of hi plus lo form a 13-bit count of 128-code blocks above 0x10000
1.697 + iDynamicWindow[hi >> 5] = iActiveWindowBase; // top 3 bits of hi give the dynamic window number
1.698 + return true;
1.699 + }
1.700 + else
1.701 + return false;
1.702 + }
1.703 +// Store the next input byte in aByte and return true, or return false when nothing is buffered and the input byte limit for this call has been reached.
1.704 +// Read either from the buffer (in the case of restarting after source finished in mid-operation) or from the source.
1.705 +TBool TUnicodeExpander::ReadByteL(TUint8& aByte)
1.706 + {
1.707 + if (iInputBufferStart < iInputBufferSize) // buffered bytes are not counted in iCompressedBytes again
1.708 + {
1.709 + aByte = iInputBuffer[iInputBufferStart++];
1.710 + return true;
1.711 + }
1.712 + else if (iCompressedBytes < iMaxCompressedBytes)
1.713 + {
1.714 + if (iInputPointer)
1.715 + aByte = *iInputPointer++;
1.716 + else
1.717 + aByte = iInputStream->ReadUint8(); // DoExpandL only calls HandleByteL when one of the two sources is non-null
1.718 + iInputBuffer[iInputBufferStart++] = aByte; // remember the byte in case the current operation cannot be completed
1.719 + iInputBufferSize = iInputBufferStart;
1.720 + iCompressedBytes++;
1.721 + return true;
1.722 + }
1.723 + else
1.724 + return false;
1.725 + }
1.726 +// Append one 16-bit value to the circular output buffer; calls Panic(EOutputBufferOverflow) if the buffer is already full.
1.727 +void TUnicodeExpander::WriteChar(TUint16 aChar)
1.728 + {
1.729 + if (iOutputBufferSize >= EMaxOutputBufferSize)
1.730 + Panic(EOutputBufferOverflow);
1.731 + iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = aChar;
1.732 + iOutputBufferSize++;
1.733 + }
1.734 +// Values up to 0xFFFF are written as one word, values up to 0x10FFFF as two words; anything larger calls Panic(ENotUnicode).
1.735 +// Write a Unicode character; write using surrogates if in the range 0x10000..0x10FFFF.
1.736 +void TUnicodeExpander::WriteChar32(TUint aChar)
1.737 + {
1.738 + if (aChar <= 0xFFFF)
1.739 + WriteChar((TUint16)aChar);
1.740 + else if (aChar <= 0x10FFFF)
1.741 + {
1.742 + aChar -= 0x10000; // reduce to 20-bit value in the range 0x0..0xFFFFF
1.743 + WriteChar((TUint16)(0xD800 + (aChar >> 10))); // first high surrogate + high 10 bits
1.744 + WriteChar((TUint16)(0xDC00 + (aChar & 0x03FF))); // first low surrogate + low 10 bits
1.745 + }
1.746 + else
1.747 + Panic(ENotUnicode);
1.748 + }
1.749 +}