sl@0: // Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: // All rights reserved. sl@0: // This component and the accompanying materials are made available sl@0: // under the terms of "Eclipse Public License v1.0" sl@0: // which accompanies this distribution, and is available sl@0: // at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: // sl@0: // Initial Contributors: sl@0: // Nokia Corporation - initial contribution. sl@0: // sl@0: // Contributors: sl@0: // sl@0: // Description: sl@0: // Classes implemented in this file are used for Unicode compression and decompression. sl@0: // Their code is borrowed from Symbian, only with some changes such as the "Panic" function sl@0: // is changed to exit the program. The Symbian coding standard will be kept in the code. sl@0: // sl@0: // sl@0: sl@0: #include sl@0: #include "unicodecompression.h" sl@0: sl@0: namespace PCStore sl@0: { sl@0: const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] = sl@0: { sl@0: 0x0000, // tags sl@0: 0x0080, // Latin-1 supplement sl@0: 0x0100, // Latin Extended-A sl@0: 0x0300, // Combining Diacritics sl@0: 0x2000, // General Punctuation sl@0: 0x2080, // Currency Symbols sl@0: 0x2100, // Letterlike Symbols and Number Forms sl@0: 0x3000 // CJK Symbols and Punctuation sl@0: }; sl@0: sl@0: const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] = sl@0: { sl@0: 0x0080, // Latin-1 supplement sl@0: 0x00C0, // parts of Latin-1 supplement and Latin Extended-A sl@0: 0x0400, // Cyrillic sl@0: 0x0600, // Arabic sl@0: 0x0900, // Devanagari sl@0: 0x3040, // Hiragana sl@0: 0x30A0, // Katakana sl@0: 0xFF00 // Fullwidth ASCII sl@0: }; sl@0: sl@0: const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] = sl@0: { sl@0: 0x00C0, // Latin 1 letters (not symbols) and some of Extended-A sl@0: 0x0250, // IPA extensions sl@0: 0x0370, // Greek sl@0: 0x0530, // Armenian sl@0: 0x3040, // Hiragana sl@0: 0x30A0, // Katakana sl@0: 0xFF60 // Halfwidth katakana sl@0: }; sl@0: sl@0: // Single-byte mode tag values sl@0: const TUint8 SQ0 = 0x01; // quote from window 0 sl@0: const TUint8 SDX = 0x0B; // define window in expansion area sl@0: const TUint8 SQU = 0x0E; // quote Unicode value sl@0: const TUint8 SCU = 0x0F; // switch to Unicode mode sl@0: const TUint8 SC0 = 0x10; // select dynamic window 0 sl@0: const TUint8 SD0 = 0x18; // set dynamic window 0 index to and select it sl@0: sl@0: // Unicode mode tag values sl@0: const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode sl@0: const TUint8 UD0 = 0xE8; // set dynamic window 0 index to , select it and switch to sl@0: // single-byte mode sl@0: const TUint8 UQU = 0xF0; // , quote Unicode value sl@0: const TUint8 UDX = 0xF1; // , define window in expansion area and switch to single-byte mode sl@0: sl@0: TUnicodeCompressionState::TUnicodeCompressionState(): sl@0: iUnicodeWords(0), sl@0: iMaxUnicodeWords(0), sl@0: iCompressedBytes(0), sl@0: iMaxCompressedBytes(0) sl@0: { sl@0: Reset(); sl@0: } sl@0: sl@0: void TUnicodeCompressionState::Reset() sl@0: { sl@0: iUnicodeMode = false; sl@0: iActiveWindowBase = 0x0080; sl@0: for (int i = 0; i < EDynamicWindows; i++) sl@0: iDynamicWindow[i] = iDynamicWindowDefault[i]; sl@0: } sl@0: sl@0: sl@0: // Return the index of the static window that contains this code, if any, or -1 if there is none. sl@0: TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode) sl@0: { sl@0: for (TInt i = 0; i < EStaticWindows; i++) sl@0: if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128) sl@0: return i; sl@0: return -1; sl@0: } sl@0: sl@0: /* sl@0: If aCode can be accommodated in one of the legal dynamic windows, return the index of that window sl@0: in the offset table. If not return KErrNotFound. sl@0: */ sl@0: TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode) sl@0: { sl@0: if (aCode < 0x0080) sl@0: return KErrNotFound; sl@0: if (aCode >= 0x3400 && aCode <= 0xDFFF) sl@0: return KErrNotFound; sl@0: sl@0: /* sl@0: Prefer sections that cross half-block boundaries. These are better adapted to actual text. sl@0: They are represented by offset indices 0xf9..0xff. sl@0: */ sl@0: for (int i = 0; i < ESpecialBases; i++) sl@0: if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128) sl@0: return 0xF9 + i; sl@0: sl@0: /* sl@0: Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and sl@0: 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80. sl@0: */ sl@0: if (aCode >= 0xE000) sl@0: aCode -= 0xAC00; sl@0: return aCode / 0x80; sl@0: } sl@0: sl@0: // Return the base of the window represented by offset index . Return 0 if the offset index is illegal. sl@0: TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex) sl@0: { sl@0: if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF) sl@0: { sl@0: /* sl@0: WARNING: don't optimise the following two lines by replacing them with sl@0: 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce an error sl@0: in ARM builds caused by optimisation and consequent erroneous fixing up sl@0: of the array base: see defect EDNGASR-4AGJQX in ER5U defects. sl@0: */ sl@0: int special_base_index = aOffsetIndex - 0xF9; sl@0: return iSpecialBase[special_base_index]; sl@0: } sl@0: if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67) sl@0: return aOffsetIndex * 0x80; sl@0: if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7) sl@0: return aOffsetIndex * 0x80 + 0xAC00; sl@0: return 0; sl@0: } sl@0: sl@0: TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode) sl@0: { sl@0: return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D || sl@0: (aCode >= 0x0020 && aCode <= 0x007F); sl@0: } sl@0: sl@0: void TUnicodeCompressionState::Panic(TPanic aPanic) sl@0: { sl@0: exit(aPanic); sl@0: } sl@0: sl@0: TUnicodeCompressor::TUnicodeCompressor(): sl@0: iInputBufferStart(0), sl@0: iInputBufferSize(0), sl@0: iOutputBufferStart(0), sl@0: iOutputBufferSize(0), sl@0: iDynamicWindowIndex(0), sl@0: iOutputStream(NULL), sl@0: iOutputPointer(NULL), sl@0: iInput(NULL) sl@0: { sl@0: } sl@0: sl@0: void TUnicodeCompressor::CompressL(CStoreWriteStream& aOutput,MUnicodeSource& aInput, sl@0: TInt aMaxOutputBytes,TInt aMaxInputWords, sl@0: TInt* aOutputBytes,TInt* aInputWords) sl@0: { sl@0: DoCompressL(&aOutput,NULL,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); sl@0: } sl@0: sl@0: void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput, sl@0: TInt aMaxOutputBytes,TInt aMaxInputWords, sl@0: TInt* aOutputBytes,TInt* aInputWords) sl@0: { sl@0: DoCompressL(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); sl@0: } sl@0: sl@0: TInt TUnicodeCompressor::FlushL(CStoreWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) sl@0: { sl@0: DoCompressL(&aOutput,NULL,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); sl@0: return iOutputBufferSize; sl@0: } sl@0: sl@0: TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) sl@0: { sl@0: DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); sl@0: return iOutputBufferSize; sl@0: } sl@0: sl@0: TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords) sl@0: { sl@0: TInt bytes; sl@0: TUnicodeCompressor c; sl@0: c.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL); sl@0: return bytes; sl@0: } sl@0: sl@0: // Compress until input or output is exhausted or an exception occurs. sl@0: void TUnicodeCompressor::DoCompressL(CStoreWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, sl@0: TInt aMaxOutputBytes,TInt aMaxInputWords, sl@0: TInt* aOutputBytes,TInt* aInputWords) sl@0: { sl@0: iOutputStream = aOutputStream; sl@0: iOutputPointer = aOutputPointer; sl@0: iInput = aInput; sl@0: iMaxCompressedBytes = aMaxOutputBytes; sl@0: iMaxUnicodeWords = aMaxInputWords; sl@0: iCompressedBytes = iUnicodeWords = 0; sl@0: FlushOutputBufferL(); sl@0: if (iInput) sl@0: { sl@0: while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) sl@0: { sl@0: TUint16 x = iInput->ReadUnicodeValueL(); sl@0: TAction action(x); sl@0: iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action; sl@0: iInputBufferSize++; sl@0: iUnicodeWords++; sl@0: if (iInputBufferSize == EMaxInputBufferSize) sl@0: WriteRunL(); sl@0: } sl@0: } sl@0: FlushInputBufferL(); sl@0: if (aOutputBytes) sl@0: *aOutputBytes = iCompressedBytes; sl@0: if (aInputWords) sl@0: *aInputWords = iUnicodeWords; sl@0: } sl@0: sl@0: TUnicodeCompressor::TAction::TAction(TUint16 aCode): sl@0: iCode(aCode) sl@0: { sl@0: if (TUnicodeCompressionState::EncodeAsIs(aCode)) sl@0: iTreatment = EPlainASCII; sl@0: else sl@0: { sl@0: iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode); sl@0: if (iTreatment == -1) sl@0: { sl@0: iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode); sl@0: if (iTreatment == -1) sl@0: iTreatment = EPlainUnicode; sl@0: else sl@0: iTreatment += EFirstStatic; sl@0: } sl@0: } sl@0: } sl@0: sl@0: void TUnicodeCompressor::WriteCharacterFromBuffer() sl@0: { sl@0: const TAction& action = iInputBuffer[iInputBufferStart]; sl@0: iInputBufferSize--; sl@0: iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize; sl@0: WriteCharacter(action); sl@0: } sl@0: sl@0: void TUnicodeCompressor::FlushInputBufferL() sl@0: { sl@0: while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) sl@0: WriteRunL(); sl@0: } sl@0: sl@0: void TUnicodeCompressor::WriteRunL() sl@0: { sl@0: // Write out any leading characters that can be passed through. sl@0: if (!iUnicodeMode) sl@0: while (iInputBufferSize > 0) sl@0: { sl@0: const TAction& action = iInputBuffer[iInputBufferStart]; sl@0: if (action.iTreatment == TAction::EPlainASCII || sl@0: (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128)) sl@0: WriteCharacterFromBuffer(); sl@0: else sl@0: break; sl@0: } sl@0: sl@0: // Write a run of characters that cannot be passed through. sl@0: int i; sl@0: if (iInputBufferSize > 0) sl@0: { sl@0: /* sl@0: Find a run of characters with the same treatment and select that treatment sl@0: if the run has more than one character. sl@0: */ sl@0: int treatment = iInputBuffer[iInputBufferStart].iTreatment; sl@0: int next_treatment = treatment; sl@0: int run_size = 1; sl@0: for (i = 1; i < iInputBufferSize; i++) sl@0: { sl@0: int index = (iInputBufferStart + i) % EMaxInputBufferSize; sl@0: next_treatment = iInputBuffer[index].iTreatment; sl@0: if (next_treatment != treatment) sl@0: break; sl@0: run_size++; sl@0: } sl@0: if (run_size > 1) sl@0: SelectTreatment(treatment); sl@0: for (i = 0; i < run_size; i++) sl@0: WriteCharacterFromBuffer(); sl@0: } sl@0: sl@0: FlushOutputBufferL(); sl@0: } sl@0: sl@0: void TUnicodeCompressor::FlushOutputBufferL() sl@0: { sl@0: while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) sl@0: { sl@0: TUint8 byte = iOutputBuffer[iOutputBufferStart]; sl@0: if (iOutputPointer) sl@0: *iOutputPointer++ = byte; sl@0: else if (iOutputStream) sl@0: iOutputStream->WriteUint8(byte); sl@0: iCompressedBytes++; sl@0: iOutputBufferSize--; sl@0: iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; sl@0: } sl@0: } sl@0: sl@0: void TUnicodeCompressor::SelectTreatment(TInt aTreatment) sl@0: { sl@0: if (aTreatment == TAction::EPlainUnicode) sl@0: { sl@0: // Switch to Unicode mode if not there already. sl@0: if (!iUnicodeMode) sl@0: { sl@0: WriteByte(SCU); sl@0: iUnicodeMode = true; sl@0: } sl@0: return; sl@0: } sl@0: sl@0: if (aTreatment == TAction::EPlainASCII) sl@0: { sl@0: // Switch to single-byte mode, using the current dynamic window, if not there already. sl@0: if (iUnicodeMode) sl@0: { sl@0: WriteByte(UC0 + iDynamicWindowIndex); sl@0: iUnicodeMode = false; sl@0: } sl@0: return; sl@0: } sl@0: sl@0: if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic) sl@0: { sl@0: TUint32 base = DynamicWindowBase(aTreatment); sl@0: sl@0: // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4. sl@0: for (int i = 0; i < EDynamicWindows; i++) sl@0: if (base == iDynamicWindow[i]) sl@0: { sl@0: if (iUnicodeMode) sl@0: WriteByte(UC0 + i); sl@0: else if (i != iDynamicWindowIndex) sl@0: WriteByte(SC0 + i); sl@0: iUnicodeMode = false; sl@0: iDynamicWindowIndex = i; sl@0: iActiveWindowBase = base; sl@0: return; sl@0: } sl@0: if (iUnicodeMode) sl@0: WriteByte(UD0 + 4); sl@0: else sl@0: WriteByte(SD0 + 4); sl@0: iDynamicWindowIndex = 4; sl@0: iUnicodeMode = false; sl@0: WriteByte(aTreatment); sl@0: iDynamicWindow[4] = base; sl@0: iActiveWindowBase = base; sl@0: return; sl@0: } sl@0: } sl@0: sl@0: // Write a character without changing mode or window. sl@0: void TUnicodeCompressor::WriteCharacter(const TAction& aAction) sl@0: { sl@0: if (iUnicodeMode) sl@0: WriteUCharacter(aAction.iCode); sl@0: else sl@0: WriteSCharacter(aAction); sl@0: } sl@0: sl@0: void TUnicodeCompressor::WriteUCharacter(TUint16 aCode) sl@0: { sl@0: // Emit the 'quote Unicode' tag if the character would conflict with a tag. sl@0: if (aCode >= 0xE000 && aCode <= 0xF2FF) sl@0: WriteByte(UQU); sl@0: sl@0: // Write the Unicode value big-end first. sl@0: WriteByte((aCode >> 8) & 0xFF); sl@0: WriteByte(aCode & 0xFF); sl@0: } sl@0: sl@0: void TUnicodeCompressor::WriteByte(TUint aByte) sl@0: { sl@0: if (iOutputBufferSize >= EMaxOutputBufferSize) sl@0: Panic(EOutputBufferOverflow); sl@0: iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte; sl@0: iOutputBufferSize++; sl@0: } sl@0: sl@0: void TUnicodeCompressor::WriteSCharacter(const TAction& aAction) sl@0: { sl@0: // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes. sl@0: if (aAction.iTreatment == TAction::EPlainASCII) sl@0: { sl@0: WriteByte(aAction.iCode); sl@0: return; sl@0: } sl@0: sl@0: // Characters in a static window can be written using SQ plus a byte in the range 0x00-0x7F sl@0: if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic) sl@0: { sl@0: int window = aAction.iTreatment - TAction::EFirstStatic; sl@0: WriteByte(SQ0 + window); sl@0: WriteByte(aAction.iCode); sl@0: return; sl@0: } sl@0: sl@0: // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF. sl@0: if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128) sl@0: { sl@0: WriteByte(aAction.iCode - iActiveWindowBase + 0x80); sl@0: return; sl@0: } sl@0: sl@0: // Characters in another dynamic window can be written using SQ plus a byte in the range 0x80-0xFF sl@0: int i; sl@0: for (i = 0; i < EDynamicWindows; i++) sl@0: if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128) sl@0: { sl@0: WriteByte(SQ0 + i); sl@0: WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80); sl@0: return; sl@0: } sl@0: sl@0: // Other characters can be quoted. sl@0: WriteByte(SQU); sl@0: WriteByte((aAction.iCode >> 8) & 0xFF); sl@0: WriteByte(aAction.iCode & 0xFF); sl@0: return; sl@0: } sl@0: sl@0: sl@0: TUnicodeExpander::TUnicodeExpander(): sl@0: iInputBufferStart(0), sl@0: iInputBufferSize(0), sl@0: iOutputBufferStart(0), sl@0: iOutputBufferSize(0), sl@0: iOutput(NULL), sl@0: iInputStream(NULL), sl@0: iInputPointer(NULL) sl@0: { sl@0: } sl@0: sl@0: void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,CStoreReadStream& aInput, sl@0: TInt aMaxOutputWords,TInt aMaxInputBytes, sl@0: TInt* aOutputWords,TInt* aInputBytes) sl@0: { sl@0: DoExpandL(&aOutput,&aInput,NULL,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); sl@0: } sl@0: sl@0: void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,const TUint8* aInput, sl@0: TInt aMaxOutputWords,TInt aMaxInputBytes, sl@0: TInt* aOutputWords,TInt* aInputBytes) sl@0: { sl@0: DoExpandL(&aOutput,NULL,aInput,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes); sl@0: } sl@0: sl@0: TInt TUnicodeExpander::FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords) sl@0: { sl@0: DoExpandL(&aOutput,NULL,NULL,aMaxOutputWords,0,&aOutputWords,NULL); sl@0: return iOutputBufferSize; sl@0: } sl@0: sl@0: TInt TUnicodeExpander::ExpandedSizeL(CStoreReadStream& aInput,TInt aInputBytes) sl@0: { sl@0: TInt words; sl@0: TUnicodeExpander e; sl@0: e.DoExpandL(NULL,&aInput,NULL,KMaxTInt,aInputBytes,&words,NULL); sl@0: return words; sl@0: } sl@0: sl@0: TInt TUnicodeExpander::ExpandedSizeL(const TUint8* aInput,TInt aInputBytes) sl@0: { sl@0: TInt words; sl@0: TUnicodeExpander e; sl@0: e.DoExpandL(NULL,NULL,aInput,KMaxTInt,aInputBytes,&words,NULL); sl@0: return words; sl@0: } sl@0: sl@0: // Expand until input or output is exhausted or an exception occurs. sl@0: void TUnicodeExpander::DoExpandL(MUnicodeSink* aOutput,CStoreReadStream* aInputStream,const TUint8* aInputPointer, sl@0: TInt aMaxOutputWords,TInt aMaxInputBytes, sl@0: TInt* aOutputWords,TInt* aInputBytes) sl@0: { sl@0: iOutput = aOutput; sl@0: iInputStream = aInputStream; sl@0: iInputPointer = aInputPointer; sl@0: iMaxUnicodeWords = aMaxOutputWords; sl@0: iMaxCompressedBytes = aMaxInputBytes; sl@0: iUnicodeWords = iCompressedBytes = 0; sl@0: iInputBufferStart = 0; sl@0: FlushOutputBufferL(); sl@0: if (iInputPointer || iInputStream) sl@0: { sl@0: while (iUnicodeWords + iOutputBufferSize < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) sl@0: HandleByteL(); sl@0: } sl@0: if (aOutputWords) sl@0: *aOutputWords = iUnicodeWords; sl@0: if (aInputBytes) sl@0: *aInputBytes = iCompressedBytes; sl@0: } sl@0: sl@0: void TUnicodeExpander::HandleByteL() sl@0: { sl@0: TUint8 byte; sl@0: TBool handled = false; sl@0: if (ReadByteL(byte)) sl@0: { sl@0: if (iUnicodeMode) sl@0: handled = HandleUByteL(byte); sl@0: else sl@0: handled = HandleSByteL(byte); sl@0: } sl@0: iInputBufferStart = 0; sl@0: if (handled) sl@0: iInputBufferSize = 0; sl@0: FlushOutputBufferL(); sl@0: } sl@0: sl@0: void TUnicodeExpander::FlushOutputBufferL() sl@0: { sl@0: while (iOutputBufferSize > 0 && iUnicodeWords < iMaxUnicodeWords) sl@0: { sl@0: if (iOutput) sl@0: iOutput->WriteUnicodeValueL(iOutputBuffer[iOutputBufferStart]); sl@0: iUnicodeWords++; sl@0: iOutputBufferSize--; sl@0: iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; sl@0: } sl@0: } sl@0: sl@0: TBool TUnicodeExpander::HandleSByteL(TUint8 aByte) sl@0: { sl@0: // 'Pass-through' codes. sl@0: if (TUnicodeCompressionState::EncodeAsIs(aByte)) sl@0: { sl@0: WriteChar(aByte); sl@0: return true; sl@0: } sl@0: sl@0: // Codes 0x80-0xFF select a character from the active window. sl@0: if (aByte >= 0x80) sl@0: { sl@0: WriteChar32(iActiveWindowBase + aByte - 0x80); sl@0: return true; sl@0: } sl@0: sl@0: // SQU: quote a Unicode character. sl@0: if (aByte == SQU) sl@0: return QuoteUnicodeL(); sl@0: sl@0: // SCU: switch to Unicode mode. sl@0: if (aByte == SCU) sl@0: { sl@0: iUnicodeMode = true; sl@0: return true; sl@0: } sl@0: sl@0: // SQn: quote from window n. sl@0: if (aByte >= SQ0 && aByte <= SQ0 + 7) sl@0: { sl@0: int window = aByte - SQ0; sl@0: TUint8 byte; sl@0: if (ReadByteL(byte)) sl@0: { sl@0: TUint32 c = byte; sl@0: if (c <= 0x7F) sl@0: c += iStaticWindow[window]; sl@0: else sl@0: c += iDynamicWindow[window] - 0x80; sl@0: WriteChar32(c); sl@0: return true; sl@0: } sl@0: else sl@0: return false; sl@0: } sl@0: sl@0: // SCn: switch to dynamic window n. sl@0: if (aByte >= SC0 && aByte <= SC0 + 7) sl@0: { sl@0: iActiveWindowBase = iDynamicWindow[aByte - SC0]; sl@0: return true; sl@0: } sl@0: sl@0: // SDn: define dynamic window n and switch to it. sl@0: if (aByte >= SD0 && aByte <= SD0 + 7) sl@0: return DefineWindowL(aByte - SD0); sl@0: sl@0: // SDX: define window in the expansion space. sl@0: if (aByte == SDX) sl@0: return DefineExpansionWindowL(); sl@0: sl@0: Panic(EUnhandledByte); sl@0: return false; sl@0: } sl@0: sl@0: TBool TUnicodeExpander::HandleUByteL(TUint8 aByte) sl@0: { sl@0: // Plain Unicode; get the low byte and emit the Unicode value. sl@0: if (aByte <= 0xDF || aByte >= 0xF3) sl@0: { sl@0: TUint8 lo; sl@0: if (ReadByteL(lo)) sl@0: { sl@0: TUint16 c = (TUint16)((aByte << 8) | lo); sl@0: WriteChar(c); sl@0: return true; sl@0: } sl@0: else sl@0: return false; sl@0: } sl@0: sl@0: // Quote a Unicode character that would otherwise conflict with a tag. sl@0: if (aByte == UQU) sl@0: return QuoteUnicodeL(); sl@0: sl@0: // UCn: change to single byte mode and select window n. sl@0: if (aByte >= UC0 && aByte <= UC0 + 7) sl@0: { sl@0: iUnicodeMode = false; sl@0: iActiveWindowBase = iDynamicWindow[aByte - UC0]; sl@0: return true; sl@0: } sl@0: sl@0: // UDn: define dynamic window n and switch to it. sl@0: if (aByte >= UD0 && aByte <= UD0 + 7) sl@0: return DefineWindowL(aByte - UD0); sl@0: sl@0: // UDX: define window in the expansion space. sl@0: if (aByte == UDX) sl@0: return DefineExpansionWindowL(); sl@0: sl@0: Panic(EUnhandledByte); sl@0: return false; sl@0: } sl@0: sl@0: TBool TUnicodeExpander::QuoteUnicodeL() sl@0: { sl@0: TUint8 hi, lo; sl@0: if (ReadByteL(hi) && ReadByteL(lo)) sl@0: { sl@0: TUint16 c = (TUint16)((hi << 8) | lo); sl@0: WriteChar(c); sl@0: return true; sl@0: } sl@0: else sl@0: return false; sl@0: } sl@0: sl@0: TBool TUnicodeExpander::DefineWindowL(TInt aIndex) sl@0: { sl@0: TUint8 window; sl@0: if (ReadByteL(window)) sl@0: { sl@0: iUnicodeMode = false; sl@0: iActiveWindowBase = DynamicWindowBase(window); sl@0: iDynamicWindow[aIndex] = iActiveWindowBase; sl@0: return true; sl@0: } sl@0: else sl@0: return false; sl@0: } sl@0: sl@0: TBool TUnicodeExpander::DefineExpansionWindowL() sl@0: { sl@0: TUint8 hi, lo; sl@0: if (ReadByteL(hi) && ReadByteL(lo)) sl@0: { sl@0: iUnicodeMode = false; sl@0: iActiveWindowBase = 0x10000 + (0x80 * ((hi & 0x1F) * 0x100 + lo)); sl@0: iDynamicWindow[hi >> 5] = iActiveWindowBase; sl@0: return true; sl@0: } sl@0: else sl@0: return false; sl@0: } sl@0: sl@0: // Read either from the buffer (in the case of restarting after source finished in mid-operation) or from the source. sl@0: TBool TUnicodeExpander::ReadByteL(TUint8& aByte) sl@0: { sl@0: if (iInputBufferStart < iInputBufferSize) sl@0: { sl@0: aByte = iInputBuffer[iInputBufferStart++]; sl@0: return true; sl@0: } sl@0: else if (iCompressedBytes < iMaxCompressedBytes) sl@0: { sl@0: if (iInputPointer) sl@0: aByte = *iInputPointer++; sl@0: else sl@0: aByte = iInputStream->ReadUint8(); sl@0: iInputBuffer[iInputBufferStart++] = aByte; sl@0: iInputBufferSize = iInputBufferStart; sl@0: iCompressedBytes++; sl@0: return true; sl@0: } sl@0: else sl@0: return false; sl@0: } sl@0: sl@0: void TUnicodeExpander::WriteChar(TUint16 aChar) sl@0: { sl@0: if (iOutputBufferSize >= EMaxOutputBufferSize) sl@0: Panic(EOutputBufferOverflow); sl@0: iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = aChar; sl@0: iOutputBufferSize++; sl@0: } sl@0: sl@0: // Write a Unicode character; write using surrogates if in the range 0x10000..0x10FFFF. sl@0: void TUnicodeExpander::WriteChar32(TUint aChar) sl@0: { sl@0: if (aChar <= 0xFFFF) sl@0: WriteChar((TUint16)aChar); sl@0: else if (aChar <= 0x10FFFF) sl@0: { sl@0: aChar -= 0x10000; // reduce to 20-bit value in the range 0x0..0xFFFFF sl@0: WriteChar((TUint16)(0xD800 + (aChar >> 10))); // first high surrogate + high 10 bits sl@0: WriteChar((TUint16)(0xDC00 + (aChar & 0x03FF))); // first low surrogate + low 10 bits sl@0: } sl@0: else sl@0: Panic(ENotUnicode); sl@0: } sl@0: }