Update contrib.
1 // Copyright (c) 2006-2009 Nokia Corporation and/or its subsidiary(-ies).
2 // All rights reserved.
3 // This component and the accompanying materials are made available
4 // under the terms of "Eclipse Public License v1.0"
5 // which accompanies this distribution, and is available
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
8 // Initial Contributors:
9 // Nokia Corporation - initial contribution.
14 // Classes implemented in this file are used for Unicode compression and decompression.
15 // Their code is borrowed from Symbian with only minor changes; for example, the "Panic" function
16 // now exits the program. The Symbian coding standard is kept in the code.
21 #include "unicodecompression.h"
// Base code points of the static (fixed) windows. StaticWindowIndex() treats
// each entry as the start of a window of 128 consecutive codes, and the
// expander adds these bases when handling the SQn quote tags.
// NOTE(review): the embedded line numbers jump from 25 to 28, so the opening
// brace and possibly a first initialiser are missing from this listing;
// compare the entry count with EStaticWindows in the header.
25 const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] =
28 0x0080, // Latin-1 supplement
29 0x0100, // Latin Extended-A
30 0x0300, // Combining Diacritics
31 0x2000, // General Punctuation
32 0x2080, // Currency Symbols
33 0x2100, // Letterlike Symbols and Number Forms
34 0x3000 // CJK Symbols and Punctuation
// Initial base code points of the dynamic windows. Reset() copies this table
// into iDynamicWindow, one entry per window.
// NOTE(review): only three initialisers survive in this listing (embedded
// lines 41-45 are missing); take the full table from the original file.
37 const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] =
39 0x0080, // Latin-1 supplement
40 0x00C0, // parts of Latin-1 supplement and Latin Extended-A
46 0xFF00 // Fullwidth ASCII
// Window bases that do not start on a multiple of 0x80. DynamicWindowBase()
// maps offset indices 0xF9..0xFF onto this table (entry = index - 0xF9), and
// DynamicWindowOffsetIndex() tests these windows (128 codes each) in order.
// NOTE(review): only three initialisers survive in this listing (embedded
// lines 53-56 are missing).
49 const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] =
51 0x00C0, // Latin 1 letters (not symbols) and some of Extended-A
52 0x0250, // IPA extensions
57 0xFF60 // Halfwidth katakana
// Tag bytes recognised while in single-byte mode.
// SQ0, SC0 and SD0 are each the first value of a family of eight: the expander
// accepts SQ0..SQ0+7, SC0..SC0+7 and SD0..SD0+7 and subtracts the base value to
// obtain the window number.
60 // Single-byte mode tag values
61 const TUint8 SQ0 = 0x01; // <byte> quote from window 0
62 const TUint8 SDX = 0x0B; // <hbyte> <lbyte> define window in expansion area
63 const TUint8 SQU = 0x0E; // <hbyte> <lbyte> quote Unicode value
64 const TUint8 SCU = 0x0F; // switch to Unicode mode
65 const TUint8 SC0 = 0x10; // select dynamic window 0
66 const TUint8 SD0 = 0x18; // <byte> set dynamic window 0 index to <byte> and select it
// Tag bytes recognised while in Unicode mode.
// UC0 and UD0 are each the first value of a family of eight: the expander
// accepts UC0..UC0+7 and UD0..UD0+7 and subtracts the base value to obtain the
// window number.
// NOTE(review): the trailing comment on UD0 is cut off in this listing (its
// continuation line, embedded line 71, is missing).
68 // Unicode mode tag values
69 const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode
70 const TUint8 UD0 = 0xE8; // <byte> set dynamic window 0 index to <byte>, select it and switch to
72 const TUint8 UQU = 0xF0; // <hbyte>, <lbyte> quote Unicode value
73 const TUint8 UDX = 0xF1; // <hbyte>, <lbyte> define window in expansion area and switch to single-byte mode
// Constructor for the state shared by the compressor and the expander.
// NOTE(review): only the last member initialiser (iMaxCompressedBytes = 0) is
// visible; the preceding initialisers and the body are missing from this listing.
75 TUnicodeCompressionState::TUnicodeCompressionState():
79 iMaxCompressedBytes(0)
// Restore the window state to its defaults: the active window base becomes
// 0x0080 and every dynamic window is reloaded from iDynamicWindowDefault.
// NOTE(review): embedded lines 85-86 are missing, so any statement before the
// first visible assignment is not shown here.
84 void TUnicodeCompressionState::Reset()
87 iActiveWindowBase = 0x0080;
88 for (int i = 0; i < EDynamicWindows; i++)
89 iDynamicWindow[i] = iDynamicWindowDefault[i];
// Scans iStaticWindow in index order; a window matches when aCode lies in the
// half-open range [base, base + 128).
// NOTE(review): the return statements (embedded lines 98-99) are missing from
// this listing; the contract below comes from the original comment.
93 // Return the index of the static window that contains this code, if any, or -1 if there is none.
94 TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode)
96 for (TInt i = 0; i < EStaticWindows; i++)
97 if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128)
// Maps a 16-bit code to the offset index of a dynamic window able to hold it.
// Visible steps: a range test for 0x3400..0xDFFF, then a scan of the
// iSpecialBase windows (128 codes each), then the half-block mapping described
// in the last comment.
// NOTE(review): the comment delimiters, the statement guarded by the range
// test, and every return statement are missing from this listing. Presumably
// codes in 0x3400..0xDFFF are rejected, since the documented half blocks cover
// only 0x0080..0x3380 and 0xE000..0xFF80 -- confirm against the original file.
103 If aCode can be accommodated in one of the legal dynamic windows, return the index of that window
104 in the offset table. If not return KErrNotFound.
106 TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode)
110 if (aCode >= 0x3400 && aCode <= 0xDFFF)
114 Prefer sections that cross half-block boundaries. These are better adapted to actual text.
115 They are represented by offset indices 0xf9..0xff.
117 for (int i = 0; i < ESpecialBases; i++)
118 if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128)
122 Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and
123 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80.
// Inverse of the offset-index encoding:
//   0xF9..0xFF -> iSpecialBase[index - 0xF9]
//   0x01..0x67 -> index * 0x80            (0x0080 .. 0x3380)
//   0x68..0xA7 -> index * 0x80 + 0xAC00   (0xE000 .. 0xFF80)
// The special-base lookup deliberately goes through a named local; see the
// WARNING text in the body for the reason.
// NOTE(review): the comment delimiters around the WARNING text and the final
// fall-through return are missing from this listing.
130 // Return the base of the window represented by offset index <n>. Return 0 if the offset index is illegal.
131 TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex)
133 if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF)
136 WARNING: don't optimise the following two lines by replacing them with
137 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce an error
138 in ARM builds caused by optimisation and consequent erroneous fixing up
139 of the array base: see defect EDNGASR-4AGJQX in ER5U defects.
141 int special_base_index = aOffsetIndex - 0xF9;
142 return iSpecialBase[special_base_index];
144 if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67)
145 return aOffsetIndex * 0x80;
146 if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7)
147 return aOffsetIndex * 0x80 + 0xAC00;
// Return true when aCode is one of the codes that single-byte mode passes
// through unchanged: NUL (0x0000), tab (0x0009), LF (0x000A), CR (0x000D), or
// anything in 0x0020..0x007F.
151 TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode)
153 return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D ||
154 (aCode >= 0x0020 && aCode <= 0x007F);
// Fatal-error handler, called with EOutputBufferOverflow and EUnhandledByte
// elsewhere in this file. The file header comment says this port changes Panic
// to exit the program.
// NOTE(review): the body is missing from this listing.
157 void TUnicodeCompressionState::Panic(TPanic aPanic)
// Constructor: starts with both circular-buffer start positions at 0, an empty
// output buffer, dynamic window 0 selected and no output pointer.
// NOTE(review): some member initialisers (embedded lines 164, 168, 170-171)
// and the body are missing from this listing.
162 TUnicodeCompressor::TUnicodeCompressor():
163 iInputBufferStart(0),
165 iOutputBufferStart(0),
166 iOutputBufferSize(0),
167 iDynamicWindowIndex(0),
169 iOutputPointer(NULL),
// Stream overload: forwards to DoCompressL with aOutput as the output stream
// and no output pointer. The limits and the optional result pointers
// (aOutputBytes, aInputWords) are passed through unchanged.
174 void TUnicodeCompressor::CompressL(CStoreWriteStream& aOutput,MUnicodeSource& aInput,
175 TInt aMaxOutputBytes,TInt aMaxInputWords,
176 TInt* aOutputBytes,TInt* aInputWords)
178 DoCompressL(&aOutput,NULL,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords);
// Memory overload: forwards to DoCompressL with aOutput as the raw output
// pointer and no output stream. The limits and the optional result pointers
// (aOutputBytes, aInputWords) are passed through unchanged.
181 void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput,
182 TInt aMaxOutputBytes,TInt aMaxInputWords,
183 TInt* aOutputBytes,TInt* aInputWords)
185 DoCompressL(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords);
// Stream overload: runs DoCompressL with no input source and a zero input-word
// limit, so only already-buffered data can be written to aOutput.
// aOutputBytes receives the byte count reported by DoCompressL. Returns
// iOutputBufferSize, the fill count of the internal output buffer.
188 TInt TUnicodeCompressor::FlushL(CStoreWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes)
190 DoCompressL(&aOutput,NULL,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL);
191 return iOutputBufferSize;
// Memory overload: runs DoCompressL with no input source and a zero input-word
// limit, so only already-buffered data can be written to aOutput.
// aOutputBytes receives the byte count reported by DoCompressL. Returns
// iOutputBufferSize, the fill count of the internal output buffer.
194 TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes)
196 DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL);
197 return iOutputBufferSize;
// Dry run: a fresh local compressor processes up to aInputWords values from
// aInput with both output destinations NULL and an output limit of KMaxTInt,
// and the byte count is collected in a local named bytes.
// NOTE(review): the declaration of bytes and the return statement are missing
// from this listing.
200 TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords)
203 TUnicodeCompressor c;
204 c.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL);
// Common worker behind CompressL, FlushL and CompressedSizeL.
// Visible steps: record the output destination and both limits, zero the
// per-call counters, drain bytes already sitting in the output buffer, then
// loop while both limits allow, reading 16-bit values from the input and
// storing an action for each at the tail of the circular input buffer.
// On exit the counters are written through aOutputBytes and aInputWords.
// NOTE(review): several lines are missing from this listing, including the
// assignment of iInput, the construction of action, the counter updates, the
// handling of a full input buffer, and presumably the null checks guarding the
// two result pointers (FlushL passes NULL for aInputWords).
208 // Compress until input or output is exhausted or an exception occurs.
209 void TUnicodeCompressor::DoCompressL(CStoreWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
210 TInt aMaxOutputBytes,TInt aMaxInputWords,
211 TInt* aOutputBytes,TInt* aInputWords)
213 iOutputStream = aOutputStream;
214 iOutputPointer = aOutputPointer;
216 iMaxCompressedBytes = aMaxOutputBytes;
217 iMaxUnicodeWords = aMaxInputWords;
218 iCompressedBytes = iUnicodeWords = 0;
219 FlushOutputBufferL();
222 while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes)
224 TUint16 x = iInput->ReadUnicodeValueL();
226 iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action;
229 if (iInputBufferSize == EMaxInputBufferSize)
235 *aOutputBytes = iCompressedBytes;
237 *aInputWords = iUnicodeWords;
// Classify a code and store the result in iTreatment, trying in this order:
//   1. EPlainASCII when EncodeAsIs(aCode) is true;
//   2. the dynamic-window offset index, if one exists;
//   3. a static-window index, with EFirstStatic added to it;
//   4. EPlainUnicode when neither window lookup succeeds.
// NOTE(review): the member initialiser list, the braces and the else keywords
// are missing from this listing, so the exact nesting is inferred from the
// visible statement order.
240 TUnicodeCompressor::TAction::TAction(TUint16 aCode):
243 if (TUnicodeCompressionState::EncodeAsIs(aCode))
244 iTreatment = EPlainASCII;
247 iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode);
248 if (iTreatment == -1)
250 iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode);
251 if (iTreatment == -1)
252 iTreatment = EPlainUnicode;
254 iTreatment += EFirstStatic;
// Take the action at the head of the circular input buffer, advance the head
// (wrapping at EMaxInputBufferSize) and write that character with WriteCharacter.
// NOTE(review): embedded line 262 is missing; presumably it decrements
// iInputBufferSize -- confirm against the original file.
259 void TUnicodeCompressor::WriteCharacterFromBuffer()
261 const TAction& action = iInputBuffer[iInputBufferStart];
263 iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize;
264 WriteCharacter(action);
// Loop while the input buffer still holds characters and the output byte limit
// has not been reached.
// NOTE(review): the loop body is missing from this listing.
267 void TUnicodeCompressor::FlushInputBufferL()
269 while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes)
// Write one run of buffered characters.
// Phase 1: emit leading characters that need no mode or window change, i.e.
// plain-ASCII treatment or a code inside the active window [base, base + 128).
// Phase 2: if characters remain, compare the head treatment with those of the
// following buffered characters, call SelectTreatment for it and write
// run_size characters.
// Finally pass buffered output bytes on with FlushOutputBufferL.
// NOTE(review): the lines that leave the phase 1 loop, declare i, and compute
// run_size are missing from this listing, as are the comment delimiters around
// the "Find a run" text.
273 void TUnicodeCompressor::WriteRunL()
275 // Write out any leading characters that can be passed through.
277 while (iInputBufferSize > 0)
279 const TAction& action = iInputBuffer[iInputBufferStart];
280 if (action.iTreatment == TAction::EPlainASCII ||
281 (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128))
282 WriteCharacterFromBuffer();
287 // Write a run of characters that cannot be passed through.
289 if (iInputBufferSize > 0)
292 Find a run of characters with the same treatment and select that treatment
293 if the run has more than one character.
295 int treatment = iInputBuffer[iInputBufferStart].iTreatment;
296 int next_treatment = treatment;
298 for (i = 1; i < iInputBufferSize; i++)
300 int index = (iInputBufferStart + i) % EMaxInputBufferSize;
301 next_treatment = iInputBuffer[index].iTreatment;
302 if (next_treatment != treatment)
307 SelectTreatment(treatment);
308 for (i = 0; i < run_size; i++)
309 WriteCharacterFromBuffer();
312 FlushOutputBufferL();
// Move bytes from the head of the circular output buffer to the destination
// while bytes remain and the output byte limit has not been reached. A byte
// goes to the raw pointer when one is set, otherwise to the stream when one is
// set; the head index wraps at EMaxOutputBufferSize.
// NOTE(review): the pointer test (embedded line 320) and the counter updates
// (around embedded lines 324-327) are missing from this listing.
315 void TUnicodeCompressor::FlushOutputBufferL()
317 while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes)
319 TUint8 byte = iOutputBuffer[iOutputBufferStart];
321 *iOutputPointer++ = byte;
322 else if (iOutputStream)
323 iOutputStream->WriteUint8(byte);
326 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize;
// Emit the tag bytes needed before characters with treatment aTreatment can be
// written, and update the mode and window state. Three cases are visible:
//   EPlainUnicode - switch to Unicode mode (statements not shown);
//   EPlainASCII   - write UC0 + current window index, leave Unicode mode;
//   dynamic range - aTreatment is an offset index; look up its base, reuse a
//                   dynamic window that already has that base, otherwise
//                   redefine window 4 with aTreatment as the offset byte.
// NOTE(review): the guards on iUnicodeMode and the tag bytes written in the
// Unicode and dynamic-window cases are missing from this listing.
330 void TUnicodeCompressor::SelectTreatment(TInt aTreatment)
332 if (aTreatment == TAction::EPlainUnicode)
334 // Switch to Unicode mode if not there already.
343 if (aTreatment == TAction::EPlainASCII)
345 // Switch to single-byte mode, using the current dynamic window, if not there already.
348 WriteByte(UC0 + iDynamicWindowIndex);
349 iUnicodeMode = false;
354 if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic)
356 TUint32 base = DynamicWindowBase(aTreatment);
358 // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4.
359 for (int i = 0; i < EDynamicWindows; i++)
360 if (base == iDynamicWindow[i])
364 else if (i != iDynamicWindowIndex)
366 iUnicodeMode = false;
367 iDynamicWindowIndex = i;
368 iActiveWindowBase = base;
375 iDynamicWindowIndex = 4;
376 iUnicodeMode = false;
377 WriteByte(aTreatment);
378 iDynamicWindow[4] = base;
379 iActiveWindowBase = base;
// Dispatches to WriteUCharacter (passing the code only) or WriteSCharacter
// (passing the whole action).
// NOTE(review): the condition choosing between the two calls is missing from
// this listing; presumably it tests iUnicodeMode.
384 // Write a character without changing mode or window.
385 void TUnicodeCompressor::WriteCharacter(const TAction& aAction)
388 WriteUCharacter(aAction.iCode);
390 WriteSCharacter(aAction);
// Write one character in Unicode mode as two bytes, high byte first. Codes in
// 0xE000..0xF2FF have a high byte that collides with the Unicode-mode tag
// values, so they get a quote tag first.
// NOTE(review): the statement that writes the quote tag (embedded line 397) is
// missing from this listing.
393 void TUnicodeCompressor::WriteUCharacter(TUint16 aCode)
395 // Emit the 'quote Unicode' tag if the character would conflict with a tag.
396 if (aCode >= 0xE000 && aCode <= 0xF2FF)
399 // Write the Unicode value big-end first.
400 WriteByte((aCode >> 8) & 0xFF);
401 WriteByte(aCode & 0xFF);
// Append one byte at the tail of the circular output buffer, keeping only the
// low 8 bits of aByte. Panics with EOutputBufferOverflow when the buffer is
// already full.
// NOTE(review): embedded line 409 is missing; presumably it increments
// iOutputBufferSize.
404 void TUnicodeCompressor::WriteByte(TUint aByte)
406 if (iOutputBufferSize >= EMaxOutputBufferSize)
407 Panic(EOutputBufferOverflow);
408 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte;
// Write one character in single-byte mode, trying the encodings in this order:
//   1. plain ASCII treatment: the code itself as one byte;
//   2. static-window treatment: SQ0 + window, then the code (WriteByte keeps
//      only its low 8 bits);
//   3. code inside the active window: offset from the window base plus 0x80;
//   4. code inside another dynamic window: quoted, offset plus 0x80;
//   5. anything else: quoted as two bytes, high byte first.
// NOTE(review): the returns ending each case, the declaration of i, and the
// tag bytes written in cases 4 and 5 are missing from this listing.
412 void TUnicodeCompressor::WriteSCharacter(const TAction& aAction)
414 // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes.
415 if (aAction.iTreatment == TAction::EPlainASCII)
417 WriteByte(aAction.iCode);
421 // Characters in a static window can be written using SQ<n> plus a byte in the range 0x00-0x7F
422 if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic)
424 int window = aAction.iTreatment - TAction::EFirstStatic;
425 WriteByte(SQ0 + window);
426 WriteByte(aAction.iCode);
430 // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF.
431 if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128)
433 WriteByte(aAction.iCode - iActiveWindowBase + 0x80);
437 // Characters in another dynamic window can be written using SQ<n> plus a byte in the range 0x80-0xFF
439 for (i = 0; i < EDynamicWindows; i++)
440 if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128)
443 WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80);
447 // Other characters can be quoted.
449 WriteByte((aAction.iCode >> 8) & 0xFF);
450 WriteByte(aAction.iCode & 0xFF);
// Constructor: starts with the input read position at 0 and an empty output
// buffer whose start position is 0.
// NOTE(review): the remaining member initialisers (embedded lines 457 and
// 460-463) and the body are missing from this listing.
455 TUnicodeExpander::TUnicodeExpander():
456 iInputBufferStart(0),
458 iOutputBufferStart(0),
459 iOutputBufferSize(0),
// Stream overload: forwards to DoExpandL with aInput as the input stream and
// no input pointer. The limits and the optional result pointers (aOutputWords,
// aInputBytes) are passed through unchanged.
466 void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,CStoreReadStream& aInput,
467 TInt aMaxOutputWords,TInt aMaxInputBytes,
468 TInt* aOutputWords,TInt* aInputBytes)
470 DoExpandL(&aOutput,&aInput,NULL,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes);
// Memory overload: forwards to DoExpandL with aInput as the raw input pointer
// and no input stream. The limits and the optional result pointers
// (aOutputWords, aInputBytes) are passed through unchanged.
473 void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,const TUint8* aInput,
474 TInt aMaxOutputWords,TInt aMaxInputBytes,
475 TInt* aOutputWords,TInt* aInputBytes)
477 DoExpandL(&aOutput,NULL,aInput,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes);
// Runs DoExpandL with no input source and a zero input-byte limit, so only
// already-buffered output words can be delivered to aOutput. aOutputWords
// receives the word count reported by DoExpandL. Returns iOutputBufferSize,
// the fill count of the internal output buffer.
480 TInt TUnicodeExpander::FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords)
482 DoExpandL(&aOutput,NULL,NULL,aMaxOutputWords,0,&aOutputWords,NULL);
483 return iOutputBufferSize;
// Stream overload of the dry run: an expander object named e processes up to
// aInputBytes bytes from aInput with no output sink and an output limit of
// KMaxTInt, collecting the word count in a local named words.
// NOTE(review): the declarations of words and e and the return statement are
// missing from this listing.
486 TInt TUnicodeExpander::ExpandedSizeL(CStoreReadStream& aInput,TInt aInputBytes)
490 e.DoExpandL(NULL,&aInput,NULL,KMaxTInt,aInputBytes,&words,NULL);
// Memory overload of the dry run: an expander object named e processes up to
// aInputBytes bytes from aInput with no output sink and an output limit of
// KMaxTInt, collecting the word count in a local named words.
// NOTE(review): the declarations of words and e and the return statement are
// missing from this listing.
494 TInt TUnicodeExpander::ExpandedSizeL(const TUint8* aInput,TInt aInputBytes)
498 e.DoExpandL(NULL,NULL,aInput,KMaxTInt,aInputBytes,&words,NULL);
// Common worker behind ExpandL, FlushL and ExpandedSizeL.
// Visible steps: record the input source and both limits, zero the per-call
// counters, rewind the input-buffer read position, drain words already in the
// output buffer, then, only when an input source was supplied, loop while the
// delivered plus buffered word count is under the word limit and the byte
// limit has not been reached. On exit the counters are written through
// aOutputWords and aInputBytes.
// NOTE(review): the assignment of iOutput, the loop body, and presumably the
// null checks guarding the two result pointers are missing from this listing
// (FlushL passes NULL for aInputBytes).
502 // Expand until input or output is exhausted or an exception occurs.
503 void TUnicodeExpander::DoExpandL(MUnicodeSink* aOutput,CStoreReadStream* aInputStream,const TUint8* aInputPointer,
504 TInt aMaxOutputWords,TInt aMaxInputBytes,
505 TInt* aOutputWords,TInt* aInputBytes)
508 iInputStream = aInputStream;
509 iInputPointer = aInputPointer;
510 iMaxUnicodeWords = aMaxOutputWords;
511 iMaxCompressedBytes = aMaxInputBytes;
512 iUnicodeWords = iCompressedBytes = 0;
513 iInputBufferStart = 0;
514 FlushOutputBufferL();
515 if (iInputPointer || iInputStream)
517 while (iUnicodeWords + iOutputBufferSize < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes)
521 *aOutputWords = iUnicodeWords;
523 *aInputBytes = iCompressedBytes;
// Process one input byte through HandleUByteL or HandleSByteL and record
// whether it was handled. The visible statements then reset the input-buffer
// read position and size and drain the output buffer.
// NOTE(review): the byte read, the mode test choosing the handler, and the
// conditions around the two reset statements are missing from this listing.
// Presumably only the read position is rewound when handled is false, so the
// bytes can be replayed -- confirm against the original file.
526 void TUnicodeExpander::HandleByteL()
529 TBool handled = false;
533 handled = HandleUByteL(byte);
535 handled = HandleSByteL(byte);
537 iInputBufferStart = 0;
539 iInputBufferSize = 0;
540 FlushOutputBufferL();
// Deliver words from the head of the circular output buffer to the sink while
// words remain and the output word limit has not been reached; the head index
// wraps at EMaxOutputBufferSize.
// NOTE(review): the guard on iOutput (ExpandedSizeL passes a NULL sink) and
// the counter updates are missing from this listing.
543 void TUnicodeExpander::FlushOutputBufferL()
545 while (iOutputBufferSize > 0 && iUnicodeWords < iMaxUnicodeWords)
548 iOutput->WriteUnicodeValueL(iOutputBuffer[iOutputBufferStart]);
551 iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize;
// Interpret one byte in single-byte mode. Cases are tried in order:
// pass-through codes, active-window bytes, SQU, SCU, SQ0..SQ0+7,
// SC0..SC0+7, SD0..SD0+7 and SDX; any other byte panics with EUnhandledByte.
// For SQn the quoted byte is offset by the static window base, or by the
// dynamic window base minus 0x80. The result is stored in handled by
// HandleByteL.
// NOTE(review): many conditions and return statements are missing from this
// listing, including the tests for 0x80-0xFF, SQU, SCU and SDX and the choice
// between the static and dynamic SQn branches.
555 TBool TUnicodeExpander::HandleSByteL(TUint8 aByte)
557 // 'Pass-through' codes.
558 if (TUnicodeCompressionState::EncodeAsIs(aByte))
564 // Codes 0x80-0xFF select a character from the active window.
567 WriteChar32(iActiveWindowBase + aByte - 0x80);
571 // SQU: quote a Unicode character.
573 return QuoteUnicodeL();
575 // SCU: switch to Unicode mode.
582 // SQn: quote from window n.
583 if (aByte >= SQ0 && aByte <= SQ0 + 7)
585 int window = aByte - SQ0;
591 c += iStaticWindow[window];
593 c += iDynamicWindow[window] - 0x80;
601 // SCn: switch to dynamic window n.
602 if (aByte >= SC0 && aByte <= SC0 + 7)
604 iActiveWindowBase = iDynamicWindow[aByte - SC0];
608 // SDn: define dynamic window n and switch to it.
609 if (aByte >= SD0 && aByte <= SD0 + 7)
610 return DefineWindowL(aByte - SD0);
612 // SDX: define window in the expansion space.
614 return DefineExpansionWindowL();
616 Panic(EUnhandledByte);
// Interpret one byte in Unicode mode. Bytes up to 0xDF or from 0xF3 upward are
// the high byte of a plain 16-bit value, combined with a low byte named lo.
// Otherwise the byte is a tag: quote, UC0..UC0+7 (leave Unicode mode and
// select a window), UD0..UD0+7 (define a window) or UDX; any other byte panics
// with EUnhandledByte. The result is stored in handled by HandleByteL.
// NOTE(review): the read of lo, the tests for the quote and UDX tags, and
// several return statements are missing from this listing.
620 TBool TUnicodeExpander::HandleUByteL(TUint8 aByte)
622 // Plain Unicode; get the low byte and emit the Unicode value.
623 if (aByte <= 0xDF || aByte >= 0xF3)
628 TUint16 c = (TUint16)((aByte << 8) | lo);
636 // Quote a Unicode character that would otherwise conflict with a tag.
638 return QuoteUnicodeL();
640 // UCn: change to single byte mode and select window n.
641 if (aByte >= UC0 && aByte <= UC0 + 7)
643 iUnicodeMode = false;
644 iActiveWindowBase = iDynamicWindow[aByte - UC0];
648 // UDn: define dynamic window n and switch to it.
649 if (aByte >= UD0 && aByte <= UD0 + 7)
650 return DefineWindowL(aByte - UD0);
652 // UDX: define window in the expansion space.
654 return DefineExpansionWindowL();
656 Panic(EUnhandledByte);
// Read two bytes, high byte first, and combine them into one 16-bit value.
// The && short-circuits, so lo is not read when hi could not be read.
// NOTE(review): the declarations of hi and lo, the use of c, and the return
// statements are missing from this listing.
660 TBool TUnicodeExpander::QuoteUnicodeL()
663 if (ReadByteL(hi) && ReadByteL(lo))
665 TUint16 c = (TUint16)((hi << 8) | lo);
// Read one offset-index byte; when it is available, leave Unicode mode, make
// the window base it denotes (via DynamicWindowBase) the active base, and
// store that base as dynamic window aIndex.
// NOTE(review): the declaration of window and the return statements are
// missing from this listing. DynamicWindowBase is documented to return 0 for
// an illegal index, and no check for that is visible here.
673 TBool TUnicodeExpander::DefineWindowL(TInt aIndex)
676 if (ReadByteL(window))
678 iUnicodeMode = false;
679 iActiveWindowBase = DynamicWindowBase(window);
680 iDynamicWindow[aIndex] = iActiveWindowBase;
// Read two bytes hi and lo; when both are available, leave Unicode mode and
// define a window above the 16-bit range. The low 5 bits of hi together with
// lo form a 13-bit offset in units of 0x80 added to 0x10000, and hi >> 5
// selects which dynamic window receives that base.
// NOTE(review): the declarations of hi and lo and the return statements are
// missing from this listing; hi >> 5 stays within 0..7 only if hi is 8 bits
// wide, as the TUint8& parameter of ReadByteL suggests.
687 TBool TUnicodeExpander::DefineExpansionWindowL()
690 if (ReadByteL(hi) && ReadByteL(lo))
692 iUnicodeMode = false;
693 iActiveWindowBase = 0x10000 + (0x80 * ((hi & 0x1F) * 0x100 + lo));
694 iDynamicWindow[hi >> 5] = iActiveWindowBase;
// Buffered bytes are consumed first. Otherwise, while the input byte limit
// allows, a byte is taken from the raw pointer or from the stream and appended
// to iInputBuffer, so a multi-byte sequence can be replayed later.
// NOTE(review): the test choosing between pointer and stream, the update of
// iCompressedBytes, and the return statements are missing from this listing.
// No bound check on the iInputBuffer index is visible here.
701 // Read either from the buffer (in the case of restarting after source finished in mid-operation) or from the source.
702 TBool TUnicodeExpander::ReadByteL(TUint8& aByte)
704 if (iInputBufferStart < iInputBufferSize)
706 aByte = iInputBuffer[iInputBufferStart++];
709 else if (iCompressedBytes < iMaxCompressedBytes)
712 aByte = *iInputPointer++;
714 aByte = iInputStream->ReadUint8();
715 iInputBuffer[iInputBufferStart++] = aByte;
716 iInputBufferSize = iInputBufferStart;
// Append one 16-bit value at the tail of the circular output buffer. Panics
// with EOutputBufferOverflow when the buffer is already full.
// NOTE(review): embedded line 729 is missing; presumably it increments
// iOutputBufferSize.
724 void TUnicodeExpander::WriteChar(TUint16 aChar)
726 if (iOutputBufferSize >= EMaxOutputBufferSize)
727 Panic(EOutputBufferOverflow);
728 iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = aChar;
// One visible branch writes aChar as a single 16-bit value. Values up to
// 0x10FFFF that reach the other branch are reduced by 0x10000 and written as
// two values: 0xD800 plus the high 10 bits, then 0xDC00 plus the low 10 bits.
// NOTE(review): the first condition (embedded line 735) and everything after
// the last visible line are missing from this listing, so the handling of
// values above 0x10FFFF is not shown.
732 // Write a Unicode character; write using surrogates if in the range 0x10000..0x10FFFF.
733 void TUnicodeExpander::WriteChar32(TUint aChar)
736 WriteChar((TUint16)aChar);
737 else if (aChar <= 0x10FFFF)
739 aChar -= 0x10000; // reduce to 20-bit value in the range 0x0..0xFFFFF
740 WriteChar((TUint16)(0xD800 + (aChar >> 10))); // first high surrogate + high 10 bits
741 WriteChar((TUint16)(0xDC00 + (aChar & 0x03FF))); // first low surrogate + low 10 bits