sl@0
|
1 |
// Copyright (c) 1998-2010 Nokia Corporation and/or its subsidiary(-ies).
|
sl@0
|
2 |
// All rights reserved.
|
sl@0
|
3 |
// This component and the accompanying materials are made available
|
sl@0
|
4 |
// under the terms of "Eclipse Public License v1.0"
|
sl@0
|
5 |
// which accompanies this distribution, and is available
|
sl@0
|
6 |
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
|
sl@0
|
7 |
//
|
sl@0
|
8 |
// Initial Contributors:
|
sl@0
|
9 |
// Nokia Corporation - initial contribution.
|
sl@0
|
10 |
//
|
sl@0
|
11 |
// Contributors:
|
sl@0
|
12 |
//
|
sl@0
|
13 |
// Description:
|
sl@0
|
14 |
// Implementation of the Standard Compression Scheme for Unicode.
|
sl@0
|
15 |
// This code is compiled only in the Unicode build.
|
sl@0
|
16 |
//
|
sl@0
|
17 |
//
|
sl@0
|
18 |
|
sl@0
|
19 |
#ifdef _UNICODE
|
sl@0
|
20 |
|
sl@0
|
21 |
#include <s32ucmp.h>
|
sl@0
|
22 |
|
sl@0
|
23 |
// Base code point of each static window, listed in window-index order.
// StaticWindowIndex() treats every window as covering 128 consecutive codes.
const TUint32 TUnicodeCompressionState::iStaticWindow[EStaticWindows] =
	{
	0x0000,		// window 0: tags
	0x0080,		// window 1: Latin-1 supplement
	0x0100,		// window 2: Latin Extended-A
	0x0300,		// window 3: Combining Diacritics
	0x2000,		// window 4: General Punctuation
	0x2080,		// window 5: Currency Symbols
	0x2100,		// window 6: Letterlike Symbols and Number Forms
	0x3000		// window 7: CJK Symbols and Punctuation
	};
|
sl@0
|
34 |
|
sl@0
|
35 |
// Initial base code point of each dynamic window, listed in window-index order.
// Reset() copies this table into iDynamicWindow.
const TUint32 TUnicodeCompressionState::iDynamicWindowDefault[EDynamicWindows] =
	{
	0x0080,		// window 0: Latin-1 supplement
	0x00C0,		// window 1: parts of Latin-1 supplement and Latin Extended-A
	0x0400,		// window 2: Cyrillic
	0x0600,		// window 3: Arabic
	0x0900,		// window 4: Devanagari
	0x3040,		// window 5: Hiragana
	0x30A0,		// window 6: Katakana
	0xFF00		// window 7: Fullwidth ASCII
	};
|
sl@0
|
46 |
|
sl@0
|
47 |
// Window bases that do not start on a half-block (0x80) boundary.
// Entry n corresponds to window offset index 0xF9 + n.
const TUint16 TUnicodeCompressionState::iSpecialBase[ESpecialBases] =
	{
	0x00C0,		// 0xF9: Latin 1 letters (not symbols) and some of Extended-A
	0x0250,		// 0xFA: IPA extensions
	0x0370,		// 0xFB: Greek
	0x0530,		// 0xFC: Armenian
	0x3040,		// 0xFD: Hiragana
	0x30A0,		// 0xFE: Katakana
	0xFF60		// 0xFF: Halfwidth katakana
	};
|
sl@0
|
57 |
|
sl@0
|
58 |
// Single-byte mode tag values
|
sl@0
|
59 |
const TUint8 SQ0 = 0x01; // <byte> quote from window 0
|
sl@0
|
60 |
const TUint8 SDX = 0x0B; // <hbyte> <lbyte> define window in expansion area
|
sl@0
|
61 |
const TUint8 SQU = 0x0E; // <hbyte> <lbyte> quote Unicode value
|
sl@0
|
62 |
const TUint8 SCU = 0x0F; // switch to Unicode mode
|
sl@0
|
63 |
const TUint8 SC0 = 0x10; // select dynamic window 0
|
sl@0
|
64 |
const TUint8 SD0 = 0x18; // <byte> set dynamic window 0 index to <byte> and select it
|
sl@0
|
65 |
|
sl@0
|
66 |
// Unicode mode tag values
|
sl@0
|
67 |
const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode
|
sl@0
|
68 |
const TUint8 UD0 = 0xE8; // <byte> set dynamic window 0 index to <byte>, select it and switch to
|
sl@0
|
69 |
// single-byte mode
|
sl@0
|
70 |
const TUint8 UQU = 0xF0; // <hbyte>, <lbyte> quote Unicode value
|
sl@0
|
71 |
const TUint8 UDX = 0xF1; // <hbyte>, <lbyte> define window in expansion area and switch to single-byte mode
|
sl@0
|
72 |
|
sl@0
|
73 |
// Construct with all counters and limits zeroed, then put the mode and
// window state into its initial condition via Reset().
TUnicodeCompressionState::TUnicodeCompressionState():
	iUnicodeWords(0),
	iMaxUnicodeWords(0),
	iCompressedBytes(0),
	iMaxCompressedBytes(0)
	{
	Reset();
	}
|
sl@0
|
81 |
|
sl@0
|
82 |
void TUnicodeCompressionState::Reset()
|
sl@0
|
83 |
{
|
sl@0
|
84 |
iUnicodeMode = FALSE;
|
sl@0
|
85 |
iActiveWindowBase = 0x0080;
|
sl@0
|
86 |
for (int i = 0; i < EDynamicWindows; i++)
|
sl@0
|
87 |
iDynamicWindow[i] = iDynamicWindowDefault[i];
|
sl@0
|
88 |
}
|
sl@0
|
89 |
|
sl@0
|
90 |
|
sl@0
|
91 |
// Return the index of the static window that contains this code, if any, or -1 if there is none.
|
sl@0
|
92 |
TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode)
|
sl@0
|
93 |
{
|
sl@0
|
94 |
for (TInt i = 0; i < EStaticWindows; i++)
|
sl@0
|
95 |
if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128)
|
sl@0
|
96 |
return i;
|
sl@0
|
97 |
return -1;
|
sl@0
|
98 |
}
|
sl@0
|
99 |
|
sl@0
|
100 |
/*
|
sl@0
|
101 |
If aCode can be accommodated in one of the legal dynamic windows, return the index of that window
|
sl@0
|
102 |
in the offset table. If not return KErrNotFound.
|
sl@0
|
103 |
*/
|
sl@0
|
104 |
TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode)
|
sl@0
|
105 |
{
|
sl@0
|
106 |
if (aCode < 0x0080)
|
sl@0
|
107 |
return KErrNotFound;
|
sl@0
|
108 |
if (aCode >= 0x3400 && aCode <= 0xDFFF)
|
sl@0
|
109 |
return KErrNotFound;
|
sl@0
|
110 |
|
sl@0
|
111 |
/*
|
sl@0
|
112 |
Prefer sections that cross half-block boundaries. These are better adapted to actual text.
|
sl@0
|
113 |
They are represented by offset indices 0xf9..0xff.
|
sl@0
|
114 |
*/
|
sl@0
|
115 |
for (int i = 0; i < ESpecialBases; i++)
|
sl@0
|
116 |
if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128)
|
sl@0
|
117 |
return 0xF9 + i;
|
sl@0
|
118 |
|
sl@0
|
119 |
/*
|
sl@0
|
120 |
Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and
|
sl@0
|
121 |
0x68..0xA7 represent half blocks from 0xE000 to 0xFF80.
|
sl@0
|
122 |
*/
|
sl@0
|
123 |
if (aCode >= 0xE000)
|
sl@0
|
124 |
aCode -= 0xAC00;
|
sl@0
|
125 |
return aCode / 0x80;
|
sl@0
|
126 |
}
|
sl@0
|
127 |
|
sl@0
|
128 |
// Return the base of the window represented by offset index <n>. Return 0 if the offset index is illegal.
|
sl@0
|
129 |
TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex)
|
sl@0
|
130 |
{
|
sl@0
|
131 |
if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF)
|
sl@0
|
132 |
{
|
sl@0
|
133 |
/*
|
sl@0
|
134 |
WARNING: don't optimise the following two lines by replacing them with
|
sl@0
|
135 |
'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce an error
|
sl@0
|
136 |
in ARM builds caused by optimisation and consequent erroneous fixing up
|
sl@0
|
137 |
of the array base: see defect EDNGASR-4AGJQX in ER5U defects.
|
sl@0
|
138 |
*/
|
sl@0
|
139 |
int special_base_index = aOffsetIndex - 0xF9;
|
sl@0
|
140 |
return iSpecialBase[special_base_index];
|
sl@0
|
141 |
}
|
sl@0
|
142 |
if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67)
|
sl@0
|
143 |
return aOffsetIndex * 0x80;
|
sl@0
|
144 |
if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7)
|
sl@0
|
145 |
return aOffsetIndex * 0x80 + 0xAC00;
|
sl@0
|
146 |
return 0;
|
sl@0
|
147 |
}
|
sl@0
|
148 |
|
sl@0
|
149 |
// Tell whether aCode is a pass-through code: one of 0x0000, 0x0009, 0x000A,
// 0x000D, or anything in 0x0020..0x007F.
TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode)
	{
	if (aCode >= 0x0020 && aCode <= 0x007F)
		return TRUE;
	return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D;
	}
|
sl@0
|
154 |
|
sl@0
|
155 |
#pragma BullseyeCoverage off
|
sl@0
|
156 |
|
sl@0
|
157 |
// Raise a panic in category "ucmp" with aPanic as the reason code.
void TUnicodeCompressionState::Panic(TPanic aPanic)
	{
	User::Panic(_L("ucmp"), aPanic);
	}
|
sl@0
|
161 |
|
sl@0
|
162 |
#pragma BullseyeCoverage on
|
sl@0
|
163 |
|
sl@0
|
164 |
// Construct a compressor with empty input and output buffers, dynamic window 0
// selected, and no source or destination attached.
EXPORT_C TUnicodeCompressor::TUnicodeCompressor():
	iInputBufferStart(0),
	iInputBufferSize(0),
	iOutputBufferStart(0),
	iOutputBufferSize(0),
	iDynamicWindowIndex(0),
	iOutputStream(NULL),
	iOutputPointer(NULL),
	iInput(NULL)
	{
	}
|
sl@0
|
175 |
|
sl@0
|
176 |
// Compress Unicode values read from aInput and write the bytes to the stream aOutput.
// aMaxOutputBytes and aMaxInputWords bound the work done. If non-null,
// aOutputBytes and aInputWords receive the number of bytes written and words read.
EXPORT_C void TUnicodeCompressor::CompressL(RWriteStream& aOutput,MUnicodeSource& aInput,
											TInt aMaxOutputBytes,TInt aMaxInputWords,
											TInt* aOutputBytes,TInt* aInputWords)
	{
	RWriteStream* const stream = &aOutput;
	MUnicodeSource* const source = &aInput;
	DoCompressL(stream,NULL,source,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords);
	}
|
sl@0
|
182 |
|
sl@0
|
183 |
// Compress Unicode values read from aInput and write the bytes to the memory at aOutput.
// aMaxOutputBytes and aMaxInputWords bound the work done. If non-null,
// aOutputBytes and aInputWords receive the number of bytes written and words read.
EXPORT_C void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput,
											TInt aMaxOutputBytes,TInt aMaxInputWords,
											TInt* aOutputBytes,TInt* aInputWords)
	{
	MUnicodeSource* const source = &aInput;
	DoCompressL(NULL,aOutput,source,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords);
	}
|
sl@0
|
189 |
|
sl@0
|
190 |
// Write buffered data to the stream aOutput without reading any new input,
// producing at most aMaxOutputBytes bytes. aOutputBytes receives the number of
// bytes written. Returns the number of bytes still held in the output buffer.
EXPORT_C TInt TUnicodeCompressor::FlushL(RWriteStream& aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes)
	{
	RWriteStream* const stream = &aOutput;
	DoCompressL(stream,NULL,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL);
	return iOutputBufferSize;
	}
|
sl@0
|
195 |
|
sl@0
|
196 |
// Write buffered data to the memory at aOutput without reading any new input,
// producing at most aMaxOutputBytes bytes. aOutputBytes receives the number of
// bytes written. Returns the number of bytes still held in the output buffer.
EXPORT_C TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes)
	{
	TInt* const written = &aOutputBytes;
	DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,written,NULL);
	return iOutputBufferSize;
	}
|
sl@0
|
201 |
|
sl@0
|
202 |
// Return the number of bytes that compressing aInputWords words from aInput
// would produce. A temporary compressor is run with no output destination,
// so bytes are counted but not stored anywhere.
EXPORT_C TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords)
	{
	TUnicodeCompressor sizer;
	TInt compressedBytes;
	sizer.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&compressedBytes,NULL);
	return compressedBytes;
	}
|
sl@0
|
209 |
|
sl@0
|
210 |
// Compress until input or output is exhausted or an exception occurs.
|
sl@0
|
211 |
void TUnicodeCompressor::DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput,
|
sl@0
|
212 |
TInt aMaxOutputBytes,TInt aMaxInputWords,
|
sl@0
|
213 |
TInt* aOutputBytes,TInt* aInputWords)
|
sl@0
|
214 |
{
|
sl@0
|
215 |
iOutputStream = aOutputStream;
|
sl@0
|
216 |
iOutputPointer = aOutputPointer;
|
sl@0
|
217 |
iInput = aInput;
|
sl@0
|
218 |
iMaxCompressedBytes = aMaxOutputBytes;
|
sl@0
|
219 |
iMaxUnicodeWords = aMaxInputWords;
|
sl@0
|
220 |
iCompressedBytes = iUnicodeWords = 0;
|
sl@0
|
221 |
FlushOutputBufferL();
|
sl@0
|
222 |
if (iInput)
|
sl@0
|
223 |
{
|
sl@0
|
224 |
while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes)
|
sl@0
|
225 |
{
|
sl@0
|
226 |
TUint16 x = iInput->ReadUnicodeValueL();
|
sl@0
|
227 |
TAction action(x);
|
sl@0
|
228 |
iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action;
|
sl@0
|
229 |
iInputBufferSize++;
|
sl@0
|
230 |
iUnicodeWords++;
|
sl@0
|
231 |
if (iInputBufferSize == EMaxInputBufferSize)
|
sl@0
|
232 |
WriteRunL();
|
sl@0
|
233 |
}
|
sl@0
|
234 |
}
|
sl@0
|
235 |
FlushInputBufferL();
|
sl@0
|
236 |
if (aOutputBytes)
|
sl@0
|
237 |
*aOutputBytes = iCompressedBytes;
|
sl@0
|
238 |
if (aInputWords)
|
sl@0
|
239 |
*aInputWords = iUnicodeWords;
|
sl@0
|
240 |
}
|
sl@0
|
241 |
|
sl@0
|
242 |
// Record aCode together with the treatment used to encode it, chosen in order:
// plain ASCII pass-through, a dynamic window offset index, a static window
// (EFirstStatic + window index), or plain Unicode if nothing else applies.
TUnicodeCompressor::TAction::TAction(TUint16 aCode):
	iCode(aCode)
	{
	if (TUnicodeCompressionState::EncodeAsIs(aCode))
		{
		iTreatment = EPlainASCII;
		return;
		}

	iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode);
	if (iTreatment != -1)
		return;

	// No dynamic window fits: fall back to a static window, then to plain Unicode.
	iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode);
	if (iTreatment != -1)
		iTreatment += EFirstStatic;
	else
		iTreatment = EPlainUnicode;
	}
|
sl@0
|
260 |
|
sl@0
|
261 |
void TUnicodeCompressor::WriteCharacterFromBuffer()
|
sl@0
|
262 |
{
|
sl@0
|
263 |
const TAction& action = iInputBuffer[iInputBufferStart];
|
sl@0
|
264 |
iInputBufferSize--;
|
sl@0
|
265 |
iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize;
|
sl@0
|
266 |
WriteCharacter(action);
|
sl@0
|
267 |
}
|
sl@0
|
268 |
|
sl@0
|
269 |
void TUnicodeCompressor::FlushInputBufferL()
|
sl@0
|
270 |
{
|
sl@0
|
271 |
while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes)
|
sl@0
|
272 |
WriteRunL();
|
sl@0
|
273 |
}
|
sl@0
|
274 |
|
sl@0
|
275 |
void TUnicodeCompressor::WriteRunL()
|
sl@0
|
276 |
{
|
sl@0
|
277 |
// Write out any leading characters that can be passed through.
|
sl@0
|
278 |
if (!iUnicodeMode)
|
sl@0
|
279 |
while (iInputBufferSize > 0)
|
sl@0
|
280 |
{
|
sl@0
|
281 |
const TAction& action = iInputBuffer[iInputBufferStart];
|
sl@0
|
282 |
if (action.iTreatment == TAction::EPlainASCII ||
|
sl@0
|
283 |
(action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128))
|
sl@0
|
284 |
WriteCharacterFromBuffer();
|
sl@0
|
285 |
else
|
sl@0
|
286 |
break;
|
sl@0
|
287 |
}
|
sl@0
|
288 |
|
sl@0
|
289 |
// Write a run of characters that cannot be passed through.
|
sl@0
|
290 |
int i;
|
sl@0
|
291 |
if (iInputBufferSize > 0)
|
sl@0
|
292 |
{
|
sl@0
|
293 |
/*
|
sl@0
|
294 |
Find a run of characters with the same treatment and select that treatment
|
sl@0
|
295 |
if the run has more than one character.
|
sl@0
|
296 |
*/
|
sl@0
|
297 |
int treatment = iInputBuffer[iInputBufferStart].iTreatment;
|
sl@0
|
298 |
int next_treatment = treatment;
|
sl@0
|
299 |
int run_size = 1;
|
sl@0
|
300 |
for (i = 1; i < iInputBufferSize; i++)
|
sl@0
|
301 |
{
|
sl@0
|
302 |
int index = (iInputBufferStart + i) % EMaxInputBufferSize;
|
sl@0
|
303 |
next_treatment = iInputBuffer[index].iTreatment;
|
sl@0
|
304 |
if (next_treatment != treatment)
|
sl@0
|
305 |
break;
|
sl@0
|
306 |
run_size++;
|
sl@0
|
307 |
}
|
sl@0
|
308 |
if (run_size > 1)
|
sl@0
|
309 |
SelectTreatment(treatment);
|
sl@0
|
310 |
for (i = 0; i < run_size; i++)
|
sl@0
|
311 |
WriteCharacterFromBuffer();
|
sl@0
|
312 |
}
|
sl@0
|
313 |
|
sl@0
|
314 |
FlushOutputBufferL();
|
sl@0
|
315 |
}
|
sl@0
|
316 |
|
sl@0
|
317 |
void TUnicodeCompressor::FlushOutputBufferL()
|
sl@0
|
318 |
{
|
sl@0
|
319 |
while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes)
|
sl@0
|
320 |
{
|
sl@0
|
321 |
TUint8 byte = iOutputBuffer[iOutputBufferStart];
|
sl@0
|
322 |
if (iOutputPointer)
|
sl@0
|
323 |
*iOutputPointer++ = byte;
|
sl@0
|
324 |
else if (iOutputStream)
|
sl@0
|
325 |
iOutputStream->WriteUint8L(byte);
|
sl@0
|
326 |
iCompressedBytes++;
|
sl@0
|
327 |
iOutputBufferSize--;
|
sl@0
|
328 |
iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize;
|
sl@0
|
329 |
}
|
sl@0
|
330 |
}
|
sl@0
|
331 |
|
sl@0
|
332 |
// Emit whatever tag bytes are needed so that following characters with
// treatment aTreatment can be written compactly, and update the mode and
// window state to match. Static-window treatments need no change and emit nothing.
void TUnicodeCompressor::SelectTreatment(TInt aTreatment)
	{
	if (aTreatment == TAction::EPlainUnicode)
		{
		// Enter Unicode mode unless already in it.
		if (!iUnicodeMode)
			{
			WriteByte(SCU);
			iUnicodeMode = TRUE;
			}
		return;
		}

	if (aTreatment == TAction::EPlainASCII)
		{
		// Return to single-byte mode, keeping the current dynamic window.
		if (iUnicodeMode)
			{
			WriteByte(UC0 + iDynamicWindowIndex);
			iUnicodeMode = FALSE;
			}
		return;
		}

	if (aTreatment < TAction::EFirstDynamic || aTreatment > TAction::ELastDynamic)
		return;

	const TUint32 base = DynamicWindowBase(aTreatment);

	// Look for a dynamic window that already has this base.
	int window = 0;
	while (window < EDynamicWindows && iDynamicWindow[window] != base)
		++window;

	if (window < EDynamicWindows)
		{
		// Select the existing window. In single-byte mode no tag is needed if
		// it is already the selected one.
		if (iUnicodeMode)
			WriteByte(UC0 + window);
		else if (window != iDynamicWindowIndex)
			WriteByte(SC0 + window);
		}
	else
		{
		// No window has this base: redefine dynamic window 4 and select it.
		window = 4;
		WriteByte(iUnicodeMode ? UD0 + 4 : SD0 + 4);
		WriteByte(aTreatment);
		iDynamicWindow[4] = base;
		}

	iUnicodeMode = FALSE;
	iDynamicWindowIndex = window;
	iActiveWindowBase = base;
	}
|
sl@0
|
385 |
|
sl@0
|
386 |
// Write a character without changing mode or window.
|
sl@0
|
387 |
void TUnicodeCompressor::WriteCharacter(const TAction& aAction)
|
sl@0
|
388 |
{
|
sl@0
|
389 |
if (iUnicodeMode)
|
sl@0
|
390 |
WriteUCharacter(aAction.iCode);
|
sl@0
|
391 |
else
|
sl@0
|
392 |
WriteSCharacter(aAction);
|
sl@0
|
393 |
}
|
sl@0
|
394 |
|
sl@0
|
395 |
// Write aCode in Unicode mode: high byte first, then low byte.
// A UQU tag is written first when the high byte lies in 0xE0..0xF2, because
// it would otherwise be read as a Unicode-mode tag.
void TUnicodeCompressor::WriteUCharacter(TUint16 aCode)
	{
	const TUint highByte = (aCode >> 8) & 0xFF;
	const TUint lowByte = aCode & 0xFF;

	if (highByte >= 0xE0 && highByte <= 0xF2)
		WriteByte(UQU);

	WriteByte(highByte);
	WriteByte(lowByte);
	}
|
sl@0
|
405 |
|
sl@0
|
406 |
// Append the low 8 bits of aByte to the circular output buffer.
// Panics with EOutputBufferOverflow if the buffer is already full; that can
// only happen through a programming error.
void TUnicodeCompressor::WriteByte(TUint aByte)
	{
	if (iOutputBufferSize >= EMaxOutputBufferSize)
		Panic(EOutputBufferOverflow);

	const TInt tail = (iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize;
	iOutputBuffer[tail] = (TUint8)aByte;
	++iOutputBufferSize;
	}
|
sl@0
|
413 |
|
sl@0
|
414 |
void TUnicodeCompressor::WriteSCharacter(const TAction& aAction)
|
sl@0
|
415 |
{
|
sl@0
|
416 |
// Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes.
|
sl@0
|
417 |
if (aAction.iTreatment == TAction::EPlainASCII)
|
sl@0
|
418 |
{
|
sl@0
|
419 |
WriteByte(aAction.iCode);
|
sl@0
|
420 |
return;
|
sl@0
|
421 |
}
|
sl@0
|
422 |
|
sl@0
|
423 |
// Characters in a static window can be written using SQ<n> plus a byte in the range 0x00-0x7F
|
sl@0
|
424 |
if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic)
|
sl@0
|
425 |
{
|
sl@0
|
426 |
int window = aAction.iTreatment - TAction::EFirstStatic;
|
sl@0
|
427 |
WriteByte(SQ0 + window);
|
sl@0
|
428 |
WriteByte(aAction.iCode);
|
sl@0
|
429 |
return;
|
sl@0
|
430 |
}
|
sl@0
|
431 |
|
sl@0
|
432 |
// Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF.
|
sl@0
|
433 |
if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128)
|
sl@0
|
434 |
{
|
sl@0
|
435 |
WriteByte(aAction.iCode - iActiveWindowBase + 0x80);
|
sl@0
|
436 |
return;
|
sl@0
|
437 |
}
|
sl@0
|
438 |
|
sl@0
|
439 |
// Characters in another dynamic window can be written using SQ<n> plus a byte in the range 0x80-0xFF
|
sl@0
|
440 |
int i;
|
sl@0
|
441 |
for (i = 0; i < EDynamicWindows; i++)
|
sl@0
|
442 |
if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128)
|
sl@0
|
443 |
{
|
sl@0
|
444 |
WriteByte(SQ0 + i);
|
sl@0
|
445 |
WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80);
|
sl@0
|
446 |
return;
|
sl@0
|
447 |
}
|
sl@0
|
448 |
|
sl@0
|
449 |
// Other characters can be quoted.
|
sl@0
|
450 |
WriteByte(SQU);
|
sl@0
|
451 |
WriteByte((aAction.iCode >> 8) & 0xFF);
|
sl@0
|
452 |
WriteByte(aAction.iCode & 0xFF);
|
sl@0
|
453 |
return;
|
sl@0
|
454 |
}
|
sl@0
|
455 |
|
sl@0
|
456 |
// Construct an expander with empty input and output buffers and no source or
// sink attached.
EXPORT_C TUnicodeExpander::TUnicodeExpander():
	iInputBufferStart(0),
	iInputBufferSize(0),
	iOutputBufferStart(0),
	iOutputBufferSize(0),
	iOutput(NULL),
	iInputStream(NULL),
	iInputPointer(NULL)
	{
	}
|
sl@0
|
466 |
|
sl@0
|
467 |
// Expand compressed bytes read from the stream aInput and write the Unicode
// values to aOutput. aMaxOutputWords and aMaxInputBytes bound the work done.
// If non-null, aOutputWords and aInputBytes receive the number of words
// written and bytes read.
EXPORT_C void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,RReadStream& aInput,
										TInt aMaxOutputWords,TInt aMaxInputBytes,
										TInt* aOutputWords,TInt* aInputBytes)
	{
	MUnicodeSink* const sink = &aOutput;
	RReadStream* const stream = &aInput;
	DoExpandL(sink,stream,NULL,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes);
	}
|
sl@0
|
473 |
|
sl@0
|
474 |
// Expand compressed bytes read from the memory at aInput and write the Unicode
// values to aOutput. aMaxOutputWords and aMaxInputBytes bound the work done.
// If non-null, aOutputWords and aInputBytes receive the number of words
// written and bytes read.
EXPORT_C void TUnicodeExpander::ExpandL(MUnicodeSink& aOutput,const TUint8* aInput,
										TInt aMaxOutputWords,TInt aMaxInputBytes,
										TInt* aOutputWords,TInt* aInputBytes)
	{
	MUnicodeSink* const sink = &aOutput;
	DoExpandL(sink,NULL,aInput,aMaxOutputWords,aMaxInputBytes,aOutputWords,aInputBytes);
	}
|
sl@0
|
480 |
|
sl@0
|
481 |
// Write buffered Unicode values to aOutput without reading any new input,
// producing at most aMaxOutputWords words. aOutputWords receives the number of
// words written. Returns the number of words still held in the output buffer.
EXPORT_C TInt TUnicodeExpander::FlushL(MUnicodeSink& aOutput,TInt aMaxOutputWords,TInt& aOutputWords)
	{
	MUnicodeSink* const sink = &aOutput;
	DoExpandL(sink,NULL,NULL,aMaxOutputWords,0,&aOutputWords,NULL);
	return iOutputBufferSize;
	}
|
sl@0
|
486 |
|
sl@0
|
487 |
// Return the number of Unicode words that expanding aInputBytes bytes from the
// stream aInput would produce. A temporary expander is run with no sink, so
// words are counted but not delivered anywhere.
EXPORT_C TInt TUnicodeExpander::ExpandedSizeL(RReadStream& aInput,TInt aInputBytes)
	{
	TUnicodeExpander sizer;
	TInt expandedWords;
	sizer.DoExpandL(NULL,&aInput,NULL,KMaxTInt,aInputBytes,&expandedWords,NULL);
	return expandedWords;
	}
|
sl@0
|
494 |
|
sl@0
|
495 |
// Return the number of Unicode words that expanding aInputBytes bytes from the
// memory at aInput would produce. A temporary expander is run with no sink, so
// words are counted but not delivered anywhere.
EXPORT_C TInt TUnicodeExpander::ExpandedSizeL(const TUint8* aInput,TInt aInputBytes)
	{
	TUnicodeExpander sizer;
	TInt expandedWords;
	sizer.DoExpandL(NULL,NULL,aInput,KMaxTInt,aInputBytes,&expandedWords,NULL);
	return expandedWords;
	}
|
sl@0
|
502 |
|
sl@0
|
503 |
// Expand until input or output is exhausted or an exception occurs.
|
sl@0
|
504 |
void TUnicodeExpander::DoExpandL(MUnicodeSink* aOutput,RReadStream* aInputStream,const TUint8* aInputPointer,
|
sl@0
|
505 |
TInt aMaxOutputWords,TInt aMaxInputBytes,
|
sl@0
|
506 |
TInt* aOutputWords,TInt* aInputBytes)
|
sl@0
|
507 |
{
|
sl@0
|
508 |
iOutput = aOutput;
|
sl@0
|
509 |
iInputStream = aInputStream;
|
sl@0
|
510 |
iInputPointer = aInputPointer;
|
sl@0
|
511 |
iMaxUnicodeWords = aMaxOutputWords;
|
sl@0
|
512 |
iMaxCompressedBytes = aMaxInputBytes;
|
sl@0
|
513 |
iUnicodeWords = iCompressedBytes = 0;
|
sl@0
|
514 |
iInputBufferStart = 0;
|
sl@0
|
515 |
FlushOutputBufferL();
|
sl@0
|
516 |
if (iInputPointer || iInputStream)
|
sl@0
|
517 |
{
|
sl@0
|
518 |
while (iUnicodeWords + iOutputBufferSize < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes)
|
sl@0
|
519 |
HandleByteL();
|
sl@0
|
520 |
}
|
sl@0
|
521 |
if (aOutputWords)
|
sl@0
|
522 |
*aOutputWords = iUnicodeWords;
|
sl@0
|
523 |
if (aInputBytes)
|
sl@0
|
524 |
*aInputBytes = iCompressedBytes;
|
sl@0
|
525 |
}
|
sl@0
|
526 |
|
sl@0
|
527 |
void TUnicodeExpander::HandleByteL()
|
sl@0
|
528 |
{
|
sl@0
|
529 |
TUint8 byte;
|
sl@0
|
530 |
TBool handled = FALSE;
|
sl@0
|
531 |
if (ReadByteL(byte))
|
sl@0
|
532 |
{
|
sl@0
|
533 |
if (iUnicodeMode)
|
sl@0
|
534 |
handled = HandleUByteL(byte);
|
sl@0
|
535 |
else
|
sl@0
|
536 |
handled = HandleSByteL(byte);
|
sl@0
|
537 |
}
|
sl@0
|
538 |
iInputBufferStart = 0;
|
sl@0
|
539 |
if (handled)
|
sl@0
|
540 |
iInputBufferSize = 0;
|
sl@0
|
541 |
FlushOutputBufferL();
|
sl@0
|
542 |
}
|
sl@0
|
543 |
|
sl@0
|
544 |
void TUnicodeExpander::FlushOutputBufferL()
|
sl@0
|
545 |
{
|
sl@0
|
546 |
while (iOutputBufferSize > 0 && iUnicodeWords < iMaxUnicodeWords)
|
sl@0
|
547 |
{
|
sl@0
|
548 |
if (iOutput)
|
sl@0
|
549 |
iOutput->WriteUnicodeValueL(iOutputBuffer[iOutputBufferStart]);
|
sl@0
|
550 |
iUnicodeWords++;
|
sl@0
|
551 |
iOutputBufferSize--;
|
sl@0
|
552 |
iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize;
|
sl@0
|
553 |
}
|
sl@0
|
554 |
}
|
sl@0
|
555 |
|
sl@0
|
556 |
// Process one lead byte in single-byte mode.
// Returns TRUE when the operation completed, or FALSE when the input ran out
// before all of its operand bytes could be read.
// Leaves with KErrCorrupt if aByte is not a recognised single-byte mode code.
TBool TUnicodeExpander::HandleSByteL(TUint8 aByte)
	{
	// Pass-through codes stand for themselves.
	if (TUnicodeCompressionState::EncodeAsIs(aByte))
		{
		WriteChar(aByte);
		return TRUE;
		}

	// 0x80..0xFF: a character from the active window.
	if (aByte >= 0x80)
		{
		WriteChar32(iActiveWindowBase + aByte - 0x80);
		return TRUE;
		}

	// The tags that take no window number.
	if (aByte == SQU)
		return QuoteUnicodeL();
	if (aByte == SDX)
		return DefineExpansionWindowL();
	if (aByte == SCU)
		{
		iUnicodeMode = TRUE;
		return TRUE;
		}

	// SQn: quote one character from window n. An operand of 0x00..0x7F selects
	// static window n; 0x80..0xFF selects dynamic window n.
	if (aByte >= SQ0 && aByte <= SQ0 + 7)
		{
		TUint8 operand;
		if (!ReadByteL(operand))
			return FALSE;

		const int window = aByte - SQ0;
		TUint32 c = operand;
		if (c <= 0x7F)
			c += iStaticWindow[window];
		else
			c += iDynamicWindow[window] - 0x80;
		WriteChar32(c);
		return TRUE;
		}

	// SCn: make dynamic window n the active window.
	if (aByte >= SC0 && aByte <= SC0 + 7)
		{
		iActiveWindowBase = iDynamicWindow[aByte - SC0];
		return TRUE;
		}

	// SDn: define dynamic window n and make it active.
	if (aByte >= SD0 && aByte <= SD0 + 7)
		return DefineWindowL(aByte - SD0);

	User::Leave(KErrCorrupt);
	return FALSE;
	}
|
sl@0
|
620 |
|
sl@0
|
621 |
// Process one lead byte in Unicode mode.
// Returns TRUE when the operation completed, or FALSE when the input ran out
// before all of its operand bytes could be read.
// Leaves with KErrCorrupt if aByte lies in the tag range 0xE0..0xF2 but is not
// a recognised tag.
TBool TUnicodeExpander::HandleUByteL(TUint8 aByte)
	{
	// Bytes outside 0xE0..0xF2 are the high byte of a plain Unicode value.
	const TBool isTag = aByte > 0xDF && aByte < 0xF3;
	if (!isTag)
		{
		TUint8 lowByte;
		if (!ReadByteL(lowByte))
			return FALSE;
		TUint16 c = (TUint16)((aByte << 8) | lowByte);
		WriteChar(c);
		return TRUE;
		}

	// The tags that take no window number.
	if (aByte == UQU)
		return QuoteUnicodeL();
	if (aByte == UDX)
		return DefineExpansionWindowL();

	// UCn: switch to single-byte mode with dynamic window n active.
	if (aByte >= UC0 && aByte <= UC0 + 7)
		{
		iActiveWindowBase = iDynamicWindow[aByte - UC0];
		iUnicodeMode = FALSE;
		return TRUE;
		}

	// UDn: define dynamic window n and make it active.
	if (aByte >= UD0 && aByte <= UD0 + 7)
		return DefineWindowL(aByte - UD0);

	User::Leave(KErrCorrupt);
	return FALSE;
	}
|
sl@0
|
660 |
|
sl@0
|
661 |
// Read a two-byte operand (high byte first) and write it as one Unicode value.
// Returns FALSE, writing nothing, if either byte could not be read.
TBool TUnicodeExpander::QuoteUnicodeL()
	{
	TUint8 highByte;
	TUint8 lowByte;
	if (!(ReadByteL(highByte) && ReadByteL(lowByte)))
		return FALSE;

	const TUint16 quoted = (TUint16)((highByte << 8) | lowByte);
	WriteChar(quoted);
	return TRUE;
	}
|
sl@0
|
673 |
|
sl@0
|
674 |
// Read a window offset index byte, set dynamic window aIndex to the base it
// denotes, make that window active and switch to single-byte mode.
// Returns FALSE, changing nothing, if the operand byte could not be read.
// Leaves with KErrCorrupt, changing no window state, if the byte is not a
// legal offset index.
TBool TUnicodeExpander::DefineWindowL(TInt aIndex)
	{
	TUint8 offsetIndex;
	if (!ReadByteL(offsetIndex))
		return FALSE;

	// DynamicWindowBase() signals an illegal or reserved offset index with 0.
	// Every legal base is at least 0x0080, so 0 is unambiguous. Installing it
	// would silently decode the rest of the data to wrong characters.
	const TUint32 base = DynamicWindowBase(offsetIndex);
	if (base == 0)
		User::Leave(KErrCorrupt);

	iUnicodeMode = FALSE;
	iActiveWindowBase = base;
	iDynamicWindow[aIndex] = base;
	return TRUE;
	}
|
sl@0
|
687 |
|
sl@0
|
688 |
// Read a two-byte operand and define a dynamic window above 0xFFFF from it.
// The top 3 bits of the first byte give the window number. The remaining
// 13 bits of the operand give the base, in units of 0x80 above 0x10000.
// The window becomes active and single-byte mode is selected.
// Returns FALSE, changing nothing, if either byte could not be read.
TBool TUnicodeExpander::DefineExpansionWindowL()
	{
	TUint8 highByte;
	TUint8 lowByte;
	if (!(ReadByteL(highByte) && ReadByteL(lowByte)))
		return FALSE;

	const int window = highByte >> 5;
	const int halfBlock = (highByte & 0x1F) * 0x100 + lowByte;

	iUnicodeMode = FALSE;
	iActiveWindowBase = 0x10000 + (0x80 * halfBlock);
	iDynamicWindow[window] = iActiveWindowBase;
	return TRUE;
	}
|
sl@0
|
701 |
|
sl@0
|
702 |
// Read either from the buffer (in the case of restarting after source finished in mid-operation) or from the source.
|
sl@0
|
703 |
TBool TUnicodeExpander::ReadByteL(TUint8& aByte)
|
sl@0
|
704 |
{
|
sl@0
|
705 |
if (iInputBufferStart < iInputBufferSize)
|
sl@0
|
706 |
{
|
sl@0
|
707 |
aByte = iInputBuffer[iInputBufferStart++];
|
sl@0
|
708 |
return TRUE;
|
sl@0
|
709 |
}
|
sl@0
|
710 |
else if (iCompressedBytes < iMaxCompressedBytes)
|
sl@0
|
711 |
{
|
sl@0
|
712 |
if (iInputPointer)
|
sl@0
|
713 |
aByte = *iInputPointer++;
|
sl@0
|
714 |
else
|
sl@0
|
715 |
aByte = iInputStream->ReadUint8L();
|
sl@0
|
716 |
iInputBuffer[iInputBufferStart++] = aByte;
|
sl@0
|
717 |
iInputBufferSize = iInputBufferStart;
|
sl@0
|
718 |
iCompressedBytes++;
|
sl@0
|
719 |
return TRUE;
|
sl@0
|
720 |
}
|
sl@0
|
721 |
else
|
sl@0
|
722 |
return FALSE;
|
sl@0
|
723 |
}
|
sl@0
|
724 |
|
sl@0
|
725 |
// Append one 16-bit value to the circular output buffer.
// Panics with EOutputBufferOverflow if the buffer is already full; that can
// only happen through a programming error.
void TUnicodeExpander::WriteChar(TUint16 aChar)
	{
	if (iOutputBufferSize >= EMaxOutputBufferSize)
		Panic(EOutputBufferOverflow);

	const TInt tail = (iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize;
	iOutputBuffer[tail] = aChar;
	++iOutputBufferSize;
	}
|
sl@0
|
732 |
|
sl@0
|
733 |
// Write a Unicode character; write using surrogates if in the range 0x10000..0x10FFFF.
|
sl@0
|
734 |
void TUnicodeExpander::WriteChar32(TUint aChar)
|
sl@0
|
735 |
{
|
sl@0
|
736 |
if (aChar <= 0xFFFF)
|
sl@0
|
737 |
WriteChar((TUint16)aChar);
|
sl@0
|
738 |
else if (aChar <= 0x10FFFF)
|
sl@0
|
739 |
{
|
sl@0
|
740 |
aChar -= 0x10000; // reduce to 20-bit value in the range 0x0..0xFFFFF
|
sl@0
|
741 |
WriteChar((TUint16)(0xD800 + (aChar >> 10))); // first high surrogate + high 10 bits
|
sl@0
|
742 |
WriteChar((TUint16)(0xDC00 + (aChar & 0x03FF))); // first low surrogate + low 10 bits
|
sl@0
|
743 |
}
|
sl@0
|
744 |
else
|
sl@0
|
745 |
//Panic to be kept here as impossible to test this case (nor the one before). Biggest value that can be passed is 0xFFFFF
|
sl@0
|
746 |
Panic(ENotUnicode);
|
sl@0
|
747 |
}
|
sl@0
|
748 |
|
sl@0
|
749 |
#endif // _UNICODE
|