1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/textrendering/texthandling/stext/TXTPLAIN.CPP Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,447 @@
1.4 +/*
1.5 +* Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +* Implementation of the classes that import and export plain text.
1.19 +*
1.20 +*/
1.21 +
1.22 +
1.23 +#include "TXTSTD.H"
1.24 +#include "TXTPLAIN.H"
1.25 +#include "charconv.h"
1.26 +
1.27 +TPlainTextIOState:: TPlainTextIOState(const CPlainText::TImportExportParam& aParam,
1.28 + CPlainText::TImportExportResult& aResult,
1.29 + RWriteStream& aOutput,RReadStream& aInput):
1.30 + iParam(aParam),
1.31 + iResult(aResult),
1.32 + iOutput(aOutput),
1.33 + iInput(aInput),
1.34 + iConverter(NULL),
1.35 + iSwapInput(FALSE),
1.36 + iCheckByteOrder(FALSE)
1.37 + {
1.38 + aResult = CPlainText::TImportExportResult(); // zero output counters; aResult may be re-used.
1.39 + }
1.40 +
1.41 +TText TPlainTextIOState::ReadRawCharL()
1.42 + {
1.43 + TText c;
1.44 + if (iParam.iInputInternal)
1.45 + iInput.ReadL((TUint8*)&c,sizeof(TText));
1.46 + else
1.47 + c = iInput.ReadUint16L();
1.48 + if (iSwapInput)
1.49 + c = (TText)(((c << 8) & 0xFF00) | ((c >> 8) & 0xFF));
1.50 + if (iCheckByteOrder)
1.51 + {
1.52 + if (c == CEditableText::EReversedByteOrderMark)
1.53 + {
1.54 + c = CEditableText::EByteOrderMark;
1.55 + iSwapInput = !iSwapInput;
1.56 + }
1.57 + iCheckByteOrder = FALSE;
1.58 + }
1.59 + iResult.iInputChars++;
1.60 + return c;
1.61 + }
1.62 +
1.63 +void TPlainTextIOState::WriteRawCharL(TText aChar)
1.64 + {
1.65 + if (iResult.iOutputChars < iParam.iMaxOutputChars)
1.66 + {
1.67 + if (iParam.iOutputInternal)
1.68 + iOutput.WriteL((TUint8*)&aChar,sizeof(TText));
1.69 + else
1.70 + iOutput.WriteUint16L(aChar);
1.71 + iResult.iOutputChars++;
1.72 + }
1.73 + }
1.74 +
1.75 +CPlainTextConverter* CPlainTextConverter::NewLC()
1.76 + {
1.77 + CPlainTextConverter* c = new(ELeave) CPlainTextConverter;
1.78 + CleanupStack::PushL(c);
1.79 + c->iConversionBuffer = new(ELeave) TUint8[EConversionBufferSize];
1.80 + return c;
1.81 + }
1.82 +
1.83 +CPlainTextConverter::~CPlainTextConverter()
1.84 + {
1.85 + delete iConverter;
1.86 + delete [] iConversionBuffer;
1.87 + }
1.88 +
1.89 +/*
1.90 +Prepare to convert between Unicode and a foreign encoding.
1.91 +If aSample is non-null it can be used to guess the foreign encoding, but only if iParam.iGuessForeignEncoding is true.
1.92 +*/
1.93 +void CPlainTextConverter::PrepareToConvertL(TPlainTextIOState& aState,const TDesC8* aSample)
1.94 + {
1.95 + RFs rfs;
1.96 +
1.97 + iConverter = CCnvCharacterSetConverter::NewL();
1.98 + if (aState.iParam.iFileSession == NULL)
1.99 + {
1.100 + TInt error = rfs.Connect();
1.101 + User::LeaveIfError(error);
1.102 +
1.103 + CleanupClosePushL(rfs);
1.104 + }
1.105 + else
1.106 + rfs = *aState.iParam.iFileSession;
1.107 +
1.108 + TUint foreign_encoding = aState.iParam.iForeignEncoding;
1.109 +
1.110 + // Try to guess the foreign encoding.
1.111 + if (aSample && aState.iParam.iGuessForeignEncoding)
1.112 + {
1.113 + CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* charsets =
1.114 + CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC(rfs);
1.115 + TInt confidence = 0;
1.116 + CCnvCharacterSetConverter::AutoDetectCharacterSetL(confidence,foreign_encoding,*charsets,*aSample);
1.117 + CleanupStack::PopAndDestroy(charsets);
1.118 + if (confidence < 50)
1.119 + User::Leave(KErrNotSupported);
1.120 + }
1.121 +
1.122 + if (iConverter->PrepareToConvertToOrFromL(foreign_encoding,rfs) != CCnvCharacterSetConverter::EAvailable)
1.123 + User::Leave(KErrNotSupported);
1.124 + aState.iResult.iForeignEncoding = foreign_encoding;
1.125 + if (aState.iParam.iFileSession == NULL)
1.126 + {
1.127 + CleanupStack::Pop(); // rfs
1.128 + rfs.Close();
1.129 + }
1.130 + }
1.131 +
1.132 +void TPlainTextWriter::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
1.133 + RWriteStream& aOutput,RReadStream& aInput)
1.134 + {
1.135 + TPlainTextWriter writer(aParam,aResult,aOutput,aInput);
1.136 + writer.TranslateHelperL();
1.137 + }
1.138 +
1.139 +TPlainTextWriter::TPlainTextWriter(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
1.140 + RWriteStream& aOutput,RReadStream& aInput):
1.141 + TPlainTextIOState(aParam,aResult,aOutput,aInput),
1.142 + iLineLength(0),
1.143 + iLineBuffer(NULL),
1.144 + iMaxLineBufferLength(0)
1.145 + {
1.146 + }
1.147 +
1.148 +void TPlainTextWriter::TranslateHelperL()
1.149 + {
1.150 + if (iParam.iForeignEncoding)
1.151 + {
1.152 + iConverter = CPlainTextConverter::NewLC();
1.153 + iConverter->PrepareToConvertL(*this,NULL);
1.154 + }
1.155 +
1.156 + if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
1.157 + iMaxLineLength = iParam.iMaxLineLength;
1.158 + else
1.159 + iMaxLineLength = KMaxTInt; // when exporting by paragraph, the wrapping width has no effect
1.160 + if (iMaxLineLength <= 0)
1.161 + iMaxLineLength = KMaxTInt;
1.162 + iLineLength = 0;
1.163 + if (iMaxLineLength < KMaxTInt)
1.164 + iMaxLineBufferLength = iMaxLineLength;
1.165 + else if (iParam.iForeignEncoding)
1.166 + iMaxLineBufferLength = EDefaultLineBufferSize;
1.167 + if (iMaxLineBufferLength)
1.168 + iLineBuffer = new(ELeave) TText[iMaxLineBufferLength];
1.169 + else
1.170 + iLineBuffer = NULL;
1.171 + CleanupStack::PushL(iLineBuffer);
1.172 + TRAPD(error,TranslateToEofL());
1.173 + if (error == KErrEof)
1.174 + error = KErrNone;
1.175 + if (error == KErrNone)
1.176 + {
1.177 + FlushL();
1.178 + iOutput.CommitL();
1.179 + }
1.180 + CleanupStack::Pop(iLineBuffer);
1.181 + delete [] iLineBuffer;
1.182 + if (iConverter)
1.183 + CleanupStack::PopAndDestroy(iConverter);
1.184 + User::LeaveIfError(error);
1.185 + }
1.186 +
1.187 +void TPlainTextWriter::TranslateToEofL()
1.188 + {
1.189 + while (!Finished())
1.190 + {
1.191 + TText c = ReadRawCharL();
1.192 + switch (c)
1.193 + {
1.194 + // Write a CR-LF at a forced line break if organising by line.
1.195 + case CEditableText::ELineBreak:
1.196 + if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
1.197 + {
1.198 + FlushL();
1.199 + WriteNewLineL();
1.200 + }
1.201 + else
1.202 + WriteCharL(c);
1.203 + break;
1.204 +
1.205 + // Write a CR-LF at the end of the paragraph, then an extra one if lines are split by CR-LFs.
1.206 + case CEditableText::EParagraphDelimiter:
1.207 + FlushL();
1.208 + WriteNewLineL();
1.209 + if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
1.210 + WriteNewLineL();
1.211 + break;
1.212 +
1.213 + default:
1.214 + WriteCharL(c);
1.215 + }
1.216 + }
1.217 + }
1.218 +
1.219 +void TPlainTextWriter::FlushL()
1.220 + {
1.221 + if (iLineBuffer)
1.222 + WriteAndConvertL(iLineBuffer,iLineLength);
1.223 + iLineLength = 0;
1.224 + }
1.225 +
1.226 +void TPlainTextWriter::WriteCharL(TText aChar)
1.227 + {
1.228 + if (iLineBuffer)
1.229 + {
1.230 + if (iLineLength >= iMaxLineBufferLength)
1.231 + {
1.232 + int linebreak = iMaxLineBufferLength;
1.233 + int stripped_linebreak = iMaxLineBufferLength;
1.234 +
1.235 + if (iLineLength >= iMaxLineLength)
1.236 + {
1.237 + for (linebreak = iMaxLineLength; linebreak > 0; linebreak--)
1.238 + if (iLineBuffer[linebreak - 1] == ' ')
1.239 + break;
1.240 + if (linebreak == 0)
1.241 + linebreak = iMaxLineLength;
1.242 +
1.243 + // Strip a single trailing space if any; it is added when text is imported.
1.244 + stripped_linebreak = linebreak;
1.245 + if (iLineBuffer[linebreak - 1] == ' ')
1.246 + stripped_linebreak = linebreak - 1;
1.247 + }
1.248 +
1.249 + WriteAndConvertL(iLineBuffer,stripped_linebreak);
1.250 + if (iLineLength >= iMaxLineLength)
1.251 + WriteNewLineL();
1.252 + int i = linebreak;
1.253 + int j = 0;
1.254 + while (i < iMaxLineBufferLength)
1.255 + iLineBuffer[j++] = iLineBuffer[i++];
1.256 + iLineLength = j;
1.257 + }
1.258 + iLineBuffer[iLineLength++] = aChar;
1.259 + }
1.260 + else
1.261 + WriteRawCharL(aChar);
1.262 + }
1.263 +
1.264 +void TPlainTextWriter::WriteNewLineL()
1.265 + {
1.266 + WriteAndConvertL(_S("\x0d\x0a"),2);
1.267 + }
1.268 +
1.269 +void TPlainTextWriter::WriteAndConvertL(const TText* aText,TInt aLength)
1.270 + {
1.271 + if (iConverter)
1.272 + {
1.273 + while (aLength > 0)
1.274 + {
1.275 + TPtrC source(aText,aLength);
1.276 + TPtr8 dest(iConverter->iConversionBuffer,CPlainTextConverter::EConversionBufferSize);
1.277 + int remainder = iConverter->iConverter->ConvertFromUnicode(dest,source);
1.278 + if (remainder < 0)
1.279 + User::Leave(KErrCorrupt);
1.280 + int available = iParam.iMaxOutputChars - iResult.iOutputChars;
1.281 + if (available < dest.Length())
1.282 + dest.SetLength(available);
1.283 + if (dest.Length() > 0)
1.284 + {
1.285 + iOutput.WriteL(dest);
1.286 + iResult.iOutputChars += dest.Length();
1.287 + }
1.288 + int converted = aLength - remainder;
1.289 + aText += converted;
1.290 + aLength -= converted;
1.291 + }
1.292 + }
1.293 + else
1.294 + {
1.295 + while (aLength-- > 0)
1.296 + WriteRawCharL(*aText++);
1.297 + }
1.298 + }
1.299 +
1.300 +TPlainTextReader::TPlainTextReader(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
1.301 + RWriteStream& aOutput,RReadStream& aInput):
1.302 + TPlainTextIOState(aParam,aResult,aOutput,aInput),
1.303 + iInputBuffer(NULL),
1.304 + iInputLength(0),
1.305 + iInputPos(0),
1.306 + iConversionState(CCnvCharacterSetConverter::KStateDefault)
1.307 + {
1.308 + iCheckByteOrder = TRUE;
1.309 + }
1.310 +
1.311 +void TPlainTextReader::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
1.312 + RWriteStream& aOutput,RReadStream& aInput)
1.313 + {
1.314 + TPlainTextReader reader(aParam,aResult,aOutput,aInput);
1.315 + if(reader.iParam.iOrganisation == CPlainText::EOrganiseByLine)
1.316 + {
1.317 + TLineTextWriter txtWriter(reader);
1.318 + TSLBTransaltor slbTranslator(txtWriter);
1.319 + reader.TranslateHelperL(slbTranslator);
1.320 + }
1.321 + else
1.322 + {
1.323 + TParagraphTextWriter txtWriter(reader);
1.324 + TSLBTransaltor slbTranslator(txtWriter);
1.325 + reader.TranslateHelperL(slbTranslator);
1.326 + }
1.327 + }
1.328 +
1.329 +void TPlainTextReader::TranslateHelperL(TSLBTransaltor& aSLBTranslator)
1.330 + {
1.331 + if (iParam.iForeignEncoding || iParam.iGuessForeignEncoding)
1.332 + {
1.333 + iConverter = CPlainTextConverter::NewLC();
1.334 + iInputBuffer = new(ELeave) TText[EInputBufferSize];
1.335 + CleanupStack::PushL(iInputBuffer);
1.336 + }
1.337 + else
1.338 + iInputBuffer = NULL;
1.339 + TRAPD(error,TranslateToEofL(aSLBTranslator));
1.340 + if (error == KErrEof)
1.341 + error = KErrNone;
1.342 + if (error == KErrNone)
1.343 + iOutput.CommitL();
1.344 + if (iConverter)
1.345 + {
1.346 + CleanupStack::Pop(iInputBuffer);
1.347 + delete [] iInputBuffer;
1.348 + CleanupStack::PopAndDestroy(iConverter);
1.349 + }
1.350 + User::LeaveIfError(error);
1.351 + }
1.352 +
1.353 +void TPlainTextReader::TranslateToEofL(TSLBTransaltor& aSLBTranslator)
1.354 + {
1.355 + while(!Finished())
1.356 + {
1.357 + TText c = ReadAndConvertL();
1.358 + aSLBTranslator.ProcessL(c);
1.359 + }
1.360 + aSLBTranslator.FlushL();
1.361 + }
1.362 +
1.363 +TText TPlainTextReader::ReadAndConvertL()
1.364 + {
1.365 + // Read EConversionBufferSize bytes into a conversion buffer (iConversionBuffer).
1.366 + // Using CharConv convert this into unicode and place in a destination buffer (dest).
1.367 + // This may result in some bytes that cannot be converted (remainder) as that
1.368 + // character encoding is truncated.
1.369 + // This remainder is then moved to the begining of the conversion buffer and more
1.370 + // data read in after it, in effect untruncating that last character.
1.371 + // Before this next read takes place the next converted unicode character is returned
1.372 + // until the destination buffer positional pointers reach the end where more data is
1.373 + // required for processing.
1.374 + //
1.375 + if (iConverter && iInputBuffer)
1.376 + {
1.377 + if (iInputPos >= iInputLength)
1.378 + {
1.379 + /*
1.380 + Attempt to read more foreign characters if there are less than 20,
1.381 + which is the current maximum length of a multibyte character sequence for CHARCONV.
1.382 + Use MStreamBuf::ReadL, which doesn't leave on EOF, rather than RReadStream::ReadL,
1.383 + which does, and doesn't tell you how much was actually read.
1.384 + */
1.385 + if (iConverter->iConversionBufferLength < 20)
1.386 + iConverter->iConversionBufferLength +=
1.387 + iInput.Source()->ReadL(iConverter->iConversionBuffer + iConverter->iConversionBufferLength,
1.388 + CPlainTextConverter::EConversionBufferSize - iConverter->iConversionBufferLength);
1.389 +
1.390 + // Create the converter late so we have a sample of foreign text for auto-detection of the encoding.
1.391 + if (!iConverter->iConverter)
1.392 + {
1.393 + TPtrC8 sample(iConverter->iConversionBuffer,iConverter->iConversionBufferLength);
1.394 + iConverter->PrepareToConvertL(*this,&sample);
1.395 + }
1.396 +
1.397 + // Translate from the foreign encoding to Unicode.
1.398 + TPtr dest(iInputBuffer,0,EInputBufferSize);
1.399 + TPtrC8 source(iConverter->iConversionBuffer,iConverter->iConversionBufferLength);
1.400 + int remainder = iConverter->iConverter->ConvertToUnicode(dest,source,iConversionState);
1.401 + if (remainder < 0)
1.402 + User::Leave(KErrCorrupt);
1.403 +
1.404 + // Move the remaining foreign characters if any to the start of the buffer
1.405 + // so that on the next read it can be joined with its truncated part.
1.406 + for (int i = 0, j = iConverter->iConversionBufferLength - remainder; i < remainder; ++i, ++j)
1.407 + iConverter->iConversionBuffer[i] = iConverter->iConversionBuffer[j];
1.408 + iConverter->iConversionBufferLength = remainder;
1.409 +
1.410 + iInputPos = 0;
1.411 + iInputLength = dest.Length();
1.412 + if (iInputLength == 0)
1.413 + User::Leave(KErrEof);
1.414 + }
1.415 + iResult.iInputChars++;
1.416 + return iInputBuffer[iInputPos++];
1.417 + }
1.418 + else
1.419 + return ReadRawCharL();
1.420 + }
1.421 +
1.422 +/**
1.423 +The method processes the imput characters, writing them to the output, but skipping
1.424 +the picture characters (CEditableText::EPictureCharacter).
1.425 +The method is not called directly and should not be called. It implements
1.426 +MOutputChar::OutputCharL(TChar aChar) and is called from TParagraphTextWriter and
1.427 +TLineTextWriter implementations.
1.428 +@param aChar Character to be processed.
1.429 +*/
1.430 +void TPlainTextReader::OutputCharL(TText aChar)
1.431 + {
1.432 + switch(aChar)
1.433 + {
1.434 + case CEditableText::EByteOrderMark :
1.435 + // leading byte order marks are ignored
1.436 + if(iResult.iInputChars > 1)
1.437 + {
1.438 + WriteRawCharL(aChar);
1.439 + }
1.440 + break;
1.441 + case CEditableText::EPictureCharacter:
1.442 + //Picture characters are ignored because they would cause ETEXT to panic when it attempted to find
1.443 + //the picture corresponding to the character.
1.444 + break;
1.445 + default:
1.446 + WriteRawCharL(aChar);
1.447 + break;
1.448 + }
1.449 + }
1.450 +