os/textandloc/textrendering/texthandling/stext/TXTPLAIN.CPP
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 * Implementation of the classes that import and export plain text.
    16 *
    17 */
    18 
    19 
    20 #include "TXTSTD.H"
    21 #include "TXTPLAIN.H"
    22 #include "charconv.h"
    23 
    24 TPlainTextIOState::	TPlainTextIOState(const CPlainText::TImportExportParam& aParam,
    25 									  CPlainText::TImportExportResult& aResult,
    26 									  RWriteStream& aOutput,RReadStream& aInput):
    27 	iParam(aParam),
    28 	iResult(aResult),
    29 	iOutput(aOutput),
    30 	iInput(aInput),
    31 	iConverter(NULL),
    32 	iSwapInput(FALSE),
    33 	iCheckByteOrder(FALSE)
    34 	{
    35 	aResult = CPlainText::TImportExportResult();  // zero output counters; aResult may be re-used.
    36 	}
    37 
    38 TText TPlainTextIOState::ReadRawCharL()
    39 	{
    40 	TText c;
    41 	if (iParam.iInputInternal)
    42 		iInput.ReadL((TUint8*)&c,sizeof(TText));
    43 	else
    44 		c = iInput.ReadUint16L();
    45 	if (iSwapInput)
    46 		c = (TText)(((c << 8) & 0xFF00) | ((c >> 8) & 0xFF));
    47 	if (iCheckByteOrder)
    48 		{
    49 		if (c == CEditableText::EReversedByteOrderMark)
    50 			{
    51 			c = CEditableText::EByteOrderMark;
    52 			iSwapInput = !iSwapInput;
    53 			}
    54 		iCheckByteOrder = FALSE;
    55 		}
    56 	iResult.iInputChars++;
    57 	return c;
    58 	}
    59 
    60 void TPlainTextIOState::WriteRawCharL(TText aChar)
    61 	{
    62 	if (iResult.iOutputChars < iParam.iMaxOutputChars)
    63 		{
    64 		if (iParam.iOutputInternal)
    65 			iOutput.WriteL((TUint8*)&aChar,sizeof(TText));
    66 		else
    67 			iOutput.WriteUint16L(aChar);
    68 		iResult.iOutputChars++;
    69 		}
    70 	}
    71 
    72 CPlainTextConverter* CPlainTextConverter::NewLC()
    73 	{
    74 	CPlainTextConverter* c = new(ELeave) CPlainTextConverter;
    75 	CleanupStack::PushL(c);
    76 	c->iConversionBuffer = new(ELeave) TUint8[EConversionBufferSize];
    77 	return c;
    78 	}
    79 
    80 CPlainTextConverter::~CPlainTextConverter()
    81 	{
    82 	delete iConverter;
    83 	delete [] iConversionBuffer;
    84 	}
    85 
    86 /*
    87 Prepare to convert between Unicode and a foreign encoding.
    88 If aSample is non-null it can be used to guess the foreign encoding, but only if iParam.iGuessForeignEncoding is true.
    89 */
    90 void CPlainTextConverter::PrepareToConvertL(TPlainTextIOState& aState,const TDesC8* aSample)
    91 	{
    92 	RFs rfs;
    93 
    94 	iConverter = CCnvCharacterSetConverter::NewL();
    95 	if (aState.iParam.iFileSession == NULL)
    96 		{
    97 		TInt error = rfs.Connect();
    98 		User::LeaveIfError(error);
    99 				
   100 		CleanupClosePushL(rfs);
   101 		}
   102 	else
   103 		rfs = *aState.iParam.iFileSession;
   104 
   105 	TUint foreign_encoding = aState.iParam.iForeignEncoding;
   106 	
   107 	// Try to guess the foreign encoding.
   108 	if (aSample && aState.iParam.iGuessForeignEncoding)
   109 		{
   110 		CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* charsets =
   111 			CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC(rfs);
   112 		TInt confidence = 0;
   113 		CCnvCharacterSetConverter::AutoDetectCharacterSetL(confidence,foreign_encoding,*charsets,*aSample);
   114 		CleanupStack::PopAndDestroy(charsets);
   115 		if (confidence < 50)
   116 			User::Leave(KErrNotSupported);
   117 		}
   118 
   119 	if (iConverter->PrepareToConvertToOrFromL(foreign_encoding,rfs) != CCnvCharacterSetConverter::EAvailable)
   120 		User::Leave(KErrNotSupported);
   121 	aState.iResult.iForeignEncoding = foreign_encoding;
   122 	if (aState.iParam.iFileSession == NULL)
   123 		{
   124 		CleanupStack::Pop(); // rfs
   125 		rfs.Close();
   126 		}
   127 	}
   128 
   129 void TPlainTextWriter::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
   130 								  RWriteStream& aOutput,RReadStream& aInput)
   131 	{
   132 	TPlainTextWriter writer(aParam,aResult,aOutput,aInput);
   133 	writer.TranslateHelperL();
   134 	}
   135 
   136 TPlainTextWriter::TPlainTextWriter(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
   137 								   RWriteStream& aOutput,RReadStream& aInput):
   138 	TPlainTextIOState(aParam,aResult,aOutput,aInput),
   139 	iLineLength(0),
   140 	iLineBuffer(NULL),
   141 	iMaxLineBufferLength(0)
   142 	{
   143 	}
   144 
   145 void TPlainTextWriter::TranslateHelperL()
   146 	{
   147 	if (iParam.iForeignEncoding)
   148 		{
   149 		iConverter = CPlainTextConverter::NewLC();
   150 		iConverter->PrepareToConvertL(*this,NULL);
   151 		}
   152 
   153 	if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
   154 		iMaxLineLength = iParam.iMaxLineLength;
   155 	else
   156 		iMaxLineLength = KMaxTInt; // when exporting by paragraph, the wrapping width has no effect
   157 	if (iMaxLineLength <= 0)
   158 		iMaxLineLength = KMaxTInt;
   159 	iLineLength = 0;
   160 	if (iMaxLineLength < KMaxTInt)
   161 		iMaxLineBufferLength = iMaxLineLength;
   162 	else if (iParam.iForeignEncoding)
   163 		iMaxLineBufferLength = EDefaultLineBufferSize;
   164 	if (iMaxLineBufferLength)
   165 		iLineBuffer = new(ELeave) TText[iMaxLineBufferLength];
   166 	else
   167 		iLineBuffer = NULL;
   168 	CleanupStack::PushL(iLineBuffer);
   169 	TRAPD(error,TranslateToEofL());
   170 	if (error == KErrEof)
   171 		error = KErrNone;
   172 	if (error == KErrNone)
   173 		{
   174 		FlushL();
   175 		iOutput.CommitL();
   176 		}
   177 	CleanupStack::Pop(iLineBuffer);
   178 	delete [] iLineBuffer;
   179 	if (iConverter)
   180 		CleanupStack::PopAndDestroy(iConverter);
   181 	User::LeaveIfError(error);
   182 	}
   183 
   184 void TPlainTextWriter::TranslateToEofL()
   185 	{
   186 	while (!Finished())
   187 		{
   188 		TText c = ReadRawCharL();
   189 		switch (c)
   190 			{
   191 			// Write a CR-LF at a forced line break if organising by line.
   192 			case CEditableText::ELineBreak:
   193 				if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
   194 					{
   195 					FlushL();
   196 					WriteNewLineL();
   197 					}
   198 				else
   199 					WriteCharL(c);
   200 				break;
   201 
   202 			// Write a CR-LF at the end of the paragraph, then an extra one if lines are split by CR-LFs.
   203 			case CEditableText::EParagraphDelimiter:
   204 				FlushL();
   205 				WriteNewLineL();
   206 				if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
   207 					WriteNewLineL();
   208 				break;
   209 
   210 			default:
   211 				WriteCharL(c);
   212 			}
   213 		}
   214 	}
   215 
   216 void TPlainTextWriter::FlushL()
   217 	{
   218 	if (iLineBuffer)
   219 		WriteAndConvertL(iLineBuffer,iLineLength);
   220 	iLineLength = 0;
   221 	}
   222 
   223 void TPlainTextWriter::WriteCharL(TText aChar)
   224 	{
   225 	if (iLineBuffer)
   226 		{
   227 		if (iLineLength >= iMaxLineBufferLength)
   228 			{
   229 			int linebreak = iMaxLineBufferLength;
   230 			int stripped_linebreak = iMaxLineBufferLength;
   231 
   232 			if (iLineLength >= iMaxLineLength)
   233 				{
   234 				for (linebreak = iMaxLineLength; linebreak > 0; linebreak--)
   235 					if (iLineBuffer[linebreak - 1] == ' ')
   236 						break;
   237 				if (linebreak == 0)
   238 					linebreak = iMaxLineLength;
   239 
   240 				// Strip a single trailing space if any; it is added when text is imported.
   241 				stripped_linebreak = linebreak;
   242 				if (iLineBuffer[linebreak - 1] == ' ')
   243 					stripped_linebreak = linebreak - 1;
   244 				}
   245 
   246 			WriteAndConvertL(iLineBuffer,stripped_linebreak);
   247 			if (iLineLength >= iMaxLineLength)
   248 				WriteNewLineL();
   249 			int i = linebreak;
   250 			int j = 0;
   251 			while (i < iMaxLineBufferLength)
   252 				iLineBuffer[j++] = iLineBuffer[i++];
   253 			iLineLength = j;
   254 			}
   255 		iLineBuffer[iLineLength++] = aChar;
   256 		}
   257 	else
   258 		WriteRawCharL(aChar);
   259 	}
   260 
   261 void TPlainTextWriter::WriteNewLineL()
   262 	{
   263 	WriteAndConvertL(_S("\x0d\x0a"),2);
   264 	}
   265 
   266 void TPlainTextWriter::WriteAndConvertL(const TText* aText,TInt aLength)
   267 	{
   268 	if (iConverter)
   269 		{
   270 		while (aLength > 0)
   271 			{
   272 			TPtrC source(aText,aLength);
   273 			TPtr8 dest(iConverter->iConversionBuffer,CPlainTextConverter::EConversionBufferSize);
   274 			int remainder = iConverter->iConverter->ConvertFromUnicode(dest,source);
   275 			if (remainder < 0)
   276 				User::Leave(KErrCorrupt);
   277 			int available = iParam.iMaxOutputChars - iResult.iOutputChars;
   278 			if (available < dest.Length())
   279 				dest.SetLength(available);
   280 			if (dest.Length() > 0)
   281 				{
   282 				iOutput.WriteL(dest);
   283 				iResult.iOutputChars += dest.Length();
   284 				}
   285 			int converted = aLength - remainder;
   286 			aText += converted;
   287 			aLength -= converted;
   288 			}
   289 		}
   290 	else
   291 		{
   292 		while (aLength-- > 0)
   293 			WriteRawCharL(*aText++);
   294 		}
   295 	}
   296 
   297 TPlainTextReader::TPlainTextReader(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
   298 								   RWriteStream& aOutput,RReadStream& aInput):
   299 	TPlainTextIOState(aParam,aResult,aOutput,aInput),
   300 	iInputBuffer(NULL),
   301 	iInputLength(0),
   302 	iInputPos(0),
   303 	iConversionState(CCnvCharacterSetConverter::KStateDefault)
   304 	{
   305 	iCheckByteOrder = TRUE;
   306 	}
   307 
   308 void TPlainTextReader::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
   309 								  RWriteStream& aOutput,RReadStream& aInput)
   310 	{
   311 	TPlainTextReader reader(aParam,aResult,aOutput,aInput);
   312 	if(reader.iParam.iOrganisation == CPlainText::EOrganiseByLine)
   313 		{
   314 		TLineTextWriter txtWriter(reader);
   315 		TSLBTransaltor slbTranslator(txtWriter);
   316 		reader.TranslateHelperL(slbTranslator);
   317 		}
   318 	else
   319 		{
   320 		TParagraphTextWriter txtWriter(reader);
   321 		TSLBTransaltor slbTranslator(txtWriter);
   322 		reader.TranslateHelperL(slbTranslator);
   323 		}
   324 	}
   325 
   326 void TPlainTextReader::TranslateHelperL(TSLBTransaltor& aSLBTranslator)
   327 	{
   328 	if (iParam.iForeignEncoding || iParam.iGuessForeignEncoding)
   329 		{
   330 		iConverter = CPlainTextConverter::NewLC();
   331 		iInputBuffer = new(ELeave) TText[EInputBufferSize];
   332 		CleanupStack::PushL(iInputBuffer);
   333 		}
   334 	else
   335 		iInputBuffer = NULL;
   336 	TRAPD(error,TranslateToEofL(aSLBTranslator));
   337 	if (error == KErrEof)
   338 		error = KErrNone;
   339 	if (error == KErrNone)
   340 		iOutput.CommitL();
   341 	if (iConverter)
   342 		{
   343 		CleanupStack::Pop(iInputBuffer);
   344 		delete [] iInputBuffer;
   345 		CleanupStack::PopAndDestroy(iConverter);
   346 		}
   347 	User::LeaveIfError(error);
   348 	}
   349 
   350 void TPlainTextReader::TranslateToEofL(TSLBTransaltor& aSLBTranslator)
   351 	{
   352 	while(!Finished())
   353 		{
   354 		TText c = ReadAndConvertL();
   355 		aSLBTranslator.ProcessL(c);
   356 		}
   357 	aSLBTranslator.FlushL();
   358 	}
   359 
   360 TText TPlainTextReader::ReadAndConvertL()
   361 	{
   362 	// Read EConversionBufferSize bytes into a conversion buffer (iConversionBuffer).
   363 	// Using CharConv convert this into unicode and place in a destination buffer (dest).
   364 	// This may result in some bytes that cannot be converted (remainder) as that
   365 	// character encoding is truncated.
   366 	// This remainder is then moved to the begining of the conversion buffer and more
   367 	// data read in after it, in effect untruncating that last character.
   368 	// Before this next read takes place the next converted unicode character is returned
   369 	// until the destination buffer positional pointers reach the end where more data is
   370 	// required for processing.
   371 	//  
   372 	if (iConverter && iInputBuffer)
   373 		{
   374 		if (iInputPos >= iInputLength)
   375 			{
   376 			/*
   377 			Attempt to read more foreign characters if there are less than 20,
   378 			which is the current maximum length of a multibyte character sequence for CHARCONV.
   379 			Use MStreamBuf::ReadL, which doesn't leave on EOF, rather than RReadStream::ReadL,
   380 			which does, and doesn't tell you how much was actually read.
   381 			*/
   382 			if (iConverter->iConversionBufferLength < 20)
   383 				iConverter->iConversionBufferLength +=
   384 					iInput.Source()->ReadL(iConverter->iConversionBuffer + iConverter->iConversionBufferLength,
   385 										   CPlainTextConverter::EConversionBufferSize - iConverter->iConversionBufferLength);
   386 
   387 			// Create the converter late so we have a sample of foreign text for auto-detection of the encoding.
   388 			if (!iConverter->iConverter)
   389 				{
   390 				TPtrC8 sample(iConverter->iConversionBuffer,iConverter->iConversionBufferLength);
   391 				iConverter->PrepareToConvertL(*this,&sample);
   392 				}
   393 
   394 			// Translate from the foreign encoding to Unicode.
   395 			TPtr dest(iInputBuffer,0,EInputBufferSize);
   396 			TPtrC8 source(iConverter->iConversionBuffer,iConverter->iConversionBufferLength);
   397 			int remainder = iConverter->iConverter->ConvertToUnicode(dest,source,iConversionState);
   398 			if (remainder < 0)
   399 				User::Leave(KErrCorrupt);
   400 
   401 			// Move the remaining foreign characters if any to the start of the buffer
   402 			// so that on the next read it can be joined with its truncated part.
   403 			for (int i = 0, j = iConverter->iConversionBufferLength - remainder; i < remainder; ++i, ++j)
   404 				iConverter->iConversionBuffer[i] = iConverter->iConversionBuffer[j];
   405 			iConverter->iConversionBufferLength = remainder;
   406 
   407 			iInputPos = 0;
   408 			iInputLength = dest.Length();
   409 			if (iInputLength == 0)
   410 				User::Leave(KErrEof);
   411 			}
   412 		iResult.iInputChars++;
   413 		return iInputBuffer[iInputPos++];
   414 		}
   415 	else
   416 		return ReadRawCharL();
   417 	}
   418 
   419 /**
   420 The method processes the imput characters, writing them to the output, but skipping 
   421 the picture characters (CEditableText::EPictureCharacter).
   422 The method is not called directly and should not be called. It implements 
   423 MOutputChar::OutputCharL(TChar aChar) and is called from TParagraphTextWriter and 
   424 TLineTextWriter implementations.
   425 @param aChar Character to be processed.
   426 */
   427 void TPlainTextReader::OutputCharL(TText aChar)
   428 	{
   429 	switch(aChar)
   430 		{
   431 		case CEditableText::EByteOrderMark :
   432 			// leading byte order marks are ignored
   433 			if(iResult.iInputChars > 1)
   434 				{
   435 				WriteRawCharL(aChar);
   436 				}
   437 			break;
   438 		case CEditableText::EPictureCharacter:
   439 			//Picture characters are ignored because they would cause ETEXT to panic when it attempted to find
   440 			//the picture corresponding to the character.
   441 			break;
   442 		default:
   443 			WriteRawCharL(aChar);
   444 			break;
   445 		}
   446 	}
   447