os/textandloc/textrendering/texthandling/stext/TXTPLAIN.CPP
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
* Implementation of the classes that import and export plain text.
sl@0
    16
*
sl@0
    17
*/
sl@0
    18
sl@0
    19
sl@0
    20
#include "TXTSTD.H"
sl@0
    21
#include "TXTPLAIN.H"
sl@0
    22
#include "charconv.h"
sl@0
    23
sl@0
    24
/*
Construct the state shared by plain text import and export.
Stores the parameters, the result structure and the two streams, starts with no converter,
and leaves both byte-order flags (iSwapInput, iCheckByteOrder) switched off.
*/
TPlainTextIOState::TPlainTextIOState(const CPlainText::TImportExportParam& aParam,
									 CPlainText::TImportExportResult& aResult,
									 RWriteStream& aOutput,
									 RReadStream& aInput):
	iParam(aParam),
	iResult(aResult),
	iOutput(aOutput),
	iInput(aInput),
	iConverter(NULL),
	iSwapInput(FALSE),
	iCheckByteOrder(FALSE)
	{
	// The caller may hand in a result object that was used before, so overwrite it with a
	// default-constructed one to clear the counters.
	aResult = CPlainText::TImportExportResult();
	}
sl@0
    37
sl@0
    38
/*
Read one unconverted character from the input stream and count it in iResult.iInputChars.
If iParam.iInputInternal is set the character's bytes are copied straight from the stream;
otherwise it is read with RReadStream::ReadUint16L.
The two bytes are exchanged when iSwapInput is set. While iCheckByteOrder is set, a reversed
byte order mark is replaced by a normal one and iSwapInput is toggled; the check is then
switched off so that it applies to one character only.
*/
TText TPlainTextIOState::ReadRawCharL()
	{
	TText ch;
	if (!iParam.iInputInternal)
		ch = iInput.ReadUint16L();
	else
		iInput.ReadL((TUint8*)&ch,sizeof(TText));

	if (iSwapInput)
		{
		const TInt newHigh = (ch << 8) & 0xFF00;
		const TInt newLow = (ch >> 8) & 0xFF;
		ch = (TText)(newHigh | newLow);
		}

	if (iCheckByteOrder)
		{
		// Only one character is ever examined for a byte order mark.
		iCheckByteOrder = FALSE;
		if (ch == CEditableText::EReversedByteOrderMark)
			{
			ch = CEditableText::EByteOrderMark;
			iSwapInput = !iSwapInput;
			}
		}

	++iResult.iInputChars;
	return ch;
	}
sl@0
    59
sl@0
    60
/*
Write one unconverted character to the output stream and count it in iResult.iOutputChars.
Nothing is written once iResult.iOutputChars has reached iParam.iMaxOutputChars.
If iParam.iOutputInternal is set the character's bytes are copied straight to the stream;
otherwise it is written with RWriteStream::WriteUint16L.
*/
void TPlainTextIOState::WriteRawCharL(TText aChar)
	{
	// Output limit reached: silently drop the character.
	if (iResult.iOutputChars >= iParam.iMaxOutputChars)
		return;

	if (!iParam.iOutputInternal)
		iOutput.WriteUint16L(aChar);
	else
		iOutput.WriteL((TUint8*)&aChar,sizeof(TText));
	++iResult.iOutputChars;
	}
sl@0
    71
sl@0
    72
/*
Create a CPlainTextConverter, leave it on the cleanup stack and give it a conversion buffer
of EConversionBufferSize bytes. The character set converter itself is not created here;
that is done by PrepareToConvertL.
*/
CPlainTextConverter* CPlainTextConverter::NewLC()
	{
	CPlainTextConverter* self = new(ELeave) CPlainTextConverter;
	CleanupStack::PushL(self);
	// self is already on the cleanup stack, so a failed buffer allocation destroys it.
	self->iConversionBuffer = new(ELeave) TUint8[EConversionBufferSize];
	return self;
	}
sl@0
    79
sl@0
    80
/*
Free the conversion buffer and the character set converter.
*/
CPlainTextConverter::~CPlainTextConverter()
	{
	delete [] iConversionBuffer;
	delete iConverter;
	}
sl@0
    85
sl@0
    86
/*
sl@0
    87
Prepare to convert between Unicode and a foreign encoding.
sl@0
    88
If aSample is non-null it can be used to guess the foreign encoding, but only if iParam.iGuessForeignEncoding is true.
sl@0
    89
*/
sl@0
    90
void CPlainTextConverter::PrepareToConvertL(TPlainTextIOState& aState,const TDesC8* aSample)
sl@0
    91
	{
sl@0
    92
	RFs rfs;
sl@0
    93
sl@0
    94
	iConverter = CCnvCharacterSetConverter::NewL();
sl@0
    95
	if (aState.iParam.iFileSession == NULL)
sl@0
    96
		{
sl@0
    97
		TInt error = rfs.Connect();
sl@0
    98
		User::LeaveIfError(error);
sl@0
    99
				
sl@0
   100
		CleanupClosePushL(rfs);
sl@0
   101
		}
sl@0
   102
	else
sl@0
   103
		rfs = *aState.iParam.iFileSession;
sl@0
   104
sl@0
   105
	TUint foreign_encoding = aState.iParam.iForeignEncoding;
sl@0
   106
	
sl@0
   107
	// Try to guess the foreign encoding.
sl@0
   108
	if (aSample && aState.iParam.iGuessForeignEncoding)
sl@0
   109
		{
sl@0
   110
		CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* charsets =
sl@0
   111
			CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC(rfs);
sl@0
   112
		TInt confidence = 0;
sl@0
   113
		CCnvCharacterSetConverter::AutoDetectCharacterSetL(confidence,foreign_encoding,*charsets,*aSample);
sl@0
   114
		CleanupStack::PopAndDestroy(charsets);
sl@0
   115
		if (confidence < 50)
sl@0
   116
			User::Leave(KErrNotSupported);
sl@0
   117
		}
sl@0
   118
sl@0
   119
	if (iConverter->PrepareToConvertToOrFromL(foreign_encoding,rfs) != CCnvCharacterSetConverter::EAvailable)
sl@0
   120
		User::Leave(KErrNotSupported);
sl@0
   121
	aState.iResult.iForeignEncoding = foreign_encoding;
sl@0
   122
	if (aState.iParam.iFileSession == NULL)
sl@0
   123
		{
sl@0
   124
		CleanupStack::Pop(); // rfs
sl@0
   125
		rfs.Close();
sl@0
   126
		}
sl@0
   127
	}
sl@0
   128
sl@0
   129
/*
Export text: build a temporary TPlainTextWriter from the arguments and run its TranslateHelperL.
*/
void TPlainTextWriter::TranslateL(const CPlainText::TImportExportParam& aParam,
								  CPlainText::TImportExportResult& aResult,
								  RWriteStream& aOutput,
								  RReadStream& aInput)
	{
	TPlainTextWriter exporter(aParam,aResult,aOutput,aInput);
	exporter.TranslateHelperL();
	}
sl@0
   135
sl@0
   136
/*
Construct a writer with an empty line and no line buffer.
The line buffer and the maximum line length are set up later by TranslateHelperL.
*/
TPlainTextWriter::TPlainTextWriter(const CPlainText::TImportExportParam& aParam,
								   CPlainText::TImportExportResult& aResult,
								   RWriteStream& aOutput,
								   RReadStream& aInput):
	TPlainTextIOState(aParam,aResult,aOutput,aInput),
	iLineLength(0),
	iLineBuffer(NULL),
	iMaxLineBufferLength(0)
	{
	}
sl@0
   144
sl@0
   145
void TPlainTextWriter::TranslateHelperL()
sl@0
   146
	{
sl@0
   147
	if (iParam.iForeignEncoding)
sl@0
   148
		{
sl@0
   149
		iConverter = CPlainTextConverter::NewLC();
sl@0
   150
		iConverter->PrepareToConvertL(*this,NULL);
sl@0
   151
		}
sl@0
   152
sl@0
   153
	if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
sl@0
   154
		iMaxLineLength = iParam.iMaxLineLength;
sl@0
   155
	else
sl@0
   156
		iMaxLineLength = KMaxTInt; // when exporting by paragraph, the wrapping width has no effect
sl@0
   157
	if (iMaxLineLength <= 0)
sl@0
   158
		iMaxLineLength = KMaxTInt;
sl@0
   159
	iLineLength = 0;
sl@0
   160
	if (iMaxLineLength < KMaxTInt)
sl@0
   161
		iMaxLineBufferLength = iMaxLineLength;
sl@0
   162
	else if (iParam.iForeignEncoding)
sl@0
   163
		iMaxLineBufferLength = EDefaultLineBufferSize;
sl@0
   164
	if (iMaxLineBufferLength)
sl@0
   165
		iLineBuffer = new(ELeave) TText[iMaxLineBufferLength];
sl@0
   166
	else
sl@0
   167
		iLineBuffer = NULL;
sl@0
   168
	CleanupStack::PushL(iLineBuffer);
sl@0
   169
	TRAPD(error,TranslateToEofL());
sl@0
   170
	if (error == KErrEof)
sl@0
   171
		error = KErrNone;
sl@0
   172
	if (error == KErrNone)
sl@0
   173
		{
sl@0
   174
		FlushL();
sl@0
   175
		iOutput.CommitL();
sl@0
   176
		}
sl@0
   177
	CleanupStack::Pop(iLineBuffer);
sl@0
   178
	delete [] iLineBuffer;
sl@0
   179
	if (iConverter)
sl@0
   180
		CleanupStack::PopAndDestroy(iConverter);
sl@0
   181
	User::LeaveIfError(error);
sl@0
   182
	}
sl@0
   183
sl@0
   184
void TPlainTextWriter::TranslateToEofL()
sl@0
   185
	{
sl@0
   186
	while (!Finished())
sl@0
   187
		{
sl@0
   188
		TText c = ReadRawCharL();
sl@0
   189
		switch (c)
sl@0
   190
			{
sl@0
   191
			// Write a CR-LF at a forced line break if organising by line.
sl@0
   192
			case CEditableText::ELineBreak:
sl@0
   193
				if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
sl@0
   194
					{
sl@0
   195
					FlushL();
sl@0
   196
					WriteNewLineL();
sl@0
   197
					}
sl@0
   198
				else
sl@0
   199
					WriteCharL(c);
sl@0
   200
				break;
sl@0
   201
sl@0
   202
			// Write a CR-LF at the end of the paragraph, then an extra one if lines are split by CR-LFs.
sl@0
   203
			case CEditableText::EParagraphDelimiter:
sl@0
   204
				FlushL();
sl@0
   205
				WriteNewLineL();
sl@0
   206
				if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
sl@0
   207
					WriteNewLineL();
sl@0
   208
				break;
sl@0
   209
sl@0
   210
			default:
sl@0
   211
				WriteCharL(c);
sl@0
   212
			}
sl@0
   213
		}
sl@0
   214
	}
sl@0
   215
sl@0
   216
void TPlainTextWriter::FlushL()
sl@0
   217
	{
sl@0
   218
	if (iLineBuffer)
sl@0
   219
		WriteAndConvertL(iLineBuffer,iLineLength);
sl@0
   220
	iLineLength = 0;
sl@0
   221
	}
sl@0
   222
sl@0
   223
/*
Add a character to the output.
Without a line buffer the character is written immediately with WriteRawCharL.
With a line buffer the character is appended to it. If the buffer is full it is first written out:
when the line has reached iMaxLineLength it is broken after the last space found in the first
iMaxLineLength characters (or at iMaxLineLength if there is no space), a single space ending
the broken line is dropped, a CR-LF is written, and the text after the break is moved to the
start of the buffer.
*/
void TPlainTextWriter::WriteCharL(TText aChar)
	{
	if (!iLineBuffer)
		{
		WriteRawCharL(aChar);
		return;
		}

	if (iLineLength >= iMaxLineBufferLength)
		{
		const TBool wrapLine = (iLineLength >= iMaxLineLength);
		TInt breakPos = iMaxLineBufferLength;	// index of the first character carried over
		TInt writeLength = iMaxLineBufferLength;	// number of characters written out now

		if (wrapLine)
			{
			// Scan backwards for a space; breakPos ends up just after it.
			breakPos = iMaxLineLength;
			while (breakPos > 0 && iLineBuffer[breakPos - 1] != ' ')
				--breakPos;
			if (breakPos == 0)
				breakPos = iMaxLineLength;

			// Strip a single trailing space if any; it is added when text is imported.
			writeLength = breakPos;
			if (iLineBuffer[breakPos - 1] == ' ')
				writeLength = breakPos - 1;
			}

		WriteAndConvertL(iLineBuffer,writeLength);
		if (wrapLine)
			WriteNewLineL();

		// Move the characters after the break to the start of the buffer.
		TInt kept = 0;
		for (TInt from = breakPos; from < iMaxLineBufferLength; ++from)
			iLineBuffer[kept++] = iLineBuffer[from];
		iLineLength = kept;
		}

	iLineBuffer[iLineLength++] = aChar;
	}
sl@0
   260
sl@0
   261
void TPlainTextWriter::WriteNewLineL()
sl@0
   262
	{
sl@0
   263
	WriteAndConvertL(_S("\x0d\x0a"),2);
sl@0
   264
	}
sl@0
   265
sl@0
   266
void TPlainTextWriter::WriteAndConvertL(const TText* aText,TInt aLength)
sl@0
   267
	{
sl@0
   268
	if (iConverter)
sl@0
   269
		{
sl@0
   270
		while (aLength > 0)
sl@0
   271
			{
sl@0
   272
			TPtrC source(aText,aLength);
sl@0
   273
			TPtr8 dest(iConverter->iConversionBuffer,CPlainTextConverter::EConversionBufferSize);
sl@0
   274
			int remainder = iConverter->iConverter->ConvertFromUnicode(dest,source);
sl@0
   275
			if (remainder < 0)
sl@0
   276
				User::Leave(KErrCorrupt);
sl@0
   277
			int available = iParam.iMaxOutputChars - iResult.iOutputChars;
sl@0
   278
			if (available < dest.Length())
sl@0
   279
				dest.SetLength(available);
sl@0
   280
			if (dest.Length() > 0)
sl@0
   281
				{
sl@0
   282
				iOutput.WriteL(dest);
sl@0
   283
				iResult.iOutputChars += dest.Length();
sl@0
   284
				}
sl@0
   285
			int converted = aLength - remainder;
sl@0
   286
			aText += converted;
sl@0
   287
			aLength -= converted;
sl@0
   288
			}
sl@0
   289
		}
sl@0
   290
	else
sl@0
   291
		{
sl@0
   292
		while (aLength-- > 0)
sl@0
   293
			WriteRawCharL(*aText++);
sl@0
   294
		}
sl@0
   295
	}
sl@0
   296
sl@0
   297
/*
Construct a reader with no input buffer and the converter state set to its default.
Byte order checking is switched on, so ReadRawCharL tests for a reversed byte order mark.
*/
TPlainTextReader::TPlainTextReader(const CPlainText::TImportExportParam& aParam,
								   CPlainText::TImportExportResult& aResult,
								   RWriteStream& aOutput,
								   RReadStream& aInput):
	TPlainTextIOState(aParam,aResult,aOutput,aInput),
	iInputBuffer(NULL),
	iInputLength(0),
	iInputPos(0),
	iConversionState(CCnvCharacterSetConverter::KStateDefault)
	{
	// The base class constructor leaves this off; importing needs it on.
	iCheckByteOrder = TRUE;
	}
sl@0
   307
sl@0
   308
/*
Import text: build a temporary TPlainTextReader from the arguments and run its TranslateHelperL.
The reader's output goes through a TSLBTransaltor wrapped around a TLineTextWriter when the text
is organised by line, or around a TParagraphTextWriter otherwise.
*/
void TPlainTextReader::TranslateL(const CPlainText::TImportExportParam& aParam,
								  CPlainText::TImportExportResult& aResult,
								  RWriteStream& aOutput,
								  RReadStream& aInput)
	{
	TPlainTextReader importer(aParam,aResult,aOutput,aInput);

	if (importer.iParam.iOrganisation != CPlainText::EOrganiseByLine)
		{
		TParagraphTextWriter paragraphWriter(importer);
		TSLBTransaltor translator(paragraphWriter);
		importer.TranslateHelperL(translator);
		return;
		}

	TLineTextWriter lineWriter(importer);
	TSLBTransaltor translator(lineWriter);
	importer.TranslateHelperL(translator);
	}
sl@0
   325
sl@0
   326
/*
Run a complete import through aSLBTranslator.
A converter and an input buffer of EInputBufferSize characters are created if a foreign encoding
was specified or is to be guessed. The input is then translated until it runs out and the output
is committed. KErrEof from the translation loop is the normal way of ending and is not reported
as an error; any other error is passed on by leaving after the buffers have been released.
*/
void TPlainTextReader::TranslateHelperL(TSLBTransaltor& aSLBTranslator)
	{
	iInputBuffer = NULL;
	if (iParam.iForeignEncoding || iParam.iGuessForeignEncoding)
		{
		iConverter = CPlainTextConverter::NewLC();
		iInputBuffer = new(ELeave) TText[EInputBufferSize];
		CleanupStack::PushL(iInputBuffer);
		}

	TRAPD(trapResult,TranslateToEofL(aSLBTranslator));
	const TInt error = (trapResult == KErrEof) ? KErrNone : trapResult;
	if (error == KErrNone)
		iOutput.CommitL();

	// The input buffer was pushed after the converter, so it comes off the cleanup stack first.
	if (iConverter)
		{
		CleanupStack::Pop(iInputBuffer);
		delete [] iInputBuffer;
		CleanupStack::PopAndDestroy(iConverter);
		}
	User::LeaveIfError(error);
	}
sl@0
   349
sl@0
   350
/*
Pass each character read by ReadAndConvertL to aSLBTranslator until Finished() returns true,
then flush the translator. If a read leaves, the flush is not reached.
*/
void TPlainTextReader::TranslateToEofL(TSLBTransaltor& aSLBTranslator)
	{
	for (;;)
		{
		if (Finished())
			break;
		const TText ch = ReadAndConvertL();
		aSLBTranslator.ProcessL(ch);
		}
	aSLBTranslator.FlushL();
	}
sl@0
   359
sl@0
   360
/*
Return the next input character as Unicode and count it in iResult.iInputChars.
Without a converter and input buffer the character comes straight from ReadRawCharL.
Otherwise characters are handed out of iInputBuffer, which is refilled when it runs out:
foreign bytes are read into the conversion buffer and converted to Unicode with CHARCONV.
Bytes at the end that could not be converted, for example because a multibyte character was cut
in two, are moved to the beginning of the conversion buffer so that the next read completes them.
Leaves with KErrCorrupt if the conversion fails and with KErrEof when no more characters
can be produced.
*/
TText TPlainTextReader::ReadAndConvertL()
	{
	if (!(iConverter && iInputBuffer))
		return ReadRawCharL();

	if (iInputPos >= iInputLength)
		{
		CPlainTextConverter& conv = *iConverter;

		/*
		Attempt to read more foreign characters if there are less than 20,
		which is the current maximum length of a multibyte character sequence for CHARCONV.
		Use MStreamBuf::ReadL, which doesn't leave on EOF, rather than RReadStream::ReadL,
		which does, and doesn't tell you how much was actually read.
		*/
		if (conv.iConversionBufferLength < 20)
			{
			TUint8* freeSpace = conv.iConversionBuffer + conv.iConversionBufferLength;
			const TInt freeLength = CPlainTextConverter::EConversionBufferSize - conv.iConversionBufferLength;
			conv.iConversionBufferLength += iInput.Source()->ReadL(freeSpace,freeLength);
			}

		// Create the converter late so we have a sample of foreign text for auto-detection of the encoding.
		if (!conv.iConverter)
			{
			TPtrC8 sample(conv.iConversionBuffer,conv.iConversionBufferLength);
			conv.PrepareToConvertL(*this,&sample);
			}

		// Translate from the foreign encoding to Unicode.
		TPtr unicode(iInputBuffer,0,EInputBufferSize);
		TPtrC8 foreign(conv.iConversionBuffer,conv.iConversionBufferLength);
		const TInt remainder = conv.iConverter->ConvertToUnicode(unicode,foreign,iConversionState);
		if (remainder < 0)
			User::Leave(KErrCorrupt);

		// Move the remaining foreign characters, if any, to the start of the buffer
		// so that on the next read they can be joined with their truncated part.
		const TUint8* unconverted = conv.iConversionBuffer + (conv.iConversionBufferLength - remainder);
		for (TInt i = 0; i < remainder; ++i)
			conv.iConversionBuffer[i] = unconverted[i];
		conv.iConversionBufferLength = remainder;

		iInputPos = 0;
		iInputLength = unicode.Length();
		if (iInputLength == 0)
			User::Leave(KErrEof);
		}

	++iResult.iInputChars;
	return iInputBuffer[iInputPos++];
	}
sl@0
   418
sl@0
   419
/**
sl@0
   420
The method processes the imput characters, writing them to the output, but skipping 
sl@0
   421
the picture characters (CEditableText::EPictureCharacter).
sl@0
   422
The method is not called directly and should not be called. It implements 
sl@0
   423
MOutputChar::OutputCharL(TChar aChar) and is called from TParagraphTextWriter and 
sl@0
   424
TLineTextWriter implementations.
sl@0
   425
@param aChar Character to be processed.
sl@0
   426
*/
sl@0
   427
void TPlainTextReader::OutputCharL(TText aChar)
sl@0
   428
	{
sl@0
   429
	switch(aChar)
sl@0
   430
		{
sl@0
   431
		case CEditableText::EByteOrderMark :
sl@0
   432
			// leading byte order marks are ignored
sl@0
   433
			if(iResult.iInputChars > 1)
sl@0
   434
				{
sl@0
   435
				WriteRawCharL(aChar);
sl@0
   436
				}
sl@0
   437
			break;
sl@0
   438
		case CEditableText::EPictureCharacter:
sl@0
   439
			//Picture characters are ignored because they would cause ETEXT to panic when it attempted to find
sl@0
   440
			//the picture corresponding to the character.
sl@0
   441
			break;
sl@0
   442
		default:
sl@0
   443
			WriteRawCharL(aChar);
sl@0
   444
			break;
sl@0
   445
		}
sl@0
   446
	}
sl@0
   447