sl@0: /* sl@0: * Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * Implementation of the classes that import and export plain text. sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include "TXTSTD.H" sl@0: #include "TXTPLAIN.H" sl@0: #include "charconv.h" sl@0: sl@0: TPlainTextIOState:: TPlainTextIOState(const CPlainText::TImportExportParam& aParam, sl@0: CPlainText::TImportExportResult& aResult, sl@0: RWriteStream& aOutput,RReadStream& aInput): sl@0: iParam(aParam), sl@0: iResult(aResult), sl@0: iOutput(aOutput), sl@0: iInput(aInput), sl@0: iConverter(NULL), sl@0: iSwapInput(FALSE), sl@0: iCheckByteOrder(FALSE) sl@0: { sl@0: aResult = CPlainText::TImportExportResult(); // zero output counters; aResult may be re-used. sl@0: } sl@0: sl@0: TText TPlainTextIOState::ReadRawCharL() sl@0: { sl@0: TText c; sl@0: if (iParam.iInputInternal) sl@0: iInput.ReadL((TUint8*)&c,sizeof(TText)); sl@0: else sl@0: c = iInput.ReadUint16L(); sl@0: if (iSwapInput) sl@0: c = (TText)(((c << 8) & 0xFF00) | ((c >> 8) & 0xFF)); sl@0: if (iCheckByteOrder) sl@0: { sl@0: if (c == CEditableText::EReversedByteOrderMark) sl@0: { sl@0: c = CEditableText::EByteOrderMark; sl@0: iSwapInput = !iSwapInput; sl@0: } sl@0: iCheckByteOrder = FALSE; sl@0: } sl@0: iResult.iInputChars++; sl@0: return c; sl@0: } sl@0: sl@0: void TPlainTextIOState::WriteRawCharL(TText aChar) sl@0: { sl@0: if (iResult.iOutputChars < iParam.iMaxOutputChars) sl@0: { sl@0: if (iParam.iOutputInternal) sl@0: iOutput.WriteL((TUint8*)&aChar,sizeof(TText)); sl@0: else sl@0: iOutput.WriteUint16L(aChar); sl@0: iResult.iOutputChars++; sl@0: } sl@0: } sl@0: sl@0: CPlainTextConverter* CPlainTextConverter::NewLC() sl@0: { sl@0: CPlainTextConverter* c = new(ELeave) CPlainTextConverter; sl@0: CleanupStack::PushL(c); sl@0: c->iConversionBuffer = new(ELeave) TUint8[EConversionBufferSize]; sl@0: return c; sl@0: } sl@0: sl@0: CPlainTextConverter::~CPlainTextConverter() sl@0: { sl@0: delete iConverter; sl@0: delete [] iConversionBuffer; sl@0: } sl@0: sl@0: /* sl@0: Prepare to convert between Unicode and a foreign encoding. sl@0: If aSample is non-null it can be used to guess the foreign encoding, but only if iParam.iGuessForeignEncoding is true. sl@0: */ sl@0: void CPlainTextConverter::PrepareToConvertL(TPlainTextIOState& aState,const TDesC8* aSample) sl@0: { sl@0: RFs rfs; sl@0: sl@0: iConverter = CCnvCharacterSetConverter::NewL(); sl@0: if (aState.iParam.iFileSession == NULL) sl@0: { sl@0: TInt error = rfs.Connect(); sl@0: User::LeaveIfError(error); sl@0: sl@0: CleanupClosePushL(rfs); sl@0: } sl@0: else sl@0: rfs = *aState.iParam.iFileSession; sl@0: sl@0: TUint foreign_encoding = aState.iParam.iForeignEncoding; sl@0: sl@0: // Try to guess the foreign encoding. sl@0: if (aSample && aState.iParam.iGuessForeignEncoding) sl@0: { sl@0: CArrayFix* charsets = sl@0: CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC(rfs); sl@0: TInt confidence = 0; sl@0: CCnvCharacterSetConverter::AutoDetectCharacterSetL(confidence,foreign_encoding,*charsets,*aSample); sl@0: CleanupStack::PopAndDestroy(charsets); sl@0: if (confidence < 50) sl@0: User::Leave(KErrNotSupported); sl@0: } sl@0: sl@0: if (iConverter->PrepareToConvertToOrFromL(foreign_encoding,rfs) != CCnvCharacterSetConverter::EAvailable) sl@0: User::Leave(KErrNotSupported); sl@0: aState.iResult.iForeignEncoding = foreign_encoding; sl@0: if (aState.iParam.iFileSession == NULL) sl@0: { sl@0: CleanupStack::Pop(); // rfs sl@0: rfs.Close(); sl@0: } sl@0: } sl@0: sl@0: void TPlainTextWriter::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, sl@0: RWriteStream& aOutput,RReadStream& aInput) sl@0: { sl@0: TPlainTextWriter writer(aParam,aResult,aOutput,aInput); sl@0: writer.TranslateHelperL(); sl@0: } sl@0: sl@0: TPlainTextWriter::TPlainTextWriter(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, sl@0: RWriteStream& aOutput,RReadStream& aInput): sl@0: TPlainTextIOState(aParam,aResult,aOutput,aInput), sl@0: iLineLength(0), sl@0: iLineBuffer(NULL), sl@0: iMaxLineBufferLength(0) sl@0: { sl@0: } sl@0: sl@0: void TPlainTextWriter::TranslateHelperL() sl@0: { sl@0: if (iParam.iForeignEncoding) sl@0: { sl@0: iConverter = CPlainTextConverter::NewLC(); sl@0: iConverter->PrepareToConvertL(*this,NULL); sl@0: } sl@0: sl@0: if (iParam.iOrganisation == CPlainText::EOrganiseByLine) sl@0: iMaxLineLength = iParam.iMaxLineLength; sl@0: else sl@0: iMaxLineLength = KMaxTInt; // when exporting by paragraph, the wrapping width has no effect sl@0: if (iMaxLineLength <= 0) sl@0: iMaxLineLength = KMaxTInt; sl@0: iLineLength = 0; sl@0: if (iMaxLineLength < KMaxTInt) sl@0: iMaxLineBufferLength = iMaxLineLength; sl@0: else if (iParam.iForeignEncoding) sl@0: iMaxLineBufferLength = EDefaultLineBufferSize; sl@0: if (iMaxLineBufferLength) sl@0: iLineBuffer = new(ELeave) TText[iMaxLineBufferLength]; sl@0: else sl@0: iLineBuffer = NULL; sl@0: CleanupStack::PushL(iLineBuffer); sl@0: TRAPD(error,TranslateToEofL()); sl@0: if (error == KErrEof) sl@0: error = KErrNone; sl@0: if (error == KErrNone) sl@0: { sl@0: FlushL(); sl@0: iOutput.CommitL(); sl@0: } sl@0: CleanupStack::Pop(iLineBuffer); sl@0: delete [] iLineBuffer; sl@0: if (iConverter) sl@0: CleanupStack::PopAndDestroy(iConverter); sl@0: User::LeaveIfError(error); sl@0: } sl@0: sl@0: void TPlainTextWriter::TranslateToEofL() sl@0: { sl@0: while (!Finished()) sl@0: { sl@0: TText c = ReadRawCharL(); sl@0: switch (c) sl@0: { sl@0: // Write a CR-LF at a forced line break if organising by line. sl@0: case CEditableText::ELineBreak: sl@0: if (iParam.iOrganisation == CPlainText::EOrganiseByLine) sl@0: { sl@0: FlushL(); sl@0: WriteNewLineL(); sl@0: } sl@0: else sl@0: WriteCharL(c); sl@0: break; sl@0: sl@0: // Write a CR-LF at the end of the paragraph, then an extra one if lines are split by CR-LFs. sl@0: case CEditableText::EParagraphDelimiter: sl@0: FlushL(); sl@0: WriteNewLineL(); sl@0: if (iParam.iOrganisation == CPlainText::EOrganiseByLine) sl@0: WriteNewLineL(); sl@0: break; sl@0: sl@0: default: sl@0: WriteCharL(c); sl@0: } sl@0: } sl@0: } sl@0: sl@0: void TPlainTextWriter::FlushL() sl@0: { sl@0: if (iLineBuffer) sl@0: WriteAndConvertL(iLineBuffer,iLineLength); sl@0: iLineLength = 0; sl@0: } sl@0: sl@0: void TPlainTextWriter::WriteCharL(TText aChar) sl@0: { sl@0: if (iLineBuffer) sl@0: { sl@0: if (iLineLength >= iMaxLineBufferLength) sl@0: { sl@0: int linebreak = iMaxLineBufferLength; sl@0: int stripped_linebreak = iMaxLineBufferLength; sl@0: sl@0: if (iLineLength >= iMaxLineLength) sl@0: { sl@0: for (linebreak = iMaxLineLength; linebreak > 0; linebreak--) sl@0: if (iLineBuffer[linebreak - 1] == ' ') sl@0: break; sl@0: if (linebreak == 0) sl@0: linebreak = iMaxLineLength; sl@0: sl@0: // Strip a single trailing space if any; it is added when text is imported. sl@0: stripped_linebreak = linebreak; sl@0: if (iLineBuffer[linebreak - 1] == ' ') sl@0: stripped_linebreak = linebreak - 1; sl@0: } sl@0: sl@0: WriteAndConvertL(iLineBuffer,stripped_linebreak); sl@0: if (iLineLength >= iMaxLineLength) sl@0: WriteNewLineL(); sl@0: int i = linebreak; sl@0: int j = 0; sl@0: while (i < iMaxLineBufferLength) sl@0: iLineBuffer[j++] = iLineBuffer[i++]; sl@0: iLineLength = j; sl@0: } sl@0: iLineBuffer[iLineLength++] = aChar; sl@0: } sl@0: else sl@0: WriteRawCharL(aChar); sl@0: } sl@0: sl@0: void TPlainTextWriter::WriteNewLineL() sl@0: { sl@0: WriteAndConvertL(_S("\x0d\x0a"),2); sl@0: } sl@0: sl@0: void TPlainTextWriter::WriteAndConvertL(const TText* aText,TInt aLength) sl@0: { sl@0: if (iConverter) sl@0: { sl@0: while (aLength > 0) sl@0: { sl@0: TPtrC source(aText,aLength); sl@0: TPtr8 dest(iConverter->iConversionBuffer,CPlainTextConverter::EConversionBufferSize); sl@0: int remainder = iConverter->iConverter->ConvertFromUnicode(dest,source); sl@0: if (remainder < 0) sl@0: User::Leave(KErrCorrupt); sl@0: int available = iParam.iMaxOutputChars - iResult.iOutputChars; sl@0: if (available < dest.Length()) sl@0: dest.SetLength(available); sl@0: if (dest.Length() > 0) sl@0: { sl@0: iOutput.WriteL(dest); sl@0: iResult.iOutputChars += dest.Length(); sl@0: } sl@0: int converted = aLength - remainder; sl@0: aText += converted; sl@0: aLength -= converted; sl@0: } sl@0: } sl@0: else sl@0: { sl@0: while (aLength-- > 0) sl@0: WriteRawCharL(*aText++); sl@0: } sl@0: } sl@0: sl@0: TPlainTextReader::TPlainTextReader(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, sl@0: RWriteStream& aOutput,RReadStream& aInput): sl@0: TPlainTextIOState(aParam,aResult,aOutput,aInput), sl@0: iInputBuffer(NULL), sl@0: iInputLength(0), sl@0: iInputPos(0), sl@0: iConversionState(CCnvCharacterSetConverter::KStateDefault) sl@0: { sl@0: iCheckByteOrder = TRUE; sl@0: } sl@0: sl@0: void TPlainTextReader::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, sl@0: RWriteStream& aOutput,RReadStream& aInput) sl@0: { sl@0: TPlainTextReader reader(aParam,aResult,aOutput,aInput); sl@0: if(reader.iParam.iOrganisation == CPlainText::EOrganiseByLine) sl@0: { sl@0: TLineTextWriter txtWriter(reader); sl@0: TSLBTransaltor slbTranslator(txtWriter); sl@0: reader.TranslateHelperL(slbTranslator); sl@0: } sl@0: else sl@0: { sl@0: TParagraphTextWriter txtWriter(reader); sl@0: TSLBTransaltor slbTranslator(txtWriter); sl@0: reader.TranslateHelperL(slbTranslator); sl@0: } sl@0: } sl@0: sl@0: void TPlainTextReader::TranslateHelperL(TSLBTransaltor& aSLBTranslator) sl@0: { sl@0: if (iParam.iForeignEncoding || iParam.iGuessForeignEncoding) sl@0: { sl@0: iConverter = CPlainTextConverter::NewLC(); sl@0: iInputBuffer = new(ELeave) TText[EInputBufferSize]; sl@0: CleanupStack::PushL(iInputBuffer); sl@0: } sl@0: else sl@0: iInputBuffer = NULL; sl@0: TRAPD(error,TranslateToEofL(aSLBTranslator)); sl@0: if (error == KErrEof) sl@0: error = KErrNone; sl@0: if (error == KErrNone) sl@0: iOutput.CommitL(); sl@0: if (iConverter) sl@0: { sl@0: CleanupStack::Pop(iInputBuffer); sl@0: delete [] iInputBuffer; sl@0: CleanupStack::PopAndDestroy(iConverter); sl@0: } sl@0: User::LeaveIfError(error); sl@0: } sl@0: sl@0: void TPlainTextReader::TranslateToEofL(TSLBTransaltor& aSLBTranslator) sl@0: { sl@0: while(!Finished()) sl@0: { sl@0: TText c = ReadAndConvertL(); sl@0: aSLBTranslator.ProcessL(c); sl@0: } sl@0: aSLBTranslator.FlushL(); sl@0: } sl@0: sl@0: TText TPlainTextReader::ReadAndConvertL() sl@0: { sl@0: // Read EConversionBufferSize bytes into a conversion buffer (iConversionBuffer). sl@0: // Using CharConv convert this into unicode and place in a destination buffer (dest). sl@0: // This may result in some bytes that cannot be converted (remainder) as that sl@0: // character encoding is truncated. sl@0: // This remainder is then moved to the begining of the conversion buffer and more sl@0: // data read in after it, in effect untruncating that last character. sl@0: // Before this next read takes place the next converted unicode character is returned sl@0: // until the destination buffer positional pointers reach the end where more data is sl@0: // required for processing. sl@0: // sl@0: if (iConverter && iInputBuffer) sl@0: { sl@0: if (iInputPos >= iInputLength) sl@0: { sl@0: /* sl@0: Attempt to read more foreign characters if there are less than 20, sl@0: which is the current maximum length of a multibyte character sequence for CHARCONV. sl@0: Use MStreamBuf::ReadL, which doesn't leave on EOF, rather than RReadStream::ReadL, sl@0: which does, and doesn't tell you how much was actually read. sl@0: */ sl@0: if (iConverter->iConversionBufferLength < 20) sl@0: iConverter->iConversionBufferLength += sl@0: iInput.Source()->ReadL(iConverter->iConversionBuffer + iConverter->iConversionBufferLength, sl@0: CPlainTextConverter::EConversionBufferSize - iConverter->iConversionBufferLength); sl@0: sl@0: // Create the converter late so we have a sample of foreign text for auto-detection of the encoding. sl@0: if (!iConverter->iConverter) sl@0: { sl@0: TPtrC8 sample(iConverter->iConversionBuffer,iConverter->iConversionBufferLength); sl@0: iConverter->PrepareToConvertL(*this,&sample); sl@0: } sl@0: sl@0: // Translate from the foreign encoding to Unicode. sl@0: TPtr dest(iInputBuffer,0,EInputBufferSize); sl@0: TPtrC8 source(iConverter->iConversionBuffer,iConverter->iConversionBufferLength); sl@0: int remainder = iConverter->iConverter->ConvertToUnicode(dest,source,iConversionState); sl@0: if (remainder < 0) sl@0: User::Leave(KErrCorrupt); sl@0: sl@0: // Move the remaining foreign characters if any to the start of the buffer sl@0: // so that on the next read it can be joined with its truncated part. sl@0: for (int i = 0, j = iConverter->iConversionBufferLength - remainder; i < remainder; ++i, ++j) sl@0: iConverter->iConversionBuffer[i] = iConverter->iConversionBuffer[j]; sl@0: iConverter->iConversionBufferLength = remainder; sl@0: sl@0: iInputPos = 0; sl@0: iInputLength = dest.Length(); sl@0: if (iInputLength == 0) sl@0: User::Leave(KErrEof); sl@0: } sl@0: iResult.iInputChars++; sl@0: return iInputBuffer[iInputPos++]; sl@0: } sl@0: else sl@0: return ReadRawCharL(); sl@0: } sl@0: sl@0: /** sl@0: The method processes the imput characters, writing them to the output, but skipping sl@0: the picture characters (CEditableText::EPictureCharacter). sl@0: The method is not called directly and should not be called. It implements sl@0: MOutputChar::OutputCharL(TChar aChar) and is called from TParagraphTextWriter and sl@0: TLineTextWriter implementations. sl@0: @param aChar Character to be processed. sl@0: */ sl@0: void TPlainTextReader::OutputCharL(TText aChar) sl@0: { sl@0: switch(aChar) sl@0: { sl@0: case CEditableText::EByteOrderMark : sl@0: // leading byte order marks are ignored sl@0: if(iResult.iInputChars > 1) sl@0: { sl@0: WriteRawCharL(aChar); sl@0: } sl@0: break; sl@0: case CEditableText::EPictureCharacter: sl@0: //Picture characters are ignored because they would cause ETEXT to panic when it attempted to find sl@0: //the picture corresponding to the character. sl@0: break; sl@0: default: sl@0: WriteRawCharL(aChar); sl@0: break; sl@0: } sl@0: } sl@0: