sl@0: /* sl@0: * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include "TXTETEXT.H" sl@0: #include "TXTSTD.H" sl@0: #include "OstTraceDefinitions.h" sl@0: #ifdef OST_TRACE_COMPILER_IN_USE sl@0: #include "TXTSCANTraces.h" sl@0: #endif sl@0: sl@0: sl@0: sl@0: EXPORT_C TInt CPlainText::CharPosOfParagraph(TInt& aLength,TInt aParaOffset)const sl@0: /** Finds the length and the start position of a paragraph identified by its sl@0: paragraph number the first paragraph is numbered zero. sl@0: sl@0: Notes: sl@0: sl@0: if aParaOffset is invalid, (equal to or greater than the total number of sl@0: paragraphs), the function's return value is EScanEndOfData sl@0: sl@0: @param aLength On return contains the length of the specified paragraph. sl@0: @param aParaOffset The paragraph number. The first paragraph is numbered zero. sl@0: @return The document position of the first character in the paragraph. */ sl@0: { sl@0: __TEST_INVARIANT; sl@0: sl@0: TInt startPos=aLength=0; sl@0: TUint scanMask=(EScanToUnitStart); sl@0: aLength=ScanParas(startPos,scanMask); sl@0: for (TInt offset=1;offset<=aParaOffset;offset++) sl@0: { sl@0: if (startPos<=EScanEndOfData) sl@0: return EScanEndOfData; sl@0: aLength=ScanParas(startPos,scanMask); sl@0: } sl@0: if (startPos==EScanEndOfData) sl@0: startPos=iByteStore->Size()/sizeof(TText); sl@0: return startPos-aLength; sl@0: } sl@0: sl@0: EXPORT_C TInt CPlainText::ParagraphNumberForPos(TInt& aPos)const sl@0: /** Gets the number of the paragraph which contains a document position. sl@0: Paragraph numbering begins at zero. sl@0: sl@0: @param aPos A document position. Must be valid or a panic occurs. On return, sl@0: contains the document position of the first character in the paragraph in sl@0: which it is located. sl@0: @return The number of the paragraph containing the specified document position. sl@0: The first paragraph is numbered zero. */ sl@0: { sl@0: __TEST_INVARIANT; sl@0: sl@0: if (aPos<0 || aPos>DocumentLength()) sl@0: { sl@0: OstTrace0( TRACE_FATAL, CPLAINTEXT_PARAGRAPHNUMBERFORPOS, "ECharPosBeyondDocument" ); sl@0: } sl@0: __ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument)); sl@0: TUint scanMask=(EScanBackwards|EScanStayIfBoundary|EScanToUnitStart); sl@0: ScanParas(aPos,scanMask); sl@0: // aPos now holds the 1st char of the paragraph. sl@0: TInt paraOffset=0,currentPos=0; sl@0: scanMask=EScanToUnitStart; sl@0: while (currentPosDocumentLength()) sl@0: { sl@0: OstTrace0( TRACE_FATAL, CPLAINTEXT_GETWORDINFO, "ECharPosBeyondDocument" ); sl@0: } sl@0: __ASSERT_ALWAYS(aCurrentPos>=0 && aCurrentPos<=DocumentLength(),Panic(ECharPosBeyondDocument)); sl@0: aStartPos=aLength=aCurrentPos; sl@0: // First find the start of the word. sl@0: TUint scanMask=(EScanBackwards| sl@0: EScanStayIfBoundary| sl@0: EScanToUnitStart); sl@0: if (aPictureIsDelimiter) sl@0: scanMask|=EScanPictureIsDelimiter; sl@0: if (aPunctuationIsDelimiter) sl@0: scanMask|=EScanPunctuationIsDelimiter; sl@0: ScanWords(aStartPos,scanMask); sl@0: // Next find the end of the word. sl@0: scanMask=(EScanStayIfBoundary|EScanToUnitEnd); sl@0: if (aPictureIsDelimiter) sl@0: scanMask|=EScanPictureIsDelimiter; sl@0: if (aPunctuationIsDelimiter) sl@0: scanMask|=EScanPunctuationIsDelimiter; sl@0: ScanWords(aLength,scanMask); sl@0: aLength-=aStartPos; sl@0: if (aLength+aStartPos>(DocumentLength()+1)) sl@0: { sl@0: OstTrace0( TRACE_DUMP, DUP2_CPLAINTEXT_GETWORDINFO, "ECharPosBeyondDocument" ); sl@0: } sl@0: __ASSERT_DEBUG(aLength+aStartPos<=(DocumentLength()+1),Panic(ECharPosBeyondDocument)); sl@0: sl@0: __TEST_INVARIANT; sl@0: } sl@0: sl@0: EXPORT_C TInt CPlainText::ToParagraphStart(TInt& aPos) const sl@0: /** Gets the document position of the start of the paragraph containing the sl@0: specified document position. sl@0: sl@0: @param aPos A document position. Must be valid or a panic occurs. On return, sl@0: contains the document position of the first character in the paragraph in sl@0: which it is located. sl@0: @return The number of characters skipped in scanning to the start of the sl@0: paragraph. */ sl@0: { sl@0: __TEST_INVARIANT; sl@0: if (aPos<0 || aPos>DocumentLength()) sl@0: { sl@0: OstTrace0( TRACE_FATAL, CPLAINTEXT_TOPARAGRAPHSTART, "ECharPosBeyondDocument" ); sl@0: } sl@0: __ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument)); sl@0: sl@0: int skipped = 0; sl@0: while (aPos > 0) sl@0: { sl@0: TPtrC8 p = iByteStore->BackPtr(aPos * sizeof(TText)); sl@0: const TText* start = (TText*)p.Ptr(); sl@0: const TText* end = start + p.Length() / sizeof(TText); sl@0: while (end > start) sl@0: { sl@0: end--; sl@0: if (*end == EParagraphDelimiter) sl@0: return skipped; sl@0: aPos--; sl@0: skipped++; sl@0: } sl@0: } sl@0: return skipped; sl@0: } sl@0: sl@0: EXPORT_C TInt CPlainText::WordCount()const sl@0: /** Gets a count of the number of words in the text object. sl@0: sl@0: @return The number of words in the text object. */ sl@0: {return CountUnits(EUnitIsWord);} sl@0: sl@0: EXPORT_C TInt CPlainText::ParagraphCount()const sl@0: /** Gets a count of the number of paragraphs in the text object. sl@0: sl@0: Note: the paragraph delimiter which terminates every text object means this sl@0: function always returns a count of at least one. sl@0: sl@0: @return The number of paragraphs contained in the text object. */ sl@0: {//return CountUnits(EUnitIsParagraph);} sl@0: TInt pos=0,unitCount=0; sl@0: TUint scanMask=(EScanToUnitStart); sl@0: while (pos!=EScanEndOfData) sl@0: { sl@0: unitCount++; sl@0: ScanParas(pos,scanMask); sl@0: } sl@0: return unitCount; sl@0: } sl@0: sl@0: TInt CPlainText::CountUnits(TUnitOfText aContext)const sl@0: // Returns the number of units in the document, where sl@0: // units are defined by the parameter aContext. sl@0: // sl@0: { sl@0: SScanData scanData; sl@0: TInt pos=0,unitCount=0; sl@0: TUint scanMask=(EScanToUnitStart|EScanJoinDelimiters|EScanStayIfBoundary); sl@0: InitScanControl(pos,scanMask,aContext,scanData); sl@0: ScanUnit(pos,aContext,scanData); sl@0: // The above code skips over leading white space sl@0: scanData.scanMask &=~ EScanStayIfBoundary; sl@0: while(pos!=EScanEndOfData) sl@0: { sl@0: unitCount++; sl@0: ScanUnit(pos,aContext,scanData); sl@0: }// Count complete, so return pos to the beginning of the data sl@0: pos=0; sl@0: return unitCount; sl@0: } sl@0: sl@0: sl@0: sl@0: sl@0: EXPORT_C TInt CPlainText::ScanWords(TInt& aPos,TUint& aScanMask)const sl@0: /** Scans from a document position to the beginning or end of a word. The sl@0: destination is determined by a scan mask. The scan can either be forwards sl@0: (the default) or backwards, and the destination may be the first or last sl@0: character in the word containing the position, or the first character in sl@0: the next word. sl@0: sl@0: Note: If an attempt is made to scan beyond the end of text delimiter, on return, sl@0: aPos is set to EScanEndOfData and the function's return value indicates sl@0: the number of characters skipped in passing the end of text delimiter. sl@0: sl@0: @param aPos The document position from which to scan. Must be valid, or a sl@0: panic occurs. On return, contains the new document position. sl@0: @param aScanMask The scan mask. See the enumeration whose values begin with sl@0: CPlainText::EScanBackwards. sl@0: @return The number of characters skipped to reach the new document position. */ sl@0: { sl@0: if (aPos<0 || aPos>DocumentLength()) sl@0: { sl@0: OstTrace0( TRACE_FATAL, CPLAINTEXT_SCANWORDS, "ECharPosBeyondDocument" ); sl@0: } sl@0: __ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument)); sl@0: SScanData scanData; sl@0: InitScanControl(aPos,aScanMask,EUnitIsWord,scanData); sl@0: return ScanUnit(aPos,EUnitIsWord,scanData); sl@0: } sl@0: sl@0: EXPORT_C TInt CPlainText::ScanParas(TInt& aPos,TUint& aScanMask)const sl@0: /** Scans from a document position to the beginning or end of a paragraph. The sl@0: destination is determined by a scan mask. The scan can either be forwards sl@0: (the default) or backwards, and the destination may be the first or last sl@0: character in the paragraph containing the position, or the first character in sl@0: the next paragraph. sl@0: sl@0: Note: if an attempt is made to scan beyond the end of text delimiter, on return, sl@0: aPos is set to EScanEndOfData ) and the function's return value indicates sl@0: the number of characters skipped in passing the end of text delimiter. sl@0: sl@0: @param aPos The document position to scan from. Must be valid or a panic occurs. sl@0: On return, contains the new document position. sl@0: @param aScanMask The scan mask. See the enumeration whose values begin with sl@0: CPlainText::EScanBackwards. sl@0: @return The number of characters skipped to reach the new document position. */ sl@0: { sl@0: if (aPos<0 || aPos>DocumentLength()) sl@0: { sl@0: OstTrace0( TRACE_FATAL, CPLAINTEXT_SCANPARAS, "ECharPosBeyondDocument" ); sl@0: } sl@0: __ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument)); sl@0: SScanData scanData; sl@0: InitScanControl(aPos,aScanMask,EUnitIsParagraph,scanData); sl@0: return ScanUnit(aPos,EUnitIsParagraph,scanData); sl@0: } sl@0: sl@0: sl@0: void CPlainText::InitScanControl(TInt& aPos,TUint& aScanMask,TUnitOfText aContext,SScanData& aScanData)const sl@0: // Initialises the control segment of the bitmask sl@0: // used to determine when scan has completed. sl@0: // Calls the initialisation of aScanData. sl@0: // sl@0: { sl@0: aScanData.pos=aPos; sl@0: aScanData.scanMask=0; sl@0: aScanData.scanMask|=aScanMask; sl@0: if (aScanData.scanMask & EScanStayIfBoundary) sl@0: {//Scan one unit the other way first. sl@0: aScanData.scanMask ^= EScanBackwards; // Alter scan direction. sl@0: InitScanData(aScanData.pos,aScanData); sl@0: TestForDelimiter(aContext,aScanData); sl@0: aScanData.scanMask ^= EScanBackwards; // Restore original scan direction. sl@0: } sl@0: InitScanData(aScanData.pos,aScanData); sl@0: if(!aPos && (aScanData.scanMask&(EScanStayIfBoundary|EScanBackwards))==EScanStayIfBoundary) sl@0: {//At the start of the buffer and scanning forwards and ScanStayIfBoundary set. sl@0: aScanData.scanMask &=~aScanData.EInsideUnit; sl@0: } sl@0: else sl@0: { sl@0: TestForDelimiter(aContext,aScanData); sl@0: if (aScanData.scanMask & aScanData.EIsDelimiter) sl@0: aScanData.scanMask &=~ aScanData.EInsideUnit; sl@0: else sl@0: aScanData.scanMask |= aScanData.EInsideUnit; sl@0: } sl@0: if (aScanData.scanMask & EScanBackwards) sl@0: {//Set stop flags relative to scan direction sl@0: if (aScanData.scanMask & EScanToUnitStart) sl@0: aScanData.scanMask |= aScanData.EStopEnd; sl@0: if (aScanData.scanMask & EScanToUnitEnd) sl@0: aScanData.scanMask |= aScanData.EStopBegin; sl@0: } sl@0: else sl@0: {//Set stop flags relative to scan direction sl@0: if (aScanData.scanMask & EScanToUnitStart) sl@0: aScanData.scanMask |= aScanData.EStopBegin; sl@0: if (aScanData.scanMask & EScanToUnitEnd) sl@0: aScanData.scanMask |= aScanData.EStopEnd; sl@0: } sl@0: } sl@0: sl@0: sl@0: void CPlainText::InitScanData(TInt aPos,SScanData& aScanData)const sl@0: // Initialises the necessary elements of the scan structure, for ScanXxx sl@0: // methods. These are used to track scan progress. sl@0: // sl@0: { sl@0: aScanData.pos=aPos; sl@0: aScanData.oldPos=aPos; sl@0: aScanData.currentSegLen=0; sl@0: if (aScanData.scanMask & EScanBackwards) sl@0: {//Scanning backwards. sl@0: aScanData.delta=-1; sl@0: aScanData.totalBufLen=aPos; sl@0: } sl@0: else sl@0: {//Scanning forwards. sl@0: aScanData.delta=+1; sl@0: aScanData.totalBufLen=((iByteStore->Size()/sizeof(TText))-aScanData.pos); sl@0: } sl@0: } sl@0: sl@0: sl@0: TInt CPlainText::ScanUnit(TInt& aPos,TUnitOfText aContext,SScanData& aScanData)const sl@0: // Scan from position aPos, by one unit. sl@0: // Update aPos to the current position, and return the number sl@0: // of characters skipped or EScanEndOfData if at the end of the buffer. sl@0: // sl@0: { sl@0: while (TestForDelimiter(aContext,aScanData)) sl@0: {//More data has been read sl@0: if (aScanData.scanMask & aScanData.EInsideUnit) sl@0: { sl@0: if (aScanData.scanMask & aScanData.EIsDelimiter) sl@0: { sl@0: aScanData.scanMask &=~aScanData.EInsideUnit; sl@0: if (aScanData.scanMask & aScanData.EStopEnd) sl@0: break; sl@0: } sl@0: } sl@0: else sl@0: { sl@0: if (aScanData.scanMask & aScanData.EIsDelimiter) sl@0: { sl@0: if (!(aScanData.scanMask & EScanJoinDelimiters)) sl@0: break; sl@0: } sl@0: else sl@0: { sl@0: aScanData.scanMask |= aScanData.EInsideUnit; sl@0: if (aScanData.scanMask & aScanData.EStopBegin) sl@0: break; sl@0: } sl@0: } sl@0: } sl@0: TInt charsSkipped=(aScanData.scanMask & EScanBackwards)? aPos-aScanData.oldPos : sl@0: aScanData.oldPos-aPos; sl@0: aPos=(aScanData.oldPos<(TInt)(iByteStore->Size()/sizeof(TText)))? aScanData.oldPos : EScanEndOfData; sl@0: return charsSkipped; sl@0: } sl@0: sl@0: sl@0: TBool CPlainText::TestForDelimiter(TUnitOfText aContext,SScanData& aScanData) const sl@0: /** Return ETrue if a character was read successfully. sl@0: Set EIsDelimiter flag if the character read sl@0: is a delimiter of the appropriate type.*/ sl@0: { sl@0: aScanData.scanMask |= aScanData.EIsDelimiter; sl@0: aScanData.oldPos = aScanData.pos; sl@0: TChar c; sl@0: if (!GetChar(aScanData,c)) sl@0: return FALSE; sl@0: sl@0: TBool found = FALSE; sl@0: if (aContext == EUnitIsParagraph) sl@0: found = c == EParagraphDelimiter; sl@0: else if (aContext == EUnitIsWord) sl@0: { sl@0: if (c == EParagraphDelimiter || sl@0: c == ELineBreak || sl@0: c == EPageBreak || sl@0: c == ESpace || sl@0: c == ETabCharacter) sl@0: found = TRUE; sl@0: if (!found && (aScanData.scanMask & EScanPictureIsDelimiter)) sl@0: { sl@0: if (c == EPictureCharacter) sl@0: found = TRUE; sl@0: } sl@0: if (!found) sl@0: { sl@0: TChar::TCategory cat = c.GetCategory(); sl@0: if (cat == TChar::EZsCategory) // it's a space sl@0: found = TRUE; sl@0: if (!found && sl@0: (aScanData.scanMask & EScanPunctuationIsDelimiter) && sl@0: (cat & TChar::EPunctuationGroup)) sl@0: { sl@0: if (c != EHyphenMinus && sl@0: c != EApostrophe && sl@0: c != EPotentialHyphen && sl@0: c != EHyphen && sl@0: c != ERightSingleQuote) sl@0: found = TRUE; sl@0: } sl@0: } sl@0: } sl@0: sl@0: if (found) sl@0: aScanData.scanMask |= aScanData.EIsDelimiter; sl@0: else sl@0: aScanData.scanMask &= ~aScanData.EIsDelimiter; sl@0: return TRUE; sl@0: } sl@0: sl@0: sl@0: TBool CPlainText::GetChar(SScanData& aScanData,TChar& aChar)const sl@0: // If there is one, write the next character to aChar, and return ETrue, sl@0: // otherwise return EFalse. sl@0: // sl@0: { sl@0: TPtrC view(_S("a")); // Dummy value cos no default constructor. sl@0: if (!aScanData.currentSegLen) sl@0: {//First time in, or new segment. sl@0: if (!aScanData.totalBufLen) sl@0: {//No data left to read. sl@0: return EFalse; sl@0: } sl@0: if (aScanData.delta < 0) sl@0: {//Scanning backwards sl@0: TPtrC8 tempView=iByteStore->BackPtr(aScanData.pos*sizeof(TText)); sl@0: view.Set((TText*)tempView.Ptr(),tempView.Length()/sizeof(TText)); sl@0: } sl@0: else sl@0: {//Scanning forwards sl@0: TPtrC8 tempView=iByteStore->Ptr(aScanData.pos*sizeof(TText)); sl@0: view.Set((TText*)tempView.Ptr(),tempView.Length()/sizeof(TText)); sl@0: }//Ptr now returns a TDes. sl@0: aScanData.currentSegLen=(TInt)Min(aScanData.totalBufLen,(TInt)view.Length()); sl@0: aScanData.totalBufLen-=aScanData.currentSegLen; sl@0: aScanData.buf=(TText*)view.Ptr(); sl@0: if (aScanData.delta < 0) sl@0: aScanData.buf+=aScanData.currentSegLen-1; sl@0: } sl@0: aChar=*(aScanData.buf); // aChar takes the character just read. sl@0: aScanData.buf+=aScanData.delta; // pText increments correctly for both ASCII and Unicode sl@0: aScanData.pos+=aScanData.delta; sl@0: aScanData.currentSegLen-=1; // len holds the number of chars left, for ASCII and Unicode sl@0: return ETrue; sl@0: } sl@0: sl@0: