1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/textrendering/texthandling/stext/TXTSCAN.CPP Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,470 @@
1.4 +/*
1.5 +* Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include "TXTETEXT.H"
1.23 +#include "TXTSTD.H"
1.24 +#include "OstTraceDefinitions.h"
1.25 +#ifdef OST_TRACE_COMPILER_IN_USE
1.26 +#include "TXTSCANTraces.h"
1.27 +#endif
1.28 +
1.29 +
1.30 +
1.31 +EXPORT_C TInt CPlainText::CharPosOfParagraph(TInt& aLength,TInt aParaOffset)const
1.32 +/** Locates a paragraph by its paragraph number (the first paragraph is number
1.33 +zero) and reports both its start position and its length.
1.34 +
1.35 +Note: if aParaOffset is invalid (equal to or greater than the total number of
1.36 +paragraphs), the function's return value is EScanEndOfData.
1.37 +
1.38 +@param aLength On return contains the length of the specified paragraph.
1.39 +@param aParaOffset The paragraph number. The first paragraph is numbered zero.
1.40 +@return The document position of the first character in the paragraph. */
1.41 +    {
1.42 +    __TEST_INVARIANT;
1.43 +
1.44 +    TUint forwardToStart=EScanToUnitStart;
1.45 +    TInt scanPos=0;
1.46 +    aLength=0;
1.47 +    aLength=ScanParas(scanPos,forwardToStart); // Steps over paragraph zero.
1.48 +    for (TInt parasLeft=aParaOffset;parasLeft>0;--parasLeft)
1.49 +        {
1.50 +        if (scanPos<=EScanEndOfData)
1.51 +            return EScanEndOfData; // Ran out of text before reaching the paragraph.
1.52 +        aLength=ScanParas(scanPos,forwardToStart);
1.53 +        }
1.54 +    // If the last scan reported EScanEndOfData, measure back from the end of the store.
1.55 +    if (scanPos==EScanEndOfData)
1.56 +        scanPos=iByteStore->Size()/sizeof(TText);
1.57 +    return scanPos-aLength;
1.58 +    }
1.59 +
1.60 +EXPORT_C TInt CPlainText::ParagraphNumberForPos(TInt& aPos)const
1.61 +/** Works out which paragraph contains a document position. Paragraphs are
1.62 +numbered from zero.
1.63 +
1.64 +@param aPos A document position. Must be valid or a panic occurs. On return,
1.65 +contains the document position of the first character in the paragraph in
1.66 +which it is located.
1.67 +@return The zero-based number of the paragraph containing the position. */
1.68 +    {
1.69 +    __TEST_INVARIANT;
1.70 +
1.71 +    const TBool posInRange=(aPos>=0 && aPos<=DocumentLength());
1.72 +    if (!posInRange)
1.73 +        {
1.74 +        OstTrace0( TRACE_FATAL, CPLAINTEXT_PARAGRAPHNUMBERFORPOS, "ECharPosBeyondDocument" );
1.75 +        }
1.76 +    __ASSERT_ALWAYS(posInRange,Panic(ECharPosBeyondDocument));
1.77 +    // Pull aPos back to the first character of its own paragraph.
1.78 +    TUint mask=(EScanBackwards|EScanStayIfBoundary|EScanToUnitStart);
1.79 +    ScanParas(aPos,mask);
1.80 +    // Walk forwards from the start of the text, one paragraph per scan, up to that position.
1.81 +    mask=EScanToUnitStart;
1.82 +    TInt paraNumber=0;
1.83 +    for (TInt walker=0;walker<aPos;++paraNumber)
1.84 +        {
1.85 +        ScanParas(walker,mask);
1.86 +        }
1.87 +    return paraNumber;
1.88 +    }
1.89 +
1.90 +EXPORT_C void CPlainText::GetWordInfo(TInt aCurrentPos,TInt& aStartPos,TInt& aLength,
1.91 +    TBool aPictureIsDelimiter,TBool aPunctuationIsDelimiter)const
1.92 +/** Finds the word containing a document position, and reports where the word
1.93 +starts and how long it is.
1.94 +
1.95 +@param aCurrentPos A document position. Must be valid or a panic occurs.
1.96 +@param aStartPos On return, the document position of the first character in
1.97 +the word containing the position aCurrentPos.
1.98 +@param aLength On return, the length of the word containing document position
1.99 +aCurrentPos.
1.100 +@param aPictureIsDelimiter ETrue if picture characters should be treated as
1.101 +word delimiters, EFalse if not.
1.102 +@param aPunctuationIsDelimiter ETrue if punctuation characters should be treated
1.103 +as word delimiters, EFalse if not. */
1.104 +    {
1.105 +    __TEST_INVARIANT;
1.106 +
1.107 +    const TBool posInRange=(aCurrentPos>=0 && aCurrentPos<=DocumentLength());
1.108 +    if (!posInRange)
1.109 +        {
1.110 +        OstTrace0( TRACE_FATAL, CPLAINTEXT_GETWORDINFO, "ECharPosBeyondDocument" );
1.111 +        }
1.112 +    __ASSERT_ALWAYS(posInRange,Panic(ECharPosBeyondDocument));
1.113 +    // Both scans start from the caller's position.
1.114 +    aLength=aCurrentPos;
1.115 +    aStartPos=aLength;
1.116 +    // Delimiter options requested by the caller; shared by both scans below.
1.117 +    TUint delimiterFlags=0;
1.118 +    if (aPictureIsDelimiter)
1.119 +        delimiterFlags|=EScanPictureIsDelimiter;
1.120 +    if (aPunctuationIsDelimiter)
1.121 +        delimiterFlags|=EScanPunctuationIsDelimiter;
1.122 +    // Scan backwards for the first character of the word.
1.123 +    TUint scanMask=(EScanBackwards|EScanStayIfBoundary|EScanToUnitStart)|delimiterFlags;
1.124 +    ScanWords(aStartPos,scanMask);
1.125 +    // Scan forwards for the end of the word, then turn that position into a length.
1.126 +    scanMask=(EScanStayIfBoundary|EScanToUnitEnd)|delimiterFlags;
1.127 +    ScanWords(aLength,scanMask);
1.128 +    aLength-=aStartPos;
1.129 +
1.130 +    const TBool endInRange=(aLength+aStartPos<=(DocumentLength()+1));
1.131 +    if (!endInRange)
1.132 +        {
1.133 +        OstTrace0( TRACE_DUMP, DUP2_CPLAINTEXT_GETWORDINFO, "ECharPosBeyondDocument" );
1.134 +        }
1.135 +    __ASSERT_DEBUG(endInRange,Panic(ECharPosBeyondDocument));
1.136 +
1.137 +    __TEST_INVARIANT;
1.138 +    }
1.139 +
1.140 +EXPORT_C TInt CPlainText::ToParagraphStart(TInt& aPos) const
1.141 +/** Moves a document position back to the start of the paragraph containing it.
1.142 +
1.143 +@param aPos A document position. Must be valid or a panic occurs. On return,
1.144 +contains the document position of the first character in the paragraph in
1.145 +which it is located.
1.146 +@return The number of characters skipped in scanning to the start of the
1.147 +paragraph. */
1.148 +    {
1.149 +    __TEST_INVARIANT;
1.150 +    const TBool posInRange=(aPos>=0 && aPos<=DocumentLength());
1.151 +    if (!posInRange)
1.152 +        {
1.153 +        OstTrace0( TRACE_FATAL, CPLAINTEXT_TOPARAGRAPHSTART, "ECharPosBeyondDocument" );
1.154 +        }
1.155 +    __ASSERT_ALWAYS(posInRange,Panic(ECharPosBeyondDocument));
1.156 +
1.157 +    const TInt startedAt = aPos; // The skipped count is the distance moved from here.
1.158 +    while (aPos > 0)
1.159 +        {
1.160 +        // Fetch the run of text that BackPtr() supplies for the current position.
1.161 +        const TPtrC8 segment = iByteStore->BackPtr(aPos * sizeof(TText));
1.162 +        const TText* const first = (const TText*)segment.Ptr();
1.163 +        const TText* cursor = first + segment.Length() / sizeof(TText);
1.164 +        // Step backwards through the run until a paragraph delimiter is met.
1.165 +        for (; cursor > first; --aPos)
1.166 +            {
1.167 +            if (*--cursor == EParagraphDelimiter)
1.168 +                return startedAt - aPos;
1.169 +            }
1.170 +        }
1.171 +    // Reached the start of the document without meeting a delimiter.
1.172 +    return startedAt - aPos;
1.173 +    }
1.174 +
1.175 +EXPORT_C TInt CPlainText::WordCount()const
1.176 +/** Counts the words in the text object. @return The number of words found. */
1.177 +    {
1.178 +    return CountUnits(EUnitIsWord);
1.179 +    }
1.180 +
1.181 +EXPORT_C TInt CPlainText::ParagraphCount()const
1.182 +/** Counts the paragraphs in the text object by scanning forwards from the start
1.183 +of the text, one paragraph per scan, until the end of the data is reached.
1.184 +
1.185 +Note: the paragraph delimiter which terminates every text object means this
1.186 +function always returns a count of at least one.
1.187 +@return The number of paragraphs contained in the text object. */
1.188 +    {
1.189 +    TUint forwardToStart=EScanToUnitStart;
1.190 +    TInt scanPos=0;
1.191 +    TInt paragraphs=0;
1.192 +    for (;scanPos!=EScanEndOfData;++paragraphs)
1.193 +        {
1.194 +        ScanParas(scanPos,forwardToStart);
1.195 +        }
1.196 +    return paragraphs;
1.197 +    }
1.198 +
1.199 +TInt CPlainText::CountUnits(TUnitOfText aContext)const
1.200 +// Counts the units (of the kind selected by aContext) found between the start
1.201 +// of the text and the end of the data, and returns that count.
1.202 +//
1.203 +    {
1.204 +    TInt scanPos=0;
1.205 +    TUint startMask=(EScanToUnitStart|EScanJoinDelimiters|EScanStayIfBoundary);
1.206 +    SScanData state;
1.207 +    InitScanControl(scanPos,startMask,aContext,state);
1.208 +    // Initial scan with EScanStayIfBoundary set: skips over leading white space.
1.209 +    ScanUnit(scanPos,aContext,state);
1.210 +    // Clear EScanStayIfBoundary for the counting scans that follow.
1.211 +    state.scanMask &=~ EScanStayIfBoundary;
1.212 +    TInt units=0;
1.213 +    for (;scanPos!=EScanEndOfData;++units)
1.214 +        {
1.215 +        ScanUnit(scanPos,aContext,state);
1.216 +        }
1.217 +    return units;
1.218 +    }
1.219 +
1.220 +
1.221 +
1.222 +
1.223 +EXPORT_C TInt CPlainText::ScanWords(TInt& aPos,TUint& aScanMask)const
1.224 +/** Scans forwards (the default) or backwards from a document position to a word
1.225 +boundary selected by the scan mask: the first or last character of the word
1.226 +containing the position, or the first character of the next word.
1.227 +
1.228 +Note: if an attempt is made to scan beyond the end of text delimiter then, on
1.229 +return, aPos is set to EScanEndOfData and the function's return value indicates
1.230 +the number of characters skipped in passing the end of text delimiter.
1.231 +
1.232 +@param aPos The document position from which to scan. Must be valid, or a
1.233 +panic occurs. On return, contains the new document position.
1.234 +@param aScanMask The scan mask. See the enumeration whose values begin with
1.235 +CPlainText::EScanBackwards.
1.236 +@return The number of characters skipped to reach the new document position. */
1.237 +    {
1.238 +    const TBool posInRange=(aPos>=0 && aPos<=DocumentLength());
1.239 +    if (!posInRange)
1.240 +        {
1.241 +        OstTrace0( TRACE_FATAL, CPLAINTEXT_SCANWORDS, "ECharPosBeyondDocument" );
1.242 +        }
1.243 +    __ASSERT_ALWAYS(posInRange,Panic(ECharPosBeyondDocument));
1.244 +    // Set up the scan state for word units, then scan a single unit.
1.245 +    SScanData state;
1.246 +    InitScanControl(aPos,aScanMask,EUnitIsWord,state);
1.247 +    return ScanUnit(aPos,EUnitIsWord,state);
1.248 +    }
1.249 +
1.250 +EXPORT_C TInt CPlainText::ScanParas(TInt& aPos,TUint& aScanMask)const
1.251 +/** Scans forwards (the default) or backwards from a document position to a
1.252 +paragraph boundary selected by the scan mask: the first or last character of the
1.253 +paragraph containing the position, or the first character of the next paragraph.
1.254 +
1.255 +Note: if an attempt is made to scan beyond the end of text delimiter then, on
1.256 +return, aPos is set to EScanEndOfData and the function's return value indicates
1.257 +the number of characters skipped in passing the end of text delimiter.
1.258 +
1.259 +@param aPos The document position to scan from. Must be valid or a panic occurs.
1.260 +On return, contains the new document position.
1.261 +@param aScanMask The scan mask. See the enumeration whose values begin with
1.262 +CPlainText::EScanBackwards.
1.263 +@return The number of characters skipped to reach the new document position. */
1.264 +    {
1.265 +    const TBool posInRange=(aPos>=0 && aPos<=DocumentLength());
1.266 +    if (!posInRange)
1.267 +        {
1.268 +        OstTrace0( TRACE_FATAL, CPLAINTEXT_SCANPARAS, "ECharPosBeyondDocument" );
1.269 +        }
1.270 +    __ASSERT_ALWAYS(posInRange,Panic(ECharPosBeyondDocument));
1.271 +    // Set up the scan state for paragraph units, then scan a single unit.
1.272 +    SScanData state;
1.273 +    InitScanControl(aPos,aScanMask,EUnitIsParagraph,state);
1.274 +    return ScanUnit(aPos,EUnitIsParagraph,state);
1.275 +    }
1.276 +
1.277 +
1.278 +void CPlainText::InitScanControl(TInt& aPos,TUint& aScanMask,TUnitOfText aContext,SScanData& aScanData)const
1.279 +// Prepares aScanData for a scan of aContext units starting at aPos: copies the
1.280 +// caller's scan mask, sets the initial state of the EInsideUnit flag and derives
1.281 +// the EStopBegin/EStopEnd flags from the scan direction and destination.
1.282 +//
1.283 +    {
1.284 +    aScanData.pos=aPos;
1.285 +    aScanData.scanMask=aScanMask;
1.286 +    if (aScanData.scanMask & EScanStayIfBoundary)
1.287 +        {// Read one character in the opposite direction first.
1.288 +        aScanData.scanMask ^= EScanBackwards; // Flip the scan direction.
1.289 +        InitScanData(aScanData.pos,aScanData);
1.290 +        TestForDelimiter(aContext,aScanData);
1.291 +        aScanData.scanMask ^= EScanBackwards; // Restore the requested direction.
1.292 +        }
1.293 +    InitScanData(aScanData.pos,aScanData);
1.294 +
1.295 +    // Decide whether the scan starts out inside a unit.
1.296 +    const TBool stayingAndForwards=
1.297 +        ((aScanData.scanMask&(EScanStayIfBoundary|EScanBackwards))==EScanStayIfBoundary);
1.298 +    if (aPos==0 && stayingAndForwards)
1.299 +        {// At the start of the buffer, scanning forwards with EScanStayIfBoundary set.
1.300 +        aScanData.scanMask &=~aScanData.EInsideUnit;
1.301 +        }
1.302 +    else
1.303 +        {// Read a character: a delimiter (or no data) means "outside a unit".
1.304 +        TestForDelimiter(aContext,aScanData);
1.305 +        if (aScanData.scanMask & aScanData.EIsDelimiter)
1.306 +            aScanData.scanMask &=~ aScanData.EInsideUnit;
1.307 +        else
1.308 +            aScanData.scanMask |= aScanData.EInsideUnit;
1.309 +        }
1.310 +    // Stop flags are set relative to the scan direction, so a backwards scan
1.311 +    // swaps the roles of EStopBegin and EStopEnd.
1.312 +    const TBool backwards=((aScanData.scanMask & EScanBackwards)!=0);
1.313 +    if (aScanData.scanMask & EScanToUnitStart)
1.314 +        {
1.315 +        aScanData.scanMask |= (backwards ? aScanData.EStopEnd : aScanData.EStopBegin);
1.316 +        }
1.317 +    if (aScanData.scanMask & EScanToUnitEnd)
1.318 +        {
1.319 +        aScanData.scanMask |= (backwards ? aScanData.EStopBegin : aScanData.EStopEnd);
1.320 +        }
1.321 +    }
1.322 +
1.323 +
1.324 +void CPlainText::InitScanData(TInt aPos,SScanData& aScanData)const
1.325 +// Resets the fields of aScanData that track scan progress, ready for a ScanXxx
1.326 +// method to start reading at aPos in the direction held in aScanData.scanMask.
1.327 +//
1.328 +    {
1.329 +    const TBool backwards=((aScanData.scanMask & EScanBackwards)!=0);
1.330 +    aScanData.oldPos=aPos;
1.331 +    aScanData.pos=aPos;
1.332 +    aScanData.currentSegLen=0;
1.333 +    aScanData.delta=backwards ? -1 : +1;
1.334 +
1.335 +    // totalBufLen is the number of characters left to read in the scan direction:
1.336 +    // those in front of aPos when going backwards, otherwise those from aPos to
1.337 +    // the end of the store.
1.338 +    if (backwards)
1.339 +        aScanData.totalBufLen=aPos;
1.340 +    else
1.341 +        aScanData.totalBufLen=((iByteStore->Size()/sizeof(TText))-aScanData.pos);
1.342 +    }
1.343 +
1.344 +
1.345 +TInt CPlainText::ScanUnit(TInt& aPos,TUnitOfText aContext,SScanData& aScanData)const
1.346 +// Scans one unit of type aContext, continuing from the state held in aScanData.
1.347 +// On return aPos holds the position reached, or EScanEndOfData if that position
1.348 +// is at or beyond the end of the store. The return value is the number of
1.349 +// characters between the incoming aPos and the position reached.
1.350 +    {
1.351 +    while (TestForDelimiter(aContext,aScanData))
1.352 +        {// A character was read; sample both state flags before changing either.
1.353 +        const TBool onDelimiter=((aScanData.scanMask & aScanData.EIsDelimiter)!=0);
1.354 +        const TBool wasInside=((aScanData.scanMask & aScanData.EInsideUnit)!=0);
1.355 +        if (wasInside && onDelimiter)
1.356 +            {// Leaving a unit.
1.357 +            aScanData.scanMask &=~aScanData.EInsideUnit;
1.358 +            if (aScanData.scanMask & aScanData.EStopEnd)
1.359 +                break;
1.360 +            }
1.361 +        else if (!wasInside && onDelimiter)
1.362 +            {// A further delimiter while outside a unit.
1.363 +            if (!(aScanData.scanMask & EScanJoinDelimiters))
1.364 +                break;
1.365 +            }
1.366 +        else if (!wasInside)
1.367 +            {// Entering a unit.
1.368 +            aScanData.scanMask |= aScanData.EInsideUnit;
1.369 +            if (aScanData.scanMask & aScanData.EStopBegin)
1.370 +                break;
1.371 +            }
1.372 +        }
1.373 +    // oldPos is where the most recent TestForDelimiter() call began reading.
1.374 +    const TInt reached=aScanData.oldPos;
1.375 +    TInt charsSkipped=reached-aPos;
1.376 +    if (aScanData.scanMask & EScanBackwards)
1.377 +        charsSkipped=aPos-reached;
1.378 +    const TBool pastEnd=(reached>=(TInt)(iByteStore->Size()/sizeof(TText)));
1.379 +    aPos=pastEnd ? EScanEndOfData : reached;
1.380 +    return charsSkipped;
1.381 +    }
1.382 +
1.383 +
1.384 +TBool CPlainText::TestForDelimiter(TUnitOfText aContext,SScanData& aScanData) const
1.385 +/** Reads the next character in the scan direction and records, in the
1.386 +EIsDelimiter flag of aScanData.scanMask, whether it delimits a unit of type
1.387 +aContext. Returns TRUE if a character was read, FALSE if no data was left.*/
1.388 +    {
1.389 +    // Start with the flag set, so that it stays set if nothing can be read.
1.390 +    aScanData.scanMask |= aScanData.EIsDelimiter;
1.391 +    aScanData.oldPos = aScanData.pos;
1.392 +    TChar ch;
1.393 +    if (!GetChar(aScanData,ch))
1.394 +        return FALSE;
1.395 +
1.396 +    TBool isDelimiter = FALSE;
1.397 +    if (aContext == EUnitIsParagraph)
1.398 +        {
1.399 +        isDelimiter = (ch == EParagraphDelimiter);
1.400 +        }
1.401 +    else if (aContext == EUnitIsWord)
1.402 +        {
1.403 +        // White space and break characters always delimit a word.
1.404 +        isDelimiter = (ch == EParagraphDelimiter || ch == ELineBreak ||
1.405 +            ch == EPageBreak || ch == ESpace || ch == ETabCharacter);
1.406 +        // Pictures delimit a word only if the scan mask asks for it.
1.407 +        if (!isDelimiter && (aScanData.scanMask & EScanPictureIsDelimiter))
1.408 +            isDelimiter = (ch == EPictureCharacter);
1.409 +        if (!isDelimiter)
1.410 +            {
1.411 +            const TChar::TCategory category = ch.GetCategory();
1.412 +            if (category == TChar::EZsCategory)
1.413 +                {// It's a space.
1.414 +                isDelimiter = TRUE;
1.415 +                }
1.416 +            else if ((aScanData.scanMask & EScanPunctuationIsDelimiter) &&
1.417 +                (category & TChar::EPunctuationGroup))
1.418 +                {// Punctuation delimits a word on request, except for hyphens,
1.419 +                // the apostrophe and the right single quote.
1.420 +                isDelimiter = (ch != EHyphenMinus &&
1.421 +                    ch != EApostrophe &&
1.422 +                    ch != EPotentialHyphen &&
1.423 +                    ch != EHyphen &&
1.424 +                    ch != ERightSingleQuote);
1.425 +                }
1.426 +            }
1.427 +        }
1.428 +
1.429 +    // Publish the verdict through the EIsDelimiter flag.
1.430 +    if (isDelimiter)
1.431 +        aScanData.scanMask |= aScanData.EIsDelimiter;
1.432 +    else
1.433 +        aScanData.scanMask &= ~aScanData.EIsDelimiter;
1.434 +    return TRUE;
1.435 +    }
1.436 +
1.437 +
1.438 +TBool CPlainText::GetChar(SScanData& aScanData,TChar& aChar)const
1.439 +// Reads the next character in the scan direction into aChar and advances the
1.440 +// scan state, fetching a new segment of the store when the current one is used
1.441 +// up. Returns ETrue if a character was read, otherwise EFalse.
1.442 +    {
1.443 +    if (aScanData.currentSegLen==0)
1.444 +        {// First time in, or the previous segment has been used up.
1.445 +        if (aScanData.totalBufLen==0)
1.446 +            return EFalse; // No data left to read.
1.447 +        const TBool backwards=(aScanData.delta < 0);
1.448 +        TText* segText;
1.449 +        TInt segChars;
1.450 +        if (backwards)
1.451 +            {// Segment supplied by BackPtr() for the current position.
1.452 +            TPtrC8 bytes=iByteStore->BackPtr(aScanData.pos*sizeof(TText));
1.453 +            segText=(TText*)bytes.Ptr();
1.454 +            segChars=bytes.Length()/sizeof(TText);
1.455 +            }
1.456 +        else
1.457 +            {// Segment supplied by Ptr() for the current position.
1.458 +            TPtrC8 bytes=iByteStore->Ptr(aScanData.pos*sizeof(TText));
1.459 +            segText=(TText*)bytes.Ptr();
1.460 +            segChars=bytes.Length()/sizeof(TText);
1.461 +            }
1.462 +        aScanData.currentSegLen=(TInt)Min(aScanData.totalBufLen,segChars);
1.463 +        aScanData.totalBufLen-=aScanData.currentSegLen;
1.464 +        aScanData.buf=backwards ? segText+aScanData.currentSegLen-1 : segText; // Backwards: begin at the last character.
1.465 +        }
1.466 +    aChar=*(aScanData.buf);            // aChar takes the character just read.
1.467 +    aScanData.buf+=aScanData.delta;    // Step the read pointer in the scan direction.
1.468 +    aScanData.pos+=aScanData.delta;
1.469 +    --aScanData.currentSegLen;         // One fewer character left in this segment.
1.470 +    return ETrue;
1.471 +    }
1.472 +
1.473 +