os/textandloc/textrendering/texthandling/stext/TXTSCAN.CPP
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 *
    16 */
    17 
    18 
    19 #include "TXTETEXT.H"
    20 #include "TXTSTD.H"
    21 #include "OstTraceDefinitions.h"
    22 #ifdef OST_TRACE_COMPILER_IN_USE
    23 #include "TXTSCANTraces.h"
    24 #endif
    25 
    26 
    27 
    28 EXPORT_C TInt CPlainText::CharPosOfParagraph(TInt& aLength,TInt aParaOffset)const
    29 /** Finds the length and the start position of a paragraph identified by its 
    30 paragraph number the first paragraph is numbered zero.
    31 
    32 Notes:
    33 
    34 if aParaOffset is invalid, (equal to or greater than the total number of 
    35 paragraphs), the function's return value is EScanEndOfData
    36 
    37 @param aLength On return contains the length of the specified paragraph. 
    38 @param aParaOffset The paragraph number. The first paragraph is numbered zero. 
    39 @return The document position of the first character in the paragraph. */
    40 	{
    41 	__TEST_INVARIANT;
    42 
    43 	TInt startPos=aLength=0;
    44 	TUint scanMask=(EScanToUnitStart);
    45 	aLength=ScanParas(startPos,scanMask);
    46 	for (TInt offset=1;offset<=aParaOffset;offset++)
    47 		{
    48 		if (startPos<=EScanEndOfData)
    49 			return EScanEndOfData;
    50 		aLength=ScanParas(startPos,scanMask);
    51 		}
    52 	if (startPos==EScanEndOfData)
    53 		startPos=iByteStore->Size()/sizeof(TText);
    54 	return startPos-aLength;
    55 	}
    56 
    57 EXPORT_C TInt CPlainText::ParagraphNumberForPos(TInt& aPos)const
    58 /** Gets the number of the paragraph which contains a document position. 
    59 Paragraph numbering begins at zero.
    60 
    61 @param aPos A document position. Must be valid or a panic occurs. On return, 
    62 contains the document position of the first character in the paragraph in 
    63 which it is located. 
    64 @return The number of the paragraph containing the specified document position.
    65 The first paragraph is numbered zero. */
    66 	{
    67 	__TEST_INVARIANT;
    68 
    69 	if (aPos<0 || aPos>DocumentLength())
    70 	    {
    71 	    OstTrace0( TRACE_FATAL, CPLAINTEXT_PARAGRAPHNUMBERFORPOS, "ECharPosBeyondDocument" );
    72 	    }
    73 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
    74 	TUint scanMask=(EScanBackwards|EScanStayIfBoundary|EScanToUnitStart);
    75 	ScanParas(aPos,scanMask);
    76 	// aPos now holds the 1st char of the paragraph.
    77 	TInt paraOffset=0,currentPos=0;
    78 	scanMask=EScanToUnitStart;
    79 	while (currentPos<aPos)
    80 		{// Move forwards, counting paragraphs until we get to the current position.
    81 		paraOffset++;
    82 		ScanParas(currentPos,scanMask);
    83 		}
    84 	return paraOffset;
    85 	}
    86 
    87 EXPORT_C void CPlainText::GetWordInfo(TInt aCurrentPos,TInt& aStartPos,TInt& aLength,
    88 									  TBool aPictureIsDelimiter,TBool aPunctuationIsDelimiter)const
    89 /** Gets the start position and length of the word that contains the document 
    90 position specified.
    91 
    92 @param aCurrentPos A document position. Must be valid or a panic occurs. 
    93 @param aStartPos On return, the document position of the first character in 
    94 the word containing the position aCurrentPos.
    95 @param aLength On return, the length of the word containing document position 
    96 aCurrentPos.
    97 @param aPictureIsDelimiter ETrue if picture characters should be treated as 
    98 word delimiters, false if not. For example, when navigating text, this might 
    99 be EFalse, but when spell checking, it might be ETrue. 
   100 @param aPunctuationIsDelimiter ETrue if punctuation characters should be treated 
   101 as word delimiters, EFalse if not. */
   102 	{
   103 	__TEST_INVARIANT;
   104 
   105 	if (aCurrentPos<0 || aCurrentPos>DocumentLength())
   106 	    {
   107 	    OstTrace0( TRACE_FATAL, CPLAINTEXT_GETWORDINFO, "ECharPosBeyondDocument" );
   108 	    }
   109 	__ASSERT_ALWAYS(aCurrentPos>=0 && aCurrentPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
   110 	aStartPos=aLength=aCurrentPos;
   111 	// First find the start of the word.
   112 	TUint scanMask=(EScanBackwards|
   113 					EScanStayIfBoundary|
   114 					EScanToUnitStart);
   115 	if (aPictureIsDelimiter)
   116 		scanMask|=EScanPictureIsDelimiter;
   117 	if (aPunctuationIsDelimiter)
   118 		scanMask|=EScanPunctuationIsDelimiter;
   119 	ScanWords(aStartPos,scanMask);
   120 	// Next find the end of the word.
   121 	scanMask=(EScanStayIfBoundary|EScanToUnitEnd);
   122 	if (aPictureIsDelimiter)
   123 		scanMask|=EScanPictureIsDelimiter;
   124 	if (aPunctuationIsDelimiter)
   125 		scanMask|=EScanPunctuationIsDelimiter;
   126 	ScanWords(aLength,scanMask);
   127 	aLength-=aStartPos;
   128 	if (aLength+aStartPos>(DocumentLength()+1))
   129 	    {
   130 	    OstTrace0( TRACE_DUMP, DUP2_CPLAINTEXT_GETWORDINFO, "ECharPosBeyondDocument" );
   131 	    }
   132 	__ASSERT_DEBUG(aLength+aStartPos<=(DocumentLength()+1),Panic(ECharPosBeyondDocument));
   133 
   134 	__TEST_INVARIANT;
   135 	}
   136 
   137 EXPORT_C TInt CPlainText::ToParagraphStart(TInt& aPos) const
   138 /** Gets the document position of the start of the paragraph containing the 
   139 specified document position.
   140 
   141 @param aPos A document position. Must be valid or a panic occurs. On return, 
   142 contains the document position of the first character in the paragraph in 
   143 which it is located.
   144 @return The number of characters skipped in scanning to the start of the 
   145 paragraph. */
   146 	{
   147 	__TEST_INVARIANT;
   148 	if (aPos<0 || aPos>DocumentLength())
   149 	    {
   150 	    OstTrace0( TRACE_FATAL, CPLAINTEXT_TOPARAGRAPHSTART, "ECharPosBeyondDocument" );
   151 	    }
   152 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
   153 
   154 	int skipped = 0;
   155 	while (aPos > 0)
   156 		{
   157 		TPtrC8 p = iByteStore->BackPtr(aPos * sizeof(TText));
   158 		const TText* start = (TText*)p.Ptr();
   159 		const TText* end = start + p.Length() / sizeof(TText);
   160 		while (end > start)
   161 			{
   162 			end--;
   163 			if (*end == EParagraphDelimiter)
   164 				return skipped;
   165 			aPos--;
   166 			skipped++;
   167 			}
   168 		}
   169 	return skipped;
   170 	}
   171 
   172 EXPORT_C TInt CPlainText::WordCount()const
   173 /** Gets a count of the number of words in the text object.
   174 
   175 @return The number of words in the text object. */
   176 	{return CountUnits(EUnitIsWord);}
   177 
   178 EXPORT_C TInt CPlainText::ParagraphCount()const
   179 /** Gets a count of the number of paragraphs in the text object.
   180 
   181 Note: the paragraph delimiter which terminates every text object means this 
   182 function always returns a count of at least one.
   183 
   184 @return The number of paragraphs contained in the text object. */
   185 	{//return CountUnits(EUnitIsParagraph);}
   186 	TInt pos=0,unitCount=0;
   187 	TUint scanMask=(EScanToUnitStart);
   188 	while (pos!=EScanEndOfData)
   189 		{
   190 		unitCount++;
   191 		ScanParas(pos,scanMask);
   192 		}
   193 	return unitCount;
   194 	}
   195 
   196 TInt CPlainText::CountUnits(TUnitOfText aContext)const
   197 // Returns the number of units in the document, where
   198 // units are defined by the parameter aContext.
   199 //
   200 	{
   201 	SScanData scanData;
   202 	TInt pos=0,unitCount=0;
   203 	TUint scanMask=(EScanToUnitStart|EScanJoinDelimiters|EScanStayIfBoundary);
   204 	InitScanControl(pos,scanMask,aContext,scanData);
   205 	ScanUnit(pos,aContext,scanData);
   206 	// The above code skips over leading white space
   207 	scanData.scanMask &=~ EScanStayIfBoundary;
   208 	while(pos!=EScanEndOfData)
   209 		{
   210 		unitCount++;
   211 		ScanUnit(pos,aContext,scanData);
   212 		}//	Count complete, so return pos to the beginning of the data
   213 	pos=0;
   214 	return unitCount;
   215 	}
   216 
   217 
   218 
   219 
   220 EXPORT_C TInt CPlainText::ScanWords(TInt& aPos,TUint& aScanMask)const
   221 /** Scans from a document position to the beginning or end of a word. The 
   222 destination is determined by a scan mask. The scan can either be forwards 
   223 (the default) or backwards, and the destination may be the first or last 
   224 character in the word containing the position, or the first character in 
   225 the next word.
   226 
   227 Note: If an attempt is made to scan beyond the end of text delimiter, on return, 
   228 aPos is set to EScanEndOfData  and the function's return value indicates 
   229 the number of characters skipped in passing the end of text delimiter.
   230 
   231 @param aPos The document position from which to scan. Must be valid, or a 
   232 panic occurs. On return, contains the new document position. 
   233 @param aScanMask The scan mask. See the enumeration whose values begin with 
   234 CPlainText::EScanBackwards. 
   235 @return The number of characters skipped to reach the new document position. */
   236 	{
   237 	if (aPos<0 || aPos>DocumentLength())
   238 	    {
   239 	    OstTrace0( TRACE_FATAL, CPLAINTEXT_SCANWORDS, "ECharPosBeyondDocument" );
   240 	    }
   241 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
   242 	SScanData scanData;
   243 	InitScanControl(aPos,aScanMask,EUnitIsWord,scanData);
   244 	return ScanUnit(aPos,EUnitIsWord,scanData);
   245 	}
   246 
   247 EXPORT_C TInt CPlainText::ScanParas(TInt& aPos,TUint& aScanMask)const
   248 /** Scans from a document position to the beginning or end of a paragraph. The 
   249 destination is determined by a scan mask. The scan can either be forwards 
   250 (the default) or backwards, and the destination may be the first or last 
   251 character in the paragraph containing the position, or the first character in 
   252 the next paragraph. 
   253 
   254 Note: if an attempt is made to scan beyond the end of text delimiter, on return, 
   255 aPos is set to EScanEndOfData ) and the function's return value indicates 
   256 the number of characters skipped in passing the end of text delimiter.
   257 
   258 @param aPos The document position to scan from. Must be valid or a panic occurs. 
   259 On return, contains the new document position. 
   260 @param aScanMask The scan mask. See the enumeration whose values begin with 
   261 CPlainText::EScanBackwards. 
   262 @return The number of characters skipped to reach the new document position. */
   263 	{
   264 	if (aPos<0 || aPos>DocumentLength())
   265 	    {
   266 	    OstTrace0( TRACE_FATAL, CPLAINTEXT_SCANPARAS, "ECharPosBeyondDocument" );
   267 	    }
   268 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
   269 	SScanData scanData;
   270 	InitScanControl(aPos,aScanMask,EUnitIsParagraph,scanData);
   271 	return ScanUnit(aPos,EUnitIsParagraph,scanData);
   272 	}
   273 
   274 
   275 void CPlainText::InitScanControl(TInt& aPos,TUint& aScanMask,TUnitOfText aContext,SScanData& aScanData)const
   276 // Initialises the control segment of the bitmask
   277 // used to determine when scan has completed.
   278 // Calls the initialisation of aScanData.
   279 //
   280 	{
   281 	aScanData.pos=aPos;
   282 	aScanData.scanMask=0;
   283 	aScanData.scanMask|=aScanMask;
   284 	if (aScanData.scanMask & EScanStayIfBoundary)
   285 		{//Scan one unit the other way first.
   286 			aScanData.scanMask ^= EScanBackwards;  // Alter scan direction.
   287 			InitScanData(aScanData.pos,aScanData);				
   288 			TestForDelimiter(aContext,aScanData);
   289 			aScanData.scanMask ^= EScanBackwards;  // Restore original scan direction.
   290 		}
   291 	InitScanData(aScanData.pos,aScanData);
   292 	if(!aPos && (aScanData.scanMask&(EScanStayIfBoundary|EScanBackwards))==EScanStayIfBoundary)
   293 		{//At the start of the buffer and scanning forwards and ScanStayIfBoundary set.
   294 		aScanData.scanMask &=~aScanData.EInsideUnit;
   295 		}
   296 	else 
   297 		{
   298 		TestForDelimiter(aContext,aScanData);
   299 		if (aScanData.scanMask & aScanData.EIsDelimiter)
   300 			aScanData.scanMask &=~ aScanData.EInsideUnit;
   301 		else
   302  			aScanData.scanMask |= aScanData.EInsideUnit;
   303 		}
   304 	if (aScanData.scanMask & EScanBackwards)
   305 		{//Set stop flags relative to scan direction
   306 		if (aScanData.scanMask &  EScanToUnitStart)
   307 			aScanData.scanMask |= aScanData.EStopEnd;
   308 		if (aScanData.scanMask &  EScanToUnitEnd)
   309 			aScanData.scanMask |= aScanData.EStopBegin;
   310 		}
   311 	else
   312 		{//Set stop flags relative to scan direction
   313 		if (aScanData.scanMask &  EScanToUnitStart)
   314 			aScanData.scanMask |= aScanData.EStopBegin;
   315 		if (aScanData.scanMask &  EScanToUnitEnd)
   316 			aScanData.scanMask |= aScanData.EStopEnd;
   317 		}
   318 	}
   319 
   320 
   321 void CPlainText::InitScanData(TInt aPos,SScanData& aScanData)const
   322 // Initialises the necessary elements of the scan structure, for ScanXxx 
   323 // methods.  These are used to track scan progress.
   324 //
   325 	{
   326 	aScanData.pos=aPos;
   327 	aScanData.oldPos=aPos;
   328 	aScanData.currentSegLen=0;
   329 	if (aScanData.scanMask & EScanBackwards)
   330 		{//Scanning backwards.
   331 		aScanData.delta=-1;
   332 		aScanData.totalBufLen=aPos;
   333 		}
   334 	else
   335 		{//Scanning forwards.
   336 		aScanData.delta=+1;
   337 		aScanData.totalBufLen=((iByteStore->Size()/sizeof(TText))-aScanData.pos);
   338 		}
   339 	}	
   340 
   341 
   342 TInt CPlainText::ScanUnit(TInt& aPos,TUnitOfText aContext,SScanData& aScanData)const
   343 // Scan from position aPos, by one unit.
   344 // Update aPos to the current position, and return the number
   345 // of characters skipped or EScanEndOfData if at the end of the buffer.
   346 //
   347 	{
   348 	while (TestForDelimiter(aContext,aScanData))
   349 		{//More data has been read
   350 		if (aScanData.scanMask & aScanData.EInsideUnit)
   351 			{
   352 			if (aScanData.scanMask & aScanData.EIsDelimiter)
   353 				{
   354 				aScanData.scanMask &=~aScanData.EInsideUnit;
   355 				if (aScanData.scanMask & aScanData.EStopEnd)
   356 					break;
   357 				}
   358 			 }
   359 		else
   360 			{
   361 			if (aScanData.scanMask & aScanData.EIsDelimiter)
   362 				{
   363 				if (!(aScanData.scanMask & EScanJoinDelimiters))
   364 					break;
   365 				}
   366 	   		else
   367 				{
   368 				aScanData.scanMask |= aScanData.EInsideUnit;
   369 				if (aScanData.scanMask & aScanData.EStopBegin)
   370 					break;
   371 				}
   372 			}
   373 		}
   374 	TInt charsSkipped=(aScanData.scanMask & EScanBackwards)? aPos-aScanData.oldPos : 
   375 										aScanData.oldPos-aPos;
   376 	aPos=(aScanData.oldPos<(TInt)(iByteStore->Size()/sizeof(TText)))? aScanData.oldPos : EScanEndOfData;
   377 	return charsSkipped;
   378 	}
   379 
   380 
   381 TBool CPlainText::TestForDelimiter(TUnitOfText aContext,SScanData& aScanData) const
   382 /** Return ETrue if a character was read successfully.
   383 Set EIsDelimiter flag if the character read
   384 is a delimiter of the appropriate type.*/	
   385     {
   386 	aScanData.scanMask |= aScanData.EIsDelimiter;
   387 	aScanData.oldPos = aScanData.pos;
   388 	TChar c;
   389 	if (!GetChar(aScanData,c))
   390 		return FALSE;
   391 
   392 	TBool found = FALSE;
   393 	if (aContext == EUnitIsParagraph)
   394 		found = c == EParagraphDelimiter;
   395 	else if (aContext == EUnitIsWord)
   396 		{
   397 		if (c == EParagraphDelimiter ||
   398 			c == ELineBreak ||
   399 			c == EPageBreak ||
   400 			c == ESpace ||
   401 			c == ETabCharacter)
   402 			found = TRUE;
   403 		if (!found && (aScanData.scanMask & EScanPictureIsDelimiter))
   404 			{
   405 			if (c == EPictureCharacter)
   406 				found = TRUE;
   407 			}
   408 		if (!found)
   409 			{
   410 			TChar::TCategory cat = c.GetCategory();
   411 			if (cat == TChar::EZsCategory) // it's a space
   412 				found = TRUE;
   413 			if (!found &&
   414 				(aScanData.scanMask & EScanPunctuationIsDelimiter) &&
   415 				(cat & TChar::EPunctuationGroup))
   416 				{
   417 				if (c != EHyphenMinus &&
   418 					c != EApostrophe &&
   419 					c != EPotentialHyphen &&
   420 					c != EHyphen && 
   421 					c != ERightSingleQuote)
   422 					found = TRUE;
   423 				}
   424 			}
   425 		}
   426 
   427 	if (found)
   428 		aScanData.scanMask |= aScanData.EIsDelimiter;
   429 	else
   430 		aScanData.scanMask &= ~aScanData.EIsDelimiter;
   431 	return TRUE;
   432 	}
   433 
   434 
   435 TBool CPlainText::GetChar(SScanData& aScanData,TChar& aChar)const
   436 // If there is one, write the next character to aChar, and return ETrue,
   437 // otherwise return EFalse.
   438 //
   439 	{
   440 	TPtrC view(_S("a"));  // Dummy value cos no default constructor.
   441 	if (!aScanData.currentSegLen)	
   442 		{//First time in, or new segment.
   443 		if (!aScanData.totalBufLen)
   444 			{//No data left to read.
   445 			return EFalse;	
   446 			}
   447 		if (aScanData.delta < 0)
   448 			{//Scanning backwards
   449 			TPtrC8 tempView=iByteStore->BackPtr(aScanData.pos*sizeof(TText));
   450 			view.Set((TText*)tempView.Ptr(),tempView.Length()/sizeof(TText));
   451 			}
   452 		else
   453 			{//Scanning forwards
   454 			TPtrC8 tempView=iByteStore->Ptr(aScanData.pos*sizeof(TText));
   455 			view.Set((TText*)tempView.Ptr(),tempView.Length()/sizeof(TText));
   456 			}//Ptr now returns a TDes.
   457 		aScanData.currentSegLen=(TInt)Min(aScanData.totalBufLen,(TInt)view.Length());
   458 		aScanData.totalBufLen-=aScanData.currentSegLen;
   459 		aScanData.buf=(TText*)view.Ptr();
   460 		if (aScanData.delta < 0)
   461 			aScanData.buf+=aScanData.currentSegLen-1;
   462 		}
   463 	aChar=*(aScanData.buf);  // aChar takes the character just read.
   464 	aScanData.buf+=aScanData.delta;  // pText increments correctly for both ASCII and Unicode
   465 	aScanData.pos+=aScanData.delta;
   466 	aScanData.currentSegLen-=1;  // len holds the number of chars left, for ASCII and Unicode
   467 	return ETrue;
   468 	}
   469 
   470