sl@0
|
1 |
/*
|
sl@0
|
2 |
********************************************************************
|
sl@0
|
3 |
*
|
sl@0
|
4 |
* Copyright (C) 1997-2005, International Business Machines
|
sl@0
|
5 |
* Corporation and others. All Rights Reserved.
|
sl@0
|
6 |
*
|
sl@0
|
7 |
********************************************************************
|
sl@0
|
8 |
*/
|
sl@0
|
9 |
|
sl@0
|
10 |
#ifndef CHARITER_H
|
sl@0
|
11 |
#define CHARITER_H
|
sl@0
|
12 |
|
sl@0
|
13 |
#include "unicode/utypes.h"
|
sl@0
|
14 |
#include "unicode/uobject.h"
|
sl@0
|
15 |
#include "unicode/unistr.h"
|
sl@0
|
16 |
/**
|
sl@0
|
17 |
* \file
|
sl@0
|
18 |
* \brief C++ API: Character Iterator
|
sl@0
|
19 |
*/
|
sl@0
|
20 |
|
sl@0
|
21 |
U_NAMESPACE_BEGIN
|
sl@0
|
22 |
/**
|
sl@0
|
23 |
* Abstract class that defines an API for forward-only iteration
|
sl@0
|
24 |
* on text objects.
|
sl@0
|
25 |
* This is a minimal interface for iteration without random access
|
sl@0
|
26 |
* or backwards iteration. It is especially useful for wrapping
|
sl@0
|
27 |
* streams with converters into an object for collation or
|
sl@0
|
28 |
* normalization.
|
sl@0
|
29 |
*
|
sl@0
|
30 |
* <p>Characters can be accessed in two ways: as code units or as
|
sl@0
|
31 |
* code points.
|
sl@0
|
32 |
* Unicode code points are 21-bit integers and are the scalar values
|
sl@0
|
33 |
* of Unicode characters. ICU uses the type UChar32 for them.
|
sl@0
|
34 |
* Unicode code units are the storage units of a given
|
sl@0
|
35 |
* Unicode/UCS Transformation Format (a character encoding scheme).
|
sl@0
|
36 |
* With UTF-16, all code points can be represented with either one
|
sl@0
|
37 |
* or two code units ("surrogates").
|
sl@0
|
38 |
* String storage is typically based on code units, while properties
|
sl@0
|
39 |
* of characters are typically determined using code point values.
|
sl@0
|
40 |
* Some processes may be designed to work with sequences of code units,
|
sl@0
|
41 |
* or it may be known that all characters that are important to an
|
sl@0
|
42 |
* algorithm can be represented with single code units.
|
sl@0
|
43 |
* Other processes will need to use the code point access functions.</p>
|
sl@0
|
44 |
*
|
sl@0
|
45 |
* <p>ForwardCharacterIterator provides nextPostInc() to access
|
sl@0
|
46 |
* a code unit and advance an internal position into the text object,
|
sl@0
|
47 |
* similar to a <code>return text[position++]</code>.<br>
|
sl@0
|
48 |
* It provides next32PostInc() to access a code point and advance an internal
|
sl@0
|
49 |
* position.</p>
|
sl@0
|
50 |
*
|
sl@0
|
51 |
* <p>next32PostInc() assumes that the current position is that of
|
sl@0
|
52 |
* the beginning of a code point, i.e., of its first code unit.
|
sl@0
|
53 |
* After next32PostInc(), this will be true again.
|
sl@0
|
54 |
* In general, access to code units and code points in the same
|
sl@0
|
55 |
* iteration loop should not be mixed. In UTF-16, if the current position
|
sl@0
|
56 |
* is on a second code unit (Low Surrogate), then only that code unit
|
sl@0
|
57 |
* is returned even by next32PostInc().</p>
|
sl@0
|
58 |
*
|
sl@0
|
59 |
* <p>For iteration with either function, there are two ways to
|
sl@0
|
60 |
* check for the end of the iteration. When there are no more
|
sl@0
|
61 |
* characters in the text object:
|
sl@0
|
62 |
* <ul>
|
sl@0
|
63 |
* <li>The hasNext() function returns FALSE.</li>
|
sl@0
|
64 |
* <li>nextPostInc() and next32PostInc() return DONE
|
sl@0
|
65 |
* when one attempts to read beyond the end of the text object.</li>
|
sl@0
|
66 |
* </ul>
|
sl@0
|
67 |
*
|
sl@0
|
68 |
* Example:
|
sl@0
|
69 |
* \code
|
sl@0
|
70 |
* void function1(ForwardCharacterIterator &it) {
|
sl@0
|
71 |
* UChar32 c;
|
sl@0
|
72 |
* while(it.hasNext()) {
|
sl@0
|
73 |
* c=it.next32PostInc();
|
sl@0
|
74 |
* // use c
|
sl@0
|
75 |
* }
|
sl@0
|
76 |
* }
|
sl@0
|
77 |
*
|
sl@0
|
78 |
* void function2(ForwardCharacterIterator &it) {
|
sl@0
|
79 |
* UChar c;
|
sl@0
|
80 |
* while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
|
sl@0
|
81 |
* // use c
|
sl@0
|
82 |
* }
|
sl@0
|
83 |
* }
|
sl@0
|
84 |
* \endcode
|
sl@0
|
85 |
* </p>
|
sl@0
|
86 |
*
|
sl@0
|
87 |
* @stable ICU 2.0
|
sl@0
|
88 |
*/
|
sl@0
|
89 |
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Sentinel value that most ForwardCharacterIterator functions return
     * once the iterator has reached the limits of its iteration.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();

    /**
     * Equality comparison.
     * @param that the ForwardCharacterIterator to compare with this one
     * @return true when both iterators refer to the same character
     *         in the same character-storage object
     * @stable ICU 2.0
     */
    virtual UBool operator==(const ForwardCharacterIterator &that) const = 0;

    /**
     * Inequality comparison.
     * @param that the ForwardCharacterIterator to compare with this one
     * @return true when the iterators refer to different text-storage
     *         objects, or to different characters in the same
     *         text-storage object
     * @stable ICU 2.0
     */
    inline UBool operator!=(const ForwardCharacterIterator &that) const;

    /**
     * Computes a hash code for this iterator.
     * @return the hash code
     * @stable ICU 2.0
     */
    virtual int32_t hashCode() const = 0;

    /**
     * "Poor man's RTTI": returns a UClassID for this
     * ForwardCharacterIterator.
     * <P>Although this function is public, it must NOT be considered
     * part of CharacterIterator's API.
     * @return a UClassID for this ForwardCharacterIterator
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID() const = 0;

    /**
     * Post-increment access to code units: fetches the current code unit
     * for returning, then advances to the next code unit in the iteration
     * range (toward endIndex()). Returns DONE if there are no more code
     * units to return.
     * @return the current code unit
     * @stable ICU 2.0
     */
    virtual UChar nextPostInc() = 0;

    /**
     * Post-increment access to code points: fetches the current code point
     * for returning, then advances to the next code point in the iteration
     * range (toward endIndex()). Returns DONE if there are no more code
     * points to return.
     * @return the current code point
     * @stable ICU 2.0
     */
    virtual UChar32 next32PostInc() = 0;

    /**
     * Tells whether forward iteration can continue. Intended for use
     * together with nextPostInc() or next32PostInc().
     * @return FALSE if there are no more code units or code points
     *         at or after the current position in the iteration range
     * @stable ICU 2.0
     */
    virtual UBool hasNext() = 0;

protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator();

    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator(const ForwardCharacterIterator &other);

    /**
     * Assignment operator to be overridden in the implementing class.
     * This base-class version copies nothing and simply returns *this.
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator &) {
        return *this;
    }
};
|
sl@0
|
186 |
|
sl@0
|
187 |
/**
|
sl@0
|
188 |
* Abstract class that defines an API for iteration
|
sl@0
|
189 |
* on text objects.
|
sl@0
|
190 |
* This is an interface for forward and backward iteration
|
sl@0
|
191 |
* and random access into a text object.
|
sl@0
|
192 |
*
|
sl@0
|
193 |
* <p>The API provides backward compatibility to the Java and older ICU
|
sl@0
|
194 |
* CharacterIterator classes but extends them significantly:
|
sl@0
|
195 |
* <ol>
|
sl@0
|
196 |
* <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
|
sl@0
|
197 |
* <li>While the old API functions provided forward iteration with
|
sl@0
|
198 |
* "pre-increment" semantics, the new one also provides functions
|
sl@0
|
199 |
* with "post-increment" semantics. They are more efficient and should
|
sl@0
|
200 |
* be the preferred iterator functions for new implementations.
|
sl@0
|
201 |
* The backward iteration always had "pre-decrement" semantics, which
|
sl@0
|
202 |
* are efficient.</li>
|
sl@0
|
203 |
* <li>Just like ForwardCharacterIterator, it provides access to
|
sl@0
|
204 |
* both code units and code points. Code point access versions are available
|
sl@0
|
205 |
* for the old and the new iteration semantics.</li>
|
sl@0
|
206 |
* <li>There are new functions for setting and moving the current position
|
sl@0
|
207 |
* without returning a character, for efficiency.</li>
|
sl@0
|
208 |
* </ol>
|
sl@0
|
209 |
*
|
sl@0
|
210 |
* See ForwardCharacterIterator for examples for using the new forward iteration
|
sl@0
|
211 |
* functions. For backward iteration, there is also a hasPrevious() function
|
sl@0
|
212 |
* that can be used analogously to hasNext().
|
sl@0
|
213 |
* The old functions work as before and are shown below.</p>
|
sl@0
|
214 |
*
|
sl@0
|
215 |
* <p>Examples for some of the new functions:</p>
|
sl@0
|
216 |
*
|
sl@0
|
217 |
* Forward iteration with hasNext():
|
sl@0
|
218 |
* \code
|
sl@0
|
219 |
* void forward1(CharacterIterator &it) {
|
sl@0
|
220 |
* UChar32 c;
|
sl@0
|
221 |
* for(it.setToStart(); it.hasNext();) {
|
sl@0
|
222 |
* c=it.next32PostInc();
|
sl@0
|
223 |
* // use c
|
sl@0
|
224 |
* }
|
sl@0
|
225 |
* }
|
sl@0
|
226 |
* \endcode
|
sl@0
|
227 |
* Forward iteration more similar to loops with the old forward iteration,
|
sl@0
|
228 |
* showing a way to convert simple for() loops:
|
sl@0
|
229 |
* \code
|
sl@0
|
230 |
* void forward2(CharacterIterator &it) {
|
sl@0
|
231 |
* UChar c;
|
sl@0
|
232 |
* for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
|
sl@0
|
233 |
* // use c
|
sl@0
|
234 |
* }
|
sl@0
|
235 |
* }
|
sl@0
|
236 |
* \endcode
|
sl@0
|
237 |
* Backward iteration with setToEnd() and hasPrevious():
|
sl@0
|
238 |
* \code
|
sl@0
|
239 |
* void backward1(CharacterIterator &it) {
|
sl@0
|
240 |
* UChar32 c;
|
sl@0
|
241 |
* for(it.setToEnd(); it.hasPrevious();) {
|
sl@0
|
242 |
* c=it.previous32();
|
sl@0
|
243 |
* // use c
|
sl@0
|
244 |
* }
|
sl@0
|
245 |
* }
|
sl@0
|
246 |
* \endcode
|
sl@0
|
247 |
* Backward iteration with a more traditional for() loop:
|
sl@0
|
248 |
* \code
|
sl@0
|
249 |
* void backward2(CharacterIterator &it) {
|
sl@0
|
250 |
* UChar c;
|
sl@0
|
251 |
* for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
|
sl@0
|
252 |
* // use c
|
sl@0
|
253 |
* }
|
sl@0
|
254 |
* }
|
sl@0
|
255 |
* \endcode
|
sl@0
|
256 |
*
|
sl@0
|
257 |
* Example for random access:
|
sl@0
|
258 |
* \code
|
sl@0
|
259 |
* void random(CharacterIterator &it) {
|
sl@0
|
260 |
* // set to the third code point from the beginning
|
sl@0
|
261 |
* it.move32(3, CharacterIterator::kStart);
|
sl@0
|
262 |
* // get a code point from here without moving the position
|
sl@0
|
263 |
* UChar32 c=it.current32();
|
sl@0
|
264 |
* // get the position
|
sl@0
|
265 |
* int32_t pos=it.getIndex();
|
sl@0
|
266 |
* // get the previous code unit
|
sl@0
|
267 |
* UChar u=it.previous();
|
sl@0
|
268 |
* // move back one more code unit
|
sl@0
|
269 |
* it.move(-1, CharacterIterator::kCurrent);
|
sl@0
|
270 |
* // set the position back to where it was
|
sl@0
|
271 |
* // and read the same code point c and move beyond it
|
sl@0
|
272 |
* it.setIndex(pos);
|
sl@0
|
273 |
* if(c!=it.next32PostInc()) {
|
sl@0
|
274 |
* exit(1); // CharacterIterator inconsistent
|
sl@0
|
275 |
* }
|
sl@0
|
276 |
* }
|
sl@0
|
277 |
* \endcode
|
sl@0
|
278 |
*
|
sl@0
|
279 |
* <p>Examples, especially for the old API:</p>
|
sl@0
|
280 |
*
|
sl@0
|
281 |
* Function processing characters, in this example simple output
|
sl@0
|
282 |
* <pre>
|
sl@0
|
283 |
* \code
|
sl@0
|
284 |
* void processChar( UChar c )
|
sl@0
|
285 |
* {
|
sl@0
|
286 |
* cout << " " << c;
|
sl@0
|
287 |
* }
|
sl@0
|
288 |
* \endcode
|
sl@0
|
289 |
* </pre>
|
sl@0
|
290 |
* Traverse the text from start to finish
|
sl@0
|
291 |
* <pre>
|
sl@0
|
292 |
* \code
|
sl@0
|
293 |
* void traverseForward(CharacterIterator& iter)
|
sl@0
|
294 |
* {
|
sl@0
|
295 |
* for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
sl@0
|
296 |
* processChar(c);
|
sl@0
|
297 |
* }
|
sl@0
|
298 |
* }
|
sl@0
|
299 |
* \endcode
|
sl@0
|
300 |
* </pre>
|
sl@0
|
301 |
* Traverse the text backwards, from end to start
|
sl@0
|
302 |
* <pre>
|
sl@0
|
303 |
* \code
|
sl@0
|
304 |
* void traverseBackward(CharacterIterator& iter)
|
sl@0
|
305 |
* {
|
sl@0
|
306 |
* for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
sl@0
|
307 |
* processChar(c);
|
sl@0
|
308 |
* }
|
sl@0
|
309 |
* }
|
sl@0
|
310 |
* \endcode
|
sl@0
|
311 |
* </pre>
|
sl@0
|
312 |
* Traverse both forward and backward from a given position in the text.
|
sl@0
|
313 |
* The Unicode::isLetter()/Unicode::isDigit() tests in this example represent some additional stopping criteria.
|
sl@0
|
314 |
* <pre>
|
sl@0
|
315 |
* \code
|
sl@0
|
316 |
* void traverseOut(CharacterIterator& iter, int32_t pos)
|
sl@0
|
317 |
* {
|
sl@0
|
318 |
* UChar c;
|
sl@0
|
319 |
* for (c = iter.setIndex(pos);
|
sl@0
|
320 |
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
sl@0
|
321 |
* c = iter.next()) {}
|
sl@0
|
322 |
* int32_t end = iter.getIndex();
|
sl@0
|
323 |
* for (c = iter.setIndex(pos);
|
sl@0
|
324 |
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
sl@0
|
325 |
* c = iter.previous()) {}
|
sl@0
|
326 |
* int32_t start = iter.getIndex() + 1;
|
sl@0
|
327 |
*
|
sl@0
|
328 |
* cout << "start: " << start << " end: " << end << endl;
|
sl@0
|
329 |
* for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
sl@0
|
330 |
* processChar(c);
|
sl@0
|
331 |
* }
|
sl@0
|
332 |
* }
|
sl@0
|
333 |
* \endcode
|
sl@0
|
334 |
* </pre>
|
sl@0
|
335 |
* Creating a StringCharacterIterator and calling the test functions
|
sl@0
|
336 |
* <pre>
|
sl@0
|
337 |
* \code
|
sl@0
|
338 |
* void CharacterIterator_Example( void )
|
sl@0
|
339 |
* {
|
sl@0
|
340 |
* cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
sl@0
|
341 |
* UnicodeString text("Ein kleiner Satz.");
|
sl@0
|
342 |
* StringCharacterIterator iterator(text);
|
sl@0
|
343 |
* cout << "----- traverseForward: -----------" << endl;
|
sl@0
|
344 |
* traverseForward( iterator );
|
sl@0
|
345 |
* cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
sl@0
|
346 |
* traverseBackward( iterator );
|
sl@0
|
347 |
* cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
sl@0
|
348 |
* traverseOut( iterator, 7 );
|
sl@0
|
349 |
* cout << endl << endl << "-----" << endl;
|
sl@0
|
350 |
* }
|
sl@0
|
351 |
* \endcode
|
sl@0
|
352 |
* </pre>
|
sl@0
|
353 |
*
|
sl@0
|
354 |
* @stable ICU 2.0
|
sl@0
|
355 |
*/
|
sl@0
|
356 |
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Reference points from which move() and move32() measure their delta.
     * @stable ICU 2.0
     */
    enum EOrigin {
        kStart,
        kCurrent,
        kEnd
    };

    /**
     * Creates a new CharacterIterator of the same concrete class as this
     * one, referring to the same character in the same text-storage
     * object. The caller is responsible for deleting the clone.
     * @return a pointer to the new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator *clone() const = 0;

    /**
     * Positions the iterator on the first code unit of its iteration
     * range and returns that code unit.
     * Use this to begin an iteration with next().
     * @return the first code unit in the iteration range
     * @stable ICU 2.0
     */
    virtual UChar first() = 0;

    /**
     * Positions the iterator on the first code unit of its iteration
     * range, returns that code unit, and then moves the position to the
     * second code unit. An alternative to setToStart() for forward
     * iteration with nextPostInc().
     * @return the first code unit in the iteration range
     * @stable ICU 2.0
     */
    virtual UChar firstPostInc();

    /**
     * Positions the iterator on the first code point of its iteration
     * range and returns that code point.
     * Use this to begin an iteration with next32().
     * Note that an iteration with next32PostInc(), beginning with,
     * e.g., setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in the iteration range
     * @stable ICU 2.0
     */
    virtual UChar32 first32() = 0;

    /**
     * Positions the iterator on the first code point of its iteration
     * range, returns that code point, and then moves the position to the
     * second code point. An alternative to setToStart() for forward
     * iteration with next32PostInc().
     * @return the first code point in the iteration range
     * @stable ICU 2.0
     */
    virtual UChar32 first32PostInc();

    /**
     * Positions the iterator on the first code unit or code point of its
     * iteration range without returning a character. Use this to begin a
     * forward iteration with nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToStart();

    /**
     * Positions the iterator on the last code unit of its iteration range
     * and returns that code unit.
     * Use this to begin an iteration with previous().
     * @return the last code unit
     * @stable ICU 2.0
     */
    virtual UChar last() = 0;

    /**
     * Positions the iterator on the last code point of its iteration
     * range and returns that code point.
     * Use this to begin an iteration with previous32().
     * @return the last code point
     * @stable ICU 2.0
     */
    virtual UChar32 last32() = 0;

    /**
     * Positions the iterator at the end of its iteration range, just
     * behind the last code unit or code point. Use this to begin a
     * backward iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToEnd();

    /**
     * Positions the iterator on the "position"-th code unit of the
     * text-storage object it refers to and returns that code unit.
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit
     * @stable ICU 2.0
     */
    virtual UChar setIndex(int32_t position) = 0;

    /**
     * Positions the iterator on the beginning of the code point that
     * contains the "position"-th code unit of the text-storage object it
     * refers to, and returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the code point that contains the "position"-th code unit
     * @stable ICU 2.0
     */
    virtual UChar32 setIndex32(int32_t position) = 0;

    /**
     * Reads the code unit at the current position without moving.
     * @return the current code unit
     * @stable ICU 2.0
     */
    virtual UChar current() const = 0;

    /**
     * Reads the code point at the current position without moving.
     * @return the current code point
     * @stable ICU 2.0
     */
    virtual UChar32 current32() const = 0;

    /**
     * Pre-increment access to code units: advances to the next code unit
     * in the iteration range (toward endIndex()) and returns that code
     * unit. Returns DONE if there are no more code units to return.
     * @return the next code unit
     * @stable ICU 2.0
     */
    virtual UChar next() = 0;

    /**
     * Pre-increment access to code points: advances to the next code
     * point in the iteration range (toward endIndex()) and returns that
     * code point. Returns DONE if there are no more code points to return.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than iteration with "post-increment" semantics
     * that is provided by next32PostInc().
     * @return the next code point
     * @stable ICU 2.0
     */
    virtual UChar32 next32() = 0;

    /**
     * Moves to the previous code unit in the iteration range
     * (toward startIndex()) and returns that code unit.
     * Returns DONE if there are no more code units to return.
     * @return the previous code unit
     * @stable ICU 2.0
     */
    virtual UChar previous() = 0;

    /**
     * Moves to the previous code point in the iteration range
     * (toward startIndex()) and returns that code point.
     * Returns DONE if there are no more code points to return.
     * @return the previous code point
     * @stable ICU 2.0
     */
    virtual UChar32 previous32() = 0;

    /**
     * Tells whether backward iteration can continue. Intended for use
     * together with previous() or previous32().
     * @return FALSE if there are no more code units or code points
     *         before the current position in the iteration range,
     *         TRUE otherwise
     * @stable ICU 2.0
     */
    virtual UBool hasPrevious() = 0;

    /**
     * Gives the numeric index, in the underlying text-storage object, of
     * the character returned by first(). Because an iterator may cover
     * only part of a text-storage object, this number is not necessarily 0.
     * @return the numeric index in the underlying text-storage
     *         object of the character returned by first()
     * @stable ICU 2.0
     */
    inline int32_t startIndex() const;

    /**
     * Gives the numeric index, in the underlying text-storage object, of
     * the position immediately BEYOND the character returned by last().
     * @return the numeric index in the underlying text-storage
     *         object of the position immediately BEYOND the character
     *         returned by last()
     * @stable ICU 2.0
     */
    inline int32_t endIndex() const;

    /**
     * Gives the numeric index, in the underlying text-storage object, of
     * the character the iterator currently refers to
     * (i.e., the character returned by current()).
     * @return the numeric index in the text-storage object of
     *         the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t getIndex() const;

    /**
     * Gives the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t getLength() const;

    /**
     * Moves the current position by a number of code units, measured
     * from the start or end of the iteration range or from the current
     * position itself. A positive delta moves forward, a negative delta
     * moves backward.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move(int32_t delta, EOrigin origin) = 0;

    /**
     * Moves the current position by a number of code points, measured
     * from the start or end of the iteration range or from the current
     * position itself. A positive delta moves forward, a negative delta
     * moves backward.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".
     * @param result Receives a copy of the text under iteration.
     * @stable ICU 2.0
     */
    virtual void getText(UnicodeString &result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor that only sets the length field of this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor that only sets the length and position fields of this
     * base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor that only sets the length, start, end, and position
     * fields of this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);

    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator. Sets this CharacterIterator to have the same
     * behavior as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * This is necessary for correct getText() and hashCode().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * @stable ICU 2.0
     */
    int32_t pos;

    /**
     * Base class field for the start of the iteration range.
     * @stable ICU 2.0
     */
    int32_t begin;

    /**
     * Base class field for the end of the iteration range.
     * @stable ICU 2.0
     */
    int32_t end;
};
|
sl@0
|
679 |
|
sl@0
|
680 |
// Inequality is defined as the logical negation of the (virtual) operator==().
inline UBool
ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const
{
    const UBool isEqual = this->operator==(that);
    return !isEqual;
}
|
sl@0
|
684 |
|
sl@0
|
685 |
inline int32_t
|
sl@0
|
686 |
CharacterIterator::setToStart() {
|
sl@0
|
687 |
return move(0, kStart);
|
sl@0
|
688 |
}
|
sl@0
|
689 |
|
sl@0
|
690 |
inline int32_t
|
sl@0
|
691 |
CharacterIterator::setToEnd() {
|
sl@0
|
692 |
return move(0, kEnd);
|
sl@0
|
693 |
}
|
sl@0
|
694 |
|
sl@0
|
695 |
inline int32_t
|
sl@0
|
696 |
CharacterIterator::startIndex(void) const {
|
sl@0
|
697 |
return begin;
|
sl@0
|
698 |
}
|
sl@0
|
699 |
|
sl@0
|
700 |
inline int32_t
|
sl@0
|
701 |
CharacterIterator::endIndex(void) const {
|
sl@0
|
702 |
return end;
|
sl@0
|
703 |
}
|
sl@0
|
704 |
|
sl@0
|
705 |
inline int32_t
|
sl@0
|
706 |
CharacterIterator::getIndex(void) const {
|
sl@0
|
707 |
return pos;
|
sl@0
|
708 |
}
|
sl@0
|
709 |
|
sl@0
|
710 |
inline int32_t
|
sl@0
|
711 |
CharacterIterator::getLength(void) const {
|
sl@0
|
712 |
return textLength;
|
sl@0
|
713 |
}
|
sl@0
|
714 |
|
sl@0
|
715 |
U_NAMESPACE_END
|
sl@0
|
716 |
#endif
|