sl@0
|
1 |
/*
|
sl@0
|
2 |
**********************************************************************
|
sl@0
|
3 |
* Copyright (c) 2002-2005, International Business Machines
|
sl@0
|
4 |
* Corporation and others. All Rights Reserved.
|
sl@0
|
5 |
**********************************************************************
|
sl@0
|
6 |
*/
|
sl@0
|
7 |
#ifndef USETITER_H
|
sl@0
|
8 |
#define USETITER_H
|
sl@0
|
9 |
|
sl@0
|
10 |
#include "unicode/utypes.h"
|
sl@0
|
11 |
#include "unicode/uobject.h"
|
sl@0
|
12 |
#include "unicode/unistr.h"
|
sl@0
|
13 |
|
sl@0
|
14 |
/**
|
sl@0
|
15 |
* \file
|
sl@0
|
16 |
* \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
|
sl@0
|
17 |
*/
|
sl@0
|
18 |
|
sl@0
|
19 |
U_NAMESPACE_BEGIN
|
sl@0
|
20 |
|
sl@0
|
21 |
class UnicodeSet;
|
sl@0
|
22 |
class UnicodeString;
|
sl@0
|
23 |
|
sl@0
|
24 |
/**
|
sl@0
|
25 |
*
|
sl@0
|
26 |
* UnicodeSetIterator iterates over the contents of a UnicodeSet. It
|
sl@0
|
27 |
* iterates over either code points or code point ranges. After all
|
sl@0
|
28 |
* code points or ranges have been returned, it returns the
|
sl@0
|
29 |
* multicharacter strings of the UnicodeSet, if any.
|
sl@0
|
30 |
*
|
sl@0
|
31 |
* <p>To iterate over code points, use a loop like this:
|
sl@0
|
32 |
* <pre>
|
sl@0
|
33 |
* UnicodeSetIterator it(set);
|
sl@0
|
34 |
* while (it.next()) {
|
sl@0
|
35 |
* if (it.isString()) {
|
sl@0
|
36 |
* processString(it.getString());
|
sl@0
|
37 |
* } else {
|
sl@0
|
38 |
* processCodepoint(it.getCodepoint());
|
sl@0
|
39 |
* }
|
sl@0
|
40 |
* }
|
sl@0
|
41 |
* </pre>
|
sl@0
|
42 |
*
|
sl@0
|
43 |
* <p>To iterate over code point ranges, use a loop like this:
|
sl@0
|
44 |
* <pre>
|
sl@0
|
45 |
* UnicodeSetIterator it(set);
|
sl@0
|
46 |
* while (it.nextRange()) {
|
sl@0
|
47 |
* if (it.isString()) {
|
sl@0
|
48 |
* processString(it.getString());
|
sl@0
|
49 |
* } else {
|
sl@0
|
50 |
* processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
|
sl@0
|
51 |
* }
|
sl@0
|
52 |
* }
|
sl@0
|
53 |
* </pre>
|
sl@0
|
54 |
* @author M. Davis
|
sl@0
|
55 |
* @stable ICU 2.4
|
sl@0
|
56 |
*/
|
sl@0
|
57 |
class U_COMMON_API UnicodeSetIterator : public UObject {

 protected:

    /**
     * Value of <tt>codepoint</tt> if the iterator points to a string.
     * If <tt>codepoint == IS_STRING</tt>, then examine
     * <tt>string</tt> for the current iteration result.
     * @stable ICU 2.4
     */
    enum { IS_STRING = -1 };

    /**
     * Current code point, or the special value <tt>IS_STRING</tt>, if
     * the iterator points to a string.
     * @stable ICU 2.4
     */
    UChar32 codepoint;

    /**
     * When iterating over ranges using <tt>nextRange()</tt>,
     * <tt>codepointEnd</tt> contains the inclusive end of the
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
     * iterating over code points using <tt>next()</tt>, or if
     * <tt>codepoint == IS_STRING</tt>, then the value of
     * <tt>codepointEnd</tt> is undefined.
     * @stable ICU 2.4
     */
    UChar32 codepointEnd;

    /**
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     * value of <tt>string</tt> is undefined.
     * @stable ICU 2.4
     */
    const UnicodeString* string;

 public:

    /**
     * Create an iterator over the given set.  The iterator is valid
     * only so long as <tt>set</tt> is valid.
     * @param set set to iterate over
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSet& set);

    /**
     * Create an iterator over nothing.  <tt>next()</tt> and
     * <tt>nextRange()</tt> return false.  This is a convenience
     * constructor allowing the target to be set later.
     * @stable ICU 2.4
     */
    UnicodeSetIterator();

    /**
     * Destructor.
     * @stable ICU 2.4
     */
    virtual ~UnicodeSetIterator();

    /**
     * Returns true if the current element is a string.  If so, the
     * caller can retrieve it with <tt>getString()</tt>.  If this
     * method returns false, the current element is a code point or
     * code point range, depending on whether <tt>next()</tt> or
     * <tt>nextRange()</tt> was called, and the caller can retrieve it
     * with <tt>getCodepoint()</tt> and, for a range,
     * <tt>getCodepointEnd()</tt>.
     * @return true if the current element is a string, false if it is
     * a code point or code point range.
     * @stable ICU 2.4
     */
    inline UBool isString() const;

    /**
     * Returns the current code point, if <tt>isString()</tt> returned
     * false.  Otherwise returns an undefined result.
     * @return the current code point (the start of the range when
     * iterating with <tt>nextRange()</tt>).
     * @stable ICU 2.4
     */
    inline UChar32 getCodepoint() const;

    /**
     * Returns the end of the current code point range, if
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
     * called.  Otherwise returns an undefined result.
     * @return the inclusive end of the current code point range.
     * @stable ICU 2.4
     */
    inline UChar32 getCodepointEnd() const;

    /**
     * Returns the current string, if <tt>isString()</tt> returned
     * true.  Otherwise returns an undefined result.
     * @return a reference to the current string.
     * @stable ICU 2.4
     */
    inline const UnicodeString& getString() const;

    /**
     * Returns the next element in the set, either a single code point
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
     * string in the <tt>string</tt> field.  Otherwise the value is a
     * single code point in the <tt>codepoint</tt> field.
     *
     * <p>The order of iteration is all code points in sorted order,
     * followed by all strings in sorted order.  <tt>codepointEnd</tt>
     * is undefined after calling this method.  <tt>string</tt> is
     * undefined unless <tt>codepoint == IS_STRING</tt>.  Do not mix
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
     * calling <tt>reset()</tt> between them.  The results of doing so
     * are undefined.
     *
     * @return true if there was another element in the set and this
     * object contains the element.
     * @stable ICU 2.4
     */
    UBool next();

    /**
     * Returns the next element in the set, either a code point range
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
     * string in the <tt>string</tt> field.  Otherwise the value is a
     * range of one or more code points from <tt>codepoint</tt> to
     * <tt>codepointEnd</tt> inclusive.
     *
     * <p>The order of iteration is all code point ranges in sorted
     * order, followed by all strings in sorted order.  Ranges are
     * disjoint and non-contiguous.  <tt>string</tt> is undefined
     * unless <tt>codepoint == IS_STRING</tt>.  Do not mix calls to
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
     * <tt>reset()</tt> between them.  The results of doing so are
     * undefined.
     *
     * @return true if there was another element in the set and this
     * object contains the element.
     * @stable ICU 2.4
     */
    UBool nextRange();

    /**
     * Sets this iterator to visit the elements of the given set and
     * resets it to the start of that set.  The iterator is valid only
     * so long as <tt>set</tt> is valid.
     * @param set the set to iterate over.
     * @stable ICU 2.4
     */
    void reset(const UnicodeSet& set);

    /**
     * Resets this iterator to the start of the set.
     * @stable ICU 2.4
     */
    void reset();

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @return the UClassID for this class.
     * @stable ICU 2.4
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @return the UClassID for the actual class of this object.
     * @stable ICU 2.4
     */
    virtual UClassID getDynamicClassID() const;

    // ============ IMPLEMENTATION DETAILS (protected) ==============

 protected:

    // endElement and nextElement are really UChar32's, but we keep
    // them as signed int32_t's so we can do comparisons with
    // endElement set to -1.  Leave them as int32_t's.

    /** The set being iterated over.  Not owned by the iterator: the
     * public documentation requires the set to outlive the iterator.
     * @stable ICU 2.4
     */
    const UnicodeSet* set;

    /** End range.
     * NOTE(review): presumably the index of the last range in
     * <tt>set</tt> -- confirm against the implementation file.
     * @stable ICU 2.4
     */
    int32_t endRange;

    /** Range.
     * NOTE(review): presumably the index of the range currently being
     * visited -- confirm against the implementation file.
     * @stable ICU 2.4
     */
    int32_t range;

    /** End element.  Really a UChar32, kept signed so that it can be
     * compared while set to -1 (see the note at the top of this
     * section).
     * @stable ICU 2.4
     */
    int32_t endElement;

    /** Next element.  Really a UChar32, kept as a signed int32_t (see
     * the note at the top of this section).
     * @stable ICU 2.4
     */
    int32_t nextElement;

    //UBool abbreviated;

    /** Next string.
     * NOTE(review): presumably the index of the next string to
     * return -- confirm against the implementation file.
     * @stable ICU 2.4
     */
    int32_t nextString;

    /** String count.
     * NOTE(review): presumably the number of strings in
     * <tt>set</tt> -- confirm against the implementation file.
     * @stable ICU 2.4
     */
    int32_t stringCount;

    /** Copy constructor. Disallowed.
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSetIterator&); // disallow

    /** Assignment operator. Disallowed.
     * @stable ICU 2.4
     */
    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow

    /** Load range.  Virtual, so subclasses may override it.
     * @param range NOTE(review): presumably the index of the range to
     * load -- confirm against the implementation file.
     * @stable ICU 2.4
     */
    virtual void loadRange(int32_t range);

};
|
sl@0
|
278 |
|
sl@0
|
279 |
/**
 * Reports whether the current element is a string, i.e. whether
 * <tt>codepoint</tt> holds the <tt>IS_STRING</tt> sentinel.
 */
inline UBool UnicodeSetIterator::isString() const {
    // IS_STRING is an anonymous-enum constant; convert it to UChar32
    // so both sides of the comparison have the same type.
    const UChar32 stringMarker = static_cast<UChar32>(IS_STRING);
    return codepoint == stringMarker;
}
|
sl@0
|
282 |
|
sl@0
|
283 |
/**
 * Returns the <tt>codepoint</tt> field as-is.  The value is only
 * meaningful as a code point when <tt>isString()</tt> is false.
 */
inline UChar32 UnicodeSetIterator::getCodepoint() const {
    return this->codepoint;
}
|
sl@0
|
286 |
|
sl@0
|
287 |
/**
 * Returns the <tt>codepointEnd</tt> field as-is.  Per the field's
 * documentation it is only defined after <tt>nextRange()</tt> when the
 * current element is not a string.
 */
inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
    return this->codepointEnd;
}
|
sl@0
|
290 |
|
sl@0
|
291 |
/**
 * Returns a reference to the string that <tt>string</tt> points to.
 * The pointer is dereferenced without any check, so call this only
 * when <tt>isString()</tt> is true; otherwise the result is undefined.
 */
inline const UnicodeString& UnicodeSetIterator::getString() const {
    const UnicodeString& current = *string;
    return current;
}
|
sl@0
|
294 |
|
sl@0
|
295 |
U_NAMESPACE_END
|
sl@0
|
296 |
|
sl@0
|
297 |
#endif
|