1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/usetiter.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,297 @@
1.4 +/*
1.5 +**********************************************************************
1.6 +* Copyright (c) 2002-2005, International Business Machines
1.7 +* Corporation and others. All Rights Reserved.
1.8 +**********************************************************************
1.9 +*/
1.10 +#ifndef USETITER_H
1.11 +#define USETITER_H
1.12 +
1.13 +#include "unicode/utypes.h"
1.14 +#include "unicode/uobject.h"
1.15 +#include "unicode/unistr.h"
1.16 +
1.17 +/**
1.18 + * \file
1.19 + * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
1.20 + */
1.21 +
1.22 +U_NAMESPACE_BEGIN
1.23 +
1.24 +class UnicodeSet;
1.25 +class UnicodeString;
1.26 +
1.27 +/**
1.28 + *
1.29 + * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
1.30 + * iterates over either code points or code point ranges. After all
1.31 + * code points or ranges have been returned, it returns the
1.32 + * multicharacter strings of the UnicodeSet, if any.
1.33 + *
1.34 + * <p>To iterate over code points, use a loop like this:
1.35 + * <pre>
1.36 + * UnicodeSetIterator it(set);
1.37 + * while (it.next()) {
1.38 + * if (it.isString()) {
1.39 + * processString(it.getString());
1.40 + * } else {
1.41 + * processCodepoint(it.getCodepoint());
1.42 + * }
1.43 + * }
1.44 + * </pre>
1.45 + *
1.46 + * <p>To iterate over code point ranges, use a loop like this:
1.47 + * <pre>
1.48 + * UnicodeSetIterator it(set);
1.49 + * while (it.nextRange()) {
1.50 + * if (it.isString()) {
1.51 + * processString(it.getString());
1.52 + * } else {
1.53 + * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
1.54 + * }
1.55 + * }
1.56 + * </pre>
1.57 + * @author M. Davis
1.58 + * @stable ICU 2.4
1.59 + */
1.60 +class U_COMMON_API UnicodeSetIterator : public UObject {
1.61 +
1.62 + protected:
1.63 +
1.64 + /**
1.65 + * Value of <tt>codepoint</tt> if the iterator points to a string.
1.66 + * If <tt>codepoint == IS_STRING</tt>, then examine
1.67 + * <tt>string</tt> for the current iteration result.
1.68 + * @stable ICU 2.4
1.69 + */
1.70 + enum { IS_STRING = -1 };
1.71 +
1.72 + /**
1.73 + * Current code point, or the special value <tt>IS_STRING</tt>, if
1.74 + * the iterator points to a string.
1.75 + * @stable ICU 2.4
1.76 + */
1.77 + UChar32 codepoint;
1.78 +
1.79 + /**
1.80 + * When iterating over ranges using <tt>nextRange()</tt>,
1.81 + * <tt>codepointEnd</tt> contains the inclusive end of the
1.82 + * iteration range, if <tt>codepoint != IS_STRING</tt>. If
1.83 + * iterating over code points using <tt>next()</tt>, or if
1.84 + * <tt>codepoint == IS_STRING</tt>, then the value of
1.85 + * <tt>codepointEnd</tt> is undefined.
1.86 + * @stable ICU 2.4
1.87 + */
1.88 + UChar32 codepointEnd;
1.89 +
1.90 + /**
1.91 + * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
1.92 + * to the current string. If <tt>codepoint != IS_STRING</tt>, the
1.93 + * value of <tt>string</tt> is undefined.
1.94 + * @stable ICU 2.4
1.95 + */
1.96 + const UnicodeString* string;
1.97 +
1.98 + public:
1.99 +
1.100 + /**
1.101 + * Create an iterator over the given set. The iterator is valid
1.102 + * only so long as <tt>set</tt> is valid.
1.103 + * @param set set to iterate over
1.104 + * @stable ICU 2.4
1.105 + */
1.106 + UnicodeSetIterator(const UnicodeSet& set);
1.107 +
1.108 + /**
1.109 + * Create an iterator over nothing. <tt>next()</tt> and
1.110 + * <tt>nextRange()</tt> return false. This is a convenience
1.111 + * constructor allowing the target to be set later.
1.112 + * @stable ICU 2.4
1.113 + */
1.114 + UnicodeSetIterator();
1.115 +
1.116 + /**
1.117 + * Destructor.
1.118 + * @stable ICU 2.4
1.119 + */
1.120 + virtual ~UnicodeSetIterator();
1.121 +
1.122 + /**
1.123 + * Returns true if the current element is a string. If so, the
1.124 + * caller can retrieve it with <tt>getString()</tt>. If this
1.125 + * method returns false, the current element is a code point or
1.126 + * code point range, depending on whether <tt>next()</tt> or
1.127 + * <tt>nextRange()</tt> was called, and the caller can retrieve it
1.128 + * with <tt>getCodepoint()</tt> and, for a range,
1.129 + * <tt>getCodepointEnd()</tt>.
1.130 + * @stable ICU 2.4
1.131 + */
1.132 + inline UBool isString() const;
1.133 +
1.134 + /**
1.135 + * Returns the current code point, if <tt>isString()</tt> returned
1.136 + * false. Otherwise returns an undefined result.
1.137 + * @stable ICU 2.4
1.138 + */
1.139 + inline UChar32 getCodepoint() const;
1.140 +
1.141 + /**
1.142 + * Returns the end of the current code point range, if
1.143 + * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
1.144 + * called. Otherwise returns an undefined result.
1.145 + * @stable ICU 2.4
1.146 + */
1.147 + inline UChar32 getCodepointEnd() const;
1.148 +
1.149 + /**
1.150 + * Returns the current string, if <tt>isString()</tt> returned
1.151 + * true. Otherwise returns an undefined result.
1.152 + * @stable ICU 2.4
1.153 + */
1.154 + inline const UnicodeString& getString() const;
1.155 +
1.156 + /**
1.157 + * Returns the next element in the set, either a single code point
1.158 + * or a string. If there are no more elements in the set, return
1.159 + * false. If <tt>codepoint == IS_STRING</tt>, the value is a
1.160 + * string in the <tt>string</tt> field. Otherwise the value is a
1.161 + * single code point in the <tt>codepoint</tt> field.
1.162 + *
1.163 + * <p>The order of iteration is all code points in sorted order,
1.164 + * followed by all strings in sorted order. <tt>codepointEnd</tt> is
1.165 + * undefined after calling this method. <tt>string</tt> is
1.166 + * undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix
1.167 + * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
1.168 + * calling <tt>reset()</tt> between them. The results of doing so
1.169 + * are undefined.
1.170 + *
1.171 + * @return true if there was another element in the set and this
1.172 + * object contains the element.
1.173 + * @stable ICU 2.4
1.174 + */
1.175 + UBool next();
1.176 +
1.177 + /**
1.178 + * Returns the next element in the set, either a code point range
1.179 + * or a string. If there are no more elements in the set, return
1.180 + * false. If <tt>codepoint == IS_STRING</tt>, the value is a
1.181 + * string in the <tt>string</tt> field. Otherwise the value is a
1.182 + * range of one or more code points from <tt>codepoint</tt> to
1.183 + * <tt>codepointEnd</tt> inclusive.
1.184 + *
1.185 + * <p>The order of iteration is all code point ranges in sorted
1.186 + * order, followed by all strings in sorted order. Ranges are
1.187 + * disjoint and non-contiguous. <tt>string</tt> is undefined
1.188 + * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
1.189 + * <tt>next()</tt> and <tt>nextRange()</tt> without calling
1.190 + * <tt>reset()</tt> between them. The results of doing so are
1.191 + * undefined.
1.192 + *
1.193 + * @return true if there was another element in the set and this
1.194 + * object contains the element.
1.195 + * @stable ICU 2.4
1.196 + */
1.197 + UBool nextRange();
1.198 +
1.199 + /**
1.200 + * Sets this iterator to visit the elements of the given set and
1.201 + * resets it to the start of that set. The iterator is valid only
1.202 + * so long as <tt>set</tt> is valid.
1.203 + * @param set the set to iterate over.
1.204 + * @stable ICU 2.4
1.205 + */
1.206 + void reset(const UnicodeSet& set);
1.207 +
1.208 + /**
1.209 + * Resets this iterator to the start of the set.
1.210 + * @stable ICU 2.4
1.211 + */
1.212 + void reset();
1.213 +
1.214 + /**
1.215 + * ICU "poor man's RTTI", returns a UClassID for this class.
1.216 + *
1.217 + * @stable ICU 2.4
1.218 + */
1.219 + static UClassID U_EXPORT2 getStaticClassID();
1.220 +
1.221 + /**
1.222 + * ICU "poor man's RTTI", returns a UClassID for the actual class.
1.223 + *
1.224 + * @stable ICU 2.4
1.225 + */
1.226 + virtual UClassID getDynamicClassID() const;
1.227 +
1.228 + // ============ PRIVATES (declared protected, not private) ============
1.229 +
1.230 + protected:
1.231 +
1.232 + // endElement and nextElement are really UChar32's, but we keep
1.233 + // them as signed int32_t's so we can do comparisons with
1.234 + // endElement set to -1. Leave them as int32_t's.
1.235 + /** The set to iterate over; the iterator is valid only while this set is valid
1.236 + * @stable ICU 2.4
1.237 + */
1.238 + const UnicodeSet* set;
1.239 + /** End range (presumably the index of the last range in the set; TODO confirm)
1.240 + * @stable ICU 2.4
1.241 + */
1.242 + int32_t endRange;
1.243 + /** Range (presumably the index of the current range, cf. loadRange(); TODO confirm)
1.244 + * @stable ICU 2.4
1.245 + */
1.246 + int32_t range;
1.247 + /** End element (a code point kept as signed int32_t so it can hold -1, see note above)
1.248 + * @stable ICU 2.4
1.249 + */
1.250 + int32_t endElement;
1.251 + /** Next element (a code point kept as signed int32_t, see note above)
1.252 + * @stable ICU 2.4
1.253 + */
1.254 + int32_t nextElement;
1.255 + //UBool abbreviated;
1.256 + /** Next string (presumably the index of the next string to return; TODO confirm)
1.257 + * @stable ICU 2.4
1.258 + */
1.259 + int32_t nextString;
1.260 + /** String count (presumably the number of strings in the set; TODO confirm)
1.261 + * @stable ICU 2.4
1.262 + */
1.263 + int32_t stringCount;
1.264 +
1.265 + /** Copy constructor. Disallowed.
1.266 + * @stable ICU 2.4
1.267 + */
1.268 + UnicodeSetIterator(const UnicodeSetIterator&); // disallow
1.269 +
1.270 + /** Assignment operator. Disallowed.
1.271 + * @stable ICU 2.4
1.272 + */
1.273 + UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
1.274 +
1.275 + /** Load range (virtual, so a subclass can override how a range is loaded)
1.276 + * @stable ICU 2.4
1.277 + */
1.278 + virtual void loadRange(int32_t range);
1.279 +
1.280 +};
1.281 +
1.282 +inline UBool UnicodeSetIterator::isString() const {
1.283 + return static_cast<UChar32>(IS_STRING) == codepoint; // IS_STRING sentinel: current element is a string
1.284 +}
1.285 +
1.286 +inline UChar32 UnicodeSetIterator::getCodepoint() const {
1.287 + return this->codepoint; // holds IS_STRING when the current element is a string
1.288 +}
1.289 +
1.290 +inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
1.291 + return this->codepointEnd; // inclusive range end; undefined unless nextRange() produced a range
1.292 +}
1.293 +
1.294 +inline const UnicodeString& UnicodeSetIterator::getString() const {
1.295 + return *(this->string); // result is undefined unless isString() is true
1.296 +}
1.297 +
1.298 +U_NAMESPACE_END
1.299 +
1.300 +#endif