sl@0: /* sl@0: ********************************************************************** sl@0: * Copyright (c) 2002-2005, International Business Machines sl@0: * Corporation and others. All Rights Reserved. sl@0: ********************************************************************** sl@0: */ sl@0: #ifndef USETITER_H sl@0: #define USETITER_H sl@0: sl@0: #include "unicode/utypes.h" sl@0: #include "unicode/uobject.h" sl@0: #include "unicode/unistr.h" sl@0: sl@0: /** sl@0: * \file sl@0: * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. sl@0: */ sl@0: sl@0: U_NAMESPACE_BEGIN sl@0: sl@0: class UnicodeSet; sl@0: class UnicodeString; sl@0: sl@0: /** sl@0: * sl@0: * UnicodeSetIterator iterates over the contents of a UnicodeSet. It sl@0: * iterates over either code points or code point ranges. After all sl@0: * code points or ranges have been returned, it returns the sl@0: * multicharacter strings of the UnicodeSet, if any. sl@0: * sl@0: *

To iterate over code points, use a loop like this: sl@0: *

sl@0:  * UnicodeSetIterator it(set);
sl@0:  * while (it.next()) {
sl@0:  *   if (it.isString()) {
sl@0:  *     processString(it.getString());
sl@0:  *   } else {
sl@0:  *     processCodepoint(it.getCodepoint());
sl@0:  *   }
sl@0:  * }
sl@0:  * 
sl@0: * sl@0: *

To iterate over code point ranges, use a loop like this: sl@0: *

sl@0:  * UnicodeSetIterator it(set);
sl@0:  * while (it.nextRange()) {
sl@0:  *   if (it.isString()) {
sl@0:  *     processString(it.getString());
sl@0:  *   } else {
sl@0:  *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
sl@0:  *   }
sl@0:  * }
sl@0:  * 
sl@0: * @author M. Davis sl@0: * @stable ICU 2.4 sl@0: */ sl@0: class U_COMMON_API UnicodeSetIterator : public UObject { sl@0: sl@0: protected: sl@0: sl@0: /** sl@0: * Value of codepoint if the iterator points to a string. sl@0: * If codepoint == IS_STRING, then examine sl@0: * string for the current iteration result. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: enum { IS_STRING = -1 }; sl@0: sl@0: /** sl@0: * Current code point, or the special value IS_STRING, if sl@0: * the iterator points to a string. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UChar32 codepoint; sl@0: sl@0: /** sl@0: * When iterating over ranges using nextRange(), sl@0: * codepointEnd contains the inclusive end of the sl@0: * iteration range, if codepoint != IS_STRING. If sl@0: * iterating over code points using next(), or if sl@0: * codepoint == IS_STRING, then the value of sl@0: * codepointEnd is undefined. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UChar32 codepointEnd; sl@0: sl@0: /** sl@0: * If codepoint == IS_STRING, then string points sl@0: * to the current string. If codepoint != IS_STRING, the sl@0: * value of string is undefined. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: const UnicodeString* string; sl@0: sl@0: public: sl@0: sl@0: /** sl@0: * Create an iterator over the given set. The iterator is valid sl@0: * only so long as set is valid. sl@0: * @param set set to iterate over sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UnicodeSetIterator(const UnicodeSet& set); sl@0: sl@0: /** sl@0: * Create an iterator over nothing. next() and sl@0: * nextRange() return false. This is a convenience sl@0: * constructor allowing the target to be set later. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UnicodeSetIterator(); sl@0: sl@0: /** sl@0: * Destructor. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: virtual ~UnicodeSetIterator(); sl@0: sl@0: /** sl@0: * Returns true if the current element is a string. If so, the sl@0: * caller can retrieve it with getString(). 
If this sl@0: * method returns false, the current element is a code point or sl@0: * code point range, depending on whether next() or sl@0: * nextRange() was called, and the caller can retrieve it sl@0: * with getCodepoint() and, for a range, sl@0: * getCodepointEnd(). sl@0: * @stable ICU 2.4 sl@0: */ sl@0: inline UBool isString() const; sl@0: sl@0: /** sl@0: * Returns the current code point, if isString() returned sl@0: * false. Otherwise returns an undefined result. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: inline UChar32 getCodepoint() const; sl@0: sl@0: /** sl@0: * Returns the end of the current code point range, if sl@0: * isString() returned false and nextRange() was sl@0: * called. Otherwise returns an undefined result. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: inline UChar32 getCodepointEnd() const; sl@0: sl@0: /** sl@0: * Returns the current string, if isString() returned sl@0: * true. Otherwise returns an undefined result. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: inline const UnicodeString& getString() const; sl@0: sl@0: /** sl@0: * Returns the next element in the set, either a single code point sl@0: * or a string. If there are no more elements in the set, return sl@0: * false. If codepoint == IS_STRING, the value is a sl@0: * string in the string field. Otherwise the value is a sl@0: * single code point in the codepoint field. sl@0: * sl@0: *

The order of iteration is all code points in sorted order, sl@0: * followed by all strings in sorted order. codepointEnd is sl@0: * undefined after calling this method. string is sl@0: * undefined unless codepoint == IS_STRING. Do not mix sl@0: * calls to next() and nextRange() without sl@0: * calling reset() between them. The results of doing so sl@0: * are undefined. sl@0: * sl@0: * @return true if there was another element in the set and this sl@0: * object contains the element. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UBool next(); sl@0: sl@0: /** sl@0: * Returns the next element in the set, either a code point range sl@0: * or a string. If there are no more elements in the set, return sl@0: * false. If codepoint == IS_STRING, the value is a sl@0: * string in the string field. Otherwise the value is a sl@0: * range of one or more code points from codepoint to sl@0: * codepointEnd inclusive. sl@0: * sl@0: *

The order of iteration is all code point ranges in sorted sl@0: * order, followed by all strings in sorted order. Ranges are sl@0: * disjoint and non-contiguous. string is undefined sl@0: * unless codepoint == IS_STRING. Do not mix calls to sl@0: * next() and nextRange() without calling sl@0: * reset() between them. The results of doing so are sl@0: * undefined. sl@0: * sl@0: * @return true if there was another element in the set and this sl@0: * object contains the element. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UBool nextRange(); sl@0: sl@0: /** sl@0: * Sets this iterator to visit the elements of the given set and sl@0: * resets it to the start of that set. The iterator is valid only sl@0: * so long as set is valid. sl@0: * @param set the set to iterate over. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: void reset(const UnicodeSet& set); sl@0: sl@0: /** sl@0: * Resets this iterator to the start of the set. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: void reset(); sl@0: sl@0: /** sl@0: * ICU "poor man's RTTI", returns a UClassID for this class. sl@0: * sl@0: * @stable ICU 2.4 sl@0: */ sl@0: static UClassID U_EXPORT2 getStaticClassID(); sl@0: sl@0: /** sl@0: * ICU "poor man's RTTI", returns a UClassID for the actual class. sl@0: * sl@0: * @stable ICU 2.4 sl@0: */ sl@0: virtual UClassID getDynamicClassID() const; sl@0: sl@0: // ======================= PRIVATES =========================== sl@0: sl@0: protected: sl@0: sl@0: // endElement and nextElement are really UChar32's, but we keep sl@0: // them as signed int32_t's so we can do comparisons with sl@0: // endElement set to -1. Leave them as int32_t's. 
sl@0: /** The set sl@0: * @stable ICU 2.4 sl@0: */ sl@0: const UnicodeSet* set; sl@0: /** End range sl@0: * @stable ICU 2.4 sl@0: */ sl@0: int32_t endRange; sl@0: /** Range sl@0: * @stable ICU 2.4 sl@0: */ sl@0: int32_t range; sl@0: /** End element sl@0: * @stable ICU 2.4 sl@0: */ sl@0: int32_t endElement; sl@0: /** Next element sl@0: * @stable ICU 2.4 sl@0: */ sl@0: int32_t nextElement; sl@0: //UBool abbreviated; sl@0: /** Next string sl@0: * @stable ICU 2.4 sl@0: */ sl@0: int32_t nextString; sl@0: /** String count sl@0: * @stable ICU 2.4 sl@0: */ sl@0: int32_t stringCount; sl@0: sl@0: /** Copy constructor. Disallowed. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UnicodeSetIterator(const UnicodeSetIterator&); // disallow sl@0: sl@0: /** Assignment operator. Disallowed. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow sl@0: sl@0: /** Load range sl@0: * @stable ICU 2.4 sl@0: */ sl@0: virtual void loadRange(int32_t range); sl@0: sl@0: }; sl@0: sl@0: inline UBool UnicodeSetIterator::isString() const { sl@0: return codepoint == (UChar32)IS_STRING; sl@0: } sl@0: sl@0: inline UChar32 UnicodeSetIterator::getCodepoint() const { sl@0: return codepoint; sl@0: } sl@0: sl@0: inline UChar32 UnicodeSetIterator::getCodepointEnd() const { sl@0: return codepointEnd; sl@0: } sl@0: sl@0: inline const UnicodeString& UnicodeSetIterator::getString() const { sl@0: return *string; sl@0: } sl@0: sl@0: U_NAMESPACE_END sl@0: sl@0: #endif