os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/usetiter.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
**********************************************************************
sl@0
     3
* Copyright (c) 2002-2005, International Business Machines
sl@0
     4
* Corporation and others.  All Rights Reserved.
sl@0
     5
**********************************************************************
sl@0
     6
*/
sl@0
     7
#ifndef USETITER_H
sl@0
     8
#define USETITER_H
sl@0
     9
sl@0
    10
#include "unicode/utypes.h"
sl@0
    11
#include "unicode/uobject.h"
sl@0
    12
#include "unicode/unistr.h"
sl@0
    13
sl@0
    14
/**
sl@0
    15
 * \file 
sl@0
    16
 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
sl@0
    17
 */
sl@0
    18
sl@0
    19
U_NAMESPACE_BEGIN
sl@0
    20
sl@0
    21
class UnicodeSet;
sl@0
    22
class UnicodeString;
sl@0
    23
sl@0
    24
/**
sl@0
    25
 *
sl@0
    26
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
sl@0
    27
 * iterates over either code points or code point ranges.  After all
sl@0
    28
 * code points or ranges have been returned, it returns the
sl@0
    29
 * multicharacter strings of the UnicodeSet, if any.
sl@0
    30
 *
sl@0
    31
 * <p>To iterate over code points, use a loop like this:
sl@0
    32
 * <pre>
sl@0
    33
 * UnicodeSetIterator it(set);
sl@0
    34
 * while (it.next()) {
sl@0
    35
 *   if (it.isString()) {
sl@0
    36
 *     processString(it.getString());
sl@0
    37
 *   } else {
sl@0
    38
 *     processCodepoint(it.getCodepoint());
sl@0
    39
 *   }
sl@0
    40
 * }
sl@0
    41
 * </pre>
sl@0
    42
 *
sl@0
    43
 * <p>To iterate over code point ranges, use a loop like this:
sl@0
    44
 * <pre>
sl@0
    45
 * UnicodeSetIterator it(set);
sl@0
    46
 * while (it.nextRange()) {
sl@0
    47
 *   if (it.isString()) {
sl@0
    48
 *     processString(it.getString());
sl@0
    49
 *   } else {
sl@0
    50
 *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
sl@0
    51
 *   }
sl@0
    52
 * }
sl@0
    53
 * </pre>
sl@0
    54
 * @author M. Davis
sl@0
    55
 * @stable ICU 2.4
sl@0
    56
 */
sl@0
    57
class U_COMMON_API UnicodeSetIterator : public UObject {
sl@0
    58
sl@0
    59
 protected:
sl@0
    60
sl@0
    61
    /**
sl@0
    62
     * Value of <tt>codepoint</tt> if the iterator points to a string.
sl@0
    63
     * If <tt>codepoint == IS_STRING</tt>, then examine
sl@0
    64
     * <tt>string</tt> for the current iteration result.
sl@0
    65
     * @stable ICU 2.4
sl@0
    66
     */
sl@0
    67
    enum { IS_STRING = -1 };
sl@0
    68
sl@0
    69
    /**
sl@0
    70
     * Current code point, or the special value <tt>IS_STRING</tt>, if
sl@0
    71
     * the iterator points to a string.
sl@0
    72
     * @stable ICU 2.4
sl@0
    73
     */
sl@0
    74
    UChar32 codepoint;
sl@0
    75
sl@0
    76
    /**
sl@0
    77
     * When iterating over ranges using <tt>nextRange()</tt>,
sl@0
    78
     * <tt>codepointEnd</tt> contains the inclusive end of the
sl@0
    79
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
sl@0
    80
     * iterating over code points using <tt>next()</tt>, or if
sl@0
    81
     * <tt>codepoint == IS_STRING</tt>, then the value of
sl@0
    82
     * <tt>codepointEnd</tt> is undefined.
sl@0
    83
     * @stable ICU 2.4
sl@0
    84
     */
sl@0
    85
    UChar32 codepointEnd;
sl@0
    86
sl@0
    87
    /**
sl@0
    88
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
sl@0
    89
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
sl@0
    90
     * value of <tt>string</tt> is undefined.
sl@0
    91
     * @stable ICU 2.4
sl@0
    92
     */
sl@0
    93
    const UnicodeString* string;
sl@0
    94
sl@0
    95
 public:
sl@0
    96
sl@0
    97
    /**
sl@0
    98
     * Create an iterator over the given set.  The iterator is valid
sl@0
    99
     * only so long as <tt>set</tt> is valid.
sl@0
   100
     * @param set set to iterate over
sl@0
   101
     * @stable ICU 2.4
sl@0
   102
     */
sl@0
   103
    UnicodeSetIterator(const UnicodeSet& set);
sl@0
   104
sl@0
   105
    /**
sl@0
   106
     * Create an iterator over nothing.  <tt>next()</tt> and
sl@0
   107
     * <tt>nextRange()</tt> return false. This is a convenience
sl@0
   108
     * constructor allowing the target to be set later.
sl@0
   109
     * @stable ICU 2.4
sl@0
   110
     */
sl@0
   111
    UnicodeSetIterator();
sl@0
   112
sl@0
   113
    /**
sl@0
   114
     * Destructor.
sl@0
   115
     * @stable ICU 2.4
sl@0
   116
     */
sl@0
   117
    virtual ~UnicodeSetIterator();
sl@0
   118
sl@0
   119
    /**
sl@0
   120
     * Returns true if the current element is a string.  If so, the
sl@0
   121
     * caller can retrieve it with <tt>getString()</tt>.  If this
sl@0
   122
     * method returns false, the current element is a code point or
sl@0
   123
     * code point range, depending on whether <tt>next()</tt> or
sl@0
   124
     * <tt>nextRange()</tt> was called, and the caller can retrieve it
sl@0
   125
     * with <tt>getCodepoint()</tt> and, for a range,
sl@0
   126
     * <tt>getCodepointEnd()</tt>.
sl@0
   127
     * @stable ICU 2.4
sl@0
   128
     */
sl@0
   129
    inline UBool isString() const;
sl@0
   130
sl@0
   131
    /**
sl@0
   132
     * Returns the current code point, if <tt>isString()</tt> returned
sl@0
   133
     * false.  Otherwise returns an undefined result.
sl@0
   134
     * @stable ICU 2.4
sl@0
   135
     */
sl@0
   136
    inline UChar32 getCodepoint() const;
sl@0
   137
sl@0
   138
    /**
sl@0
   139
     * Returns the end of the current code point range, if
sl@0
   140
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
sl@0
   141
     * called.  Otherwise returns an undefined result.
sl@0
   142
     * @stable ICU 2.4
sl@0
   143
     */
sl@0
   144
    inline UChar32 getCodepointEnd() const;
sl@0
   145
sl@0
   146
    /**
sl@0
   147
     * Returns the current string, if <tt>isString()</tt> returned
sl@0
   148
     * true.  Otherwise returns an undefined result.
sl@0
   149
     * @stable ICU 2.4
sl@0
   150
     */
sl@0
   151
    inline const UnicodeString& getString() const;
sl@0
   152
sl@0
   153
    /**
sl@0
   154
     * Returns the next element in the set, either a single code point
sl@0
   155
     * or a string.  If there are no more elements in the set, return
sl@0
   156
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
sl@0
   157
     * string in the <tt>string</tt> field.  Otherwise the value is a
sl@0
   158
     * single code point in the <tt>codepoint</tt> field.
sl@0
   159
     *
sl@0
   160
     * <p>The order of iteration is all code points in sorted order,
sl@0
   161
     * followed by all strings in sorted order.  <tt>codepointEnd</tt> is
sl@0
   162
     * undefined after calling this method.  <tt>string</tt> is
sl@0
   163
     * undefined unless <tt>codepoint == IS_STRING</tt>.  Do not mix
sl@0
   164
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
sl@0
   165
     * calling <tt>reset()</tt> between them.  The results of doing so
sl@0
   166
     * are undefined.
sl@0
   167
     *
sl@0
   168
     * @return true if there was another element in the set and this
sl@0
   169
     * object contains the element.
sl@0
   170
     * @stable ICU 2.4
sl@0
   171
     */
sl@0
   172
    UBool next();
sl@0
   173
sl@0
   174
    /**
sl@0
   175
     * Returns the next element in the set, either a code point range
sl@0
   176
     * or a string.  If there are no more elements in the set, return
sl@0
   177
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
sl@0
   178
     * string in the <tt>string</tt> field.  Otherwise the value is a
sl@0
   179
     * range of one or more code points from <tt>codepoint</tt> to
sl@0
   180
     * <tt>codepointEnd</tt> inclusive.
sl@0
   181
     *
sl@0
   182
     * <p>The order of iteration is all code point ranges in sorted
sl@0
   183
     * order, followed by all strings in sorted order.  Ranges are
sl@0
   184
     * disjoint and non-contiguous.  <tt>string</tt> is undefined
sl@0
   185
     * unless <tt>codepoint == IS_STRING</tt>.  Do not mix calls to
sl@0
   186
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
sl@0
   187
     * <tt>reset()</tt> between them.  The results of doing so are
sl@0
   188
     * undefined.
sl@0
   189
     *
sl@0
   190
     * @return true if there was another element in the set and this
sl@0
   191
     * object contains the element.
sl@0
   192
     * @stable ICU 2.4
sl@0
   193
     */
sl@0
   194
    UBool nextRange();
sl@0
   195
sl@0
   196
    /**
sl@0
   197
     * Sets this iterator to visit the elements of the given set and
sl@0
   198
     * resets it to the start of that set.  The iterator is valid only
sl@0
   199
     * so long as <tt>set</tt> is valid.
sl@0
   200
     * @param set the set to iterate over.
sl@0
   201
     * @stable ICU 2.4
sl@0
   202
     */
sl@0
   203
    void reset(const UnicodeSet& set);
sl@0
   204
sl@0
   205
    /**
sl@0
   206
     * Resets this iterator to the start of the set.
sl@0
   207
     * @stable ICU 2.4
sl@0
   208
     */
sl@0
   209
    void reset();
sl@0
   210
sl@0
   211
    /**
sl@0
   212
     * ICU "poor man's RTTI", returns a UClassID for this class.
sl@0
   213
     *
sl@0
   214
     * @stable ICU 2.4
sl@0
   215
     */
sl@0
   216
    static UClassID U_EXPORT2 getStaticClassID();
sl@0
   217
sl@0
   218
    /**
sl@0
   219
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
sl@0
   220
     *
sl@0
   221
     * @stable ICU 2.4
sl@0
   222
     */
sl@0
   223
    virtual UClassID getDynamicClassID() const;
sl@0
   224
sl@0
   225
    // ======================= PRIVATES ===========================
sl@0
   226
sl@0
   227
 protected:
sl@0
   228
sl@0
   229
    // endElement and nextElement are really UChar32's, but we keep
sl@0
   230
    // them as signed int32_t's so we can do comparisons with
sl@0
   231
    // endElement set to -1.  Leave them as int32_t's.
sl@0
   232
    /** The set being iterated over; it must remain valid while this iterator is in use
sl@0
   233
     * @stable ICU 2.4
sl@0
   234
     */
sl@0
   235
    const UnicodeSet* set;
sl@0
   236
    /** End range
sl@0
   237
     * @stable ICU 2.4
sl@0
   238
     */
sl@0
   239
    int32_t endRange;
sl@0
   240
    /** Range
sl@0
   241
     * @stable ICU 2.4
sl@0
   242
     */
sl@0
   243
    int32_t range;
sl@0
   244
    /** End element
sl@0
   245
     * @stable ICU 2.4
sl@0
   246
     */
sl@0
   247
    int32_t endElement;
sl@0
   248
    /** Next element
sl@0
   249
     * @stable ICU 2.4
sl@0
   250
     */
sl@0
   251
    int32_t nextElement;
sl@0
   252
    //UBool abbreviated;
sl@0
   253
    /** Next string
sl@0
   254
     * @stable ICU 2.4
sl@0
   255
     */
sl@0
   256
    int32_t nextString;
sl@0
   257
    /** String count
sl@0
   258
     * @stable ICU 2.4
sl@0
   259
     */
sl@0
   260
    int32_t stringCount;
sl@0
   261
sl@0
   262
    /** Copy constructor. Disallowed.
sl@0
   263
     * @stable ICU 2.4
sl@0
   264
     */
sl@0
   265
    UnicodeSetIterator(const UnicodeSetIterator&); // disallow
sl@0
   266
sl@0
   267
    /** Assignment operator. Disallowed.
sl@0
   268
     * @stable ICU 2.4
sl@0
   269
     */
sl@0
   270
    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
sl@0
   271
sl@0
   272
    /** Load range
sl@0
   273
     * @stable ICU 2.4
sl@0
   274
     */
sl@0
   275
    virtual void loadRange(int32_t range);
sl@0
   276
sl@0
   277
};
sl@0
   278
sl@0
   279
/**
 * Inline definition of isString(): reports whether <tt>codepoint</tt>
 * currently holds the <tt>IS_STRING</tt> sentinel.
 */
inline UBool UnicodeSetIterator::isString() const {
sl@0
   280
    return codepoint == (UChar32)IS_STRING;
sl@0
   281
}
sl@0
   282
sl@0
   283
/**
 * Inline definition of getCodepoint(): returns the <tt>codepoint</tt>
 * member as stored, without checking whether it is the
 * <tt>IS_STRING</tt> sentinel.
 */
inline UChar32 UnicodeSetIterator::getCodepoint() const {
sl@0
   284
    return codepoint;
sl@0
   285
}
sl@0
   286
sl@0
   287
/**
 * Inline definition of getCodepointEnd(): returns the
 * <tt>codepointEnd</tt> member as stored; no check is made here that
 * the iterator is positioned on a range.
 */
inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
sl@0
   288
    return codepointEnd;
sl@0
   289
}
sl@0
   290
sl@0
   291
/**
 * Inline definition of getString(): dereferences the <tt>string</tt>
 * member without a null check, so it is only meaningful when
 * <tt>isString()</tt> is true.
 */
inline const UnicodeString& UnicodeSetIterator::getString() const {
sl@0
   292
    return *string;
sl@0
   293
}
sl@0
   294
sl@0
   295
U_NAMESPACE_END
sl@0
   296
sl@0
   297
#endif