os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/chariter.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
********************************************************************
sl@0
     3
*
sl@0
     4
*   Copyright (C) 1997-2005, International Business Machines
sl@0
     5
*   Corporation and others.  All Rights Reserved.
sl@0
     6
*
sl@0
     7
********************************************************************
sl@0
     8
*/
sl@0
     9
sl@0
    10
#ifndef CHARITER_H
sl@0
    11
#define CHARITER_H
sl@0
    12
sl@0
    13
#include "unicode/utypes.h"
sl@0
    14
#include "unicode/uobject.h"
sl@0
    15
#include "unicode/unistr.h"
sl@0
    16
/**
sl@0
    17
 * \file
sl@0
    18
 * \brief C++ API: Character Iterator
sl@0
    19
 */
sl@0
    20
 
sl@0
    21
U_NAMESPACE_BEGIN
sl@0
    22
/**
sl@0
    23
 * Abstract class that defines an API for forward-only iteration
sl@0
    24
 * on text objects.
sl@0
    25
 * This is a minimal interface for iteration without random access
sl@0
    26
 * or backwards iteration. It is especially useful for wrapping
sl@0
    27
 * streams with converters into an object for collation or
sl@0
    28
 * normalization.
sl@0
    29
 *
sl@0
    30
 * <p>Characters can be accessed in two ways: as code units or as
sl@0
    31
 * code points.
sl@0
    32
 * Unicode code points are 21-bit integers and are the scalar values
sl@0
    33
 * of Unicode characters. ICU uses the type UChar32 for them.
sl@0
    34
 * Unicode code units are the storage units of a given
sl@0
    35
 * Unicode/UCS Transformation Format (a character encoding scheme).
sl@0
    36
 * With UTF-16, all code points can be represented with either one
sl@0
    37
 * or two code units ("surrogates").
sl@0
    38
 * String storage is typically based on code units, while properties
sl@0
    39
 * of characters are typically determined using code point values.
sl@0
    40
 * Some processes may be designed to work with sequences of code units,
sl@0
    41
 * or it may be known that all characters that are important to an
sl@0
    42
 * algorithm can be represented with single code units.
sl@0
    43
 * Other processes will need to use the code point access functions.</p>
sl@0
    44
 *
sl@0
    45
 * <p>ForwardCharacterIterator provides nextPostInc() to access
sl@0
    46
 * a code unit and advance an internal position into the text object,
sl@0
    47
 * similar to a <code>return text[position++]</code>.<br>
sl@0
    48
 * It provides next32PostInc() to access a code point and advance an internal
sl@0
    49
 * position.</p>
sl@0
    50
 *
sl@0
    51
 * <p>next32PostInc() assumes that the current position is that of
sl@0
    52
 * the beginning of a code point, i.e., of its first code unit.
sl@0
    53
 * After next32PostInc(), this will be true again.
sl@0
    54
 * In general, access to code units and code points in the same
sl@0
    55
 * iteration loop should not be mixed. In UTF-16, if the current position
sl@0
    56
 * is on a second code unit (Low Surrogate), then only that code unit
sl@0
    57
 * is returned even by next32PostInc().</p>
sl@0
    58
 *
sl@0
    59
 * <p>For iteration with either function, there are two ways to
sl@0
    60
 * check for the end of the iteration. When there are no more
sl@0
    61
 * characters in the text object:
sl@0
    62
 * <ul>
sl@0
    63
 * <li>The hasNext() function returns FALSE.</li>
sl@0
    64
 * <li>nextPostInc() and next32PostInc() return DONE
sl@0
    65
 *     when one attempts to read beyond the end of the text object.</li>
sl@0
    66
 * </ul>
sl@0
    67
 *
sl@0
    68
 * Example:
sl@0
    69
 * \code 
sl@0
    70
 * void function1(ForwardCharacterIterator &it) {
sl@0
    71
 *     UChar32 c;
sl@0
    72
 *     while(it.hasNext()) {
sl@0
    73
 *         c=it.next32PostInc();
sl@0
    74
 *         // use c
sl@0
    75
 *     }
sl@0
    76
 * }
sl@0
    77
 *
sl@0
    78
 * void function2(ForwardCharacterIterator &it) {
sl@0
    79
 *     UChar c;
sl@0
    80
 *     while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
sl@0
    81
 *         // use c
sl@0
    82
 *      }
sl@0
    83
 *  }
sl@0
    84
 * \endcode
sl@0
    85
 * </p>
sl@0
    86
 *
sl@0
    87
 * @stable ICU 2.0
sl@0
    88
 */
sl@0
    89
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Sentinel value (0xffff) that most ForwardCharacterIterator functions
     * return once the iterator has reached the limits of its iteration.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();

    /**
     * Equality test: true when both iterators refer to the same
     * character in the same character-storage object.
     * @param that The ForwardCharacterIterator to compare against
     * @return true when both iterators refer to the same
     * character in the same character-storage object
     * @stable ICU 2.0
     */
    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;

    /**
     * Inequality test: true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object.
     * @param that The ForwardCharacterIterator to compare against
     * @return true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object
     * @stable ICU 2.0
     */
    inline UBool operator!=(const ForwardCharacterIterator& that) const;

    /**
     * Produces a hash code for this iterator.
     * @return the hash code.
     * @stable ICU 2.0
     */
    virtual int32_t hashCode() const = 0;

    /**
     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
     * RTTI").<P> Although this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
     * @return a UClassID for this ForwardCharacterIterator
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID() const = 0;

    /**
     * Fetches the current code unit and then advances to the next
     * code unit in the iteration range (toward endIndex()).
     * Returns DONE when there are no more code units to return.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual UChar nextPostInc() = 0;

    /**
     * Fetches the current code point and then advances to the next
     * code point in the iteration range (toward endIndex()).
     * Returns DONE when there are no more code points to return.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32PostInc() = 0;

    /**
     * Tells whether forward iteration can continue. Intended for use
     * together with nextPostInc() or next32PostInc().
     * @return FALSE if there are no more code units or code points
     * at or after the current position in the iteration range.
     * @stable ICU 2.0
     */
    virtual UBool hasNext() = 0;

protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
    ForwardCharacterIterator();

    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
    ForwardCharacterIterator(const ForwardCharacterIterator &other);

    /**
     * Assignment operator to be overridden in the implementing class.
     * This base-class version copies nothing and simply returns *this.
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) {
        return *this;
    }
};
sl@0
   186
sl@0
   187
/**
sl@0
   188
 * Abstract class that defines an API for iteration
sl@0
   189
 * on text objects.
sl@0
   190
 * This is an interface for forward and backward iteration
sl@0
   191
 * and random access into a text object.
sl@0
   192
 *
sl@0
   193
 * <p>The API provides backward compatibility to the Java and older ICU
sl@0
   194
 * CharacterIterator classes but extends them significantly:
sl@0
   195
 * <ol>
sl@0
   196
 * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
sl@0
   197
 * <li>While the old API functions provided forward iteration with
sl@0
   198
 *     "pre-increment" semantics, the new one also provides functions
sl@0
   199
 *     with "post-increment" semantics. They are more efficient and should
sl@0
   200
 *     be the preferred iterator functions for new implementations.
sl@0
   201
 *     The backward iteration always had "pre-decrement" semantics, which
sl@0
   202
 *     are efficient.</li>
sl@0
   203
 * <li>Just like ForwardCharacterIterator, it provides access to
sl@0
   204
 *     both code units and code points. Code point access versions are available
sl@0
   205
 *     for the old and the new iteration semantics.</li>
sl@0
   206
 * <li>There are new functions for setting and moving the current position
sl@0
   207
 *     without returning a character, for efficiency.</li>
sl@0
   208
 * </ol>
sl@0
   209
 *
sl@0
   210
 * See ForwardCharacterIterator for examples for using the new forward iteration
sl@0
   211
 * functions. For backward iteration, there is also a hasPrevious() function
sl@0
   212
 * that can be used analogously to hasNext().
sl@0
   213
 * The old functions work as before and are shown below.</p>
sl@0
   214
 *
sl@0
   215
 * <p>Examples for some of the new functions:</p>
sl@0
   216
 *
sl@0
   217
 * Forward iteration with hasNext():
sl@0
   218
 * \code
sl@0
   219
 * void forward1(CharacterIterator &it) {
sl@0
   220
 *     UChar32 c;
sl@0
   221
 *     for(it.setToStart(); it.hasNext();) {
sl@0
   222
 *         c=it.next32PostInc();
sl@0
   223
 *         // use c
sl@0
   224
 *     }
sl@0
   225
 *  }
sl@0
   226
 * \endcode
sl@0
   227
 * Forward iteration more similar to loops with the old forward iteration,
sl@0
   228
 * showing a way to convert simple for() loops:
sl@0
   229
 * \code
sl@0
   230
 * void forward2(CharacterIterator &it) {
sl@0
   231
 *     UChar c;
sl@0
   232
 *     for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
sl@0
   233
 *          // use c
sl@0
   234
 *      }
sl@0
   235
 * }
sl@0
   236
 * \endcode
sl@0
   237
 * Backward iteration with setToEnd() and hasPrevious():
sl@0
   238
 * \code
sl@0
   239
 *  void backward1(CharacterIterator &it) {
sl@0
   240
 *      UChar32 c;
sl@0
   241
 *      for(it.setToEnd(); it.hasPrevious();) {
sl@0
   242
 *         c=it.previous32();
sl@0
   243
 *          // use c
sl@0
   244
 *      }
sl@0
   245
 *  }
sl@0
   246
 * \endcode
sl@0
   247
 * Backward iteration with a more traditional for() loop:
sl@0
   248
 * \code
sl@0
   249
 * void backward2(CharacterIterator &it) {
sl@0
   250
 *     UChar c;
sl@0
   251
 *     for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
sl@0
   252
 *         // use c
sl@0
   253
 *      }
sl@0
   254
 *  }
sl@0
   255
 * \endcode
sl@0
   256
 *
sl@0
   257
 * Example for random access:
sl@0
   258
 * \code
sl@0
   259
 *  void random(CharacterIterator &it) {
sl@0
   260
 *      // set to the third code point from the beginning
sl@0
   261
 *      it.move32(3, CharacterIterator::kStart);
sl@0
   262
 *      // get a code point from here without moving the position
sl@0
   263
 *      UChar32 c=it.current32();
sl@0
   264
 *      // get the position
sl@0
   265
 *      int32_t pos=it.getIndex();
sl@0
   266
 *      // get the previous code unit
sl@0
   267
 *      UChar u=it.previous();
sl@0
   268
 *      // move back one more code unit
sl@0
   269
 *      it.move(-1, CharacterIterator::kCurrent);
sl@0
   270
 *      // set the position back to where it was
sl@0
   271
 *      // and read the same code point c and move beyond it
sl@0
   272
 *      it.setIndex(pos);
sl@0
   273
 *      if(c!=it.next32PostInc()) {
sl@0
   274
 *          exit(1); // CharacterIterator inconsistent
sl@0
   275
 *      }
sl@0
   276
 *  }
sl@0
   277
 * \endcode
sl@0
   278
 *
sl@0
   279
 * <p>Examples, especially for the old API:</p>
sl@0
   280
 *
sl@0
   281
 * Function processing characters, in this example simple output
sl@0
   282
 * <pre>
sl@0
   283
 * \code
sl@0
   284
 *  void processChar( UChar c )
sl@0
   285
 *  {
sl@0
   286
 *      cout << " " << c;
sl@0
   287
 *  }
sl@0
   288
 * \endcode
sl@0
   289
 * </pre>
sl@0
   290
 * Traverse the text from start to finish
sl@0
   291
 * <pre> 
sl@0
   292
 * \code
sl@0
   293
 *  void traverseForward(CharacterIterator& iter)
sl@0
   294
 *  {
sl@0
   295
 *      for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
sl@0
   296
 *          processChar(c);
sl@0
   297
 *      }
sl@0
   298
 *  }
sl@0
   299
 * \endcode
sl@0
   300
 * </pre>
sl@0
   301
 * Traverse the text backwards, from end to start
sl@0
   302
 * <pre>
sl@0
   303
 * \code
sl@0
   304
 *  void traverseBackward(CharacterIterator& iter)
sl@0
   305
 *  {
sl@0
   306
 *      for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
sl@0
   307
 *          processChar(c);
sl@0
   308
 *      }
sl@0
   309
 *  }
sl@0
   310
 * \endcode
sl@0
   311
 * </pre>
sl@0
   312
 * Traverse both forward and backward from a given position in the text. 
sl@0
   313
 * The Unicode::isLetter() and Unicode::isDigit() tests in this example represent some additional stopping criteria.
sl@0
   314
 * <pre>
sl@0
   315
 * \code
sl@0
   316
 * void traverseOut(CharacterIterator& iter, int32_t pos)
sl@0
   317
 * {
sl@0
   318
 *      UChar c;
sl@0
   319
 *      for (c = iter.setIndex(pos);
sl@0
   320
 *          c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
sl@0
   321
 *          c = iter.next()) {}
sl@0
   322
 *      int32_t end = iter.getIndex();
sl@0
   323
 *      for (c = iter.setIndex(pos);
sl@0
   324
 *          c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
sl@0
   325
 *          c = iter.previous()) {}
sl@0
   326
 *      int32_t start = iter.getIndex() + 1;
sl@0
   327
 *  
sl@0
   328
 *      cout << "start: " << start << " end: " << end << endl;
sl@0
   329
 *      for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
sl@0
   330
 *          processChar(c);
sl@0
   331
 *     }
sl@0
   332
 *  }
sl@0
   333
 * \endcode
sl@0
   334
 * </pre>
sl@0
   335
 * Creating a StringCharacterIterator and calling the test functions
sl@0
   336
 * <pre>
sl@0
   337
 * \code
sl@0
   338
 *  void CharacterIterator_Example( void )
sl@0
   339
 *   {
sl@0
   340
 *       cout << endl << "===== CharacterIterator_Example: =====" << endl;
sl@0
   341
 *       UnicodeString text("Ein kleiner Satz.");
sl@0
   342
 *       StringCharacterIterator iterator(text);
sl@0
   343
 *       cout << "----- traverseForward: -----------" << endl;
sl@0
   344
 *       traverseForward( iterator );
sl@0
   345
 *       cout << endl << endl << "----- traverseBackward: ----------" << endl;
sl@0
   346
 *       traverseBackward( iterator );
sl@0
   347
 *       cout << endl << endl << "----- traverseOut: ---------------" << endl;
sl@0
   348
 *       traverseOut( iterator, 7 );
sl@0
   349
 *       cout << endl << endl << "-----" << endl;
sl@0
   350
 *   }
sl@0
   351
 * \endcode
sl@0
   352
 * </pre>
sl@0
   353
 *
sl@0
   354
 * @stable ICU 2.0
sl@0
   355
 */
sl@0
   356
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Origin enumeration for the move() and move32() functions.
     * @stable ICU 2.0
     */
    enum EOrigin { kStart, kCurrent, kEnd };

    /**
     * Creates a new CharacterIterator of the same concrete class as
     * this one, referring to the same character in the same
     * text-storage object. The caller is responsible for deleting
     * the new clone.
     * @return a pointer to a new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator* clone() const = 0;

    /**
     * Moves the iterator to the first code unit in its iteration
     * range and returns that code unit.
     * This can be used to begin an iteration with next().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar first() = 0;

    /**
     * Moves the iterator to the first code unit in its iteration
     * range, returns that code unit, and then moves the position
     * to the second code unit. This is an alternative to setToStart()
     * for forward iteration with nextPostInc().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar firstPostInc();

    /**
     * Moves the iterator to the first code point in its iteration
     * range and returns that code point.
     * This can be used to begin an iteration with next32().
     * Note that an iteration with next32PostInc(), beginning with,
     * e.g., setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32() = 0;

    /**
     * Moves the iterator to the first code point in its iteration
     * range, returns that code point, and then moves the position
     * to the second code point. This is an alternative to setToStart()
     * for forward iteration with next32PostInc().
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32PostInc();

    /**
     * Moves the iterator to the first code unit or code point in its
     * iteration range. This can be used to begin a forward
     * iteration with nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToStart();

    /**
     * Moves the iterator to the last code unit in its iteration
     * range and returns that code unit.
     * This can be used to begin an iteration with previous().
     * @return the last code unit.
     * @stable ICU 2.0
     */
    virtual UChar last() = 0;

    /**
     * Moves the iterator to the last code point in its iteration
     * range and returns that code point.
     * This can be used to begin an iteration with previous32().
     * @return the last code point.
     * @stable ICU 2.0
     */
    virtual UChar32 last32() = 0;

    /**
     * Moves the iterator to the end of its iteration range, just behind
     * the last code unit or code point. This can be used to begin a
     * backward iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToEnd();

    /**
     * Moves the iterator to the "position"-th code unit in the
     * text-storage object the iterator refers to, and returns
     * that code unit.
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual UChar setIndex(int32_t position) = 0;

    /**
     * Moves the iterator to the beginning of the code point that
     * contains the "position"-th code unit in the text-storage
     * object the iterator refers to, and returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the code point that contains the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual UChar32 setIndex32(int32_t position) = 0;

    /**
     * Returns the code unit the iterator currently refers to.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual UChar current() const = 0;

    /**
     * Returns the code point the iterator currently refers to.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 current32() const = 0;

    /**
     * Advances to the next code unit in the iteration range
     * (toward endIndex()) and returns that code unit. Returns DONE
     * when there are no more code units to return.
     * @return the next code unit.
     * @stable ICU 2.0
     */
    virtual UChar next() = 0;

    /**
     * Advances to the next code point in the iteration range
     * (toward endIndex()) and returns that code point. Returns DONE
     * when there are no more code points to return.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than iteration with "post-increment" semantics
     * that is provided by next32PostInc().
     * @return the next code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32() = 0;

    /**
     * Steps back to the previous code unit in the iteration range
     * (toward startIndex()) and returns that code unit. Returns DONE
     * when there are no more code units to return.
     * @return the previous code unit.
     * @stable ICU 2.0
     */
    virtual UChar previous() = 0;

    /**
     * Steps back to the previous code point in the iteration range
     * (toward startIndex()) and returns that code point. Returns DONE
     * when there are no more code points to return.
     * @return the previous code point.
     * @stable ICU 2.0
     */
    virtual UChar32 previous32() = 0;

    /**
     * Tells whether backward iteration can continue. Intended for use
     * together with previous() or previous32().
     * @return FALSE if there are no more code units or code points
     * before the current position in the iteration range, return TRUE otherwise.
     * @stable ICU 2.0
     */
    virtual UBool hasPrevious() = 0;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character returned by first(). Because an
     * iterator may cover only part of a text-storage object,
     * this number isn't necessarily 0.
     * @return the numeric index in the underlying text-storage
     * object of the character returned by first().
     * @stable ICU 2.0
     */
    inline int32_t startIndex() const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @return the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @stable ICU 2.0
     */
    inline int32_t endIndex() const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character the iterator currently refers to
     * (i.e., the character returned by current()).
     * @return the numeric index in the text-storage object of
     * the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t getIndex() const;

    /**
     * Returns the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t getLength() const;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code units forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move(int32_t delta, EOrigin origin) = 0;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code points forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".
     * @param result Receives a copy of the text under iteration.
     * @stable ICU 2.0
     */
    virtual void getText(UnicodeString& result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor, just setting the length field in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor, just setting the length and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor, just setting the length, start, end, and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);

    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator. Sets this CharacterIterator to have the same
     * behavior as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * This is necessary for correct getText() and hashCode().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * @stable ICU 2.0
     */
    int32_t pos;

    /**
     * Base class field for the start of the iteration range.
     * @stable ICU 2.0
     */
    int32_t begin;

    /**
     * Base class field for the end of the iteration range.
     * @stable ICU 2.0
     */
    int32_t end;
};
sl@0
   679
sl@0
   680
// Inequality is defined as the logical negation of the virtual operator==.
inline UBool ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const
{
    return !this->operator==(that);
}
sl@0
   684
sl@0
   685
// Repositions via move() with a zero offset from kStart and returns
// the position that move() reports.
inline int32_t CharacterIterator::setToStart()
{
    return this->move(0, CharacterIterator::kStart);
}
sl@0
   689
sl@0
   690
// Repositions via move() with a zero offset from kEnd and returns
// the position that move() reports.
inline int32_t CharacterIterator::setToEnd()
{
    return this->move(0, CharacterIterator::kEnd);
}
sl@0
   694
sl@0
   695
// Accessor for the base-class field holding the start of the iteration range.
inline int32_t CharacterIterator::startIndex() const
{
    return this->begin;
}
sl@0
   699
sl@0
   700
// Accessor for the base-class field holding the end of the iteration range.
inline int32_t CharacterIterator::endIndex() const
{
    return this->end;
}
sl@0
   704
sl@0
   705
// Accessor for the base-class field holding the current position.
inline int32_t CharacterIterator::getIndex() const
{
    return this->pos;
}
sl@0
   709
sl@0
   710
// Accessor for the base-class text length field.
inline int32_t CharacterIterator::getLength() const
{
    return this->textLength;
}
sl@0
   714
sl@0
   715
U_NAMESPACE_END
sl@0
   716
#endif