os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/uiter.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200 (2012-06-15)
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
*******************************************************************************
sl@0
     3
*
sl@0
     4
*   Copyright (C) 2002-2005, International Business Machines
sl@0
     5
*   Corporation and others.  All Rights Reserved.
sl@0
     6
*
sl@0
     7
*******************************************************************************
sl@0
     8
*   file name:  uiter.h
sl@0
     9
*   encoding:   US-ASCII
sl@0
    10
*   tab size:   8 (not used)
sl@0
    11
*   indentation:4
sl@0
    12
*
sl@0
    13
*   created on: 2002jan18
sl@0
    14
*   created by: Markus W. Scherer
sl@0
    15
*/
sl@0
    16
sl@0
    17
#ifndef __UITER_H__
sl@0
    18
#define __UITER_H__
sl@0
    19
sl@0
    20
/**
sl@0
    21
 * \file
sl@0
    22
 * \brief C API: Unicode Character Iteration
sl@0
    23
 *
sl@0
    24
 * @see UCharIterator
sl@0
    25
 */
sl@0
    26
sl@0
    27
#include "unicode/utypes.h"
sl@0
    28
sl@0
    29
#ifdef XP_CPLUSPLUS
sl@0
    30
    U_NAMESPACE_BEGIN
sl@0
    31
sl@0
    32
    class CharacterIterator;
sl@0
    33
    class Replaceable;
sl@0
    34
sl@0
    35
    U_NAMESPACE_END
sl@0
    36
#endif
sl@0
    37
sl@0
    38
U_CDECL_BEGIN
sl@0
    39
sl@0
    40
struct UCharIterator;
sl@0
    41
typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
sl@0
    42
sl@0
    43
/**
sl@0
    44
 * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
sl@0
    45
 * @see UCharIteratorMove
sl@0
    46
 * @see UCharIterator
sl@0
    47
 * @stable ICU 2.1
sl@0
    48
 */
sl@0
    49
typedef enum UCharIteratorOrigin {
sl@0
    50
    /*
     * Reference points passed as the "origin" argument of getIndex() and move().
     * In declaration order they select: the start of the iteration range,
     * the current position, the limit of the iteration range, index 0,
     * and the length.
     */
    UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
sl@0
    51
} UCharIteratorOrigin;
sl@0
    52
sl@0
    53
/** Constants for UCharIterator. @stable ICU 2.6 */
sl@0
    54
enum {
sl@0
    55
    /**
sl@0
    56
     * Constant value that may be returned by UCharIteratorMove
sl@0
    57
     * indicating that the final UTF-16 index is not known, but that the move succeeded.
sl@0
    58
     * This can occur when moving relative to limit or length, or
sl@0
    59
     * when moving relative to the current index after a setState()
sl@0
    60
     * when the current UTF-16 index is not known.
sl@0
    61
     *
sl@0
    62
     * It would be very inefficient to have to count from the beginning of the text
sl@0
    63
     * just to get the current/limit/length index after moving relative to it.
sl@0
    64
     * The actual index can be determined with getIndex(UITER_CURRENT)
sl@0
    65
     * which will count the UChars if necessary.
sl@0
    66
     *
sl@0
    67
     * @stable ICU 2.6
sl@0
    68
     */
sl@0
    69
    UITER_UNKNOWN_INDEX=-2
sl@0
    70
};
sl@0
    71
sl@0
    72
sl@0
    73
/**
sl@0
    74
 * Constant for UCharIterator getState() indicating an error or
sl@0
    75
 * an unknown state.
sl@0
    76
 * Returned by uiter_getState()/UCharIteratorGetState
sl@0
    77
 * when an error occurs.
sl@0
    78
 * Also, some UCharIterator implementations may not be able to return
sl@0
    79
 * a valid state for each position. This will be clearly documented
sl@0
    80
 * for each such iterator (none of the public ones here).
sl@0
    81
 *
sl@0
    82
 * @stable ICU 2.6
sl@0
    83
 */
sl@0
    84
#define UITER_NO_STATE ((uint32_t)0xffffffff)
sl@0
    85
sl@0
    86
/**
sl@0
    87
 * Function type declaration for UCharIterator.getIndex().
sl@0
    88
 *
sl@0
    89
 * Gets the current position, or the start or limit of the
sl@0
    90
 * iteration range.
sl@0
    91
 *
sl@0
    92
 * This function may perform slowly for UITER_CURRENT after setState() was called,
sl@0
    93
 * or for UITER_LENGTH, because an iterator implementation may have to count
sl@0
    94
 * UChars if the underlying storage is not UTF-16.
sl@0
    95
 *
sl@0
    96
 * @param iter the UCharIterator structure ("this pointer")
sl@0
    97
 * @param origin get the 0, start, limit, length, or current index
sl@0
    98
 * @return the requested index, or U_SENTINEL in an error condition
sl@0
    99
 *
sl@0
   100
 * @see UCharIteratorOrigin
sl@0
   101
 * @see UCharIterator
sl@0
   102
 * @stable ICU 2.1
sl@0
   103
 */
sl@0
   104
typedef int32_t U_CALLCONV
sl@0
   105
UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
sl@0
   106
sl@0
   107
/**
sl@0
   108
 * Function type declaration for UCharIterator.move().
sl@0
   109
 *
sl@0
   110
 * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
sl@0
   111
 *
sl@0
   112
 * Moves the current position relative to the start or limit of the
sl@0
   113
 * iteration range, or relative to the current position itself.
sl@0
   114
 * The movement is expressed in numbers of code units forward
sl@0
   115
 * or backward by specifying a positive or negative delta.
sl@0
   116
 * Out of bounds movement will be pinned to the start or limit.
sl@0
   117
 *
sl@0
   118
 * This function may perform slowly for moving relative to UITER_LENGTH
sl@0
   119
 * because an iterator implementation may have to count the rest of the
sl@0
   120
 * UChars if the native storage is not UTF-16.
sl@0
   121
 *
sl@0
   122
 * When moving relative to the limit or length, or
sl@0
   123
 * relative to the current position after setState() was called,
sl@0
   124
 * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
sl@0
   125
 * determination of the actual UTF-16 index.
sl@0
   126
 * The actual index can be determined with getIndex(UITER_CURRENT)
sl@0
   127
 * which will count the UChars if necessary.
sl@0
   128
 * See UITER_UNKNOWN_INDEX for details.
sl@0
   129
 *
sl@0
   130
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   131
 * @param delta can be positive, zero, or negative
sl@0
   132
 * @param origin move relative to the 0, start, limit, length, or current index
sl@0
   133
 * @return the new index, or U_SENTINEL on an error condition,
sl@0
   134
 *         or UITER_UNKNOWN_INDEX when the index is not known.
sl@0
   135
 *
sl@0
   136
 * @see UCharIteratorOrigin
sl@0
   137
 * @see UCharIterator
sl@0
   138
 * @see UITER_UNKNOWN_INDEX
sl@0
   139
 * @stable ICU 2.1
sl@0
   140
 */
sl@0
   141
typedef int32_t U_CALLCONV
sl@0
   142
UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
sl@0
   143
sl@0
   144
/**
sl@0
   145
 * Function type declaration for UCharIterator.hasNext().
sl@0
   146
 *
sl@0
   147
 * Check if current() and next() can still
sl@0
   148
 * return another code unit.
sl@0
   149
 *
sl@0
   150
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   151
 * @return boolean value for whether current() and next() can still return another code unit
sl@0
   152
 *
sl@0
   153
 * @see UCharIterator
sl@0
   154
 * @stable ICU 2.1
sl@0
   155
 */
sl@0
   156
typedef UBool U_CALLCONV
sl@0
   157
UCharIteratorHasNext(UCharIterator *iter);
sl@0
   158
sl@0
   159
/**
sl@0
   160
 * Function type declaration for UCharIterator.hasPrevious().
sl@0
   161
 *
sl@0
   162
 * Check if previous() can still return another code unit.
sl@0
   163
 *
sl@0
   164
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   165
 * @return boolean value for whether previous() can still return another code unit
sl@0
   166
 *
sl@0
   167
 * @see UCharIterator
sl@0
   168
 * @stable ICU 2.1
sl@0
   169
 */
sl@0
   170
typedef UBool U_CALLCONV
sl@0
   171
UCharIteratorHasPrevious(UCharIterator *iter);
sl@0
   172
 
sl@0
   173
/**
sl@0
   174
 * Function type declaration for UCharIterator.current().
sl@0
   175
 *
sl@0
   176
 * Return the code unit at the current position,
sl@0
   177
 * or U_SENTINEL if there is none (index is at the limit).
sl@0
   178
 *
sl@0
   179
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   180
 * @return the current code unit, or U_SENTINEL if the index is at the limit
sl@0
   181
 *
sl@0
   182
 * @see UCharIterator
sl@0
   183
 * @stable ICU 2.1
sl@0
   184
 */
sl@0
   185
typedef UChar32 U_CALLCONV
sl@0
   186
UCharIteratorCurrent(UCharIterator *iter);
sl@0
   187
sl@0
   188
/**
sl@0
   189
 * Function type declaration for UCharIterator.next().
sl@0
   190
 *
sl@0
   191
 * Return the code unit at the current index and increment
sl@0
   192
 * the index (post-increment, like s[i++]),
sl@0
   193
 * or return U_SENTINEL if there is none (index is at the limit).
sl@0
   194
 *
sl@0
   195
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   196
 * @return the current code unit (and post-increment the current index)
sl@0
   197
 *
sl@0
   198
 * @see UCharIterator
sl@0
   199
 * @stable ICU 2.1
sl@0
   200
 */
sl@0
   201
typedef UChar32 U_CALLCONV
sl@0
   202
UCharIteratorNext(UCharIterator *iter);
sl@0
   203
sl@0
   204
/**
sl@0
   205
 * Function type declaration for UCharIterator.previous().
sl@0
   206
 *
sl@0
   207
 * Decrement the index and return the code unit from there
sl@0
   208
 * (pre-decrement, like s[--i]),
sl@0
   209
 * or return U_SENTINEL if there is none (index is at the start).
sl@0
   210
 *
sl@0
   211
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   212
 * @return the previous code unit (after pre-decrementing the current index)
sl@0
   213
 *
sl@0
   214
 * @see UCharIterator
sl@0
   215
 * @stable ICU 2.1
sl@0
   216
 */
sl@0
   217
typedef UChar32 U_CALLCONV
sl@0
   218
UCharIteratorPrevious(UCharIterator *iter);
sl@0
   219
sl@0
   220
/**
sl@0
   221
 * Function type declaration for UCharIterator.reservedFn().
sl@0
   222
 * Reserved for future use.
sl@0
   223
 *
sl@0
   224
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   225
 * @param something some integer argument
sl@0
   226
 * @return some integer
sl@0
   227
 *
sl@0
   228
 * @see UCharIterator
sl@0
   229
 * @stable ICU 2.1
sl@0
   230
 */
sl@0
   231
typedef int32_t U_CALLCONV
sl@0
   232
UCharIteratorReserved(UCharIterator *iter, int32_t something);
sl@0
   233
sl@0
   234
/**
sl@0
   235
 * Function type declaration for UCharIterator.getState().
sl@0
   236
 *
sl@0
   237
 * Get the "state" of the iterator in the form of a single 32-bit word.
sl@0
   238
 * It is recommended that the state value be calculated to be as small as
sl@0
   239
 * is feasible. For strings with limited lengths, fewer than 32 bits may
sl@0
   240
 * be sufficient.
sl@0
   241
 *
sl@0
   242
 * This is used together with setState()/UCharIteratorSetState
sl@0
   243
 * to save and restore the iterator position more efficiently than with
sl@0
   244
 * getIndex()/move().
sl@0
   245
 *
sl@0
   246
 * The iterator state is defined as a uint32_t value because it is designed
sl@0
   247
 * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
sl@0
   248
 * of the character iterator.
sl@0
   249
 *
sl@0
   250
 * With some UCharIterator implementations (e.g., UTF-8),
sl@0
   251
 * getting and setting the UTF-16 index with existing functions
sl@0
   252
 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
sl@0
   253
 * relatively slow because the iterator has to "walk" from a known index
sl@0
   254
 * to the requested one.
sl@0
   255
 * This takes more time the farther it needs to go.
sl@0
   256
 *
sl@0
   257
 * An opaque state value allows an iterator implementation to provide
sl@0
   258
 * an internal index (UTF-8: the source byte array index) for
sl@0
   259
 * fast, constant-time restoration.
sl@0
   260
 *
sl@0
   261
 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
sl@0
   262
 * the UTF-16 index may not be restored as well, but the iterator can deliver
sl@0
   263
 * the correct text contents and move relative to the current position
sl@0
   264
 * without performance degradation.
sl@0
   265
 *
sl@0
   266
 * Some UCharIterator implementations may not be able to return
sl@0
   267
 * a valid state for each position, in which case they return UITER_NO_STATE instead.
sl@0
   268
 * This will be clearly documented for each such iterator (none of the public ones here).
sl@0
   269
 *
sl@0
   270
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   271
 * @return the state word, or UITER_NO_STATE if no valid state can be returned
sl@0
   272
 *
sl@0
   273
 * @see UCharIterator
sl@0
   274
 * @see UCharIteratorSetState
sl@0
   275
 * @see UITER_NO_STATE
sl@0
   276
 * @stable ICU 2.6
sl@0
   277
 */
sl@0
   278
typedef uint32_t U_CALLCONV
sl@0
   279
UCharIteratorGetState(const UCharIterator *iter);
sl@0
   280
sl@0
   281
/**
sl@0
   282
 * Function type declaration for UCharIterator.setState().
sl@0
   283
 *
sl@0
   284
 * Restore the "state" of the iterator using a state word from a getState() call.
sl@0
   285
 * The iterator object need not be the same one as for which getState() was called,
sl@0
   286
 * but it must be of the same type (set up using the same uiter_setXYZ function)
sl@0
   287
 * and it must iterate over the same string
sl@0
   288
 * (binary identical regardless of memory address).
sl@0
   289
 * For more about the state word see UCharIteratorGetState.
sl@0
   290
 *
sl@0
   291
 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
sl@0
   292
 * the UTF-16 index may not be restored as well, but the iterator can deliver
sl@0
   293
 * the correct text contents and move relative to the current position
sl@0
   294
 * without performance degradation.
sl@0
   295
 *
sl@0
   296
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   297
 * @param state the state word from a getState() call
sl@0
   298
 *              on a same-type, same-string iterator
sl@0
   299
 * @param pErrorCode Must be a valid pointer to an error code value,
sl@0
   300
 *                   which must not indicate a failure before the function call.
sl@0
   301
 *
sl@0
   302
 * @see UCharIterator
sl@0
   303
 * @see UCharIteratorGetState
sl@0
   304
 * @stable ICU 2.6
sl@0
   305
 */
sl@0
   306
typedef void U_CALLCONV
sl@0
   307
UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
sl@0
   308
sl@0
   309
sl@0
   310
/**
sl@0
   311
 * C API for code unit iteration.
sl@0
   312
 * This can be used as a C wrapper around
sl@0
   313
 * CharacterIterator, Replaceable, or implemented using simple strings, etc.
sl@0
   314
 *
sl@0
   315
 * There are two roles for using UCharIterator:
sl@0
   316
 *
sl@0
   317
 * A "provider" sets the necessary function pointers and controls the "protected"
sl@0
   318
 * fields of the UCharIterator structure. A "provider" passes a UCharIterator
sl@0
   319
 * into C APIs that need a UCharIterator as an abstract, flexible string interface.
sl@0
   320
 *
sl@0
   321
 * Implementations of such C APIs are "callers" of UCharIterator functions;
sl@0
   322
 * they only use the "public" function pointers and never access the "protected"
sl@0
   323
 * fields directly.
sl@0
   324
 *
sl@0
   325
 * The current() and next() functions only check the current index against the
sl@0
   326
 * limit, and previous() only checks the current index against the start,
sl@0
   327
 * to see if the iterator already reached the end of the iteration range.
sl@0
   328
 *
sl@0
   329
 * The assumption - in all iterators - is that the index is moved via the API,
sl@0
   330
 * which means it won't go out of bounds, or the index is modified by
sl@0
   331
 * user code that knows enough about the iterator implementation to set valid
sl@0
   332
 * index values.
sl@0
   333
 *
sl@0
   334
 * UCharIterator functions return code unit values 0..0xffff,
sl@0
   335
 * or U_SENTINEL if the iteration bounds are reached.
sl@0
   336
 *
sl@0
   337
 * @stable ICU 2.1
sl@0
   338
 */
sl@0
   339
struct UCharIterator {
sl@0
   340
    /**
sl@0
   341
     * (protected) Pointer to string or wrapped object or similar.
sl@0
   342
     * Not used by caller.
sl@0
   343
     * @stable ICU 2.1
sl@0
   344
     */
sl@0
   345
    const void *context;
sl@0
   346
sl@0
   347
    /**
sl@0
   348
     * (protected) Length of string or similar.
sl@0
   349
     * Not used by caller.
sl@0
   350
     * @stable ICU 2.1
sl@0
   351
     */
sl@0
   352
    int32_t length;
sl@0
   353
sl@0
   354
    /**
sl@0
   355
     * (protected) Start index or similar.
sl@0
   356
     * Not used by caller.
sl@0
   357
     * @stable ICU 2.1
sl@0
   358
     */
sl@0
   359
    int32_t start;
sl@0
   360
sl@0
   361
    /**
sl@0
   362
     * (protected) Current index or similar.
sl@0
   363
     * Not used by caller.
sl@0
   364
     * @stable ICU 2.1
sl@0
   365
     */
sl@0
   366
    int32_t index;
sl@0
   367
sl@0
   368
    /**
sl@0
   369
     * (protected) Limit index or similar.
sl@0
   370
     * Not used by caller.
sl@0
   371
     * @stable ICU 2.1
sl@0
   372
     */
sl@0
   373
    int32_t limit;
sl@0
   374
sl@0
   375
    /**
sl@0
   376
     * (protected) Used by UTF-8 iterators and possibly others.
sl@0
   377
     * @stable ICU 2.1
sl@0
   378
     */
sl@0
   379
    int32_t reservedField;
sl@0
   380
sl@0
   381
    /**
sl@0
   382
     * (public) Returns the current position or the
sl@0
   383
     * start or limit index of the iteration range.
sl@0
   384
     *
sl@0
   385
     * @see UCharIteratorGetIndex
sl@0
   386
     * @stable ICU 2.1
sl@0
   387
     */
sl@0
   388
    UCharIteratorGetIndex *getIndex;
sl@0
   389
sl@0
   390
    /**
sl@0
   391
     * (public) Moves the current position relative to the start or limit of the
sl@0
   392
     * iteration range, or relative to the current position itself.
sl@0
   393
     * The movement is expressed in numbers of code units forward
sl@0
   394
     * or backward by specifying a positive or negative delta.
sl@0
   395
     *
sl@0
   396
     * @see UCharIteratorMove
sl@0
   397
     * @stable ICU 2.1
sl@0
   398
     */
sl@0
   399
    UCharIteratorMove *move;
sl@0
   400
sl@0
   401
    /**
sl@0
   402
     * (public) Check if current() and next() can still
sl@0
   403
     * return another code unit.
sl@0
   404
     *
sl@0
   405
     * @see UCharIteratorHasNext
sl@0
   406
     * @stable ICU 2.1
sl@0
   407
     */
sl@0
   408
    UCharIteratorHasNext *hasNext;
sl@0
   409
sl@0
   410
    /**
sl@0
   411
     * (public) Check if previous() can still return another code unit.
sl@0
   412
     *
sl@0
   413
     * @see UCharIteratorHasPrevious
sl@0
   414
     * @stable ICU 2.1
sl@0
   415
     */
sl@0
   416
    UCharIteratorHasPrevious *hasPrevious;
sl@0
   417
sl@0
   418
    /**
sl@0
   419
     * (public) Return the code unit at the current position,
sl@0
   420
     * or U_SENTINEL if there is none (index is at the limit).
sl@0
   421
     *
sl@0
   422
     * @see UCharIteratorCurrent
sl@0
   423
     * @stable ICU 2.1
sl@0
   424
     */
sl@0
   425
    UCharIteratorCurrent *current;
sl@0
   426
sl@0
   427
    /**
sl@0
   428
     * (public) Return the code unit at the current index and increment
sl@0
   429
     * the index (post-increment, like s[i++]),
sl@0
   430
     * or return U_SENTINEL if there is none (index is at the limit).
sl@0
   431
     *
sl@0
   432
     * @see UCharIteratorNext
sl@0
   433
     * @stable ICU 2.1
sl@0
   434
     */
sl@0
   435
    UCharIteratorNext *next;
sl@0
   436
sl@0
   437
    /**
sl@0
   438
     * (public) Decrement the index and return the code unit from there
sl@0
   439
     * (pre-decrement, like s[--i]),
sl@0
   440
     * or return U_SENTINEL if there is none (index is at the start).
sl@0
   441
     *
sl@0
   442
     * @see UCharIteratorPrevious
sl@0
   443
     * @stable ICU 2.1
sl@0
   444
     */
sl@0
   445
    UCharIteratorPrevious *previous;
sl@0
   446
sl@0
   447
    /**
sl@0
   448
     * (public) Reserved for future use. Currently NULL.
sl@0
   449
     *
sl@0
   450
     * @see UCharIteratorReserved
sl@0
   451
     * @stable ICU 2.1
sl@0
   452
     */
sl@0
   453
    UCharIteratorReserved *reservedFn;
sl@0
   454
sl@0
   455
    /**
sl@0
   456
     * (public) Return the state of the iterator, to be restored later with setState().
sl@0
   457
     * This function pointer is NULL if the iterator does not implement it.
sl@0
   458
     *
sl@0
   459
     * @see UCharIteratorGetState
sl@0
   460
     * @stable ICU 2.6
sl@0
   461
     */
sl@0
   462
    UCharIteratorGetState *getState;
sl@0
   463
sl@0
   464
    /**
sl@0
   465
     * (public) Restore the iterator state from the state word from a call
sl@0
   466
     * to getState().
sl@0
   467
     * This function pointer is NULL if the iterator does not implement it.
sl@0
   468
     *
sl@0
   469
     * @see UCharIteratorSetState
sl@0
   470
     * @stable ICU 2.6
sl@0
   471
     */
sl@0
   472
    UCharIteratorSetState *setState;
sl@0
   473
};
sl@0
   474
sl@0
   475
/**
sl@0
   476
 * Helper function for UCharIterator to get the code point
sl@0
   477
 * at the current index.
sl@0
   478
 *
sl@0
   479
 * Return the code point that includes the code unit at the current position,
sl@0
   480
 * or U_SENTINEL if there is none (index is at the limit).
sl@0
   481
 * If the current code unit is a lead or trail surrogate,
sl@0
   482
 * then the following or preceding surrogate is used to form
sl@0
   483
 * the code point value.
sl@0
   484
 *
sl@0
   485
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   486
 * @return the current code point
sl@0
   487
 *
sl@0
   488
 * @see UCharIterator
sl@0
   489
 * @see U16_GET
sl@0
   490
 * @see UnicodeString::char32At()
sl@0
   491
 * @stable ICU 2.1
sl@0
   492
 */
sl@0
   493
U_STABLE UChar32 U_EXPORT2
sl@0
   494
uiter_current32(UCharIterator *iter);
sl@0
   495
sl@0
   496
/**
sl@0
   497
 * Helper function for UCharIterator to get the next code point.
sl@0
   498
 *
sl@0
   499
 * Return the code point at the current index and increment
sl@0
   500
 * the index (post-increment, like s[i++]),
sl@0
   501
 * or return U_SENTINEL if there is none (index is at the limit).
sl@0
   502
 *
sl@0
   503
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   504
 * @return the current code point (and post-increment the current index)
sl@0
   505
 *
sl@0
   506
 * @see UCharIterator
sl@0
   507
 * @see U16_NEXT
sl@0
   508
 * @stable ICU 2.1
sl@0
   509
 */
sl@0
   510
U_STABLE UChar32 U_EXPORT2
sl@0
   511
uiter_next32(UCharIterator *iter);
sl@0
   512
sl@0
   513
/**
sl@0
   514
 * Helper function for UCharIterator to get the previous code point.
sl@0
   515
 *
sl@0
   516
 * Decrement the index and return the code point from there
sl@0
   517
 * (pre-decrement, like s[--i]),
sl@0
   518
 * or return U_SENTINEL if there is none (index is at the start).
sl@0
   519
 *
sl@0
   520
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   521
 * @return the previous code point (after pre-decrementing the current index)
sl@0
   522
 *
sl@0
   523
 * @see UCharIterator
sl@0
   524
 * @see U16_PREV
sl@0
   525
 * @stable ICU 2.1
sl@0
   526
 */
sl@0
   527
U_STABLE UChar32 U_EXPORT2
sl@0
   528
uiter_previous32(UCharIterator *iter);
sl@0
   529
sl@0
   530
/**
sl@0
   531
 * Get the "state" of the iterator in the form of a single 32-bit word.
sl@0
   532
 * This is a convenience function that calls iter->getState(iter)
sl@0
   533
 * if iter->getState is not NULL;
sl@0
   534
 * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
sl@0
   535
 *
sl@0
   536
 * Some UCharIterator implementations may not be able to return
sl@0
   537
 * a valid state for each position, in which case they return UITER_NO_STATE instead.
sl@0
   538
 * This will be clearly documented for each such iterator (none of the public ones here).
sl@0
   539
 *
sl@0
   540
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   541
 * @return the state word, or UITER_NO_STATE if iter->getState is NULL or an error occurs
sl@0
   542
 *
sl@0
   543
 * @see UCharIterator
sl@0
   544
 * @see UCharIteratorGetState
sl@0
   545
 * @see UITER_NO_STATE
sl@0
   546
 * @stable ICU 2.6
sl@0
   547
 */
sl@0
   548
U_STABLE uint32_t U_EXPORT2
sl@0
   549
uiter_getState(const UCharIterator *iter);
sl@0
   550
sl@0
   551
/**
sl@0
   552
 * Restore the "state" of the iterator using a state word from a getState() call.
sl@0
   553
 * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
sl@0
   554
 * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
sl@0
   555
 *
sl@0
   556
 * @param iter the UCharIterator structure ("this pointer")
sl@0
   557
 * @param state the state word from a getState() call
sl@0
   558
 *              on a same-type, same-string iterator
sl@0
   559
 * @param pErrorCode Must be a valid pointer to an error code value,
sl@0
   560
 *                   which must not indicate a failure before the function call.
sl@0
   561
 *
sl@0
   562
 * @see UCharIterator
sl@0
   563
 * @see UCharIteratorSetState
sl@0
   564
 * @stable ICU 2.6
sl@0
   565
 */
sl@0
   566
U_STABLE void U_EXPORT2
sl@0
   567
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
sl@0
   568
sl@0
   569
/**
sl@0
   570
 * Set up a UCharIterator to iterate over a string.
sl@0
   571
 *
sl@0
   572
 * Sets the UCharIterator function pointers for iteration over the string s
sl@0
   573
 * with iteration boundaries start=index=0 and length=limit=string length.
sl@0
   574
 * The "provider" may set the start, index, and limit values at any time
sl@0
   575
 * within the range 0..length.
sl@0
   576
 * The length field will be ignored.
sl@0
   577
 *
sl@0
   578
 * The string pointer s is set into UCharIterator.context without copying
sl@0
   579
 * or reallocating the string contents.
sl@0
   580
 *
sl@0
   581
 * getState() simply returns the current index.
sl@0
   582
 * move() will always return the final index.
sl@0
   583
 *
sl@0
   584
 * @param iter UCharIterator structure to be set for iteration
sl@0
   585
 * @param s String to iterate over
sl@0
   586
 * @param length Length of s, or -1 if NUL-terminated
sl@0
   587
 *
sl@0
   588
 * @see UCharIterator
sl@0
   589
 * @stable ICU 2.1
sl@0
   590
 */
sl@0
   591
U_STABLE void U_EXPORT2
sl@0
   592
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
sl@0
   593
sl@0
   594
/**
sl@0
   595
 * Set up a UCharIterator to iterate over a UTF-16BE string
sl@0
   596
 * (byte vector with a big-endian pair of bytes per UChar).
sl@0
   597
 *
sl@0
   598
 * Everything works just like with a normal UChar iterator (uiter_setString),
sl@0
   599
 * except that UChars are assembled from byte pairs,
sl@0
   600
 * and that the length argument here indicates an even number of bytes.
sl@0
   601
 *
sl@0
   602
 * getState() simply returns the current index.
sl@0
   603
 * move() will always return the final index.
sl@0
   604
 *
sl@0
   605
 * @param iter UCharIterator structure to be set for iteration
sl@0
   606
 * @param s UTF-16BE string to iterate over
sl@0
   607
 * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
sl@0
   608
 *               (NUL means pair of 0 bytes at even index from s)
sl@0
   609
 *
sl@0
   610
 * @see UCharIterator
sl@0
   611
 * @see uiter_setString
sl@0
   612
 * @stable ICU 2.6
sl@0
   613
 */
sl@0
   614
U_STABLE void U_EXPORT2
sl@0
   615
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
sl@0
   616
sl@0
   617
/**
sl@0
   618
 * Set up a UCharIterator to iterate over a UTF-8 string.
sl@0
   619
 *
sl@0
   620
 * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
sl@0
   621
 * with UTF-8 iteration boundaries 0 and length.
sl@0
   622
 * The implementation counts the UTF-16 index on the fly and
sl@0
   623
 * lazily evaluates the UTF-16 length of the text.
sl@0
   624
 *
sl@0
   625
 * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
sl@0
   626
 * When the reservedField is not 0, then it contains a supplementary code point
sl@0
   627
 * and the UTF-16 index is between the two corresponding surrogates.
sl@0
   628
 * At that point, the UTF-8 index is behind that code point.
sl@0
   629
 *
sl@0
   630
 * The UTF-8 string pointer s is set into UCharIterator.context without copying
sl@0
   631
 * or reallocating the string contents.
sl@0
   632
 *
sl@0
   633
 * getState() returns a state value consisting of
sl@0
   634
 * - the current UTF-8 source byte index (bits 31..1)
sl@0
   635
 * - a flag (bit 0) that indicates whether the UChar position is in the middle
sl@0
   636
 *   of a surrogate pair
sl@0
   637
 *   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
sl@0
   638
 *
sl@0
   639
 * getState() cannot also encode the UTF-16 index in the state value.
sl@0
   640
 * move(relative to limit or length), or
sl@0
   641
 * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
sl@0
   642
 *
sl@0
   643
 * @param iter UCharIterator structure to be set for iteration
sl@0
   644
 * @param s UTF-8 string to iterate over
sl@0
   645
 * @param length Length of s in bytes, or -1 if NUL-terminated
sl@0
   646
 *
sl@0
   647
 * @see UCharIterator
sl@0
   648
 * @stable ICU 2.6
sl@0
   649
 */
sl@0
   650
U_STABLE void U_EXPORT2
sl@0
   651
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
sl@0
   652
sl@0
   653
#ifdef XP_CPLUSPLUS
sl@0
   654
sl@0
   655
/**
sl@0
   656
 * Set up a UCharIterator to wrap around a C++ CharacterIterator.
sl@0
   657
 *
sl@0
   658
 * Sets the UCharIterator function pointers for iteration using the
sl@0
   659
 * CharacterIterator charIter.
sl@0
   660
 *
sl@0
   661
 * The CharacterIterator pointer charIter is set into UCharIterator.context
sl@0
   662
 * without copying or cloning the CharacterIterator object.
sl@0
   663
 * The other "protected" UCharIterator fields are set to 0 and will be ignored.
sl@0
   664
 * The iteration index and boundaries are controlled by the CharacterIterator.
sl@0
   665
 *
sl@0
   666
 * getState() simply returns the current index.
sl@0
   667
 * move() will always return the final index.
sl@0
   668
 *
sl@0
   669
 * @param iter UCharIterator structure to be set for iteration
sl@0
   670
 * @param charIter CharacterIterator to wrap
sl@0
   671
 *
sl@0
   672
 * @see UCharIterator
sl@0
   673
 * @stable ICU 2.1
sl@0
   674
 */
sl@0
   675
U_STABLE void U_EXPORT2
sl@0
   676
uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter);
sl@0
   677
sl@0
   678
/**
sl@0
   679
 * Set up a UCharIterator to iterate over a C++ Replaceable.
sl@0
   680
 *
sl@0
   681
 * Sets the UCharIterator function pointers for iteration over the
sl@0
   682
 * Replaceable rep with iteration boundaries start=index=0 and
sl@0
   683
 * length=limit=rep->length().
sl@0
   684
 * The "provider" may set the start, index, and limit values at any time
sl@0
   685
 * within the range 0..length=rep->length().
sl@0
   686
 * The length field will be ignored.
sl@0
   687
 *
sl@0
   688
 * The Replaceable pointer rep is set into UCharIterator.context without copying
sl@0
   689
 * or cloning/reallocating the Replaceable object.
sl@0
   690
 *
sl@0
   691
 * getState() simply returns the current index.
sl@0
   692
 * move() will always return the final index.
sl@0
   693
 *
sl@0
   694
 * @param iter UCharIterator structure to be set for iteration
sl@0
   695
 * @param rep Replaceable to iterate over
sl@0
   696
 *
sl@0
   697
 * @see UCharIterator
sl@0
   698
 * @stable ICU 2.1
sl@0
   699
 */
sl@0
   700
U_STABLE void U_EXPORT2
sl@0
   701
uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep);
sl@0
   702
sl@0
   703
#endif
sl@0
   704
sl@0
   705
U_CDECL_END
sl@0
   706
sl@0
   707
#endif