sl@0: /* sl@0: ******************************************************************************* sl@0: * sl@0: * Copyright (C) 2002-2005, International Business Machines sl@0: * Corporation and others. All Rights Reserved. sl@0: * sl@0: ******************************************************************************* sl@0: * file name: uiter.h sl@0: * encoding: US-ASCII sl@0: * tab size: 8 (not used) sl@0: * indentation:4 sl@0: * sl@0: * created on: 2002jan18 sl@0: * created by: Markus W. Scherer sl@0: */ sl@0: sl@0: #ifndef __UITER_H__ sl@0: #define __UITER_H__ sl@0: sl@0: /** sl@0: * \file sl@0: * \brief C API: Unicode Character Iteration sl@0: * sl@0: * @see UCharIterator sl@0: */ sl@0: sl@0: #include "unicode/utypes.h" sl@0: sl@0: #ifdef XP_CPLUSPLUS sl@0: U_NAMESPACE_BEGIN sl@0: sl@0: class CharacterIterator; sl@0: class Replaceable; sl@0: sl@0: U_NAMESPACE_END sl@0: #endif sl@0: sl@0: U_CDECL_BEGIN sl@0: sl@0: struct UCharIterator; sl@0: typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ sl@0: sl@0: /** sl@0: * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). sl@0: * @see UCharIteratorMove sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef enum UCharIteratorOrigin { sl@0: UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH sl@0: } UCharIteratorOrigin; sl@0: sl@0: /** Constants for UCharIterator. @stable ICU 2.6 */ sl@0: enum { sl@0: /** sl@0: * Constant value that may be returned by UCharIteratorMove sl@0: * indicating that the final UTF-16 index is not known, but that the move succeeded. sl@0: * This can occur when moving relative to limit or length, or sl@0: * when moving relative to the current index after a setState() sl@0: * when the current UTF-16 index is not known. sl@0: * sl@0: * It would be very inefficient to have to count from the beginning of the text sl@0: * just to get the current/limit/length index after moving relative to it. sl@0: * The actual index can be determined with getIndex(UITER_CURRENT) sl@0: * which will count the UChars if necessary. sl@0: * sl@0: * @stable ICU 2.6 sl@0: */ sl@0: UITER_UNKNOWN_INDEX=-2 sl@0: }; sl@0: sl@0: sl@0: /** sl@0: * Constant for UCharIterator getState() indicating an error or sl@0: * an unknown state. sl@0: * Returned by uiter_getState()/UCharIteratorGetState sl@0: * when an error occurs. sl@0: * Also, some UCharIterator implementations may not be able to return sl@0: * a valid state for each position. This will be clearly documented sl@0: * for each such iterator (none of the public ones here). sl@0: * sl@0: * @stable ICU 2.6 sl@0: */ sl@0: #define UITER_NO_STATE ((uint32_t)0xffffffff) sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.getIndex(). sl@0: * sl@0: * Gets the current position, or the start or limit of the sl@0: * iteration range. sl@0: * sl@0: * This function may perform slowly for UITER_CURRENT after setState() was called, sl@0: * or for UITER_LENGTH, because an iterator implementation may have to count sl@0: * UChars if the underlying storage is not UTF-16. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @param origin get the 0, start, limit, length, or current index sl@0: * @return the requested index, or U_SENTINEL in an error condition sl@0: * sl@0: * @see UCharIteratorOrigin sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef int32_t U_CALLCONV sl@0: UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.move(). sl@0: * sl@0: * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). sl@0: * sl@0: * Moves the current position relative to the start or limit of the sl@0: * iteration range, or relative to the current position itself. sl@0: * The movement is expressed in numbers of code units forward sl@0: * or backward by specifying a positive or negative delta. sl@0: * Out of bounds movement will be pinned to the start or limit. sl@0: * sl@0: * This function may perform slowly for moving relative to UITER_LENGTH sl@0: * because an iterator implementation may have to count the rest of the sl@0: * UChars if the native storage is not UTF-16. sl@0: * sl@0: * When moving relative to the limit or length, or sl@0: * relative to the current position after setState() was called, sl@0: * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient sl@0: * determination of the actual UTF-16 index. sl@0: * The actual index can be determined with getIndex(UITER_CURRENT) sl@0: * which will count the UChars if necessary. sl@0: * See UITER_UNKNOWN_INDEX for details. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @param delta can be positive, zero, or negative sl@0: * @param origin move relative to the 0, start, limit, length, or current index sl@0: * @return the new index, or U_SENTINEL on an error condition, sl@0: * or UITER_UNKNOWN_INDEX when the index is not known. sl@0: * sl@0: * @see UCharIteratorOrigin sl@0: * @see UCharIterator sl@0: * @see UITER_UNKNOWN_INDEX sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef int32_t U_CALLCONV sl@0: UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.hasNext(). sl@0: * sl@0: * Check if current() and next() can still sl@0: * return another code unit. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return boolean value for whether current() and next() can still return another code unit sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef UBool U_CALLCONV sl@0: UCharIteratorHasNext(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.hasPrevious(). sl@0: * sl@0: * Check if previous() can still return another code unit. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return boolean value for whether previous() can still return another code unit sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef UBool U_CALLCONV sl@0: UCharIteratorHasPrevious(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.current(). sl@0: * sl@0: * Return the code unit at the current position, sl@0: * or U_SENTINEL if there is none (index is at the limit). sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the current code unit sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef UChar32 U_CALLCONV sl@0: UCharIteratorCurrent(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.next(). sl@0: * sl@0: * Return the code unit at the current index and increment sl@0: * the index (post-increment, like s[i++]), sl@0: * or return U_SENTINEL if there is none (index is at the limit). sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the current code unit (and post-increment the current index) sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef UChar32 U_CALLCONV sl@0: UCharIteratorNext(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.previous(). sl@0: * sl@0: * Decrement the index and return the code unit from there sl@0: * (pre-decrement, like s[--i]), sl@0: * or return U_SENTINEL if there is none (index is at the start). sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the previous code unit (after pre-decrementing the current index) sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef UChar32 U_CALLCONV sl@0: UCharIteratorPrevious(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.reservedFn(). sl@0: * Reserved for future use. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @param something some integer argument sl@0: * @return some integer sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: typedef int32_t U_CALLCONV sl@0: UCharIteratorReserved(UCharIterator *iter, int32_t something); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.getState(). sl@0: * sl@0: * Get the "state" of the iterator in the form of a single 32-bit word. sl@0: * It is recommended that the state value be calculated to be as small as sl@0: * is feasible. For strings with limited lengths, fewer than 32 bits may sl@0: * be sufficient. sl@0: * sl@0: * This is used together with setState()/UCharIteratorSetState sl@0: * to save and restore the iterator position more efficiently than with sl@0: * getIndex()/move(). sl@0: * sl@0: * The iterator state is defined as a uint32_t value because it is designed sl@0: * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state sl@0: * of the character iterator. sl@0: * sl@0: * With some UCharIterator implementations (e.g., UTF-8), sl@0: * getting and setting the UTF-16 index with existing functions sl@0: * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but sl@0: * relatively slow because the iterator has to "walk" from a known index sl@0: * to the requested one. sl@0: * This takes more time the farther it needs to go. sl@0: * sl@0: * An opaque state value allows an iterator implementation to provide sl@0: * an internal index (UTF-8: the source byte array index) for sl@0: * fast, constant-time restoration. sl@0: * sl@0: * After calling setState(), a getIndex(UITER_CURRENT) may be slow because sl@0: * the UTF-16 index may not be restored as well, but the iterator can deliver sl@0: * the correct text contents and move relative to the current position sl@0: * without performance degradation. sl@0: * sl@0: * Some UCharIterator implementations may not be able to return sl@0: * a valid state for each position, in which case they return UITER_NO_STATE instead. sl@0: * This will be clearly documented for each such iterator (none of the public ones here). sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the state word sl@0: * sl@0: * @see UCharIterator sl@0: * @see UCharIteratorSetState sl@0: * @see UITER_NO_STATE sl@0: * @stable ICU 2.6 sl@0: */ sl@0: typedef uint32_t U_CALLCONV sl@0: UCharIteratorGetState(const UCharIterator *iter); sl@0: sl@0: /** sl@0: * Function type declaration for UCharIterator.setState(). sl@0: * sl@0: * Restore the "state" of the iterator using a state word from a getState() call. sl@0: * The iterator object need not be the same one as for which getState() was called, sl@0: * but it must be of the same type (set up using the same uiter_setXYZ function) sl@0: * and it must iterate over the same string sl@0: * (binary identical regardless of memory address). sl@0: * For more about the state word see UCharIteratorGetState. sl@0: * sl@0: * After calling setState(), a getIndex(UITER_CURRENT) may be slow because sl@0: * the UTF-16 index may not be restored as well, but the iterator can deliver sl@0: * the correct text contents and move relative to the current position sl@0: * without performance degradation. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @param state the state word from a getState() call sl@0: * on a same-type, same-string iterator sl@0: * @param pErrorCode Must be a valid pointer to an error code value, sl@0: * which must not indicate a failure before the function call. sl@0: * sl@0: * @see UCharIterator sl@0: * @see UCharIteratorGetState sl@0: * @stable ICU 2.6 sl@0: */ sl@0: typedef void U_CALLCONV sl@0: UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); sl@0: sl@0: sl@0: /** sl@0: * C API for code unit iteration. sl@0: * This can be used as a C wrapper around sl@0: * CharacterIterator, Replaceable, or implemented using simple strings, etc. sl@0: * sl@0: * There are two roles for using UCharIterator: sl@0: * sl@0: * A "provider" sets the necessary function pointers and controls the "protected" sl@0: * fields of the UCharIterator structure. A "provider" passes a UCharIterator sl@0: * into C APIs that need a UCharIterator as an abstract, flexible string interface. sl@0: * sl@0: * Implementations of such C APIs are "callers" of UCharIterator functions; sl@0: * they only use the "public" function pointers and never access the "protected" sl@0: * fields directly. sl@0: * sl@0: * The current() and next() functions only check the current index against the sl@0: * limit, and previous() only checks the current index against the start, sl@0: * to see if the iterator already reached the end of the iteration range. sl@0: * sl@0: * The assumption - in all iterators - is that the index is moved via the API, sl@0: * which means it won't go out of bounds, or the index is modified by sl@0: * user code that knows enough about the iterator implementation to set valid sl@0: * index values. sl@0: * sl@0: * UCharIterator functions return code unit values 0..0xffff, sl@0: * or U_SENTINEL if the iteration bounds are reached. sl@0: * sl@0: * @stable ICU 2.1 sl@0: */ sl@0: struct UCharIterator { sl@0: /** sl@0: * (protected) Pointer to string or wrapped object or similar. sl@0: * Not used by caller. sl@0: * @stable ICU 2.1 sl@0: */ sl@0: const void *context; sl@0: sl@0: /** sl@0: * (protected) Length of string or similar. sl@0: * Not used by caller. sl@0: * @stable ICU 2.1 sl@0: */ sl@0: int32_t length; sl@0: sl@0: /** sl@0: * (protected) Start index or similar. sl@0: * Not used by caller. sl@0: * @stable ICU 2.1 sl@0: */ sl@0: int32_t start; sl@0: sl@0: /** sl@0: * (protected) Current index or similar. sl@0: * Not used by caller. sl@0: * @stable ICU 2.1 sl@0: */ sl@0: int32_t index; sl@0: sl@0: /** sl@0: * (protected) Limit index or similar. sl@0: * Not used by caller. sl@0: * @stable ICU 2.1 sl@0: */ sl@0: int32_t limit; sl@0: sl@0: /** sl@0: * (protected) Used by UTF-8 iterators and possibly others. sl@0: * @stable ICU 2.1 sl@0: */ sl@0: int32_t reservedField; sl@0: sl@0: /** sl@0: * (public) Returns the current position or the sl@0: * start or limit index of the iteration range. sl@0: * sl@0: * @see UCharIteratorGetIndex sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorGetIndex *getIndex; sl@0: sl@0: /** sl@0: * (public) Moves the current position relative to the start or limit of the sl@0: * iteration range, or relative to the current position itself. sl@0: * The movement is expressed in numbers of code units forward sl@0: * or backward by specifying a positive or negative delta. sl@0: * sl@0: * @see UCharIteratorMove sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorMove *move; sl@0: sl@0: /** sl@0: * (public) Check if current() and next() can still sl@0: * return another code unit. sl@0: * sl@0: * @see UCharIteratorHasNext sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorHasNext *hasNext; sl@0: sl@0: /** sl@0: * (public) Check if previous() can still return another code unit. sl@0: * sl@0: * @see UCharIteratorHasPrevious sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorHasPrevious *hasPrevious; sl@0: sl@0: /** sl@0: * (public) Return the code unit at the current position, sl@0: * or U_SENTINEL if there is none (index is at the limit). sl@0: * sl@0: * @see UCharIteratorCurrent sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorCurrent *current; sl@0: sl@0: /** sl@0: * (public) Return the code unit at the current index and increment sl@0: * the index (post-increment, like s[i++]), sl@0: * or return U_SENTINEL if there is none (index is at the limit). sl@0: * sl@0: * @see UCharIteratorNext sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorNext *next; sl@0: sl@0: /** sl@0: * (public) Decrement the index and return the code unit from there sl@0: * (pre-decrement, like s[--i]), sl@0: * or return U_SENTINEL if there is none (index is at the start). sl@0: * sl@0: * @see UCharIteratorPrevious sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorPrevious *previous; sl@0: sl@0: /** sl@0: * (public) Reserved for future use. Currently NULL. sl@0: * sl@0: * @see UCharIteratorReserved sl@0: * @stable ICU 2.1 sl@0: */ sl@0: UCharIteratorReserved *reservedFn; sl@0: sl@0: /** sl@0: * (public) Return the state of the iterator, to be restored later with setState(). sl@0: * This function pointer is NULL if the iterator does not implement it. sl@0: * sl@0: * @see UCharIteratorGet sl@0: * @stable ICU 2.6 sl@0: */ sl@0: UCharIteratorGetState *getState; sl@0: sl@0: /** sl@0: * (public) Restore the iterator state from the state word from a call sl@0: * to getState(). sl@0: * This function pointer is NULL if the iterator does not implement it. sl@0: * sl@0: * @see UCharIteratorSet sl@0: * @stable ICU 2.6 sl@0: */ sl@0: UCharIteratorSetState *setState; sl@0: }; sl@0: sl@0: /** sl@0: * Helper function for UCharIterator to get the code point sl@0: * at the current index. sl@0: * sl@0: * Return the code point that includes the code unit at the current position, sl@0: * or U_SENTINEL if there is none (index is at the limit). sl@0: * If the current code unit is a lead or trail surrogate, sl@0: * then the following or preceding surrogate is used to form sl@0: * the code point value. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the current code point sl@0: * sl@0: * @see UCharIterator sl@0: * @see U16_GET sl@0: * @see UnicodeString::char32At() sl@0: * @stable ICU 2.1 sl@0: */ sl@0: U_STABLE UChar32 U_EXPORT2 sl@0: uiter_current32(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Helper function for UCharIterator to get the next code point. sl@0: * sl@0: * Return the code point at the current index and increment sl@0: * the index (post-increment, like s[i++]), sl@0: * or return U_SENTINEL if there is none (index is at the limit). sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the current code point (and post-increment the current index) sl@0: * sl@0: * @see UCharIterator sl@0: * @see U16_NEXT sl@0: * @stable ICU 2.1 sl@0: */ sl@0: U_STABLE UChar32 U_EXPORT2 sl@0: uiter_next32(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Helper function for UCharIterator to get the previous code point. sl@0: * sl@0: * Decrement the index and return the code point from there sl@0: * (pre-decrement, like s[--i]), sl@0: * or return U_SENTINEL if there is none (index is at the start). sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the previous code point (after pre-decrementing the current index) sl@0: * sl@0: * @see UCharIterator sl@0: * @see U16_PREV sl@0: * @stable ICU 2.1 sl@0: */ sl@0: U_STABLE UChar32 U_EXPORT2 sl@0: uiter_previous32(UCharIterator *iter); sl@0: sl@0: /** sl@0: * Get the "state" of the iterator in the form of a single 32-bit word. sl@0: * This is a convenience function that calls iter->getState(iter) sl@0: * if iter->getState is not NULL; sl@0: * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. sl@0: * sl@0: * Some UCharIterator implementations may not be able to return sl@0: * a valid state for each position, in which case they return UITER_NO_STATE instead. sl@0: * This will be clearly documented for each such iterator (none of the public ones here). sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @return the state word sl@0: * sl@0: * @see UCharIterator sl@0: * @see UCharIteratorGetState sl@0: * @see UITER_NO_STATE sl@0: * @stable ICU 2.6 sl@0: */ sl@0: U_STABLE uint32_t U_EXPORT2 sl@0: uiter_getState(const UCharIterator *iter); sl@0: sl@0: /** sl@0: * Restore the "state" of the iterator using a state word from a getState() call. sl@0: * This is a convenience function that calls iter->setState(iter, state, pErrorCode) sl@0: * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. sl@0: * sl@0: * @param iter the UCharIterator structure ("this pointer") sl@0: * @param state the state word from a getState() call sl@0: * on a same-type, same-string iterator sl@0: * @param pErrorCode Must be a valid pointer to an error code value, sl@0: * which must not indicate a failure before the function call. sl@0: * sl@0: * @see UCharIterator sl@0: * @see UCharIteratorSetState sl@0: * @stable ICU 2.6 sl@0: */ sl@0: U_STABLE void U_EXPORT2 sl@0: uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); sl@0: sl@0: /** sl@0: * Set up a UCharIterator to iterate over a string. sl@0: * sl@0: * Sets the UCharIterator function pointers for iteration over the string s sl@0: * with iteration boundaries start=index=0 and length=limit=string length. sl@0: * The "provider" may set the start, index, and limit values at any time sl@0: * within the range 0..length. sl@0: * The length field will be ignored. sl@0: * sl@0: * The string pointer s is set into UCharIterator.context without copying sl@0: * or reallocating the string contents. sl@0: * sl@0: * getState() simply returns the current index. sl@0: * move() will always return the final index. sl@0: * sl@0: * @param iter UCharIterator structure to be set for iteration sl@0: * @param s String to iterate over sl@0: * @param length Length of s, or -1 if NUL-terminated sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: U_STABLE void U_EXPORT2 sl@0: uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); sl@0: sl@0: /** sl@0: * Set up a UCharIterator to iterate over a UTF-16BE string sl@0: * (byte vector with a big-endian pair of bytes per UChar). sl@0: * sl@0: * Everything works just like with a normal UChar iterator (uiter_setString), sl@0: * except that UChars are assembled from byte pairs, sl@0: * and that the length argument here indicates an even number of bytes. sl@0: * sl@0: * getState() simply returns the current index. sl@0: * move() will always return the final index. sl@0: * sl@0: * @param iter UCharIterator structure to be set for iteration sl@0: * @param s UTF-16BE string to iterate over sl@0: * @param length Length of s as an even number of bytes, or -1 if NUL-terminated sl@0: * (NUL means pair of 0 bytes at even index from s) sl@0: * sl@0: * @see UCharIterator sl@0: * @see uiter_setString sl@0: * @stable ICU 2.6 sl@0: */ sl@0: U_STABLE void U_EXPORT2 sl@0: uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); sl@0: sl@0: /** sl@0: * Set up a UCharIterator to iterate over a UTF-8 string. sl@0: * sl@0: * Sets the UCharIterator function pointers for iteration over the UTF-8 string s sl@0: * with UTF-8 iteration boundaries 0 and length. sl@0: * The implementation counts the UTF-16 index on the fly and sl@0: * lazily evaluates the UTF-16 length of the text. sl@0: * sl@0: * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. sl@0: * When the reservedField is not 0, then it contains a supplementary code point sl@0: * and the UTF-16 index is between the two corresponding surrogates. sl@0: * At that point, the UTF-8 index is behind that code point. sl@0: * sl@0: * The UTF-8 string pointer s is set into UCharIterator.context without copying sl@0: * or reallocating the string contents. sl@0: * sl@0: * getState() returns a state value consisting of sl@0: * - the current UTF-8 source byte index (bits 31..1) sl@0: * - a flag (bit 0) that indicates whether the UChar position is in the middle sl@0: * of a surrogate pair sl@0: * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) sl@0: * sl@0: * getState() cannot also encode the UTF-16 index in the state value. sl@0: * move(relative to limit or length), or sl@0: * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. sl@0: * sl@0: * @param iter UCharIterator structure to be set for iteration sl@0: * @param s UTF-8 string to iterate over sl@0: * @param length Length of s in bytes, or -1 if NUL-terminated sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.6 sl@0: */ sl@0: U_STABLE void U_EXPORT2 sl@0: uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); sl@0: sl@0: #ifdef XP_CPLUSPLUS sl@0: sl@0: /** sl@0: * Set up a UCharIterator to wrap around a C++ CharacterIterator. sl@0: * sl@0: * Sets the UCharIterator function pointers for iteration using the sl@0: * CharacterIterator charIter. sl@0: * sl@0: * The CharacterIterator pointer charIter is set into UCharIterator.context sl@0: * without copying or cloning the CharacterIterator object. sl@0: * The other "protected" UCharIterator fields are set to 0 and will be ignored. sl@0: * The iteration index and boundaries are controlled by the CharacterIterator. sl@0: * sl@0: * getState() simply returns the current index. sl@0: * move() will always return the final index. sl@0: * sl@0: * @param iter UCharIterator structure to be set for iteration sl@0: * @param charIter CharacterIterator to wrap sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: U_STABLE void U_EXPORT2 sl@0: uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter); sl@0: sl@0: /** sl@0: * Set up a UCharIterator to iterate over a C++ Replaceable. sl@0: * sl@0: * Sets the UCharIterator function pointers for iteration over the sl@0: * Replaceable rep with iteration boundaries start=index=0 and sl@0: * length=limit=rep->length(). sl@0: * The "provider" may set the start, index, and limit values at any time sl@0: * within the range 0..length=rep->length(). sl@0: * The length field will be ignored. sl@0: * sl@0: * The Replaceable pointer rep is set into UCharIterator.context without copying sl@0: * or cloning/reallocating the Replaceable object. sl@0: * sl@0: * getState() simply returns the current index. sl@0: * move() will always return the final index. sl@0: * sl@0: * @param iter UCharIterator structure to be set for iteration sl@0: * @param rep Replaceable to iterate over sl@0: * sl@0: * @see UCharIterator sl@0: * @stable ICU 2.1 sl@0: */ sl@0: U_STABLE void U_EXPORT2 sl@0: uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep); sl@0: sl@0: #endif sl@0: sl@0: U_CDECL_END sl@0: sl@0: #endif