os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/utext.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/utext.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,1293 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2004-2005, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  utext.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2004oct06
    1.17 +*   created by: Markus W. Scherer
    1.18 +*/
    1.19 +
    1.20 +#ifndef __UTEXT_H__
    1.21 +#define __UTEXT_H__
    1.22 +
    1.23 +/**
    1.24 + * \file
    1.25 + * \brief C API: Abstract Unicode Text API
    1.26 + *
    1.27 + * The Text Access API provides a means to allow text that is stored in alternative
    1.28 + * formats to work with ICU services.  ICU normally operates on text that is
    1.29 + * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
    1.30 + * UnicodeString for C++ APIs.
    1.31 + *
    1.32 + * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
    1.33 + * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
    1.34 + *
    1.35 + * There are three general classes of usage for UText:
    1.36 + *
    1.37 + *     Application Level Use.  This is the simplest usage - applications would
    1.38 + *     use one of the utext_open() functions on their input text, and pass
    1.39 + *     the resulting UText to the desired ICU service.
    1.40 + *
    1.41 + *     Second is usage in ICU Services, such as break iteration, that will need to
    1.42 + *     operate on input presented to them as a UText.  These implementations
    1.43 + *     will need to use the iteration and related UText functions to gain
    1.44 + *     access to the actual text.
    1.45 + *
    1.46 + *     The third class of UText users are "text providers."  These are the
    1.47 + *     UText implementations for the various text storage formats.  An application
    1.48 + *     or system with a unique text storage format can implement a set of
    1.49 + *     UText provider functions for that format, which will then allow
    1.50 + *     ICU services to operate on that format.
    1.51 + *
    1.52 + *
    1.53 + * <em>Iterating over text</em>
    1.54 + *
    1.55 + * Here is sample code for a forward iteration over the contents of a UText
    1.56 + *
    1.57 + * \code
    1.58 + *    UChar32  c;
    1.59 + *    UText    *ut = whatever();
    1.60 + *
    1.61 + *    for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
    1.62 + *       // do whatever with the codepoint c here.
    1.63 + *    }
    1.64 + * \endcode
    1.65 + *
    1.66 + * And here is similar code to iterate in the reverse direction, from the end
    1.67 + * of the text towards the beginning.
    1.68 + *
    1.69 + * \code
    1.70 + *    UChar32  c;
    1.71 + *    UText    *ut = whatever();
    1.72 + *    int32_t  textLength = utext_nativeLength(ut);
    1.73 + *    for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
    1.74 + *       // do whatever with the codepoint c here.
    1.75 + *    }
    1.76 + * \endcode
    1.77 + *
    1.78 + * <em>Characters and Indexing</em>
    1.79 + *
    1.80 + * Indexing into text by UText functions is nearly always in terms of the native
    1.81 + * indexing of the underlying text storage.  The storage format could be UTF-8
    1.82 + * or UTF-32, for example.  When coding to the UText access API, no assumptions
    1.83 + * can be made regarding the size of characters, or how far an index
    1.84 + * may move when iterating between characters.
    1.85 + *
    1.86 + * All indices supplied to UText functions are pinned to the length of the
    1.87 + * text.  An out-of-bounds index is not considered to be an error, but is
    1.88 + * adjusted to be in the range  0 <= index <= length of input text.
    1.89 + *
    1.90 + *
    1.91 + * When an index position is returned from a UText function, it will be
    1.92 + * a native index to the underlying text.  In the case of multi-unit characters,
    1.93 + * it will  always refer to the first position of the character,
    1.94 + * never to the interior.  This is essentially the same thing as saying that
    1.95 + * a returned index will always point to a boundary between characters.
    1.96 + *
    1.97 + * When a native index is supplied to a UText function, all indices that
    1.98 + * refer to any part of a multi-unit character representation are considered
    1.99 + * to be equivalent.  In the case of multi-unit characters, an incoming index
   1.100 + * will be logically normalized to refer to the start of the character.
   1.101 + * 
   1.102 + * It is possible to test whether a native index is on a code point boundary
   1.103 + * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
   1.104 + * If the index is returned unchanged, it was on a code point boundary.  If
   1.105 + * an adjusted index is returned, the original index referred to the
   1.106 + * interior of a character.
   1.107 + *
   1.108 + */
   1.109 +
   1.110 +
   1.111 +
   1.112 +#include "unicode/utypes.h"
   1.113 +#ifdef XP_CPLUSPLUS
   1.114 +#include "unicode/rep.h"
   1.115 +#include "unicode/unistr.h"
   1.116 +#endif
   1.117 +
   1.118 +#ifndef U_HIDE_DRAFT_API
   1.119 +
   1.120 +U_CDECL_BEGIN
   1.121 +
   1.122 +struct UText;
   1.123 +typedef struct UText UText; /**< C typedef for struct UText. @draft ICU 3.4 */
   1.124 +
   1.125 +struct UTextChunk;
   1.126 +typedef struct UTextChunk UTextChunk; /**< C typedef for struct UTextChunk. @draft ICU 3.4 */
   1.127 +
   1.128 +
   1.129 +
   1.130 +/***************************************************************************************
   1.131 + *
   1.132 + *   C Functions for creating UText wrappers around various kinds of text strings.
   1.133 + *
   1.134 + ****************************************************************************************/
   1.135 +
   1.136 +
   1.137 +/**
   1.138 +  * utext_close    Close function for UText instances.
   1.139 +  *                Cleans up, releases any resources being held by an
   1.140 +  *                open UText.
   1.141 +  * <p/>
   1.142 +  *   If the UText was originally allocated by one of the utext_open functions,
   1.143 +  *   the storage associated with the utext will also be freed.
   1.144 +  *   If the UText storage originated with the application, as it would with
   1.145 +  *   a local or static instance, the storage will not be deleted.
   1.146 +  *
   1.147 +  *   An open UText can be reset to refer to a new string by using one of the utext_open()
   1.148 +  *   functions without first closing the UText.  
   1.149 +  *
   1.150 +  * @param ut  The UText to be closed.
   1.151 +  * @return    NULL if the UText struct was deleted by the close.  If the UText struct
   1.152 +  *            was originally provided by the caller to the open function, it is
   1.153 +  *            returned by this function, and may be safely used again in
   1.154 +  *            a subsequent utext_open.
   1.155 +  *
   1.156 +  * @draft ICU 3.4
   1.157 +  */
   1.158 +U_DRAFT UText * U_EXPORT2
   1.159 +utext_close(UText *ut);
   1.160 +
   1.161 +
   1.162 +/**
   1.163 + * Open a read-only UText implementation for UTF-8 strings.
   1.164 + * 
   1.165 + * \htmlonly
   1.166 + * Any invalid UTF-8 in the input will be handled in this way:
   1.167 + * a sequence of bytes that has the form of a truncated, but otherwise valid,
   1.168 + * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. 
   1.169 + * Any other illegal bytes will each be replaced by a \uFFFD.
   1.170 + * \endhtmlonly
   1.171 + * 
   1.172 + * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
   1.173 + *               If non-NULL, must refer to an initialized UText struct, which will then
   1.174 + *               be reset to reference the specified UTF-8 string.
   1.175 + * @param s      A UTF-8 string
   1.176 + * @param length The length of the UTF-8 string in bytes, or -1 if the string is
   1.177 + *               zero terminated.
   1.178 + * @param status Errors are returned here.
   1.179 + * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
   1.180 + *               will always be used and returned.
   1.181 + * @draft ICU 3.4
   1.182 + */
   1.183 +U_DRAFT UText * U_EXPORT2
   1.184 +utext_openUTF8(UText *ut, const char *s, int32_t length, UErrorCode *status);
   1.185 +
   1.186 +
   1.187 +/**
   1.188 + * Open a read-only UText for UChar * string.
   1.189 + * 
   1.190 + * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
   1.191 + *               If non-NULL, must refer to an initialized UText struct, which will then
   1.192 + *               be reset to reference the specified UChar string.
   1.193 + * @param s      A UChar (UTF-16) string
   1.194 + * @param length The number of UChars in the input string, or -1 if the string is
   1.195 + *               zero terminated.
   1.196 + * @param status Errors are returned here.
   1.197 + * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
   1.198 + *               will always be used and returned.
   1.199 + * @draft ICU 3.4
   1.200 + */
   1.201 +U_DRAFT UText * U_EXPORT2
   1.202 +utext_openUChars(UText *ut, const UChar *s, int32_t length, UErrorCode *status);
   1.203 +
   1.204 +
   1.205 +#ifdef XP_CPLUSPLUS
   1.206 +/**
   1.207 + * Open a writable UText for a non-const UnicodeString. 
   1.208 + * 
   1.209 + * @param ut      Pointer to a UText struct.  If NULL, a new UText will be created.
   1.210 + *                 If non-NULL, must refer to an initialized UText struct, which will then
   1.211 + *                 be reset to reference the specified input string.
   1.212 + * @param s       A UnicodeString.
   1.213 + * @param status Errors are returned here.
   1.214 + * @return        Pointer to the UText.  If a UText was supplied as input, this
   1.215 + *                 will always be used and returned.
   1.216 + * @draft ICU 3.4
   1.217 + */
   1.218 +U_DRAFT UText * U_EXPORT2
   1.219 +utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status);
   1.220 +
   1.221 +
   1.222 +/**
   1.223 + * Open a UText for a const UnicodeString.   The resulting UText will not be writable.
   1.224 + * 
   1.225 + * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
   1.226 + *               If non-NULL, must refer to an initialized UText struct, which will then
   1.227 + *               be reset to reference the specified input string.
   1.228 + * @param s      A const UnicodeString to be wrapped.
   1.229 + * @param status Errors are returned here.
   1.230 + * @return       Pointer to the UText.  If a UText was supplied as input, this
   1.231 + *               will always be used and returned.
   1.232 + * @draft ICU 3.4
   1.233 + */
   1.234 +U_DRAFT UText * U_EXPORT2
   1.235 +utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status);
   1.236 +
   1.237 +
   1.238 +/**
   1.239 + * Open a writable UText implementation for an ICU Replaceable object.
   1.240 + * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
   1.241 + *               If non-NULL, must refer to an already existing UText, which will then
   1.242 + *               be reset to reference the specified replaceable text.
   1.243 + * @param rep    A Replaceable text object.
   1.244 + * @param status Errors are returned here.
   1.245 + * @return       Pointer to the UText.  If a UText was supplied as input, this
   1.246 + *               will always be used and returned.
   1.247 + * @see Replaceable
   1.248 + * @draft ICU 3.4
   1.249 + */
   1.250 +U_DRAFT UText * U_EXPORT2
   1.251 +utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status);
   1.252 +
   1.253 +#endif
   1.254 +
   1.255 +
   1.256 +/**
   1.257 +  *  clone a UText.  Much like opening a UText where the source text is itself
   1.258 +  *  another UText.
   1.259 +  *
   1.260 +  *  A deep clone will copy both the UText data structures and the underlying text.
   1.261 +  *  The original and cloned UText will operate completely independently; modifications
   1.262 +  *  made to the text in one will not affect the other.  Text providers are not
   1.263 +  *  required to support deep clones.  The user of clone() must check the status return
   1.264 +  *  and be prepared to handle failures.
   1.265 +  *
   1.266 +  *  A shallow clone replicates only the UText data structures; it does not make
   1.267 +  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
   1.268 +  *  have multiple iterators active in a single text string that is not being
   1.269 +  *  modified.
   1.270 +  *
   1.271 +  *  A shallow clone operation will not fail, barring truly exceptional conditions such
   1.272 +  *  as memory allocation failures.
   1.273 +  *
   1.274 +  *  A UText and its clone may be safely concurrently accessed by separate threads.
   1.275 +  *  This is true for both shallow and deep clones.
   1.276 +  *  It is the responsibility of the Text Provider to ensure that this thread safety
   1.277 +  *  constraint is met.
   1.278 +  *
   1.279 +  *  @param dest   A UText struct to be filled in with the result of the clone operation,
   1.280 +  *                or NULL if the clone function should heap-allocate a new UText struct.
   1.281 +  *  @param src    The UText to be cloned.
   1.282 +  *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
   1.283 +  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
   1.284 +  *                will be returned if the text provider is unable to clone the
   1.285 +  *                original text.
   1.286 +  *  @return       The newly created clone, or NULL if the clone operation failed.
   1.287 +  *  @draft ICU 3.4
   1.288 +  */
   1.289 +U_DRAFT UText * U_EXPORT2
   1.290 +utext_clone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
   1.291 +
   1.292 +
   1.293 +/*****************************************************************************
   1.294 + *
   1.295 + *   C Functions to work with the text represented by a UText wrapper
   1.296 + *
   1.297 + *****************************************************************************/
   1.298 +
   1.299 +/**
   1.300 +  * Get the length of the text.  Depending on the characteristics
   1.301 +  * of the underlying text representation, this may be expensive.  
   1.302 +  * @see  utext_isLengthExpensive()
   1.303 +  *
   1.304 +  *
   1.305 +  * @param ut  the text to be accessed.
   1.306 +  * @return the length of the text, expressed in native units.
   1.307 +  *
   1.308 +  * @draft ICU 3.4
   1.309 +  */
   1.310 +U_DRAFT int32_t U_EXPORT2
   1.311 +utext_nativeLength(UText *ut);
   1.312 +
   1.313 +/**
   1.314 + *  Return TRUE if calculating the length of the text could be expensive.
   1.315 + *  Finding the length of NUL terminated strings is considered to be expensive.
   1.316 + *
   1.317 + *  Note that the value of this function may change
   1.318 + *  as the result of other operations on a UText.
   1.319 + *  Once the length of a string has been discovered, it will no longer
   1.320 + *  be expensive to report it.
   1.321 + *
   1.322 + * @param ut the text to be accessed.
   1.323 + * @return TRUE if determining the length of the text could be time consuming.
   1.324 + * @draft ICU 3.4
   1.325 + */
   1.326 +U_DRAFT UBool U_EXPORT2
   1.327 +utext_isLengthExpensive(const UText *ut);
   1.328 +
   1.329 +/**
   1.330 + * Returns the code point at the requested index,
   1.331 + * or U_SENTINEL (-1) if it is out of bounds.
   1.332 + *
   1.333 + * If the specified index points to the interior of a multi-unit
   1.334 + * character - one of the trail bytes of a UTF-8 sequence, for example -
   1.335 + * the complete code point will be returned.
   1.336 + *
   1.337 + * The iteration position will be set to the start of the returned code point.
   1.338 + *
   1.339 + * This function is roughly equivalent to the sequence
   1.340 + *    utext_setNativeIndex(index);
   1.341 + *    utext_current32();
   1.342 + * (There is a difference if the index is out of bounds by being less than zero)
   1.343 + * 
   1.344 + * @param ut the text to be accessed
   1.345 + * @param nativeIndex the native index of the character to be accessed.  If the index points
   1.346 + *        to other than the first unit of a multi-unit character, it will be adjusted
   1.347 + *        to the start of the character.
   1.348 + * @return the code point at the specified index.
   1.349 + * @draft ICU 3.4
   1.350 + */
   1.351 +U_DRAFT UChar32 U_EXPORT2
   1.352 +utext_char32At(UText *ut, int32_t nativeIndex);
   1.353 +
   1.354 +
   1.355 +/**
   1.356 + *
   1.357 + * Get the code point at the current iteration position,
   1.358 + * or U_SENTINEL (-1) if the iteration has reached the end of
   1.359 + * the input text.
   1.360 + *
   1.361 + * @param ut the text to be accessed.
   1.362 + * @return the Unicode code point at the current iterator position.
   1.363 + * @draft ICU 3.4
   1.364 + */
   1.365 +U_DRAFT UChar32 U_EXPORT2
   1.366 +utext_current32(UText *ut);
   1.367 +
   1.368 +
   1.369 +/**
   1.370 + * Get the code point at the current iteration position of the UText, and
   1.371 + * advance the position to the first index following the character.
   1.372 + * Returns U_SENTINEL (-1) if the position is at the end of the
   1.373 + * text.
   1.374 + * This is a post-increment operation.
   1.375 + *
   1.376 + * An inline macro version of this function, UTEXT_NEXT32(), 
   1.377 + * is available for performance critical use.
   1.378 + *
   1.379 + * @param ut the text to be accessed.
   1.380 + * @return the Unicode code point at the iteration position.
   1.381 + * @see UTEXT_NEXT32
   1.382 + * @draft ICU 3.4
   1.383 + */
   1.384 +U_DRAFT UChar32 U_EXPORT2
   1.385 +utext_next32(UText *ut);
   1.386 +
   1.387 +
   1.388 +/**
   1.389 + *  Move the iterator position to the character (code point) whose
   1.390 + *  index precedes the current position, and return that character.
   1.391 + *  This is a pre-decrement operation.
   1.392 + *  Returns U_SENTINEL (-1) if the position is already at the start
   1.393 + *  of the text.
   1.394 + *
   1.395 + * An inline macro version of this function, UTEXT_PREVIOUS32(), 
   1.396 + * is available for performance critical use.
   1.397 + *
   1.398 + *  @param ut the text to be accessed.
   1.399 + *  @return the previous UChar32 code point, or U_SENTINEL (-1) 
   1.400 + *          if the iteration has reached the start of the text.
   1.401 + *  @see UTEXT_PREVIOUS32
   1.402 + *  @draft ICU 3.4
   1.403 + */
   1.404 +U_DRAFT UChar32 U_EXPORT2
   1.405 +utext_previous32(UText *ut);
   1.406 +
   1.407 +
   1.408 +/**
   1.409 +  * Set the iteration index, access the text for forward iteration,
   1.410 +  * and return the code point starting at or before that index.
   1.411 +  * Leave the iteration index at the start of the following code point.
   1.412 +  *
   1.413 +  * This function is the most efficient and convenient way to
   1.414 +  * begin a forward iteration.
   1.415 +  *
   1.416 +  *  @param ut the text to be accessed.
   1.417 +  *  @param nativeIndex Iteration index, in the native units of the text provider.
   1.418 +  *  @return Code point which starts at or before index,
   1.419 +  *         or U_SENTINEL (-1) if it is out of bounds.
   1.420 +  * @draft ICU 3.4
   1.421 +  */
   1.422 +U_DRAFT UChar32 U_EXPORT2
   1.423 +utext_next32From(UText *ut, int32_t nativeIndex);
   1.424 +
   1.425 +
   1.426 +
   1.427 +/**
   1.428 +  * Set the iteration index, and return the code point preceding the
   1.429 +  * one specified by the initial index.  Leave the iteration position
   1.430 +  * at the start of the returned code point.
   1.431 +  *
   1.432 +  * This function is the most efficient and convenient way to
   1.433 +  * begin a backwards iteration.
   1.434 +  *
   1.435 +  * @param ut the text to be accessed.
   1.436 +  * @param nativeIndex Iteration index in the native units of the text provider.
   1.437 +  * @return Code point preceding the one at the initial index,
   1.438 +  *         or U_SENTINEL (-1) if it is out of bounds.
   1.439 +  *
   1.440 +  * @draft ICU 3.4
   1.441 +  */
   1.442 +U_DRAFT UChar32 U_EXPORT2
   1.443 +utext_previous32From(UText *ut, int32_t nativeIndex);
   1.444 +
   1.445 +/**
   1.446 +  * Get the current iterator position, which can range from 0 to 
   1.447 +  * the length of the text.
   1.448 +  * The position is a native index into the input text, in whatever format it
   1.449 +  * may have, and may not always correspond to a UChar (UTF-16) index
   1.450 +  * into the text.  The returned position will always be aligned to a
   1.451 +  * code point boundary.
   1.452 +  *
   1.453 +  * @param ut the text to be accessed.
   1.454 +  * @return the current index position, in the native units of the text provider.
   1.455 +  * @draft ICU 3.4
   1.456 +  */
   1.457 +U_DRAFT int32_t U_EXPORT2
   1.458 +utext_getNativeIndex(UText *ut);
   1.459 +
   1.460 +/**
   1.461 +  * Set the current iteration position to the nearest code point
   1.462 +  * boundary at or preceding the specified index.
   1.463 +  * The index is in the native units of the original input text.
   1.464 +  * If the index is out of range, it will be trimmed to be within
   1.465 +  * the range of the input text.
   1.466 +  * <p/>
   1.467 +  * It will usually be more efficient to begin an iteration
   1.468 +  * using the functions utext_next32From() or utext_previous32From()
   1.469 +  * rather than utext_setNativeIndex().
   1.470 +  * <p/>
   1.471 +  * Moving the index position to an adjacent character is best done
   1.472 +  * with utext_next32(), utext_previous32() or utext_moveIndex32().
   1.473 +  * Attempting to do direct arithmetic on the index position is
   1.474 +  * complicated by the fact that the size (in native units) of a
   1.475 +  * character depends on the underlying representation of the character
   1.476 +  * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
   1.477 +  * easily knowable.
   1.478 +  *
   1.479 +  * @param ut the text to be accessed.
   1.480 +  * @param nativeIndex the native unit index of the new iteration position.
   1.481 +  * @draft ICU 3.4
   1.482 +  */
   1.483 +U_DRAFT void U_EXPORT2
   1.484 +utext_setNativeIndex(UText *ut, int32_t nativeIndex);
   1.485 +
   1.486 +/**
   1.487 +  * Move the iterator position by delta code points.  The number of code points
   1.488 +  * is a signed number; a negative delta will move the iterator backwards,
   1.489 +  * towards the start of the text.
   1.490 +  * <p/>
   1.491 +  * The index is moved by <code>delta</code> code points
   1.492 +  * forward or backward, but no further backward than to 0 and
   1.493 +  * no further forward than to utext_nativeLength().
   1.494 +  * The resulting index value will be in between 0 and length, inclusive.
   1.495 +  * <p/>
   1.496 +  * Because the index is kept in the native units of the text provider, the
   1.497 +  * actual numeric amount by which the index moves depends on the
   1.498 +  * underlying text storage representation of the text provider.
   1.499 +  *
   1.500 +  * @param ut the text to be accessed.
   1.501 +  * @param delta the signed number of code points to move the iteration position.
   1.502 +  * @return TRUE if the position could be moved the requested number of positions while
   1.503 +  *              staying within the range [0 - text length].
   1.504 +  * @draft ICU 3.4
   1.505 +  */
   1.506 +U_DRAFT UBool U_EXPORT2
   1.507 +utext_moveIndex32(UText *ut, int32_t delta);
   1.508 +
   1.509 +
   1.510 +/**
   1.511 + *
   1.512 + * Extract text from a UText into a UChar buffer.  The range of text to be extracted
   1.513 + * is specified in the native indices of the UText provider.  These may not necessarily
   1.514 + * be UTF-16 indices.
   1.515 + * <p/>
   1.516 + * The size (number of 16 bit UChars) in the data to be extracted is returned.  The
   1.517 + * full number of UChars is returned, even when the extracted text is truncated
   1.518 + * because the specified buffer size is too small.
   1.519 + *
   1.520 + * The extracted string will (if you are a user) / must (if you are a text provider)
   1.521 + * be NUL-terminated if there is sufficient space in the destination buffer.  This
   1.522 + * terminating NUL is not included in the returned length.
   1.523 + *
   1.524 + * @param  ut    the UText from which to extract data.
   1.525 + * @param  nativeStart the native index of the first character to extract.
   1.526 + * @param  nativeLimit the native string index of the position following the last
   1.527 + *               character to extract.  If the specified limit is greater than the length
   1.528 + *               of the text, the limit will be trimmed back to the text length.
   1.529 + * @param  dest  the UChar (UTF-16) buffer into which the extracted text is placed
   1.530 + * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
   1.531 + *               for precomputing the required size.
   1.532 + * @param  status receives any error status.
   1.533 + *         U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 
   1.534 + *         buffer was too small.  Returns number of UChars for preflighting.
   1.535 + * @return Number of UChars in the data to be extracted.  Does not include a trailing NUL.
   1.536 + *
   1.537 + * @draft ICU 3.4
   1.538 + */
   1.539 +U_DRAFT int32_t U_EXPORT2
   1.540 +utext_extract(UText *ut,
   1.541 +             int32_t nativeStart, int32_t nativeLimit,
   1.542 +             UChar *dest, int32_t destCapacity,
   1.543 +             UErrorCode *status);
   1.544 +
   1.545 +
   1.546 +
   1.547 +/************************************************************************************
   1.548 + *
   1.549 + *  #define inline versions of selected performance-critical text access functions
   1.550 + *          Caution:  do not use auto increment++ or decrement-- expressions
   1.551 + *                    as parameters to these macros.
   1.552 + *
   1.553 + *          For most use, where there is no extreme performance constraint, the
   1.554 + *          normal, non-inline functions are a better choice.  The resulting code
   1.555 + *          will be smaller, and, if the need ever arises, easier to debug.
   1.556 + *
   1.557 + *          These are implemented as #defines rather than real functions
   1.558 + *          because there is no fully portable way to do inline functions in plain C.
   1.559 + *
   1.560 + ************************************************************************************/
   1.561 +
   1.562 +/**
   1.563 + * Inline version of utext_next32(), for performance-critical situations.
   1.564 + *
   1.565 + * Gets the code point at the current iteration position and advances the
   1.566 + * position past it (post-increment); returns U_SENTINEL (-1) at the end of
   1.567 + * the text.  A code unit below 0xd800 inside the current chunk is returned
   1.568 + * directly; every other case is handed to utext_next32().
   1.569 + * Caution: ut is evaluated more than once, so it must have no side effects.
   1.570 + *
   1.571 + * @draft ICU 3.4
   1.572 + */
   1.573 +#define UTEXT_NEXT32(ut)  \
   1.574 +    ((ut)->chunk.offset < (ut)->chunk.length && ((ut)->chunk.contents)[(ut)->chunk.offset]<0xd800 ? \
   1.575 +    ((ut)->chunk.contents)[((ut)->chunk.offset)++] : utext_next32(ut))
   1.576 +
   1.577 +/**
   1.578 + * Inline version of utext_previous32(), for performance-critical situations.
   1.579 + *
   1.580 + * Moves the iteration position back to the preceding code point and returns
   1.581 + * it (pre-decrement); returns U_SENTINEL (-1) at the start of the text.
   1.582 + * A preceding code unit below 0xd800 in the current chunk is returned directly;
   1.583 + * otherwise utext_previous32() is called.  Caution: ut is evaluated more than once.
   1.584 + *
   1.585 + * @draft ICU 3.4
   1.586 + */
   1.587 +#define UTEXT_PREVIOUS32(ut)  \
   1.588 +    ((ut)->chunk.offset > 0 && \
   1.589 +     (ut)->chunk.contents[(ut)->chunk.offset-1] < 0xd800 ? \
   1.590 +          (ut)->chunk.contents[--((ut)->chunk.offset)]  :  utext_previous32(ut))
   1.591 +
   1.592 +
   1.593 +
   1.594 +
   1.595 +/************************************************************************************
   1.596 + *
   1.597 + *   Functions related to writing or modifying the text.
   1.598 + *   These will work only with modifiable UTexts.  Attempting to
   1.599 + *   modify a read-only UText will return an error status.
   1.600 + *
   1.601 + ************************************************************************************/
   1.602 +
   1.603 +
   1.604 +/**
   1.605 + *  Return TRUE if the text can be written with utext_replace() or
   1.606 + *  utext_copy().  For the text to be writable, the text provider must
   1.607 + *  be of a type that supports writing.
   1.608 + *
   1.609 + * @param  ut   the UText to be tested.
   1.610 + * @return TRUE if the text is modifiable.
   1.611 + * @draft ICU 3.4
   1.612 + *
   1.613 + */
   1.614 +U_DRAFT UBool U_EXPORT2
   1.615 +utext_isWritable(const UText *ut);
   1.616 +
   1.617 +
   1.618 +/**
   1.619 +  * Test whether there is meta data associated with the text.
   1.620 +  * @see Replaceable::hasMetaData()
   1.621 +  *
   1.622 +  * @param ut The UText to be tested
   1.623 +  * @return TRUE if the underlying text includes meta data.
   1.624 +  * @draft ICU 3.4
   1.625 +  */
   1.626 +U_DRAFT UBool U_EXPORT2
   1.627 +utext_hasMetaData(const UText *ut);
   1.628 +
   1.629 +
   1.630 +/**
   1.631 + * Replace a range of the original text with a replacement text.
   1.632 + *
   1.633 + * Leaves the current iteration position at the position following the
   1.634 + * newly inserted replacement text.
   1.635 + *
   1.636 + * This function is only available on UText types that support writing,
   1.637 + * that is, ones where utext_isWritable() returns TRUE.
   1.638 + *
   1.639 + * When using this function, there should be only a single UText opened onto the
   1.640 + * underlying native text string.  Behavior after a replace operation
   1.641 + * on a UText is undefined for any other additional UTexts that refer to the
   1.642 + * modified string.
   1.643 + *
   1.644 + * @param ut               the UText representing the text to be operated on.
   1.645 + * @param nativeStart      the native index of the start of the region to be replaced
   1.646 + * @param nativeLimit      the native index of the character following the region to be replaced.
   1.647 + * @param replacementText  pointer to the replacement text
   1.648 + * @param replacementLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
   1.649 + * @param status           receives any error status.  Possible errors include
   1.650 + *                         U_NO_WRITE_PERMISSION
   1.651 + *
   1.652 + * @return The signed number of (native) storage units by which
   1.653 + *         the length of the text expanded or contracted.
   1.654 + *
   1.655 + * @draft ICU 3.4
   1.656 + */
   1.657 +U_DRAFT int32_t U_EXPORT2
   1.658 +utext_replace(UText *ut,
   1.659 +             int32_t nativeStart, int32_t nativeLimit,
   1.660 +             const UChar *replacementText, int32_t replacementLength,
   1.661 +             UErrorCode *status);
   1.662 +
   1.663 +
   1.664 +
   1.665 +/**
   1.666 + *
   1.667 + * Copy or move a substring from one position to another within the text,
   1.668 + * while retaining any metadata associated with the text.
   1.669 + * This function is used to duplicate or reorder substrings.
   1.670 + * The destination index must not overlap the source range.
   1.671 + *
   1.672 + * The text to be copied or moved is inserted at destIndex;
   1.673 + * it does not replace or overwrite any existing text.
   1.674 + *
   1.675 + * This function is only available on UText types that support writing,
   1.676 + * that is, ones where utext_isWritable() returns TRUE.
   1.677 + *
   1.678 + * When using this function, there should be only a single UText opened onto the
   1.679 + * underlying native text string.  Behavior after a copy operation
   1.680 + * on a UText is undefined for any other additional UTexts that refer to the
   1.681 + * modified string.
   1.682 + *
   1.683 + * @param ut           The UText representing the text to be operated on.
   1.684 + * @param nativeStart  The native index of the start of the region to be copied or moved
   1.685 + * @param nativeLimit  The native index of the character position following the region to be copied or moved.
   1.686 + * @param destIndex    The native destination index to which the source substring is copied or moved.
   1.687 + * @param move         If TRUE, then the substring is moved, not copied/duplicated.
   1.688 + * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
   1.689 + *
   1.690 + * @draft ICU 3.4
   1.691 + */
   1.692 +U_DRAFT void U_EXPORT2
   1.693 +utext_copy(UText *ut,
   1.694 +          int32_t nativeStart, int32_t nativeLimit,
   1.695 +          int32_t destIndex,
   1.696 +          UBool move,
   1.697 +          UErrorCode *status);
   1.698 +
   1.699 +
   1.700 +
   1.701 +
   1.702 +
   1.703 +/****************************************************************************************
   1.704 + *
   1.705 + *   The following items are required by text provider implementations -
   1.706 + *    by packages that are writing UText wrappers for additional types of text strings.
   1.707 + *    These declarations are not needed by applications that use already existing
   1.708 + *    UText functions for wrapping strings or accessing text data that has been
   1.709 + *    wrapped in a UText.
   1.710 + *
   1.711 + *****************************************************************************************/
   1.712 +
   1.713 +
   1.714 +/**
   1.715 +  *  Descriptor of a chunk, or segment of text in UChar format.
   1.716 +  *
   1.717 +  *  UText provider implementations surface their text in the form of UTextChunks.
   1.718 +  *
   1.719 +  *  If the native form of the text is UTF-16, a chunk will typically refer back to the
   1.720 +  *   original native text storage.  If the native format is something else, chunks
   1.721 +  *   will typically refer to a buffer maintained by the provider that contains
   1.722 +  *   some amount of input that has been converted to UTF-16 (UChar) form.
   1.723 +  *
   1.724 +  * @draft ICU 3.4
   1.725 +  */  
   1.726 +struct UTextChunk {
   1.727 +    /** Pointer to contents of text chunk.  UChar format.   */
   1.728 +    const UChar *contents;
   1.729 +
   1.730 +    /**  Index within the contents of the current iteration position. */
   1.731 +    int32_t     offset;  
   1.732 +
   1.733 +    /** Number of UChars in the chunk. */
   1.734 +    int32_t     length;
   1.735 +
   1.736 +    /** (Native) text index corresponding to the start of the chunk. */
   1.737 +    int32_t     nativeStart;
   1.738 +
   1.739 +    /** (Native) text index corresponding to the end of the chunk (contents+length). */
   1.740 +    int32_t     nativeLimit;
   1.741 +
   1.742 +    /** If TRUE, then non-UTF-16 indexes are used in this chunk. */
   1.743 +    UBool       nonUTF16Indexes;
   1.744 +
   1.745 +    /** Unused. */
   1.746 +    UBool       padding1, padding2, padding3;
   1.747 +
   1.748 +    /** Unused. */
   1.749 +    int32_t     padInt1, padInt2;
   1.750 +
   1.751 +    /** Contains sizeof(UTextChunk) and allows the future addition of fields. */
   1.752 +    int32_t     sizeOfStruct;
   1.753 +};
   1.754 +
   1.755 +
   1.756 +/**
   1.757 + * UText provider properties (bit field indexes: each value is a bit number, not a mask).
   1.758 + *
   1.759 + * @see UText
   1.760 + * @draft ICU 3.4
   1.761 + */
   1.762 +enum {
   1.763 +    /**
   1.764 +     * The provider works with non-UTF-16 ("native") text indexes.
   1.765 +     * For example, byte indexes into UTF-8 text or UTF-32 indexes into UTF-32 text.
   1.766 +     * @draft ICU 3.4
   1.767 +     */
   1.768 +    UTEXT_PROVIDER_NON_UTF16_INDEXES = 0,
   1.769 +    /**
   1.770 +     * It is potentially time consuming for the provider to determine the length of the text.
   1.771 +     * @draft ICU 3.4
   1.772 +     */
   1.773 +    UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
   1.774 +    /**
   1.775 +     * Text chunks remain valid and usable until the text object is modified or
   1.776 +     * deleted, not just until the next time the access() function is called
   1.777 +     * (which is the default).
   1.778 +     * @draft ICU 3.4
   1.779 +     */
   1.780 +    UTEXT_PROVIDER_STABLE_CHUNKS = 2,
   1.781 +    /**
   1.782 +     * The provider supports modifying the text via the replace() and copy()
   1.783 +     * functions.
   1.784 +     * @see Replaceable
   1.785 +     * @draft ICU 3.4
   1.786 +     */
   1.787 +    UTEXT_PROVIDER_WRITABLE = 3,
   1.788 +    /**
   1.789 +     * There is metadata associated with the text.
   1.790 +     * @see Replaceable::hasMetaData()
   1.791 +     * @draft ICU 3.4
   1.792 +     */
   1.793 +    UTEXT_PROVIDER_HAS_META_DATA = 4
   1.794 +};
   1.795 +
   1.796 +/**
   1.797 +  * Function type declaration for UText.clone().
   1.798 +  *
   1.799 +  *  Clone a UText.  Much like opening a UText where the source text is itself
   1.800 +  *  another UText.
   1.801 +  *
   1.802 +  *  A deep clone will copy both the UText data structures and the underlying text.
   1.803 +  *  The original and cloned UText will operate completely independently; modifications
   1.804 +  *  made to the text in one will not affect the other.  Text providers are not
   1.805 +  *  required to support deep clones.  The user of clone() must check the status return
   1.806 +  *  and be prepared to handle failures.
   1.807 +  *
   1.808 +  *  A shallow clone replicates only the UText data structures; it does not make
   1.809 +  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to
   1.810 +  *  have multiple iterators active in a single text string that is not being
   1.811 +  *  modified.
   1.812 +  *
   1.813 +  *  A shallow clone operation must not fail except for truly exceptional conditions such
   1.814 +  *  as memory allocation failures.
   1.815 +  *
   1.816 +  *  A UText and its clone may be safely concurrently accessed by separate threads.
   1.817 +  *  This is true for both shallow and deep clones.
   1.818 +  *  It is the responsibility of the Text Provider to ensure that this thread safety
   1.819 +  *  constraint is met.
   1.820 +  *
   1.821 +  *
   1.822 +  *  @param dest   A UText struct to be filled in with the result of the clone operation,
   1.823 +  *                or NULL if the clone function should heap-allocate a new UText struct.
   1.824 +  *  @param src    The UText to be cloned.
   1.825 +  *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
   1.826 +  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
   1.827 +  *                should be returned if the text provider is unable to clone the
   1.828 +  *                original text.
   1.829 +  *  @return       The newly created clone, or NULL if the clone operation failed.
   1.830 +  *
   1.831 +  * @draft ICU 3.4
   1.832 +  */
   1.833 +typedef UText * U_CALLCONV
   1.834 +UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
   1.835 +
   1.836 +
   1.837 +/**
   1.838 + * Function type declaration for UText.nativeLength().  May be expensive to compute.
   1.839 + *
   1.840 + * @param ut the UText to get the length of.
   1.841 + * @return the length, in the native units of the original text string.
   1.842 + * @see UText
   1.843 + * @draft ICU 3.4
   1.844 + */
   1.845 +typedef int32_t U_CALLCONV
   1.846 +UTextNativeLength(UText *ut);
   1.847 +
   1.848 +/**
   1.849 + * Function type declaration for UText.access().  Get the description of the text chunk
   1.850 + *  containing the text at a requested native index.  The UText's iteration
   1.851 + *  position will be left at the requested index.  If the index is out
   1.852 + *  of bounds, the iteration position will be left at the start or end
   1.853 + *  of the string, as appropriate.
   1.854 + *
   1.855 + *  Chunks must begin and end on code point boundaries.  A single code point
   1.856 + *  comprised of multiple storage units must never span a chunk boundary.
   1.857 + *
   1.858 + * @param ut          the UText being accessed.
   1.859 + * @param nativeIndex Requested index of the text to be accessed.
   1.860 + * @param forward     If TRUE, then the returned chunk must contain text
   1.861 + *                    starting from the index, so that start<=index<limit.
   1.862 + *                    If FALSE, then the returned chunk must contain text
   1.863 + *                    before the index, so that start<index<=limit.
   1.864 + * @param chunk       The UTextChunk to be filled in with the description of the accessed text chunk.
   1.865 + * @return            True if the requested index could be accessed.  The chunk
   1.866 + *                    will contain the requested text.
   1.867 + *                    False value if a chunk cannot be accessed
   1.868 + *                    (the requested index is out of bounds).
   1.869 + *
   1.870 + * @see UText
   1.871 + * @draft ICU 3.4
   1.872 + */
   1.873 +typedef UBool U_CALLCONV
   1.874 +UTextAccess(UText *ut, int32_t nativeIndex, UBool forward, UTextChunk *chunk);
   1.875 +
   1.876 +/**
   1.877 + * Function type declaration for UText.extract().
   1.878 + *
   1.879 + * Extract text from a UText into a UChar buffer.  The range of text to be extracted
   1.880 + * is specified in the native indices of the UText provider.  These may not necessarily
   1.881 + * be UTF-16 indices.
   1.882 + * <p/>
   1.883 + * The size (number of 16 bit UChars) in the data to be extracted is returned.  The
   1.884 + * full amount is returned, even when the specified buffer size is smaller.
   1.885 + *
   1.886 + * The extracted string will (if you are a user) / must (if you are a text provider)
   1.887 + * be NUL-terminated if there is sufficient space in the destination buffer.
   1.888 + *
   1.889 + * @param  ut            the UText from which to extract data.
   1.890 + * @param  nativeStart   the native index of the first character to extract.
   1.891 + * @param  nativeLimit   the native string index of the position following the last
   1.892 + *                       character to extract.
   1.893 + * @param  dest          the UChar (UTF-16) buffer into which the extracted text is placed
   1.894 + * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
   1.895 + *                       for precomputing the required size.
   1.896 + * @param  status        receives any error status.
   1.897 + *                       If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
   1.898 + *                       preflighting.
   1.899 + * @return Number of UChars in the data.  Does not include a trailing NUL.
   1.900 + *
   1.901 + * @draft ICU 3.4
   1.902 + */
   1.903 +typedef int32_t U_CALLCONV
   1.904 +UTextExtract(UText *ut,
   1.905 +             int32_t nativeStart, int32_t nativeLimit,
   1.906 +             UChar *dest, int32_t destCapacity,
   1.907 +             UErrorCode *status);
   1.908 +
   1.909 +/**
   1.910 + * Function type declaration for UText.replace().
   1.911 + *
   1.912 + * Replace a range of the original text with a replacement text.
   1.913 + *
   1.914 + * Leaves the current iteration position at the position following the
   1.915 + * newly inserted replacement text.
   1.916 + *
   1.917 + * This function need only be implemented on UText types that support writing.
   1.918 + *
   1.919 + * When using this function, there should be only a single UText opened onto the
   1.920 + * underlying native text string.  The function is responsible for updating the
   1.921 + * text chunk within the UText to reflect the updated iteration position,
   1.922 + * taking into account any changes to the underlying string's structure caused
   1.923 + * by the replace operation.
   1.924 + *
   1.925 + * @param ut               the UText representing the text to be operated on.
   1.926 + * @param nativeStart      the native index of the start of the region to be replaced
   1.927 + * @param nativeLimit      the native index of the character following the region to be replaced.
   1.928 + * @param replacementText  pointer to the replacement text
   1.929 + * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
   1.930 + * @param status           receives any error status.  Possible errors include
   1.931 + *                         U_NO_WRITE_PERMISSION
   1.932 + *
   1.933 + * @return The signed number of (native) storage units by which
   1.934 + *         the length of the text expanded or contracted.
   1.935 + *
   1.936 + * @draft ICU 3.4
   1.937 + */
   1.938 +typedef int32_t U_CALLCONV
   1.939 +UTextReplace(UText *ut,
   1.940 +             int32_t nativeStart, int32_t nativeLimit,
   1.941 +             const UChar *replacementText, int32_t replacmentLength,
   1.942 +             UErrorCode *status);
   1.943 +
   1.944 +/**
   1.945 + * Function type declaration for UText.copy().
   1.946 + *
   1.947 + * Copy or move a substring from one position to another within the text,
   1.948 + * while retaining any metadata associated with the text.
   1.949 + * This function is used to duplicate or reorder substrings.
   1.950 + * The destination index must not overlap the source range.
   1.951 + *
   1.952 + * The text to be copied or moved is inserted at nativeDest;
   1.953 + * it does not replace or overwrite any existing text.
   1.954 + *
   1.955 + * This function need only be implemented for UText types that support writing.
   1.956 + *
   1.957 + * When using this function, there should be only a single UText opened onto the
   1.958 + * underlying native text string.  The function is responsible for updating the
   1.959 + * text chunk within the UText to reflect the updated iteration position,
   1.960 + * taking into account any changes to the underlying string's structure caused
   1.961 + * by the copy or move operation.
   1.962 + *
   1.963 + * @param ut           The UText representing the text to be operated on.
   1.964 + * @param nativeStart  The native index of the start of the region to be copied or moved
   1.965 + * @param nativeLimit  The native index of the character following the region to be copied or moved.
   1.966 + * @param nativeDest   The native destination index to which the source substring is copied or moved.
   1.967 + * @param move         If TRUE, then the substring is moved, not copied/duplicated.
   1.968 + * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
   1.969 + *
   1.970 + * @draft ICU 3.4
   1.971 + */
   1.972 +typedef void U_CALLCONV
   1.973 +UTextCopy(UText *ut,
   1.974 +          int32_t nativeStart, int32_t nativeLimit,
   1.975 +          int32_t nativeDest,
   1.976 +          UBool move,
   1.977 +          UErrorCode *status);
   1.978 +
   1.979 +/**
   1.980 + * Function type declaration for UText.mapOffsetToNative().
   1.981 + * Map from a UChar offset within the current text chunk within the UText to
   1.982 + *  the corresponding native index in the original source text.
   1.983 + *
   1.984 + * This function is required only for text providers that do not use native UTF-16 indexes.
   1.985 + *
   1.986 + * TODO:  specify behavior with out-of-bounds offset?  Shouldn't ever occur.
   1.987 + *
   1.988 + * @param ut     the UText.
   1.989 + * @param offset UTF-16 offset within the text chunk,
   1.990 + *               0<=offset<=chunk->length.
   1.991 + * @return Absolute (native) index corresponding to the specified chunk offset.
   1.992 + *         The returned native index should always be to a code point boundary.
   1.993 + *
   1.994 + * @draft ICU 3.4
   1.995 + */
   1.996 +typedef int32_t U_CALLCONV
   1.997 +UTextMapOffsetToNative(UText *ut, int32_t offset);
   1.998 +
   1.999 +/**
  1.1000 + * Function type declaration for UText.mapNativeIndexToUTF16().
  1.1001 + * Map from a native index to a UChar offset within a text chunk.
  1.1002 + *
  1.1003 + * This function is required only for text providers that do not use native UTF-16 indexes.
  1.1004 + *
  1.1005 + * @param ut          The UText containing the text chunk.
  1.1006 + * @param nativeIndex Absolute (native) text index, chunk->nativeStart<=nativeIndex<=chunk->nativeLimit.
  1.1007 + * @return            Chunk-relative UTF-16 offset corresponding to the specified native
  1.1008 + *                    index.
  1.1009 + *
  1.1010 + * TODO:  specify behavior with out-of-bounds index?  Shouldn't ever occur.
  1.1011 + * @draft ICU 3.4
  1.1012 + */
  1.1013 +typedef int32_t U_CALLCONV
  1.1014 +UTextMapNativeIndexToUTF16(UText *ut, int32_t nativeIndex);
  1.1015 +
  1.1016 +
  1.1017 +/**
  1.1018 + * Function type declaration for UText.close().
  1.1019 + *
  1.1020 + * A Text Provider close function is only required for provider types that make
  1.1021 + *  allocations in their open function (or other functions) that must be
  1.1022 + *  cleaned up when the UText is closed.
  1.1023 + *
  1.1024 + * The allocation of the UText struct itself and any "extra" storage
  1.1025 + * associated with the UText is handled by the common UText implementation
  1.1026 + * and does not require provider specific cleanup in a close function.
  1.1027 + *
  1.1028 + * Most UText provider implementations do not need to implement this function.
  1.1029 + *
  1.1030 + * @param ut A UText object to be closed.
  1.1031 + *
  1.1032 + * @draft ICU 3.4
  1.1033 + */
  1.1034 +typedef void U_CALLCONV
  1.1035 +UTextClose(UText *ut);
  1.1036 +
  1.1037 +
  1.1038 +/**
  1.1039 +  *   UText struct.  Provides the interface between the generic UText access code
  1.1040 +  *                  and the UText provider code that works on specific kinds of
  1.1041 +  *                  text  (UTF-8, noncontiguous UTF-16, whatever.)
  1.1042 +  *
  1.1043 +  *                  Applications that are using predefined types of text providers
  1.1044 +  *                  to pass text data to ICU services will have no need to view the
  1.1045 +  *                  internals of the UText structs that they open.
  1.1046 +  *
  1.1047 +  * @draft ICU 3.4
  1.1048 +  */
  1.1049 +struct UText {
  1.1050 +    /**
  1.1051 +     * (protected) Pointer to string or wrapped object or similar.
  1.1052 +     * Not used by caller.
  1.1053 +     * @draft ICU 3.4
  1.1054 +     */
  1.1055 +    const void *context;
  1.1056 +
  1.1057 +    /**
  1.1058 +     * (protected) Pointer fields available for use by the text provider.
  1.1059 +     * Not used by UText common code.
  1.1060 +     * @draft ICU 3.4
  1.1061 +     */
  1.1062 +    const void *p, *q, *r;
  1.1063 +
  1.1064 +    /**
  1.1065 +     *  (protected)  Pointer to additional space requested by the
  1.1066 +     *               text provider during the utext_open operation.
  1.1067 +     * @draft ICU 3.4
  1.1068 +     */
  1.1069 +    void          *pExtra;
  1.1070 +
  1.1071 +    /**
  1.1072 +     *   (protected)  Size in bytes of the extra space (pExtra).
  1.1073 +     *  @draft ICU 3.4
  1.1074 +     */
  1.1075 +    int32_t        extraSize;
  1.1076 +
  1.1077 +    /**
  1.1078 +     *     (private)  Flags for managing the allocation and freeing of
  1.1079 +     *                memory associated with this UText.
  1.1080 +     * @internal
  1.1081 +     */
  1.1082 +    int32_t        flags;
  1.1083 +
  1.1084 +    /**
  1.1085 +     *     (private)  Magic.  Try to detect when we are handed junk.
  1.1086 +     *                        utext_openXYZ() functions take an initialized,
  1.1087 +     *                        but not necessarily open, UText struct as an
  1.1088 +     *                        optional fill-in parameter.  This magic field
  1.1089 +     *                        is used to check for that initialization.
  1.1090 +     *                        Text provider close functions must NOT clear
  1.1091 +     *                        the magic field because that would prevent
  1.1092 +     *                        reuse of the UText struct.
  1.1093 +     * @internal
  1.1094 +     */
  1.1095 +    uint32_t       magic;
  1.1096 +
  1.1097 +
  1.1098 +    /**
  1.1099 +     * (public) sizeOfStruct=sizeof(UText)
  1.1100 +     * Allows possible backward compatible extension.
  1.1101 +     *
  1.1102 +     * @draft ICU 3.4
  1.1103 +     */
  1.1104 +    int32_t         sizeOfStruct;
  1.1105 +
  1.1106 +    /**
  1.1107 +      * (protected) Integer fields for use by text provider.
  1.1108 +      * Not used by caller.
  1.1109 +      * @draft ICU 3.4
  1.1110 +      */
  1.1111 +    int32_t         a, b, c;
  1.1112 +
  1.1113 +
  1.1114 +    /**
  1.1115 +      *  Text provider properties.  This set of flags is maintained by the
  1.1116 +      *                             text provider implementation.
  1.1117 +      *  @draft ICU 3.4
  1.1118 +      */
  1.1119 +    int32_t providerProperties;     
  1.1120 +
  1.1121 +
  1.1122 +
  1.1123 +    /**  Descriptor for the text chunk that includes or is adjacent to
  1.1124 +      *  the current iteration position.
  1.1125 +      *   @draft ICU 3.4
  1.1126 +      */
  1.1127 +    UTextChunk      chunk;   
  1.1128 +
  1.1129 +
  1.1130 +    /**
  1.1131 +     * (public) Function pointer for UTextClone.
  1.1132 +     *
  1.1133 +     * @see UTextClone
  1.1134 +     * @draft ICU 3.4
  1.1135 +     */
  1.1136 +    UTextClone *clone;
  1.1137 +
  1.1138 +    /**
  1.1139 +     * (public) Function pointer for UTextNativeLength.
  1.1140 +     * May be expensive to compute!
  1.1141 +     *
  1.1142 +     * @see UTextNativeLength
  1.1143 +     * @draft ICU 3.4
  1.1144 +     */
  1.1145 +    UTextNativeLength *nativeLength;
  1.1146 +
  1.1147 +    /**
  1.1148 +     * (public) Function pointer for UTextAccess.
  1.1149 +     *
  1.1150 +     * @see UTextAccess
  1.1151 +     * @draft ICU 3.4
  1.1152 +     */
  1.1153 +    UTextAccess *access;
  1.1154 +
  1.1155 +    /**
  1.1156 +     * (public) Function pointer for UTextExtract.
  1.1157 +     *
  1.1158 +     * @see UTextExtract
  1.1159 +     * @draft ICU 3.4
  1.1160 +     */
  1.1161 +    UTextExtract *extract;
  1.1162 +
  1.1163 +    /**
  1.1164 +     * (public) Function pointer for UTextReplace.
  1.1165 +     *
  1.1166 +     * @see UTextReplace
  1.1167 +     * @draft ICU 3.4
  1.1168 +     */
  1.1169 +    UTextReplace *replace;
  1.1170 +
  1.1171 +    /**
  1.1172 +     * (public) Function pointer for UTextCopy.
  1.1173 +     *
  1.1174 +     * @see UTextCopy
  1.1175 +     * @draft ICU 3.4
  1.1176 +     */
  1.1177 +    UTextCopy *copy;
  1.1178 +
  1.1179 +    /**
  1.1180 +     * (public) Function pointer for UTextMapOffsetToNative.
  1.1181 +     *
  1.1182 +     * @see UTextMapOffsetToNative
  1.1183 +     * @draft ICU 3.4
  1.1184 +     */
  1.1185 +    UTextMapOffsetToNative *mapOffsetToNative;
  1.1186 +
  1.1187 +    /**
  1.1188 +     * (public) Function pointer for UTextMapNativeIndexToUTF16.
  1.1189 +     *
  1.1190 +     * @see UTextMapNativeIndexToUTF16
  1.1191 +     * @draft ICU 3.4
  1.1192 +     */
  1.1193 +    UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
  1.1194 +
  1.1195 +    /**
  1.1196 +     * (public) Function pointer for UTextClose.
  1.1197 +     *
  1.1198 +     * @see UTextClose
  1.1199 +     * @draft ICU 3.4
  1.1200 +     */
  1.1201 +    UTextClose  *close;
  1.1202 +};
  1.1203 +
  1.1204 +
  1.1205 +/**
  1.1206 + *  Common function for use by Text Provider implementations to allocate and/or initialize
  1.1207 + *  a new UText struct.  To be called in the implementation of utext_open() functions.
  1.1208 + *  If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
  1.1209 + *  If the supplied UText is already open, the provider's close function will be called
  1.1210 + *  so that the struct can be reused by the open that is in progress.
  1.1211 + *
  1.1212 + * @param ut   pointer to a UText struct to be re-used, or null if a new UText
  1.1213 + *             should be allocated.
  1.1214 + * @param extraSpace The amount of additional space to be allocated as part
  1.1215 + *             of this UText, for use by types of providers that require
  1.1216 + *             additional storage (see UText.pExtra and UText.extraSize).
  1.1217 + * @param status Errors are returned here.
  1.1218 + * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
  1.1219 + * @draft ICU 3.4
  1.1220 + */
  1.1221 +U_DRAFT UText * U_EXPORT2
  1.1222 +utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
  1.1223 +
  1.1224 +/**
  1.1225 +  * @internal Magic value stored in UText.magic by UTEXT_INITIALIZER_HEAD; marks an initialized UText.
  1.1226 +  */
  1.1227 +enum {
  1.1228 +    UTEXT_MAGIC = 0x345ad82c
  1.1229 +};
  1.1230 +
  1.1231 +
  1.1232 +/**
  1.1233 + *  Initializer for a UTextChunk; values are listed in struct member order.
  1.1234 + *  @internal
  1.1235 + */
  1.1236 +#define UTEXT_CHUNK_INIT   {                               \
  1.1237 +                  NULL,                /* contents      */ \
  1.1238 +                  0,                   /* offset        */ \
  1.1239 +                  0,                   /* length        */ \
  1.1240 +                  0,                   /* nativeStart   */ \
  1.1241 +                  0,                   /* nativeLimit   */ \
  1.1242 +                  FALSE,               /* nonUTF16idx   */ \
  1.1243 +                  FALSE, FALSE, FALSE, /* padding1,2,3  */ \
  1.1244 +                  0, 0,                /* padInt1, 2    */ \
  1.1245 +                  sizeof(UTextChunk)   /* sizeOfStruct  */ \
  1.1246 +}               
  1.1247 +
  1.1248 +
  1.1249 +
  1.1250 +/**
  1.1251 + * Initializer for the first part of a UText struct, the part that is
  1.1252 + *  in common for all types of text providers.  Expands to a brace-less,
  1.1253 + *  comma-separated list (no trailing comma) for use inside a larger initializer.
  1.1254 + * @internal
  1.1255 + */
  1.1256 +#define UTEXT_INITIALIZER_HEAD  \
  1.1257 +                  NULL,                 /* context       */ \
  1.1258 +                  NULL, NULL, NULL,     /* p, q, r       */ \
  1.1259 +                  NULL,                 /* pExtra        */ \
  1.1260 +                  0,                    /* extraSize     */ \
  1.1261 +                  0,                    /* flags         */ \
  1.1262 +                  UTEXT_MAGIC,          /* magic         */ \
  1.1263 +                  sizeof(UText),        /* sizeOfStruct  */ \
  1.1264 +                  0, 0, 0,              /* a, b, c       */ \
  1.1265 +                  0,                    /* providerProps */ \
  1.1266 +                  UTEXT_CHUNK_INIT      /* UTextChunk    */
  1.1267 +
  1.1268 +
  1.1269 +
  1.1270 +/**
  1.1271 + * Initializer to be used with local (stack) instances of a UText
  1.1272 + *  struct.  UText structs must be initialized before passing
  1.1273 + *  them to one of the utext_open functions.
  1.1274 + *  Example:  UText ut = UTEXT_INITIALIZER;
  1.1275 + * @draft ICU 3.4
  1.1276 + */
  1.1277 +#define UTEXT_INITIALIZER {                                \
  1.1278 +                  UTEXT_INITIALIZER_HEAD,                  \
  1.1279 +                  NULL,                 /* clone ()     */ \
  1.1280 +                  NULL,                 /* nativeLen () */ \
  1.1281 +                  NULL,                 /* access ()    */ \
  1.1282 +                  NULL,                 /* extract ()   */ \
  1.1283 +                  NULL,                 /* replace ()   */ \
  1.1284 +                  NULL,                 /* copy ()      */ \
  1.1285 +                  NULL, NULL,           /* map * 2 ()   */ \
  1.1286 +                  NULL                  /* close ()     */ \
  1.1287 +}
  1.1288 +
  1.1289 +
  1.1290 +U_CDECL_END
  1.1291 +
  1.1292 +
  1.1293 +
  1.1294 +#endif /* U_HIDE_DRAFT_API */
  1.1295 +
  1.1296 +#endif