os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/ustring.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 1998-2005, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *
     7 * File ustring.h
     8 *
     9 * Modification History:
    10 *
    11 *   Date        Name        Description
    12 *   12/07/98    bertrand    Creation.
    13 ******************************************************************************
    14 */
    15 
    16 #ifndef USTRING_H
    17 #define USTRING_H
    18 
    19 #include "unicode/utypes.h"
    20 #include "unicode/putil.h"
    21 #include "unicode/uiter.h"
    22 
    23 /** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
    24 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
    25 #   define UBRK_TYPEDEF_UBREAK_ITERATOR
    26     typedef void UBreakIterator;
    27 #endif
    28 
    29 /**
    30  * \file
    31  * \brief C API: Unicode string handling functions
    32  *
    33  * These C API functions provide general Unicode string handling.
    34  *
    35  * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
    36  * functions. (For example, they do not check for bad arguments like NULL string pointers.)
    37  * In some cases, only the thread-safe variant of such a function is implemented here
    38  * (see u_strtok_r()).
    39  *
    40  * Other functions provide more Unicode-specific functionality like locale-specific
    41  * upper/lower-casing and string comparison in code point order.
    42  *
    43  * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
    44  * UTF-16 encodes each Unicode code point with either one or two UChar code units.
    45  * (This is the default form of Unicode, and a forward-compatible extension of the original,
    46  * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
    47  * in 1996.)
    48  *
    49  * Some APIs accept a 32-bit UChar32 value for a single code point.
    50  *
    51  * ICU also handles 16-bit Unicode text with unpaired surrogates.
    52  * Such text is not well-formed UTF-16.
    53  * Code-point-related functions treat unpaired surrogates as surrogate code points,
    54  * i.e., as separate units.
    55  *
    56  * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
    57  * it is much more efficient even for random access because the code unit values
    58  * for single-unit characters vs. lead units vs. trail units are completely disjoint.
    59  * This means that it is easy to determine character (code point) boundaries from
    60  * random offsets in the string.
    61  *
    62  * Unicode (UTF-16) string processing is optimized for the single-unit case.
    63  * Although it is important to support supplementary characters
    64  * (which use pairs of lead/trail code units called "surrogates"),
    65  * their occurrence is rare. Almost all characters in modern use require only
    66  * a single UChar code unit (i.e., their code point values are <=0xffff).
    67  *
    68  * For more details see the User Guide Strings chapter (http://icu.sourceforge.net/userguide/strings.html).
    69  * For a discussion of the handling of unpaired surrogates see also
    70  * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
    71  */
    72 
     73 /**
     74  * Determine the length of an array of UChar.
     75  *
     76  * @param s The array of UChars, NUL (U+0000) terminated.
     77  * @return The number of UChars in <code>s</code>, not counting the terminator.
     78  * @stable ICU 2.0
     79  */
     80 U_STABLE int32_t U_EXPORT2
     81 u_strlen(const UChar *s);
    82 
    83 /**
    84  * Count Unicode code points in the length UChar code units of the string.
    85  * A code point may occupy either one or two UChar code units.
    86  * Counting code points involves reading all code units.
    87  *
     88  * This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
    89  *
    90  * @param s The input string.
    91  * @param length The number of UChar code units to be checked, or -1 to count all
    92  *               code points before the first NUL (U+0000).
    93  * @return The number of code points in the specified code units.
    94  * @stable ICU 2.0
    95  */
    96 U_STABLE int32_t U_EXPORT2
    97 u_countChar32(const UChar *s, int32_t length);
    98 
    99 /**
   100  * Check if the string contains more Unicode code points than a certain number.
   101  * This is more efficient than counting all code points in the entire string
   102  * and comparing that number with a threshold.
   103  * This function may not need to scan the string at all if the length is known
   104  * (not -1 for NUL-termination) and falls within a certain range, and
   105  * never needs to count more than 'number+1' code points.
   106  * Logically equivalent to (u_countChar32(s, length)>number).
   107  * A Unicode code point may occupy either one or two UChar code units.
   108  *
   109  * @param s The input string.
   110  * @param length The length of the string, or -1 if it is NUL-terminated.
   111  * @param number The number of code points in the string is compared against
   112  *               the 'number' parameter.
   113  * @return Boolean value for whether the string contains more Unicode code points
   114  *         than 'number'. Same as (u_countChar32(s, length)>number).
   115  * @stable ICU 2.4
   116  */
   117 U_STABLE UBool U_EXPORT2
   118 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
   119 
   120 /**
   121  * Concatenate two ustrings.  Appends a copy of <code>src</code>,
   122  * including the null terminator, to <code>dst</code>. The initial copied
   123  * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
   124  *
   125  * @param dst The destination string.
   126  * @param src The source string.
   127  * @return A pointer to <code>dst</code>.
   128  * @stable ICU 2.0
   129  */
   130 U_STABLE UChar* U_EXPORT2
   131 u_strcat(UChar     *dst, 
   132     const UChar     *src);
   133 
    134 /**
    135  * Concatenate two ustrings.  
    136  * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
    137  * Adds a terminating NUL.
    138  * If src is too long, then only <code>n-1</code> characters will be copied
    139  * before the terminating NUL.
    140  * If <code>n&lt;=0</code> then dst is not modified.
    141  *
    142  * @param dst The destination string.
    143  * @param src The source string.
    144  * @param n The maximum number of characters to append.
    145  * @return A pointer to <code>dst</code>.
    146  * @stable ICU 2.0
    147  */
    148 U_STABLE UChar* U_EXPORT2
    149 u_strncat(UChar     *dst, 
    150      const UChar     *src, 
    151      int32_t     n);
   152 
   153 /**
   154  * Find the first occurrence of a substring in a string.
   155  * The substring is found at code point boundaries.
   156  * That means that if the substring begins with
   157  * a trail surrogate or ends with a lead surrogate,
   158  * then it is found only if these surrogates stand alone in the text.
   159  * Otherwise, the substring edge units would be matched against
   160  * halves of surrogate pairs.
   161  *
   162  * @param s The string to search (NUL-terminated).
   163  * @param substring The substring to find (NUL-terminated).
   164  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
   165  *         or <code>s</code> itself if the <code>substring</code> is empty,
   166  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
   167  * @stable ICU 2.0
   168  *
   169  * @see u_strrstr
   170  * @see u_strFindFirst
   171  * @see u_strFindLast
   172  */
   173 U_STABLE UChar * U_EXPORT2
   174 u_strstr(const UChar *s, const UChar *substring);
   175 
   176 /**
   177  * Find the first occurrence of a substring in a string.
   178  * The substring is found at code point boundaries.
   179  * That means that if the substring begins with
   180  * a trail surrogate or ends with a lead surrogate,
   181  * then it is found only if these surrogates stand alone in the text.
   182  * Otherwise, the substring edge units would be matched against
   183  * halves of surrogate pairs.
   184  *
   185  * @param s The string to search.
   186  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
    187  * @param substring The substring to find.
   188  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
   189  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
   190  *         or <code>s</code> itself if the <code>substring</code> is empty,
   191  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
   192  * @stable ICU 2.4
   193  *
   194  * @see u_strstr
   195  * @see u_strFindLast
   196  */
   197 U_STABLE UChar * U_EXPORT2
   198 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
   199 
   200 /**
   201  * Find the first occurrence of a BMP code point in a string.
   202  * A surrogate code point is found only if its match in the text is not
   203  * part of a surrogate pair.
   204  * A NUL character is found at the string terminator.
   205  *
   206  * @param s The string to search (NUL-terminated).
   207  * @param c The BMP code point to find.
   208  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
   209  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   210  * @stable ICU 2.0
   211  *
   212  * @see u_strchr32
   213  * @see u_memchr
   214  * @see u_strstr
   215  * @see u_strFindFirst
   216  */
   217 U_STABLE UChar * U_EXPORT2
   218 u_strchr(const UChar *s, UChar c);
   219 
   220 /**
   221  * Find the first occurrence of a code point in a string.
   222  * A surrogate code point is found only if its match in the text is not
   223  * part of a surrogate pair.
   224  * A NUL character is found at the string terminator.
   225  *
   226  * @param s The string to search (NUL-terminated).
   227  * @param c The code point to find.
   228  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
   229  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   230  * @stable ICU 2.0
   231  *
   232  * @see u_strchr
   233  * @see u_memchr32
   234  * @see u_strstr
   235  * @see u_strFindFirst
   236  */
   237 U_STABLE UChar * U_EXPORT2
   238 u_strchr32(const UChar *s, UChar32 c);
   239 
   240 /**
   241  * Find the last occurrence of a substring in a string.
   242  * The substring is found at code point boundaries.
   243  * That means that if the substring begins with
   244  * a trail surrogate or ends with a lead surrogate,
   245  * then it is found only if these surrogates stand alone in the text.
   246  * Otherwise, the substring edge units would be matched against
   247  * halves of surrogate pairs.
   248  *
   249  * @param s The string to search (NUL-terminated).
   250  * @param substring The substring to find (NUL-terminated).
   251  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
   252  *         or <code>s</code> itself if the <code>substring</code> is empty,
   253  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
   254  * @stable ICU 2.4
   255  *
   256  * @see u_strstr
   257  * @see u_strFindFirst
   258  * @see u_strFindLast
   259  */
   260 U_STABLE UChar * U_EXPORT2
   261 u_strrstr(const UChar *s, const UChar *substring);
   262 
   263 /**
   264  * Find the last occurrence of a substring in a string.
   265  * The substring is found at code point boundaries.
   266  * That means that if the substring begins with
   267  * a trail surrogate or ends with a lead surrogate,
   268  * then it is found only if these surrogates stand alone in the text.
   269  * Otherwise, the substring edge units would be matched against
   270  * halves of surrogate pairs.
   271  *
   272  * @param s The string to search.
   273  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
    274  * @param substring The substring to find.
   275  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
   276  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
   277  *         or <code>s</code> itself if the <code>substring</code> is empty,
   278  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
   279  * @stable ICU 2.4
   280  *
   281  * @see u_strstr
    282  * @see u_strFindFirst
   283  */
   284 U_STABLE UChar * U_EXPORT2
   285 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
   286 
   287 /**
   288  * Find the last occurrence of a BMP code point in a string.
   289  * A surrogate code point is found only if its match in the text is not
   290  * part of a surrogate pair.
   291  * A NUL character is found at the string terminator.
   292  *
   293  * @param s The string to search (NUL-terminated).
   294  * @param c The BMP code point to find.
   295  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
   296  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   297  * @stable ICU 2.4
   298  *
   299  * @see u_strrchr32
   300  * @see u_memrchr
   301  * @see u_strrstr
   302  * @see u_strFindLast
   303  */
   304 U_STABLE UChar * U_EXPORT2
   305 u_strrchr(const UChar *s, UChar c);
   306 
   307 /**
   308  * Find the last occurrence of a code point in a string.
   309  * A surrogate code point is found only if its match in the text is not
   310  * part of a surrogate pair.
   311  * A NUL character is found at the string terminator.
   312  *
   313  * @param s The string to search (NUL-terminated).
   314  * @param c The code point to find.
   315  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
   316  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   317  * @stable ICU 2.4
   318  *
   319  * @see u_strrchr
   320  * @see u_memchr32
   321  * @see u_strrstr
   322  * @see u_strFindLast
   323  */
   324 U_STABLE UChar * U_EXPORT2
   325 u_strrchr32(const UChar *s, UChar32 c);
   326 
   327 /**
   328  * Locates the first occurrence in the string <code>string</code> of any of the characters
   329  * in the string <code>matchSet</code>.
   330  * Works just like C's strpbrk but with Unicode.
   331  *
   332  * @param string The string in which to search, NUL-terminated.
   333  * @param matchSet A NUL-terminated string defining a set of code points
   334  *                 for which to search in the text string.
    335  * @return A pointer to the character in <code>string</code> that matches one of the
   336  *         characters in <code>matchSet</code>, or NULL if no such character is found.
   337  * @stable ICU 2.0
   338  */
   339 U_STABLE UChar * U_EXPORT2
   340 u_strpbrk(const UChar *string, const UChar *matchSet);
   341 
   342 /**
   343  * Returns the number of consecutive characters in <code>string</code>,
   344  * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
   345  * Works just like C's strcspn but with Unicode.
   346  *
   347  * @param string The string in which to search, NUL-terminated.
   348  * @param matchSet A NUL-terminated string defining a set of code points
   349  *                 for which to search in the text string.
   350  * @return The number of initial characters in <code>string</code> that do not
   351  *         occur in <code>matchSet</code>.
   352  * @see u_strspn
   353  * @stable ICU 2.0
   354  */
   355 U_STABLE int32_t U_EXPORT2
   356 u_strcspn(const UChar *string, const UChar *matchSet);
   357 
   358 /**
   359  * Returns the number of consecutive characters in <code>string</code>,
   360  * beginning with the first, that occur somewhere in <code>matchSet</code>.
   361  * Works just like C's strspn but with Unicode.
   362  *
   363  * @param string The string in which to search, NUL-terminated.
   364  * @param matchSet A NUL-terminated string defining a set of code points
   365  *                 for which to search in the text string.
   366  * @return The number of initial characters in <code>string</code> that do
   367  *         occur in <code>matchSet</code>.
   368  * @see u_strcspn
   369  * @stable ICU 2.0
   370  */
   371 U_STABLE int32_t U_EXPORT2
   372 u_strspn(const UChar *string, const UChar *matchSet);
   373 
    374 /**
    375  * The string tokenizer API allows an application to break a string into
    376  * tokens. Unlike strtok(), the tokenizer state (the current pointer within
    377  * the original string) is maintained in saveState. In the first call, the
    378  * argument src is a pointer to the string. In subsequent calls to
    379  * return successive tokens of that string, src must be specified as
    380  * NULL. The value saveState is set by this function to maintain the
    381  * function's position within the string, and on each subsequent call
    382  * you must give this argument the same variable. This function does
    383  * handle surrogate pairs. This function is similar to strtok_r() from
    384  * the POSIX Threads Extension (1003.1c-1995).
    385  *
    386  * @param src String containing token(s). This string will be modified.
    387  *            After the first call to u_strtok_r(), this argument must
    388  *            be NULL to get to the next token.
    389  * @param delim Set of delimiter characters (Unicode code points).
    390  * @param saveState The current pointer within the original string,
    391  *              which is set by this function. The saveState
    392  *              parameter should be the address of a local variable of type
    393  *              UChar *. (i.e. define "UChar *myLocalSaveState" and use
    394  *              &myLocalSaveState for this parameter).
    395  * @return A pointer to the next token found in src, or NULL
    396  *         when there are no more tokens.
    397  * @stable ICU 2.0
    398  */
    399 U_STABLE UChar * U_EXPORT2
    400 u_strtok_r(UChar    *src, 
    401      const UChar    *delim,
    402            UChar   **saveState);
   403 
   404 /**
   405  * Compare two Unicode strings for bitwise equality (code unit order).
   406  *
   407  * @param s1 A string to compare.
   408  * @param s2 A string to compare.
   409  * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
    410  * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
   411  * value if <code>s1</code> is bitwise greater than <code>s2</code>.
   412  * @stable ICU 2.0
   413  */
   414 U_STABLE int32_t  U_EXPORT2
   415 u_strcmp(const UChar     *s1, 
   416          const UChar     *s2);
   417 
   418 /**
   419  * Compare two Unicode strings in code point order.
   420  * See u_strCompare for details.
   421  *
   422  * @param s1 A string to compare.
   423  * @param s2 A string to compare.
   424  * @return a negative/zero/positive integer corresponding to whether
   425  * the first string is less than/equal to/greater than the second one
   426  * in code point order
   427  * @stable ICU 2.0
   428  */
   429 U_STABLE int32_t U_EXPORT2
   430 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
   431 
   432 /**
   433  * Compare two Unicode strings (binary order).
   434  *
   435  * The comparison can be done in code unit order or in code point order.
   436  * They differ only in UTF-16 when
   437  * comparing supplementary code points (U+10000..U+10ffff)
   438  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
   439  * In code unit order, high BMP code points sort after supplementary code points
   440  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
   441  *
    442  * This function works with strings of different explicitly specified lengths
   443  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
   444  * NUL-terminated strings are possible with length arguments of -1.
   445  *
   446  * @param s1 First source string.
   447  * @param length1 Length of first source string, or -1 if NUL-terminated.
   448  *
   449  * @param s2 Second source string.
   450  * @param length2 Length of second source string, or -1 if NUL-terminated.
   451  *
   452  * @param codePointOrder Choose between code unit order (FALSE)
   453  *                       and code point order (TRUE).
   454  *
   455  * @return <0 or 0 or >0 as usual for string comparisons
   456  *
   457  * @stable ICU 2.2
   458  */
   459 U_STABLE int32_t U_EXPORT2
   460 u_strCompare(const UChar *s1, int32_t length1,
   461              const UChar *s2, int32_t length2,
   462              UBool codePointOrder);
   463 
   464 /**
   465  * Compare two Unicode strings (binary order)
   466  * as presented by UCharIterator objects.
   467  * Works otherwise just like u_strCompare().
   468  *
   469  * Both iterators are reset to their start positions.
   470  * When the function returns, it is undefined where the iterators
   471  * have stopped.
   472  *
   473  * @param iter1 First source string iterator.
   474  * @param iter2 Second source string iterator.
   475  * @param codePointOrder Choose between code unit order (FALSE)
   476  *                       and code point order (TRUE).
   477  *
   478  * @return <0 or 0 or >0 as usual for string comparisons
   479  *
   480  * @see u_strCompare
   481  *
   482  * @stable ICU 2.6
   483  */
   484 U_STABLE int32_t U_EXPORT2
   485 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
   486 
   487 #ifndef U_COMPARE_CODE_POINT_ORDER
   488 /* see also unistr.h and unorm.h */
   489 /**
   490  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
   491  * Compare strings in code point order instead of code unit order.
   492  * @stable ICU 2.2
   493  */
   494 #define U_COMPARE_CODE_POINT_ORDER  0x8000
   495 #endif
   496 
   497 /**
   498  * Compare two strings case-insensitively using full case folding.
   499  * This is equivalent to
   500  *   u_strCompare(u_strFoldCase(s1, options),
   501  *                u_strFoldCase(s2, options),
   502  *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
   503  *
   504  * The comparison can be done in UTF-16 code unit order or in code point order.
   505  * They differ only when comparing supplementary code points (U+10000..U+10ffff)
   506  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
   507  * In code unit order, high BMP code points sort after supplementary code points
   508  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
   509  *
    510  * This function works with strings of different explicitly specified lengths
   511  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
   512  * NUL-terminated strings are possible with length arguments of -1.
   513  *
   514  * @param s1 First source string.
   515  * @param length1 Length of first source string, or -1 if NUL-terminated.
   516  *
   517  * @param s2 Second source string.
   518  * @param length2 Length of second source string, or -1 if NUL-terminated.
   519  *
   520  * @param options A bit set of options:
   521  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   522  *     Comparison in code unit order with default case folding.
   523  *
   524  *   - U_COMPARE_CODE_POINT_ORDER
   525  *     Set to choose code point order instead of code unit order
   526  *     (see u_strCompare for details).
   527  *
   528  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   529  *
   530  * @param pErrorCode Must be a valid pointer to an error code value,
   531  *                  which must not indicate a failure before the function call.
   532  *
   533  * @return <0 or 0 or >0 as usual for string comparisons
   534  *
   535  * @stable ICU 2.2
   536  */
   537 U_STABLE int32_t U_EXPORT2
   538 u_strCaseCompare(const UChar *s1, int32_t length1,
   539                  const UChar *s2, int32_t length2,
   540                  uint32_t options,
   541                  UErrorCode *pErrorCode);
   542 
    543 /**
    544  * Compare two ustrings for bitwise equality. 
    545  * Compares at most <code>n</code> characters.
    546  *
    547  * @param ucs1 A string to compare.
    548  * @param ucs2 A string to compare.
    549  * @param n The maximum number of characters to compare.
    550  * @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
    551  * value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
    552  * value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
    553  * @stable ICU 2.0
    554  */
    555 U_STABLE int32_t U_EXPORT2
    556 u_strncmp(const UChar     *ucs1, 
    557      const UChar     *ucs2, 
    558      int32_t     n);
   559 
   560 /**
   561  * Compare two Unicode strings in code point order.
   562  * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
   563  * For details, see u_strCompare().
   564  *
   565  * @param s1 A string to compare.
   566  * @param s2 A string to compare.
   567  * @param n The maximum number of characters to compare.
   568  * @return a negative/zero/positive integer corresponding to whether
   569  * the first string is less than/equal to/greater than the second one
   570  * in code point order
   571  * @stable ICU 2.0
   572  */
   573 U_STABLE int32_t U_EXPORT2
   574 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
   575 
   576 /**
   577  * Compare two strings case-insensitively using full case folding.
   578  * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
   579  *
   580  * @param s1 A string to compare.
   581  * @param s2 A string to compare.
   582  * @param options A bit set of options:
   583  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   584  *     Comparison in code unit order with default case folding.
   585  *
   586  *   - U_COMPARE_CODE_POINT_ORDER
   587  *     Set to choose code point order instead of code unit order
   588  *     (see u_strCompare for details).
   589  *
   590  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   591  *
   592  * @return A negative, zero, or positive integer indicating the comparison result.
   593  * @stable ICU 2.0
   594  */
   595 U_STABLE int32_t U_EXPORT2
   596 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
   597 
   598 /**
   599  * Compare two strings case-insensitively using full case folding.
   600  * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
   601  * u_strFoldCase(s2, at most n, options)).
   602  *
   603  * @param s1 A string to compare.
   604  * @param s2 A string to compare.
    605  * @param n The maximum number of characters in each string to case-fold and then compare.
   606  * @param options A bit set of options:
   607  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   608  *     Comparison in code unit order with default case folding.
   609  *
   610  *   - U_COMPARE_CODE_POINT_ORDER
   611  *     Set to choose code point order instead of code unit order
   612  *     (see u_strCompare for details).
   613  *
   614  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   615  *
   616  * @return A negative, zero, or positive integer indicating the comparison result.
   617  * @stable ICU 2.0
   618  */
   619 U_STABLE int32_t U_EXPORT2
   620 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
   621 
   622 /**
   623  * Compare two strings case-insensitively using full case folding.
    624  * This is equivalent to u_strcmp(u_strFoldCase(s1, length, options),
    625  * u_strFoldCase(s2, length, options)).
   626  *
   627  * @param s1 A string to compare.
   628  * @param s2 A string to compare.
   629  * @param length The number of characters in each string to case-fold and then compare.
   630  * @param options A bit set of options:
   631  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   632  *     Comparison in code unit order with default case folding.
   633  *
   634  *   - U_COMPARE_CODE_POINT_ORDER
   635  *     Set to choose code point order instead of code unit order
   636  *     (see u_strCompare for details).
   637  *
   638  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   639  *
   640  * @return A negative, zero, or positive integer indicating the comparison result.
   641  * @stable ICU 2.0
   642  */
   643 U_STABLE int32_t U_EXPORT2
   644 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
   645 
   646 /**
   647  * Copy a ustring. Adds a null terminator.
   648  *
   649  * @param dst The destination string.
   650  * @param src The source string.
   651  * @return A pointer to <code>dst</code>.
   652  * @stable ICU 2.0
   653  */
   654 U_STABLE UChar* U_EXPORT2
   655 u_strcpy(UChar     *dst, 
   656     const UChar     *src);
   657 
   658 /**
   659  * Copy a ustring.
   660  * Copies at most <code>n</code> characters.  The result will be null terminated
   661  * if the length of <code>src</code> is less than <code>n</code>.
   662  *
   663  * @param dst The destination string.
   664  * @param src The source string.
   665  * @param n The maximum number of characters to copy.
   666  * @return A pointer to <code>dst</code>.
   667  * @stable ICU 2.0
   668  */
   669 U_STABLE UChar* U_EXPORT2
   670 u_strncpy(UChar     *dst, 
   671      const UChar     *src, 
   672      int32_t     n);
   673 
   674 #if !UCONFIG_NO_CONVERSION
   675 
   676 /**
   677  * Copy a byte string encoded in the default codepage to a ustring.
   678  * Adds a null terminator.
   679  * Performs a host byte to UChar conversion
   680  *
   681  * @param dst The destination string.
   682  * @param src The source string.
   683  * @return A pointer to <code>dst</code>.
   684  * @stable ICU 2.0
   685  */
   686 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
   687                const char *src );
   688 
   689 /**
   690  * Copy a byte string encoded in the default codepage to a ustring.
   691  * Copies at most <code>n</code> characters.  The result will be null terminated
   692  * if the length of <code>src</code> is less than <code>n</code>.
   693  * Performs a host byte to UChar conversion
   694  *
   695  * @param dst The destination string.
   696  * @param src The source string.
   697  * @param n The maximum number of characters to copy.
   698  * @return A pointer to <code>dst</code>.
   699  * @stable ICU 2.0
   700  */
   701 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
   702             const char *src,
   703             int32_t n);
   704 
   705 /**
   706  * Copy a ustring to a byte string encoded in the default codepage.
   707  * Adds a null terminator.
   708  * Performs a UChar to host byte conversion.
   709  *
   710  * @param dst The destination byte string, encoded in the default codepage.
   711  * @param src The source UChar string.
   712  * @return A pointer to <code>dst</code>.
   713  * @stable ICU 2.0
   714  */
   715 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
   716             const UChar *src );
   717 
   718 /**
   719  * Copy a ustring to a byte string encoded in the default codepage.
   720  * Copies at most <code>n</code> characters.  The result will be null terminated
   721  * if the length of <code>src</code> is less than <code>n</code>.
   722  * Performs a UChar to host byte conversion.
   723  *
   724  * @param dst The destination byte string, encoded in the default codepage.
   725  * @param src The source UChar string.
   726  * @param n The maximum number of characters to copy.
   727  * @return A pointer to <code>dst</code>.
   728  * @stable ICU 2.0
   729  */
   730 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
   731             const UChar *src,
   732             int32_t n );
   733 
   734 #endif
   735 
   736 /**
   737  * Synonym for memcpy(), but with UChars only.
   738  * @param dest The destination string.
   739  * @param src The source string.
   740  * @param count The number of UChars to copy.
   741  * @return A pointer to <code>dest</code>.
   742  * @stable ICU 2.0
   743  */
   744 U_STABLE UChar* U_EXPORT2
   745 u_memcpy(UChar *dest, const UChar *src, int32_t count);
   746 
   747 /**
   748  * Synonym for memmove(), but with UChars only.
   749  * @param dest The destination string.
   750  * @param src The source string.
   751  * @param count The number of UChars to move.
   752  * @return A pointer to <code>dest</code>.
   753  * @stable ICU 2.0
   754  */
   755 U_STABLE UChar* U_EXPORT2
   756 u_memmove(UChar *dest, const UChar *src, int32_t count);
   757 
   758 /**
   759  * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
   760  *
   761  * @param dest The destination string.
   762  * @param c The character to initialize the string with.
   763  * @param count The number of characters of <code>dest</code> to set.
   764  * @return A pointer to <code>dest</code>.
   765  * @stable ICU 2.0
   766  */
   767 U_STABLE UChar* U_EXPORT2
   768 u_memset(UChar *dest, UChar c, int32_t count);
   769 
   770 /**
   771  * Compare the first <code>count</code> UChars of each buffer.
   772  *
   773  * @param buf1 The first buffer to compare.
   774  * @param buf2 The second buffer to compare.
   775  * @param count The maximum number of UChars to compare.
   776  * @return When buf1 < buf2, a negative number is returned.
   777  *      When buf1 == buf2, 0 is returned.
   778  *      When buf1 > buf2, a positive number is returned.
   779  * @stable ICU 2.0
   780  */
   781 U_STABLE int32_t U_EXPORT2
   782 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
   783 
   784 /**
   785  * Compare two Unicode strings in code point order.
   786  * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
   787  * For details, see u_strCompare().
   788  *
   789  * @param s1 A string to compare.
   790  * @param s2 A string to compare.
   791  * @param count The maximum number of characters to compare.
   792  * @return A negative/zero/positive integer corresponding to whether
   793  * the first string is less than/equal to/greater than the second one
   794  * in code point order.
   795  * @stable ICU 2.0
   796  */
   797 U_STABLE int32_t U_EXPORT2
   798 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
   799 
   800 /**
   801  * Find the first occurrence of a BMP code point in a string.
   802  * A surrogate code point is found only if its match in the text is not
   803  * part of a surrogate pair.
   804  * A NUL character is found at the string terminator.
   805  *
   806  * @param s The string to search (contains <code>count</code> UChars).
   807  * @param c The BMP code point to find.
   808  * @param count The length of the string, in UChars.
   809  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>,
   810  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   811  * @stable ICU 2.0
   812  *
   813  * @see u_strchr
   814  * @see u_memchr32
   815  * @see u_strFindFirst
   816  */
   817 U_STABLE UChar* U_EXPORT2
   818 u_memchr(const UChar *s, UChar c, int32_t count);
   819 
   820 /**
   821  * Find the first occurrence of a code point in a string.
   822  * A surrogate code point is found only if its match in the text is not
   823  * part of a surrogate pair.
   824  * A NUL character is found at the string terminator.
   825  *
   826  * @param s The string to search (contains <code>count</code> UChars).
   827  * @param c The code point to find.
   828  * @param count The length of the string, in UChars.
   829  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>,
   830  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   831  * @stable ICU 2.0
   832  *
   833  * @see u_strchr32
   834  * @see u_memchr
   835  * @see u_strFindFirst
   836  */
   837 U_STABLE UChar* U_EXPORT2
   838 u_memchr32(const UChar *s, UChar32 c, int32_t count);
   839 
   840 /**
   841  * Find the last occurrence of a BMP code point in a string.
   842  * A surrogate code point is found only if its match in the text is not
   843  * part of a surrogate pair.
   844  * A NUL character is found at the string terminator.
   845  *
   846  * @param s The string to search (contains <code>count</code> UChars).
   847  * @param c The BMP code point to find.
   848  * @param count The length of the string, in UChars.
   849  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>,
   850  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   851  * @stable ICU 2.4
   852  *
   853  * @see u_strrchr
   854  * @see u_memrchr32
   855  * @see u_strFindLast
   856  */
   857 U_STABLE UChar* U_EXPORT2
   858 u_memrchr(const UChar *s, UChar c, int32_t count);
   859 
   860 /**
   861  * Find the last occurrence of a code point in a string.
   862  * A surrogate code point is found only if its match in the text is not
   863  * part of a surrogate pair.
   864  * A NUL character is found at the string terminator.
   865  *
   866  * @param s The string to search (contains <code>count</code> UChars).
   867  * @param c The code point to find.
   868  * @param count The length of the string, in UChars.
   869  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>,
   870  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
   871  * @stable ICU 2.4
   872  *
   873  * @see u_strrchr32
   874  * @see u_memrchr
   875  * @see u_strFindLast
   876  */
   877 U_STABLE UChar* U_EXPORT2
   878 u_memrchr32(const UChar *s, UChar32 c, int32_t count);
   879 
   880 /**
   881  * Unicode String literals in C.
   882  * We need one macro to declare a variable for the string
   883  * and to statically preinitialize it if possible,
   884  * and a second macro to dynamically initialize such a string variable if necessary.
   885  *
   886  * The macros are defined for maximum performance.
   887  * They work only for strings that contain "invariant characters", i.e.,
   888  * only Latin letters, digits, and some punctuation.
   889  * See utypes.h for details.
   890  *
   891  * A pair of macros for a single string must be used with the same
   892  * parameters.
   893  * The string parameter must be a C string literal.
   894  * The length of the string, not including the terminating
   895  * <code>NUL</code>, must be specified as a constant (both macros parenthesize it).
   896  * The U_STRING_DECL macro should be invoked exactly once for one
   897  * such string variable before it is used.
   898  *
   899  * Usage:
   900  * <pre>
   901  *    U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
   902  *    U_STRING_DECL(ustringVar2, "jumps 5%", 8);
   903  *    static UBool didInit=FALSE;
   904  * 
   905  *    int32_t function() {
   906  *        if(!didInit) {
   907  *            U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
   908  *            U_STRING_INIT(ustringVar2, "jumps 5%", 8);
   909  *            didInit=TRUE;
   910  *        }
   911  *        return u_strcmp(ustringVar1, ustringVar2);
   912  *    }
   913  * </pre>
   914  * @stable ICU 2.0
   915  */
   916 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
   917 #   define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs }
   918     /**@stable ICU 2.0 */
   919 #   define U_STRING_INIT(var, cs, length)
   920 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
   921 #   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ cs }
   922     /**@stable ICU 2.0 */
   923 #   define U_STRING_INIT(var, cs, length)
   924 #else
   925 #   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
   926     /**@stable ICU 2.0 */
   927 #   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
   928 #endif
   929 
   930 /**
   931  * Unescape a string of characters and write the resulting
   932  * Unicode characters to the destination buffer.  The following escape
   933  * sequences are recognized:
   934  *
   935  * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   936  * \\Uhhhhhhhh   8 hex digits
   937  * \\xhh         1-2 hex digits
   938  * \\x{h...}     1-8 hex digits
   939  * \\ooo         1-3 octal digits; o in [0-7]
   940  * \\cX          control-X; X is masked with 0x1F
   941  *
   942  * as well as the standard ANSI C escapes:
   943  *
   944  * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   945  * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   946  * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   947  *
   948  * Anything else following a backslash is generically escaped.  For
   949  * example, "[a\\-z]" returns "[a-z]".
   950  *
   951  * If an escape sequence is ill-formed, this function produces an empty
   952  * string.  An example of an ill-formed sequence is "\\u" followed by
   953  * fewer than 4 hex digits.
   954  *
   955  * The above characters are recognized in the compiler's codepage,
   956  * that is, they are coded as 'u', '\\', etc.  Characters that are
   957  * not parts of escape sequences are converted using u_charsToUChars().
   958  *
   959  * This function is similar to UnicodeString::unescape() but not
   960  * identical to it.  The latter takes a source UnicodeString, so it
   961  * does escape recognition but no conversion.
   962  *
   963  * @param src a zero-terminated string of invariant characters
   964  * @param dest pointer to buffer to receive converted and unescaped
   965  * text and, if there is room, a zero terminator.  May be NULL for
   966  * preflighting, in which case no UChars will be written, but the
   967  * return value will still be valid.  On error, an empty string is
   968  * stored here (if possible).
   969  * @param destCapacity the number of UChars that may be written at
   970  * dest.  Ignored if dest == NULL.
   971  * @return the length of the unescaped string.
   972  * @see u_unescapeAt
   973  * @see UnicodeString#unescape()
   974  * @see UnicodeString#unescapeAt()
   975  * @stable ICU 2.0
   976  */
   977 U_STABLE int32_t U_EXPORT2
   978 u_unescape(const char *src,
   979            UChar *dest, int32_t destCapacity);
   980 
   981 U_CDECL_BEGIN
   982 /**
   983  * Callback function for u_unescapeAt() that returns a character of
   984  * the source text given an offset and a context pointer.  The context
   985  * pointer will be whatever is passed into u_unescapeAt().
   986  *
   987  * @param offset the offset (passed by value) of the source character to return.
   988  * @param context an opaque pointer passed directly into u_unescapeAt()
   989  * @return the character (UChar) of the source text at the given
   990  * offset
   991  * @see u_unescapeAt
   992  * @stable ICU 2.0
   993  */
   994 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
   995 U_CDECL_END
   996 
   997 /**
   998  * Unescape a single sequence. The character at *offset-1 is assumed
   999  * (without checking) to be a backslash.  This function takes a callback
  1000  * pointer to a function that returns the UChar at a given offset.  By
  1001  * varying this callback, ICU functions are able to unescape char*
  1002  * strings, UnicodeString objects, and UFILE pointers.
  1003  *
  1004  * If *offset is out of range, or if the escape sequence is ill-formed,
  1005  * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
  1006  * for a list of recognized sequences.
  1007  *
  1008  * @param charAt callback function that returns a UChar of the source
  1009  * text given an offset and a context pointer.
  1010  * @param offset pointer to the offset that will be passed to charAt.
  1011  * The value *offset will be updated upon return to point after the
  1012  * last parsed character of the escape sequence.  On error, *offset
  1013  * is unchanged.
  1014  * @param length the number of characters in the source text.  The
  1015  * last character of the source text is considered to be at offset
  1016  * length-1.
  1017  * @param context an opaque pointer passed directly into charAt.
  1018  * @return the character represented by the escape sequence at
  1019  * *offset, or (UChar32)0xFFFFFFFF on error.
  1020  * @see u_unescape()
  1021  * @see UnicodeString#unescape()
  1022  * @see UnicodeString#unescapeAt()
  1023  * @stable ICU 2.0
  1024  */
  1025 U_STABLE UChar32 U_EXPORT2
  1026 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
  1027              int32_t *offset,
  1028              int32_t length,
  1029              void *context);
  1030 
  1031 /**
  1032  * Uppercase the characters in a string.
  1033  * Casing is locale-dependent and context-sensitive.
  1034  * The result may be longer or shorter than the original.
  1035  * The source string and the destination buffer are allowed to overlap.
  1036  *
  1037  * @param dest      A buffer for the result string. The result will be zero-terminated if
  1038  *                  the buffer is large enough.
  1039  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
  1040  *                  dest may be NULL and the function will only return the length of the result
  1041  *                  without writing any of the result string.
  1042  * @param src       The original string.
  1043  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
  1044  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
  1045  * @param pErrorCode Must be a valid pointer to an error code value,
  1046  *                  which must not indicate a failure before the function call.
  1047  * @return The length of the result string. It may be greater than destCapacity. In that case,
  1048  *         only some of the result was written to the destination buffer.
  1049  * @stable ICU 2.0
  1050  */
  1051 U_STABLE int32_t U_EXPORT2
  1052 u_strToUpper(UChar *dest, int32_t destCapacity,
  1053              const UChar *src, int32_t srcLength,
  1054              const char *locale,
  1055              UErrorCode *pErrorCode);
  1056 
  1057 /**
  1058  * Lowercase the characters in a string.
  1059  * Casing is locale-dependent and context-sensitive.
  1060  * The result may be longer or shorter than the original.
  1061  * The source string and the destination buffer are allowed to overlap.
  1062  *
  1063  * @param dest      A buffer for the result string. The result will be zero-terminated if
  1064  *                  the buffer is large enough.
  1065  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
  1066  *                  dest may be NULL and the function will only return the length of the result
  1067  *                  without writing any of the result string.
  1068  * @param src       The original string.
  1069  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
  1070  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
  1071  * @param pErrorCode Must be a valid pointer to an error code value,
  1072  *                  which must not indicate a failure before the function call.
  1073  * @return The length of the result string. It may be greater than destCapacity. In that case,
  1074  *         only some of the result was written to the destination buffer.
  1075  * @stable ICU 2.0
  1076  */
  1077 U_STABLE int32_t U_EXPORT2
  1078 u_strToLower(UChar *dest, int32_t destCapacity,
  1079              const UChar *src, int32_t srcLength,
  1080              const char *locale,
  1081              UErrorCode *pErrorCode);
  1082 
  1083 #if !UCONFIG_NO_BREAK_ITERATION
  1084 
  1085 /**
  1086  * Titlecase a string.
  1087  * Casing is locale-dependent and context-sensitive.
  1088  * Titlecasing uses a break iterator to find the first characters of words
  1089  * that are to be titlecased. It titlecases those characters and lowercases
  1090  * all others.
  1091  *
  1092  * The titlecase break iterator can be provided to customize for arbitrary
  1093  * styles, using rules and dictionaries beyond the standard iterators.
  1094  * It may be more efficient to always provide an iterator to avoid
  1095  * opening and closing one for each string.
  1096  * The standard titlecase iterator for the root locale implements the
  1097  * algorithm of Unicode TR 21.
  1098  *
  1099  * This function uses only the first() and next() methods of the
  1100  * provided break iterator.
  1101  *
  1102  * The result may be longer or shorter than the original.
  1103  * The source string and the destination buffer are allowed to overlap.
  1104  *
  1105  * @param dest      A buffer for the result string. The result will be zero-terminated if
  1106  *                  the buffer is large enough.
  1107  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
  1108  *                  dest may be NULL and the function will only return the length of the result
  1109  *                  without writing any of the result string.
  1110  * @param src       The original string.
  1111  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
  1112  * @param titleIter A break iterator to find the first characters of words
  1113  *                  that are to be titlecased.
  1114  *                  If none is provided (NULL), then a standard titlecase
  1115  *                  break iterator is opened.
  1116  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
  1117  * @param pErrorCode Must be a valid pointer to an error code value,
  1118  *                  which must not indicate a failure before the function call.
  1119  * @return The length of the result string. It may be greater than destCapacity. In that case,
  1120  *         only some of the result was written to the destination buffer.
  1121  * @stable ICU 2.1
  1122  */
  1123 U_STABLE int32_t U_EXPORT2
  1124 u_strToTitle(UChar *dest, int32_t destCapacity,
  1125              const UChar *src, int32_t srcLength,
  1126              UBreakIterator *titleIter,
  1127              const char *locale,
  1128              UErrorCode *pErrorCode);
  1129 
  1130 #endif
  1131 
  1132 /**
  1133  * Case-fold the characters in a string.
  1134  * Case-folding is locale-independent and not context-sensitive,
  1135  * but there is an option for whether to include or exclude mappings for dotted I
  1136  * and dotless i that are marked with 'I' in CaseFolding.txt.
  1137  * The result may be longer or shorter than the original.
  1138  * The source string and the destination buffer are allowed to overlap.
  1139  *
  1140  * @param dest      A buffer for the result string. The result will be zero-terminated if
  1141  *                  the buffer is large enough.
  1142  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
  1143  *                  dest may be NULL and the function will only return the length of the result
  1144  *                  without writing any of the result string.
  1145  * @param src       The original string.
  1146  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
  1147  * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I.
  1148  * @param pErrorCode Must be a valid pointer to an error code value,
  1149  *                  which must not indicate a failure before the function call.
  1150  * @return The length of the result string. It may be greater than destCapacity. In that case,
  1151  *         only some of the result was written to the destination buffer.
  1152  * @stable ICU 2.0
  1153  */
  1154 U_STABLE int32_t U_EXPORT2
  1155 u_strFoldCase(UChar *dest, int32_t destCapacity,
  1156               const UChar *src, int32_t srcLength,
  1157               uint32_t options,
  1158               UErrorCode *pErrorCode);
  1159 
  1160 /**
  1161  * Converts a sequence of UChars to wchar_t units.
  1162  *
  1163  * @param dest          A buffer for the result string. The result will be zero-terminated if
  1164  *                      the buffer is large enough.
  1165  * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
  1166  *                      dest may be NULL and the function will only return the length of the 
  1167  *                      result without writing any of the result string (pre-flighting).
  1168  * @param pDestLength   A pointer to receive the number of units written to the destination. If 
  1169  *                      pDestLength!=NULL then *pDestLength is always set to the 
  1170  *                      number of output units corresponding to the transformation of 
  1171  *                      all the input units, even in case of a buffer overflow.
  1172  * @param src           The original source string.
  1173  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
  1174  * @param pErrorCode    Must be a valid pointer to an error code value,
  1175  *                      which must not indicate a failure before the function call.
  1176  * @return A pointer to the destination buffer.
  1177  * @stable ICU 2.0
  1178  */
  1179 U_STABLE wchar_t* U_EXPORT2
  1180 u_strToWCS(wchar_t *dest, 
  1181            int32_t destCapacity,
  1182            int32_t *pDestLength,
  1183            const UChar *src, 
  1184            int32_t srcLength,
  1185            UErrorCode *pErrorCode);
  1186 /**
  1187  * Converts a sequence of wchar_t units to UChars.
  1188  *
  1189  * @param dest          A buffer for the result string. The result will be zero-terminated if
  1190  *                      the buffer is large enough.
  1191  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
  1192  *                      dest may be NULL and the function will only return the length of the 
  1193  *                      result without writing any of the result string (pre-flighting).
  1194  * @param pDestLength   A pointer to receive the number of units written to the destination. If 
  1195  *                      pDestLength!=NULL then *pDestLength is always set to the 
  1196  *                      number of output units corresponding to the transformation of 
  1197  *                      all the input units, even in case of a buffer overflow.
  1198  * @param src           The original source string.
  1199  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
  1200  * @param pErrorCode    Must be a valid pointer to an error code value,
  1201  *                      which must not indicate a failure before the function call.
  1202  * @return A pointer to the destination buffer.
  1203  * @stable ICU 2.0
  1204  */
  1205 U_STABLE UChar* U_EXPORT2
  1206 u_strFromWCS(UChar   *dest,
  1207              int32_t destCapacity, 
  1208              int32_t *pDestLength,
  1209              const wchar_t *src,
  1210              int32_t srcLength,
  1211              UErrorCode *pErrorCode);
  1212 /**
  1213  * Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
  1214  *
  1215  * @param dest          A buffer for the result string. The result will be zero-terminated if
  1216  *                      the buffer is large enough.
  1217  * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
  1218  *                      dest may be NULL and the function will only return the length of the 
  1219  *                      result without writing any of the result string (pre-flighting).
  1220  * @param pDestLength   A pointer to receive the number of units written to the destination. If 
  1221  *                      pDestLength!=NULL then *pDestLength is always set to the 
  1222  *                      number of output units corresponding to the transformation of 
  1223  *                      all the input units, even in case of a buffer overflow.
  1224  * @param src           The original source string.
  1225  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
  1226  * @param pErrorCode    Must be a valid pointer to an error code value,
  1227  *                      which must not indicate a failure before the function call.
  1228  * @return A pointer to the destination buffer.
  1229  * @stable ICU 2.0
  1230  */
  1231 U_STABLE char* U_EXPORT2 
  1232 u_strToUTF8(char *dest,           
  1233             int32_t destCapacity,
  1234             int32_t *pDestLength,
  1235             const UChar *src, 
  1236             int32_t srcLength,
  1237             UErrorCode *pErrorCode);
  1238 
  1239 /**
  1240  * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
  1241  *
  1242  * @param dest          A buffer for the result string. The result will be zero-terminated if
  1243  *                      the buffer is large enough.
  1244  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
  1245  *                      dest may be NULL and the function will only return the length of the 
  1246  *                      result without writing any of the result string (pre-flighting).
  1247  * @param pDestLength   A pointer to receive the number of units written to the destination. If 
  1248  *                      pDestLength!=NULL then *pDestLength is always set to the 
  1249  *                      number of output units corresponding to the transformation of 
  1250  *                      all the input units, even in case of a buffer overflow.
  1251  * @param src           The original source string.
  1252  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
  1253  * @param pErrorCode    Must be a valid pointer to an error code value,
  1254  *                      which must not indicate a failure before the function call.
  1255  * @return A pointer to the destination buffer.
  1256  * @stable ICU 2.0
  1257  */
  1258 U_STABLE UChar* U_EXPORT2
  1259 u_strFromUTF8(UChar *dest,             
  1260               int32_t destCapacity,
  1261               int32_t *pDestLength,
  1262               const char *src, 
  1263               int32_t srcLength,
  1264               UErrorCode *pErrorCode);
  1265 
  1266 /**
  1267  * Converts a sequence of UChars (UTF-16) to UTF-32 units.
  1268  *
  1269  * @param dest          A buffer for the result string. The result will be zero-terminated if
  1270  *                      the buffer is large enough.
  1271  * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
  1272  *                      dest may be NULL and the function will only return the length of the 
  1273  *                      result without writing any of the result string (pre-flighting).
  1274  * @param pDestLength   A pointer to receive the number of units written to the destination. If 
  1275  *                      pDestLength!=NULL then *pDestLength is always set to the 
  1276  *                      number of output units corresponding to the transformation of 
  1277  *                      all the input units, even in case of a buffer overflow.
  1278  * @param src           The original source string.
  1279  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
  1280  * @param pErrorCode    Must be a valid pointer to an error code value,
  1281  *                      which must not indicate a failure before the function call.
  1282  * @return A pointer to the destination buffer.
  1283  * @stable ICU 2.0
  1284  */
  1285 U_STABLE UChar32* U_EXPORT2 
  1286 u_strToUTF32(UChar32 *dest, 
  1287              int32_t  destCapacity,
  1288              int32_t  *pDestLength,
  1289              const UChar *src, 
  1290              int32_t  srcLength,
  1291              UErrorCode *pErrorCode);
  1292 
  1293 /**
  1294  * Converts a sequence of UTF-32 units to UChars (UTF-16).
  1295  *
  1296  * @param dest          A buffer for the result string. The result will be zero-terminated if
  1297  *                      the buffer is large enough.
  1298  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
  1299  *                      dest may be NULL and the function will only return the length of the 
  1300  *                      result without writing any of the result string (pre-flighting).
  1301  * @param pDestLength   A pointer to receive the number of units written to the destination. If 
  1302  *                      pDestLength!=NULL then *pDestLength is always set to the 
  1303  *                      number of output units corresponding to the transformation of 
  1304  *                      all the input units, even in case of a buffer overflow.
  1305  * @param src           The original source string.
  1306  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
  1307  * @param pErrorCode    Must be a valid pointer to an error code value,
  1308  *                      which must not indicate a failure before the function call.
  1309  * @return A pointer to the destination buffer.
  1310  * @stable ICU 2.0
  1311  */
  1312 U_STABLE UChar* U_EXPORT2 
  1313 u_strFromUTF32(UChar   *dest,
  1314                int32_t destCapacity, 
  1315                int32_t *pDestLength,
  1316                const UChar32 *src,
  1317                int32_t srcLength,
  1318                UErrorCode *pErrorCode);
  1319 
  1320 #endif