Symaptic: os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/ustring.h@260cb5ec6c19

     1 /*

     2 **********************************************************************

     3 *   Copyright (C) 1998-2005, International Business Machines

     4 *   Corporation and others.  All Rights Reserved.

     5 **********************************************************************

6 *

     7 * File ustring.h

8 *

     9 * Modification History:

    10 *

    11 *   Date        Name        Description

    12 *   12/07/98    bertrand    Creation.

    13 ******************************************************************************

    14 */

    16 #ifndef USTRING_H

    17 #define USTRING_H

    19 #include "unicode/utypes.h"

    20 #include "unicode/putil.h"

    21 #include "unicode/uiter.h"

    23 /** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/

    24 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR

    25 #   define UBRK_TYPEDEF_UBREAK_ITERATOR

    26     typedef void UBreakIterator;

    27 #endif

    29 /**

    30  * \file

    31  * \brief C API: Unicode string handling functions

    32  *

    33  * These C API functions provide general Unicode string handling.

    34  *

    35  * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>

    36  * functions. (For example, they do not check for bad arguments like NULL string pointers.)

    37  * In some cases, only the thread-safe variant of such a function is implemented here

    38  * (see u_strtok_r()).

    39  *

    40  * Other functions provide more Unicode-specific functionality like locale-specific

    41  * upper/lower-casing and string comparison in code point order.

    42  *

    43  * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.

    44  * UTF-16 encodes each Unicode code point with either one or two UChar code units.

    45  * (This is the default form of Unicode, and a forward-compatible extension of the original,

    46  * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0

    47  * in 1996.)

    48  *

    49  * Some APIs accept a 32-bit UChar32 value for a single code point.

    50  *

    51  * ICU also handles 16-bit Unicode text with unpaired surrogates.

    52  * Such text is not well-formed UTF-16.

    53  * Code-point-related functions treat unpaired surrogates as surrogate code points,

    54  * i.e., as separate units.

    55  *

    56  * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),

    57  * it is much more efficient even for random access because the code unit values

    58  * for single-unit characters vs. lead units vs. trail units are completely disjoint.

    59  * This means that it is easy to determine character (code point) boundaries from

    60  * random offsets in the string.

    61  *

    62  * Unicode (UTF-16) string processing is optimized for the single-unit case.

    63  * Although it is important to support supplementary characters

    64  * (which use pairs of lead/trail code units called "surrogates"),

    65  * their occurrence is rare. Almost all characters in modern use require only

    66  * a single UChar code unit (i.e., their code point values are <=0xffff).

    67  *

    68  * For more details see the User Guide Strings chapter (http://icu.sourceforge.net/userguide/strings.html).

    69  * For a discussion of the handling of unpaired surrogates see also

    70  * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.

    71  */

    73 /**

    74  * Determine the length of an array of UChar.

    75  *

    76  * @param s The array of UChars, NUL (U+0000) terminated.

    77  * @return The number of UChars in <code>s</code>, minus the terminator.

    78  * @stable ICU 2.0

    79  */

    80 U_STABLE int32_t U_EXPORT2

    81 u_strlen(const UChar *s);

    83 /**

    84  * Count Unicode code points in the length UChar code units of the string.

    85  * A code point may occupy either one or two UChar code units.

    86  * Counting code points involves reading all code units.

    87  *

    88  * This function is basically the inverse of the U16_FWD_N() macro (see utf.h).

    89  *

    90  * @param s The input string.

    91  * @param length The number of UChar code units to be checked, or -1 to count all

    92  *               code points before the first NUL (U+0000).

    93  * @return The number of code points in the specified code units.

    94  * @stable ICU 2.0

    95  */

    96 U_STABLE int32_t U_EXPORT2

    97 u_countChar32(const UChar *s, int32_t length);

    99 /**

   100  * Check if the string contains more Unicode code points than a certain number.

   101  * This is more efficient than counting all code points in the entire string

   102  * and comparing that number with a threshold.

   103  * This function may not need to scan the string at all if the length is known

   104  * (not -1 for NUL-termination) and falls within a certain range, and

   105  * never needs to count more than 'number+1' code points.

   106  * Logically equivalent to (u_countChar32(s, length)>number).

   107  * A Unicode code point may occupy either one or two UChar code units.

   108  *

   109  * @param s The input string.

   110  * @param length The length of the string, or -1 if it is NUL-terminated.

   111  * @param number The number of code points in the string is compared against

   112  *               the 'number' parameter.

   113  * @return Boolean value for whether the string contains more Unicode code points

   114  *         than 'number'. Same as (u_countChar32(s, length)>number).

   115  * @stable ICU 2.4

   116  */

   117 U_STABLE UBool U_EXPORT2

   118 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);

   120 /**

   121  * Concatenate two ustrings.  Appends a copy of <code>src</code>,

   122  * including the null terminator, to <code>dst</code>. The initial copied

   123  * character from <code>src</code> overwrites the null terminator in <code>dst</code>.

   124  *

   125  * @param dst The destination string.

   126  * @param src The source string.

   127  * @return A pointer to <code>dst</code>.

   128  * @stable ICU 2.0

   129  */

   130 U_STABLE UChar* U_EXPORT2

   131 u_strcat(UChar     *dst,

   132     const UChar     *src);

   134 /**

   135  * Concatenate two ustrings.

   136  * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.

   137  * Adds a terminating NUL.

   138  * If src is too long, then only <code>n-1</code> characters will be copied

   139  * before the terminating NUL.

   140  * If <code>n&lt;=0</code> then dst is not modified.

   141  *

   142  * @param dst The destination string.

   143  * @param src The source string.

   144  * @param n The maximum number of characters to append.

   145  * @return A pointer to <code>dst</code>.

   146  * @stable ICU 2.0

   147  */

   148 U_STABLE UChar* U_EXPORT2

   149 u_strncat(UChar     *dst,

   150      const UChar     *src,

   151      int32_t     n);

   153 /**

   154  * Find the first occurrence of a substring in a string.

   155  * The substring is found at code point boundaries.

   156  * That means that if the substring begins with

   157  * a trail surrogate or ends with a lead surrogate,

   158  * then it is found only if these surrogates stand alone in the text.

   159  * Otherwise, the substring edge units would be matched against

   160  * halves of surrogate pairs.

   161  *

   162  * @param s The string to search (NUL-terminated).

   163  * @param substring The substring to find (NUL-terminated).

   164  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,

   165  *         or <code>s</code> itself if the <code>substring</code> is empty,

   166  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.

   167  * @stable ICU 2.0

   168  *

   169  * @see u_strrstr

   170  * @see u_strFindFirst

   171  * @see u_strFindLast

   172  */

   173 U_STABLE UChar * U_EXPORT2

   174 u_strstr(const UChar *s, const UChar *substring);

   176 /**

   177  * Find the first occurrence of a substring in a string.

   178  * The substring is found at code point boundaries.

   179  * That means that if the substring begins with

   180  * a trail surrogate or ends with a lead surrogate,

   181  * then it is found only if these surrogates stand alone in the text.

   182  * Otherwise, the substring edge units would be matched against

   183  * halves of surrogate pairs.

   184  *

   185  * @param s The string to search.

   186  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.

   187  * @param substring The substring to find (NUL-terminated if subLength is -1).

   188  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.

   189  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,

   190  *         or <code>s</code> itself if the <code>substring</code> is empty,

   191  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.

   192  * @stable ICU 2.4

   193  *

   194  * @see u_strstr

   195  * @see u_strFindLast

   196  */

   197 U_STABLE UChar * U_EXPORT2

   198 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);

   200 /**

   201  * Find the first occurrence of a BMP code point in a string.

   202  * A surrogate code point is found only if its match in the text is not

   203  * part of a surrogate pair.

   204  * A NUL character is found at the string terminator.

   205  *

   206  * @param s The string to search (NUL-terminated).

   207  * @param c The BMP code point to find.

   208  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>

   209  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   210  * @stable ICU 2.0

   211  *

   212  * @see u_strchr32

   213  * @see u_memchr

   214  * @see u_strstr

   215  * @see u_strFindFirst

   216  */

   217 U_STABLE UChar * U_EXPORT2

   218 u_strchr(const UChar *s, UChar c);

   220 /**

   221  * Find the first occurrence of a code point in a string.

   222  * A surrogate code point is found only if its match in the text is not

   223  * part of a surrogate pair.

   224  * A NUL character is found at the string terminator.

   225  *

   226  * @param s The string to search (NUL-terminated).

   227  * @param c The code point to find.

   228  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>

   229  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   230  * @stable ICU 2.0

   231  *

   232  * @see u_strchr

   233  * @see u_memchr32

   234  * @see u_strstr

   235  * @see u_strFindFirst

   236  */

   237 U_STABLE UChar * U_EXPORT2

   238 u_strchr32(const UChar *s, UChar32 c);

   240 /**

   241  * Find the last occurrence of a substring in a string.

   242  * The substring is found at code point boundaries.

   243  * That means that if the substring begins with

   244  * a trail surrogate or ends with a lead surrogate,

   245  * then it is found only if these surrogates stand alone in the text.

   246  * Otherwise, the substring edge units would be matched against

   247  * halves of surrogate pairs.

   248  *

   249  * @param s The string to search (NUL-terminated).

   250  * @param substring The substring to find (NUL-terminated).

   251  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,

   252  *         or <code>s</code> itself if the <code>substring</code> is empty,

   253  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.

   254  * @stable ICU 2.4

   255  *

   256  * @see u_strstr

   257  * @see u_strFindFirst

   258  * @see u_strFindLast

   259  */

   260 U_STABLE UChar * U_EXPORT2

   261 u_strrstr(const UChar *s, const UChar *substring);

   263 /**

   264  * Find the last occurrence of a substring in a string.

   265  * The substring is found at code point boundaries.

   266  * That means that if the substring begins with

   267  * a trail surrogate or ends with a lead surrogate,

   268  * then it is found only if these surrogates stand alone in the text.

   269  * Otherwise, the substring edge units would be matched against

   270  * halves of surrogate pairs.

   271  *

   272  * @param s The string to search.

   273  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.

   274  * @param substring The substring to find (NUL-terminated if subLength is -1).

   275  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.

   276  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,

   277  *         or <code>s</code> itself if the <code>substring</code> is empty,

   278  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.

   279  * @stable ICU 2.4

   280  *

   281  * @see u_strstr

   282  * @see u_strFindFirst

   283  */

   284 U_STABLE UChar * U_EXPORT2

   285 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);

   287 /**

   288  * Find the last occurrence of a BMP code point in a string.

   289  * A surrogate code point is found only if its match in the text is not

   290  * part of a surrogate pair.

   291  * A NUL character is found at the string terminator.

   292  *

   293  * @param s The string to search (NUL-terminated).

   294  * @param c The BMP code point to find.

   295  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>

   296  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   297  * @stable ICU 2.4

   298  *

   299  * @see u_strrchr32

   300  * @see u_memrchr

   301  * @see u_strrstr

   302  * @see u_strFindLast

   303  */

   304 U_STABLE UChar * U_EXPORT2

   305 u_strrchr(const UChar *s, UChar c);

   307 /**

   308  * Find the last occurrence of a code point in a string.

   309  * A surrogate code point is found only if its match in the text is not

   310  * part of a surrogate pair.

   311  * A NUL character is found at the string terminator.

   312  *

   313  * @param s The string to search (NUL-terminated).

   314  * @param c The code point to find.

   315  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>

   316  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   317  * @stable ICU 2.4

   318  *

   319  * @see u_strrchr

   320  * @see u_memrchr32

   321  * @see u_strrstr

   322  * @see u_strFindLast

   323  */

   324 U_STABLE UChar * U_EXPORT2

   325 u_strrchr32(const UChar *s, UChar32 c);

   327 /**

   328  * Locates the first occurrence in the string <code>string</code> of any of the characters

   329  * in the string <code>matchSet</code>.

   330  * Works just like C's strpbrk but with Unicode.

   331  *

   332  * @param string The string in which to search, NUL-terminated.

   333  * @param matchSet A NUL-terminated string defining a set of code points

   334  *                 for which to search in the text string.

   335  * @return A pointer to the character in <code>string</code> that matches one of the

   336  *         characters in <code>matchSet</code>, or NULL if no such character is found.

   337  * @stable ICU 2.0

   338  */

   339 U_STABLE UChar * U_EXPORT2

   340 u_strpbrk(const UChar *string, const UChar *matchSet);

   342 /**

   343  * Returns the number of consecutive characters in <code>string</code>,

   344  * beginning with the first, that do not occur somewhere in <code>matchSet</code>.

   345  * Works just like C's strcspn but with Unicode.

   346  *

   347  * @param string The string in which to search, NUL-terminated.

   348  * @param matchSet A NUL-terminated string defining a set of code points

   349  *                 for which to search in the text string.

   350  * @return The number of initial characters in <code>string</code> that do not

   351  *         occur in <code>matchSet</code>.

   352  * @see u_strspn

   353  * @stable ICU 2.0

   354  */

   355 U_STABLE int32_t U_EXPORT2

   356 u_strcspn(const UChar *string, const UChar *matchSet);

   358 /**

   359  * Returns the number of consecutive characters in <code>string</code>,

   360  * beginning with the first, that occur somewhere in <code>matchSet</code>.

   361  * Works just like C's strspn but with Unicode.

   362  *

   363  * @param string The string in which to search, NUL-terminated.

   364  * @param matchSet A NUL-terminated string defining a set of code points

   365  *                 for which to search in the text string.

   366  * @return The number of initial characters in <code>string</code> that do

   367  *         occur in <code>matchSet</code>.

   368  * @see u_strcspn

   369  * @stable ICU 2.0

   370  */

   371 U_STABLE int32_t U_EXPORT2

   372 u_strspn(const UChar *string, const UChar *matchSet);

   374 /**

   375  * The string tokenizer API allows an application to break a string into

   376  * tokens. Unlike strtok(), the state (the current pointer within the

   377  * original string) is maintained in saveState. In the first call, the

   378  * argument src is a pointer to the string. In subsequent calls to

   379  * return successive tokens of that string, src must be specified as

   380  * NULL. The value saveState is set by this function to maintain the

   381  * function's position within the string, and on each subsequent call

   382  * you must give this argument the same variable. This function does

   383  * handle surrogate pairs. This function is similar to strtok_r() from

   384  * the POSIX Threads Extension (1003.1c-1995).

   385  *

   386  * @param src String containing token(s). This string will be modified.

   387  *            After the first call to u_strtok_r(), this argument must

   388  *            be NULL to get to the next token.

   389  * @param delim Set of delimiter characters (Unicode code points).

   390  * @param saveState The current pointer within the original string,

   391  *              which is set by this function. The saveState

   392  *              parameter should be the address of a local variable of type

   393  *              UChar *. (i.e. define "UChar *myLocalSaveState" and use

   394  *              &myLocalSaveState for this parameter).

   395  * @return A pointer to the next token found in src, or NULL

   396  *         when there are no more tokens.

   397  * @stable ICU 2.0

   398  */

   399 U_STABLE UChar * U_EXPORT2

   400 u_strtok_r(UChar    *src,

   401      const UChar    *delim,

   402            UChar   **saveState);

   404 /**

   405  * Compare two Unicode strings for bitwise equality (code unit order).

   406  *

   407  * @param s1 A string to compare.

   408  * @param s2 A string to compare.

   409  * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative

   410  * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive

   411  * value if <code>s1</code> is bitwise greater than <code>s2</code>.

   412  * @stable ICU 2.0

   413  */

   414 U_STABLE int32_t  U_EXPORT2

   415 u_strcmp(const UChar     *s1,

   416          const UChar     *s2);

   418 /**

   419  * Compare two Unicode strings in code point order.

   420  * See u_strCompare for details.

   421  *

   422  * @param s1 A string to compare.

   423  * @param s2 A string to compare.

   424  * @return a negative/zero/positive integer corresponding to whether

   425  * the first string is less than/equal to/greater than the second one

   426  * in code point order

   427  * @stable ICU 2.0

   428  */

   429 U_STABLE int32_t U_EXPORT2

   430 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);

   432 /**

   433  * Compare two Unicode strings (binary order).

   434  *

   435  * The comparison can be done in code unit order or in code point order.

   436  * They differ only in UTF-16 when

   437  * comparing supplementary code points (U+10000..U+10ffff)

   438  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).

   439  * In code unit order, high BMP code points sort after supplementary code points

   440  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.

   441  *

   442  * This function works with strings of different explicitly specified lengths

   443  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.

   444  * NUL-terminated strings are possible with length arguments of -1.

   445  *

   446  * @param s1 First source string.

   447  * @param length1 Length of first source string, or -1 if NUL-terminated.

   448  *

   449  * @param s2 Second source string.

   450  * @param length2 Length of second source string, or -1 if NUL-terminated.

   451  *

   452  * @param codePointOrder Choose between code unit order (FALSE)

   453  *                       and code point order (TRUE).

   454  *

   455  * @return <0 or 0 or >0 as usual for string comparisons

   456  *

   457  * @stable ICU 2.2

   458  */

   459 U_STABLE int32_t U_EXPORT2

   460 u_strCompare(const UChar *s1, int32_t length1,

   461              const UChar *s2, int32_t length2,

   462              UBool codePointOrder);

   464 /**

   465  * Compare two Unicode strings (binary order)

   466  * as presented by UCharIterator objects.

   467  * Works otherwise just like u_strCompare().

   468  *

   469  * Both iterators are reset to their start positions.

   470  * When the function returns, it is undefined where the iterators

   471  * have stopped.

   472  *

   473  * @param iter1 First source string iterator.

   474  * @param iter2 Second source string iterator.

   475  * @param codePointOrder Choose between code unit order (FALSE)

   476  *                       and code point order (TRUE).

   477  *

   478  * @return <0 or 0 or >0 as usual for string comparisons

   479  *

   480  * @see u_strCompare

   481  *

   482  * @stable ICU 2.6

   483  */

   484 U_STABLE int32_t U_EXPORT2

   485 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);

   487 #ifndef U_COMPARE_CODE_POINT_ORDER

   488 /* see also unistr.h and unorm.h */

   489 /**

   490  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:

   491  * Compare strings in code point order instead of code unit order.

   492  * @stable ICU 2.2

   493  */

   494 #define U_COMPARE_CODE_POINT_ORDER  0x8000

   495 #endif

   497 /**

   498  * Compare two strings case-insensitively using full case folding.

   499  * This is equivalent to

   500  *   u_strCompare(u_strFoldCase(s1, options),

   501  *                u_strFoldCase(s2, options),

   502  *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).

   503  *

   504  * The comparison can be done in UTF-16 code unit order or in code point order.

   505  * They differ only when comparing supplementary code points (U+10000..U+10ffff)

   506  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).

   507  * In code unit order, high BMP code points sort after supplementary code points

   508  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.

   509  *

   510  * This function works with strings of different explicitly specified lengths

   511  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.

   512  * NUL-terminated strings are possible with length arguments of -1.

   513  *

   514  * @param s1 First source string.

   515  * @param length1 Length of first source string, or -1 if NUL-terminated.

   516  *

   517  * @param s2 Second source string.

   518  * @param length2 Length of second source string, or -1 if NUL-terminated.

   519  *

   520  * @param options A bit set of options:

   521  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   522  *     Comparison in code unit order with default case folding.

   523  *

   524  *   - U_COMPARE_CODE_POINT_ORDER

   525  *     Set to choose code point order instead of code unit order

   526  *     (see u_strCompare for details).

   527  *

   528  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   529  *

   530  * @param pErrorCode Must be a valid pointer to an error code value,

   531  *                  which must not indicate a failure before the function call.

   532  *

   533  * @return <0 or 0 or >0 as usual for string comparisons

   534  *

   535  * @stable ICU 2.2

   536  */

   537 U_STABLE int32_t U_EXPORT2

   538 u_strCaseCompare(const UChar *s1, int32_t length1,

   539                  const UChar *s2, int32_t length2,

   540                  uint32_t options,

   541                  UErrorCode *pErrorCode);

   543 /**

   544  * Compare two ustrings for bitwise equality.

   545  * Compares at most <code>n</code> characters.

   546  *

   547  * @param ucs1 A string to compare.

   548  * @param ucs2 A string to compare.

   549  * @param n The maximum number of characters to compare.

   550  * @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative

   551  * value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive

   552  * value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.

   553  * @stable ICU 2.0

   554  */

   555 U_STABLE int32_t U_EXPORT2

   556 u_strncmp(const UChar     *ucs1,

   557      const UChar     *ucs2,

   558      int32_t     n);

   560 /**

   561  * Compare two Unicode strings in code point order.

   562  * This is different in UTF-16 from u_strncmp() if supplementary characters are present.

   563  * For details, see u_strCompare().

   564  *

   565  * @param s1 A string to compare.

   566  * @param s2 A string to compare.

   567  * @param n The maximum number of characters to compare.

   568  * @return a negative/zero/positive integer corresponding to whether

   569  * the first string is less than/equal to/greater than the second one

   570  * in code point order

   571  * @stable ICU 2.0

   572  */

   573 U_STABLE int32_t U_EXPORT2

   574 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);

   576 /**

   577  * Compare two strings case-insensitively using full case folding.

   578  * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).

   579  *

   580  * @param s1 A string to compare.

   581  * @param s2 A string to compare.

   582  * @param options A bit set of options:

   583  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   584  *     Comparison in code unit order with default case folding.

   585  *

   586  *   - U_COMPARE_CODE_POINT_ORDER

   587  *     Set to choose code point order instead of code unit order

   588  *     (see u_strCompare for details).

   589  *

   590  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   591  *

   592  * @return A negative, zero, or positive integer indicating the comparison result.

   593  * @stable ICU 2.0

   594  */

   595 U_STABLE int32_t U_EXPORT2

   596 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);

   598 /**

   599  * Compare two strings case-insensitively using full case folding.

   600  * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),

   601  * u_strFoldCase(s2, at most n, options)).

   602  *

   603  * @param s1 A string to compare.

   604  * @param s2 A string to compare.

   605  * @param n The maximum number of characters in each string to case-fold and then compare.

   606  * @param options A bit set of options:

   607  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   608  *     Comparison in code unit order with default case folding.

   609  *

   610  *   - U_COMPARE_CODE_POINT_ORDER

   611  *     Set to choose code point order instead of code unit order

   612  *     (see u_strCompare for details).

   613  *

   614  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   615  *

   616  * @return A negative, zero, or positive integer indicating the comparison result.

   617  * @stable ICU 2.0

   618  */

   619 U_STABLE int32_t U_EXPORT2

   620 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);

   622 /**

   623  * Compare two strings case-insensitively using full case folding.

   624  * This is equivalent to u_strcmp(u_strFoldCase(s1, length, options),

   625  * u_strFoldCase(s2, length, options)).

   626  *

   627  * @param s1 A string to compare.

   628  * @param s2 A string to compare.

   629  * @param length The number of characters in each string to case-fold and then compare.

   630  * @param options A bit set of options:

   631  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   632  *     Comparison in code unit order with default case folding.

   633  *

   634  *   - U_COMPARE_CODE_POINT_ORDER

   635  *     Set to choose code point order instead of code unit order

   636  *     (see u_strCompare for details).

   637  *

   638  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   639  *

   640  * @return A negative, zero, or positive integer indicating the comparison result.

   641  * @stable ICU 2.0

   642  */

   643 U_STABLE int32_t U_EXPORT2

   644 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);

   646 /**

   647  * Copy a ustring. Adds a null terminator.

   648  *

   649  * @param dst The destination string.

   650  * @param src The source string.

   651  * @return A pointer to <code>dst</code>.

   652  * @stable ICU 2.0

   653  */

   654 U_STABLE UChar* U_EXPORT2

   655 u_strcpy(UChar     *dst,

   656     const UChar     *src);

   658 /**

   659  * Copy a ustring.

   660  * Copies at most <code>n</code> characters.  The result will be null terminated

   661  * if the length of <code>src</code> is less than <code>n</code>.

   662  *

   663  * @param dst The destination string.

   664  * @param src The source string.

   665  * @param n The maximum number of characters to copy.

   666  * @return A pointer to <code>dst</code>.

   667  * @stable ICU 2.0

   668  */

   669 U_STABLE UChar* U_EXPORT2

   670 u_strncpy(UChar     *dst,

   671      const UChar     *src,

   672      int32_t     n);

   674 #if !UCONFIG_NO_CONVERSION

   676 /**

   677  * Copy a byte string encoded in the default codepage to a ustring.

   678  * Adds a null terminator.

   679  * Performs a host byte to UChar conversion

   680  *

   681  * @param dst The destination string.

   682  * @param src The source string.

   683  * @return A pointer to <code>dst</code>.

   684  * @stable ICU 2.0

   685  */

   686 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,

   687                const char *src );

   689 /**

   690  * Copy a byte string encoded in the default codepage to a ustring.

   691  * Copies at most <code>n</code> characters.  The result will be null terminated

   692  * if the length of <code>src</code> is less than <code>n</code>.

   693  * Performs a host byte to UChar conversion

   694  *

   695  * @param dst The destination string.

   696  * @param src The source string.

   697  * @param n The maximum number of characters to copy.

   698  * @return A pointer to <code>dst</code>.

   699  * @stable ICU 2.0

   700  */

   701 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,

   702             const char *src,

   703             int32_t n);

   705 /**

   706  * Copy ustring to a byte string encoded in the default codepage.

   707  * Adds a null terminator.

   708  * Performs a UChar to host byte conversion

   709  *

   710  * @param dst The destination string.

   711  * @param src The source string.

   712  * @return A pointer to <code>dst</code>.

   713  * @stable ICU 2.0

   714  */

   715 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,

   716             const UChar *src );

   718 /**

   719  * Copy ustring to a byte string encoded in the default codepage.

   720  * Copies at most <code>n</code> characters.  The result will be null terminated

   721  * if the length of <code>src</code> is less than <code>n</code>.

   722  * Performs a UChar to host byte conversion

   723  *

   724  * @param dst The destination string.

   725  * @param src The source string.

   726  * @param n The maximum number of characters to copy.

   727  * @return A pointer to <code>dst</code>.

   728  * @stable ICU 2.0

   729  */

   730 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,

   731             const UChar *src,

   732             int32_t n );

   734 #endif

   736 /**

   737  * Synonym for memcpy(), but with UChars only.

   738  * @param dest The destination string

   739  * @param src The source string

   740  * @param count The number of characters to copy

   741  * @return A pointer to <code>dest</code>

   742  * @stable ICU 2.0

   743  */

   744 U_STABLE UChar* U_EXPORT2

   745 u_memcpy(UChar *dest, const UChar *src, int32_t count);

   747 /**

   748  * Synonym for memmove(), but with UChars only.

   749  * @param dest The destination string

   750  * @param src The source string

   751  * @param count The number of characters to move

   752  * @return A pointer to <code>dest</code>

   753  * @stable ICU 2.0

   754  */

   755 U_STABLE UChar* U_EXPORT2

   756 u_memmove(UChar *dest, const UChar *src, int32_t count);

   758 /**

   759  * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.

   760  *

   761  * @param dest The destination string.

   762  * @param c The character to initialize the string.

   763  * @param count The maximum number of characters to set.

   764  * @return A pointer to <code>dest</code>.

   765  * @stable ICU 2.0

   766  */

   767 U_STABLE UChar* U_EXPORT2

   768 u_memset(UChar *dest, UChar c, int32_t count);

   770 /**

   771  * Compare the first <code>count</code> UChars of each buffer.

   772  *

   773  * @param buf1 The first string to compare.

   774  * @param buf2 The second string to compare.

   775  * @param count The maximum number of UChars to compare.

   776  * @return When buf1 < buf2, a negative number is returned.

   777  *      When buf1 == buf2, 0 is returned.

   778  *      When buf1 > buf2, a positive number is returned.

   779  * @stable ICU 2.0

   780  */

   781 U_STABLE int32_t U_EXPORT2

   782 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);

   784 /**

   785  * Compare two Unicode strings in code point order.

   786  * This is different in UTF-16 from u_memcmp() if supplementary characters are present.

   787  * For details, see u_strCompare().

   788  *

   789  * @param s1 A string to compare.

   790  * @param s2 A string to compare.

   791  * @param count The maximum number of characters to compare.

   792  * @return a negative/zero/positive integer corresponding to whether

   793  * the first string is less than/equal to/greater than the second one

   794  * in code point order

   795  * @stable ICU 2.0

   796  */

   797 U_STABLE int32_t U_EXPORT2

   798 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);

   800 /**

   801  * Find the first occurrence of a BMP code point in a string.

   802  * A surrogate code point is found only if its match in the text is not

   803  * part of a surrogate pair.

   804  * A NUL character is found at the string terminator.

   805  *

   806  * @param s The string to search (contains <code>count</code> UChars).

   807  * @param c The BMP code point to find.

   808  * @param count The length of the string.

   809  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>

   810  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   811  * @stable ICU 2.0

   812  *

   813  * @see u_strchr

   814  * @see u_memchr32

   815  * @see u_strFindFirst

   816  */

   817 U_STABLE UChar* U_EXPORT2

   818 u_memchr(const UChar *s, UChar c, int32_t count);

   820 /**

   821  * Find the first occurrence of a code point in a string.

   822  * A surrogate code point is found only if its match in the text is not

   823  * part of a surrogate pair.

   824  * A NUL character is found at the string terminator.

   825  *

   826  * @param s The string to search (contains <code>count</code> UChars).

   827  * @param c The code point to find.

   828  * @param count The length of the string.

   829  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>

   830  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   831  * @stable ICU 2.0

   832  *

   833  * @see u_strchr32

   834  * @see u_memchr

   835  * @see u_strFindFirst

   836  */

   837 U_STABLE UChar* U_EXPORT2

   838 u_memchr32(const UChar *s, UChar32 c, int32_t count);

   840 /**

   841  * Find the last occurrence of a BMP code point in a string.

   842  * A surrogate code point is found only if its match in the text is not

   843  * part of a surrogate pair.

   844  * A NUL character is found at the string terminator.

   845  *

   846  * @param s The string to search (contains <code>count</code> UChars).

   847  * @param c The BMP code point to find.

   848  * @param count The length of the string.

   849  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>

   850  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   851  * @stable ICU 2.4

   852  *

   853  * @see u_strrchr

   854  * @see u_memrchr32

   855  * @see u_strFindLast

   856  */

   857 U_STABLE UChar* U_EXPORT2

   858 u_memrchr(const UChar *s, UChar c, int32_t count);

   860 /**

   861  * Find the last occurrence of a code point in a string.

   862  * A surrogate code point is found only if its match in the text is not

   863  * part of a surrogate pair.

   864  * A NUL character is found at the string terminator.

   865  *

   866  * @param s The string to search (contains <code>count</code> UChars).

   867  * @param c The code point to find.

   868  * @param count The length of the string.

   869  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>

   870  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.

   871  * @stable ICU 2.4

   872  *

   873  * @see u_strrchr32

   874  * @see u_memrchr

   875  * @see u_strFindLast

   876  */

   877 U_STABLE UChar* U_EXPORT2

   878 u_memrchr32(const UChar *s, UChar32 c, int32_t count);

   880 /**

   881  * Unicode String literals in C.

   882  * We need one macro to declare a variable for the string

   883  * and to statically preinitialize it if possible,

   884  * and a second macro to dynamically intialize such a string variable if necessary.

   885  *

   886  * The macros are defined for maximum performance.

   887  * They work only for strings that contain "invariant characters", i.e.,

   888  * only latin letters, digits, and some punctuation.

   889  * See utypes.h for details.

   890  *

   891  * A pair of macros for a single string must be used with the same

   892  * parameters.

   893  * The string parameter must be a C string literal.

   894  * The length of the string, not including the terminating

   895  * <code>NUL</code>, must be specified as a constant.

   896  * The U_STRING_DECL macro should be invoked exactly once for one

   897  * such string variable before it is used.

   898  *

   899  * Usage:

   900  * <pre>

   901  *    U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);

   902  *    U_STRING_DECL(ustringVar2, "jumps 5%", 8);

   903  *    static UBool didInit=FALSE;

   904  *

   905  *    int32_t function() {

   906  *        if(!didInit) {

   907  *            U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);

   908  *            U_STRING_INIT(ustringVar2, "jumps 5%", 8);

   909  *            didInit=TRUE;

   910  *        }

   911  *        return u_strcmp(ustringVar1, ustringVar2);

   912  *    }

   913  * </pre>

   914  * @stable ICU 2.0

   915  */

   916 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))

   917 #   define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs }

   918     /**@stable ICU 2.0 */

   919 #   define U_STRING_INIT(var, cs, length)

   920 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY

   921 #   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs }

   922     /**@stable ICU 2.0 */

   923 #   define U_STRING_INIT(var, cs, length)

   924 #else

   925 #   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]

   926     /**@stable ICU 2.0 */

   927 #   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)

   928 #endif

   930 /**

   931  * Unescape a string of characters and write the resulting

   932  * Unicode characters to the destination buffer.  The following escape

   933  * sequences are recognized:

   934  *

   935  * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]

   936  * \\Uhhhhhhhh   8 hex digits

   937  * \\xhh         1-2 hex digits

   938  * \\x{h...}     1-8 hex digits

   939  * \\ooo         1-3 octal digits; o in [0-7]

   940  * \\cX          control-X; X is masked with 0x1F

   941  *

   942  * as well as the standard ANSI C escapes:

   943  *

   944  * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,

   945  * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,

   946  * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C

   947  *

   948  * Anything else following a backslash is generically escaped.  For

   949  * example, "[a\\-z]" returns "[a-z]".

   950  *

   951  * If an escape sequence is ill-formed, this method returns an empty

   952  * string.  An example of an ill-formed sequence is "\\u" followed by

   953  * fewer than 4 hex digits.

   954  *

   955  * The above characters are recognized in the compiler's codepage,

   956  * that is, they are coded as 'u', '\\', etc.  Characters that are

   957  * not parts of escape sequences are converted using u_charsToUChars().

   958  *

   959  * This function is similar to UnicodeString::unescape() but not

   960  * identical to it.  The latter takes a source UnicodeString, so it

   961  * does escape recognition but no conversion.

   962  *

   963  * @param src a zero-terminated string of invariant characters

   964  * @param dest pointer to buffer to receive converted and unescaped

   965  * text and, if there is room, a zero terminator.  May be NULL for

   966  * preflighting, in which case no UChars will be written, but the

   967  * return value will still be valid.  On error, an empty string is

   968  * stored here (if possible).

   969  * @param destCapacity the number of UChars that may be written at

   970  * dest.  Ignored if dest == NULL.

   971  * @return the length of unescaped string.

   972  * @see u_unescapeAt

   973  * @see UnicodeString#unescape()

   974  * @see UnicodeString#unescapeAt()

   975  * @stable ICU 2.0

   976  */

   977 U_STABLE int32_t U_EXPORT2

   978 u_unescape(const char *src,

   979            UChar *dest, int32_t destCapacity);

   981 U_CDECL_BEGIN

   982 /**

   983  * Callback function for u_unescapeAt() that returns a character of

   984  * the source text given an offset and a context pointer.  The context

   985  * pointer will be whatever is passed into u_unescapeAt().

   986  *

   987  * @param offset pointer to the offset that will be passed to u_unescapeAt().

   988  * @param context an opaque pointer passed directly into u_unescapeAt()

   989  * @return the character represented by the escape sequence at

   990  * offset

   991  * @see u_unescapeAt

   992  * @stable ICU 2.0

   993  */

   994 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);

   995 U_CDECL_END

   997 /**

   998  * Unescape a single sequence. The character at offset-1 is assumed

   999  * (without checking) to be a backslash.  This method takes a callback

  1000  * pointer to a function that returns the UChar at a given offset.  By

  1001  * varying this callback, ICU functions are able to unescape char*

  1002  * strings, UnicodeString objects, and UFILE pointers.

  1003  *

  1004  * If offset is out of range, or if the escape sequence is ill-formed,

  1005  * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()

  1006  * for a list of recognized sequences.

  1007  *

  1008  * @param charAt callback function that returns a UChar of the source

  1009  * text given an offset and a context pointer.

  1010  * @param offset pointer to the offset that will be passed to charAt.

  1011  * The offset value will be updated upon return to point after the

  1012  * last parsed character of the escape sequence.  On error the offset

  1013  * is unchanged.

  1014  * @param length the number of characters in the source text.  The

  1015  * last character of the source text is considered to be at offset

  1016  * length-1.

  1017  * @param context an opaque pointer passed directly into charAt.

  1018  * @return the character represented by the escape sequence at

  1019  * offset, or (UChar32)0xFFFFFFFF on error.

  1020  * @see u_unescape()

  1021  * @see UnicodeString#unescape()

  1022  * @see UnicodeString#unescapeAt()

  1023  * @stable ICU 2.0

  1024  */

  1025 U_STABLE UChar32 U_EXPORT2

  1026 u_unescapeAt(UNESCAPE_CHAR_AT charAt,

  1027              int32_t *offset,

  1028              int32_t length,

  1029              void *context);

  1031 /**

  1032  * Uppercase the characters in a string.

  1033  * Casing is locale-dependent and context-sensitive.

  1034  * The result may be longer or shorter than the original.

  1035  * The source string and the destination buffer are allowed to overlap.

  1036  *

  1037  * @param dest      A buffer for the result string. The result will be zero-terminated if

  1038  *                  the buffer is large enough.

  1039  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then

  1040  *                  dest may be NULL and the function will only return the length of the result

  1041  *                  without writing any of the result string.

  1042  * @param src       The original string

  1043  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.

  1044  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.

  1045  * @param pErrorCode Must be a valid pointer to an error code value,

  1046  *                  which must not indicate a failure before the function call.

  1047  * @return The length of the result string. It may be greater than destCapacity. In that case,

  1048  *         only some of the result was written to the destination buffer.

  1049  * @stable ICU 2.0

  1050  */

  1051 U_STABLE int32_t U_EXPORT2

  1052 u_strToUpper(UChar *dest, int32_t destCapacity,

  1053              const UChar *src, int32_t srcLength,

  1054              const char *locale,

  1055              UErrorCode *pErrorCode);

  1057 /**

  1058  * Lowercase the characters in a string.

  1059  * Casing is locale-dependent and context-sensitive.

  1060  * The result may be longer or shorter than the original.

  1061  * The source string and the destination buffer are allowed to overlap.

  1062  *

  1063  * @param dest      A buffer for the result string. The result will be zero-terminated if

  1064  *                  the buffer is large enough.

  1065  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then

  1066  *                  dest may be NULL and the function will only return the length of the result

  1067  *                  without writing any of the result string.

  1068  * @param src       The original string

  1069  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.

  1070  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.

  1071  * @param pErrorCode Must be a valid pointer to an error code value,

  1072  *                  which must not indicate a failure before the function call.

  1073  * @return The length of the result string. It may be greater than destCapacity. In that case,

  1074  *         only some of the result was written to the destination buffer.

  1075  * @stable ICU 2.0

  1076  */

  1077 U_STABLE int32_t U_EXPORT2

  1078 u_strToLower(UChar *dest, int32_t destCapacity,

  1079              const UChar *src, int32_t srcLength,

  1080              const char *locale,

  1081              UErrorCode *pErrorCode);

  1083 #if !UCONFIG_NO_BREAK_ITERATION

  1085 /**

  1086  * Titlecase a string.

  1087  * Casing is locale-dependent and context-sensitive.

  1088  * Titlecasing uses a break iterator to find the first characters of words

  1089  * that are to be titlecased. It titlecases those characters and lowercases

  1090  * all others.

  1091  *

  1092  * The titlecase break iterator can be provided to customize for arbitrary

  1093  * styles, using rules and dictionaries beyond the standard iterators.

  1094  * It may be more efficient to always provide an iterator to avoid

  1095  * opening and closing one for each string.

  1096  * The standard titlecase iterator for the root locale implements the

  1097  * algorithm of Unicode TR 21.

  1098  *

  1099  * This function uses only the first() and next() methods of the

  1100  * provided break iterator.

  1101  *

  1102  * The result may be longer or shorter than the original.

  1103  * The source string and the destination buffer are allowed to overlap.

  1104  *

  1105  * @param dest      A buffer for the result string. The result will be zero-terminated if

  1106  *                  the buffer is large enough.

  1107  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then

  1108  *                  dest may be NULL and the function will only return the length of the result

  1109  *                  without writing any of the result string.

  1110  * @param src       The original string

  1111  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.

  1112  * @param titleIter A break iterator to find the first characters of words

  1113  *                  that are to be titlecased.

  1114  *                  If none is provided (NULL), then a standard titlecase

  1115  *                  break iterator is opened.

  1116  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.

  1117  * @param pErrorCode Must be a valid pointer to an error code value,

  1118  *                  which must not indicate a failure before the function call.

  1119  * @return The length of the result string. It may be greater than destCapacity. In that case,

  1120  *         only some of the result was written to the destination buffer.

  1121  * @stable ICU 2.1

  1122  */

  1123 U_STABLE int32_t U_EXPORT2

  1124 u_strToTitle(UChar *dest, int32_t destCapacity,

  1125              const UChar *src, int32_t srcLength,

  1126              UBreakIterator *titleIter,

  1127              const char *locale,

  1128              UErrorCode *pErrorCode);

  1130 #endif

  1132 /**

  1133  * Case-fold the characters in a string.

  1134  * Case-folding is locale-independent and not context-sensitive,

  1135  * but there is an option for whether to include or exclude mappings for dotted I

  1136  * and dotless i that are marked with 'I' in CaseFolding.txt.

  1137  * The result may be longer or shorter than the original.

  1138  * The source string and the destination buffer are allowed to overlap.

  1139  *

  1140  * @param dest      A buffer for the result string. The result will be zero-terminated if

  1141  *                  the buffer is large enough.

  1142  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then

  1143  *                  dest may be NULL and the function will only return the length of the result

  1144  *                  without writing any of the result string.

  1145  * @param src       The original string

  1146  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.

  1147  * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I

  1148  * @param pErrorCode Must be a valid pointer to an error code value,

  1149  *                  which must not indicate a failure before the function call.

  1150  * @return The length of the result string. It may be greater than destCapacity. In that case,

  1151  *         only some of the result was written to the destination buffer.

  1152  * @stable ICU 2.0

  1153  */

  1154 U_STABLE int32_t U_EXPORT2

  1155 u_strFoldCase(UChar *dest, int32_t destCapacity,

  1156               const UChar *src, int32_t srcLength,

  1157               uint32_t options,

  1158               UErrorCode *pErrorCode);

  1160 /**

  1161  * Converts a sequence of UChars to wchar_t units.

  1162  *

  1163  * @param dest          A buffer for the result string. The result will be zero-terminated if

  1164  *                      the buffer is large enough.

  1165  * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then

  1166  *                      dest may be NULL and the function will only return the length of the

  1167  *                      result without writing any of the result string (pre-flighting).

  1168  * @param pDestLength   A pointer to receive the number of units written to the destination. If

  1169  *                      pDestLength!=NULL then *pDestLength is always set to the

  1170  *                      number of output units corresponding to the transformation of

  1171  *                      all the input units, even in case of a buffer overflow.

  1172  * @param src           The original source string

  1173  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.

  1174  * @param pErrorCode    Must be a valid pointer to an error code value,

  1175  *                      which must not indicate a failure before the function call.

  1176  * @return The pointer to destination buffer.

  1177  * @stable ICU 2.0

  1178  */

  1179 U_STABLE wchar_t* U_EXPORT2

  1180 u_strToWCS(wchar_t *dest,

  1181            int32_t destCapacity,

  1182            int32_t *pDestLength,

  1183            const UChar *src,

  1184            int32_t srcLength,

  1185            UErrorCode *pErrorCode);

  1186 /**

  1187  * Converts a sequence of wchar_t units to UChars

  1188  *

  1189  * @param dest          A buffer for the result string. The result will be zero-terminated if

  1190  *                      the buffer is large enough.

  1191  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then

  1192  *                      dest may be NULL and the function will only return the length of the

  1193  *                      result without writing any of the result string (pre-flighting).

  1194  * @param pDestLength   A pointer to receive the number of units written to the destination. If

  1195  *                      pDestLength!=NULL then *pDestLength is always set to the

  1196  *                      number of output units corresponding to the transformation of

  1197  *                      all the input units, even in case of a buffer overflow.

  1198  * @param src           The original source string

  1199  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.

  1200  * @param pErrorCode    Must be a valid pointer to an error code value,

  1201  *                      which must not indicate a failure before the function call.

  1202  * @return The pointer to destination buffer.

  1203  * @stable ICU 2.0

  1204  */

  1205 U_STABLE UChar* U_EXPORT2

  1206 u_strFromWCS(UChar   *dest,

  1207              int32_t destCapacity,

  1208              int32_t *pDestLength,

  1209              const wchar_t *src,

  1210              int32_t srcLength,

  1211              UErrorCode *pErrorCode);

  1212 /**

  1213  * Converts a sequence of UChars (UTF-16) to UTF-8 bytes

  1214  *

  1215  * @param dest          A buffer for the result string. The result will be zero-terminated if

  1216  *                      the buffer is large enough.

  1217  * @param destCapacity  The size of the buffer (number of chars). If it is 0, then

  1218  *                      dest may be NULL and the function will only return the length of the

  1219  *                      result without writing any of the result string (pre-flighting).

  1220  * @param pDestLength   A pointer to receive the number of units written to the destination. If

  1221  *                      pDestLength!=NULL then *pDestLength is always set to the

  1222  *                      number of output units corresponding to the transformation of

  1223  *                      all the input units, even in case of a buffer overflow.

  1224  * @param src           The original source string

  1225  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.

  1226  * @param pErrorCode    Must be a valid pointer to an error code value,

  1227  *                      which must not indicate a failure before the function call.

  1228  * @return The pointer to destination buffer.

  1229  * @stable ICU 2.0

  1230  */

  1231 U_STABLE char* U_EXPORT2

  1232 u_strToUTF8(char *dest,

  1233             int32_t destCapacity,

  1234             int32_t *pDestLength,

  1235             const UChar *src,

  1236             int32_t srcLength,

  1237             UErrorCode *pErrorCode);

  1239 /**

  1240  * Converts a sequence of UTF-8 bytes to UChars (UTF-16).

  1241  *

  1242  * @param dest          A buffer for the result string. The result will be zero-terminated if

  1243  *                      the buffer is large enough.

  1244  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then

  1245  *                      dest may be NULL and the function will only return the length of the

  1246  *                      result without writing any of the result string (pre-flighting).

  1247  * @param pDestLength   A pointer to receive the number of units written to the destination. If

  1248  *                      pDestLength!=NULL then *pDestLength is always set to the

  1249  *                      number of output units corresponding to the transformation of

  1250  *                      all the input units, even in case of a buffer overflow.

  1251  * @param src           The original source string

  1252  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.

  1253  * @param pErrorCode    Must be a valid pointer to an error code value,

  1254  *                      which must not indicate a failure before the function call.

  1255  * @return The pointer to destination buffer.

  1256  * @stable ICU 2.0

  1257  */

  1258 U_STABLE UChar* U_EXPORT2

  1259 u_strFromUTF8(UChar *dest,

  1260               int32_t destCapacity,

  1261               int32_t *pDestLength,

  1262               const char *src,

  1263               int32_t srcLength,

  1264               UErrorCode *pErrorCode);

  1266 /**

  1267  * Converts a sequence of UChars (UTF-16) to UTF32 units.

  1268  *

  1269  * @param dest          A buffer for the result string. The result will be zero-terminated if

  1270  *                      the buffer is large enough.

  1271  * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then

  1272  *                      dest may be NULL and the function will only return the length of the

  1273  *                      result without writing any of the result string (pre-flighting).

  1274  * @param pDestLength   A pointer to receive the number of units written to the destination. If

  1275  *                      pDestLength!=NULL then *pDestLength is always set to the

  1276  *                      number of output units corresponding to the transformation of

  1277  *                      all the input units, even in case of a buffer overflow.

  1278  * @param src           The original source string

  1279  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.

  1280  * @param pErrorCode    Must be a valid pointer to an error code value,

  1281  *                      which must not indicate a failure before the function call.

  1282  * @return The pointer to destination buffer.

  1283  * @stable ICU 2.0

  1284  */

  1285 U_STABLE UChar32* U_EXPORT2

  1286 u_strToUTF32(UChar32 *dest,

  1287              int32_t  destCapacity,

  1288              int32_t  *pDestLength,

  1289              const UChar *src,

  1290              int32_t  srcLength,

  1291              UErrorCode *pErrorCode);

  1293 /**

  1294  * Converts a sequence of UTF32 units to UChars (UTF-16)

  1295  *

  1296  * @param dest          A buffer for the result string. The result will be zero-terminated if

  1297  *                      the buffer is large enough.

  1298  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then

  1299  *                      dest may be NULL and the function will only return the length of the

  1300  *                      result without writing any of the result string (pre-flighting).

  1301  * @param pDestLength   A pointer to receive the number of units written to the destination. If

  1302  *                      pDestLength!=NULL then *pDestLength is always set to the

  1303  *                      number of output units corresponding to the transformation of

  1304  *                      all the input units, even in case of a buffer overflow.

  1305  * @param src           The original source string

  1306  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.

  1307  * @param pErrorCode    Must be a valid pointer to an error code value,

  1308  *                      which must not indicate a failure before the function call.

  1309  * @return The pointer to destination buffer.

  1310  * @stable ICU 2.0

  1311  */

  1312 U_STABLE UChar* U_EXPORT2

  1313 u_strFromUTF32(UChar   *dest,

  1314                int32_t destCapacity,

  1315                int32_t *pDestLength,

  1316                const UChar32 *src,

  1317                int32_t srcLength,

  1318                UErrorCode *pErrorCode);

  1320 #endif

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--