os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/unistr.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 1998-2005, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *
     7 * File unistr.h
     8 *
     9 * Modification History:
    10 *
    11 *   Date        Name        Description
    12 *   09/25/98    stephen     Creation.
    13 *   11/11/98    stephen     Changed per 11/9 code review.
    14 *   04/20/99    stephen     Overhauled per 4/16 code review.
    15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
    16 *                           handleReplaceBetween(); other methods unchanged.
    17 *   06/25/01    grhoten     Remove dependency on iostream.
    18 ******************************************************************************
    19 */
    20 
    21 #ifndef UNISTR_H
    22 #define UNISTR_H
    23 
    24 /**
    25  * \file 
    26  * \brief C++ API: Unicode String 
    27  */
    28 
    29 #include "unicode/rep.h"
    30 
    31 struct UConverter;          // unicode/ucnv.h
    32 class  StringThreadTest;
    33 
    34 #ifndef U_COMPARE_CODE_POINT_ORDER
    35 /* Defined here only if not already defined; see also ustring.h and unorm.h. */
    36 /**
    37  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
    38  * compare strings in code point order instead of code unit order.
    39  * @stable ICU 2.2
    40  */
    41 #define U_COMPARE_CODE_POINT_ORDER  0x8000
    42 #endif
    43 
    44 #ifndef USTRING_H
    45 /* Prototype of u_strlen() (see ustring.h); declared here only when USTRING_H is not yet defined. */
    46 U_STABLE int32_t U_EXPORT2
    47 u_strlen(const UChar *s);
    48 #endif
    49 
    50 U_NAMESPACE_BEGIN
    51 
    52 class Locale;               // unicode/locid.h
    53 class StringCharacterIterator;
    54 class BreakIterator;        // unicode/brkiter.h
    55 
    56 /* The <iostream> include has been moved to unicode/ustream.h */
    57 
    58 /**
    59  * Shorthand for the constant used in the UnicodeString(char *, int32_t, EInvariant)
    60  * constructor, which constructs a Unicode string from an invariant-character char * string.
    61  * For details about invariant characters, see utypes.h.
    62  * This constructor has no runtime dependency on conversion code and is
    63  * therefore recommended over ones taking a charset name string
    64  * (where the empty string "" indicates invariant-character conversion).
    65  *
    66  * @draft ICU 3.2
    67  */
    68 #define US_INV UnicodeString::kInvariant
    69 
    70 /**
    71  * Unicode String literals in C++, with an explicit length argument.
    72  * Depending on the platform properties, different UnicodeString
    73  * constructors are used to create a UnicodeString object from
    74  * a string literal.
    75  * The macros are defined for maximum performance.
    76  * They work only for strings that contain "invariant characters", i.e.,
    77  * only Latin letters, digits, and some punctuation.
    78  * See utypes.h for details.
    79  *
    80  * The string parameter must be a C string literal.
    81  * The length of the string, not including the terminating
    82  * <code>NUL</code>, must be specified as a constant.
    83  * The U_STRING_DECL macro should be invoked exactly once for one
    84  * such string variable before it is used.
    85  * @stable ICU 2.0
    86  */
    87 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
    88 #   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
    89 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
    90 #   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
    91 #else
    92 #   define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
    93 #endif
    94 
    95 /**
    96  * Unicode String literals in C++, without an explicit length argument.
    97  * Depending on the platform properties, different UnicodeString
    98  * constructors are used to create a UnicodeString object from
    99  * a string literal; each variant passes -1 as the length.
   100  * The macros are defined for improved performance.
   101  * They work only for strings that contain "invariant characters", i.e.,
   102  * only Latin letters, digits, and some punctuation.
   103  * See utypes.h for details.
   104  *
   105  * The string parameter must be a C string literal.
   106  * @stable ICU 2.0
   107  */
   108 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
   109 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
   110 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
   111 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
   112 #else
   113 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
   114 #endif
   115 
   116 /**
   117  * UnicodeString is a string class that stores Unicode characters directly and provides
   118  * functionality similar to that of the Java String and StringBuffer classes.
   119  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
   120  *
   121  * The UnicodeString class is not suitable for subclassing.
   122  *
   123  * <p>For an overview of Unicode strings in C and C++ see the
   124  * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>
   125  *
   126  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
   127  * A Unicode character may be stored with either one code unit
   128  * (the most common case) or with a matched pair of special code units
   129  * ("surrogates"). The data type for code units is UChar. 
   130  * For single-character handling, a Unicode character code <em>point</em> is a value
   131  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
   132  *
   133  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
   134  * This is the same as with multi-byte char* strings in traditional string handling.
   135  * Operations on partial strings typically do not test for code point boundaries.
   136  * If necessary, the user needs to take care of such boundaries by testing for the code unit
   137  * values or by using functions like
   138  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
   139  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
   140  *
   141  * UnicodeString methods are more lenient with regard to input parameter values
   142  * than other ICU APIs. In particular:
   143  * - If indexes are out of bounds for a UnicodeString object
   144  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
   145  * - If primitive string pointer values (e.g., const UChar * or char *)
   146  *   for input strings are NULL, then those input string parameters are treated
   147  *   as if they pointed to an empty string.
   148  *   However, this is <em>not</em> the case for char * parameters for charset names
   149  *   or other IDs.
   150  * - Most UnicodeString methods do not take a UErrorCode parameter because
   151  *   there are usually very few opportunities for failure other than a shortage
   152  *   of memory, error codes in low-level C++ string methods would be inconvenient,
   153  *   and the error code as the last parameter (ICU convention) would prevent
   154  *   the use of default parameter values.
   155  *   Instead, such methods set the UnicodeString into a "bogus" state
   156  *   (see isBogus()) if an error occurs.
   157  *
   158  * In string comparisons, two UnicodeString objects that are both "bogus"
   159  * compare equal (to be transitive and prevent endless loops in sorting),
   160  * and a "bogus" string compares less than any non-"bogus" one.
   161  *
   162  * Const UnicodeString methods are thread-safe. Multiple threads can use
   163  * const methods on the same UnicodeString object simultaneously,
   164  * but non-const methods must not be called concurrently (in multiple threads)
   165  * with any other (const or non-const) methods.
   166  *
   167  * Similarly, const UnicodeString & parameters are thread-safe.
   168  * One object may be passed in as such a parameter concurrently in multiple threads.
   169  * This includes the const UnicodeString & parameters for
   170  * copy construction, assignment, and cloning.
   171  *
   172  * <p>UnicodeString uses several storage methods.
   173  * String contents can be stored inside the UnicodeString object itself,
   174  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
   175  * Most of this is done transparently, but careful aliasing in particular provides
   176  * significant performance improvements.
   177  * Also, the internal buffer is accessible via special functions.
   178  * For details see the
   179  * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>
   180  *
   181  * @see utf.h
   182  * @see CharacterIterator
   183  * @stable ICU 2.0
   184  */
   185 class U_COMMON_API UnicodeString : public Replaceable
   186 {
   187 public:
   188 
   189   /**
   190    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
   191    * which constructs a Unicode string from an invariant-character char * string.
   192    * Use the macro US_INV instead of the full qualification for this value.
   193    *
   194    * @see US_INV
   195    * @draft ICU 3.2
   196    */
   197   enum EInvariant {
   198     /**
   199      * @see EInvariant
   200      * @draft ICU 3.2
   201      */
   202     kInvariant
   203   };
   204 
   205   //========================================
   206   // Read-only operations
   207   //========================================
   208 
   209   /* Comparison - bitwise only - for international comparison use collation */
   210 
   211   /**
   212    * Equality operator. Performs only bitwise comparison.
   213    * @param text The UnicodeString to compare to this one.
   214    * @return TRUE if <TT>text</TT> contains the same characters as this one,
   215    * FALSE otherwise.
   216    * @stable ICU 2.0
   217    */
   218   inline UBool operator== (const UnicodeString& text) const;
   219 
   220   /**
   221    * Inequality operator. Performs only bitwise comparison.
   222    * @param text The UnicodeString to compare to this one.
   223    * @return FALSE if <TT>text</TT> contains the same characters as this one,
   224    * TRUE otherwise.
   225    * @stable ICU 2.0
   226    */
   227   inline UBool operator!= (const UnicodeString& text) const;
   228 
   229   /**
   230    * Greater than operator. Performs only bitwise comparison.
   231    * @param text The UnicodeString to compare to this one.
   232    * @return TRUE if the characters in this are bitwise
   233    * greater than the characters in <code>text</code>, FALSE otherwise
   234    * @stable ICU 2.0
   235    */
   236   inline UBool operator> (const UnicodeString& text) const;
   237 
   238   /**
   239    * Less than operator. Performs only bitwise comparison.
   240    * @param text The UnicodeString to compare to this one.
   241    * @return TRUE if the characters in this are bitwise
   242    * less than the characters in <code>text</code>, FALSE otherwise
   243    * @stable ICU 2.0
   244    */
   245   inline UBool operator< (const UnicodeString& text) const;
   246 
   247   /**
   248    * Greater than or equal operator. Performs only bitwise comparison.
   249    * @param text The UnicodeString to compare to this one.
   250    * @return TRUE if the characters in this are bitwise
   251    * greater than or equal to the characters in <code>text</code>, FALSE otherwise
   252    * @stable ICU 2.0
   253    */
   254   inline UBool operator>= (const UnicodeString& text) const;
   255 
   256   /**
   257    * Less than or equal operator. Performs only bitwise comparison.
   258    * @param text The UnicodeString to compare to this one.
   259    * @return TRUE if the characters in this are bitwise
   260    * less than or equal to the characters in <code>text</code>, FALSE otherwise
   261    * @stable ICU 2.0
   262    */
   263   inline UBool operator<= (const UnicodeString& text) const;
   264 
   265   /**
   266    * Compare the characters bitwise in this UnicodeString to
   267    * the characters in <code>text</code>.
   268    * @param text The UnicodeString to compare to this one.
   269    * @return The result of bitwise character comparison: 0 if this
   270    * contains the same characters as <code>text</code>, -1 if the characters in
   271    * this are bitwise less than the characters in <code>text</code>, +1 if the
   272    * characters in this are bitwise greater than the characters
   273    * in <code>text</code>.
   274    * @stable ICU 2.0
   275    */
   276   inline int8_t compare(const UnicodeString& text) const;
   277 
   278   /**
   279    * Compare the characters bitwise in the range
   280    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   281    * in <TT>text</TT>
   282    * @param start the offset at which the compare operation begins
   283    * @param length the number of characters of text to compare.
   284    * @param text the other text to be compared against this string.
   285    * @return The result of bitwise character comparison: 0 if this
   286    * contains the same characters as <code>text</code>, -1 if the characters in
   287    * this are bitwise less than the characters in <code>text</code>, +1 if the
   288    * characters in this are bitwise greater than the characters
   289    * in <code>text</code>.
   290    * @stable ICU 2.0
   291    */
   292   inline int8_t compare(int32_t start,
   293          int32_t length,
   294          const UnicodeString& text) const;
   295 
   296   /**
   297    * Compare the characters bitwise in the range
   298    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   299    * in <TT>srcText</TT> in the range
   300    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   301    * @param start the offset at which the compare operation begins
   302    * @param length the number of characters in this to compare.
   303    * @param srcText the text to be compared
   304    * @param srcStart the offset into <TT>srcText</TT> to start comparison
   305    * @param srcLength the number of characters in <TT>srcText</TT> to compare
   306    * @return The result of bitwise character comparison: 0 if this
   307    * contains the same characters as <code>srcText</code>, -1 if the characters in
   308    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
   309    * characters in this are bitwise greater than the characters
   310    * in <code>srcText</code>.
   311    * @stable ICU 2.0
   312    */
   313    inline int8_t compare(int32_t start,
   314          int32_t length,
   315          const UnicodeString& srcText,
   316          int32_t srcStart,
   317          int32_t srcLength) const;
   318 
   319   /**
   320    * Compare the characters bitwise in this UnicodeString with the first
   321    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
   322    * @param srcChars The characters to compare to this UnicodeString.
   323    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
   324    * @return The result of bitwise character comparison: 0 if this
   325    * contains the same characters as <code>srcChars</code>, -1 if the characters in
   326    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   327    * characters in this are bitwise greater than the characters
   328    * in <code>srcChars</code>.
   329    * @stable ICU 2.0
   330    */
   331   inline int8_t compare(const UChar *srcChars,
   332          int32_t srcLength) const;
   333 
   334   /**
   335    * Compare the characters bitwise in the range
   336    * [<TT>start</TT>, <TT>start + length</TT>) with the first
   337    * <TT>length</TT> characters in <TT>srcChars</TT>
   338    * @param start the offset at which the compare operation begins
   339    * @param length the number of characters to compare.
   340    * @param srcChars the characters to be compared
   341    * @return The result of bitwise character comparison: 0 if this
   342    * contains the same characters as <code>srcChars</code>, -1 if the characters in
   343    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   344    * characters in this are bitwise greater than the characters
   345    * in <code>srcChars</code>.
   346    * @stable ICU 2.0
   347    */
   348   inline int8_t compare(int32_t start,
   349          int32_t length,
   350          const UChar *srcChars) const;
   351 
   352   /**
   353    * Compare the characters bitwise in the range
   354    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   355    * in <TT>srcChars</TT> in the range
   356    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   357    * @param start the offset at which the compare operation begins
   358    * @param length the number of characters in this to compare
   359    * @param srcChars the characters to be compared
   360    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
   361    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
   362    * @return The result of bitwise character comparison: 0 if this
   363    * contains the same characters as <code>srcChars</code>, -1 if the characters in
   364    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   365    * characters in this are bitwise greater than the characters
   366    * in <code>srcChars</code>.
   367    * @stable ICU 2.0
   368    */
   369   inline int8_t compare(int32_t start,
   370          int32_t length,
   371          const UChar *srcChars,
   372          int32_t srcStart,
   373          int32_t srcLength) const;
   374 
   375   /**
   376    * Compare the characters bitwise in the range
   377    * [<TT>start</TT>, <TT>limit</TT>) with the characters
   378    * in <TT>srcText</TT> in the range
   379    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
   380    * @param start the offset at which the compare operation begins
   381    * @param limit the offset immediately following the compare operation
   382    * @param srcText the text to be compared
   383    * @param srcStart the offset into <TT>srcText</TT> to start comparison
   384    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
   385    * @return The result of bitwise character comparison: 0 if this
   386    * contains the same characters as <code>srcText</code>, -1 if the characters in
   387    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
   388    * characters in this are bitwise greater than the characters
   389    * in <code>srcText</code>.
   390    * @stable ICU 2.0
   391    */
   392   inline int8_t compareBetween(int32_t start,
   393             int32_t limit,
   394             const UnicodeString& srcText,
   395             int32_t srcStart,
   396             int32_t srcLimit) const;
   397 
   398   /**
   399    * Compare two Unicode strings in code point order.
   400    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
   401    * if supplementary characters are present:
   402    *
   403    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   404    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   405    * which means that they compare as less than some other BMP characters like U+feff.
   406    * This function compares Unicode strings in code point order.
   407    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   408    *
   409    * @param text Another string to compare this one to.
   410    * @return a negative/zero/positive integer corresponding to whether
   411    * this string is less than/equal to/greater than the second one
   412    * in code point order
   413    * @stable ICU 2.0
   414    */
   415   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
   416 
   417   /**
   418    * Compare two Unicode strings in code point order.
   419    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
   420    * if supplementary characters are present:
   421    *
   422    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   423    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   424    * which means that they compare as less than some other BMP characters like U+feff.
   425    * This function compares Unicode strings in code point order.
   426    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   427    *
   428    * @param start The start offset in this string at which the compare operation begins.
   429    * @param length The number of code units from this string to compare.
   430    * @param srcText Another string to compare this one to.
   431    * @return a negative/zero/positive integer corresponding to whether
   432    * this string is less than/equal to/greater than the second one
   433    * in code point order
   434    * @stable ICU 2.0
   435    */
   436   inline int8_t compareCodePointOrder(int32_t start,
   437                                       int32_t length,
   438                                       const UnicodeString& srcText) const;
   439 
   440   /**
   441    * Compare two Unicode strings in code point order.
   442    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
   443    * if supplementary characters are present:
   444    *
   445    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   446    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   447    * which means that they compare as less than some other BMP characters like U+feff.
   448    * This function compares Unicode strings in code point order.
   449    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   450    *
   451    * @param start The start offset in this string at which the compare operation begins.
   452    * @param length The number of code units from this string to compare.
   453    * @param srcText Another string to compare this one to.
   454    * @param srcStart The start offset in that string at which the compare operation begins.
   455    * @param srcLength The number of code units from that string to compare.
   456    * @return a negative/zero/positive integer corresponding to whether
   457    * this string is less than/equal to/greater than the second one
   458    * in code point order
   459    * @stable ICU 2.0
   460    */
   461    inline int8_t compareCodePointOrder(int32_t start,
   462                                        int32_t length,
   463                                        const UnicodeString& srcText,
   464                                        int32_t srcStart,
   465                                        int32_t srcLength) const;
   466 
   467   /**
   468    * Compare two Unicode strings in code point order.
   469    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
   470    * if supplementary characters are present:
   471    *
   472    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   473    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   474    * which means that they compare as less than some other BMP characters like U+feff.
   475    * This function compares Unicode strings in code point order.
   476    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   477    *
   478    * @param srcChars A pointer to another string to compare this one to.
   479    * @param srcLength The number of code units from that string to compare.
   480    * @return a negative/zero/positive integer corresponding to whether
   481    * this string is less than/equal to/greater than the second one
   482    * in code point order
   483    * @stable ICU 2.0
   484    */
   485   inline int8_t compareCodePointOrder(const UChar *srcChars,
   486                                       int32_t srcLength) const;
   487 
   488   /**
   489    * Compare two Unicode strings in code point order.
   490    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
   491    * if supplementary characters are present:
   492    *
   493    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   494    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   495    * which means that they compare as less than some other BMP characters like U+feff.
   496    * This function compares Unicode strings in code point order.
   497    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   498    *
   499    * @param start The start offset in this string at which the compare operation begins.
   500    * @param length The number of code units from this string to compare.
   501    * @param srcChars A pointer to another string to compare this one to.
   502    * @return a negative/zero/positive integer corresponding to whether
   503    * this string is less than/equal to/greater than the second one
   504    * in code point order
   505    * @stable ICU 2.0
   506    */
   507   inline int8_t compareCodePointOrder(int32_t start,
   508                                       int32_t length,
   509                                       const UChar *srcChars) const;
   510 
   511   /**
   512    * Compare two Unicode strings in code point order.
   513    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
   514    * if supplementary characters are present:
   515    *
   516    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   517    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   518    * which means that they compare as less than some other BMP characters like U+feff.
   519    * This function compares Unicode strings in code point order.
   520    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   521    *
   522    * @param start The start offset in this string at which the compare operation begins.
   523    * @param length The number of code units from this string to compare.
   524    * @param srcChars A pointer to another string to compare this one to.
   525    * @param srcStart The start offset in that string at which the compare operation begins.
   526    * @param srcLength The number of code units from that string to compare.
   527    * @return a negative/zero/positive integer corresponding to whether
   528    * this string is less than/equal to/greater than the second one
   529    * in code point order
   530    * @stable ICU 2.0
   531    */
   532   inline int8_t compareCodePointOrder(int32_t start,
   533                                       int32_t length,
   534                                       const UChar *srcChars,
   535                                       int32_t srcStart,
   536                                       int32_t srcLength) const;
   537 
   538   /**
   539    * Compare two Unicode strings in code point order.
   540    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
   541    * if supplementary characters are present:
   542    *
   543    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   544    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   545    * which means that they compare as less than some other BMP characters like U+feff.
   546    * This function compares Unicode strings in code point order.
   547    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   548    *
   549    * @param start The start offset in this string at which the compare operation begins.
   550    * @param limit The offset after the last code unit from this string to compare.
   551    * @param srcText Another string to compare this one to.
   552    * @param srcStart The start offset in that string at which the compare operation begins.
   553    * @param srcLimit The offset after the last code unit from that string to compare.
   554    * @return a negative/zero/positive integer corresponding to whether
   555    * this string is less than/equal to/greater than the second one
   556    * in code point order
   557    * @stable ICU 2.0
   558    */
   559   inline int8_t compareCodePointOrderBetween(int32_t start,
   560                                              int32_t limit,
   561                                              const UnicodeString& srcText,
   562                                              int32_t srcStart,
   563                                              int32_t srcLimit) const;
   564 
   565   /**
   566    * Compare two strings case-insensitively using full case folding.
   567    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
   568    *
   569    * @param text Another string to compare this one to.
   570    * @param options A bit set of options:
   571    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   572    *     Comparison in code unit order with default case folding.
   573    *
   574    *   - U_COMPARE_CODE_POINT_ORDER
   575    *     Set to choose code point order instead of code unit order
   576    *     (see u_strCompare for details).
   577    *
   578    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   579    *
   580    * @return A negative, zero, or positive integer indicating the comparison result.
   581    * @stable ICU 2.0
   582    */
   583   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
   584 
   585   /**
   586    * Compare a range of this string with <TT>srcText</TT> case-insensitively using full case folding.
   587    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   588    *
   589    * @param start The start offset in this string at which the compare operation begins.
   590    * @param length The number of code units from this string to compare.
   591    * @param srcText Another string to compare this one to.
   592    * @param options A bit set of options:
   593    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   594    *     Comparison in code unit order with default case folding.
   595    *
   596    *   - U_COMPARE_CODE_POINT_ORDER
   597    *     Set to choose code point order instead of code unit order
   598    *     (see u_strCompare for details).
   599    *
   600    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I (see that constant's documentation for its effect)
   601    *
   602    * @return A negative, zero, or positive integer indicating the comparison result.
   603    * @stable ICU 2.0
   604    */
   605   inline int8_t caseCompare(int32_t start,
   606          int32_t length,
   607          const UnicodeString& srcText,
   608          uint32_t options) const;
   609 
   610   /**
   611    * Compare a range of this string with a range of <TT>srcText</TT> case-insensitively using full case folding.
   612    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   613    *
   614    * @param start The start offset in this string at which the compare operation begins.
   615    * @param length The number of code units from this string to compare.
   616    * @param srcText Another string to compare this one to.
   617    * @param srcStart The start offset in <TT>srcText</TT> at which the compare operation begins.
   618    * @param srcLength The number of code units from <TT>srcText</TT> to compare.
   619    * @param options A bit set of options:
   620    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   621    *     Comparison in code unit order with default case folding.
   622    *
   623    *   - U_COMPARE_CODE_POINT_ORDER
   624    *     Set to choose code point order instead of code unit order
   625    *     (see u_strCompare for details).
   626    *
   627    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I (see that constant's documentation for its effect)
   628    *
   629    * @return A negative, zero, or positive integer indicating the comparison result.
   630    * @stable ICU 2.0
   631    */
   632   inline int8_t caseCompare(int32_t start,
   633          int32_t length,
   634          const UnicodeString& srcText,
   635          int32_t srcStart,
   636          int32_t srcLength,
   637          uint32_t options) const;
   638 
   639   /**
   640    * Compare this string with the UChar array <TT>srcChars</TT> case-insensitively using full case folding.
   641    * This is conceptually equivalent to this->foldCase(options).compare(srcChars.foldCase(options)), with srcChars treated as a string.
   642    *
   643    * @param srcChars A pointer to another string to compare this one to.
   644    * @param srcLength The number of code units from <TT>srcChars</TT> to compare.
   645    * @param options A bit set of options:
   646    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   647    *     Comparison in code unit order with default case folding.
   648    *
   649    *   - U_COMPARE_CODE_POINT_ORDER
   650    *     Set to choose code point order instead of code unit order
   651    *     (see u_strCompare for details).
   652    *
   653    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I (see that constant's documentation for its effect)
   654    *
   655    * @return A negative, zero, or positive integer indicating the comparison result.
   656    * @stable ICU 2.0
   657    */
   658   inline int8_t caseCompare(const UChar *srcChars,
   659          int32_t srcLength,
   660          uint32_t options) const;
   661 
   662   /**
   663    * Compare a range of this string with the UChar array <TT>srcChars</TT> case-insensitively using full case folding.
   664    * This is conceptually equivalent to this->foldCase(options).compare(srcChars.foldCase(options)), with srcChars treated as a string.
   665    *
   666    * @param start The start offset in this string at which the compare operation begins.
   667    * @param length The number of code units from this string to compare.
   668    * @param srcChars A pointer to another string to compare this one to. NOTE(review): no source length is passed; presumably <TT>length</TT> code units are read from it -- confirm against the inline definition.
   669    * @param options A bit set of options:
   670    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   671    *     Comparison in code unit order with default case folding.
   672    *
   673    *   - U_COMPARE_CODE_POINT_ORDER
   674    *     Set to choose code point order instead of code unit order
   675    *     (see u_strCompare for details).
   676    *
   677    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I (see that constant's documentation for its effect)
   678    *
   679    * @return A negative, zero, or positive integer indicating the comparison result.
   680    * @stable ICU 2.0
   681    */
   682   inline int8_t caseCompare(int32_t start,
   683          int32_t length,
   684          const UChar *srcChars,
   685          uint32_t options) const;
   686 
   687   /**
   688    * Compare a range of this string with a range of the UChar array <TT>srcChars</TT> case-insensitively using full case folding.
   689    * This is conceptually equivalent to this->foldCase(options).compare(srcChars.foldCase(options)), with srcChars treated as a string.
   690    *
   691    * @param start The start offset in this string at which the compare operation begins.
   692    * @param length The number of code units from this string to compare.
   693    * @param srcChars A pointer to another string to compare this one to.
   694    * @param srcStart The start offset in <TT>srcChars</TT> at which the compare operation begins.
   695    * @param srcLength The number of code units from <TT>srcChars</TT> to compare.
   696    * @param options A bit set of options:
   697    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   698    *     Comparison in code unit order with default case folding.
   699    *
   700    *   - U_COMPARE_CODE_POINT_ORDER
   701    *     Set to choose code point order instead of code unit order
   702    *     (see u_strCompare for details).
   703    *
   704    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I (see that constant's documentation for its effect)
   705    *
   706    * @return A negative, zero, or positive integer indicating the comparison result.
   707    * @stable ICU 2.0
   708    */
   709   inline int8_t caseCompare(int32_t start,
   710          int32_t length,
   711          const UChar *srcChars,
   712          int32_t srcStart,
   713          int32_t srcLength,
   714          uint32_t options) const;
   715 
   716   /**
   717    * Compare the range [start, limit) of this string with the range [srcStart, srcLimit) of <TT>srcText</TT> case-insensitively using full case folding.
   718    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
   719    *
   720    * @param start The start offset in this string at which the compare operation begins.
   721    * @param limit The offset after the last code unit from this string to compare.
   722    * @param srcText Another string to compare this one to.
   723    * @param srcStart The start offset in <TT>srcText</TT> at which the compare operation begins.
   724    * @param srcLimit The offset after the last code unit from <TT>srcText</TT> to compare.
   725    * @param options A bit set of options:
   726    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   727    *     Comparison in code unit order with default case folding.
   728    *
   729    *   - U_COMPARE_CODE_POINT_ORDER
   730    *     Set to choose code point order instead of code unit order
   731    *     (see u_strCompare for details).
   732    *
   733    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I (see that constant's documentation for its effect)
   734    *
   735    * @return A negative, zero, or positive integer indicating the comparison result.
   736    * @stable ICU 2.0
   737    */
   738   inline int8_t caseCompareBetween(int32_t start,
   739             int32_t limit,
   740             const UnicodeString& srcText,
   741             int32_t srcStart,
   742             int32_t srcLimit,
   743             uint32_t options) const;
   744 
   745   /**
   746    * Determine whether this string starts with the characters in <TT>text</TT>.
   747    * @param text The text to match.
   748    * @return TRUE if this string starts with the characters in <TT>text</TT>,
   749    * FALSE otherwise
   750    * @stable ICU 2.0
   751    */
   752   inline UBool startsWith(const UnicodeString& text) const;
   753 
   754   /**
   755    * Determine whether this string starts with the characters in <TT>srcText</TT>
   756    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   757    * @param srcText The text to match.
   758    * @param srcStart the offset into <TT>srcText</TT> to start matching
   759    * @param srcLength the number of characters in <TT>srcText</TT> to match
   760    * @return TRUE if this string starts with the indicated characters in <TT>srcText</TT>,
   761    * FALSE otherwise
   762    * @stable ICU 2.0
   763    */
   764   inline UBool startsWith(const UnicodeString& srcText,
   765             int32_t srcStart,
   766             int32_t srcLength) const;
   767 
   768   /**
   769    * Determine whether this string starts with the characters in <TT>srcChars</TT>.
   770    * @param srcChars The characters to match.
   771    * @param srcLength the number of characters in <TT>srcChars</TT>
   772    * @return TRUE if this string starts with the characters in <TT>srcChars</TT>,
   773    * FALSE otherwise
   774    * @stable ICU 2.0
   775    */
   776   inline UBool startsWith(const UChar *srcChars,
   777             int32_t srcLength) const;
   778 
   779   /**
   780    * Determine whether this string starts with the characters in <TT>srcChars</TT>
   781    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   782    * @param srcChars The characters to match.
   783    * @param srcStart the offset into <TT>srcChars</TT> to start matching
   784    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   785    * @return TRUE if this string starts with the indicated characters in <TT>srcChars</TT>, FALSE otherwise
   786    * @stable ICU 2.0
   787    */
   788   inline UBool startsWith(const UChar *srcChars,
   789             int32_t srcStart,
   790             int32_t srcLength) const;
   791 
   792   /**
   793    * Determine whether this string ends with the characters in <TT>text</TT>.
   794    * @param text The text to match.
   795    * @return TRUE if this string ends with the characters in <TT>text</TT>,
   796    * FALSE otherwise
   797    * @stable ICU 2.0
   798    */
   799   inline UBool endsWith(const UnicodeString& text) const;
   800 
   801   /**
   802    * Determine whether this string ends with the characters in <TT>srcText</TT>
   803    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   804    * @param srcText The text to match.
   805    * @param srcStart the offset into <TT>srcText</TT> to start matching
   806    * @param srcLength the number of characters in <TT>srcText</TT> to match
   807    * @return TRUE if this string ends with the indicated characters in <TT>srcText</TT>,
   808    * FALSE otherwise
   809    * @stable ICU 2.0
   810    */
   811   inline UBool endsWith(const UnicodeString& srcText,
   812           int32_t srcStart,
   813           int32_t srcLength) const;
   814 
   815   /**
   816    * Determine whether this string ends with the characters in <TT>srcChars</TT>.
   817    * @param srcChars The characters to match.
   818    * @param srcLength the number of characters in <TT>srcChars</TT>
   819    * @return TRUE if this string ends with the characters in <TT>srcChars</TT>,
   820    * FALSE otherwise
   821    * @stable ICU 2.0
   822    */
   823   inline UBool endsWith(const UChar *srcChars,
   824           int32_t srcLength) const;
   825 
   826   /**
   827    * Determine whether this string ends with the characters in <TT>srcChars</TT>
   828    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   829    * @param srcChars The characters to match.
   830    * @param srcStart the offset into <TT>srcChars</TT> to start matching
   831    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   832    * @return TRUE if this string ends with the indicated characters in <TT>srcChars</TT>,
   833    * FALSE otherwise
   834    * @stable ICU 2.0
   835    */
   836   inline UBool endsWith(const UChar *srcChars,
   837           int32_t srcStart,
   838           int32_t srcLength) const;
   839 
   840 
   841   /* Searching - bitwise only */
   842 
   843   /**
   844    * Locate in this string the first occurrence of the characters in <TT>text</TT>,
   845    * using bitwise comparison.
   846    * @param text The text to search for.
   847    * @return The offset into this string of the start of <TT>text</TT>,
   848    * or -1 if not found.
   849    * @stable ICU 2.0
   850    */
   851   inline int32_t indexOf(const UnicodeString& text) const;
   852 
   853   /**
   854    * Locate in this string the first occurrence of the characters in <TT>text</TT>
   855    * starting at offset <TT>start</TT>, using bitwise comparison.
   856    * @param text The text to search for.
   857    * @param start The offset into this string at which searching will start.
   858    * @return The offset into this string of the start of <TT>text</TT>,
   859    * or -1 if not found.
   860    * @stable ICU 2.0
   861    */
   862   inline int32_t indexOf(const UnicodeString& text,
   863               int32_t start) const;
   864 
   865   /**
   866    * Locate in this string the first occurrence in the range
   867    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   868    * in <TT>text</TT>, using bitwise comparison.
   869    * @param text The text to search for.
   870    * @param start The offset into this string at which searching will start.
   871    * @param length The number of characters in this string to search
   872    * @return The offset into this string of the start of <TT>text</TT>,
   873    * or -1 if not found.
   874    * @stable ICU 2.0
   875    */
   876   inline int32_t indexOf(const UnicodeString& text,
   877               int32_t start,
   878               int32_t length) const;
   879 
   880   /**
   881    * Locate in this string the first occurrence in the range
   882    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   883    *  in <TT>srcText</TT> in the range
   884    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   885    * using bitwise comparison.
   886    * @param srcText The text to search for.
   887    * @param srcStart the offset into <TT>srcText</TT> at which
   888    * to start matching
   889    * @param srcLength the number of characters in <TT>srcText</TT> to match
   890    * @param start the offset into this string at which to start matching
   891    * @param length the number of characters in this string to search
   892    * @return The offset into this string of the start of the matched <TT>srcText</TT> characters,
   893    * or -1 if not found.
   894    * @stable ICU 2.0
   895    */
   896   inline int32_t indexOf(const UnicodeString& srcText,
   897               int32_t srcStart,
   898               int32_t srcLength,
   899               int32_t start,
   900               int32_t length) const;
   901 
   902   /**
   903    * Locate in this string the first occurrence of the characters in
   904    * <TT>srcChars</TT>
   905    * starting at offset <TT>start</TT>, using bitwise comparison.
   906    * @param srcChars The text to search for.
   907    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   908    * @param start the offset into this string at which to start matching
   909    * @return The offset into this string of the start of <TT>srcChars</TT>,
   910    * or -1 if not found.
   911    * @stable ICU 2.0
   912    */
   913   inline int32_t indexOf(const UChar *srcChars,
   914               int32_t srcLength,
   915               int32_t start) const;
   916 
   917   /**
   918    * Locate in this string the first occurrence in the range
   919    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   920    * in <TT>srcChars</TT>, using bitwise comparison.
   921    * @param srcChars The text to search for.
   922    * @param srcLength the number of characters in <TT>srcChars</TT>
   923    * @param start The offset into this string at which searching will start.
   924    * @param length The number of characters in this string to search
   925    * @return The offset into this string of the start of <TT>srcChars</TT>,
   926    * or -1 if not found.
   927    * @stable ICU 2.0
   928    */
   929   inline int32_t indexOf(const UChar *srcChars,
   930               int32_t srcLength,
   931               int32_t start,
   932               int32_t length) const;
   933 
   934   /**
   935    * Locate in this string the first occurrence in the range
   936    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   937    * in <TT>srcChars</TT> in the range
   938    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   939    * using bitwise comparison.
   940    * @param srcChars The text to search for.
   941    * @param srcStart the offset into <TT>srcChars</TT> at which
   942    * to start matching
   943    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   944    * @param start the offset into this string at which to start matching
   945    * @param length the number of characters in this string to search
   946    * @return The offset into this string of the start of the matched <TT>srcChars</TT> characters,
   947    * or -1 if not found.
   948    * @stable ICU 2.0
   949    */
   950   int32_t indexOf(const UChar *srcChars,
   951               int32_t srcStart,
   952               int32_t srcLength,
   953               int32_t start,
   954               int32_t length) const;
   955 
   956   /**
   957    * Locate in this string the first occurrence of the BMP code point <code>c</code>,
   958    * using bitwise comparison.
   959    * @param c The code unit to search for.
   960    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   961    * @stable ICU 2.0
   962    */
   963   inline int32_t indexOf(UChar c) const;
   964 
   965   /**
   966    * Locate in this string the first occurrence of the code point <TT>c</TT>,
   967    * using bitwise comparison.
   968    *
   969    * @param c The code point to search for.
   970    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   971    * @stable ICU 2.0
   972    */
   973   inline int32_t indexOf(UChar32 c) const;
   974 
   975   /**
   976    * Locate in this string the first occurrence of the BMP code point <code>c</code>,
   977    * starting at offset <TT>start</TT>, using bitwise comparison.
   978    * @param c The code unit to search for.
   979    * @param start The offset into this string at which searching will start.
   980    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   981    * @stable ICU 2.0
   982    */
   983   inline int32_t indexOf(UChar c,
   984               int32_t start) const;
   985 
   986   /**
   987    * Locate in this string the first occurrence of the code point <TT>c</TT>
   988    * starting at offset <TT>start</TT>, using bitwise comparison.
   989    *
   990    * @param c The code point to search for.
   991    * @param start The offset into this string at which searching will start.
   992    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   993    * @stable ICU 2.0
   994    */
   995   inline int32_t indexOf(UChar32 c,
   996               int32_t start) const;
   997 
   998   /**
   999    * Locate in this string the first occurrence of the BMP code point <code>c</code>
  1000    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1001    * using bitwise comparison.
  1002    * @param c The code unit to search for.
  1003    * @param start the offset into this string at which to start matching
  1004    * @param length the number of characters in this string to search
  1005    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1006    * @stable ICU 2.0
  1007    */
  1008   inline int32_t indexOf(UChar c,
  1009               int32_t start,
  1010               int32_t length) const;
  1011 
  1012   /**
  1013    * Locate in this string the first occurrence of the code point <TT>c</TT>
  1014    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1015    * using bitwise comparison.
  1016    *
  1017    * @param c The code point to search for.
  1018    * @param start the offset into this string at which to start matching
  1019    * @param length the number of characters in this string to search
  1020    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1021    * @stable ICU 2.0
  1022    */
  1023   inline int32_t indexOf(UChar32 c,
  1024               int32_t start,
  1025               int32_t length) const;
  1026 
  1027   /**
  1028    * Locate in this string the last occurrence of the characters in <TT>text</TT>,
  1029    * using bitwise comparison.
  1030    * @param text The text to search for.
  1031    * @return The offset into this string of the start of <TT>text</TT>,
  1032    * or -1 if not found.
  1033    * @stable ICU 2.0
  1034    */
  1035   inline int32_t lastIndexOf(const UnicodeString& text) const;
  1036 
  1037   /**
  1038    * Locate in this string the last occurrence of the characters in <TT>text</TT>
  1039    * starting at offset <TT>start</TT>, using bitwise comparison.
  1040    * @param text The text to search for.
  1041    * @param start The offset into this string at which searching will start.
  1042    * @return The offset into this string of the start of <TT>text</TT>,
  1043    * or -1 if not found.
  1044    * @stable ICU 2.0
  1045    */
  1046   inline int32_t lastIndexOf(const UnicodeString& text,
  1047               int32_t start) const;
  1048 
  1049   /**
  1050    * Locate in this string the last occurrence in the range
  1051    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1052    * in <TT>text</TT>, using bitwise comparison.
  1053    * @param text The text to search for.
  1054    * @param start The offset into this string at which searching will start.
  1055    * @param length The number of characters in this string to search
  1056    * @return The offset into this string of the start of <TT>text</TT>,
  1057    * or -1 if not found.
  1058    * @stable ICU 2.0
  1059    */
  1060   inline int32_t lastIndexOf(const UnicodeString& text,
  1061               int32_t start,
  1062               int32_t length) const;
  1063 
  1064   /**
  1065    * Locate in this string the last occurrence in the range
  1066    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1067    * in <TT>srcText</TT> in the range
  1068    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1069    * using bitwise comparison.
  1070    * @param srcText The text to search for.
  1071    * @param srcStart the offset into <TT>srcText</TT> at which
  1072    * to start matching
  1073    * @param srcLength the number of characters in <TT>srcText</TT> to match
  1074    * @param start the offset into this string at which to start matching
  1075    * @param length the number of characters in this string to search
  1076    * @return The offset into this string of the start of the matched <TT>srcText</TT> characters,
  1077    * or -1 if not found.
  1078    * @stable ICU 2.0
  1079    */
  1080   inline int32_t lastIndexOf(const UnicodeString& srcText,
  1081               int32_t srcStart,
  1082               int32_t srcLength,
  1083               int32_t start,
  1084               int32_t length) const;
  1085 
  1086   /**
  1087    * Locate in this string the last occurrence of the characters in <TT>srcChars</TT>
  1088    * starting at offset <TT>start</TT>, using bitwise comparison.
  1089    * @param srcChars The text to search for.
  1090    * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1091    * @param start the offset into this string at which to start matching
  1092    * @return The offset into this string of the start of <TT>srcChars</TT>,
  1093    * or -1 if not found.
  1094    * @stable ICU 2.0
  1095    */
  1096   inline int32_t lastIndexOf(const UChar *srcChars,
  1097               int32_t srcLength,
  1098               int32_t start) const;
  1099 
  1100   /**
  1101    * Locate in this string the last occurrence in the range
  1102    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1103    * in <TT>srcChars</TT>, using bitwise comparison.
  1104    * @param srcChars The text to search for.
  1105    * @param srcLength the number of characters in <TT>srcChars</TT>
  1106    * @param start The offset into this string at which searching will start.
  1107    * @param length The number of characters in this string to search
  1108    * @return The offset into this string of the start of <TT>srcChars</TT>,
  1109    * or -1 if not found.
  1110    * @stable ICU 2.0
  1111    */
  1112   inline int32_t lastIndexOf(const UChar *srcChars,
  1113               int32_t srcLength,
  1114               int32_t start,
  1115               int32_t length) const;
  1116 
  1117   /**
  1118    * Locate in this string the last occurrence in the range
  1119    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1120    * in <TT>srcChars</TT> in the range
  1121    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1122    * using bitwise comparison.
  1123    * @param srcChars The text to search for.
  1124    * @param srcStart the offset into <TT>srcChars</TT> at which
  1125    * to start matching
  1126    * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1127    * @param start the offset into this string at which to start matching
  1128    * @param length the number of characters in this string to search
  1129    * @return The offset into this string of the start of the matched <TT>srcChars</TT> characters,
  1130    * or -1 if not found.
  1131    * @stable ICU 2.0
  1132    */
  1133   int32_t lastIndexOf(const UChar *srcChars,
  1134               int32_t srcStart,
  1135               int32_t srcLength,
  1136               int32_t start,
  1137               int32_t length) const;
  1138 
  1139   /**
  1140    * Locate in this string the last occurrence of the BMP code point <code>c</code>,
  1141    * using bitwise comparison.
  1142    * @param c The code unit to search for.
  1143    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1144    * @stable ICU 2.0
  1145    */
  1146   inline int32_t lastIndexOf(UChar c) const;
  1147 
  1148   /**
  1149    * Locate in this string the last occurrence of the code point <TT>c</TT>,
  1150    * using bitwise comparison.
  1151    *
  1152    * @param c The code point to search for.
  1153    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1154    * @stable ICU 2.0
  1155    */
  1156   inline int32_t lastIndexOf(UChar32 c) const;
  1157 
  1158   /**
  1159    * Locate in this string the last occurrence of the BMP code point <code>c</code>
  1160    * starting at offset <TT>start</TT>, using bitwise comparison.
  1161    * @param c The code unit to search for.
  1162    * @param start The offset into this string at which searching will start.
  1163    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1164    * @stable ICU 2.0
  1165    */
  1166   inline int32_t lastIndexOf(UChar c,
  1167               int32_t start) const;
  1168 
  1169   /**
  1170    * Locate in this string the last occurrence of the code point <TT>c</TT>
  1171    * starting at offset <TT>start</TT>, using bitwise comparison.
  1172    *
  1173    * @param c The code point to search for.
  1174    * @param start The offset into this string at which searching will start.
  1175    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1176    * @stable ICU 2.0
  1177    */
  1178   inline int32_t lastIndexOf(UChar32 c,
  1179               int32_t start) const;
  1180 
  1181   /**
  1182    * Locate in this string the last occurrence of the BMP code point <code>c</code>
  1183    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1184    * using bitwise comparison.
  1185    * @param c The code unit to search for.
  1186    * @param start the offset into this string at which to start matching
  1187    * @param length the number of characters in this string to search
  1188    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1189    * @stable ICU 2.0
  1190    */
  1191   inline int32_t lastIndexOf(UChar c,
  1192               int32_t start,
  1193               int32_t length) const;
  1194 
  1195   /**
  1196    * Locate in this string the last occurrence of the code point <TT>c</TT>
  1197    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1198    * using bitwise comparison.
  1199    *
  1200    * @param c The code point to search for.
  1201    * @param start the offset into this string at which to start matching
  1202    * @param length the number of characters in this string to search
  1203    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
  1204    * @stable ICU 2.0
  1205    */
  1206   inline int32_t lastIndexOf(UChar32 c,
  1207               int32_t start,
  1208               int32_t length) const;
  1209 
  1210 
  1211   /* Character access */
  1212 
  1213   /**
  1214    * Return the code unit at offset <tt>offset</tt>.
  1215    * If the offset is not valid (i.e., outside 0..length()-1) then U+ffff is returned.
  1216    * @param offset a valid offset into the text
  1217    * @return the code unit at offset <tt>offset</tt>
  1218    *         or 0xffff if the offset is not valid for this string
  1219    * @stable ICU 2.0
  1220    */
  1221   inline UChar charAt(int32_t offset) const;
  1222 
  1223   /**
  1224    * Return the code unit at offset <tt>offset</tt>.
  1225    * If the offset is not valid (i.e., outside 0..length()-1) then U+ffff is returned.
  1226    * @param offset a valid offset into the text
  1227    * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
  1228    * @stable ICU 2.0
  1229    */
  1230   inline UChar operator[] (int32_t offset) const;
  1231 
  1232   /**
  1233    * Return the code point that contains the code unit
  1234    * at offset <tt>offset</tt>.
  1235    * If the offset is not valid (i.e., outside 0..length()-1) then U+ffff is returned.
  1236    * @param offset a valid offset into the text
  1237    * that indicates the text offset of any of the code units
  1238    * that will be assembled into a code point (21-bit value) and returned
  1239    * @return the code point of the text at <tt>offset</tt>
  1240    *         or 0xffff if the offset is not valid for this string
  1241    * @stable ICU 2.0
  1242    */
  1243   inline UChar32 char32At(int32_t offset) const;
  1244 
  1245   /**
  1246    * Adjust a random-access offset so that
  1247    * it points to the beginning of a Unicode character.
  1248    * The offset that is passed in points to
  1249    * any code unit of a code point,
  1250    * while the returned offset will point to the first code unit
  1251    * of the same code point.
  1252    * In UTF-16, if the input offset points to the second surrogate
  1253    * of a surrogate pair, then the returned offset will point
  1254    * to the first surrogate of that pair.
  1255    * @param offset a valid offset into one code point of the text
  1256    * @return offset of the first code unit of the same code point
  1257    * @see U16_SET_CP_START
  1258    * @stable ICU 2.0
  1259    */
  1260   inline int32_t getChar32Start(int32_t offset) const;
  1261 
  1262   /**
  1263    * Adjust a random-access offset so that
  1264    * it points behind a Unicode character.
  1265    * The offset that is passed in points behind
  1266    * any code unit of a code point,
  1267    * while the returned offset will point behind the last code unit
  1268    * of the same code point.
  1269    * In UTF-16, if the input offset points behind the first surrogate
  1270    * (i.e., to the second surrogate)
  1271    * of a surrogate pair, then the returned offset will point
  1272    * behind the second surrogate (i.e., to the first code unit after the pair).
  1273    * @param offset a valid offset after any code unit of a code point of the text
  1274    * @return offset of the first code unit after the same code point
  1275    * @see U16_SET_CP_LIMIT
  1276    * @stable ICU 2.0
  1277    */
  1278   inline int32_t getChar32Limit(int32_t offset) const;
  1279 
  1280   /**
  1281    * Move the code unit index along the string by delta code points.
  1282    * Interpret the input index as a code unit-based offset into the string,
  1283    * move the index forward or backward by delta code points, and
  1284    * return the resulting index.
  1285    * The input index should point to the first code unit of a code point,
  1286    * if there is more than one.
  1287    *
  1288    * Both input and output indexes are code unit-based as for all
  1289    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
  1290    * If delta<0 then the index is moved backward (toward the start of the string).
  1291    * If delta>0 then the index is moved forward (toward the end of the string).
  1292    *
  1293    * This behaves like CharacterIterator::move32(delta, kCurrent).
  1294    *
  1295    * Behavior for out-of-bounds indexes:
  1296    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
  1297    * if the input index<0 then it is pinned to 0;
  1298    * if the input index>length() then it is pinned to length().
  1299    * Afterwards, the index is moved by <code>delta</code> code points
  1300    * forward or backward,
  1301    * but no further backward than to 0 and no further forward than to length().
  1302    * The resulting index return value will be between 0 and length(), inclusive.
  1303    *
  1304    * Examples:
  1305    * <pre>
  1306    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
  1307    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
  1308    *
  1309    * // initial index: position of U+10000
  1310    * int32_t index=1;
  1311    *
  1312    * // the following examples will all result in index==4, position of U+10ffff
  1313    *
  1314    * // skip 2 code points from some position in the string
  1315    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
  1316    *
  1317    * // go to the 3rd code point from the start of s (0-based)
  1318    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
  1319    *
  1320    * // go to the next-to-last code point of s
  1321    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
  1322    * </pre>
  1323    *
  1324    * @param index input code unit index
  1325    * @param delta (signed) code point count to move the index forward or backward
  1326    *        in the string
  1327    * @return the resulting code unit index
  1328    * @stable ICU 2.0
  1329    */
  1330   int32_t moveIndex32(int32_t index, int32_t delta) const;
  1331 
  1332   /* Substring extraction */
  1333 
  1334   /**
  1335    * Copy the characters in the range
  1336    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
  1337    * beginning at <tt>dstStart</tt>.
  1338    * If the string aliases to <code>dst</code> itself as an external buffer,
  1339    * then extract() will not copy the contents.
  1340    *
  1341    * @param start offset of first character which will be copied into the array
  1342    * @param length the number of characters to extract
  1343    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
  1344    * must be at least (<tt>dstStart + length</tt>).
  1345    * @param dstStart the offset in <TT>dst</TT> at which the first extracted
  1346    * character will be stored
  1347    * @stable ICU 2.0
  1348    */
  1349   inline void extract(int32_t start,
  1350            int32_t length,
  1351            UChar *dst,
  1352            int32_t dstStart = 0) const;
  1353 
  1354   /**
  1355    * Copy the contents of the string into dest.
  1356    * This is a convenience function that
  1357    * checks if there is enough space in dest,
  1358    * extracts the entire string if possible,
  1359    * and NUL-terminates dest if possible.
  1360    *
  1361    * If the string fits into dest but cannot be NUL-terminated
  1362    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
  1363    * If the string itself does not fit into dest
  1364    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
  1365    *
  1366    * If the string aliases to <code>dest</code> itself as an external buffer,
  1367    * then extract() will not copy the contents.
  1368    *
  1369    * @param dest Destination string buffer.
  1370    * @param destCapacity Number of UChars available at dest.
  1371    * @param errorCode ICU error code; receives the warning/error described above.
  1372    * @return length()
  1373    * @stable ICU 2.0
  1374    */
  1375   int32_t
  1376   extract(UChar *dest, int32_t destCapacity,
  1377           UErrorCode &errorCode) const;
  1378 
  1379   /**
  1380    * Copy the characters in the range
  1381    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
  1382    * <tt>target</tt>.
  1383    * @param start offset of first character which will be copied
  1384    * @param length the number of characters to extract
  1385    * @param target UnicodeString into which to copy characters.
  1386    *        The function itself does not return a value.
  1387    * @stable ICU 2.0
  1388    */
  1389   inline void extract(int32_t start,
  1390            int32_t length,
  1391            UnicodeString& target) const;
  1392 
  1393   /**
  1394    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1395    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
  1396    * @param start offset of first character which will be copied into the array
  1397    * @param limit offset immediately following the last character to be copied
  1398    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
  1399    * must be at least (<tt>dstStart + (limit - start)</tt>).
  1400    * @param dstStart the offset in <TT>dst</TT> at which the first extracted
  1401    * character will be stored
  1402    * @stable ICU 2.0
  1403    */
  1404   inline void extractBetween(int32_t start,
  1405               int32_t limit,
  1406               UChar *dst,
  1407               int32_t dstStart = 0) const;
  1408 
  1409   /**
  1410    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1411    * into the UnicodeString <tt>target</tt>.  Replaceable API.
  1412    * @param start offset of first character which will be copied
  1413    * @param limit offset immediately following the last character to be copied
  1414    * @param target UnicodeString into which to copy characters.
  1415    *        The function itself does not return a value.
  1416    * @stable ICU 2.0
  1417    */
  1418   virtual void extractBetween(int32_t start,
  1419               int32_t limit,
  1420               UnicodeString& target) const;
  1421 
  1422   /**
  1423    * Copy the characters in the range
  1424    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
  1425    * All characters must be invariant (see utypes.h).
  1426    * Use US_INV as the last, signature-distinguishing parameter.
  1427    *
  1428    * This function does not write any more than <code>targetCapacity</code>
  1429    * characters but returns the length of the entire output string
  1430    * so that one can allocate a larger buffer and call the function again
  1431    * if necessary.
  1432    * The output string is NUL-terminated if possible.
  1433    *
  1434    * @param start offset of first character which will be copied
  1435    * @param startLength the number of characters to extract
  1436    * @param target the target buffer for extraction, can be NULL
  1437    *               if targetCapacity is 0
  1438    * @param targetCapacity the length of the target buffer
  1439    * @param inv Signature-distinguishing parameter, use US_INV.
  1440    * @return the output string length, not including the terminating NUL
  1441    * @draft ICU 3.2
  1442    */
  1443   int32_t extract(int32_t start,
  1444            int32_t startLength,
  1445            char *target,
  1446            int32_t targetCapacity,
  1447            enum EInvariant inv) const;
  1448 
  1449 #if !UCONFIG_NO_CONVERSION
  1450 
  1451   /**
  1452    * Copy the characters in the range
  1453    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1454    * in a specified codepage.
  1455    * The output string is NUL-terminated.
  1456    *
  1457    * Recommendation: For invariant-character strings use
  1458    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1459    * because it avoids object code dependencies of UnicodeString on
  1460    * the conversion code.
  1461    *
  1462    * @param start offset of first character which will be copied
  1463    * @param startLength the number of characters to extract
  1464    * @param target the target buffer for extraction
  1465    * @param codepage the desired codepage for the characters.  0 has
  1466    * the special meaning of the default codepage.
  1467    * If <code>codepage</code> is an empty string (<code>""</code>),
  1468    * then a simple conversion is performed on the codepage-invariant
  1469    * subset ("invariant characters") of the platform encoding. See utypes.h.
  1470    * If <TT>target</TT> is NULL, then the number of bytes required for
  1471    * <TT>target</TT> is returned. It is assumed that the target is big enough
  1472    * to fit all of the characters.
  1473    * @return the output string length, not including the terminating NUL
  1474    * @stable ICU 2.0
  1475    */
  1476   inline int32_t extract(int32_t start,
  1477                  int32_t startLength,
  1478                  char *target,
  1479                  const char *codepage = 0) const;
  1480 
  1481   /**
  1482    * Copy the characters in the range
  1483    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1484    * in a specified codepage.
  1485    * This function does not write any more than <code>targetLength</code>
  1486    * characters but returns the length of the entire output string
  1487    * so that one can allocate a larger buffer and call the function again
  1488    * if necessary.
  1489    * The output string is NUL-terminated if possible.
  1490    *
  1491    * Recommendation: For invariant-character strings use
  1492    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1493    * because it avoids object code dependencies of UnicodeString on
  1494    * the conversion code.
  1495    *
  1496    * @param start offset of first character which will be copied
  1497    * @param startLength the number of characters to extract
  1498    * @param target the target buffer for extraction
  1499    * @param targetLength the length of the target buffer
  1500    * @param codepage the desired codepage for the characters.  0 has
  1501    * the special meaning of the default codepage.
  1502    * If <code>codepage</code> is an empty string (<code>""</code>),
  1503    * then a simple conversion is performed on the codepage-invariant
  1504    * subset ("invariant characters") of the platform encoding. See utypes.h.
  1505    * If <TT>target</TT> is NULL, then the number of bytes required for
  1506    * <TT>target</TT> is returned.
  1507    * @return the output string length, not including the terminating NUL
  1508    * @stable ICU 2.0
  1509    */
  1510   int32_t extract(int32_t start,
  1511            int32_t startLength,
  1512            char *target,
  1513            uint32_t targetLength,
  1514            const char *codepage = 0) const;
  1515 
  1516   /**
  1517    * Convert the UnicodeString into a codepage string using an existing UConverter.
  1518    * The output string is NUL-terminated if possible.
  1519    *
  1520    * This function avoids the overhead of opening and closing a converter when
  1521    * multiple strings are extracted.
  1522    *
  1523    * @param dest destination string buffer, can be NULL if destCapacity==0
  1524    * @param destCapacity the number of chars available at dest
  1525    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
  1526    *        or NULL for the default converter
  1527    * @param errorCode normal ICU error code (passed by reference)
  1528    * @return the length of the output string, not counting the terminating NUL;
  1529    *         if the length is greater than destCapacity, then the string will not fit
  1530    *         and a buffer of the indicated length would need to be passed in
  1531    * @stable ICU 2.0
  1532    */
  1533   int32_t extract(char *dest, int32_t destCapacity,
  1534                   UConverter *cnv,
  1535                   UErrorCode &errorCode) const;
  1536 
  1537 #endif
  1538 
  1539   /* Length operations */
  1540 
  1541   /**
  1542    * Return the length of the UnicodeString object.
  1543    * The length is the number of UChar code units in the UnicodeString.
  1544    * If you want the number of code points, please use countChar32().
  1545    * @return the length of the UnicodeString object
  1546    * @see countChar32
  1547    * @stable ICU 2.0
  1548    */
  1549   inline int32_t length(void) const;
  1550 
  1551   /**
  1552    * Count Unicode code points in the length UChar code units of the string.
  1553    * A code point may occupy either one or two UChar code units.
  1554    * Counting code points involves reading all code units.
  1555    *
  1556    * This function is basically the inverse of moveIndex32().
  1557    *
  1558    * @param start the index of the first code unit to check
  1559    * @param length the number of UChar code units to check
  1560    * @return the number of code points in the specified code units
  1561    * @see length
  1562    * @stable ICU 2.0
  1563    */
  1564   int32_t
  1565   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
  1566 
  1567   /**
  1568    * Check if the length UChar code units of the string
  1569    * contain more Unicode code points than a certain number.
  1570    * This is more efficient than counting all code points in this part of the string
  1571    * and comparing that number with a threshold.
  1572    * This function may not need to scan the string at all if the length
  1573    * falls within a certain range, and
  1574    * never needs to count more than 'number+1' code points.
  1575    * Logically equivalent to (countChar32(start, length)>number).
  1576    * A Unicode code point may occupy either one or two UChar code units.
  1577    *
  1578    * @param start the index of the first code unit to check (0 for the entire string)
  1579    * @param length the number of UChar code units to check
  1580    *               (use INT32_MAX for the entire string; remember that start/length
  1581    *                values are pinned)
  1582    * @param number The number of code points in the (sub)string is compared against
  1583    *               the 'number' parameter.
  1584    * @return TRUE if the (sub)string contains more Unicode code points
  1585    *         than 'number', FALSE otherwise. Same as (u_countChar32(s, length)>number).
  1586    * @see countChar32
  1587    * @see u_strHasMoreChar32Than
  1588    * @stable ICU 2.4
  1589    */
  1590   UBool
  1591   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
  1592 
  1593   /**
  1594    * Determine whether this string is empty.
  1595    * @return TRUE if this string contains 0 characters, FALSE otherwise.
  1596    * @stable ICU 2.0
  1597    */
  1598   inline UBool isEmpty(void) const;
  1599 
  1600   /**
  1601    * Return the capacity of the internal buffer of the UnicodeString object.
  1602    * This is useful together with the getBuffer functions.
  1603    * See the getBuffer documentation for details.
  1604    *
  1605    * @return the number of UChars available in the internal buffer
  1606    * @see getBuffer
  1607    * @stable ICU 2.0
  1608    */
  1609   inline int32_t getCapacity(void) const;
  1610 
  1611   /* Other operations */
  1612 
  1613   /**
  1614    * Generate a hash code for this UnicodeString object.
  1615    * @return The hash code of this UnicodeString.
  1616    * @stable ICU 2.0
  1617    */
  1618   inline int32_t hashCode(void) const;
  1619 
  1620   /**
  1621    * Determine if this object is bogus, i.e., does not contain a valid string.
  1622    * A bogus string has no value. It is different from an empty string.
  1623    * It can be used to indicate that no string value is available.
  1624    * getBuffer() and getTerminatedBuffer() return NULL, and
  1625    * length() returns 0.
  1626    *
  1627    * @return TRUE if the string is bogus/invalid, FALSE otherwise
  1628    * @see setToBogus()
  1629    * @stable ICU 2.0
  1630    */
  1631   inline UBool isBogus(void) const;
  1632 
  1633 
  1634   //========================================
  1635   // Write operations
  1636   //========================================
  1637 
  1638   /* Assignment operations */
  1639 
  1640   /**
  1641    * Assignment operator.  Replace the characters in this UnicodeString
  1642    * with the characters from <TT>srcText</TT>.
  1643    * @param srcText The text whose characters replace the contents of this string
  1644    * @return a reference to this
  1645    * @stable ICU 2.0
  1646    */
  1647   UnicodeString &operator=(const UnicodeString &srcText);
  1648 
  1649   /**
  1650    * Almost the same as the assignment operator.
  1651    * Replace the characters in this UnicodeString
  1652    * with the characters from <code>srcText</code>.
  1653    *
  1654    * This function works the same for all strings except for ones that
  1655    * are readonly aliases.
  1656    * Starting with ICU 2.4, the assignment operator and the copy constructor
  1657    * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1658    * This function implements the old, more efficient but less safe behavior
  1659    * of making this string also a readonly alias to the same buffer.
  1660    * The fastCopyFrom function must be used only if it is known that the lifetime of
  1661    * this UnicodeString does not exceed the lifetime of the aliased buffer
  1662    * including its contents, for example for strings from resource bundles
  1663    * or aliases to string contents.
  1664    *
  1665    * @param src The text containing the characters to replace.
  1666    * @return a reference to this
  1667    * @stable ICU 2.4
  1668    */
  1669   UnicodeString &fastCopyFrom(const UnicodeString &src);
  1670 
  1671   /**
  1672    * Assignment operator.  Replace the characters in this UnicodeString
  1673    * with the code unit <TT>ch</TT>.
  1674    * @param ch the code unit that replaces the current contents
  1675    * @return a reference to this
  1676    * @stable ICU 2.0
  1677    */
  1678   inline UnicodeString& operator= (UChar ch);
  1679 
  1680   /**
  1681    * Assignment operator.  Replace the characters in this UnicodeString
  1682    * with the code point <TT>ch</TT>.
  1683    * @param ch the code point that replaces the current contents
  1684    * @return a reference to this
  1685    * @stable ICU 2.0
  1686    */
  1687   inline UnicodeString& operator= (UChar32 ch);
  1688 
  1689   /**
  1690    * Set the text in the UnicodeString object to the characters
  1691    * in <TT>srcText</TT> in the range
  1692    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
  1693    * <TT>srcText</TT> is not modified.
  1694    * @param srcText the source for the new characters
  1695    * @param srcStart the offset into <TT>srcText</TT> from which the new
  1696    * characters will be taken
  1697    * @return a reference to this
  1698    * @stable ICU 2.2
  1699    */
  1700   inline UnicodeString& setTo(const UnicodeString& srcText,
  1701                int32_t srcStart);
  1702 
  1703   /**
  1704    * Set the text in the UnicodeString object to the characters
  1705    * in <TT>srcText</TT> in the range
  1706    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  1707    * <TT>srcText</TT> is not modified.
  1708    * @param srcText the source for the new characters
  1709    * @param srcStart the offset into <TT>srcText</TT> from which the new
  1710    * characters will be taken
  1711    * @param srcLength the number of characters from <TT>srcText</TT> that
  1712    * make up the new contents.
  1713    * @return a reference to this
  1714    * @stable ICU 2.0
  1715    */
  1716   inline UnicodeString& setTo(const UnicodeString& srcText,
  1717                int32_t srcStart,
  1718                int32_t srcLength);
  1719 
  1720   /**
  1721    * Set the text in the UnicodeString object to all of the characters in
  1722    * <TT>srcText</TT>.
  1723    * <TT>srcText</TT> is not modified.
  1724    * @param srcText the source for the new characters
  1725    * @return a reference to this
  1726    * @stable ICU 2.0
  1727    */
  1728   inline UnicodeString& setTo(const UnicodeString& srcText);
  1729 
  1730   /**
  1731    * Set the characters in the UnicodeString object to the characters
  1732    * in the array <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
  1733    * @param srcChars the source for the new characters
  1734    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>.
  1735    * @return a reference to this
  1736    * @stable ICU 2.0
  1737    */
  1738   inline UnicodeString& setTo(const UChar *srcChars,
  1739                int32_t srcLength);
  1740 
  1741   /**
  1742    * Set the characters in the UnicodeString object to the single code unit
  1743    * <TT>srcChar</TT>.
  1744    * @param srcChar the code unit which becomes the UnicodeString's character
  1745    * content
  1746    * @return a reference to this
  1747    * @stable ICU 2.0
  1748    */
  1749   UnicodeString& setTo(UChar srcChar);
  1750 
  1751   /**
  1752    * Set the characters in the UnicodeString object to the single code point
  1753    * <TT>srcChar</TT>.
  1754    * @param srcChar the code point which becomes the UnicodeString's character
  1755    * content
  1756    * @return a reference to this
  1757    * @stable ICU 2.0
  1758    */
  1759   UnicodeString& setTo(UChar32 srcChar);
  1760 
  1761   /**
  1762    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
  1763    * The text will be used for the UnicodeString object, but
  1764    * it will not be released when the UnicodeString is destroyed.
  1765    * This has copy-on-write semantics:
  1766    * When the string is modified, then the buffer is first copied into
  1767    * newly allocated memory.
  1768    * The aliased buffer is never modified.
  1769    * When another UnicodeString is set from this one via fastCopyFrom(), the text is
  1770    * aliased again; the assignment operator and copy constructor copy it (since ICU 2.4).
  1771    *
  1772    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  1773    *                     This must be true if <code>textLength==-1</code>.
  1774    * @param text The characters to alias for the UnicodeString.
  1775    * @param textLength The number of Unicode characters in <code>text</code> to alias.
  1776    *                   If -1, then this function will determine the length
  1777    *                   by calling <code>u_strlen()</code>.
  1778    * @return a reference to this
  1779    * @stable ICU 2.0
  1780    */
  1781   UnicodeString &setTo(UBool isTerminated,
  1782                        const UChar *text,
  1783                        int32_t textLength);
  1784 
  1785   /**
  1786    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
  1787    * The text will be used for the UnicodeString object, but
  1788    * it will not be released when the UnicodeString is destroyed.
  1789    * This has write-through semantics:
  1790    * For as long as the capacity of the buffer is sufficient, write operations
  1791    * will directly affect the buffer. When more capacity is necessary, then
  1792    * a new buffer will be allocated and the contents copied as with regularly
  1793    * constructed strings.
  1794    * In an assignment to another UnicodeString, the buffer will be copied.
  1795    * The extract(UChar *dst) function detects whether the dst pointer is the same
  1796    * as the string buffer itself and will in this case not copy the contents.
  1797    *
  1798    * @param buffer The characters to alias for the UnicodeString.
  1799    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  1800    * @param buffCapacity The total size of <code>buffer</code> in UChars.
  1801    * @return a reference to this
  1802    * @stable ICU 2.0
  1803    */
  1804   UnicodeString &setTo(UChar *buffer,
  1805                        int32_t buffLength,
  1806                        int32_t buffCapacity);
  1807 
  1808   /**
  1809    * Make this UnicodeString object invalid.
  1810    * The string will test TRUE with isBogus().
  1811    *
  1812    * A bogus string has no value. It is different from an empty string.
  1813    * It can be used to indicate that no string value is available.
  1814    * getBuffer() and getTerminatedBuffer() return NULL, and
  1815    * length() returns 0.
  1816    *
  1817    * This utility function is used throughout the UnicodeString
  1818    * implementation to indicate that a UnicodeString operation failed,
  1819    * and may be used in other functions,
  1820    * especially but not exclusively when such functions do not
  1821    * take a UErrorCode for simplicity.
  1822    *
  1823    * The following methods, and no others, will clear a string object's bogus flag:
  1824    * - remove()
  1825    * - remove(0, INT32_MAX)
  1826    * - truncate(0)
  1827    * - operator=() (assignment operator)
  1828    * - setTo(...)
  1829    *
  1830    * The simplest way to turn a bogus string into an empty one
  1831    * is to use the remove() function.
  1832    * Examples for other functions that are equivalent to "set to empty string":
  1833    * \code
  1834    * if(s.isBogus()) {
  1835    *   s.remove();           // set to an empty string (remove all), or
  1836    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  1837    *   s.truncate(0);        // set to an empty string (complete truncation), or
  1838    *   s=UnicodeString();    // assign an empty string, or
  1839    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  1840    *   static const UChar nul=0;
  1841    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
  1842    * }
  1843    * \endcode
  1844    *
  1845    * @see isBogus()
  1846    * @stable ICU 2.0
  1847    */
  1848   void setToBogus();
  1849 
  1850   /**
  1851    * Set the character at the specified offset to the specified character.
  1852    * @param offset A valid offset into the text of the character to set
  1853    * @param ch The new character (a single UChar)
  1854    * @return A reference to this
  1855    * @stable ICU 2.0
  1856    */
  1857   UnicodeString& setCharAt(int32_t offset,
  1858                UChar ch);
  1859 
  1860 
  1861   /* Append operations */
  1862 
  1863   /**
  1864    * Append operator. Append the code unit <TT>ch</TT> to the end of the
  1865    * UnicodeString object.
  1866    * @param ch the code unit to be appended
  1867    * @return a reference to this
  1868    * @stable ICU 2.0
  1869    */
  1870  inline  UnicodeString& operator+= (UChar ch);
  1871 
  1872   /**
  1873    * Append operator. Append the code point <TT>ch</TT> to the end of the
  1874    * UnicodeString object.
  1875    * @param ch the code point to be appended
  1876    * @return a reference to this
  1877    * @stable ICU 2.0
  1878    */
  1879  inline  UnicodeString& operator+= (UChar32 ch);
  1880 
  1881   /**
  1882    * Append operator. Append the characters in <TT>srcText</TT> to the
  1883    * UnicodeString object. <TT>srcText</TT> is
  1884    * not modified.
  1885    * @param srcText the source for the new characters
  1886    * @return a reference to this
  1887    * @stable ICU 2.0
  1888    */
  1889   inline UnicodeString& operator+= (const UnicodeString& srcText);
  1890 
  1891   /**
  1892    * Append the characters
  1893    * in <TT>srcText</TT> in the range
  1894    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
  1895    * UnicodeString object. <TT>srcText</TT>
  1896    * is not modified.
  1897    * @param srcText the source for the new characters
  1898    * @param srcStart the offset into <TT>srcText</TT> where new characters
  1899    * will be obtained
  1900    * @param srcLength the number of characters in <TT>srcText</TT> in
  1901    * the append string
  1902    * @return a reference to this
  1903    * @stable ICU 2.0
  1904    */
  1905   inline UnicodeString& append(const UnicodeString& srcText,
  1906             int32_t srcStart,
  1907             int32_t srcLength);
  1908 
  1909   /**
  1910    * Append the characters in <TT>srcText</TT> to the UnicodeString object.
  1911    * <TT>srcText</TT> is not modified.
  1912    * @param srcText the source for the new characters
  1913    * @return a reference to this
  1914    * @stable ICU 2.0
  1915    */
  1916   inline UnicodeString& append(const UnicodeString& srcText);
  1917 
  1918   /**
  1919    * Append the characters in <TT>srcChars</TT> in the range
  1920    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
  1921    * object.
  1922    * <TT>srcChars</TT> is not modified.
  1923    * @param srcChars the source for the new characters
  1924    * @param srcStart the offset into <TT>srcChars</TT> where new characters
  1925    * will be obtained
  1926    * @param srcLength the number of characters in <TT>srcChars</TT> in
  1927    * the append string
  1928    * @return a reference to this
  1929    * @stable ICU 2.0
  1930    */
  1931   inline UnicodeString& append(const UChar *srcChars,
  1932             int32_t srcStart,
  1933             int32_t srcLength);
  1934 
  1935   /**
  1936    * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
  1937    * <TT>srcChars</TT> is not modified.
  1938    * @param srcChars the source for the new characters
  1939    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
  1940    * @return a reference to this
  1941    * @stable ICU 2.0
  1942    */
  1943   inline UnicodeString& append(const UChar *srcChars,
  1944             int32_t srcLength);
  1945 
  1946   /**
  1947    * Append the code unit <TT>srcChar</TT> to the end of the UnicodeString object.
  1948    * @param srcChar the code unit to append
  1949    * @return a reference to this
  1950    * @stable ICU 2.0
  1951    */
  1952   inline UnicodeString& append(UChar srcChar);
  1953 
  1954   /**
  1955    * Append the code point <TT>srcChar</TT> to the end of the UnicodeString object.
  1956    * @param srcChar the code point to append
  1957    * @return a reference to this
  1958    * @stable ICU 2.0
  1959    */
  1960   inline UnicodeString& append(UChar32 srcChar);
  1961 
  1962 
  1963   /* Insert operations */
  1964 
  1965   /**
  1966    * Insert the characters in <TT>srcText</TT> in the range
  1967    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  1968    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  1969    * @param start the offset in this string at which the insertion begins
  1970    * @param srcText the source for the new characters
  1971    * @param srcStart the offset into <TT>srcText</TT> where new characters
  1972    * will be obtained
  1973    * @param srcLength the number of characters from <TT>srcText</TT>
  1974    * to insert
  1975    * @return a reference to this
  1976    * @stable ICU 2.0
  1977    */
  1978   inline UnicodeString& insert(int32_t start,
  1979             const UnicodeString& srcText,
  1980             int32_t srcStart,
  1981             int32_t srcLength);
  1982 
  1983   /**
  1984    * Insert the characters in <TT>srcText</TT> into the UnicodeString object
  1985    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  1986    * @param start the offset in this string at which the insertion begins
  1987    * @param srcText the source for the new characters
  1988    * @return a reference to this
  1989    * @stable ICU 2.0
  1990    */
  1991   inline UnicodeString& insert(int32_t start,
  1992             const UnicodeString& srcText);
  1993 
  1994   /**
  1995    * Insert the characters in <TT>srcChars</TT> in the range
  1996    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  1997    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  1998    * @param start the offset in this string at which the insertion begins
  1999    * @param srcChars the source for the new characters
  2000    * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2001    * will be obtained
  2002    * @param srcLength the number of characters from <TT>srcChars</TT>
  2003    * to insert
  2004    * @return a reference to this
  2005    * @stable ICU 2.0
  2006    */
  2007   inline UnicodeString& insert(int32_t start,
  2008             const UChar *srcChars,
  2009             int32_t srcStart,
  2010             int32_t srcLength);
  2011 
  2012   /**
  2013    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
  2014    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  2015    * @param start the offset where the insertion begins
  2016    * @param srcChars the source for the new characters
  2017    * @param srcLength the number of Unicode characters in srcChars.
  2018    * @return a reference to this
  2019    * @stable ICU 2.0
  2020    */
  2021   inline UnicodeString& insert(int32_t start,
  2022             const UChar *srcChars,
  2023             int32_t srcLength);
  2024 
  2025   /**
  2026    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
  2027    * offset <TT>start</TT>.
  2028    * @param start the offset at which the insertion occurs
  2029    * @param srcChar the code unit to insert
  2030    * @return a reference to this
  2031    * @stable ICU 2.0
  2032    */
  2033   inline UnicodeString& insert(int32_t start,
  2034             UChar srcChar);
  2035 
  2036   /**
  2037    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
  2038    * offset <TT>start</TT>.
  2039    * @param start the offset at which the insertion occurs
  2040    * @param srcChar the code point to insert
  2041    * @return a reference to this
  2042    * @stable ICU 2.0
  2043    */
  2044   inline UnicodeString& insert(int32_t start,
  2045             UChar32 srcChar);
  2046 
  2047 
  2048   /* Replace operations */
  2049 
  2050   /**
  2051    * Replace the characters in the range
  2052    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2053    * <TT>srcText</TT> in the range
  2054    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  2055    * <TT>srcText</TT> is not modified.
  2056    * @param start the offset at which the replace operation begins
  2057    * @param length the number of characters to replace. The character at
  2058    * <TT>start + length</TT> is not modified.
  2059    * @param srcText the source for the new characters
  2060    * @param srcStart the offset into <TT>srcText</TT> where new characters
  2061    * will be obtained
  2062    * @param srcLength the number of characters in <TT>srcText</TT> in
  2063    * the replace string
  2064    * @return a reference to this
  2065    * @stable ICU 2.0
  2066    */
  2067   UnicodeString& replace(int32_t start,
  2068              int32_t length,
  2069              const UnicodeString& srcText,
  2070              int32_t srcStart,
  2071              int32_t srcLength);
  2072 
  2073   /**
  2074    * Replace the characters in the range
  2075    * [<TT>start</TT>, <TT>start + length</TT>)
  2076    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
  2077    *  not modified.
  2078    * @param start the offset at which the replace operation begins
  2079    * @param length the number of characters to replace. The character at
  2080    * <TT>start + length</TT> is not modified.
  2081    * @param srcText the source for the new characters
  2082    * @return a reference to this
  2083    * @stable ICU 2.0
  2084    */
  2085   UnicodeString& replace(int32_t start,
  2086              int32_t length,
  2087              const UnicodeString& srcText);
  2088 
  2089   /**
  2090    * Replace the characters in the range
  2091    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2092    * <TT>srcChars</TT> in the range
  2093    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
  2094    * is not modified.
  2095    * @param start the offset at which the replace operation begins
  2096    * @param length the number of characters to replace.  The character at
  2097    * <TT>start + length</TT> is not modified.
  2098    * @param srcChars the source for the new characters
  2099    * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2100    * will be obtained
  2101    * @param srcLength the number of characters in <TT>srcChars</TT>
  2102    * in the replace string
  2103    * @return a reference to this
  2104    * @stable ICU 2.0
  2105    */
  2106   UnicodeString& replace(int32_t start,
  2107              int32_t length,
  2108              const UChar *srcChars,
  2109              int32_t srcStart,
  2110              int32_t srcLength);
  2111 
  2112   /**
  2113    * Replace the characters in the range
  2114    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2115    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
  2116    * @param start the offset at which the replace operation begins
  2117    * @param length number of characters to replace.  The character at
  2118    * <TT>start + length</TT> is not modified.
  2119    * @param srcChars the source for the new characters
  2120    * @param srcLength the number of Unicode characters in srcChars
  2121    * @return a reference to this
  2122    * @stable ICU 2.0
  2123    */
  2124   inline UnicodeString& replace(int32_t start,
  2125              int32_t length,
  2126              const UChar *srcChars,
  2127              int32_t srcLength);
  2128 
  2129   /**
  2130    * Replace the characters in the range
  2131    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
  2132    * <TT>srcChar</TT>.
  2133    * @param start the offset at which the replace operation begins
  2134    * @param length the number of characters to replace.  The character at
  2135    * <TT>start + length</TT> is not modified.
  2136    * @param srcChar the new code unit
  2137    * @return a reference to this
  2138    * @stable ICU 2.0
  2139    */
  2140   inline UnicodeString& replace(int32_t start,
  2141              int32_t length,
  2142              UChar srcChar);
  2143 
  2144   /**
  2145    * Replace the characters in the range
  2146    * [<TT>start</TT>, <TT>start + length</TT>) with the code point
  2147    * <TT>srcChar</TT>.
  2148    * @param start the offset at which the replace operation begins
  2149    * @param length the number of characters to replace.  The character at
  2150    * <TT>start + length</TT> is not modified.
  2151    * @param srcChar the new code point
  2152    * @return a reference to this
  2153    * @stable ICU 2.0
  2154    */
  2155   inline UnicodeString& replace(int32_t start,
  2156              int32_t length,
  2157              UChar32 srcChar);
  2158 
  2159   /**
  2160    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2161    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
  2162    * @param start the offset at which the replace operation begins
  2163    * @param limit the offset immediately following the replace range
  2164    * @param srcText the source for the new characters
  2165    * @return a reference to this
  2166    * @stable ICU 2.0
  2167    */
  2168   inline UnicodeString& replaceBetween(int32_t start,
  2169                 int32_t limit,
  2170                 const UnicodeString& srcText);
  2171 
  2172   /**
  2173    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2174    * with the characters in <TT>srcText</TT> in the range
  2175    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
  2176    * @param start the offset at which the replace operation begins
  2177    * @param limit the offset immediately following the replace range
  2178    * @param srcText the source for the new characters
  2179    * @param srcStart the offset into <TT>srcText</TT> where new characters
  2180    * will be obtained
  2181    * @param srcLimit the offset immediately following the range to copy
  2182    * in <TT>srcText</TT>
  2183    * @return a reference to this
  2184    * @stable ICU 2.0
  2185    */
  2186   inline UnicodeString& replaceBetween(int32_t start,
  2187                 int32_t limit,
  2188                 const UnicodeString& srcText,
  2189                 int32_t srcStart,
  2190                 int32_t srcLimit);
  2191 
  2192   /**
  2193    * Replace a substring of this object with the given text.
  2194    * @param start the beginning index, inclusive; <code>0 <= start
  2195    * <= limit</code>.
  2196    * @param limit the ending index, exclusive; <code>start <= limit
  2197    * <= length()</code>.
  2198    * @param text the text to replace characters <code>start</code>
  2199    * to <code>limit - 1</code>
  2200    * @stable ICU 2.0
  2201    */
  2202   virtual void handleReplaceBetween(int32_t start,
  2203                                     int32_t limit,
  2204                                     const UnicodeString& text);
  2205 
  2206   /**
  2207    * Replaceable API
  2208    * @return TRUE if it has MetaData
  2209    * @stable ICU 2.4
  2210    */
  2211   virtual UBool hasMetaData() const;
  2212 
  2213   /**
  2214    * Copy a substring of this object, retaining attribute (out-of-band)
  2215    * information.  This method is used to duplicate or reorder substrings.
  2216    * The destination index must not overlap the source range.
  2217    *
  2218    * @param start the beginning index, inclusive; <code>0 <= start <=
  2219    * limit</code>.
  2220    * @param limit the ending index, exclusive; <code>start <= limit <=
  2221    * length()</code>.
  2222    * @param dest the destination index.  The characters from
  2223    * <code>start..limit-1</code> will be copied to <code>dest</code>.
  2224    * Implementations of this method may assume that <code>dest <= start ||
  2225    * dest >= limit</code>.
  2226    * @stable ICU 2.0
  2227    */
  2228   virtual void copy(int32_t start, int32_t limit, int32_t dest);
  2229 
  2230   /* Search and replace operations */
  2231 
  2232   /**
  2233    * Replace all occurrences of characters in oldText with the characters
  2234    * in newText
  2235    * @param oldText the text containing the search text
  2236    * @param newText the text containing the replacement text
  2237    * @return a reference to this
  2238    * @stable ICU 2.0
  2239    */
  2240   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
  2241                 const UnicodeString& newText);
  2242 
  2243   /**
  2244    * Replace all occurrences of characters in oldText with characters
  2245    * in newText
  2246    * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2247    * @param start the start of the range in which replace will be performed
  2248    * @param length the length of the range in which replace will be performed
  2249    * @param oldText the text containing the search text
  2250    * @param newText the text containing the replacement text
  2251    * @return a reference to this
  2252    * @stable ICU 2.0
  2253    */
  2254   inline UnicodeString& findAndReplace(int32_t start,
  2255                 int32_t length,
  2256                 const UnicodeString& oldText,
  2257                 const UnicodeString& newText);
  2258 
  2259   /**
  2260    * Replace all occurrences of characters in oldText in the range
  2261    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
  2262    * in newText in the range
  2263    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
  2264    * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2265    * @param start the start of the range in which replace will be performed
  2266    * @param length the length of the range in which replace will be performed
  2267    * @param oldText the text containing the search text
  2268    * @param oldStart the start of the search range in <TT>oldText</TT>
  2269    * @param oldLength the length of the search range in <TT>oldText</TT>
  2270    * @param newText the text containing the replacement text
  2271    * @param newStart the start of the replacement range in <TT>newText</TT>
  2272    * @param newLength the length of the replacement range in <TT>newText</TT>
  2273    * @return a reference to this
  2274    * @stable ICU 2.0
  2275    */
  2276   UnicodeString& findAndReplace(int32_t start,
  2277                 int32_t length,
  2278                 const UnicodeString& oldText,
  2279                 int32_t oldStart,
  2280                 int32_t oldLength,
  2281                 const UnicodeString& newText,
  2282                 int32_t newStart,
  2283                 int32_t newLength);
  2284 
  2285 
  2286   /* Remove operations */
  2287 
  2288   /**
  2289    * Remove all characters from the UnicodeString object.
  2290    * @return a reference to this
  2291    * @stable ICU 2.0
  2292    */
  2293   inline UnicodeString& remove(void);
  2294 
  2295   /**
  2296    * Remove the characters in the range
  2297    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
  2298    * @param start the offset of the first character to remove
  2299    * @param length the number of characters to remove
  2300    * @return a reference to this
  2301    * @stable ICU 2.0
  2302    */
  2303   inline UnicodeString& remove(int32_t start,
  2304                                int32_t length = (int32_t)INT32_MAX);
  2305 
  2306   /**
  2307    * Remove the characters in the range
  2308    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
  2309    * @param start the offset of the first character to remove
  2310    * @param limit the offset immediately following the range to remove
  2311    * @return a reference to this
  2312    * @stable ICU 2.0
  2313    */
  2314   inline UnicodeString& removeBetween(int32_t start,
  2315                                       int32_t limit = (int32_t)INT32_MAX);
  2316 
  2317 
  2318   /* Length operations */
  2319 
  2320   /**
  2321    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
  2322    * If the length of this UnicodeString is less than targetLength,
  2323    * targetLength - length() copies of padChar will be added to the
  2324    * beginning of this UnicodeString.
  2325    * @param targetLength the desired length of the string
  2326    * @param padChar the character to use for padding. Defaults to
  2327    * space (U+0020)
  2328    * @return TRUE if the text was padded, FALSE otherwise.
  2329    * @stable ICU 2.0
  2330    */
  2331   UBool padLeading(int32_t targetLength,
  2332                     UChar padChar = 0x0020);
  2333 
  2334   /**
  2335    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
  2336    * If the length of this UnicodeString is less than targetLength,
  2337    * targetLength - length() copies of padChar will be added to the
  2338    * end of this UnicodeString.
  2339    * @param targetLength the desired length of the string
  2340    * @param padChar the character to use for padding. Defaults to
  2341    * space (U+0020)
  2342    * @return TRUE if the text was padded, FALSE otherwise.
  2343    * @stable ICU 2.0
  2344    */
  2345   UBool padTrailing(int32_t targetLength,
  2346                      UChar padChar = 0x0020);
  2347 
  2348   /**
  2349    * Truncate this UnicodeString to the <TT>targetLength</TT>.
  2350    * @param targetLength the desired length of this UnicodeString.
  2351    * @return TRUE if the text was truncated, FALSE otherwise
  2352    * @stable ICU 2.0
  2353    */
  2354   inline UBool truncate(int32_t targetLength);
  2355 
  2356   /**
  2357    * Trims leading and trailing whitespace from this UnicodeString.
  2358    * @return a reference to this
  2359    * @stable ICU 2.0
  2360    */
  2361   UnicodeString& trim(void);
  2362 
  2363 
  2364   /* Miscellaneous operations */
  2365 
  2366   /**
  2367    * Reverse this UnicodeString in place.
  2368    * @return a reference to this
  2369    * @stable ICU 2.0
  2370    */
  2371   inline UnicodeString& reverse(void);
  2372 
  2373   /**
  2374    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
  2375    * this UnicodeString.
  2376    * @param start the start of the range to reverse
  2377    * @param length the number of characters to reverse
  2378    * @return a reference to this
  2379    * @stable ICU 2.0
  2380    */
  2381   inline UnicodeString& reverse(int32_t start,
  2382              int32_t length);
  2383 
  2384   /**
  2385    * Convert the characters in this to UPPER CASE following the conventions of
  2386    * the default locale.
  2387    * @return A reference to this.
  2388    * @stable ICU 2.0
  2389    */
  2390   UnicodeString& toUpper(void);
  2391 
  2392   /**
  2393    * Convert the characters in this to UPPER CASE following the conventions of
  2394    * a specific locale.
  2395    * @param locale The locale containing the conventions to use.
  2396    * @return A reference to this.
  2397    * @stable ICU 2.0
  2398    */
  2399   UnicodeString& toUpper(const Locale& locale);
  2400 
  2401   /**
  2402    * Convert the characters in this to lower case following the conventions of
  2403    * the default locale.
  2404    * @return A reference to this.
  2405    * @stable ICU 2.0
  2406    */
  2407   UnicodeString& toLower(void);
  2408 
  2409   /**
  2410    * Convert the characters in this to lower case following the conventions of
  2411    * a specific locale.
  2412    * @param locale The locale containing the conventions to use.
  2413    * @return A reference to this.
  2414    * @stable ICU 2.0
  2415    */
  2416   UnicodeString& toLower(const Locale& locale);
  2417 
  2418 #if !UCONFIG_NO_BREAK_ITERATION
  2419 
  2420   /**
  2421    * Titlecase this string, convenience function using the default locale.
  2422    *
  2423    * Casing is locale-dependent and context-sensitive.
  2424    * Titlecasing uses a break iterator to find the first characters of words
  2425    * that are to be titlecased. It titlecases those characters and lowercases
  2426    * all others.
  2427    *
  2428    * The titlecase break iterator can be provided to customize for arbitrary
  2429    * styles, using rules and dictionaries beyond the standard iterators.
  2430    * It may be more efficient to always provide an iterator to avoid
  2431    * opening and closing one for each string.
  2432    * The standard titlecase iterator for the root locale implements the
  2433    * algorithm of Unicode TR 21.
  2434    *
  2435    * This function uses only the first() and next() methods of the
  2436    * provided break iterator.
  2437    *
  2438    * @param titleIter A break iterator to find the first characters of words
  2439    *                  that are to be titlecased.
  2440    *                  If none is provided (0), then a standard titlecase
  2441    *                  break iterator is opened.
  2442    *                  Otherwise the provided iterator is set to the string's text.
  2443    * @return A reference to this.
  2444    * @stable ICU 2.1
  2445    */
  2446   UnicodeString &toTitle(BreakIterator *titleIter);
  2447 
  2448   /**
  2449    * Titlecase this string.
  2450    *
  2451    * Casing is locale-dependent and context-sensitive.
  2452    * Titlecasing uses a break iterator to find the first characters of words
  2453    * that are to be titlecased. It titlecases those characters and lowercases
  2454    * all others.
  2455    *
  2456    * The titlecase break iterator can be provided to customize for arbitrary
  2457    * styles, using rules and dictionaries beyond the standard iterators.
  2458    * It may be more efficient to always provide an iterator to avoid
  2459    * opening and closing one for each string.
  2460    * The standard titlecase iterator for the root locale implements the
  2461    * algorithm of Unicode TR 21.
  2462    *
  2463    * This function uses only the first() and next() methods of the
  2464    * provided break iterator.
  2465    *
  2466    * @param titleIter A break iterator to find the first characters of words
  2467    *                  that are to be titlecased.
  2468    *                  If none is provided (0), then a standard titlecase
  2469    *                  break iterator is opened.
  2470    *                  Otherwise the provided iterator is set to the string's text.
  2471    * @param locale    The locale to consider.
  2472    * @return A reference to this.
  2473    * @stable ICU 2.1
  2474    */
  2475   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  2476 
  2477 #endif
  2478 
  2479   /**
  2480    * Case-fold the characters in this string.
  2481    * Case-folding is locale-independent and not context-sensitive,
  2482    * but there is an option for whether to include or exclude mappings for dotted I
  2483    * and dotless i that are marked with 'I' in CaseFolding.txt.
  2484    * The result may be longer or shorter than the original.
  2485    *
  2486    * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
  2487    * @return A reference to this.
  2488    * @stable ICU 2.0
  2489    */
  2490   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
  2491 
  2492   //========================================
  2493   // Access to the internal buffer
  2494   //========================================
  2495 
  2496   /**
  2497    * Get a read/write pointer to the internal buffer.
  2498    * The buffer is guaranteed to be large enough for at least minCapacity UChars,
  2499    * writable, and is still owned by the UnicodeString object.
  2500    * Calls to getBuffer(minCapacity) must not be nested, and
  2501    * must be matched with calls to releaseBuffer(newLength).
  2502    * If the string buffer was read-only or shared,
  2503    * then it will be reallocated and copied.
  2504    *
  2505    * An attempted nested call will return 0, and will not further modify the
  2506    * state of the UnicodeString object.
  2507    * It also returns 0 if the string is bogus.
  2508    *
  2509    * The actual capacity of the string buffer may be larger than minCapacity.
  2510    * getCapacity() returns the actual capacity.
  2511    * For many operations, the full capacity should be used to avoid reallocations.
  2512    *
  2513    * While the buffer is "open" between getBuffer(minCapacity)
  2514    * and releaseBuffer(newLength), the following applies:
  2515    * - The string length is set to 0.
  2516    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
  2517    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
  2518    * - You can read from and write to the returned buffer.
  2519    * - The previous string contents will still be in the buffer;
  2520    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
  2521    *   If the length() was greater than minCapacity, then any contents after minCapacity
  2522    *   may be lost.
  2523    *   The buffer contents is not NUL-terminated by getBuffer().
  2524    *   If length()<getCapacity() then you can terminate it by writing a NUL
  2525    *   at index length().
  2526    * - You must call releaseBuffer(newLength) to close the buffer and
  2527    *   return to normal UnicodeString operation.
  2528    *
  2529    * @param minCapacity the minimum number of UChars that are to be available
  2530    *        in the buffer, starting at the returned pointer;
  2531    *        default to the current string capacity if minCapacity==-1
  2532    * @return a writable pointer to the internal string buffer,
  2533    *         or 0 if an error occurs (nested calls, out of memory)
  2534    *
  2535    * @see releaseBuffer
  2536    * @see getTerminatedBuffer()
  2537    * @stable ICU 2.0
  2538    */
  2539   UChar *getBuffer(int32_t minCapacity);
  2540 
  2541   /**
  2542    * Release a read/write buffer on a UnicodeString object with an
  2543    * "open" getBuffer(minCapacity).
  2544    * This function must be called in a matched pair with getBuffer(minCapacity).
  2545    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
  2546    *
  2547    * It will set the string length to newLength, at most to the current capacity.
  2548    * If newLength==-1 then it will set the length according to the
  2549    * first NUL in the buffer, or to the capacity if there is no NUL.
  2550    *
  2551    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
  2552    *
  2553    * @param newLength the new length of the UnicodeString object;
  2554    *        defaults to the current capacity if newLength is greater than that;
  2555    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
  2556    *        the current capacity of the string
  2557    *
  2558    * @see getBuffer(int32_t minCapacity)
  2559    * @stable ICU 2.0
  2560    */
  2561   void releaseBuffer(int32_t newLength=-1);
  2562 
  2563   /**
  2564    * Get a read-only pointer to the internal buffer.
  2565    * This can be called at any time on a valid UnicodeString.
  2566    *
  2567    * It returns 0 if the string is bogus, or
  2568    * during an "open" getBuffer(minCapacity).
  2569    *
  2570    * It can be called as many times as desired.
  2571    * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2572    * at which time the pointer is semantically invalidated and must not be used any more.
  2573    *
  2574    * The capacity of the buffer can be determined with getCapacity().
  2575    * The part after length() may or may not be initialized and valid,
  2576    * depending on the history of the UnicodeString object.
  2577    *
  2578    * The buffer contents is (probably) not NUL-terminated.
  2579    * You can check if it is with
  2580    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
  2581    * (See getTerminatedBuffer().)
  2582    *
  2583    * The buffer may reside in read-only memory. Its contents must not
  2584    * be modified.
  2585    *
  2586    * @return a read-only pointer to the internal string buffer,
  2587    *         or 0 if the string is empty or bogus
  2588    *
  2589    * @see getBuffer(int32_t minCapacity)
  2590    * @see getTerminatedBuffer()
  2591    * @stable ICU 2.0
  2592    */
  2593   inline const UChar *getBuffer() const;
  2594 
  2595   /**
  2596    * Get a read-only pointer to the internal buffer,
  2597    * making sure that it is NUL-terminated.
  2598    * This can be called at any time on a valid UnicodeString.
  2599    *
  2600    * It returns 0 if the string is bogus, or
  2601    * during an "open" getBuffer(minCapacity), or if the buffer cannot
  2602    * be NUL-terminated (because memory allocation failed).
  2603    *
  2604    * It can be called as many times as desired.
  2605    * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2606    * at which time the pointer is semantically invalidated and must not be used any more.
  2607    *
  2608    * The capacity of the buffer can be determined with getCapacity().
  2609    * The part after length()+1 may or may not be initialized and valid,
  2610    * depending on the history of the UnicodeString object.
  2611    *
  2612    * The buffer contents is guaranteed to be NUL-terminated.
  2613    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
  2614    * is written.
  2615    * For this reason, this function is not const, unlike getBuffer().
  2616    * Note that a UnicodeString may also contain NUL characters as part of its contents.
  2617    *
  2618    * The buffer may reside in read-only memory. Its contents must not
  2619    * be modified.
  2620    *
  2621    * @return a read-only pointer to the internal string buffer,
  2622    *         or 0 if the string is empty or bogus
  2623    *
  2624    * @see getBuffer(int32_t minCapacity)
  2625    * @see getBuffer()
  2626    * @stable ICU 2.2
  2627    */
  2628   inline const UChar *getTerminatedBuffer();
  2629 
  2630   //========================================
  2631   // Constructors
  2632   //========================================
  2633 
  2634   /** Construct an empty UnicodeString.
  2635    * @stable ICU 2.0
  2636    */
  2637   UnicodeString();
  2638 
  2639   /**
  2640    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
  2641    * @param capacity the number of UChars this UnicodeString should hold
  2642    * before a resize is necessary; if count is greater than 0 and count
  2643    * code points c take up more space than capacity, then capacity is adjusted
  2644    * accordingly.
  2645    * @param c is used to initially fill the string
  2646    * @param count specifies how many code points c are to be written in the
  2647    *              string
  2648    * @stable ICU 2.0
  2649    */
  2650   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
  2651 
  2652   /**
  2653    * Single UChar (code unit) constructor.
  2654    * @param ch the character to place in the UnicodeString
  2655    * @stable ICU 2.0
  2656    */
  2657   UnicodeString(UChar ch);
  2658 
  2659   /**
  2660    * Single UChar32 (code point) constructor.
  2661    * @param ch the character to place in the UnicodeString
  2662    * @stable ICU 2.0
  2663    */
  2664   UnicodeString(UChar32 ch);
  2665 
  2666   /**
  2667    * UChar* constructor.
  2668    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
  2669    * must be NUL (U+0000) terminated.
  2670    * @stable ICU 2.0
  2671    */
  2672   UnicodeString(const UChar *text);
  2673 
  2674   /**
  2675    * UChar* constructor.
  2676    * @param text The characters to place in the UnicodeString.
  2677    * @param textLength The number of Unicode characters in <TT>text</TT>
  2678    * to copy.
  2679    * @stable ICU 2.0
  2680    */
  2681   UnicodeString(const UChar *text,
  2682         int32_t textLength);
  2683 
  2684   /**
  2685    * Readonly-aliasing UChar* constructor.
  2686    * The text will be used for the UnicodeString object, but
  2687    * it will not be released when the UnicodeString is destroyed.
  2688    * This has copy-on-write semantics:
  2689    * When the string is modified, then the buffer is first copied into
  2690    * newly allocated memory.
  2691    * The aliased buffer is never modified.
  2692    * In an assignment to another UnicodeString, the text will be aliased again,
  2693    * so that both strings then alias the same readonly-text.
  2694    *
  2695    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  2696    *                     This must be true if <code>textLength==-1</code>.
  2697    * @param text The characters to alias for the UnicodeString.
  2698    * @param textLength The number of Unicode characters in <code>text</code> to alias.
  2699    *                   If -1, then this constructor will determine the length
  2700    *                   by calling <code>u_strlen()</code>.
  2701    * @stable ICU 2.0
  2702    */
  2703   UnicodeString(UBool isTerminated,
  2704                 const UChar *text,
  2705                 int32_t textLength);
  2706 
  2707   /**
  2708    * Writable-aliasing UChar* constructor.
  2709    * The text will be used for the UnicodeString object, but
  2710    * it will not be released when the UnicodeString is destroyed.
  2711    * This has write-through semantics:
  2712    * For as long as the capacity of the buffer is sufficient, write operations
  2713    * will directly affect the buffer. When more capacity is necessary, then
  2714    * a new buffer will be allocated and the contents copied as with regularly
  2715    * constructed strings.
  2716    * In an assignment to another UnicodeString, the buffer will be copied.
  2717    * The extract(UChar *dst) function detects whether the dst pointer is the same
  2718    * as the string buffer itself and will in this case not copy the contents.
  2719    *
  2720    * @param buffer The characters to alias for the UnicodeString.
  2721    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  2722    * @param buffCapacity The size of <code>buffer</code> in UChars.
  2723    * @stable ICU 2.0
  2724    */
  // Takes a non-const UChar*: per the doc comment above, writes go through
  // to buffer for as long as its capacity is sufficient.
  2725   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
  2726 
  2727 #if !UCONFIG_NO_CONVERSION
  2728 
  2729   /**
  2730    * char* constructor.
  2731    * @param codepageData an array of bytes, null-terminated
  2732    * @param codepage the encoding of <TT>codepageData</TT>.  The special
  2733    * value 0 for <TT>codepage</TT> indicates that the text is in the
  2734    * platform's default codepage.
  2735    *
  2736    * If <code>codepage</code> is an empty string (<code>""</code>),
  2737    * then a simple conversion is performed on the codepage-invariant
  2738    * subset ("invariant characters") of the platform encoding. See utypes.h.
  2739    * Recommendation: For invariant-character strings use the constructor
  2740    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2741    * because it avoids object code dependencies of UnicodeString on
  2742    * the conversion code.
  2743    *
  2744    * @stable ICU 2.0
  2745    */
  // Declared only when UCONFIG_NO_CONVERSION is 0 (enclosing #if).
  // codepage defaults to 0, which the doc comment above defines as the
  // platform's default codepage.
  2746   UnicodeString(const char *codepageData,
  2747         const char *codepage = 0);
  2748 
  2749   /**
  2750    * char* constructor.
  2751    * @param codepageData an array of bytes.
  2752    * @param dataLength The number of bytes in <TT>codepageData</TT>.
  2753    * @param codepage the encoding of <TT>codepageData</TT>.  The special
  2754    * value 0 for <TT>codepage</TT> indicates that the text is in the
  2755    * platform's default codepage.
  2756    * If <code>codepage</code> is an empty string (<code>""</code>),
  2757    * then a simple conversion is performed on the codepage-invariant
  2758    * subset ("invariant characters") of the platform encoding. See utypes.h.
  2759    * Recommendation: For invariant-character strings use the constructor
  2760    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2761    * because it avoids object code dependencies of UnicodeString on
  2762    * the conversion code.
  2763    *
  2764    * @stable ICU 2.0
  2765    */
  // Length-taking variant of the char* constructor; also inside the
  // !UCONFIG_NO_CONVERSION section. codepage again defaults to 0.
  2766   UnicodeString(const char *codepageData,
  2767         int32_t dataLength,
  2768         const char *codepage = 0);
  2769 
  2770   /**
  2771    * char * / UConverter constructor.
  2772    * This constructor uses an existing UConverter object to
  2773    * convert the codepage string to Unicode and construct a UnicodeString
  2774    * from that.
  2775    *
  2776    * The converter is reset at first.
  2777    * If the error code indicates a failure before this constructor is called,
  2778    * or if an error occurs during conversion or construction,
  2779    * then the string will be bogus.
  2780    *
  2781    * This function avoids the overhead of opening and closing a converter if
  2782    * multiple strings are constructed.
  2783    *
  2784    * @param src input codepage string
  2785    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
  2786    * @param cnv converter object (ucnv_resetToUnicode() will be called),
  2787    *        can be NULL for the default converter
  2788    * @param errorCode normal ICU error code
  2789    * @stable ICU 2.0
  2790    */
  // Last constructor inside the !UCONFIG_NO_CONVERSION section.
  // errorCode is taken by non-const reference; per the doc comment above the
  // resulting string is bogus if it signals failure on entry or if
  // conversion/construction fails.
  2791   UnicodeString(
  2792         const char *src, int32_t srcLength,
  2793         UConverter *cnv,
  2794         UErrorCode &errorCode);
  2795 
  2796 #endif
  2797 
  2798   /**
  2799    * Constructs a Unicode string from an invariant-character char * string.
  2800    * About invariant characters see utypes.h.
  2801    * This constructor has no runtime dependency on conversion code and is
  2802    * therefore recommended over ones taking a charset name string
  2803    * (where the empty string "" indicates invariant-character conversion).
  2804    *
  2805    * Use the macro US_INV as the third, signature-distinguishing parameter.
  2806    *
  2807    * For example:
  2808    * \code
  2809    * void fn(const char *s) {
  2810    *   UnicodeString ustr(s, -1, US_INV);
  2811    *   // use ustr ...
  2812    * }
  2813    * \endcode
  2814    *
  2815    * @param src String using only invariant characters.
  2816    * @param length Length of src, or -1 if NUL-terminated.
  2817    * @param inv Signature-distinguishing parameter, use US_INV.
  2818    *
  2819    * @see US_INV
  2820    * @draft ICU 3.2
  2821    */
  // Declared after the #endif of the conversion section, so it remains
  // available when UCONFIG_NO_CONVERSION is set.
  2822   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
  2823 
  2824 
  2825   /**
  2826    * Copy constructor.
  2827    * @param that The UnicodeString object to copy.
  2828    * @stable ICU 2.0
  2829    */
  // The private copyFrom() helper declared further down in this class is
  // commented as implementing the copy constructor.
  2830   UnicodeString(const UnicodeString& that);
  2831 
  2832   /**
  2833    * 'Substring' constructor from tail of source string.
  2834    * @param src The UnicodeString object to copy.
  2835    * @param srcStart The offset into <tt>src</tt> at which to start copying.
  2836    * @stable ICU 2.2
  2837    */
  // Tail form: no length argument; copying starts at srcStart (see above).
  2838   UnicodeString(const UnicodeString& src, int32_t srcStart);
  2839 
  2840   /**
  2841    * 'Substring' constructor from subrange of source string.
  2842    * @param src The UnicodeString object to copy.
  2843    * @param srcStart The offset into <tt>src</tt> at which to start copying.
  2844    * @param srcLength The number of characters from <tt>src</tt> to copy.
  2845    * @stable ICU 2.2
  2846    */
  // Subrange form: srcLength characters starting at srcStart (see above).
  2847   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  2848 
  2849   /**
  2850    * Clone this object, an instance of a subclass of Replaceable.
  2851    * Clones can be used concurrently in multiple threads.
  2852    * If a subclass does not implement clone(), or if an error occurs,
  2853    * then NULL is returned.
  2854    * The clone functions in all subclasses return a pointer to a Replaceable
  2855    * because some compilers do not support covariant (same-as-this)
  2856    * return types; cast to the appropriate subclass if necessary.
  2857    * The caller must delete the clone.
  2858    *
  2859    * @return a clone of this object
  2860    *
  2861    * @see Replaceable::clone
  2862    * @see getDynamicClassID
  2863    * @stable ICU 2.6
  2864    */
  // Declared with return type Replaceable*, not UnicodeString* (see the note
  // on covariant return types above); callers cast if they need the
  // subclass, and they own the returned object.
  2865   virtual Replaceable *clone() const;
  2866 
  2867   /** Destructor.
  2868    * @stable ICU 2.0
  2869    */
  // Virtual, like the class's other virtual members (clone(),
  // getDynamicClassID()).
  2870   virtual ~UnicodeString();
  2871 
  2872 
  2873   /* Miscellaneous operations */
  2874 
  2875   /**
  2876    * Unescape a string of characters and return a string containing
  2877    * the result.  The following escape sequences are recognized:
  2878    *
  2879    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
  2880    * \\Uhhhhhhhh   8 hex digits
  2881    * \\xhh         1-2 hex digits
  2882    * \\ooo         1-3 octal digits; o in [0-7]
  2883    * \\cX          control-X; X is masked with 0x1F
  2884    *
  2885    * as well as the standard ANSI C escapes:
  2886    *
  2887    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
  2888    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
  2889    * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
  2890    *
  2891    * Anything else following a backslash is generically escaped.  For
  2892    * example, "[a\\-z]" returns "[a-z]".
  2893    *
  2894    * If an escape sequence is ill-formed, this method returns an empty
  2895    * string.  An example of an ill-formed sequence is "\\u" followed by
  2896    * fewer than 4 hex digits.
  2897    *
  2898    * This function is similar to u_unescape() but not identical to it.
  2899    * The latter takes a source char*, so it does escape recognition
  2900    * and also invariant conversion.
  2901    *
  2902    * @return a string with backslash escapes interpreted, or an
  2903    * empty string on error.
  2904    * @see UnicodeString#unescapeAt()
  2905    * @see u_unescape()
  2906    * @see u_unescapeAt()
  2907    * @stable ICU 2.0
  2908    */
  // const member returning the unescaped text as a new UnicodeString by value.
  2909   UnicodeString unescape() const;
  2910 
  2911   /**
  2912    * Unescape a single escape sequence and return the represented
  2913    * character.  See unescape() for a listing of the recognized escape
  2914    * sequences.  The character at offset-1 is assumed (without
  2915    * checking) to be a backslash.  If the escape sequence is
  2916    * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
  2917    * returned.
  2918    *
  2919    * @param offset an input output parameter.  On input, it is the
  2920    * offset into this string where the escape sequence is located,
  2921    * after the initial backslash.  On output, it is advanced after the
  2922    * last character parsed.  On error, it is not advanced at all.
  2923    * @return the character represented by the escape sequence at
  2924    * offset, or (UChar32)0xFFFFFFFF on error.
  2925    * @see UnicodeString#unescape()
  2926    * @see u_unescape()
  2927    * @see u_unescapeAt()
  2928    * @stable ICU 2.0
  2929    */
  // offset is a non-const reference because it is both input and output:
  // advanced past the parsed sequence on success, left untouched on error.
  2930   UChar32 unescapeAt(int32_t &offset) const;
  2931 
  2932   /**
  2933    * ICU "poor man's RTTI", returns a UClassID for this class.
  2934    *
  2935    * @stable ICU 2.2
  2936    */
  // Static counterpart of getDynamicClassID(); callable without an instance.
  2937   static UClassID U_EXPORT2 getStaticClassID();
  2938 
  2939   /**
  2940    * ICU "poor man's RTTI", returns a UClassID for the actual class.
  2941    *
  2942    * @stable ICU 2.2
  2943    */
  // Virtual, so the ID reported is that of the object's actual class.
  2944   virtual UClassID getDynamicClassID() const;
  2945 
  2946   //========================================
  2947   // Implementation methods
  2948   //========================================
  2949 
  2950 protected:
  2951   /**
  2952    * Implement Replaceable::getLength() (see jitterbug 1027).
  2953    * @stable ICU 2.4
  2954    */
  // Protected virtual hook for Replaceable; the public inline length()
  // accessor is defined separately near the end of this header.
  2955   virtual int32_t getLength() const;
  2956 
  2957   /**
  2958    * The change in Replaceable to use virtual getCharAt() allows
  2959    * UnicodeString::charAt() to be inline again (see jitterbug 709).
  2960    * @stable ICU 2.4
  2961    */
  // Protected virtual hook for Replaceable; kept separate from the public
  // charAt() so that charAt() can be inline (per the note above).
  2962   virtual UChar getCharAt(int32_t offset) const;
  2963 
  2964   /**
  2965    * The change in Replaceable to use virtual getChar32At() allows
  2966    * UnicodeString::char32At() to be inline again (see jitterbug 709).
  2967    * @stable ICU 2.4
  2968    */
  // Protected virtual hook for Replaceable; kept separate from the public
  // char32At() so that char32At() can be inline (per the note above).
  2969   virtual UChar32 getChar32At(int32_t offset) const;
  2970 
  2971 private:
  2972 
  // Compare a (start, length) range of this string with a range of another
  // string. The UnicodeString overload is inline (defined later in this
  // header): it handles a bogus srcText, pins srcStart/srcLength to srcText
  // and forwards to the UChar* overload.
  2973   inline int8_t
  2974   doCompare(int32_t start,
  2975            int32_t length,
  2976            const UnicodeString& srcText,
  2977            int32_t srcStart,
  2978            int32_t srcLength) const;
  2979 
  // UChar* overload: not defined in this header. The inline relational
  // operators test its result against -1, 0 and 1.
  2980   int8_t doCompare(int32_t start,
  2981            int32_t length,
  2982            const UChar *srcChars,
  2983            int32_t srcStart,
  2984            int32_t srcLength) const;
  2985 
  // Code-point-order counterpart of doCompare(). The UnicodeString overload
  // is inline (defined later in this header): it handles a bogus srcText,
  // pins srcStart/srcLength and forwards to the UChar* overload.
  2986   inline int8_t
  2987   doCompareCodePointOrder(int32_t start,
  2988                           int32_t length,
  2989                           const UnicodeString& srcText,
  2990                           int32_t srcStart,
  2991                           int32_t srcLength) const;
  2992 
  // UChar* overload: not defined in this header.
  2993   int8_t doCompareCodePointOrder(int32_t start,
  2994                                  int32_t length,
  2995                                  const UChar *srcChars,
  2996                                  int32_t srcStart,
  2997                                  int32_t srcLength) const;
  2998 
  // Worker for the caseCompare() overloads; options is passed through
  // unchanged. The UnicodeString overload is inline (defined later in this
  // header): it handles a bogus srcText, pins srcStart/srcLength and forwards
  // to the UChar* overload.
  2999   inline int8_t
  3000   doCaseCompare(int32_t start,
  3001                 int32_t length,
  3002                 const UnicodeString &srcText,
  3003                 int32_t srcStart,
  3004                 int32_t srcLength,
  3005                 uint32_t options) const;
  3006 
  // UChar* overload: not defined in this header.
  3007   int8_t
  3008   doCaseCompare(int32_t start,
  3009                 int32_t length,
  3010                 const UChar *srcChars,
  3011                 int32_t srcStart,
  3012                 int32_t srcLength,
  3013                 uint32_t options) const;
  3014 
  // Search workers taking a single UChar or a UChar32 plus a (start, length)
  // range. NOTE(review): presumably these back the public single-character
  // indexOf()/lastIndexOf() overloads; neither their callers nor their
  // bodies are visible in this part of the header — confirm in the
  // implementation file.
  3015   int32_t doIndexOf(UChar c,
  3016             int32_t start,
  3017             int32_t length) const;
  3018 
  3019   int32_t doIndexOf(UChar32 c,
  3020                         int32_t start,
  3021                         int32_t length) const;
  3022 
  3023   int32_t doLastIndexOf(UChar c,
  3024                 int32_t start,
  3025                 int32_t length) const;
  3026 
  3027   int32_t doLastIndexOf(UChar32 c,
  3028                             int32_t start,
  3029                             int32_t length) const;
  3030 
  // Extraction workers: one writes into a caller-supplied UChar array at
  // dstStart, the other (inline) into another UnicodeString.
  // NOTE(review): bodies are not visible here; presumably they copy the
  // (start, length) range of this string — confirm.
  3031   void doExtract(int32_t start,
  3032          int32_t length,
  3033          UChar *dst,
  3034          int32_t dstStart) const;
  3035 
  3036   inline void doExtract(int32_t start,
  3037          int32_t length,
  3038          UnicodeString& target) const;
  3039 
  // Inline single-character accessor; its definition is outside the part of
  // the header shown here. NOTE(review): presumably the worker behind
  // charAt() — confirm.
  3040   inline UChar doCharAt(int32_t offset)  const;
  3041 
  // Non-const workers returning UnicodeString&; one overload takes the
  // replacement text as a UnicodeString, the other as a UChar array.
  // NOTE(review): presumably they replace the (start, length) range of this
  // string with the (srcStart, srcLength) range of the source and return
  // *this — bodies are not visible here, confirm.
  3042   UnicodeString& doReplace(int32_t start,
  3043                int32_t length,
  3044                const UnicodeString& srcText,
  3045                int32_t srcStart,
  3046                int32_t srcLength);
  3047 
  3048   UnicodeString& doReplace(int32_t start,
  3049                int32_t length,
  3050                const UChar *srcChars,
  3051                int32_t srcStart,
  3052                int32_t srcLength);
  3053 
  // Non-const worker taking a (start, length) range and returning
  // UnicodeString&. NOTE(review): presumably reverses that range in place —
  // body not visible here, confirm.
  3054   UnicodeString& doReverse(int32_t start,
  3055                int32_t length);
  3056 
  3057   // calculate hash code
  // The public inline hashCode() simply returns this function's result.
  3058   int32_t doHashCode(void) const;
  3059 
  3060   // get pointer to start of array
  // Both overloads are defined inline later in this header and return fArray.
  3061   inline UChar* getArrayStart(void);
  3062   inline const UChar* getArrayStart(void) const;
  3063 
  3064   // allocate the array; result may be fStackBuffer
  3065   // sets refCount to 1 if appropriate
  3066   // sets fArray, fCapacity, and fFlags
  3067   // returns boolean for success or failure
  // NOTE(review): the unit of capacity is not stated here; presumably
  // UChars, like fCapacity — confirm.
  3068   UBool allocate(int32_t capacity);
  3069 
  3070   // release the array if owned
  // NOTE(review): "owned" presumably excludes the stack buffer and aliased
  // buffers (see the fFlags constants) — confirm in the implementation.
  3071   void releaseArray(void);
  3072 
  3073   // turn a bogus string into an empty one
  // The bogus state is the kIsBogus bit of fFlags, as tested by isBogus().
  3074   void unBogus();
  3075 
  3076   // implements assignment operator, copy constructor, and fastCopyFrom()
  // fastCopy defaults to FALSE. NOTE(review): presumably fastCopyFrom() is
  // the caller that passes TRUE — confirm.
  3077   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
  3078 
  3079   // Pin start and limit to acceptable values.
  // Both take their arguments by reference and clamp them in place:
  // start into [0, fLength], length into [0, fLength - start]
  // (see the inline definitions later in this header).
  3080   inline void pinIndex(int32_t& start) const;
  3081   inline void pinIndices(int32_t& start,
  3082                          int32_t& length) const;
  3083 
  3084 #if !UCONFIG_NO_CONVERSION
  3085 
  3086   /* Internal extract() using UConverter. */
  // Declared only when UCONFIG_NO_CONVERSION is 0 (enclosing #if).
  // NOTE(review): the int32_t result is presumably the number of bytes
  // written or needed in dest — not stated here, confirm.
  3087   int32_t doExtract(int32_t start, int32_t length,
  3088                     char *dest, int32_t destCapacity,
  3089                     UConverter *cnv,
  3090                     UErrorCode &errorCode) const;
  3091 
  3092   /*
  3093    * Real constructor for converting from codepage data.
  3094    * It assumes that it is called with !fRefCounted.
  3095    *
  3096    * If <code>codepage==0</code>, then the default converter
  3097    * is used for the platform encoding.
  3098    * If <code>codepage</code> is an empty string (<code>""</code>),
  3099    * then a simple conversion is performed on the codepage-invariant
  3100    * subset ("invariant characters") of the platform encoding. See utypes.h.
  3101    */
  // Takes the codepage by name; the overload declared next takes an
  // already-opened UConverter instead.
  3102   void doCodepageCreate(const char *codepageData,
  3103                         int32_t dataLength,
  3104                         const char *codepage);
  3105 
  3106   /*
  3107    * Worker function for creating a UnicodeString from
  3108    * a codepage string using a UConverter.
  3109    */
  // Reports failure through the UErrorCode reference rather than a return
  // value (the function returns void).
  3110   void
  3111   doCodepageCreate(const char *codepageData,
  3112                    int32_t dataLength,
  3113                    UConverter *converter,
  3114                    UErrorCode &status);
  3115 
  3116 #endif
  3117 
  3118   /*
  3119    * This function is called when write access to the array
  3120    * is necessary.
  3121    *
  3122    * We need to make a copy of the array if
  3123    * the buffer is read-only, or
  3124    * the buffer is refCounted (shared), and refCount>1, or
  3125    * the buffer is too small.
  3126    *
  3127    * Return FALSE if memory could not be allocated.
  3128    */
  // Every parameter has a default: newCapacity=-1, growCapacity=-1,
  // doCopyArray=TRUE, pBufferToDelete=0, forceClone=FALSE.
  // NOTE(review): the meaning of -1 and of pBufferToDelete is not visible
  // in this header — confirm against the implementation.
  3129   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
  3130                             int32_t growCapacity = -1,
  3131                             UBool doCopyArray = TRUE,
  3132                             int32_t **pBufferToDelete = 0,
  3133                             UBool forceClone = FALSE);
  3134 
  3135   // common function for case mappings
  // Non-const; returns UnicodeString&. NOTE(review): toWhichCase presumably
  // selects lower/upper/title/fold and titleIter is presumably only used for
  // title casing — neither is visible in this header, confirm.
  3136   UnicodeString &
  3137   caseMap(BreakIterator *titleIter,
  3138           const char *locale,
  3139           uint32_t options,
  3140           int32_t toWhichCase);
  3141 
  3142   // ref counting
  // NOTE(review): presumably these operate on the refCount field that the
  // kRefCounted flag describes as sitting before the characters in fArray,
  // with removeRef() returning the new count — confirm.
  3143   void addRef(void);
  3144   int32_t removeRef(void);
  3145   int32_t refCount(void) const;
  3146 
  3147   // constants
  // The fFlags values are distinct single bits (1, 2, 4, 8, 16), so they can
  // be combined and tested with bitwise operators, as isBogus() and
  // getBuffer() do. kWritableAlias is 0, i.e. none of the flag bits set.
  3148   enum {
  3149     US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
  3150     kInvalidUChar=0xffff, // invalid UChar index
  3151     kGrowSize=128, // grow size for this buffer
  3152     kInvalidHashCode=0, // invalid hash code
  3153     kEmptyHashCode=1, // hash code for empty string
  3154 
  3155     // bit flag values for fFlags
  3156     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
  3157     kUsingStackBuffer=2,// fArray==fStackBuffer
  3158     kRefCounted=4,      // there is a refCount field before the characters in fArray
  3159     kBufferIsReadonly=8,// do not write to this buffer
  3160     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
  3161                         // and releaseBuffer(newLength) must be called
  3162 
  3163     // combined values for convenience
  3164     kShortString=kUsingStackBuffer,
  3165     kLongString=kRefCounted,
  3166     kReadonlyAlias=kBufferIsReadonly,
  3167     kWritableAlias=0
  3168   };
  3169 
  // Both classes are only forward-declared near the top of this header;
  // friendship gives them access to the private members above.
  3170   friend class StringCharacterIterator;
  3171   friend class StringThreadTest;
  3172 
  3173   /*
  3174    * The following are all the class fields that are stored
  3175    * in each UnicodeString object.
  3176    * Note that UnicodeString has virtual functions,
  3177    * therefore there is an implicit vtable pointer
  3178    * as the first real field.
  3179    * The fields should be aligned such that no padding is
  3180    * necessary, mostly by having larger types first.
  3181    * On 32-bit machines, the size should be 32 bytes,
  3182    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
  3183    */
  // fStackBuffer has US_STACKBUF_SIZE (7) elements; the kUsingStackBuffer
  // flag in fFlags is documented as meaning fArray==fStackBuffer.
  3184   // (implicit) *vtable;
  3185   int32_t   fLength;        // number of characters in fArray
  3186   int32_t   fCapacity;      // sizeof fArray
  3187   UChar     *fArray;        // the Unicode data
  3188   uint16_t  fFlags;         // bit flags: see constants above
  3189   UChar     fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
  3190 
  3191 };
  3192 
  3193 /**
  3194  * Create a new UnicodeString with the concatenation of two others.
  3195  *
  3196  * @param s1 The first string to be copied to the new one.
  3197  * @param s2 The second string to be copied to the new one, after s1.
  3198  * @return UnicodeString(s1).append(s2)
  3199  * @stable ICU 2.8
  3200  */
  // Free function declared after the class body but before U_NAMESPACE_END;
  // both operands are const references and the result is returned by value.
  3201 U_COMMON_API UnicodeString U_EXPORT2
  3202 operator+ (const UnicodeString &s1, const UnicodeString &s2);
  3203 
  3204 U_NAMESPACE_END
  3205 
  3206 // inline implementations -------------------------------------------------- ***
  3207 
  3208 //========================================
  3209 // Array copying
  3210 //========================================
  3211 /**
  3212  * Copy an array of UnicodeString OBJECTS (not pointers).
  3213  * @internal
  3214  */
  // Element-wise copy: assigns *src to *dst count times, advancing both
  // pointers; a count <= 0 copies nothing. Defined between U_NAMESPACE_END
  // and U_NAMESPACE_BEGIN, hence the U_NAMESPACE_QUALIFIER on the type.
  3215 inline void
  3216 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
  3217 { while(count-- > 0) *dst++ = *src++; }
  3218 
  3219 /**
  3220  * Copy an array of UnicodeString OBJECTS (not pointers).
  3221  * @internal
  3222  */
  // Offset variant: forwards to the three-argument overload with
  // src+srcStart and dst+dstStart.
  3223 inline void
  3224 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
  3225         U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
  3226 { uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
  3227 
  3228 U_NAMESPACE_BEGIN
  3229 
  3230 //========================================
  3231 // Inline members
  3232 //========================================
  3233 
  3234 //========================================
  3235 // Privates
  3236 //========================================
  3237 
  // Clamps start, in place, into the range [0, fLength].
  3238 inline void
  3239 UnicodeString::pinIndex(int32_t& start) const
  3240 {
  3241   // pin index
  3242   if(start < 0) {
  3243     start = 0;
  3244   } else if(start > fLength) {
  3245     start = fLength;
  3246   }
  3247 }
  3248 
  // Clamps start into [0, fLength] and then _length into
  // [0, fLength - start], both in place. start is pinned first because the
  // upper bound for _length depends on the pinned start.
  3249 inline void
  3250 UnicodeString::pinIndices(int32_t& start,
  3251                           int32_t& _length) const
  3252 {
  3253   // pin indices
  3254   if(start < 0) {
  3255     start = 0;
  3256   } else if(start > fLength) {
  3257     start = fLength;
  3258   }
  3259   if(_length < 0) {
  3260     _length = 0;
  3261   } else if(_length > (fLength - start)) {
  3262     _length = (fLength - start);
  3263   }
  3264 }
  3265 
  // Non-const and const accessors for the character array: both return fArray.
  3266 inline UChar*
  3267 UnicodeString::getArrayStart()
  3268 { return fArray; }
  3269 
  3270 inline const UChar*
  3271 UnicodeString::getArrayStart() const
  3272 { return fArray; }
  3273 
  3274 //========================================
  3275 // Read-only implementation methods
  3276 //========================================
  // Returns fLength (documented on the field as the number of characters in
  // fArray).
  3277 inline int32_t
  3278 UnicodeString::length() const
  3279 { return fLength; }
  3280 
  // Returns fCapacity unchanged.
  3281 inline int32_t
  3282 UnicodeString::getCapacity() const
  3283 { return fCapacity; }
  3284 
  // Thin public wrapper: returns doHashCode().
  3285 inline int32_t
  3286 UnicodeString::hashCode() const
  3287 { return doHashCode(); }
  3288 
  // Tests the kIsBogus bit of fFlags. kIsBogus is 1, so the masked value
  // is exactly 0 or 1 before the cast to UBool.
  3289 inline UBool
  3290 UnicodeString::isBogus() const
  3291 { return (UBool)(fFlags & kIsBogus); }
  3292 
  // Read-only access to the character array. Returns fArray unless the
  // string is bogus or a getBuffer(minCapacity) call is still open
  // (kOpenGetBuffer set); in either of those cases it returns 0.
  3293 inline const UChar *
  3294 UnicodeString::getBuffer() const {
  3295   if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
  3296     return fArray;
  3297   } else {
  3298     return 0;
  3299   }
  3300 }
  3301 
  3302 //========================================
  3303 // Read-only alias methods
  3304 //========================================
  // UnicodeString overload of doCompare().
  // A bogus srcText yields 0 if this string is bogus too, otherwise 1.
  // For a valid srcText, srcStart/srcLength are pinned to srcText's bounds
  // and the comparison is delegated to the UChar* overload on
  // srcText.fArray. start/length are passed through without pinning here.
  3305 inline int8_t
  3306 UnicodeString::doCompare(int32_t start,
  3307               int32_t length,
  3308               const UnicodeString& srcText,
  3309               int32_t srcStart,
  3310               int32_t srcLength) const
  3311 {
  3312   if(srcText.isBogus()) {
  3313     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3314   } else {
  3315     srcText.pinIndices(srcStart, srcLength);
  3316     return doCompare(start, length, srcText.fArray, srcStart, srcLength);
  3317   }
  3318 }
  3319 
  // Equality. A bogus string equals only another bogus string. Two valid
  // strings are equal when their lengths match and doCompare() over both
  // full strings returns 0; the length check comes first, so strings of
  // different length are never compared character by character.
  3320 inline UBool
  3321 UnicodeString::operator== (const UnicodeString& text) const
  3322 {
  3323   if(isBogus()) {
  3324     return text.isBogus();
  3325   } else {
  3326     return
  3327       !text.isBogus() &&
  3328       fLength == text.fLength &&
  3329       doCompare(0, fLength, text, 0, text.fLength) == 0;
  3330   }
  3331 }
  3332 
  // Logical negation of operator==.
  3333 inline UBool
  3334 UnicodeString::operator!= (const UnicodeString& text) const
  3335 { return (! operator==(text)); }
  3336 
  // Relational operators. Each compares both full strings with doCompare()
  // and tests the result: > is "== 1", < is "== -1", >= is "!= -1" and
  // <= is "!= 1".
  3337 inline UBool
  3338 UnicodeString::operator> (const UnicodeString& text) const
  3339 { return doCompare(0, fLength, text, 0, text.fLength) == 1; }
  3340 
  3341 inline UBool
  3342 UnicodeString::operator< (const UnicodeString& text) const
  3343 { return doCompare(0, fLength, text, 0, text.fLength) == -1; }
  3344 
  3345 inline UBool
  3346 UnicodeString::operator>= (const UnicodeString& text) const
  3347 { return doCompare(0, fLength, text, 0, text.fLength) != -1; }
  3348 
  3349 inline UBool
  3350 UnicodeString::operator<= (const UnicodeString& text) const
  3351 { return doCompare(0, fLength, text, 0, text.fLength) != 1; }
  3352 
  // Compares this whole string (0, fLength) with all of text (0, text.fLength).
  3353 inline int8_t
  3354 UnicodeString::compare(const UnicodeString& text) const
  3355 { return doCompare(0, fLength, text, 0, text.fLength); }
  3356 
  // Compares the (start, _length) range of this string with all of srcText.
  3357 inline int8_t
  3358 UnicodeString::compare(int32_t start,
  3359                int32_t _length,
  3360                const UnicodeString& srcText) const
  3361 { return doCompare(start, _length, srcText, 0, srcText.fLength); }
  3362 
  // Compares this whole string with srcChars, passing source offset 0 and
  // srcLength to the UChar* doCompare().
  3363 inline int8_t
  3364 UnicodeString::compare(const UChar *srcChars,
  3365                int32_t srcLength) const
  3366 { return doCompare(0, fLength, srcChars, 0, srcLength); }
  3367 
  // Fully specified range-vs-range comparison against a UnicodeString;
  // forwards all arguments to doCompare() unchanged.
  3368 inline int8_t
  3369 UnicodeString::compare(int32_t start,
  3370                int32_t _length,
  3371                const UnicodeString& srcText,
  3372                int32_t srcStart,
  3373                int32_t srcLength) const
  3374 { return doCompare(start, _length, srcText, srcStart, srcLength); }
  3375 
  // Compares the (start, _length) range of this string with srcChars.
  // Note that _length is passed twice: as the length of this string's range
  // and as the length of srcChars (source offset 0).
  3376 inline int8_t
  3377 UnicodeString::compare(int32_t start,
  3378                int32_t _length,
  3379                const UChar *srcChars) const
  3380 { return doCompare(start, _length, srcChars, 0, _length); }
  3381 
  // Fully specified range-vs-range comparison against a UChar array;
  // forwards all arguments to the UChar* doCompare() unchanged.
  3382 inline int8_t
  3383 UnicodeString::compare(int32_t start,
  3384                int32_t _length,
  3385                const UChar *srcChars,
  3386                int32_t srcStart,
  3387                int32_t srcLength) const
  3388 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
  3389 
  // start/limit form of compare(): converts each (start, limit) pair to a
  // (start, length) pair by subtraction and forwards to doCompare().
  3390 inline int8_t
  3391 UnicodeString::compareBetween(int32_t start,
  3392                   int32_t limit,
  3393                   const UnicodeString& srcText,
  3394                   int32_t srcStart,
  3395                   int32_t srcLimit) const
  3396 { return doCompare(start, limit - start,
  3397            srcText, srcStart, srcLimit - srcStart); }
  3398 
  // UnicodeString overload of doCompareCodePointOrder().
  // A bogus srcText yields 0 if this string is bogus too, otherwise 1.
  // For a valid srcText, srcStart/srcLength are pinned to srcText's bounds
  // and the comparison is delegated to the UChar* overload on
  // srcText.fArray. start/length are passed through without pinning here.
  3399 inline int8_t
  3400 UnicodeString::doCompareCodePointOrder(int32_t start,
  3401                                        int32_t length,
  3402                                        const UnicodeString& srcText,
  3403                                        int32_t srcStart,
  3404                                        int32_t srcLength) const
  3405 {
  3406   if(srcText.isBogus()) {
  3407     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3408   } else {
  3409     srcText.pinIndices(srcStart, srcLength);
  3410     return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
  3411   }
  3412 }
  3413 
  // Code-point-order comparison of this whole string with all of text.
  3414 inline int8_t
  3415 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
  3416 { return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
  3417 
  // Code-point-order comparison of the (start, _length) range of this string
  // with all of srcText.
  3418 inline int8_t
  3419 UnicodeString::compareCodePointOrder(int32_t start,
  3420                                      int32_t _length,
  3421                                      const UnicodeString& srcText) const
  3422 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
  3423 
  // Code-point-order comparison of this whole string with srcChars, passing
  // source offset 0 and srcLength to the UChar* worker.
  3424 inline int8_t
  3425 UnicodeString::compareCodePointOrder(const UChar *srcChars,
  3426                                      int32_t srcLength) const
  3427 { return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
  3428 
  // Fully specified range-vs-range form against a UnicodeString; forwards
  // all arguments to doCompareCodePointOrder() unchanged.
  3429 inline int8_t
  3430 UnicodeString::compareCodePointOrder(int32_t start,
  3431                                      int32_t _length,
  3432                                      const UnicodeString& srcText,
  3433                                      int32_t srcStart,
  3434                                      int32_t srcLength) const
  3435 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
  3436 
  // Compares the (start, _length) range of this string with srcChars in code
  // point order. _length is passed twice: as the length of this string's
  // range and as the length of srcChars (source offset 0).
  3437 inline int8_t
  3438 UnicodeString::compareCodePointOrder(int32_t start,
  3439                                      int32_t _length,
  3440                                      const UChar *srcChars) const
  3441 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  3442 
  // Fully specified range-vs-range form against a UChar array; forwards all
  // arguments to the UChar* doCompareCodePointOrder() unchanged.
  3443 inline int8_t
  3444 UnicodeString::compareCodePointOrder(int32_t start,
  3445                                      int32_t _length,
  3446                                      const UChar *srcChars,
  3447                                      int32_t srcStart,
  3448                                      int32_t srcLength) const
  3449 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
  3450 
  // start/limit form of compareCodePointOrder(): converts each (start, limit)
  // pair to a (start, length) pair by subtraction before forwarding.
  3451 inline int8_t
  3452 UnicodeString::compareCodePointOrderBetween(int32_t start,
  3453                                             int32_t limit,
  3454                                             const UnicodeString& srcText,
  3455                                             int32_t srcStart,
  3456                                             int32_t srcLimit) const
  3457 { return doCompareCodePointOrder(start, limit - start,
  3458            srcText, srcStart, srcLimit - srcStart); }
  3459 
  // UnicodeString overload of doCaseCompare().
  // A bogus srcText yields 0 if this string is bogus too, otherwise 1
  // (options is not consulted in that case). For a valid srcText,
  // srcStart/srcLength are pinned to srcText's bounds and the comparison is
  // delegated to the UChar* overload on srcText.fArray with options passed
  // through unchanged.
  3460 inline int8_t
  3461 UnicodeString::doCaseCompare(int32_t start,
  3462                              int32_t length,
  3463                              const UnicodeString &srcText,
  3464                              int32_t srcStart,
  3465                              int32_t srcLength,
  3466                              uint32_t options) const
  3467 {
  3468   if(srcText.isBogus()) {
  3469     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3470   } else {
  3471     srcText.pinIndices(srcStart, srcLength);
  3472     return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
  3473   }
  3474 }
  3475 
  // Case comparison of this whole string with all of text; options is
  // forwarded to doCaseCompare().
  3476 inline int8_t
  3477 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  3478   return doCaseCompare(0, fLength, text, 0, text.fLength, options);
  3479 }
  3480 
  // Case comparison of the (start, _length) range of this string with all
  // of srcText.
  3481 inline int8_t
  3482 UnicodeString::caseCompare(int32_t start,
  3483                            int32_t _length,
  3484                            const UnicodeString &srcText,
  3485                            uint32_t options) const {
  3486   return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
  3487 }
  3488 
  // Case comparison of this whole string with srcChars, passing source
  // offset 0 and srcLength to the UChar* worker.
  3489 inline int8_t
  3490 UnicodeString::caseCompare(const UChar *srcChars,
  3491                            int32_t srcLength,
  3492                            uint32_t options) const {
  3493   return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
  3494 }
  3495 
  // Fully specified range-vs-range case comparison against a UnicodeString;
  // forwards all arguments to doCaseCompare() unchanged.
  3496 inline int8_t
  3497 UnicodeString::caseCompare(int32_t start,
  3498                            int32_t _length,
  3499                            const UnicodeString &srcText,
  3500                            int32_t srcStart,
  3501                            int32_t srcLength,
  3502                            uint32_t options) const {
  3503   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
  3504 }
  3505 
  // Case comparison of the (start, _length) range of this string with
  // srcChars. _length is passed twice: as the length of this string's range
  // and as the length of srcChars (source offset 0).
  3506 inline int8_t
  3507 UnicodeString::caseCompare(int32_t start,
  3508                            int32_t _length,
  3509                            const UChar *srcChars,
  3510                            uint32_t options) const {
  3511   return doCaseCompare(start, _length, srcChars, 0, _length, options);
  3512 }
  3513 
  // Fully specified range-vs-range case comparison against a UChar array;
  // forwards all arguments to the UChar* doCaseCompare() unchanged.
  3514 inline int8_t
  3515 UnicodeString::caseCompare(int32_t start,
  3516                            int32_t _length,
  3517                            const UChar *srcChars,
  3518                            int32_t srcStart,
  3519                            int32_t srcLength,
  3520                            uint32_t options) const {
  3521   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
  3522 }
  3523 
  3524 inline int8_t
  3525 UnicodeString::caseCompareBetween(int32_t start,
  3526                                   int32_t limit,
  3527                                   const UnicodeString &srcText,
  3528                                   int32_t srcStart,
  3529                                   int32_t srcLimit,
  3530                                   uint32_t options) const {
  3531   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  3532 }
  3533 
  3534 inline int32_t
  3535 UnicodeString::indexOf(const UnicodeString& srcText,
  3536                int32_t srcStart,
  3537                int32_t srcLength,
  3538                int32_t start,
  3539                int32_t _length) const
  3540 {
  3541   if(!srcText.isBogus()) {
  3542     srcText.pinIndices(srcStart, srcLength);
  3543     if(srcLength > 0) {
  3544       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3545     }
  3546   }
  3547   return -1;
  3548 }
  3549 
  3550 inline int32_t
  3551 UnicodeString::indexOf(const UnicodeString& text) const
  3552 { return indexOf(text, 0, text.fLength, 0, fLength); }
  3553 
  3554 inline int32_t
  3555 UnicodeString::indexOf(const UnicodeString& text,
  3556                int32_t start) const {
  3557   pinIndex(start);
  3558   return indexOf(text, 0, text.fLength, start, fLength - start);
  3559 }
  3560 
  3561 inline int32_t
  3562 UnicodeString::indexOf(const UnicodeString& text,
  3563                int32_t start,
  3564                int32_t _length) const
  3565 { return indexOf(text, 0, text.fLength, start, _length); }
  3566 
  3567 inline int32_t
  3568 UnicodeString::indexOf(const UChar *srcChars,
  3569                int32_t srcLength,
  3570                int32_t start) const {
  3571   pinIndex(start);
  3572   return indexOf(srcChars, 0, srcLength, start, fLength - start);
  3573 }
  3574 
  3575 inline int32_t
  3576 UnicodeString::indexOf(const UChar *srcChars,
  3577                int32_t srcLength,
  3578                int32_t start,
  3579                int32_t _length) const
  3580 { return indexOf(srcChars, 0, srcLength, start, _length); }
  3581 
  3582 inline int32_t
  3583 UnicodeString::indexOf(UChar c,
  3584                int32_t start,
  3585                int32_t _length) const
  3586 { return doIndexOf(c, start, _length); }
  3587 
  3588 inline int32_t
  3589 UnicodeString::indexOf(UChar32 c,
  3590                int32_t start,
  3591                int32_t _length) const
  3592 { return doIndexOf(c, start, _length); }
  3593 
  3594 inline int32_t
  3595 UnicodeString::indexOf(UChar c) const
  3596 { return doIndexOf(c, 0, fLength); }
  3597 
  3598 inline int32_t
  3599 UnicodeString::indexOf(UChar32 c) const
  3600 { return indexOf(c, 0, fLength); }
  3601 
  3602 inline int32_t
  3603 UnicodeString::indexOf(UChar c,
  3604                int32_t start) const {
  3605   pinIndex(start);
  3606   return doIndexOf(c, start, fLength - start);
  3607 }
  3608 
  3609 inline int32_t
  3610 UnicodeString::indexOf(UChar32 c,
  3611                int32_t start) const {
  3612   pinIndex(start);
  3613   return indexOf(c, start, fLength - start);
  3614 }
  3615 
  3616 inline int32_t
  3617 UnicodeString::lastIndexOf(const UChar *srcChars,
  3618                int32_t srcLength,
  3619                int32_t start,
  3620                int32_t _length) const
  3621 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
  3622 
  3623 inline int32_t
  3624 UnicodeString::lastIndexOf(const UChar *srcChars,
  3625                int32_t srcLength,
  3626                int32_t start) const {
  3627   pinIndex(start);
  3628   return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
  3629 }
  3630 
  3631 inline int32_t
  3632 UnicodeString::lastIndexOf(const UnicodeString& srcText,
  3633                int32_t srcStart,
  3634                int32_t srcLength,
  3635                int32_t start,
  3636                int32_t _length) const
  3637 {
  3638   if(!srcText.isBogus()) {
  3639     srcText.pinIndices(srcStart, srcLength);
  3640     if(srcLength > 0) {
  3641       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3642     }
  3643   }
  3644   return -1;
  3645 }
  3646 
  3647 inline int32_t
  3648 UnicodeString::lastIndexOf(const UnicodeString& text,
  3649                int32_t start,
  3650                int32_t _length) const
  3651 { return lastIndexOf(text, 0, text.fLength, start, _length); }
  3652 
  3653 inline int32_t
  3654 UnicodeString::lastIndexOf(const UnicodeString& text,
  3655                int32_t start) const {
  3656   pinIndex(start);
  3657   return lastIndexOf(text, 0, text.fLength, start, fLength - start);
  3658 }
  3659 
  3660 inline int32_t
  3661 UnicodeString::lastIndexOf(const UnicodeString& text) const
  3662 { return lastIndexOf(text, 0, text.fLength, 0, fLength); }
  3663 
  3664 inline int32_t
  3665 UnicodeString::lastIndexOf(UChar c,
  3666                int32_t start,
  3667                int32_t _length) const
  3668 { return doLastIndexOf(c, start, _length); }
  3669 
  3670 inline int32_t
  3671 UnicodeString::lastIndexOf(UChar32 c,
  3672                int32_t start,
  3673                int32_t _length) const {
  3674   return doLastIndexOf(c, start, _length);
  3675 }
  3676 
  3677 inline int32_t
  3678 UnicodeString::lastIndexOf(UChar c) const
  3679 { return doLastIndexOf(c, 0, fLength); }
  3680 
  3681 inline int32_t
  3682 UnicodeString::lastIndexOf(UChar32 c) const {
  3683   return lastIndexOf(c, 0, fLength);
  3684 }
  3685 
  3686 inline int32_t
  3687 UnicodeString::lastIndexOf(UChar c,
  3688                int32_t start) const {
  3689   pinIndex(start);
  3690   return doLastIndexOf(c, start, fLength - start);
  3691 }
  3692 
  3693 inline int32_t
  3694 UnicodeString::lastIndexOf(UChar32 c,
  3695                int32_t start) const {
  3696   pinIndex(start);
  3697   return lastIndexOf(c, start, fLength - start);
  3698 }
  3699 
  3700 inline UBool
  3701 UnicodeString::startsWith(const UnicodeString& text) const
  3702 { return compare(0, text.fLength, text, 0, text.fLength) == 0; }
  3703 
  3704 inline UBool
  3705 UnicodeString::startsWith(const UnicodeString& srcText,
  3706               int32_t srcStart,
  3707               int32_t srcLength) const
  3708 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
  3709 
  3710 inline UBool
  3711 UnicodeString::startsWith(const UChar *srcChars,
  3712               int32_t srcLength) const
  3713 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
  3714 
  3715 inline UBool
  3716 UnicodeString::startsWith(const UChar *srcChars,
  3717               int32_t srcStart,
  3718               int32_t srcLength) const
  3719 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
  3720 
  3721 inline UBool
  3722 UnicodeString::endsWith(const UnicodeString& text) const
  3723 { return doCompare(fLength - text.fLength, text.fLength,
  3724            text, 0, text.fLength) == 0; }
  3725 
  3726 inline UBool
  3727 UnicodeString::endsWith(const UnicodeString& srcText,
  3728             int32_t srcStart,
  3729             int32_t srcLength) const {
  3730   srcText.pinIndices(srcStart, srcLength);
  3731   return doCompare(fLength - srcLength, srcLength,
  3732                    srcText, srcStart, srcLength) == 0;
  3733 }
  3734 
  3735 inline UBool
  3736 UnicodeString::endsWith(const UChar *srcChars,
  3737             int32_t srcLength) const {
  3738   if(srcLength < 0) {
  3739     srcLength = u_strlen(srcChars);
  3740   }
  3741   return doCompare(fLength - srcLength, srcLength,
  3742                    srcChars, 0, srcLength) == 0;
  3743 }
  3744 
  3745 inline UBool
  3746 UnicodeString::endsWith(const UChar *srcChars,
  3747             int32_t srcStart,
  3748             int32_t srcLength) const {
  3749   if(srcLength < 0) {
  3750     srcLength = u_strlen(srcChars + srcStart);
  3751   }
  3752   return doCompare(fLength - srcLength, srcLength,
  3753                    srcChars, srcStart, srcLength) == 0;
  3754 }
  3755 
  3756 //========================================
  3757 // replace
  3758 //========================================
  3759 inline UnicodeString&
  3760 UnicodeString::replace(int32_t start,
  3761                int32_t _length,
  3762                const UnicodeString& srcText)
  3763 { return doReplace(start, _length, srcText, 0, srcText.fLength); }
  3764 
  3765 inline UnicodeString&
  3766 UnicodeString::replace(int32_t start,
  3767                int32_t _length,
  3768                const UnicodeString& srcText,
  3769                int32_t srcStart,
  3770                int32_t srcLength)
  3771 { return doReplace(start, _length, srcText, srcStart, srcLength); }
  3772 
  3773 inline UnicodeString&
  3774 UnicodeString::replace(int32_t start,
  3775                int32_t _length,
  3776                const UChar *srcChars,
  3777                int32_t srcLength)
  3778 { return doReplace(start, _length, srcChars, 0, srcLength); }
  3779 
  3780 inline UnicodeString&
  3781 UnicodeString::replace(int32_t start,
  3782                int32_t _length,
  3783                const UChar *srcChars,
  3784                int32_t srcStart,
  3785                int32_t srcLength)
  3786 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
  3787 
  3788 inline UnicodeString&
  3789 UnicodeString::replace(int32_t start,
  3790                int32_t _length,
  3791                UChar srcChar)
  3792 { return doReplace(start, _length, &srcChar, 0, 1); }
  3793 
  3794 inline UnicodeString&
  3795 UnicodeString::replace(int32_t start,
  3796                int32_t _length,
  3797                UChar32 srcChar) {
  3798   UChar buffer[U16_MAX_LENGTH];
  3799   int32_t count = 0;
  3800   UBool isError = FALSE;
  3801   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
  3802   return doReplace(start, _length, buffer, 0, count);
  3803 }
  3804 
  3805 inline UnicodeString&
  3806 UnicodeString::replaceBetween(int32_t start,
  3807                   int32_t limit,
  3808                   const UnicodeString& srcText)
  3809 { return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
  3810 
  3811 inline UnicodeString&
  3812 UnicodeString::replaceBetween(int32_t start,
  3813                   int32_t limit,
  3814                   const UnicodeString& srcText,
  3815                   int32_t srcStart,
  3816                   int32_t srcLimit)
  3817 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
  3818 
  3819 inline UnicodeString&
  3820 UnicodeString::findAndReplace(const UnicodeString& oldText,
  3821                   const UnicodeString& newText)
  3822 { return findAndReplace(0, fLength, oldText, 0, oldText.fLength,
  3823             newText, 0, newText.fLength); }
  3824 
  3825 inline UnicodeString&
  3826 UnicodeString::findAndReplace(int32_t start,
  3827                   int32_t _length,
  3828                   const UnicodeString& oldText,
  3829                   const UnicodeString& newText)
  3830 { return findAndReplace(start, _length, oldText, 0, oldText.fLength,
  3831             newText, 0, newText.fLength); }
  3832 
  3833 // ============================
  3834 // extract
  3835 // ============================
  3836 inline void
  3837 UnicodeString::doExtract(int32_t start,
  3838              int32_t _length,
  3839              UnicodeString& target) const
  3840 { target.replace(0, target.fLength, *this, start, _length); }
  3841 
  3842 inline void
  3843 UnicodeString::extract(int32_t start,
  3844                int32_t _length,
  3845                UChar *target,
  3846                int32_t targetStart) const
  3847 { doExtract(start, _length, target, targetStart); }
  3848 
  3849 inline void
  3850 UnicodeString::extract(int32_t start,
  3851                int32_t _length,
  3852                UnicodeString& target) const
  3853 { doExtract(start, _length, target); }
  3854 
  3855 #if !UCONFIG_NO_CONVERSION
  3856 
  3857 inline int32_t
  3858 UnicodeString::extract(int32_t start,
  3859                int32_t _length,
  3860                char *dst,
  3861                const char *codepage) const
  3862 
  3863 {
  3864   // This dstSize value will be checked explicitly
  3865   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
  3866 }
  3867 
  3868 #endif
  3869 
  3870 inline void
  3871 UnicodeString::extractBetween(int32_t start,
  3872                   int32_t limit,
  3873                   UChar *dst,
  3874                   int32_t dstStart) const {
  3875   pinIndex(start);
  3876   pinIndex(limit);
  3877   doExtract(start, limit - start, dst, dstStart);
  3878 }
  3879 
  3880 inline UChar
  3881 UnicodeString::doCharAt(int32_t offset) const
  3882 {
  3883   if((uint32_t)offset < (uint32_t)fLength) {
  3884     return fArray[offset];
  3885   } else {
  3886     return kInvalidUChar;
  3887   }
  3888 }
  3889 
  3890 inline UChar
  3891 UnicodeString::charAt(int32_t offset) const
  3892 { return doCharAt(offset); }
  3893 
  3894 inline UChar
  3895 UnicodeString::operator[] (int32_t offset) const
  3896 { return doCharAt(offset); }
  3897 
  3898 inline UChar32
  3899 UnicodeString::char32At(int32_t offset) const
  3900 {
  3901   if((uint32_t)offset < (uint32_t)fLength) {
  3902     UChar32 c;
  3903     U16_GET(fArray, 0, offset, fLength, c);
  3904     return c;
  3905   } else {
  3906     return kInvalidUChar;
  3907   }
  3908 }
  3909 
  3910 inline int32_t
  3911 UnicodeString::getChar32Start(int32_t offset) const {
  3912   if((uint32_t)offset < (uint32_t)fLength) {
  3913     U16_SET_CP_START(fArray, 0, offset);
  3914     return offset;
  3915   } else {
  3916     return 0;
  3917   }
  3918 }
  3919 
  3920 inline int32_t
  3921 UnicodeString::getChar32Limit(int32_t offset) const {
  3922   if((uint32_t)offset < (uint32_t)fLength) {
  3923     U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
  3924     return offset;
  3925   } else {
  3926     return fLength;
  3927   }
  3928 }
  3929 
  3930 inline UBool
  3931 UnicodeString::isEmpty() const {
  3932   return fLength == 0;
  3933 }
  3934 
  3935 //========================================
  3936 // Write implementation methods
  3937 //========================================
  3938 inline const UChar *
  3939 UnicodeString::getTerminatedBuffer() {
  3940   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
  3941     return 0;
  3942   } else if(fLength<fCapacity && fArray[fLength]==0) {
  3943     return fArray;
  3944   } else if(cloneArrayIfNeeded(fLength+1)) {
  3945     fArray[fLength]=0;
  3946     return fArray;
  3947   } else {
  3948     return 0;
  3949   }
  3950 }
  3951 
  3952 inline UnicodeString&
  3953 UnicodeString::operator= (UChar ch)
  3954 { return doReplace(0, fLength, &ch, 0, 1); }
  3955 
  3956 inline UnicodeString&
  3957 UnicodeString::operator= (UChar32 ch)
  3958 { return replace(0, fLength, ch); }
  3959 
  3960 inline UnicodeString&
  3961 UnicodeString::setTo(const UnicodeString& srcText,
  3962              int32_t srcStart,
  3963              int32_t srcLength)
  3964 {
  3965   unBogus();
  3966   return doReplace(0, fLength, srcText, srcStart, srcLength);
  3967 }
  3968 
  3969 inline UnicodeString&
  3970 UnicodeString::setTo(const UnicodeString& srcText,
  3971              int32_t srcStart)
  3972 {
  3973   unBogus();
  3974   srcText.pinIndex(srcStart);
  3975   return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
  3976 }
  3977 
  3978 inline UnicodeString&
  3979 UnicodeString::setTo(const UnicodeString& srcText)
  3980 {
  3981   unBogus();
  3982   return doReplace(0, fLength, srcText, 0, srcText.fLength);
  3983 }
  3984 
  3985 inline UnicodeString&
  3986 UnicodeString::setTo(const UChar *srcChars,
  3987              int32_t srcLength)
  3988 {
  3989   unBogus();
  3990   return doReplace(0, fLength, srcChars, 0, srcLength);
  3991 }
  3992 
  3993 inline UnicodeString&
  3994 UnicodeString::setTo(UChar srcChar)
  3995 {
  3996   unBogus();
  3997   return doReplace(0, fLength, &srcChar, 0, 1);
  3998 }
  3999 
  4000 inline UnicodeString&
  4001 UnicodeString::setTo(UChar32 srcChar)
  4002 {
  4003   unBogus();
  4004   return replace(0, fLength, srcChar);
  4005 }
  4006 
  4007 inline UnicodeString&
  4008 UnicodeString::operator+= (UChar ch)
  4009 { return doReplace(fLength, 0, &ch, 0, 1); }
  4010 
  4011 inline UnicodeString&
  4012 UnicodeString::operator+= (UChar32 ch) {
  4013   UChar buffer[U16_MAX_LENGTH];
  4014   int32_t _length = 0;
  4015   UBool isError = FALSE;
  4016   U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
  4017   return doReplace(fLength, 0, buffer, 0, _length);
  4018 }
  4019 
  4020 inline UnicodeString&
  4021 UnicodeString::operator+= (const UnicodeString& srcText)
  4022 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
  4023 
  4024 inline UnicodeString&
  4025 UnicodeString::append(const UnicodeString& srcText,
  4026               int32_t srcStart,
  4027               int32_t srcLength)
  4028 { return doReplace(fLength, 0, srcText, srcStart, srcLength); }
  4029 
  4030 inline UnicodeString&
  4031 UnicodeString::append(const UnicodeString& srcText)
  4032 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
  4033 
  4034 inline UnicodeString&
  4035 UnicodeString::append(const UChar *srcChars,
  4036               int32_t srcStart,
  4037               int32_t srcLength)
  4038 { return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
  4039 
  4040 inline UnicodeString&
  4041 UnicodeString::append(const UChar *srcChars,
  4042               int32_t srcLength)
  4043 { return doReplace(fLength, 0, srcChars, 0, srcLength); }
  4044 
  4045 inline UnicodeString&
  4046 UnicodeString::append(UChar srcChar)
  4047 { return doReplace(fLength, 0, &srcChar, 0, 1); }
  4048 
  4049 inline UnicodeString&
  4050 UnicodeString::append(UChar32 srcChar) {
  4051   UChar buffer[U16_MAX_LENGTH];
  4052   int32_t _length = 0;
  4053   UBool isError = FALSE;
  4054   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
  4055   return doReplace(fLength, 0, buffer, 0, _length);
  4056 }
  4057 
  4058 inline UnicodeString&
  4059 UnicodeString::insert(int32_t start,
  4060               const UnicodeString& srcText,
  4061               int32_t srcStart,
  4062               int32_t srcLength)
  4063 { return doReplace(start, 0, srcText, srcStart, srcLength); }
  4064 
  4065 inline UnicodeString&
  4066 UnicodeString::insert(int32_t start,
  4067               const UnicodeString& srcText)
  4068 { return doReplace(start, 0, srcText, 0, srcText.fLength); }
  4069 
  4070 inline UnicodeString&
  4071 UnicodeString::insert(int32_t start,
  4072               const UChar *srcChars,
  4073               int32_t srcStart,
  4074               int32_t srcLength)
  4075 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
  4076 
  4077 inline UnicodeString&
  4078 UnicodeString::insert(int32_t start,
  4079               const UChar *srcChars,
  4080               int32_t srcLength)
  4081 { return doReplace(start, 0, srcChars, 0, srcLength); }
  4082 
  4083 inline UnicodeString&
  4084 UnicodeString::insert(int32_t start,
  4085               UChar srcChar)
  4086 { return doReplace(start, 0, &srcChar, 0, 1); }
  4087 
  4088 inline UnicodeString&
  4089 UnicodeString::insert(int32_t start,
  4090               UChar32 srcChar)
  4091 { return replace(start, 0, srcChar); }
  4092 
  4093 
  4094 inline UnicodeString&
  4095 UnicodeString::remove()
  4096 {
  4097   // remove() of a bogus string makes the string empty and non-bogus
  4098   if(isBogus()) {
  4099     unBogus();
  4100   } else {
  4101     fLength = 0;
  4102   }
  4103   return *this;
  4104 }
  4105 
  4106 inline UnicodeString&
  4107 UnicodeString::remove(int32_t start,
  4108              int32_t _length)
  4109 {
  4110   if(start <= 0 && _length == INT32_MAX) {
  4111     // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
  4112     return remove();
  4113   } else {
  4114     return doReplace(start, _length, NULL, 0, 0);
  4115   }
  4116 }
  4117 
  4118 inline UnicodeString&
  4119 UnicodeString::removeBetween(int32_t start,
  4120                 int32_t limit)
  4121 { return doReplace(start, limit - start, NULL, 0, 0); }
  4122 
  4123 inline UBool
  4124 UnicodeString::truncate(int32_t targetLength)
  4125 {
  4126   if(isBogus() && targetLength == 0) {
  4127     // truncate(0) of a bogus string makes the string empty and non-bogus
  4128     unBogus();
  4129     return FALSE;
  4130   } else if((uint32_t)targetLength < (uint32_t)fLength) {
  4131     fLength = targetLength;
  4132     return TRUE;
  4133   } else {
  4134     return FALSE;
  4135   }
  4136 }
  4137 
  4138 inline UnicodeString&
  4139 UnicodeString::reverse()
  4140 { return doReverse(0, fLength); }
  4141 
  4142 inline UnicodeString&
  4143 UnicodeString::reverse(int32_t start,
  4144                int32_t _length)
  4145 { return doReverse(start, _length); }
  4146 
  4147 U_NAMESPACE_END
  4148 
  4149 #endif