Symaptic: os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/unistr.h@260cb5ec6c19

     1 /*

     2 **********************************************************************

     3 *   Copyright (C) 1998-2005, International Business Machines

     4 *   Corporation and others.  All Rights Reserved.

     5 **********************************************************************

6 *

     7 * File unistr.h

8 *

     9 * Modification History:

    10 *

    11 *   Date        Name        Description

    12 *   09/25/98    stephen     Creation.

    13 *   11/11/98    stephen     Changed per 11/9 code review.

    14 *   04/20/99    stephen     Overhauled per 4/16 code review.

    15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method

    16 *                           handleReplaceBetween(); other methods unchanged.

    17 *   06/25/01    grhoten     Remove dependency on iostream.

    18 ******************************************************************************

    19 */

    21 #ifndef UNISTR_H

    22 #define UNISTR_H

    24 /**

    25  * \file

    26  * \brief C++ API: Unicode String

    27  */

    29 #include "unicode/rep.h"

    31 struct UConverter;          // unicode/ucnv.h

    32 class  StringThreadTest;

    34 #ifndef U_COMPARE_CODE_POINT_ORDER

    35 /* see also ustring.h and unorm.h */

    36 /**

    37  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:

    38  * Compare strings in code point order instead of code unit order.

    39  * @stable ICU 2.2

    40  */

    41 #define U_COMPARE_CODE_POINT_ORDER  0x8000

    42 #endif

    44 #ifndef USTRING_H

    45 /* see ustring.h; u_strlen is redeclared here only when USTRING_H is not yet defined. */
       /* NOTE(review): presumably this lets code later in this header call u_strlen
          without including ustring.h - confirm against the inline section of the file. */

    46 U_STABLE int32_t U_EXPORT2

    47 u_strlen(const UChar *s);

    48 #endif

    50 U_NAMESPACE_BEGIN

    52 class Locale;               // unicode/locid.h

    53 class StringCharacterIterator;

    54 class BreakIterator;        // unicode/brkiter.h

    56 /* The <iostream> include has been moved to unicode/ustream.h */

    58 /**

    59  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

    60  * which constructs a Unicode string from an invariant-character char * string.

    61  * About invariant characters see utypes.h.

    62  * This constructor has no runtime dependency on conversion code and is

    63  * therefore recommended over ones taking a charset name string

    64  * (where the empty string "" indicates invariant-character conversion).

    65  *

    66  * @draft ICU 3.2

    67  */

    68 #define US_INV UnicodeString::kInvariant

    70 /**

    71  * Unicode String literals in C++.

    72  * Dependent on the platform properties, different UnicodeString

    73  * constructors should be used to create a UnicodeString object from

    74  * a string literal.

    75  * The macros are defined for maximum performance.

    76  * They work only for strings that contain "invariant characters", i.e.,

    77  * only Latin letters, digits, and some punctuation.

    78  * See utypes.h for details.

    79  *

    80  * The string parameter must be a C string literal.

    81  * The length of the string, not including the terminating

    82  * <code>NUL</code>, must be specified as a constant.

    83  * The U_STRING_DECL macro should be invoked exactly once for one

    84  * such string variable before it is used.

    85  * @stable ICU 2.0

    86  */

    87 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))

       /* wchar_t and UChar have the same size, and either the charset family is ASCII
          or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: paste an L prefix onto the
          literal to form a wide string literal and cast it to const UChar *.
          NOTE(review): the leading TRUE presumably selects a constructor that uses the
          literal's storage directly - confirm against the constructor declarations. */
    88 #   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)

    89 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY

       /* UChar is one byte on an ASCII-family platform: the narrow literal is cast to
          const UChar * without an L prefix. */
    90 #   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)

    91 #else

       /* All other platforms: pass the char * literal and its length together with
          US_INV, i.e. use the UnicodeString(char *, int32_t, EInvariant) constructor
          for invariant-character strings. */
    92 #   define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)

    93 #endif

    95 /**

    96  * Unicode String literals in C++.

    97  * Dependent on the platform properties, different UnicodeString

    98  * constructors should be used to create a UnicodeString object from

    99  * a string literal.

   100  * The macros are defined for improved performance.

   101  * They work only for strings that contain "invariant characters", i.e.,

   102  * only Latin letters, digits, and some punctuation.

   103  * See utypes.h for details.

   104  *

   105  * The string parameter must be a C string literal.

   106  * @stable ICU 2.0

   107  */

   108 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))

       /* wchar_t and UChar have the same size, and either the charset family is ASCII
          or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: paste an L prefix onto the
          literal to form a wide string literal and cast it to const UChar *.
          NOTE(review): the -1 length presumably asks the constructor to determine the
          length from the NUL terminator - confirm against the constructor declarations. */
   109 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)

   110 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY

       /* UChar is one byte on an ASCII-family platform: the narrow literal is cast to
          const UChar * without an L prefix; the length argument is again -1. */
   111 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)

   112 #else

       /* All other platforms: pass the char * literal with length -1 and US_INV, i.e.
          use the UnicodeString(char *, int32_t, EInvariant) constructor for
          invariant-character strings. */
   113 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)

   114 #endif

   116 /**

   117  * UnicodeString is a string class that stores Unicode characters directly and provides

   118  * functionality similar to the Java String and StringBuffer classes.

   119  * It is a concrete implementation of the abstract class Replaceable (for transliteration).

   120  *

   121  * The UnicodeString class is not suitable for subclassing.

   122  *

   123  * <p>For an overview of Unicode strings in C and C++ see the

   124  * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>

   125  *

   126  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.

   127  * A Unicode character may be stored with either one code unit

   128  * (the most common case) or with a matched pair of special code units

   129  * ("surrogates"). The data type for code units is UChar.

   130  * For single-character handling, a Unicode character code <em>point</em> is a value

   131  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>

   132  *

   133  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.

   134  * This is the same as with multi-byte char* strings in traditional string handling.

   135  * Operations on partial strings typically do not test for code point boundaries.

   136  * If necessary, the user needs to take care of such boundaries by testing for the code unit

   137  * values or by using functions like

   138  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()

   139  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>

   140  *

   141  * UnicodeString methods are more lenient with regard to input parameter values

   142  * than other ICU APIs. In particular:

   143  * - If indexes are out of bounds for a UnicodeString object

   144  *   (<0 or >length()) then they are "pinned" to the nearest boundary.

   145  * - If primitive string pointer values (e.g., const UChar * or char *)

   146  *   for input strings are NULL, then those input string parameters are treated

   147  *   as if they pointed to an empty string.

   148  *   However, this is <em>not</em> the case for char * parameters for charset names

   149  *   or other IDs.

   150  * - Most UnicodeString methods do not take a UErrorCode parameter because

   151  *   there are usually very few opportunities for failure other than a shortage

   152  *   of memory, error codes in low-level C++ string methods would be inconvenient,

   153  *   and the error code as the last parameter (ICU convention) would prevent

   154  *   the use of default parameter values.

   155  *   Instead, such methods set the UnicodeString into a "bogus" state

   156  *   (see isBogus()) if an error occurs.

   157  *

   158  * In string comparisons, two UnicodeString objects that are both "bogus"

   159  * compare equal (to be transitive and prevent endless loops in sorting),

   160  * and a "bogus" string compares less than any non-"bogus" one.

   161  *

   162  * Const UnicodeString methods are thread-safe. Multiple threads can use

   163  * const methods on the same UnicodeString object simultaneously,

   164  * but non-const methods must not be called concurrently (in multiple threads)

   165  * with any other (const or non-const) methods.

   166  *

   167  * Similarly, const UnicodeString & parameters are thread-safe.

   168  * One object may be passed in as such a parameter concurrently in multiple threads.

   169  * This includes the const UnicodeString & parameters for

   170  * copy construction, assignment, and cloning.

   171  *

   172  * <p>UnicodeString uses several storage methods.

   173  * String contents can be stored inside the UnicodeString object itself,

   174  * in an allocated and shared buffer, or in an outside buffer that is "aliased".

   175  * Most of this is done transparently, but careful aliasing in particular provides

   176  * significant performance improvements.

   177  * Also, the internal buffer is accessible via special functions.

   178  * For details see the

   179  * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>

   180  *

   181  * @see utf.h

   182  * @see CharacterIterator

   183  * @stable ICU 2.0

   184  */

   185 class U_COMMON_API UnicodeString : public Replaceable

   186 {

   187 public:

   189   /**

   190    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

   191    * which constructs a Unicode string from an invariant-character char * string.

   192    * Use the macro US_INV instead of the full qualification for this value.

   193    *

   194    * @see US_INV

   195    * @draft ICU 3.2

   196    */

   197   enum EInvariant {

   198     /**

   199      * @see EInvariant

   200      * @draft ICU 3.2

   201      */

   202     kInvariant

   203   };

   205   //========================================

   206   // Read-only operations

   207   //========================================

   209   /* Comparison - bitwise only - for international comparison use collation */

   211   /**

   212    * Equality operator. Performs only bitwise comparison.

   213    * @param text The UnicodeString to compare to this one.

   214    * @return TRUE if <TT>text</TT> contains the same characters as this one,

   215    * FALSE otherwise.

   216    * @stable ICU 2.0

   217    */

   218   inline UBool operator== (const UnicodeString& text) const;

   220   /**

   221    * Inequality operator. Performs only bitwise comparison.

   222    * @param text The UnicodeString to compare to this one.

   223    * @return FALSE if <TT>text</TT> contains the same characters as this one,

   224    * TRUE otherwise.

   225    * @stable ICU 2.0

   226    */

   227   inline UBool operator!= (const UnicodeString& text) const;

   229   /**

   230    * Greater than operator. Performs only bitwise comparison.

   231    * @param text The UnicodeString to compare to this one.

   232    * @return TRUE if the characters in this are bitwise

   233    * greater than the characters in <code>text</code>, FALSE otherwise

   234    * @stable ICU 2.0

   235    */

   236   inline UBool operator> (const UnicodeString& text) const;

   238   /**

   239    * Less than operator. Performs only bitwise comparison.

   240    * @param text The UnicodeString to compare to this one.

   241    * @return TRUE if the characters in this are bitwise

   242    * less than the characters in <code>text</code>, FALSE otherwise

   243    * @stable ICU 2.0

   244    */

   245   inline UBool operator< (const UnicodeString& text) const;

   247   /**

   248    * Greater than or equal operator. Performs only bitwise comparison.

   249    * @param text The UnicodeString to compare to this one.

   250    * @return TRUE if the characters in this are bitwise

   251    * greater than or equal to the characters in <code>text</code>, FALSE otherwise

   252    * @stable ICU 2.0

   253    */

   254   inline UBool operator>= (const UnicodeString& text) const;

   256   /**

   257    * Less than or equal operator. Performs only bitwise comparison.

   258    * @param text The UnicodeString to compare to this one.

   259    * @return TRUE if the characters in this are bitwise

   260    * less than or equal to the characters in <code>text</code>, FALSE otherwise

   261    * @stable ICU 2.0

   262    */

   263   inline UBool operator<= (const UnicodeString& text) const;

   265   /**

   266    * Compare the characters bitwise in this UnicodeString to

   267    * the characters in <code>text</code>.

   268    * @param text The UnicodeString to compare to this one.

   269    * @return The result of bitwise character comparison: 0 if this

   270    * contains the same characters as <code>text</code>, -1 if the characters in

   271    * this are bitwise less than the characters in <code>text</code>, +1 if the

   272    * characters in this are bitwise greater than the characters

   273    * in <code>text</code>.

   274    * @stable ICU 2.0

   275    */

   276   inline int8_t compare(const UnicodeString& text) const;

   278   /**

   279    * Compare the characters bitwise in the range

   280    * [<TT>start</TT>, <TT>start + length</TT>) with the characters

   281    * in <TT>text</TT>

   282    * @param start the offset at which the compare operation begins

   283    * @param length the number of characters in this to compare.

   284    * @param text the other text to be compared against this string.

   285    * @return The result of bitwise character comparison: 0 if this

   286    * contains the same characters as <code>text</code>, -1 if the characters in

   287    * this are bitwise less than the characters in <code>text</code>, +1 if the

   288    * characters in this are bitwise greater than the characters

   289    * in <code>text</code>.

   290    * @stable ICU 2.0

   291    */

   292   inline int8_t compare(int32_t start,

   293          int32_t length,

   294          const UnicodeString& text) const;

   296   /**

   297    * Compare the characters bitwise in the range

   298    * [<TT>start</TT>, <TT>start + length</TT>) with the characters

   299    * in <TT>srcText</TT> in the range

   300    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   301    * @param start the offset at which the compare operation begins

   302    * @param length the number of characters in this to compare.

   303    * @param srcText the text to be compared

   304    * @param srcStart the offset into <TT>srcText</TT> to start comparison

   305    * @param srcLength the number of characters in <TT>srcText</TT> to compare

   306    * @return The result of bitwise character comparison: 0 if this

   307    * contains the same characters as <code>srcText</code>, -1 if the characters in

   308    * this are bitwise less than the characters in <code>srcText</code>, +1 if the

   309    * characters in this are bitwise greater than the characters

   310    * in <code>srcText</code>.

   311    * @stable ICU 2.0

   312    */

   313    inline int8_t compare(int32_t start,

   314          int32_t length,

   315          const UnicodeString& srcText,

   316          int32_t srcStart,

   317          int32_t srcLength) const;

   319   /**

   320    * Compare the characters bitwise in this UnicodeString with the first

   321    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.

   322    * @param srcChars The characters to compare to this UnicodeString.

   323    * @param srcLength the number of characters in <TT>srcChars</TT> to compare

   324    * @return The result of bitwise character comparison: 0 if this

   325    * contains the same characters as <code>srcChars</code>, -1 if the characters in

   326    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

   327    * characters in this are bitwise greater than the characters

   328    * in <code>srcChars</code>.

   329    * @stable ICU 2.0

   330    */

   331   inline int8_t compare(const UChar *srcChars,

   332          int32_t srcLength) const;

   334   /**

   335    * Compare the characters bitwise in the range

   336    * [<TT>start</TT>, <TT>start + length</TT>) with the first

   337    * <TT>length</TT> characters in <TT>srcChars</TT>

   338    * @param start the offset at which the compare operation begins

   339    * @param length the number of characters to compare.

   340    * @param srcChars the characters to be compared

   341    * @return The result of bitwise character comparison: 0 if this

   342    * contains the same characters as <code>srcChars</code>, -1 if the characters in

   343    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

   344    * characters in this are bitwise greater than the characters

   345    * in <code>srcChars</code>.

   346    * @stable ICU 2.0

   347    */

   348   inline int8_t compare(int32_t start,

   349          int32_t length,

   350          const UChar *srcChars) const;

   352   /**

   353    * Compare the characters bitwise in the range

   354    * [<TT>start</TT>, <TT>start + length</TT>) with the characters

   355    * in <TT>srcChars</TT> in the range

   356    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   357    * @param start the offset at which the compare operation begins

   358    * @param length the number of characters in this to compare

   359    * @param srcChars the characters to be compared

   360    * @param srcStart the offset into <TT>srcChars</TT> to start comparison

   361    * @param srcLength the number of characters in <TT>srcChars</TT> to compare

   362    * @return The result of bitwise character comparison: 0 if this

   363    * contains the same characters as <code>srcChars</code>, -1 if the characters in

   364    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

   365    * characters in this are bitwise greater than the characters

   366    * in <code>srcChars</code>.

   367    * @stable ICU 2.0

   368    */

   369   inline int8_t compare(int32_t start,

   370          int32_t length,

   371          const UChar *srcChars,

   372          int32_t srcStart,

   373          int32_t srcLength) const;

   375   /**

   376    * Compare the characters bitwise in the range

   377    * [<TT>start</TT>, <TT>limit</TT>) with the characters

   378    * in <TT>srcText</TT> in the range

   379    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).

   380    * @param start the offset at which the compare operation begins

   381    * @param limit the offset immediately following the compare operation

   382    * @param srcText the text to be compared

   383    * @param srcStart the offset into <TT>srcText</TT> to start comparison

   384    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison

   385    * @return The result of bitwise character comparison: 0 if this

   386    * contains the same characters as <code>srcText</code>, -1 if the characters in

   387    * this are bitwise less than the characters in <code>srcText</code>, +1 if the

   388    * characters in this are bitwise greater than the characters

   389    * in <code>srcText</code>.

   390    * @stable ICU 2.0

   391    */

   392   inline int8_t compareBetween(int32_t start,

   393             int32_t limit,

   394             const UnicodeString& srcText,

   395             int32_t srcStart,

   396             int32_t srcLimit) const;

   398   /**

   399    * Compare two Unicode strings in code point order.

   400    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work

   401    * if supplementary characters are present:

   402    *

   403    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   404    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   405    * which means that they compare as less than some other BMP characters like U+feff.

   406    * This function compares Unicode strings in code point order.

   407    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   408    *

   409    * @param text Another string to compare this one to.

   410    * @return a negative/zero/positive integer corresponding to whether

   411    * this string is less than/equal to/greater than the second one

   412    * in code point order

   413    * @stable ICU 2.0

   414    */

   415   inline int8_t compareCodePointOrder(const UnicodeString& text) const;

   417   /**

   418    * Compare two Unicode strings in code point order.

   419    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work

   420    * if supplementary characters are present:

   421    *

   422    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   423    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   424    * which means that they compare as less than some other BMP characters like U+feff.

   425    * This function compares Unicode strings in code point order.

   426    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   427    *

   428    * @param start The start offset in this string at which the compare operation begins.

   429    * @param length The number of code units from this string to compare.

   430    * @param srcText Another string to compare this one to.

   431    * @return a negative/zero/positive integer corresponding to whether

   432    * this string is less than/equal to/greater than the second one

   433    * in code point order

   434    * @stable ICU 2.0

   435    */

   436   inline int8_t compareCodePointOrder(int32_t start,

   437                                       int32_t length,

   438                                       const UnicodeString& srcText) const;

   440   /**

   441    * Compare two Unicode strings in code point order.

   442    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work

   443    * if supplementary characters are present:

   444    *

   445    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   446    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   447    * which means that they compare as less than some other BMP characters like U+feff.

   448    * This function compares Unicode strings in code point order.

   449    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   450    *

   451    * @param start The start offset in this string at which the compare operation begins.

   452    * @param length The number of code units from this string to compare.

   453    * @param srcText Another string to compare this one to.

   454    * @param srcStart The start offset in that string at which the compare operation begins.

   455    * @param srcLength The number of code units from that string to compare.

   456    * @return a negative/zero/positive integer corresponding to whether

   457    * this string is less than/equal to/greater than the second one

   458    * in code point order

   459    * @stable ICU 2.0

   460    */

   461    inline int8_t compareCodePointOrder(int32_t start,

   462                                        int32_t length,

   463                                        const UnicodeString& srcText,

   464                                        int32_t srcStart,

   465                                        int32_t srcLength) const;

   467   /**

   468    * Compare two Unicode strings in code point order.

   469    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work

   470    * if supplementary characters are present:

   471    *

   472    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   473    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   474    * which means that they compare as less than some other BMP characters like U+feff.

   475    * This function compares Unicode strings in code point order.

   476    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   477    *

   478    * @param srcChars A pointer to another string to compare this one to.

   479    * @param srcLength The number of code units from that string to compare.

   480    * @return a negative/zero/positive integer corresponding to whether

   481    * this string is less than/equal to/greater than the second one

   482    * in code point order

   483    * @stable ICU 2.0

   484    */

   485   inline int8_t compareCodePointOrder(const UChar *srcChars,

   486                                       int32_t srcLength) const;

   488   /**

   489    * Compare two Unicode strings in code point order.

   490    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work

   491    * if supplementary characters are present:

   492    *

   493    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   494    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   495    * which means that they compare as less than some other BMP characters like U+feff.

   496    * This function compares Unicode strings in code point order.

   497    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   498    *

   499    * @param start The start offset in this string at which the compare operation begins.

   500    * @param length The number of code units from this string to compare.

   501    * @param srcChars A pointer to another string to compare this one to.

   502    * @return a negative/zero/positive integer corresponding to whether

   503    * this string is less than/equal to/greater than the second one

   504    * in code point order

   505    * @stable ICU 2.0

   506    */

   507   inline int8_t compareCodePointOrder(int32_t start,

   508                                       int32_t length,

   509                                       const UChar *srcChars) const;

   511   /**

   512    * Compare two Unicode strings in code point order.

   513    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work

   514    * if supplementary characters are present:

   515    *

   516    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   517    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   518    * which means that they compare as less than some other BMP characters like U+feff.

   519    * This function compares Unicode strings in code point order.

   520    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   521    *

   522    * @param start The start offset in this string at which the compare operation begins.

   523    * @param length The number of code units from this string to compare.

   524    * @param srcChars A pointer to another string to compare this one to.

   525    * @param srcStart The start offset in that string at which the compare operation begins.

   526    * @param srcLength The number of code units from that string to compare.

   527    * @return a negative/zero/positive integer corresponding to whether

   528    * this string is less than/equal to/greater than the second one

   529    * in code point order

   530    * @stable ICU 2.0

   531    */

   532   inline int8_t compareCodePointOrder(int32_t start,

   533                                       int32_t length,

   534                                       const UChar *srcChars,

   535                                       int32_t srcStart,

   536                                       int32_t srcLength) const;

   538   /**

   539    * Compare two Unicode strings in code point order.

   540    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work

   541    * if supplementary characters are present:

   542    *

   543    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   544    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   545    * which means that they compare as less than some other BMP characters like U+feff.

   546    * This function compares Unicode strings in code point order.

   547    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   548    *

   549    * @param start The start offset in this string at which the compare operation begins.

   550    * @param limit The offset after the last code unit from this string to compare.

   551    * @param srcText Another string to compare this one to.

   552    * @param srcStart The start offset in that string at which the compare operation begins.

   553    * @param srcLimit The offset after the last code unit from that string to compare.

   554    * @return a negative/zero/positive integer corresponding to whether

   555    * this string is less than/equal to/greater than the second one

   556    * in code point order

   557    * @stable ICU 2.0

   558    */

   559   inline int8_t compareCodePointOrderBetween(int32_t start,

   560                                              int32_t limit,

   561                                              const UnicodeString& srcText,

   562                                              int32_t srcStart,

   563                                              int32_t srcLimit) const;

   565   /**

   566    * Compare two strings case-insensitively using full case folding.

   567    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).

   568    *

   569    * @param text Another string to compare this one to.

   570    * @param options A bit set of options:

   571    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   572    *     Comparison in code unit order with default case folding.

   573    *

   574    *   - U_COMPARE_CODE_POINT_ORDER

   575    *     Set to choose code point order instead of code unit order

   576    *     (see u_strCompare for details).

   577    *

   578    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   579    *

   580    * @return A negative, zero, or positive integer indicating the comparison result.

   581    * @stable ICU 2.0

   582    */

   583   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;

   585   /**

   586    * Compare two strings case-insensitively using full case folding.

   587    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

   588    *

   589    * @param start The start offset in this string at which the compare operation begins.

   590    * @param length The number of code units from this string to compare.

   591    * @param srcText Another string to compare this one to.

   592    * @param options A bit set of options:

   593    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   594    *     Comparison in code unit order with default case folding.

   595    *

   596    *   - U_COMPARE_CODE_POINT_ORDER

   597    *     Set to choose code point order instead of code unit order

   598    *     (see u_strCompare for details).

   599    *

   600    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   601    *

   602    * @return A negative, zero, or positive integer indicating the comparison result.

   603    * @stable ICU 2.0

   604    */

   605   inline int8_t caseCompare(int32_t start,

   606          int32_t length,

   607          const UnicodeString& srcText,

   608          uint32_t options) const;

   610   /**

   611    * Compare two strings case-insensitively using full case folding.

   612    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

   613    *

   614    * @param start The start offset in this string at which the compare operation begins.

   615    * @param length The number of code units from this string to compare.

   616    * @param srcText Another string to compare this one to.

   617    * @param srcStart The start offset in that string at which the compare operation begins.

   618    * @param srcLength The number of code units from that string to compare.

   619    * @param options A bit set of options:

   620    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   621    *     Comparison in code unit order with default case folding.

   622    *

   623    *   - U_COMPARE_CODE_POINT_ORDER

   624    *     Set to choose code point order instead of code unit order

   625    *     (see u_strCompare for details).

   626    *

   627    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   628    *

   629    * @return A negative, zero, or positive integer indicating the comparison result.

   630    * @stable ICU 2.0

   631    */

   632   inline int8_t caseCompare(int32_t start,

   633          int32_t length,

   634          const UnicodeString& srcText,

   635          int32_t srcStart,

   636          int32_t srcLength,

   637          uint32_t options) const;

   639   /**

   640    * Compare two strings case-insensitively using full case folding.

   641    * This is equivalent to this->foldCase(options).compare(folded), where folded is the case-folded form of srcChars.

   642    *

   643    * @param srcChars A pointer to another string to compare this one to.

   644    * @param srcLength The number of code units from that string to compare.

   645    * @param options A bit set of options:

   646    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   647    *     Comparison in code unit order with default case folding.

   648    *

   649    *   - U_COMPARE_CODE_POINT_ORDER

   650    *     Set to choose code point order instead of code unit order

   651    *     (see u_strCompare for details).

   652    *

   653    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   654    *

   655    * @return A negative, zero, or positive integer indicating the comparison result.

   656    * @stable ICU 2.0

   657    */

   658   inline int8_t caseCompare(const UChar *srcChars,

   659          int32_t srcLength,

   660          uint32_t options) const;

   662   /**

   663    * Compare two strings case-insensitively using full case folding.

   664    * This is equivalent to this->foldCase(options).compare(folded), where folded is the case-folded form of srcChars.

   665    *

   666    * @param start The start offset in this string at which the compare operation begins.

   667    * @param length The number of code units from this string to compare.

   668    * @param srcChars A pointer to another string to compare this one to.

   669    * @param options A bit set of options:

   670    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   671    *     Comparison in code unit order with default case folding.

   672    *

   673    *   - U_COMPARE_CODE_POINT_ORDER

   674    *     Set to choose code point order instead of code unit order

   675    *     (see u_strCompare for details).

   676    *

   677    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   678    *

   679    * @return A negative, zero, or positive integer indicating the comparison result.

   680    * @stable ICU 2.0

   681    */

   682   inline int8_t caseCompare(int32_t start,

   683          int32_t length,

   684          const UChar *srcChars,

   685          uint32_t options) const;

   687   /**

   688    * Compare two strings case-insensitively using full case folding.

   689    * This is equivalent to this->foldCase(options).compare(folded), where folded is the case-folded form of srcChars.

   690    *

   691    * @param start The start offset in this string at which the compare operation begins.

   692    * @param length The number of code units from this string to compare.

   693    * @param srcChars A pointer to another string to compare this one to.

   694    * @param srcStart The start offset in that string at which the compare operation begins.

   695    * @param srcLength The number of code units from that string to compare.

   696    * @param options A bit set of options:

   697    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   698    *     Comparison in code unit order with default case folding.

   699    *

   700    *   - U_COMPARE_CODE_POINT_ORDER

   701    *     Set to choose code point order instead of code unit order

   702    *     (see u_strCompare for details).

   703    *

   704    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   705    *

   706    * @return A negative, zero, or positive integer indicating the comparison result.

   707    * @stable ICU 2.0

   708    */

   709   inline int8_t caseCompare(int32_t start,

   710          int32_t length,

   711          const UChar *srcChars,

   712          int32_t srcStart,

   713          int32_t srcLength,

   714          uint32_t options) const;

   716   /**

   717    * Compare two strings case-insensitively using full case folding.

   718    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).

   719    *

   720    * @param start The start offset in this string at which the compare operation begins.

   721    * @param limit The offset after the last code unit from this string to compare.

   722    * @param srcText Another string to compare this one to.

   723    * @param srcStart The start offset in that string at which the compare operation begins.

   724    * @param srcLimit The offset after the last code unit from that string to compare.

   725    * @param options A bit set of options:

   726    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   727    *     Comparison in code unit order with default case folding.

   728    *

   729    *   - U_COMPARE_CODE_POINT_ORDER

   730    *     Set to choose code point order instead of code unit order

   731    *     (see u_strCompare for details).

   732    *

   733    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   734    *

   735    * @return A negative, zero, or positive integer indicating the comparison result.

   736    * @stable ICU 2.0

   737    */

   738   inline int8_t caseCompareBetween(int32_t start,

   739             int32_t limit,

   740             const UnicodeString& srcText,

   741             int32_t srcStart,

   742             int32_t srcLimit,

   743             uint32_t options) const;

   745   /**

   746    * Determine if this starts with the characters in <TT>text</TT>

   747    * @param text The text to match.

   748    * @return TRUE if this starts with the characters in <TT>text</TT>,

   749    * FALSE otherwise

   750    * @stable ICU 2.0

   751    */

   752   inline UBool startsWith(const UnicodeString& text) const;

   754   /**

   755    * Determine if this starts with the characters in <TT>srcText</TT>

   756    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   757    * @param srcText The text to match.

   758    * @param srcStart the offset into <TT>srcText</TT> to start matching

   759    * @param srcLength the number of characters in <TT>srcText</TT> to match

   760    * @return TRUE if this starts with the characters in <TT>srcText</TT>,

   761    * FALSE otherwise

   762    * @stable ICU 2.0

   763    */

   764   inline UBool startsWith(const UnicodeString& srcText,

   765             int32_t srcStart,

   766             int32_t srcLength) const;

   768   /**

   769    * Determine if this starts with the characters in <TT>srcChars</TT>

   770    * @param srcChars The characters to match.

   771    * @param srcLength the number of characters in <TT>srcChars</TT>

   772    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,

   773    * FALSE otherwise

   774    * @stable ICU 2.0

   775    */

   776   inline UBool startsWith(const UChar *srcChars,

   777             int32_t srcLength) const;

   779   /**

   780    * Determine if this starts with the characters in <TT>srcChars</TT>

   781    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   782    * @param srcChars The characters to match.

   783    * @param srcStart the offset into <TT>srcChars</TT> to start matching

   784    * @param srcLength the number of characters in <TT>srcChars</TT> to match

   785    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise

   786    * @stable ICU 2.0

   787    */

   788   inline UBool startsWith(const UChar *srcChars,

   789             int32_t srcStart,

   790             int32_t srcLength) const;

   792   /**

   793    * Determine if this ends with the characters in <TT>text</TT>

   794    * @param text The text to match.

   795    * @return TRUE if this ends with the characters in <TT>text</TT>,

   796    * FALSE otherwise

   797    * @stable ICU 2.0

   798    */

   799   inline UBool endsWith(const UnicodeString& text) const;

   801   /**

   802    * Determine if this ends with the characters in <TT>srcText</TT>

   803    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   804    * @param srcText The text to match.

   805    * @param srcStart the offset into <TT>srcText</TT> to start matching

   806    * @param srcLength the number of characters in <TT>srcText</TT> to match

   807    * @return TRUE if this ends with the characters in <TT>srcText</TT>,

   808    * FALSE otherwise

   809    * @stable ICU 2.0

   810    */

   811   inline UBool endsWith(const UnicodeString& srcText,

   812           int32_t srcStart,

   813           int32_t srcLength) const;

   815   /**

   816    * Determine if this ends with the characters in <TT>srcChars</TT>

   817    * @param srcChars The characters to match.

   818    * @param srcLength the number of characters in <TT>srcChars</TT>

   819    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

   820    * FALSE otherwise

   821    * @stable ICU 2.0

   822    */

   823   inline UBool endsWith(const UChar *srcChars,

   824           int32_t srcLength) const;

   826   /**

   827    * Determine if this ends with the characters in <TT>srcChars</TT>

   828    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   829    * @param srcChars The characters to match.

   830    * @param srcStart the offset into <TT>srcChars</TT> to start matching

   831    * @param srcLength the number of characters in <TT>srcChars</TT> to match

   832    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

   833    * FALSE otherwise

   834    * @stable ICU 2.0

   835    */

   836   inline UBool endsWith(const UChar *srcChars,

   837           int32_t srcStart,

   838           int32_t srcLength) const;

   841   /* Searching - bitwise only */

   843   /**

   844    * Locate in this the first occurrence of the characters in <TT>text</TT>,

   845    * using bitwise comparison.

   846    * @param text The text to search for.

   847    * @return The offset into this of the start of <TT>text</TT>,

   848    * or -1 if not found.

   849    * @stable ICU 2.0

   850    */

   851   inline int32_t indexOf(const UnicodeString& text) const;

   853   /**

   854    * Locate in this the first occurrence of the characters in <TT>text</TT>

   855    * starting at offset <TT>start</TT>, using bitwise comparison.

   856    * @param text The text to search for.

   857    * @param start The offset at which searching will start.

   858    * @return The offset into this of the start of <TT>text</TT>,

   859    * or -1 if not found.

   860    * @stable ICU 2.0

   861    */

   862   inline int32_t indexOf(const UnicodeString& text,

   863               int32_t start) const;

   865   /**

   866    * Locate in this the first occurrence in the range

   867    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   868    * in <TT>text</TT>, using bitwise comparison.

   869    * @param text The text to search for.

   870    * @param start The offset at which searching will start.

   871    * @param length The number of characters to search

   872    * @return The offset into this of the start of <TT>text</TT>,

   873    * or -1 if not found.

   874    * @stable ICU 2.0

   875    */

   876   inline int32_t indexOf(const UnicodeString& text,

   877               int32_t start,

   878               int32_t length) const;

   880   /**

   881    * Locate in this the first occurrence in the range

   882    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   883    * in <TT>srcText</TT> in the range

   884    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

   885    * using bitwise comparison.

   886    * @param srcText The text to search for.

   887    * @param srcStart the offset into <TT>srcText</TT> at which

   888    * to start matching

   889    * @param srcLength the number of characters in <TT>srcText</TT> to match

   890    * @param start the offset into this at which to start matching

   891    * @param length the number of characters in this to search

   892    * @return The offset into this of the start of <TT>srcText</TT>,

   893    * or -1 if not found.

   894    * @stable ICU 2.0

   895    */

   896   inline int32_t indexOf(const UnicodeString& srcText,

   897               int32_t srcStart,

   898               int32_t srcLength,

   899               int32_t start,

   900               int32_t length) const;

   902   /**

   903    * Locate in this the first occurrence of the characters in

   904    * <TT>srcChars</TT>

   905    * starting at offset <TT>start</TT>, using bitwise comparison.

   906    * @param srcChars The text to search for.

   907    * @param srcLength the number of characters in <TT>srcChars</TT> to match

   908    * @param start the offset into this at which to start matching

   909    * @return The offset into this of the start of <TT>srcChars</TT>,

   910    * or -1 if not found.

   911    * @stable ICU 2.0

   912    */

   913   inline int32_t indexOf(const UChar *srcChars,

   914               int32_t srcLength,

   915               int32_t start) const;

   917   /**

   918    * Locate in this the first occurrence in the range

   919    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   920    * in <TT>srcChars</TT>, using bitwise comparison.

   921    * @param srcChars The text to search for.

   922    * @param srcLength the number of characters in <TT>srcChars</TT>

   923    * @param start The offset at which searching will start.

   924    * @param length The number of characters to search

   925    * @return The offset into this of the start of <TT>srcChars</TT>,

   926    * or -1 if not found.

   927    * @stable ICU 2.0

   928    */

   929   inline int32_t indexOf(const UChar *srcChars,

   930               int32_t srcLength,

   931               int32_t start,

   932               int32_t length) const;

   934   /**

   935    * Locate in this the first occurrence in the range

   936    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   937    * in <TT>srcChars</TT> in the range

   938    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

   939    * using bitwise comparison.

   940    * @param srcChars The text to search for.

   941    * @param srcStart the offset into <TT>srcChars</TT> at which

   942    * to start matching

   943    * @param srcLength the number of characters in <TT>srcChars</TT> to match

   944    * @param start the offset into this at which to start matching

   945    * @param length the number of characters in this to search

   946    * @return The offset into this of the start of <TT>srcChars</TT>,

   947    * or -1 if not found.

   948    * @stable ICU 2.0

   949    */

   950   int32_t indexOf(const UChar *srcChars,

   951               int32_t srcStart,

   952               int32_t srcLength,

   953               int32_t start,

   954               int32_t length) const;

   956   /**

   957    * Locate in this the first occurrence of the BMP code point <code>c</code>,

   958    * using bitwise comparison.

   959    * @param c The code unit to search for.

   960    * @return The offset into this of <TT>c</TT>, or -1 if not found.

   961    * @stable ICU 2.0

   962    */

   963   inline int32_t indexOf(UChar c) const;

   965   /**

   966    * Locate in this the first occurrence of the code point <TT>c</TT>,

   967    * using bitwise comparison.

   968    *

   969    * @param c The code point to search for.

   970    * @return The offset into this of <TT>c</TT>, or -1 if not found.

   971    * @stable ICU 2.0

   972    */

   973   inline int32_t indexOf(UChar32 c) const;

   975   /**

   976    * Locate in this the first occurrence of the BMP code point <code>c</code>,

   977    * starting at offset <TT>start</TT>, using bitwise comparison.

   978    * @param c The code unit to search for.

   979    * @param start The offset at which searching will start.

   980    * @return The offset into this of <TT>c</TT>, or -1 if not found.

   981    * @stable ICU 2.0

   982    */

   983   inline int32_t indexOf(UChar c,

   984               int32_t start) const;

   986   /**

   987    * Locate in this the first occurrence of the code point <TT>c</TT>

   988    * starting at offset <TT>start</TT>, using bitwise comparison.

   989    *

   990    * @param c The code point to search for.

   991    * @param start The offset at which searching will start.

   992    * @return The offset into this of <TT>c</TT>, or -1 if not found.

   993    * @stable ICU 2.0

   994    */

   995   inline int32_t indexOf(UChar32 c,

   996               int32_t start) const;

   998   /**

   999    * Locate in this the first occurrence of the BMP code point <code>c</code>

  1000    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1001    * using bitwise comparison.

  1002    * @param c The code unit to search for.

  1003    * @param start the offset into this at which to start matching

  1004    * @param length the number of characters in this to search

  1005    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1006    * @stable ICU 2.0

  1007    */

  1008   inline int32_t indexOf(UChar c,

  1009               int32_t start,

  1010               int32_t length) const;

  1012   /**

  1013    * Locate in this the first occurrence of the code point <TT>c</TT>

  1014    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1015    * using bitwise comparison.

  1016    *

  1017    * @param c The code point to search for.

  1018    * @param start the offset into this at which to start matching

  1019    * @param length the number of characters in this to search

  1020    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1021    * @stable ICU 2.0

  1022    */

  1023   inline int32_t indexOf(UChar32 c,

  1024               int32_t start,

  1025               int32_t length) const;

  1027   /**

  1028    * Locate in this the last occurrence of the characters in <TT>text</TT>,

  1029    * using bitwise comparison.

  1030    * @param text The text to search for.

  1031    * @return The offset into this of the start of <TT>text</TT>,

  1032    * or -1 if not found.

  1033    * @stable ICU 2.0

  1034    */

  1035   inline int32_t lastIndexOf(const UnicodeString& text) const;

  1037   /**

  1038    * Locate in this the last occurrence of the characters in <TT>text</TT>

  1039    * starting at offset <TT>start</TT>, using bitwise comparison.

  1040    * @param text The text to search for.

  1041    * @param start The offset at which searching will start.

  1042    * @return The offset into this of the start of <TT>text</TT>,

  1043    * or -1 if not found.

  1044    * @stable ICU 2.0

  1045    */

  1046   inline int32_t lastIndexOf(const UnicodeString& text,

  1047               int32_t start) const;

  1049   /**

  1050    * Locate in this the last occurrence in the range

  1051    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1052    * in <TT>text</TT>, using bitwise comparison.

  1053    * @param text The text to search for.

  1054    * @param start The offset at which searching will start.

  1055    * @param length The number of characters to search

  1056    * @return The offset into this of the start of <TT>text</TT>,

  1057    * or -1 if not found.

  1058    * @stable ICU 2.0

  1059    */

  1060   inline int32_t lastIndexOf(const UnicodeString& text,

  1061               int32_t start,

  1062               int32_t length) const;

  1064   /**

  1065    * Locate in this the last occurrence in the range

  1066    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1067    * in <TT>srcText</TT> in the range

  1068    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

  1069    * using bitwise comparison.

  1070    * @param srcText The text to search for.

  1071    * @param srcStart the offset into <TT>srcText</TT> at which

  1072    * to start matching

  1073    * @param srcLength the number of characters in <TT>srcText</TT> to match

  1074    * @param start the offset into this at which to start matching

  1075    * @param length the number of characters in this to search

  1076    * @return The offset into this of the start of <TT>srcText</TT>,

  1077    * or -1 if not found.

  1078    * @stable ICU 2.0

  1079    */

  1080   inline int32_t lastIndexOf(const UnicodeString& srcText,

  1081               int32_t srcStart,

  1082               int32_t srcLength,

  1083               int32_t start,

  1084               int32_t length) const;

  1086   /**

  1087    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>

  1088    * starting at offset <TT>start</TT>, using bitwise comparison.

  1089    * @param srcChars The text to search for.

  1090    * @param srcLength the number of characters in <TT>srcChars</TT> to match

  1091    * @param start the offset into this at which to start matching

  1092    * @return The offset into this of the start of <TT>srcChars</TT>,

  1093    * or -1 if not found.

  1094    * @stable ICU 2.0

  1095    */

  1096   inline int32_t lastIndexOf(const UChar *srcChars,

  1097               int32_t srcLength,

  1098               int32_t start) const;

  1100   /**

  1101    * Locate in this the last occurrence in the range

  1102    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1103    * in <TT>srcChars</TT>, using bitwise comparison.

  1104    * @param srcChars The text to search for.

  1105    * @param srcLength the number of characters in <TT>srcChars</TT>

  1106    * @param start The offset at which searching will start.

  1107    * @param length The number of characters to search

  1108    * @return The offset into this of the start of <TT>srcChars</TT>,

  1109    * or -1 if not found.

  1110    * @stable ICU 2.0

  1111    */

  1112   inline int32_t lastIndexOf(const UChar *srcChars,

  1113               int32_t srcLength,

  1114               int32_t start,

  1115               int32_t length) const;

  1117   /**

  1118    * Locate in this the last occurrence in the range

  1119    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1120    * in <TT>srcChars</TT> in the range

  1121    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

  1122    * using bitwise comparison.

  1123    * @param srcChars The text to search for.

  1124    * @param srcStart the offset into <TT>srcChars</TT> at which

  1125    * to start matching

  1126    * @param srcLength the number of characters in <TT>srcChars</TT> to match

  1127    * @param start the offset into this at which to start matching

  1128    * @param length the number of characters in this to search

  1129    * @return The offset into this of the start of <TT>srcChars</TT>,

  1130    * or -1 if not found.

  1131    * @stable ICU 2.0

  1132    */

  1133   int32_t lastIndexOf(const UChar *srcChars,

  1134               int32_t srcStart,

  1135               int32_t srcLength,

  1136               int32_t start,

  1137               int32_t length) const;

  1139   /**

  1140    * Locate in this the last occurrence of the BMP code point <code>c</code>,

  1141    * using bitwise comparison.

  1142    * @param c The code unit to search for.

  1143    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1144    * @stable ICU 2.0

  1145    */

  1146   inline int32_t lastIndexOf(UChar c) const;

  1148   /**

  1149    * Locate in this the last occurrence of the code point <TT>c</TT>,

  1150    * using bitwise comparison.

  1151    *

  1152    * @param c The code point to search for.

  1153    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1154    * @stable ICU 2.0

  1155    */

  1156   inline int32_t lastIndexOf(UChar32 c) const;

  1158   /**

  1159    * Locate in this the last occurrence of the BMP code point <code>c</code>

  1160    * starting at offset <TT>start</TT>, using bitwise comparison.

  1161    * @param c The code unit to search for.

  1162    * @param start The offset at which searching will start.

  1163    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1164    * @stable ICU 2.0

  1165    */

  1166   inline int32_t lastIndexOf(UChar c,

  1167               int32_t start) const;

  1169   /**

  1170    * Locate in this the last occurrence of the code point <TT>c</TT>

  1171    * starting at offset <TT>start</TT>, using bitwise comparison.

  1172    *

  1173    * @param c The code point to search for.

  1174    * @param start The offset at which searching will start.

  1175    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1176    * @stable ICU 2.0

  1177    */

  1178   inline int32_t lastIndexOf(UChar32 c,

  1179               int32_t start) const;

  1181   /**

  1182    * Locate in this the last occurrence of the BMP code point <code>c</code>

  1183    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1184    * using bitwise comparison.

  1185    * @param c The code unit to search for.

  1186    * @param start the offset into this at which to start matching

  1187    * @param length the number of characters in this to search

  1188    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1189    * @stable ICU 2.0

  1190    */

  1191   inline int32_t lastIndexOf(UChar c,

  1192               int32_t start,

  1193               int32_t length) const;

  1195   /**

  1196    * Locate in this the last occurrence of the code point <TT>c</TT>

  1197    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1198    * using bitwise comparison.

  1199    *

  1200    * @param c The code point to search for.

  1201    * @param start the offset into this at which to start matching

  1202    * @param length the number of characters in this to search

  1203    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1204    * @stable ICU 2.0

  1205    */

  1206   inline int32_t lastIndexOf(UChar32 c,

  1207               int32_t start,

  1208               int32_t length) const;

  1211   /* Character access */

  1213   /**

  1214    * Return the code unit at offset <tt>offset</tt>.

  1215    * If the offset is not valid (0..length()-1) then U+ffff is returned.

  1216    * @param offset a valid offset into the text

  1217    * @return the code unit at offset <tt>offset</tt>

  1218    *         or 0xffff if the offset is not valid for this string

  1219    * @stable ICU 2.0

  1220    */

  1221   inline UChar charAt(int32_t offset) const;

  1223   /**

  1224    * Return the code unit at offset <tt>offset</tt>.

  1225    * If the offset is not valid (0..length()-1) then U+ffff is returned.

  1226    * @param offset a valid offset into the text

  1227    * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string

  1228    * @stable ICU 2.0

  1229    */

  1230   inline UChar operator[] (int32_t offset) const;

  1232   /**

  1233    * Return the code point that contains the code unit

  1234    * at offset <tt>offset</tt>.

  1235    * If the offset is not valid (0..length()-1) then U+ffff is returned.

  1236    * @param offset a valid offset into the text

  1237    * that indicates the text offset of any of the code units

  1238    * that will be assembled into a code point (21-bit value) and returned

  1239    * @return the code point of text at <tt>offset</tt>

  1240    *         or 0xffff if the offset is not valid for this string

  1241    * @stable ICU 2.0

  1242    */

  1243   inline UChar32 char32At(int32_t offset) const;

  1245   /**

  1246    * Adjust a random-access offset so that

  1247    * it points to the beginning of a Unicode character.

  1248    * The offset that is passed in points to

  1249    * any code unit of a code point,

  1250    * while the returned offset will point to the first code unit

  1251    * of the same code point.

  1252    * In UTF-16, if the input offset points to a second surrogate

  1253    * of a surrogate pair, then the returned offset will point

  1254    * to the first surrogate.

  1255    * @param offset a valid offset into one code point of the text

  1256    * @return offset of the first code unit of the same code point

  1257    * @see U16_SET_CP_START

  1258    * @stable ICU 2.0

  1259    */

  1260   inline int32_t getChar32Start(int32_t offset) const;

  1262   /**

  1263    * Adjust a random-access offset so that

  1264    * it points behind a Unicode character.

  1265    * The offset that is passed in points behind

  1266    * any code unit of a code point,

  1267    * while the returned offset will point behind the last code unit

  1268    * of the same code point.

  1269    * In UTF-16, if the input offset points behind the first surrogate

  1270    * (i.e., to the second surrogate)

  1271    * of a surrogate pair, then the returned offset will point

  1272    * behind the second surrogate (i.e., to the first code unit after the pair).

  1273    * @param offset a valid offset after any code unit of a code point of the text

  1274    * @return offset of the first code unit after the same code point

  1275    * @see U16_SET_CP_LIMIT

  1276    * @stable ICU 2.0

  1277    */

  1278   inline int32_t getChar32Limit(int32_t offset) const;

  1280   /**

  1281    * Move the code unit index along the string by delta code points.

  1282    * Interpret the input index as a code unit-based offset into the string,

  1283    * move the index forward or backward by delta code points, and

  1284    * return the resulting index.

  1285    * The input index should point to the first code unit of a code point,

  1286    * if there is more than one.

  1287    *

  1288    * Both input and output indexes are code unit-based as for all

  1289    * string indexes/offsets in ICU (and other libraries, like MBCS char*).

  1290    * If delta<0 then the index is moved backward (toward the start of the string).

  1291    * If delta>0 then the index is moved forward (toward the end of the string).

  1292    *

  1293    * This behaves like CharacterIterator::move32(delta, kCurrent).

  1294    *

  1295    * Behavior for out-of-bounds indexes:

  1296    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,

  1297    * if the input index<0 then it is pinned to 0;

  1298    * if it is index>length() then it is pinned to length().

  1299    * Afterwards, the index is moved by <code>delta</code> code points

  1300    * forward or backward,

  1301    * but no further backward than to 0 and no further forward than to length().

  1302    * The resulting index return value will be in between 0 and length(), inclusively.

  1303    *

  1304    * Examples:

  1305    * <pre>

  1306    * // s has code points 'a' U+10000 'b' U+10ffff U+2029

  1307    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();

  1308    *

  1309    * // initial index: position of U+10000

  1310    * int32_t index=1;

  1311    *

  1312    * // the following examples will all result in index==4, position of U+10ffff

  1313    *

  1314    * // skip 2 code points from some position in the string

  1315    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'

  1316    *

  1317    * // go to the 3rd code point from the start of s (0-based)

  1318    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'

  1319    *

  1320    * // go to the next-to-last code point of s

  1321    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff

  1322    * </pre>

  1323    *

  1324    * @param index input code unit index

  1325    * @param delta (signed) code point count to move the index forward or backward

  1326    *        in the string

  1327    * @return the resulting code unit index

  1328    * @stable ICU 2.0

  1329    */

  1330   int32_t moveIndex32(int32_t index, int32_t delta) const;

  1332   /* Substring extraction */

  1334   /**

  1335    * Copy the characters in the range

  1336    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,

  1337    * beginning at <tt>dstStart</tt>.

  1338    * If the string aliases to <code>dst</code> itself as an external buffer,

  1339    * then extract() will not copy the contents.

  1340    *

  1341    * @param start offset of first character which will be copied into the array

  1342    * @param length the number of characters to extract

  1343    * @param dst array in which to copy characters.  The length of <tt>dst</tt>

  1344    * must be at least (<tt>dstStart + length</tt>).

  1345    * @param dstStart the offset in <TT>dst</TT> where the first character

  1346    * will be extracted

  1347    * @stable ICU 2.0

  1348    */

  1349   inline void extract(int32_t start,

  1350            int32_t length,

  1351            UChar *dst,

  1352            int32_t dstStart = 0) const;

  1354   /**

  1355    * Copy the contents of the string into dest.

  1356    * This is a convenience function that

  1357    * checks if there is enough space in dest,

  1358    * extracts the entire string if possible,

  1359    * and NUL-terminates dest if possible.

  1360    *

  1361    * If the string fits into dest but cannot be NUL-terminated

  1362    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.

  1363    * If the string itself does not fit into dest

  1364    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.

  1365    *

  1366    * If the string aliases to <code>dest</code> itself as an external buffer,

  1367    * then extract() will not copy the contents.

  1368    *

  1369    * @param dest Destination string buffer.

  1370    * @param destCapacity Number of UChars available at dest.

  1371    * @param errorCode ICU error code.

  1372    * @return length()

  1373    * @stable ICU 2.0

  1374    */

  1375   int32_t

  1376   extract(UChar *dest, int32_t destCapacity,

  1377           UErrorCode &errorCode) const;

  1379   /**

  1380    * Copy the characters in the range

  1381    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString

  1382    * <tt>target</tt>.

  1383    * @param start offset of first character which will be copied

  1384    * @param length the number of characters to extract

  1385    * @param target UnicodeString into which to copy characters.

  1386    *        This function returns nothing; <TT>target</TT> is the only output.

  1387    * @stable ICU 2.0

  1388    */

  1389   inline void extract(int32_t start,

  1390            int32_t length,

  1391            UnicodeString& target) const;

  1393   /**

  1394    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

  1395    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.

  1396    * @param start offset of first character which will be copied into the array

  1397    * @param limit offset immediately following the last character to be copied

  1398    * @param dst array in which to copy characters.  The length of <tt>dst</tt>

  1399    * must be at least (<tt>dstStart + (limit - start)</tt>).

  1400    * @param dstStart the offset in <TT>dst</TT> where the first character

  1401    * will be extracted

  1402    * @stable ICU 2.0

  1403    */

  1404   inline void extractBetween(int32_t start,

  1405               int32_t limit,

  1406               UChar *dst,

  1407               int32_t dstStart = 0) const;

  1409   /**

  1410    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

  1411    * into the UnicodeString <tt>target</tt>.  Replaceable API.

  1412    * @param start offset of first character which will be copied

  1413    * @param limit offset immediately following the last character to be copied

  1414    * @param target UnicodeString into which to copy characters.

  1415    *        This function returns nothing; <TT>target</TT> is the only output.

  1416    * @stable ICU 2.0

  1417    */

  1418   virtual void extractBetween(int32_t start,

  1419               int32_t limit,

  1420               UnicodeString& target) const;

  1422   /**

  1423    * Copy the characters in the range

  1424    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.

  1425    * All characters must be invariant (see utypes.h).

  1426    * Use US_INV as the last, signature-distinguishing parameter.

  1427    *

  1428    * This function does not write any more than <code>targetCapacity</code>

  1429    * characters but returns the length of the entire output string

  1430    * so that one can allocate a larger buffer and call the function again

  1431    * if necessary.

  1432    * The output string is NUL-terminated if possible.

  1433    *

  1434    * @param start offset of first character which will be copied

  1435    * @param startLength the number of characters to extract

  1436    * @param target the target buffer for extraction, can be NULL

  1437    *               if targetCapacity is 0

  1438    * @param targetCapacity the length of the target buffer

  1439    * @param inv Signature-distinguishing parameter, use US_INV.

  1440    * @return the output string length, not including the terminating NUL

  1441    * @draft ICU 3.2

  1442    */

  1443   int32_t extract(int32_t start,

  1444            int32_t startLength,

  1445            char *target,

  1446            int32_t targetCapacity,

  1447            enum EInvariant inv) const;

  1449 #if !UCONFIG_NO_CONVERSION

  1451   /**

  1452    * Copy the characters in the range

  1453    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters

  1454    * in a specified codepage.

  1455    * The output string is NUL-terminated.

  1456    *

  1457    * Recommendation: For invariant-character strings use

  1458    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

  1459    * because it avoids object code dependencies of UnicodeString on

  1460    * the conversion code.

  1461    *

  1462    * @param start offset of first character which will be copied

  1463    * @param startLength the number of characters to extract

  1464    * @param target the target buffer for extraction

  1465    * @param codepage the desired codepage for the characters.  0 has

  1466    * the special meaning of the default codepage

  1467    * If <code>codepage</code> is an empty string (<code>""</code>),

  1468    * then a simple conversion is performed on the codepage-invariant

  1469    * subset ("invariant characters") of the platform encoding. See utypes.h.

  1470    * If <TT>target</TT> is NULL, then the number of bytes required for

  1471    * <TT>target</TT> is returned. It is assumed that the target is big enough

  1472    * to fit all of the characters.

  1473    * @return the output string length, not including the terminating NUL

  1474    * @stable ICU 2.0

  1475    */

  1476   inline int32_t extract(int32_t start,

  1477                  int32_t startLength,

  1478                  char *target,

  1479                  const char *codepage = 0) const;

  1481   /**

  1482    * Copy the characters in the range

  1483    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters

  1484    * in a specified codepage.

  1485    * This function does not write any more than <code>targetLength</code>

  1486    * characters but returns the length of the entire output string

  1487    * so that one can allocate a larger buffer and call the function again

  1488    * if necessary.

  1489    * The output string is NUL-terminated if possible.

  1490    *

  1491    * Recommendation: For invariant-character strings use

  1492    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

  1493    * because it avoids object code dependencies of UnicodeString on

  1494    * the conversion code.

  1495    *

  1496    * @param start offset of first character which will be copied

  1497    * @param startLength the number of characters to extract

  1498    * @param target the target buffer for extraction

  1499    * @param targetLength the length of the target buffer

  1500    * @param codepage the desired codepage for the characters.  0 has

  1501    * the special meaning of the default codepage

  1502    * If <code>codepage</code> is an empty string (<code>""</code>),

  1503    * then a simple conversion is performed on the codepage-invariant

  1504    * subset ("invariant characters") of the platform encoding. See utypes.h.

  1505    * If <TT>target</TT> is NULL, then the number of bytes required for

  1506    * <TT>target</TT> is returned.

  1507    * @return the output string length, not including the terminating NUL

  1508    * @stable ICU 2.0

  1509    */

  1510   int32_t extract(int32_t start,

  1511            int32_t startLength,

  1512            char *target,

  1513            uint32_t targetLength,

  1514            const char *codepage = 0) const;

  1516   /**

  1517    * Convert the UnicodeString into a codepage string using an existing UConverter.

  1518    * The output string is NUL-terminated if possible.

  1519    *

  1520    * This function avoids the overhead of opening and closing a converter if

  1521    * multiple strings are extracted.

  1522    *

  1523    * @param dest destination string buffer, can be NULL if destCapacity==0

  1524    * @param destCapacity the number of chars available at dest

  1525    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),

  1526    *        or NULL for the default converter

  1527    * @param errorCode normal ICU error code

  1528    * @return the length of the output string, not counting the terminating NUL;

  1529    *         if the length is greater than destCapacity, then the string will not fit

  1530    *         and a buffer of the indicated length would need to be passed in

  1531    * @stable ICU 2.0

  1532    */

  1533   int32_t extract(char *dest, int32_t destCapacity,

  1534                   UConverter *cnv,

  1535                   UErrorCode &errorCode) const;

  1537 #endif

  1539   /* Length operations */

  1541   /**

  1542    * Return the length of the UnicodeString object.

  1543    * The length is the number of UChar code units in the UnicodeString.

  1544    * If you want the number of code points, please use countChar32().

  1545    * @return the length of the UnicodeString object

  1546    * @see countChar32

  1547    * @stable ICU 2.0

  1548    */

  1549   inline int32_t length(void) const;

  1551   /**

  1552    * Count Unicode code points in the length UChar code units of the string.

  1553    * A code point may occupy either one or two UChar code units.

  1554    * Counting code points involves reading all code units.

  1555    *

  1556    * This function is basically the inverse of moveIndex32().

  1557    *

  1558    * @param start the index of the first code unit to check

  1559    * @param length the number of UChar code units to check

  1560    * @return the number of code points in the specified code units

  1561    * @see length

  1562    * @stable ICU 2.0

  1563    */

  1564   int32_t

  1565   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;

  1567   /**

  1568    * Check if the length UChar code units of the string

  1569    * contain more Unicode code points than a certain number.

  1570    * This is more efficient than counting all code points in this part of the string

  1571    * and comparing that number with a threshold.

  1572    * This function may not need to scan the string at all if the length

  1573    * falls within a certain range, and

  1574    * never needs to count more than 'number+1' code points.

  1575    * Logically equivalent to (countChar32(start, length)>number).

  1576    * A Unicode code point may occupy either one or two UChar code units.

  1577    *

  1578    * @param start the index of the first code unit to check (0 for the entire string)

  1579    * @param length the number of UChar code units to check

  1580    *               (use INT32_MAX for the entire string; remember that start/length

  1581    *                values are pinned)

  1582    * @param number The number of code points in the (sub)string is compared against

  1583    *               the 'number' parameter.

  1584    * @return Boolean value for whether the string contains more Unicode code points

  1585    *         than 'number'. Same as (u_countChar32(s, length)>number).

  1586    * @see countChar32

  1587    * @see u_strHasMoreChar32Than

  1588    * @stable ICU 2.4

  1589    */

  1590   UBool

  1591   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;

  1593   /**

  1594    * Determine if this string is empty.

  1595    * @return TRUE if this string contains 0 characters, FALSE otherwise.

  1596    * @stable ICU 2.0

  1597    */

  1598   inline UBool isEmpty(void) const;

  1600   /**

  1601    * Return the capacity of the internal buffer of the UnicodeString object.

  1602    * This is useful together with the getBuffer functions.

  1603    * See there for details.

  1604    *

  1605    * @return the number of UChars available in the internal buffer

  1606    * @see getBuffer

  1607    * @stable ICU 2.0

  1608    */

  1609   inline int32_t getCapacity(void) const;

  1611   /* Other operations */

  1613   /**

  1614    * Generate a hash code for this object.

  1615    * @return The hash code of this UnicodeString.

  1616    * @stable ICU 2.0

  1617    */

  1618   inline int32_t hashCode(void) const;

  1620   /**

  1621    * Determine if this object contains a valid string.

  1622    * A bogus string has no value. It is different from an empty string.

  1623    * It can be used to indicate that no string value is available.

  1624    * getBuffer() and getTerminatedBuffer() return NULL, and

  1625    * length() returns 0.

  1626    *

  1627    * @return TRUE if the string is valid, FALSE otherwise

  1628    * @see setToBogus()

  1629    * @stable ICU 2.0

  1630    */

  1631   inline UBool isBogus(void) const;

  1634   //========================================

  1635   // Write operations

  1636   //========================================

  1638   /* Assignment operations */

  1640   /**

  1641    * Assignment operator.  Replace the characters in this UnicodeString

  1642    * with the characters from <TT>srcText</TT>.

  1643    * @param srcText The text containing the characters to replace

  1644    * @return a reference to this

  1645    * @stable ICU 2.0

  1646    */

  1647   UnicodeString &operator=(const UnicodeString &srcText);

  1649   /**

  1650    * Almost the same as the assignment operator.

  1651    * Replace the characters in this UnicodeString

  1652    * with the characters from <code>srcText</code>.

  1653    *

  1654    * This function works the same for all strings except for ones that

  1655    * are readonly aliases.

  1656    * Starting with ICU 2.4, the assignment operator and the copy constructor

  1657    * allocate a new buffer and copy the buffer contents even for readonly aliases.

  1658    * This function implements the old, more efficient but less safe behavior

  1659    * of making this string also a readonly alias to the same buffer.

  1660    * The fastCopyFrom function must be used only if it is known that the lifetime of

  1661    * this UnicodeString is at least as long as the lifetime of the aliased buffer

  1662    * including its contents, for example for strings from resource bundles

  1663    * or aliases to string contents.

  1664    *

  1665    * @param src The text containing the characters to replace.

  1666    * @return a reference to this

  1667    * @stable ICU 2.4

  1668    */

  1669   UnicodeString &fastCopyFrom(const UnicodeString &src);

  1671   /**

  1672    * Assignment operator.  Replace the characters in this UnicodeString

  1673    * with the code unit <TT>ch</TT>.

  1674    * @param ch the code unit to replace

  1675    * @return a reference to this

  1676    * @stable ICU 2.0

  1677    */

  1678   inline UnicodeString& operator= (UChar ch);

  1680   /**

  1681    * Assignment operator.  Replace the characters in this UnicodeString

  1682    * with the code point <TT>ch</TT>.

  1683    * @param ch the code point to replace

  1684    * @return a reference to this

  1685    * @stable ICU 2.0

  1686    */

  1687   inline UnicodeString& operator= (UChar32 ch);

  1689   /**

  1690    * Set the text in the UnicodeString object to the characters

  1691    * in <TT>srcText</TT> in the range

  1692    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).

  1693    * <TT>srcText</TT> is not modified.

  1694    * @param srcText the source for the new characters

  1695    * @param srcStart the offset into <TT>srcText</TT> where new characters

  1696    * will be obtained

  1697    * @return a reference to this

  1698    * @stable ICU 2.2

  1699    */

  1700   inline UnicodeString& setTo(const UnicodeString& srcText,

  1701                int32_t srcStart);

  1703   /**

  1704    * Set the text in the UnicodeString object to the characters

  1705    * in <TT>srcText</TT> in the range

  1706    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

  1707    * <TT>srcText</TT> is not modified.

  1708    * @param srcText the source for the new characters

  1709    * @param srcStart the offset into <TT>srcText</TT> where new characters

  1710    * will be obtained

  1711    * @param srcLength the number of characters in <TT>srcText</TT> in the

  1712    * replace string.

  1713    * @return a reference to this

  1714    * @stable ICU 2.0

  1715    */

  1716   inline UnicodeString& setTo(const UnicodeString& srcText,

  1717                int32_t srcStart,

  1718                int32_t srcLength);

  1720   /**

  1721    * Set the text in the UnicodeString object to the characters in

  1722    * <TT>srcText</TT>.

  1723    * <TT>srcText</TT> is not modified.

  1724    * @param srcText the source for the new characters

  1725    * @return a reference to this

  1726    * @stable ICU 2.0

  1727    */

  1728   inline UnicodeString& setTo(const UnicodeString& srcText);

  1730   /**

  1731    * Set the characters in the UnicodeString object to the characters

  1732    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.

  1733    * @param srcChars the source for the new characters

  1734    * @param srcLength the number of Unicode characters in srcChars.

  1735    * @return a reference to this

  1736    * @stable ICU 2.0

  1737    */

  1738   inline UnicodeString& setTo(const UChar *srcChars,

  1739                int32_t srcLength);

  1741   /**

  1742    * Set the characters in the UnicodeString object to the code unit

  1743    * <TT>srcChar</TT>.

  1744    * @param srcChar the code unit which becomes the UnicodeString's character

  1745    * content

  1746    * @return a reference to this

  1747    * @stable ICU 2.0

  1748    */

  1749   UnicodeString& setTo(UChar srcChar);

  1751   /**

  1752    * Set the characters in the UnicodeString object to the code point

  1753    * <TT>srcChar</TT>.

  1754    * @param srcChar the code point which becomes the UnicodeString's character

  1755    * content

  1756    * @return a reference to this

  1757    * @stable ICU 2.0

  1758    */

  1759   UnicodeString& setTo(UChar32 srcChar);

  1761   /**

  1762    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.

  1763    * The text will be used for the UnicodeString object, but

  1764    * it will not be released when the UnicodeString is destroyed.

  1765    * This has copy-on-write semantics:

  1766    * When the string is modified, then the buffer is first copied into

  1767    * newly allocated memory.

  1768    * The aliased buffer is never modified.

  1769    * In an assignment to another UnicodeString, the text will be aliased again,

  1770    * so that both strings then alias the same readonly-text.

  1771    *

  1772    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

  1773    *                     This must be true if <code>textLength==-1</code>.

  1774    * @param text The characters to alias for the UnicodeString.

  1775    * @param textLength The number of Unicode characters in <code>text</code> to alias.

  1776    *                   If -1, then this function will determine the length

  1777    *                   by calling <code>u_strlen()</code>.

  1778    * @return a reference to this

  1779    * @stable ICU 2.0

  1780    */

  1781   UnicodeString &setTo(UBool isTerminated,

  1782                        const UChar *text,

  1783                        int32_t textLength);

  1785   /**

  1786    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.

  1787    * The text will be used for the UnicodeString object, but

  1788    * it will not be released when the UnicodeString is destroyed.

  1789    * This has write-through semantics:

  1790    * For as long as the capacity of the buffer is sufficient, write operations

  1791    * will directly affect the buffer. When more capacity is necessary, then

  1792    * a new buffer will be allocated and the contents copied as with regularly

  1793    * constructed strings.

  1794    * In an assignment to another UnicodeString, the buffer will be copied.

  1795    * The extract(UChar *dst) function detects whether the dst pointer is the same

  1796    * as the string buffer itself and will in this case not copy the contents.

  1797    *

  1798    * @param buffer The characters to alias for the UnicodeString.

  1799    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

  1800    * @param buffCapacity The size of <code>buffer</code> in UChars.

  1801    * @return a reference to this

  1802    * @stable ICU 2.0

  1803    */

  1804   UnicodeString &setTo(UChar *buffer,

  1805                        int32_t buffLength,

  1806                        int32_t buffCapacity);

  1808   /**

  1809    * Make this UnicodeString object invalid.

  1810    * The string will test TRUE with isBogus().

  1811    *

  1812    * A bogus string has no value. It is different from an empty string.

  1813    * It can be used to indicate that no string value is available.

  1814    * getBuffer() and getTerminatedBuffer() return NULL, and

  1815    * length() returns 0.

  1816    *

  1817    * This utility function is used throughout the UnicodeString

  1818    * implementation to indicate that a UnicodeString operation failed,

  1819    * and may be used in other functions,

  1820    * especially but not exclusively when such functions do not

  1821    * take a UErrorCode for simplicity.

  1822    *

  1823    * The following methods, and no others, will clear a string object's bogus flag:

  1824    * - remove()

  1825    * - remove(0, INT32_MAX)

  1826    * - truncate(0)

  1827    * - operator=() (assignment operator)

  1828    * - setTo(...)

  1829    *

  1830    * The simplest way to turn a bogus string into an empty one

  1831    * is to use the remove() function.

  1832    * Examples for other functions that are equivalent to "set to empty string":

  1833    * \code

  1834    * if(s.isBogus()) {

  1835    *   s.remove();           // set to an empty string (remove all), or

  1836    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or

  1837    *   s.truncate(0);        // set to an empty string (complete truncation), or

  1838    *   s=UnicodeString();    // assign an empty string, or

  1839    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or

  1840    *   static const UChar nul=0;

  1841    *   s.setTo(&nul, 0);     // set to an empty C Unicode string

  1842    * }

  1843    * \endcode

  1844    *

  1845    * @see isBogus()

  1846    * @stable ICU 2.0

  1847    */

  1848   void setToBogus();

  1850   /**

  1851    * Set the character at the specified offset to the specified character.

  1852    * @param offset A valid offset into the text of the character to set

  1853    * @param ch The new character

  1854    * @return A reference to this

  1855    * @stable ICU 2.0

  1856    */

  1857   UnicodeString& setCharAt(int32_t offset,

  1858                UChar ch);

  1861   /* Append operations */

  1863   /**

  1864    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString

  1865    * object.

  1866    * @param ch the code unit to be appended

  1867    * @return a reference to this

  1868    * @stable ICU 2.0

  1869    */

  1870  inline  UnicodeString& operator+= (UChar ch);

  1872   /**

  1873    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString

  1874    * object.

  1875    * @param ch the code point to be appended

  1876    * @return a reference to this

  1877    * @stable ICU 2.0

  1878    */

  1879  inline  UnicodeString& operator+= (UChar32 ch);

  1881   /**

  1882    * Append operator. Append the characters in <TT>srcText</TT> to the

  1883    * end of the UnicodeString object. <TT>srcText</TT> is

  1884    * not modified.

  1885    * @param srcText the source for the new characters

  1886    * @return a reference to this

  1887    * @stable ICU 2.0

  1888    */

  1889   inline UnicodeString& operator+= (const UnicodeString& srcText);

  1891   /**

  1892    * Append the characters

  1893    * in <TT>srcText</TT> in the range

  1894    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the

  1895    * end of the UnicodeString object. <TT>srcText</TT>

  1896    * is not modified.

  1897    * @param srcText the source for the new characters

  1898    * @param srcStart the offset into <TT>srcText</TT> where new characters

  1899    * will be obtained

  1900    * @param srcLength the number of characters in <TT>srcText</TT> in

  1901    * the append string

  1902    * @return a reference to this

  1903    * @stable ICU 2.0

  1904    */

  1905   inline UnicodeString& append(const UnicodeString& srcText,

  1906             int32_t srcStart,

  1907             int32_t srcLength);

  1909   /**

  1910    * Append the characters in <TT>srcText</TT> to the end of the

  1911    * UnicodeString object. <TT>srcText</TT> is not modified.

  1912    * @param srcText the source for the new characters

  1913    * @return a reference to this

  1914    * @stable ICU 2.0

  1915    */

  1916   inline UnicodeString& append(const UnicodeString& srcText);

  1918   /**

  1919    * Append the characters in <TT>srcChars</TT> in the range

  1920    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the

  1921    * UnicodeString object.

  1922    * <TT>srcChars</TT> is not modified.

  1923    * @param srcChars the source for the new characters

  1924    * @param srcStart the offset into <TT>srcChars</TT> where new characters

  1925    * will be obtained

  1926    * @param srcLength the number of characters in <TT>srcChars</TT> in

  1927    * the append string

  1928    * @return a reference to this

  1929    * @stable ICU 2.0

  1930    */

  1931   inline UnicodeString& append(const UChar *srcChars,

  1932             int32_t srcStart,

  1933             int32_t srcLength);

  1935   /**

  1936    * Append the characters in <TT>srcChars</TT> to the end of the

  1937    * UnicodeString object. <TT>srcChars</TT> is not modified.

  1938    * @param srcChars the source for the new characters

  1939    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>

  1940    * @return a reference to this

  1941    * @stable ICU 2.0

  1942    */

  1943   inline UnicodeString& append(const UChar *srcChars,

  1944             int32_t srcLength);

  1946   /**

  1947    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.

  1948    * @param srcChar the code unit to append

  1949    * @return a reference to this

  1950    * @stable ICU 2.0

  1951    */

  1952   inline UnicodeString& append(UChar srcChar);

  1954   /**

  1955    * Append the code point <TT>srcChar</TT> to the UnicodeString object.

  1956    * @param srcChar the code point to append

  1957    * @return a reference to this

  1958    * @stable ICU 2.0

  1959    */

  1960   inline UnicodeString& append(UChar32 srcChar);

  1963   /* Insert operations */

  1965   /**

  1966    * Insert the characters in <TT>srcText</TT> in the range

  1967    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

  1968    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

  1969    * @param start the offset where the insertion begins

  1970    * @param srcText the source for the new characters

  1971    * @param srcStart the offset into <TT>srcText</TT> where new characters

  1972    * will be obtained

  1973    * @param srcLength the number of characters in <TT>srcText</TT> in

  1974    * the insert string

  1975    * @return a reference to this

  1976    * @stable ICU 2.0

  1977    */

  1978   inline UnicodeString& insert(int32_t start,

  1979             const UnicodeString& srcText,

  1980             int32_t srcStart,

  1981             int32_t srcLength);

  1983   /**

  1984    * Insert the characters in <TT>srcText</TT> into the UnicodeString object

  1985    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

  1986    * @param start the offset where the insertion begins

  1987    * @param srcText the source for the new characters

  1988    * @return a reference to this

  1989    * @stable ICU 2.0

  1990    */

  1991   inline UnicodeString& insert(int32_t start,

  1992             const UnicodeString& srcText);

  1994   /**

  1995    * Insert the characters in <TT>srcChars</TT> in the range

  1996    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

  1997    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

  1998    * @param start the offset at which the insertion begins

  1999    * @param srcChars the source for the new characters

  2000    * @param srcStart the offset into <TT>srcChars</TT> where new characters

  2001    * will be obtained

  2002    * @param srcLength the number of characters in <TT>srcChars</TT>

  2003    * in the insert string

  2004    * @return a reference to this

  2005    * @stable ICU 2.0

  2006    */

  2007   inline UnicodeString& insert(int32_t start,

  2008             const UChar *srcChars,

  2009             int32_t srcStart,

  2010             int32_t srcLength);

  2012   /**

  2013    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object

  2014    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

  2015    * @param start the offset where the insertion begins

  2016    * @param srcChars the source for the new characters

  2017    * @param srcLength the number of Unicode characters in srcChars.

  2018    * @return a reference to this

  2019    * @stable ICU 2.0

  2020    */

  2021   inline UnicodeString& insert(int32_t start,

  2022             const UChar *srcChars,

  2023             int32_t srcLength);

  2025   /**

  2026    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at

  2027    * offset <TT>start</TT>.

  2028    * @param start the offset at which the insertion occurs

  2029    * @param srcChar the code unit to insert

  2030    * @return a reference to this

  2031    * @stable ICU 2.0

  2032    */

  2033   inline UnicodeString& insert(int32_t start,

  2034             UChar srcChar);

  2036   /**

  2037    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at

  2038    * offset <TT>start</TT>.

  2039    * @param start the offset at which the insertion occurs

  2040    * @param srcChar the code point to insert

  2041    * @return a reference to this

  2042    * @stable ICU 2.0

  2043    */

  2044   inline UnicodeString& insert(int32_t start,

  2045             UChar32 srcChar);

  2048   /* Replace operations */

  2050   /**

  2051    * Replace the characters in the range

  2052    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

  2053    * <TT>srcText</TT> in the range

  2054    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

  2055    * <TT>srcText</TT> is not modified.

  2056    * @param start the offset at which the replace operation begins

  2057    * @param length the number of characters to replace. The character at

  2058    * <TT>start + length</TT> is not modified.

  2059    * @param srcText the source for the new characters

  2060    * @param srcStart the offset into <TT>srcText</TT> where new characters

  2061    * will be obtained

  2062    * @param srcLength the number of characters in <TT>srcText</TT> in

  2063    * the replace string

  2064    * @return a reference to this

  2065    * @stable ICU 2.0

  2066    */

  2067   UnicodeString& replace(int32_t start,

  2068              int32_t length,

  2069              const UnicodeString& srcText,

  2070              int32_t srcStart,

  2071              int32_t srcLength);

  2073   /**

  2074    * Replace the characters in the range

  2075    * [<TT>start</TT>, <TT>start + length</TT>)

  2076    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is

  2077    *  not modified.

  2078    * @param start the offset at which the replace operation begins

  2079    * @param length the number of characters to replace. The character at

  2080    * <TT>start + length</TT> is not modified.

  2081    * @param srcText the source for the new characters

  2082    * @return a reference to this

  2083    * @stable ICU 2.0

  2084    */

  2085   UnicodeString& replace(int32_t start,

  2086              int32_t length,

  2087              const UnicodeString& srcText);

  2089   /**

  2090    * Replace the characters in the range

  2091    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

  2092    * <TT>srcChars</TT> in the range

  2093    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>

  2094    * is not modified.

  2095    * @param start the offset at which the replace operation begins

  2096    * @param length the number of characters to replace.  The character at

  2097    * <TT>start + length</TT> is not modified.

  2098    * @param srcChars the source for the new characters

  2099    * @param srcStart the offset into <TT>srcChars</TT> where new characters

  2100    * will be obtained

  2101    * @param srcLength the number of characters in <TT>srcChars</TT>

  2102    * in the replace string

  2103    * @return a reference to this

  2104    * @stable ICU 2.0

  2105    */

  2106   UnicodeString& replace(int32_t start,

  2107              int32_t length,

  2108              const UChar *srcChars,

  2109              int32_t srcStart,

  2110              int32_t srcLength);

  2112   /**

  2113    * Replace the characters in the range

  2114    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

  2115    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.

  2116    * @param start the offset at which the replace operation begins

  2117    * @param length number of characters to replace.  The character at

  2118    * <TT>start + length</TT> is not modified.

  2119    * @param srcChars the source for the new characters

  2120    * @param srcLength the number of Unicode characters in srcChars

  2121    * @return a reference to this

  2122    * @stable ICU 2.0

  2123    */

  2124   inline UnicodeString& replace(int32_t start,

  2125              int32_t length,

  2126              const UChar *srcChars,

  2127              int32_t srcLength);

  2129   /**

  2130    * Replace the characters in the range

  2131    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit

  2132    * <TT>srcChar</TT>.

  2133    * @param start the offset at which the replace operation begins

  2134    * @param length the number of characters to replace.  The character at

  2135    * <TT>start + length</TT> is not modified.

  2136    * @param srcChar the new code unit

  2137    * @return a reference to this

  2138    * @stable ICU 2.0

  2139    */

  2140   inline UnicodeString& replace(int32_t start,

  2141              int32_t length,

  2142              UChar srcChar);

  2144   /**

  2145    * Replace the characters in the range

  2146    * [<TT>start</TT>, <TT>start + length</TT>) with the code point

  2147    * <TT>srcChar</TT>.

  2148    * @param start the offset at which the replace operation begins

  2149    * @param length the number of characters to replace.  The character at

  2150    * <TT>start + length</TT> is not modified.

  2151    * @param srcChar the new code point

  2152    * @return a reference to this

  2153    * @stable ICU 2.0

  2154    */

  2155   inline UnicodeString& replace(int32_t start,

  2156              int32_t length,

  2157              UChar32 srcChar);

  2159   /**

  2160    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

  2161    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.

  2162    * @param start the offset at which the replace operation begins

  2163    * @param limit the offset immediately following the replace range

  2164    * @param srcText the source for the new characters

  2165    * @return a reference to this

  2166    * @stable ICU 2.0

  2167    */

  2168   inline UnicodeString& replaceBetween(int32_t start,

  2169                 int32_t limit,

  2170                 const UnicodeString& srcText);

  2172   /**

  2173    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

  2174    * with the characters in <TT>srcText</TT> in the range

  2175    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.

  2176    * @param start the offset at which the replace operation begins

  2177    * @param limit the offset immediately following the replace range

  2178    * @param srcText the source for the new characters

  2179    * @param srcStart the offset into <TT>srcText</TT> where new characters

  2180    * will be obtained

  2181    * @param srcLimit the offset immediately following the range to copy

  2182    * in <TT>srcText</TT>

  2183    * @return a reference to this

  2184    * @stable ICU 2.0

  2185    */

  2186   inline UnicodeString& replaceBetween(int32_t start,

  2187                 int32_t limit,

  2188                 const UnicodeString& srcText,

  2189                 int32_t srcStart,

  2190                 int32_t srcLimit);

  2192   /**

  2193    * Replace a substring of this object with the given text.

  2194    * @param start the beginning index, inclusive; <code>0 <= start

  2195    * <= limit</code>.

  2196    * @param limit the ending index, exclusive; <code>start <= limit

  2197    * <= length()</code>.

  2198    * @param text the text to replace characters <code>start</code>

  2199    * to <code>limit - 1</code>

  2200    * @stable ICU 2.0

  2201    */

  2202   virtual void handleReplaceBetween(int32_t start,

  2203                                     int32_t limit,

  2204                                     const UnicodeString& text);

  2206   /**

  2207    * Replaceable API

  2208    * @return TRUE if it has MetaData

  2209    * @stable ICU 2.4

  2210    */

  2211   virtual UBool hasMetaData() const;

  2213   /**

  2214    * Copy a substring of this object, retaining attribute (out-of-band)

  2215    * information.  This method is used to duplicate or reorder substrings.

  2216    * The destination index must not overlap the source range.

  2217    *

  2218    * @param start the beginning index, inclusive; <code>0 <= start <=

  2219    * limit</code>.

  2220    * @param limit the ending index, exclusive; <code>start <= limit <=

  2221    * length()</code>.

  2222    * @param dest the destination index.  The characters from

  2223    * <code>start..limit-1</code> will be copied to <code>dest</code>.

  2224    * Implementations of this method may assume that <code>dest <= start ||

  2225    * dest >= limit</code>.

  2226    * @stable ICU 2.0

  2227    */

  2228   virtual void copy(int32_t start, int32_t limit, int32_t dest);

  2230   /* Search and replace operations */

  2232   /**

  2233    * Replace all occurrences of characters in oldText with the characters

  2234    * in newText

  2235    * @param oldText the text containing the search text

  2236    * @param newText the text containing the replacement text

  2237    * @return a reference to this

  2238    * @stable ICU 2.0

  2239    */

  2240   inline UnicodeString& findAndReplace(const UnicodeString& oldText,

  2241                 const UnicodeString& newText);

  2243   /**

  2244    * Replace all occurrences of characters in oldText with characters

  2245    * in newText

  2246    * in the range [<TT>start</TT>, <TT>start + length</TT>).

  2247    * @param start the start of the range in which replace will be performed

  2248    * @param length the length of the range in which replace will be performed

  2249    * @param oldText the text containing the search text

  2250    * @param newText the text containing the replacement text

  2251    * @return a reference to this

  2252    * @stable ICU 2.0

  2253    */

  2254   inline UnicodeString& findAndReplace(int32_t start,

  2255                 int32_t length,

  2256                 const UnicodeString& oldText,

  2257                 const UnicodeString& newText);

  2259   /**

  2260    * Replace all occurrences of characters in oldText in the range

  2261    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters

  2262    * in newText in the range

  2263    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)

  2264    * in the range [<TT>start</TT>, <TT>start + length</TT>).

  2265    * @param start the start of the range in which replace will be performed

  2266    * @param length the length of the range in which replace will be performed

  2267    * @param oldText the text containing the search text

  2268    * @param oldStart the start of the search range in <TT>oldText</TT>

  2269    * @param oldLength the length of the search range in <TT>oldText</TT>

  2270    * @param newText the text containing the replacement text

  2271    * @param newStart the start of the replacement range in <TT>newText</TT>

  2272    * @param newLength the length of the replacement range in <TT>newText</TT>

  2273    * @return a reference to this

  2274    * @stable ICU 2.0

  2275    */

  2276   UnicodeString& findAndReplace(int32_t start,

  2277                 int32_t length,

  2278                 const UnicodeString& oldText,

  2279                 int32_t oldStart,

  2280                 int32_t oldLength,

  2281                 const UnicodeString& newText,

  2282                 int32_t newStart,

  2283                 int32_t newLength);

  2286   /* Remove operations */

  2288   /**

  2289    * Remove all characters from the UnicodeString object.

  2290    * @return a reference to this

  2291    * @stable ICU 2.0

  2292    */

  2293   inline UnicodeString& remove(void);

  2295   /**

  2296    * Remove the characters in the range

  2297    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.

  2298    * @param start the offset of the first character to remove

  2299    * @param length the number of characters to remove; defaults to INT32_MAX

  2300    * @return a reference to this

  2301    * @stable ICU 2.0

  2302    */

  2303   inline UnicodeString& remove(int32_t start,

  2304                                int32_t length = (int32_t)INT32_MAX);

  2306   /**

  2307    * Remove the characters in the range

  2308    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.

  2309    * @param start the offset of the first character to remove

  2310    * @param limit the offset immediately following the range to remove; defaults to INT32_MAX

  2311    * @return a reference to this

  2312    * @stable ICU 2.0

  2313    */

  2314   inline UnicodeString& removeBetween(int32_t start,

  2315                                       int32_t limit = (int32_t)INT32_MAX);

  2318   /* Length operations */

  2320   /**

  2321    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.

  2322    * If the length of this UnicodeString is less than targetLength,

  2323    * targetLength - length() copies of padChar will be added to the

  2324    * beginning of this UnicodeString.

  2325    * @param targetLength the desired length of the string

  2326    * @param padChar the character to use for padding. Defaults to

  2327    * space (U+0020)

  2328    * @return TRUE if the text was padded, FALSE otherwise.

  2329    * @stable ICU 2.0

  2330    */

  2331   UBool padLeading(int32_t targetLength,

  2332                     UChar padChar = 0x0020);

  2334   /**

  2335    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.

  2336    * If the length of this UnicodeString is less than targetLength,

  2337    * targetLength - length() copies of padChar will be added to the

  2338    * end of this UnicodeString.

  2339    * @param targetLength the desired length of the string

  2340    * @param padChar the character to use for padding. Defaults to

  2341    * space (U+0020)

  2342    * @return TRUE if the text was padded, FALSE otherwise.

  2343    * @stable ICU 2.0

  2344    */

  2345   UBool padTrailing(int32_t targetLength,

  2346                      UChar padChar = 0x0020);

  2348   /**

  2349    * Truncate this UnicodeString to the <TT>targetLength</TT>.

  2350    * @param targetLength the desired length of this UnicodeString.

  2351    * @return TRUE if the text was truncated, FALSE otherwise

  2352    * @stable ICU 2.0

  2353    */

  2354   inline UBool truncate(int32_t targetLength);

  2356   /**

  2357    * Trims leading and trailing whitespace from this UnicodeString.

  2358    * @return a reference to this

  2359    * @stable ICU 2.0

  2360    */

  2361   UnicodeString& trim(void);

  2364   /* Miscellaneous operations */

  2366   /**

  2367    * Reverse this UnicodeString in place.

  2368    * @return a reference to this

  2369    * @stable ICU 2.0

  2370    */

  2371   inline UnicodeString& reverse(void);

  2373   /**

  2374    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in

  2375    * this UnicodeString.

  2376    * @param start the start of the range to reverse

  2377    * @param length the number of characters to reverse

  2378    * @return a reference to this

  2379    * @stable ICU 2.0

  2380    */

  2381   inline UnicodeString& reverse(int32_t start,

  2382              int32_t length);

  2384   /**

  2385    * Convert the characters in this to UPPER CASE following the conventions of

  2386    * the default locale.

  2387    * @return A reference to this.

  2388    * @stable ICU 2.0

  2389    */

  2390   UnicodeString& toUpper(void);

  2392   /**

  2393    * Convert the characters in this to UPPER CASE following the conventions of

  2394    * a specific locale.

  2395    * @param locale The locale containing the conventions to use.

  2396    * @return A reference to this.

  2397    * @stable ICU 2.0

  2398    */

  2399   UnicodeString& toUpper(const Locale& locale);

  2401   /**

  2402    * Convert the characters in this to lower case following the conventions of

  2403    * the default locale.

  2404    * @return A reference to this.

  2405    * @stable ICU 2.0

  2406    */

  2407   UnicodeString& toLower(void);

  2409   /**

  2410    * Convert the characters in this to lower case following the conventions of

  2411    * a specific locale.

  2412    * @param locale The locale containing the conventions to use.

  2413    * @return A reference to this.

  2414    * @stable ICU 2.0

  2415    */

  2416   UnicodeString& toLower(const Locale& locale);

  2418 #if !UCONFIG_NO_BREAK_ITERATION

  2420   /**

  2421    * Titlecase this string, convenience function using the default locale.

  2422    *

  2423    * Casing is locale-dependent and context-sensitive.

  2424    * Titlecasing uses a break iterator to find the first characters of words

  2425    * that are to be titlecased. It titlecases those characters and lowercases

  2426    * all others.

  2427    *

  2428    * The titlecase break iterator can be provided to customize for arbitrary

  2429    * styles, using rules and dictionaries beyond the standard iterators.

  2430    * It may be more efficient to always provide an iterator to avoid

  2431    * opening and closing one for each string.

  2432    * The standard titlecase iterator for the root locale implements the

  2433    * algorithm of Unicode TR 21.

  2434    *

  2435    * This function uses only the first() and next() methods of the

  2436    * provided break iterator.

  2437    *

  2438    * @param titleIter A break iterator to find the first characters of words

  2439    *                  that are to be titlecased.

  2440    *                  If none is provided (0), then a standard titlecase

  2441    *                  break iterator is opened.

  2442    *                  Otherwise the provided iterator is set to the string's text.

  2443    * @return A reference to this.

  2444    * @stable ICU 2.1

  2445    */

  2446   UnicodeString &toTitle(BreakIterator *titleIter);

  2448   /**

  2449    * Titlecase this string.

  2450    *

  2451    * Casing is locale-dependent and context-sensitive.

  2452    * Titlecasing uses a break iterator to find the first characters of words

  2453    * that are to be titlecased. It titlecases those characters and lowercases

  2454    * all others.

  2455    *

  2456    * The titlecase break iterator can be provided to customize for arbitrary

  2457    * styles, using rules and dictionaries beyond the standard iterators.

  2458    * It may be more efficient to always provide an iterator to avoid

  2459    * opening and closing one for each string.

  2460    * The standard titlecase iterator for the root locale implements the

  2461    * algorithm of Unicode TR 21.

  2462    *

  2463    * This function uses only the first() and next() methods of the

  2464    * provided break iterator.

  2465    *

  2466    * @param titleIter A break iterator to find the first characters of words

  2467    *                  that are to be titlecased.

  2468    *                  If none is provided (0), then a standard titlecase

  2469    *                  break iterator is opened.

  2470    *                  Otherwise the provided iterator is set to the string's text.

  2471    * @param locale    The locale to consider.

  2472    * @return A reference to this.

  2473    * @stable ICU 2.1

  2474    */

  2475   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);

  2477 #endif

  2479   /**

  2480    * Case-fold the characters in this string.

  2481    * Case-folding is locale-independent and not context-sensitive,

  2482    * but there is an option for whether to include or exclude mappings for dotted I

  2483    * and dotless i that are marked with 'I' in CaseFolding.txt.

  2484    * The result may be longer or shorter than the original.

  2485    *

  2486    * @param options Either U_FOLD_CASE_DEFAULT (the default, 0) or U_FOLD_CASE_EXCLUDE_SPECIAL_I

  2487    * @return A reference to this.

  2488    * @stable ICU 2.0

  2489    */

  2490   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);

  2492   //========================================

  2493   // Access to the internal buffer

  2494   //========================================

  2496   /**

  2497    * Get a read/write pointer to the internal buffer.

  2498    * The buffer is guaranteed to be large enough for at least minCapacity UChars,

  2499    * writable, and is still owned by the UnicodeString object.

  2500    * Calls to getBuffer(minCapacity) must not be nested, and

  2501    * must be matched with calls to releaseBuffer(newLength).

  2502    * If the string buffer was read-only or shared,

  2503    * then it will be reallocated and copied.

  2504    *

  2505    * An attempted nested call will return 0, and will not further modify the

  2506    * state of the UnicodeString object.

  2507    * It also returns 0 if the string is bogus.

  2508    *

  2509    * The actual capacity of the string buffer may be larger than minCapacity.

  2510    * getCapacity() returns the actual capacity.

  2511    * For many operations, the full capacity should be used to avoid reallocations.

  2512    *

  2513    * While the buffer is "open" between getBuffer(minCapacity)

  2514    * and releaseBuffer(newLength), the following applies:

  2515    * - The string length is set to 0.

  2516    * - Any read API call on the UnicodeString object will behave like on a 0-length string.

  2517    * - Any write API call on the UnicodeString object is disallowed and will have no effect.

  2518    * - You can read from and write to the returned buffer.

  2519    * - The previous string contents will still be in the buffer;

  2520    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).

  2521    *   If the length() was greater than minCapacity, then any contents after minCapacity

  2522    *   may be lost.

  2523    *   The buffer contents is not NUL-terminated by getBuffer().

  2524    *   If length()<getCapacity() then you can terminate it by writing a NUL

  2525    *   at index length().

  2526    * - You must call releaseBuffer(newLength) in order to

  2527    *   return to normal UnicodeString operation.

  2528    *

  2529    * @param minCapacity the minimum number of UChars that are to be available

  2530    *        in the buffer, starting at the returned pointer;

  2531    *        default to the current string capacity if minCapacity==-1

  2532    * @return a writable pointer to the internal string buffer,

  2533    *         or 0 if an error occurs (nested calls, bogus string, out of memory)

  2534    *

  2535    * @see releaseBuffer

  2536    * @see getTerminatedBuffer()

  2537    * @stable ICU 2.0

  2538    */

  2539   UChar *getBuffer(int32_t minCapacity);

  2541   /**

  2542    * Release a read/write buffer on a UnicodeString object with an

  2543    * "open" getBuffer(minCapacity).

  2544    * This function must be called in a matched pair with getBuffer(minCapacity).

  2545    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".

  2546    *

  2547    * It will set the string length to newLength, at most to the current capacity.

  2548    * If newLength==-1 then it will set the length according to the

  2549    * first NUL in the buffer, or to the capacity if there is no NUL.

  2550    *

  2551    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.

  2552    *

  2553    * @param newLength the new length of the UnicodeString object;

  2554    *        defaults to the current capacity if newLength is greater than that;

  2555    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than

  2556    *        the current capacity of the string

  2557    *

  2558    * @see getBuffer(int32_t minCapacity)

  2559    * @stable ICU 2.0

  2560    */

  2561   void releaseBuffer(int32_t newLength=-1);

  2563   /**

  2564    * Get a read-only pointer to the internal buffer.

  2565    * This can be called at any time on a valid UnicodeString.

  2566    *

  2567    * It returns 0 if the string is bogus, or

  2568    * during an "open" getBuffer(minCapacity).

  2569    *

  2570    * It can be called as many times as desired.

  2571    * The pointer that it returns will remain valid until the UnicodeString object is modified,

  2572    * at which time the pointer is semantically invalidated and must not be used any more.

  2573    *

  2574    * The capacity of the buffer can be determined with getCapacity().

  2575    * The part after length() may or may not be initialized and valid,

  2576    * depending on the history of the UnicodeString object.

  2577    *

  2578    * The buffer contents is (probably) not NUL-terminated.

  2579    * You can check if it is with

  2580    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.

  2581    * (See getTerminatedBuffer().)

  2582    *

  2583    * The buffer may reside in read-only memory. Its contents must not

  2584    * be modified.

  2585    *

  2586    * @return a read-only pointer to the internal string buffer,

  2587    *         or 0 if the string is empty or bogus

  2588    *

  2589    * @see getBuffer(int32_t minCapacity)

  2590    * @see getTerminatedBuffer()

  2591    * @stable ICU 2.0

  2592    */

  2593   inline const UChar *getBuffer() const;

  2595   /**

  2596    * Get a read-only pointer to the internal buffer,

  2597    * making sure that it is NUL-terminated.

  2598    * This can be called at any time on a valid UnicodeString.

  2599    *

  2600    * It returns 0 if the string is bogus, or

  2601    * during an "open" getBuffer(minCapacity), or if the buffer cannot

  2602    * be NUL-terminated (because memory allocation failed).

  2603    *

  2604    * It can be called as many times as desired.

  2605    * The pointer that it returns will remain valid until the UnicodeString object is modified,

  2606    * at which time the pointer is semantically invalidated and must not be used any more.

  2607    *

  2608    * The capacity of the buffer can be determined with getCapacity().

  2609    * The part after length()+1 may or may not be initialized and valid,

  2610    * depending on the history of the UnicodeString object.

  2611    *

  2612    * The buffer contents is guaranteed to be NUL-terminated.

  2613    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL

  2614    * is written.

  2615    * For this reason, this function is not const, unlike getBuffer().

  2616    * Note that a UnicodeString may also contain NUL characters as part of its contents.

  2617    *

  2618    * The buffer may reside in read-only memory. Its contents must not

  2619    * be modified.

  2620    *

  2621    * @return a read-only pointer to the internal string buffer,

  2622    *         or 0 if the string is empty or bogus

  2623    *

  2624    * @see getBuffer(int32_t minCapacity)

  2625    * @see getBuffer()

  2626    * @stable ICU 2.2

  2627    */

  2628   inline const UChar *getTerminatedBuffer();

  2630   //========================================

  2631   // Constructors

  2632   //========================================

  2634   /** Construct an empty UnicodeString.

  2635    * @stable ICU 2.0

  2636    */

  2637   UnicodeString();

  2639   /**

  2640    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars

  2641    * @param capacity the number of UChars this UnicodeString should hold

  2642    * before a resize is necessary; if count is greater than 0 and count

  2643    * code points c take up more space than capacity, then capacity is adjusted

  2644    * accordingly.

  2645    * @param c is used to initially fill the string

  2646    * @param count specifies how many code points c are to be written in the

  2647    *              string

  2648    * @stable ICU 2.0

  2649    */

  2650   UnicodeString(int32_t capacity, UChar32 c, int32_t count);

  2652   /**

  2653    * Single UChar (code unit) constructor.

  2654    * @param ch the character to place in the UnicodeString

  2655    * @stable ICU 2.0

  2656    */

  2657   UnicodeString(UChar ch);

  2659   /**

  2660    * Single UChar32 (code point) constructor.

  2661    * @param ch the character to place in the UnicodeString

  2662    * @stable ICU 2.0

  2663    */

  2664   UnicodeString(UChar32 ch);

  2666   /**

  2667    * UChar* constructor.

  2668    * @param text The characters to place in the UnicodeString.  <TT>text</TT>

  2669    * must be NUL (U+0000) terminated.

  2670    * @stable ICU 2.0

  2671    */

  2672   UnicodeString(const UChar *text);

  2674   /**

  2675    * UChar* constructor.

  2676    * @param text The characters to place in the UnicodeString.

  2677    * @param textLength The number of Unicode characters in <TT>text</TT>

  2678    * to copy.

  2679    * @stable ICU 2.0

  2680    */

  2681   UnicodeString(const UChar *text,

  2682         int32_t textLength);

  2684   /**

  2685    * Readonly-aliasing UChar* constructor.

  2686    * The text will be used for the UnicodeString object, but

  2687    * it will not be released when the UnicodeString is destroyed.

  2688    * This has copy-on-write semantics:

  2689    * When the string is modified, then the buffer is first copied into

  2690    * newly allocated memory.

  2691    * The aliased buffer is never modified.

  2692    * In an assignment to another UnicodeString, the text will be aliased again,

  2693    * so that both strings then alias the same readonly-text.

  2694    *

  2695    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

  2696    *                     This must be true if <code>textLength==-1</code>.

  2697    * @param text The characters to alias for the UnicodeString.

  2698    * @param textLength The number of Unicode characters in <code>text</code> to alias.

  2699    *                   If -1, then this constructor will determine the length

  2700    *                   by calling <code>u_strlen()</code>.

  2701    * @stable ICU 2.0

  2702    */

  2703   UnicodeString(UBool isTerminated,

  2704                 const UChar *text,

  2705                 int32_t textLength);

  2707   /**

  2708    * Writable-aliasing UChar* constructor.

  2709    * The text will be used for the UnicodeString object, but

  2710    * it will not be released when the UnicodeString is destroyed.

  2711    * This has write-through semantics:

  2712    * For as long as the capacity of the buffer is sufficient, write operations

  2713    * will directly affect the buffer. When more capacity is necessary, then

  2714    * a new buffer will be allocated and the contents copied as with regularly

  2715    * constructed strings.

  2716    * In an assignment to another UnicodeString, the buffer will be copied.

  2717    * The extract(UChar *dst) function detects whether the dst pointer is the same

  2718    * as the string buffer itself and will in this case not copy the contents.

  2719    *

  2720    * @param buffer The characters to alias for the UnicodeString.

  2721    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

  2722    * @param buffCapacity The size of <code>buffer</code> in UChars.

  2723    * @stable ICU 2.0

  2724    */

  2725   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);

  2727 #if !UCONFIG_NO_CONVERSION

  2729   /**

  2730    * char* constructor.

  2731    * @param codepageData an array of bytes, null-terminated

  2732    * @param codepage the encoding of <TT>codepageData</TT>.  The special

  2733    * value 0 for <TT>codepage</TT> indicates that the text is in the

  2734    * platform's default codepage.

  2735    *

  2736    * If <code>codepage</code> is an empty string (<code>""</code>),

  2737    * then a simple conversion is performed on the codepage-invariant

  2738    * subset ("invariant characters") of the platform encoding. See utypes.h.

  2739    * Recommendation: For invariant-character strings use the constructor

  2740    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

  2741    * because it avoids object code dependencies of UnicodeString on

  2742    * the conversion code.

  2743    *

  2744    * @stable ICU 2.0

  2745    */

  2746   UnicodeString(const char *codepageData,

  2747         const char *codepage = 0);

  2749   /**

  2750    * char* constructor.

  2751    * @param codepageData an array of bytes.

  2752    * @param dataLength The number of bytes in <TT>codepageData</TT>.

  2753    * @param codepage the encoding of <TT>codepageData</TT>.  The special

  2754    * value 0 for <TT>codepage</TT> indicates that the text is in the

  2755    * platform's default codepage.

  2756    * If <code>codepage</code> is an empty string (<code>""</code>),

  2757    * then a simple conversion is performed on the codepage-invariant

  2758    * subset ("invariant characters") of the platform encoding. See utypes.h.

  2759    * Recommendation: For invariant-character strings use the constructor

  2760    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

  2761    * because it avoids object code dependencies of UnicodeString on

  2762    * the conversion code.

  2763    *

  2764    * @stable ICU 2.0

  2765    */

  2766   UnicodeString(const char *codepageData,

  2767         int32_t dataLength,

  2768         const char *codepage = 0);

  2770   /**

  2771    * char * / UConverter constructor.

  2772    * This constructor uses an existing UConverter object to

  2773    * convert the codepage string to Unicode and construct a UnicodeString

  2774    * from that.

  2775    *

  2776    * The converter is reset at first.

  2777    * If the error code indicates a failure before this constructor is called,

  2778    * or if an error occurs during conversion or construction,

  2779    * then the string will be bogus.

  2780    *

  2781    * This function avoids the overhead of opening and closing a converter if

  2782    * multiple strings are constructed.

  2783    *

  2784    * @param src input codepage string

  2785    * @param srcLength length of the input string, can be -1 for NUL-terminated strings

  2786    * @param cnv converter object (ucnv_resetToUnicode() will be called),

  2787    *        can be NULL for the default converter

  2788    * @param errorCode normal ICU error code

  2789    * @stable ICU 2.0

  2790    */

  2791   UnicodeString(

  2792         const char *src, int32_t srcLength,

  2793         UConverter *cnv,

  2794         UErrorCode &errorCode);

  2796 #endif

  2798   /**

  2799    * Constructs a Unicode string from an invariant-character char * string.

  2800    * About invariant characters see utypes.h.

  2801    * This constructor has no runtime dependency on conversion code and is

  2802    * therefore recommended over ones taking a charset name string

  2803    * (where the empty string "" indicates invariant-character conversion).

  2804    *

  2805    * Use the macro US_INV as the third, signature-distinguishing parameter.

  2806    *

  2807    * For example:

  2808    * \code

  2809    * void fn(const char *s) {

  2810    *   UnicodeString ustr(s, -1, US_INV);

  2811    *   // use ustr ...

  2812    * }

  2813    * \endcode

  2814    *

  2815    * @param src String using only invariant characters.

  2816    * @param length Length of src, or -1 if NUL-terminated.

  2817    * @param inv Signature-distinguishing paramater, use US_INV.

  2818    *

  2819    * @see US_INV

  2820    * @draft ICU 3.2

  2821    */

  2822   UnicodeString(const char *src, int32_t length, enum EInvariant inv);

  2825   /**

  2826    * Copy constructor.

  2827    * @param that The UnicodeString object to copy.

  2828    * @stable ICU 2.0

  2829    */

  2830   UnicodeString(const UnicodeString& that);

  2832   /**

  2833    * 'Substring' constructor from tail of source string.

  2834    * @param src The UnicodeString object to copy.

  2835    * @param srcStart The offset into <tt>src</tt> at which to start copying.

  2836    * @stable ICU 2.2

  2837    */

  2838   UnicodeString(const UnicodeString& src, int32_t srcStart);

  2840   /**

  2841    * 'Substring' constructor from subrange of source string.

  2842    * @param src The UnicodeString object to copy.

  2843    * @param srcStart The offset into <tt>src</tt> at which to start copying.

  2844    * @param srcLength The number of characters from <tt>src</tt> to copy.

  2845    * @stable ICU 2.2

  2846    */

  2847   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);

  2849   /**

  2850    * Clone this object, an instance of a subclass of Replaceable.

  2851    * Clones can be used concurrently in multiple threads.

  2852    * If a subclass does not implement clone(), or if an error occurs,

  2853    * then NULL is returned.

  2854    * The clone functions in all subclasses return a pointer to a Replaceable

  2855    * because some compilers do not support covariant (same-as-this)

  2856    * return types; cast to the appropriate subclass if necessary.

  2857    * The caller must delete the clone.

  2858    *

  2859    * @return a clone of this object

  2860    *

  2861    * @see Replaceable::clone

  2862    * @see getDynamicClassID

  2863    * @stable ICU 2.6

  2864    */

  2865   virtual Replaceable *clone() const;

  2867   /** Destructor.

  2868    * @stable ICU 2.0

  2869    */

  2870   virtual ~UnicodeString();

  2873   /* Miscellaneous operations */

  2875   /**

  2876    * Unescape a string of characters and return a string containing

  2877    * the result.  The following escape sequences are recognized:

  2878    *

  2879    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]

  2880    * \\Uhhhhhhhh   8 hex digits

  2881    * \\xhh         1-2 hex digits

  2882    * \\ooo         1-3 octal digits; o in [0-7]

  2883    * \\cX          control-X; X is masked with 0x1F

  2884    *

  2885    * as well as the standard ANSI C escapes:

  2886    *

  2887    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,

  2888    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,

  2889    * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C

  2890    *

  2891    * Anything else following a backslash is generically escaped.  For

  2892    * example, "[a\\-z]" returns "[a-z]".

  2893    *

  2894    * If an escape sequence is ill-formed, this method returns an empty

  2895    * string.  An example of an ill-formed sequence is "\\u" followed by

  2896    * fewer than 4 hex digits.

  2897    *

  2898    * This function is similar to u_unescape() but not identical to it.

  2899    * The latter takes a source char*, so it does escape recognition

  2900    * and also invariant conversion.

  2901    *

  2902    * @return a string with backslash escapes interpreted, or an

  2903    * empty string on error.

  2904    * @see UnicodeString#unescapeAt()

  2905    * @see u_unescape()

  2906    * @see u_unescapeAt()

  2907    * @stable ICU 2.0

  2908    */

  2909   UnicodeString unescape() const;

  2911   /**

  2912    * Unescape a single escape sequence and return the represented

  2913    * character.  See unescape() for a listing of the recognized escape

  2914    * sequences.  The character at offset-1 is assumed (without

  2915    * checking) to be a backslash.  If the escape sequence is

  2916    * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is

  2917    * returned.

  2918    *

  2919    * @param offset an input output parameter.  On input, it is the

  2920    * offset into this string where the escape sequence is located,

  2921    * after the initial backslash.  On output, it is advanced after the

  2922    * last character parsed.  On error, it is not advanced at all.

  2923    * @return the character represented by the escape sequence at

  2924    * offset, or (UChar32)0xFFFFFFFF on error.

  2925    * @see UnicodeString#unescape()

  2926    * @see u_unescape()

  2927    * @see u_unescapeAt()

  2928    * @stable ICU 2.0

  2929    */

  2930   UChar32 unescapeAt(int32_t &offset) const;

  2932   /**

  2933    * ICU "poor man's RTTI", returns a UClassID for this class.

  2934    *

  2935    * @stable ICU 2.2

  2936    */

  2937   static UClassID U_EXPORT2 getStaticClassID();

  2939   /**

  2940    * ICU "poor man's RTTI", returns a UClassID for the actual class.

  2941    *

  2942    * @stable ICU 2.2

  2943    */

  2944   virtual UClassID getDynamicClassID() const;

  2946   //========================================

  2947   // Implementation methods

  2948   //========================================

  2950 protected:

  2951   /**

  2952    * Implement Replaceable::getLength() (see jitterbug 1027).

  2953    * @stable ICU 2.4

  2954    */

  2955   virtual int32_t getLength() const;

  2957   /**

  2958    * The change in Replaceable to use virtual getCharAt() allows

  2959    * UnicodeString::charAt() to be inline again (see jitterbug 709).

  2960    * @stable ICU 2.4

  2961    */

  2962   virtual UChar getCharAt(int32_t offset) const;

  2964   /**

  2965    * The change in Replaceable to use virtual getChar32At() allows

  2966    * UnicodeString::char32At() to be inline again (see jitterbug 709).

  2967    * @stable ICU 2.4

  2968    */

  2969   virtual UChar32 getChar32At(int32_t offset) const;

  2971 private:

  2973   inline int8_t

  2974   doCompare(int32_t start,

  2975            int32_t length,

  2976            const UnicodeString& srcText,

  2977            int32_t srcStart,

  2978            int32_t srcLength) const;

  2980   int8_t doCompare(int32_t start,

  2981            int32_t length,

  2982            const UChar *srcChars,

  2983            int32_t srcStart,

  2984            int32_t srcLength) const;

  2986   inline int8_t

  2987   doCompareCodePointOrder(int32_t start,

  2988                           int32_t length,

  2989                           const UnicodeString& srcText,

  2990                           int32_t srcStart,

  2991                           int32_t srcLength) const;

  2993   int8_t doCompareCodePointOrder(int32_t start,

  2994                                  int32_t length,

  2995                                  const UChar *srcChars,

  2996                                  int32_t srcStart,

  2997                                  int32_t srcLength) const;

  2999   inline int8_t

  3000   doCaseCompare(int32_t start,

  3001                 int32_t length,

  3002                 const UnicodeString &srcText,

  3003                 int32_t srcStart,

  3004                 int32_t srcLength,

  3005                 uint32_t options) const;

  3007   int8_t

  3008   doCaseCompare(int32_t start,

  3009                 int32_t length,

  3010                 const UChar *srcChars,

  3011                 int32_t srcStart,

  3012                 int32_t srcLength,

  3013                 uint32_t options) const;

  3015   int32_t doIndexOf(UChar c,

  3016             int32_t start,

  3017             int32_t length) const;

  3019   int32_t doIndexOf(UChar32 c,

  3020                         int32_t start,

  3021                         int32_t length) const;

  3023   int32_t doLastIndexOf(UChar c,

  3024                 int32_t start,

  3025                 int32_t length) const;

  3027   int32_t doLastIndexOf(UChar32 c,

  3028                             int32_t start,

  3029                             int32_t length) const;

  3031   void doExtract(int32_t start,

  3032          int32_t length,

  3033          UChar *dst,

  3034          int32_t dstStart) const;

  3036   inline void doExtract(int32_t start,

  3037          int32_t length,

  3038          UnicodeString& target) const;

  3040   inline UChar doCharAt(int32_t offset)  const;

  3042   UnicodeString& doReplace(int32_t start,

  3043                int32_t length,

  3044                const UnicodeString& srcText,

  3045                int32_t srcStart,

  3046                int32_t srcLength);

  3048   UnicodeString& doReplace(int32_t start,

  3049                int32_t length,

  3050                const UChar *srcChars,

  3051                int32_t srcStart,

  3052                int32_t srcLength);

  3054   UnicodeString& doReverse(int32_t start,

  3055                int32_t length);

  3057   // calculate hash code

  3058   int32_t doHashCode(void) const;

  3060   // get pointer to start of array

  3061   inline UChar* getArrayStart(void);

  3062   inline const UChar* getArrayStart(void) const;

  3064   // allocate the array; result may be fStackBuffer

  3065   // sets refCount to 1 if appropriate

  3066   // sets fArray, fCapacity, and fFlags

  3067   // returns boolean for success or failure

  3068   UBool allocate(int32_t capacity);

  // release the array, but only if this string owns it
  void releaseArray(void);

  // convert a bogus string into an empty one
  void unBogus();

  3076   // implements assigment operator, copy constructor, and fastCopyFrom()

  3077   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);

  3079   // Pin start and limit to acceptable values.

  3080   inline void pinIndex(int32_t& start) const;

  3081   inline void pinIndices(int32_t& start,

  3082                          int32_t& length) const;

  3084 #if !UCONFIG_NO_CONVERSION

  3086   /* Internal extract() using UConverter. */

  3087   int32_t doExtract(int32_t start, int32_t length,

  3088                     char *dest, int32_t destCapacity,

  3089                     UConverter *cnv,

  3090                     UErrorCode &errorCode) const;

  3092   /*

  3093    * Real constructor for converting from codepage data.

  3094    * It assumes that it is called with !fRefCounted.

  3095    *

  3096    * If <code>codepage==0</code>, then the default converter

  3097    * is used for the platform encoding.

  3098    * If <code>codepage</code> is an empty string (<code>""</code>),

  3099    * then a simple conversion is performed on the codepage-invariant

  3100    * subset ("invariant characters") of the platform encoding. See utypes.h.

  3101    */

  3102   void doCodepageCreate(const char *codepageData,

  3103                         int32_t dataLength,

  3104                         const char *codepage);

  3106   /*

  3107    * Worker function for creating a UnicodeString from

  3108    * a codepage string using a UConverter.

  3109    */

  3110   void

  3111   doCodepageCreate(const char *codepageData,

  3112                    int32_t dataLength,

  3113                    UConverter *converter,

  3114                    UErrorCode &status);

  3116 #endif

  3118   /*

  3119    * This function is called when write access to the array

  3120    * is necessary.

  3121    *

  3122    * We need to make a copy of the array if

  3123    * the buffer is read-only, or

  3124    * the buffer is refCounted (shared), and refCount>1, or

  3125    * the buffer is too small.

  3126    *

  3127    * Return FALSE if memory could not be allocated.

  3128    */

  3129   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,

  3130                             int32_t growCapacity = -1,

  3131                             UBool doCopyArray = TRUE,

  3132                             int32_t **pBufferToDelete = 0,

  3133                             UBool forceClone = FALSE);

  3135   // common function for case mappings

  3136   UnicodeString &

  3137   caseMap(BreakIterator *titleIter,

  3138           const char *locale,

  3139           uint32_t options,

  3140           int32_t toWhichCase);

  3142   // ref counting

  3143   void addRef(void);

  3144   int32_t removeRef(void);

  3145   int32_t refCount(void) const;

  // constants
  enum {
    US_STACKBUF_SIZE = 7,     // Size of stack buffer for small strings
    kInvalidUChar = 0xffff,   // invalid UChar index
    kGrowSize = 128,          // grow size for this buffer
    kInvalidHashCode = 0,     // invalid hash code
    kEmptyHashCode = 1,       // hash code for empty string

    // bit flag values for fFlags
    kIsBogus = 1,             // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer = 2,    // fArray==fStackBuffer
    kRefCounted = 4,          // there is a refCount field before the characters in fArray
    kBufferIsReadonly = 8,    // do not write to this buffer
    kOpenGetBuffer = 16,      // getBuffer(minCapacity) was called (is "open"),
                              // and releaseBuffer(newLength) must be called

    // combined values for convenience
    kShortString = kUsingStackBuffer,
    kLongString = kRefCounted,
    kReadonlyAlias = kBufferIsReadonly,
    kWritableAlias = 0
  };

  3170   friend class StringCharacterIterator;

  3171   friend class StringThreadTest;

  3173   /*

  3174    * The following are all the class fields that are stored

  3175    * in each UnicodeString object.

  3176    * Note that UnicodeString has virtual functions,

  3177    * therefore there is an implicit vtable pointer

  3178    * as the first real field.

  3179    * The fields should be aligned such that no padding is

  3180    * necessary, mostly by having larger types first.

  3181    * On 32-bit machines, the size should be 32 bytes,

  3182    * on 64-bit machines (8-byte pointers), it should be 40 bytes.

  3183    */

  3184   // (implicit) *vtable;

  3185   int32_t   fLength;        // number of characters in fArray

  3186   int32_t   fCapacity;      // sizeof fArray

  3187   UChar     *fArray;        // the Unicode data

  3188   uint16_t  fFlags;         // bit flags: see constants above

  3189   UChar     fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings

  3191 };

  3193 /**

  3194  * Create a new UnicodeString with the concatenation of two others.

  3195  *

  3196  * @param s1 The first string to be copied to the new one.

  3197  * @param s2 The second string to be copied to the new one, after s1.

  3198  * @return UnicodeString(s1).append(s2)

  3199  * @stable ICU 2.8

  3200  */

  3201 U_COMMON_API UnicodeString U_EXPORT2

  3202 operator+ (const UnicodeString &s1, const UnicodeString &s2);

  3204 U_NAMESPACE_END

  3206 // inline implementations -------------------------------------------------- ***

  3208 //========================================

  3209 // Array copying

  3210 //========================================

  3211 /**

  3212  * Copy an array of UnicodeString OBJECTS (not pointers).

  3213  * @internal

  3214  */

  3215 inline void

  3216 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)

  3217 { while(count-- > 0) *dst++ = *src++; }

  3219 /**

  3220  * Copy an array of UnicodeString OBJECTS (not pointers).

  3221  * @internal

  3222  */

  3223 inline void

  3224 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,

  3225         U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)

  3226 { uprv_arrayCopy(src+srcStart, dst+dstStart, count); }

  3228 U_NAMESPACE_BEGIN

  3230 //========================================

  3231 // Inline members

  3232 //========================================

  3234 //========================================

  3235 // Privates

  3236 //========================================

  3238 inline void

  3239 UnicodeString::pinIndex(int32_t& start) const

  3240 {

  3241   // pin index

  3242   if(start < 0) {

  3243     start = 0;

  3244   } else if(start > fLength) {

  3245     start = fLength;

  3246   }

  3247 }

  3249 inline void

  3250 UnicodeString::pinIndices(int32_t& start,

  3251                           int32_t& _length) const

  3252 {

  3253   // pin indices

  3254   if(start < 0) {

  3255     start = 0;

  3256   } else if(start > fLength) {

  3257     start = fLength;

  3258   }

  3259   if(_length < 0) {

  3260     _length = 0;

  3261   } else if(_length > (fLength - start)) {

  3262     _length = (fLength - start);

  3263   }

  3264 }

  3266 inline UChar*

  3267 UnicodeString::getArrayStart()

  3268 { return fArray; }

  3270 inline const UChar*

  3271 UnicodeString::getArrayStart() const

  3272 { return fArray; }

  3274 //========================================

  3275 // Read-only implementation methods

  3276 //========================================

  3277 inline int32_t

  3278 UnicodeString::length() const

  3279 { return fLength; }

  3281 inline int32_t

  3282 UnicodeString::getCapacity() const

  3283 { return fCapacity; }

  3285 inline int32_t

  3286 UnicodeString::hashCode() const

  3287 { return doHashCode(); }

  3289 inline UBool

  3290 UnicodeString::isBogus() const

  3291 { return (UBool)(fFlags & kIsBogus); }

  3293 inline const UChar *

  3294 UnicodeString::getBuffer() const {

  3295   if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {

  3296     return fArray;

  3297   } else {

  3298     return 0;

  3299   }

  3300 }

  3302 //========================================

  3303 // Read-only alias methods

  3304 //========================================

  3305 inline int8_t

  3306 UnicodeString::doCompare(int32_t start,

  3307               int32_t length,

  3308               const UnicodeString& srcText,

  3309               int32_t srcStart,

  3310               int32_t srcLength) const

  3311 {

  3312   if(srcText.isBogus()) {

  3313     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

  3314   } else {

  3315     srcText.pinIndices(srcStart, srcLength);

  3316     return doCompare(start, length, srcText.fArray, srcStart, srcLength);

  3317   }

  3318 }

  3320 inline UBool

  3321 UnicodeString::operator== (const UnicodeString& text) const

  3322 {

  3323   if(isBogus()) {

  3324     return text.isBogus();

  3325   } else {

  3326     return

  3327       !text.isBogus() &&

  3328       fLength == text.fLength &&

  3329       doCompare(0, fLength, text, 0, text.fLength) == 0;

  3330   }

  3331 }

  3333 inline UBool

  3334 UnicodeString::operator!= (const UnicodeString& text) const

  3335 { return (! operator==(text)); }

  3337 inline UBool

  3338 UnicodeString::operator> (const UnicodeString& text) const

  3339 { return doCompare(0, fLength, text, 0, text.fLength) == 1; }

  3341 inline UBool

  3342 UnicodeString::operator< (const UnicodeString& text) const

  3343 { return doCompare(0, fLength, text, 0, text.fLength) == -1; }

  3345 inline UBool

  3346 UnicodeString::operator>= (const UnicodeString& text) const

  3347 { return doCompare(0, fLength, text, 0, text.fLength) != -1; }

  3349 inline UBool

  3350 UnicodeString::operator<= (const UnicodeString& text) const

  3351 { return doCompare(0, fLength, text, 0, text.fLength) != 1; }

  3353 inline int8_t

  3354 UnicodeString::compare(const UnicodeString& text) const

  3355 { return doCompare(0, fLength, text, 0, text.fLength); }

  3357 inline int8_t

  3358 UnicodeString::compare(int32_t start,

  3359                int32_t _length,

  3360                const UnicodeString& srcText) const

  3361 { return doCompare(start, _length, srcText, 0, srcText.fLength); }

  3363 inline int8_t

  3364 UnicodeString::compare(const UChar *srcChars,

  3365                int32_t srcLength) const

  3366 { return doCompare(0, fLength, srcChars, 0, srcLength); }

  3368 inline int8_t

  3369 UnicodeString::compare(int32_t start,

  3370                int32_t _length,

  3371                const UnicodeString& srcText,

  3372                int32_t srcStart,

  3373                int32_t srcLength) const

  3374 { return doCompare(start, _length, srcText, srcStart, srcLength); }

  3376 inline int8_t

  3377 UnicodeString::compare(int32_t start,

  3378                int32_t _length,

  3379                const UChar *srcChars) const

  3380 { return doCompare(start, _length, srcChars, 0, _length); }

  3382 inline int8_t

  3383 UnicodeString::compare(int32_t start,

  3384                int32_t _length,

  3385                const UChar *srcChars,

  3386                int32_t srcStart,

  3387                int32_t srcLength) const

  3388 { return doCompare(start, _length, srcChars, srcStart, srcLength); }

  3390 inline int8_t

  3391 UnicodeString::compareBetween(int32_t start,

  3392                   int32_t limit,

  3393                   const UnicodeString& srcText,

  3394                   int32_t srcStart,

  3395                   int32_t srcLimit) const

  3396 { return doCompare(start, limit - start,

  3397            srcText, srcStart, srcLimit - srcStart); }

  3399 inline int8_t

  3400 UnicodeString::doCompareCodePointOrder(int32_t start,

  3401                                        int32_t length,

  3402                                        const UnicodeString& srcText,

  3403                                        int32_t srcStart,

  3404                                        int32_t srcLength) const

  3405 {

  3406   if(srcText.isBogus()) {

  3407     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

  3408   } else {

  3409     srcText.pinIndices(srcStart, srcLength);

  3410     return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);

  3411   }

  3412 }

  3414 inline int8_t

  3415 UnicodeString::compareCodePointOrder(const UnicodeString& text) const

  3416 { return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }

  3418 inline int8_t

  3419 UnicodeString::compareCodePointOrder(int32_t start,

  3420                                      int32_t _length,

  3421                                      const UnicodeString& srcText) const

  3422 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }

  3424 inline int8_t

  3425 UnicodeString::compareCodePointOrder(const UChar *srcChars,

  3426                                      int32_t srcLength) const

  3427 { return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }

  3429 inline int8_t

  3430 UnicodeString::compareCodePointOrder(int32_t start,

  3431                                      int32_t _length,

  3432                                      const UnicodeString& srcText,

  3433                                      int32_t srcStart,

  3434                                      int32_t srcLength) const

  3435 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }

  3437 inline int8_t

  3438 UnicodeString::compareCodePointOrder(int32_t start,

  3439                                      int32_t _length,

  3440                                      const UChar *srcChars) const

  3441 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }

  3443 inline int8_t

  3444 UnicodeString::compareCodePointOrder(int32_t start,

  3445                                      int32_t _length,

  3446                                      const UChar *srcChars,

  3447                                      int32_t srcStart,

  3448                                      int32_t srcLength) const

  3449 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }

  3451 inline int8_t

  3452 UnicodeString::compareCodePointOrderBetween(int32_t start,

  3453                                             int32_t limit,

  3454                                             const UnicodeString& srcText,

  3455                                             int32_t srcStart,

  3456                                             int32_t srcLimit) const

  3457 { return doCompareCodePointOrder(start, limit - start,

  3458            srcText, srcStart, srcLimit - srcStart); }

  3460 inline int8_t

  3461 UnicodeString::doCaseCompare(int32_t start,

  3462                              int32_t length,

  3463                              const UnicodeString &srcText,

  3464                              int32_t srcStart,

  3465                              int32_t srcLength,

  3466                              uint32_t options) const

  3467 {

  3468   if(srcText.isBogus()) {

  3469     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

  3470   } else {

  3471     srcText.pinIndices(srcStart, srcLength);

  3472     return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);

  3473   }

  3474 }

  3476 inline int8_t

  3477 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {

  3478   return doCaseCompare(0, fLength, text, 0, text.fLength, options);

  3479 }

  3481 inline int8_t

  3482 UnicodeString::caseCompare(int32_t start,

  3483                            int32_t _length,

  3484                            const UnicodeString &srcText,

  3485                            uint32_t options) const {

  3486   return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);

  3487 }

  3489 inline int8_t

  3490 UnicodeString::caseCompare(const UChar *srcChars,

  3491                            int32_t srcLength,

  3492                            uint32_t options) const {

  3493   return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);

  3494 }

  3496 inline int8_t

  3497 UnicodeString::caseCompare(int32_t start,

  3498                            int32_t _length,

  3499                            const UnicodeString &srcText,

  3500                            int32_t srcStart,

  3501                            int32_t srcLength,

  3502                            uint32_t options) const {

  3503   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);

  3504 }

  3506 inline int8_t

  3507 UnicodeString::caseCompare(int32_t start,

  3508                            int32_t _length,

  3509                            const UChar *srcChars,

  3510                            uint32_t options) const {

  3511   return doCaseCompare(start, _length, srcChars, 0, _length, options);

  3512 }

  3514 inline int8_t

  3515 UnicodeString::caseCompare(int32_t start,

  3516                            int32_t _length,

  3517                            const UChar *srcChars,

  3518                            int32_t srcStart,

  3519                            int32_t srcLength,

  3520                            uint32_t options) const {

  3521   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);

  3522 }

  3524 inline int8_t

  3525 UnicodeString::caseCompareBetween(int32_t start,

  3526                                   int32_t limit,

  3527                                   const UnicodeString &srcText,

  3528                                   int32_t srcStart,

  3529                                   int32_t srcLimit,

  3530                                   uint32_t options) const {

  3531   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);

  3532 }

  3534 inline int32_t

  3535 UnicodeString::indexOf(const UnicodeString& srcText,

  3536                int32_t srcStart,

  3537                int32_t srcLength,

  3538                int32_t start,

  3539                int32_t _length) const

  3540 {

  3541   if(!srcText.isBogus()) {

  3542     srcText.pinIndices(srcStart, srcLength);

  3543     if(srcLength > 0) {

  3544       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);

  3545     }

  3546   }

  3547   return -1;

  3548 }

  3550 inline int32_t

  3551 UnicodeString::indexOf(const UnicodeString& text) const

  3552 { return indexOf(text, 0, text.fLength, 0, fLength); }

  3554 inline int32_t

  3555 UnicodeString::indexOf(const UnicodeString& text,

  3556                int32_t start) const {

  3557   pinIndex(start);

  3558   return indexOf(text, 0, text.fLength, start, fLength - start);

  3559 }

  3561 inline int32_t

  3562 UnicodeString::indexOf(const UnicodeString& text,

  3563                int32_t start,

  3564                int32_t _length) const

  3565 { return indexOf(text, 0, text.fLength, start, _length); }

  3567 inline int32_t

  3568 UnicodeString::indexOf(const UChar *srcChars,

  3569                int32_t srcLength,

  3570                int32_t start) const {

  3571   pinIndex(start);

  3572   return indexOf(srcChars, 0, srcLength, start, fLength - start);

  3573 }

  3575 inline int32_t

  3576 UnicodeString::indexOf(const UChar *srcChars,

  3577                int32_t srcLength,

  3578                int32_t start,

  3579                int32_t _length) const

  3580 { return indexOf(srcChars, 0, srcLength, start, _length); }

  3582 inline int32_t

  3583 UnicodeString::indexOf(UChar c,

  3584                int32_t start,

  3585                int32_t _length) const

  3586 { return doIndexOf(c, start, _length); }

  3588 inline int32_t

  3589 UnicodeString::indexOf(UChar32 c,

  3590                int32_t start,

  3591                int32_t _length) const

  3592 { return doIndexOf(c, start, _length); }

  3594 inline int32_t

  3595 UnicodeString::indexOf(UChar c) const

  3596 { return doIndexOf(c, 0, fLength); }

  3598 inline int32_t

  3599 UnicodeString::indexOf(UChar32 c) const

  3600 { return indexOf(c, 0, fLength); }

  3602 inline int32_t

  3603 UnicodeString::indexOf(UChar c,

  3604                int32_t start) const {

  3605   pinIndex(start);

  3606   return doIndexOf(c, start, fLength - start);

  3607 }

  3609 inline int32_t

  3610 UnicodeString::indexOf(UChar32 c,

  3611                int32_t start) const {

  3612   pinIndex(start);

  3613   return indexOf(c, start, fLength - start);

  3614 }

  3616 inline int32_t

  3617 UnicodeString::lastIndexOf(const UChar *srcChars,

  3618                int32_t srcLength,

  3619                int32_t start,

  3620                int32_t _length) const

  3621 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }

  3623 inline int32_t

  3624 UnicodeString::lastIndexOf(const UChar *srcChars,

  3625                int32_t srcLength,

  3626                int32_t start) const {

  3627   pinIndex(start);

  3628   return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);

  3629 }

  3631 inline int32_t

  3632 UnicodeString::lastIndexOf(const UnicodeString& srcText,

  3633                int32_t srcStart,

  3634                int32_t srcLength,

  3635                int32_t start,

  3636                int32_t _length) const

  3637 {

  3638   if(!srcText.isBogus()) {

  3639     srcText.pinIndices(srcStart, srcLength);

  3640     if(srcLength > 0) {

  3641       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);

  3642     }

  3643   }

  3644   return -1;

  3645 }

  3647 inline int32_t

  3648 UnicodeString::lastIndexOf(const UnicodeString& text,

  3649                int32_t start,

  3650                int32_t _length) const

  3651 { return lastIndexOf(text, 0, text.fLength, start, _length); }

  3653 inline int32_t

  3654 UnicodeString::lastIndexOf(const UnicodeString& text,

  3655                int32_t start) const {

  3656   pinIndex(start);

  3657   return lastIndexOf(text, 0, text.fLength, start, fLength - start);

  3658 }

  3660 inline int32_t

  3661 UnicodeString::lastIndexOf(const UnicodeString& text) const

  3662 { return lastIndexOf(text, 0, text.fLength, 0, fLength); }

  3664 inline int32_t

  3665 UnicodeString::lastIndexOf(UChar c,

  3666                int32_t start,

  3667                int32_t _length) const

  3668 { return doLastIndexOf(c, start, _length); }

  3670 inline int32_t

  3671 UnicodeString::lastIndexOf(UChar32 c,

  3672                int32_t start,

  3673                int32_t _length) const {

  3674   return doLastIndexOf(c, start, _length);

  3675 }

  3677 inline int32_t

  3678 UnicodeString::lastIndexOf(UChar c) const

  3679 { return doLastIndexOf(c, 0, fLength); }

  3681 inline int32_t

  3682 UnicodeString::lastIndexOf(UChar32 c) const {

  3683   return lastIndexOf(c, 0, fLength);

  3684 }

  3686 inline int32_t

  3687 UnicodeString::lastIndexOf(UChar c,

  3688                int32_t start) const {

  3689   pinIndex(start);

  3690   return doLastIndexOf(c, start, fLength - start);

  3691 }

  3693 inline int32_t

  3694 UnicodeString::lastIndexOf(UChar32 c,

  3695                int32_t start) const {

  3696   pinIndex(start);

  3697   return lastIndexOf(c, start, fLength - start);

  3698 }

  3700 inline UBool

  3701 UnicodeString::startsWith(const UnicodeString& text) const

  3702 { return compare(0, text.fLength, text, 0, text.fLength) == 0; }

  3704 inline UBool

  3705 UnicodeString::startsWith(const UnicodeString& srcText,

  3706               int32_t srcStart,

  3707               int32_t srcLength) const

  3708 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }

  3710 inline UBool

  3711 UnicodeString::startsWith(const UChar *srcChars,

  3712               int32_t srcLength) const

  3713 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }

  3715 inline UBool

  3716 UnicodeString::startsWith(const UChar *srcChars,

  3717               int32_t srcStart,

  3718               int32_t srcLength) const

  3719 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}

  3721 inline UBool

  3722 UnicodeString::endsWith(const UnicodeString& text) const

  3723 { return doCompare(fLength - text.fLength, text.fLength,

  3724            text, 0, text.fLength) == 0; }

  3726 inline UBool

  3727 UnicodeString::endsWith(const UnicodeString& srcText,

  3728             int32_t srcStart,

  3729             int32_t srcLength) const {

  3730   srcText.pinIndices(srcStart, srcLength);

  3731   return doCompare(fLength - srcLength, srcLength,

  3732                    srcText, srcStart, srcLength) == 0;

  3733 }

  3735 inline UBool

  3736 UnicodeString::endsWith(const UChar *srcChars,

  3737             int32_t srcLength) const {

  3738   if(srcLength < 0) {

  3739     srcLength = u_strlen(srcChars);

  3740   }

  3741   return doCompare(fLength - srcLength, srcLength,

  3742                    srcChars, 0, srcLength) == 0;

  3743 }

  3745 inline UBool

  3746 UnicodeString::endsWith(const UChar *srcChars,

  3747             int32_t srcStart,

  3748             int32_t srcLength) const {

  3749   if(srcLength < 0) {

  3750     srcLength = u_strlen(srcChars + srcStart);

  3751   }

  3752   return doCompare(fLength - srcLength, srcLength,

  3753                    srcChars, srcStart, srcLength) == 0;

  3754 }

  3756 //========================================

  3757 // replace

  3758 //========================================

  3759 inline UnicodeString&

  3760 UnicodeString::replace(int32_t start,

  3761                int32_t _length,

  3762                const UnicodeString& srcText)

  3763 { return doReplace(start, _length, srcText, 0, srcText.fLength); }

  3765 inline UnicodeString&

  3766 UnicodeString::replace(int32_t start,

  3767                int32_t _length,

  3768                const UnicodeString& srcText,

  3769                int32_t srcStart,

  3770                int32_t srcLength)

  3771 { return doReplace(start, _length, srcText, srcStart, srcLength); }

  3773 inline UnicodeString&

  3774 UnicodeString::replace(int32_t start,

  3775                int32_t _length,

  3776                const UChar *srcChars,

  3777                int32_t srcLength)

  3778 { return doReplace(start, _length, srcChars, 0, srcLength); }

  3780 inline UnicodeString&

  3781 UnicodeString::replace(int32_t start,

  3782                int32_t _length,

  3783                const UChar *srcChars,

  3784                int32_t srcStart,

  3785                int32_t srcLength)

  3786 { return doReplace(start, _length, srcChars, srcStart, srcLength); }

  3788 inline UnicodeString&

  3789 UnicodeString::replace(int32_t start,

  3790                int32_t _length,

  3791                UChar srcChar)

  3792 { return doReplace(start, _length, &srcChar, 0, 1); }

  3794 inline UnicodeString&

  3795 UnicodeString::replace(int32_t start,

  3796                int32_t _length,

  3797                UChar32 srcChar) {

  3798   UChar buffer[U16_MAX_LENGTH];

  3799   int32_t count = 0;

  3800   UBool isError = FALSE;

  3801   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);

  3802   return doReplace(start, _length, buffer, 0, count);

  3803 }

  3805 inline UnicodeString&

  3806 UnicodeString::replaceBetween(int32_t start,

  3807                   int32_t limit,

  3808                   const UnicodeString& srcText)

  3809 { return doReplace(start, limit - start, srcText, 0, srcText.fLength); }

  3811 inline UnicodeString&

  3812 UnicodeString::replaceBetween(int32_t start,

  3813                   int32_t limit,

  3814                   const UnicodeString& srcText,

  3815                   int32_t srcStart,

  3816                   int32_t srcLimit)

  3817 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }

  3819 inline UnicodeString&

  3820 UnicodeString::findAndReplace(const UnicodeString& oldText,

  3821                   const UnicodeString& newText)

  3822 { return findAndReplace(0, fLength, oldText, 0, oldText.fLength,

  3823             newText, 0, newText.fLength); }

  3825 inline UnicodeString&

  3826 UnicodeString::findAndReplace(int32_t start,

  3827                   int32_t _length,

  3828                   const UnicodeString& oldText,

  3829                   const UnicodeString& newText)

  3830 { return findAndReplace(start, _length, oldText, 0, oldText.fLength,

  3831             newText, 0, newText.fLength); }

  3833 // ============================

  3834 // extract

  3835 // ============================

  3836 inline void

  3837 UnicodeString::doExtract(int32_t start,

  3838              int32_t _length,

  3839              UnicodeString& target) const

  3840 { target.replace(0, target.fLength, *this, start, _length); }

  3842 inline void

  3843 UnicodeString::extract(int32_t start,

  3844                int32_t _length,

  3845                UChar *target,

  3846                int32_t targetStart) const

  3847 { doExtract(start, _length, target, targetStart); }

  3849 inline void

  3850 UnicodeString::extract(int32_t start,

  3851                int32_t _length,

  3852                UnicodeString& target) const

  3853 { doExtract(start, _length, target); }

  3855 #if !UCONFIG_NO_CONVERSION

  3857 inline int32_t

  3858 UnicodeString::extract(int32_t start,

  3859                int32_t _length,

  3860                char *dst,

  3861                const char *codepage) const

  3863 {

  3864   // This dstSize value will be checked explicitly

  3865   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);

  3866 }

  3868 #endif

  3870 inline void

  3871 UnicodeString::extractBetween(int32_t start,

  3872                   int32_t limit,

  3873                   UChar *dst,

  3874                   int32_t dstStart) const {

  3875   pinIndex(start);

  3876   pinIndex(limit);

  3877   doExtract(start, limit - start, dst, dstStart);

  3878 }

  3880 inline UChar

  3881 UnicodeString::doCharAt(int32_t offset) const

  3882 {

  3883   if((uint32_t)offset < (uint32_t)fLength) {

  3884     return fArray[offset];

  3885   } else {

  3886     return kInvalidUChar;

  3887   }

  3888 }

  3890 inline UChar

  3891 UnicodeString::charAt(int32_t offset) const

  3892 { return doCharAt(offset); }

  3894 inline UChar

  3895 UnicodeString::operator[] (int32_t offset) const

  3896 { return doCharAt(offset); }

  3898 inline UChar32

  3899 UnicodeString::char32At(int32_t offset) const

  3900 {

  3901   if((uint32_t)offset < (uint32_t)fLength) {

  3902     UChar32 c;

  3903     U16_GET(fArray, 0, offset, fLength, c);

  3904     return c;

  3905   } else {

  3906     return kInvalidUChar;

  3907   }

  3908 }

  3910 inline int32_t

  3911 UnicodeString::getChar32Start(int32_t offset) const {

  3912   if((uint32_t)offset < (uint32_t)fLength) {

  3913     U16_SET_CP_START(fArray, 0, offset);

  3914     return offset;

  3915   } else {

  3916     return 0;

  3917   }

  3918 }

  3920 inline int32_t

  3921 UnicodeString::getChar32Limit(int32_t offset) const {

  3922   if((uint32_t)offset < (uint32_t)fLength) {

  3923     U16_SET_CP_LIMIT(fArray, 0, offset, fLength);

  3924     return offset;

  3925   } else {

  3926     return fLength;

  3927   }

  3928 }

  3930 inline UBool

  3931 UnicodeString::isEmpty() const {

  3932   return fLength == 0;

  3933 }

  3935 //========================================

  3936 // Write implementation methods

  3937 //========================================

  3938 inline const UChar *

  3939 UnicodeString::getTerminatedBuffer() {

  3940   if(fFlags&(kIsBogus|kOpenGetBuffer)) {

  3941     return 0;

  3942   } else if(fLength<fCapacity && fArray[fLength]==0) {

  3943     return fArray;

  3944   } else if(cloneArrayIfNeeded(fLength+1)) {

  3945     fArray[fLength]=0;

  3946     return fArray;

  3947   } else {

  3948     return 0;

  3949   }

  3950 }

  3952 inline UnicodeString&

  3953 UnicodeString::operator= (UChar ch)

  3954 { return doReplace(0, fLength, &ch, 0, 1); }

  3956 inline UnicodeString&

  3957 UnicodeString::operator= (UChar32 ch)

  3958 { return replace(0, fLength, ch); }

  3960 inline UnicodeString&

  3961 UnicodeString::setTo(const UnicodeString& srcText,

  3962              int32_t srcStart,

  3963              int32_t srcLength)

  3964 {

  3965   unBogus();

  3966   return doReplace(0, fLength, srcText, srcStart, srcLength);

  3967 }

  3969 inline UnicodeString&

  3970 UnicodeString::setTo(const UnicodeString& srcText,

  3971              int32_t srcStart)

  3972 {

  3973   unBogus();

  3974   srcText.pinIndex(srcStart);

  3975   return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);

  3976 }

  3978 inline UnicodeString&

  3979 UnicodeString::setTo(const UnicodeString& srcText)

  3980 {

  3981   unBogus();

  3982   return doReplace(0, fLength, srcText, 0, srcText.fLength);

  3983 }

  3985 inline UnicodeString&

  3986 UnicodeString::setTo(const UChar *srcChars,

  3987              int32_t srcLength)

  3988 {

  3989   unBogus();

  3990   return doReplace(0, fLength, srcChars, 0, srcLength);

  3991 }

  3993 inline UnicodeString&

  3994 UnicodeString::setTo(UChar srcChar)

  3995 {

  3996   unBogus();

  3997   return doReplace(0, fLength, &srcChar, 0, 1);

  3998 }

  4000 inline UnicodeString&

  4001 UnicodeString::setTo(UChar32 srcChar)

  4002 {

  4003   unBogus();

  4004   return replace(0, fLength, srcChar);

  4005 }

  4007 inline UnicodeString&

  4008 UnicodeString::operator+= (UChar ch)

  4009 { return doReplace(fLength, 0, &ch, 0, 1); }

  4011 inline UnicodeString&

  4012 UnicodeString::operator+= (UChar32 ch) {

  4013   UChar buffer[U16_MAX_LENGTH];

  4014   int32_t _length = 0;

  4015   UBool isError = FALSE;

  4016   U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);

  4017   return doReplace(fLength, 0, buffer, 0, _length);

  4018 }

  4020 inline UnicodeString&

  4021 UnicodeString::operator+= (const UnicodeString& srcText)

  4022 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); }

  4024 inline UnicodeString&

  4025 UnicodeString::append(const UnicodeString& srcText,

  4026               int32_t srcStart,

  4027               int32_t srcLength)

  4028 { return doReplace(fLength, 0, srcText, srcStart, srcLength); }

  4030 inline UnicodeString&

  4031 UnicodeString::append(const UnicodeString& srcText)

  4032 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); }

  4034 inline UnicodeString&

  4035 UnicodeString::append(const UChar *srcChars,

  4036               int32_t srcStart,

  4037               int32_t srcLength)

  4038 { return doReplace(fLength, 0, srcChars, srcStart, srcLength); }

  4040 inline UnicodeString&

  4041 UnicodeString::append(const UChar *srcChars,

  4042               int32_t srcLength)

  4043 { return doReplace(fLength, 0, srcChars, 0, srcLength); }

  4045 inline UnicodeString&

  4046 UnicodeString::append(UChar srcChar)

  4047 { return doReplace(fLength, 0, &srcChar, 0, 1); }

  4049 inline UnicodeString&

  4050 UnicodeString::append(UChar32 srcChar) {

  4051   UChar buffer[U16_MAX_LENGTH];

  4052   int32_t _length = 0;

  4053   UBool isError = FALSE;

  4054   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);

  4055   return doReplace(fLength, 0, buffer, 0, _length);

  4056 }

  4058 inline UnicodeString&

  4059 UnicodeString::insert(int32_t start,

  4060               const UnicodeString& srcText,

  4061               int32_t srcStart,

  4062               int32_t srcLength)

  4063 { return doReplace(start, 0, srcText, srcStart, srcLength); }

  4065 inline UnicodeString&

  4066 UnicodeString::insert(int32_t start,

  4067               const UnicodeString& srcText)

  4068 { return doReplace(start, 0, srcText, 0, srcText.fLength); }

  4070 inline UnicodeString&

  4071 UnicodeString::insert(int32_t start,

  4072               const UChar *srcChars,

  4073               int32_t srcStart,

  4074               int32_t srcLength)

  4075 { return doReplace(start, 0, srcChars, srcStart, srcLength); }

  4077 inline UnicodeString&

  4078 UnicodeString::insert(int32_t start,

  4079               const UChar *srcChars,

  4080               int32_t srcLength)

  4081 { return doReplace(start, 0, srcChars, 0, srcLength); }

  4083 inline UnicodeString&

  4084 UnicodeString::insert(int32_t start,

  4085               UChar srcChar)

  4086 { return doReplace(start, 0, &srcChar, 0, 1); }

  4088 inline UnicodeString&

  4089 UnicodeString::insert(int32_t start,

  4090               UChar32 srcChar)

  4091 { return replace(start, 0, srcChar); }

  4094 inline UnicodeString&

  4095 UnicodeString::remove()

  4096 {

  4097   // remove() of a bogus string makes the string empty and non-bogus

  4098   if(isBogus()) {

  4099     unBogus();

  4100   } else {

  4101     fLength = 0;

  4102   }

  4103   return *this;

  4104 }

  4106 inline UnicodeString&

  4107 UnicodeString::remove(int32_t start,

  4108              int32_t _length)

  4109 {

  4110   if(start <= 0 && _length == INT32_MAX) {

  4111     // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus

  4112     return remove();

  4113   } else {

  4114     return doReplace(start, _length, NULL, 0, 0);

  4115   }

  4116 }

  4118 inline UnicodeString&

  4119 UnicodeString::removeBetween(int32_t start,

  4120                 int32_t limit)

  4121 { return doReplace(start, limit - start, NULL, 0, 0); }

  4123 inline UBool

  4124 UnicodeString::truncate(int32_t targetLength)

  4125 {

  4126   if(isBogus() && targetLength == 0) {

  4127     // truncate(0) of a bogus string makes the string empty and non-bogus

  4128     unBogus();

  4129     return FALSE;

  4130   } else if((uint32_t)targetLength < (uint32_t)fLength) {

  4131     fLength = targetLength;

  4132     return TRUE;

  4133   } else {

  4134     return FALSE;

  4135   }

  4136 }

  4138 inline UnicodeString&

  4139 UnicodeString::reverse()

  4140 { return doReverse(0, fLength); }

  4142 inline UnicodeString&

  4143 UnicodeString::reverse(int32_t start,

  4144                int32_t _length)

  4145 { return doReverse(start, _length); }

  4147 U_NAMESPACE_END

  4149 #endif

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--