1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/unistr.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,4149 @@
1.4 +/*
1.5 +**********************************************************************
1.6 +* Copyright (C) 1998-2005, International Business Machines
1.7 +* Corporation and others. All Rights Reserved.
1.8 +**********************************************************************
1.9 +*
1.10 +* File unistr.h
1.11 +*
1.12 +* Modification History:
1.13 +*
1.14 +* Date Name Description
1.15 +* 09/25/98 stephen Creation.
1.16 +* 11/11/98 stephen Changed per 11/9 code review.
1.17 +* 04/20/99 stephen Overhauled per 4/16 code review.
1.18 +* 11/18/99 aliu Made to inherit from Replaceable. Added method
1.19 +* handleReplaceBetween(); other methods unchanged.
1.20 +* 06/25/01 grhoten Remove dependency on iostream.
1.21 +******************************************************************************
1.22 +*/
1.23 +
1.24 +#ifndef UNISTR_H
1.25 +#define UNISTR_H
1.26 +
1.27 +/**
1.28 + * \file
1.29 + * \brief C++ API: Unicode String
1.30 + */
1.31 +
1.32 +#include "unicode/rep.h"
1.33 +
1.34 +struct UConverter; // unicode/ucnv.h
1.35 +class StringThreadTest;
1.36 +
1.37 +#ifndef U_COMPARE_CODE_POINT_ORDER
1.38 +/* see also ustring.h and unorm.h */
1.39 +/**
1.40 + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
1.41 + * Compare strings in code point order instead of code unit order.
1.42 + * @stable ICU 2.2
1.43 + */
1.44 +#define U_COMPARE_CODE_POINT_ORDER 0x8000
1.45 +#endif
1.46 +
1.47 +#ifndef USTRING_H
1.48 +/* see ustring.h */
1.49 +U_STABLE int32_t U_EXPORT2
1.50 +u_strlen(const UChar *s);
1.51 +#endif
1.52 +
1.53 +U_NAMESPACE_BEGIN
1.54 +
1.55 +class Locale; // unicode/locid.h
1.56 +class StringCharacterIterator;
1.57 +class BreakIterator; // unicode/brkiter.h
1.58 +
1.59 +/* The <iostream> include has been moved to unicode/ustream.h */
1.60 +
1.61 +/**
1.62 + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
1.63 + * which constructs a Unicode string from an invariant-character char * string.
1.64 + * About invariant characters see utypes.h.
1.65 + * This constructor has no runtime dependency on conversion code and is
1.66 + * therefore recommended over ones taking a charset name string
1.67 + * (where the empty string "" indicates invariant-character conversion).
1.68 + *
1.69 + * @draft ICU 3.2
1.70 + */
1.71 +#define US_INV UnicodeString::kInvariant
1.72 +
1.73 +/**
1.74 + * Unicode String literals in C++.
1.75 + * Depending on the platform properties, different UnicodeString
1.76 + * constructors are used to create a UnicodeString object from
1.77 + * a string literal.
1.78 + * The macros are defined for maximum performance.
1.79 + * They work only for strings that contain "invariant characters", i.e.,
1.80 + * only Latin letters, digits, and some punctuation.
1.81 + * See utypes.h for details.
1.82 + *
1.83 + * The string parameter must be a C string literal.
1.84 + * The length of the string, not including the terminating
1.85 + * <code>NUL</code>, must be specified as a constant.
1.86 + * The U_STRING_DECL macro should be invoked exactly once for one
1.87 + * such string variable before it is used.
1.88 + * @stable ICU 2.0
1.89 + */
1.90 +#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
1.91 +# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
1.92 +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY /* one-byte UChar, ASCII family: cast the narrow literal */
1.93 +# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
1.94 +#else /* otherwise: pass the char * literal to the invariant-character (US_INV) constructor */
1.95 +# define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
1.96 +#endif /* UNICODE_STRING */
1.97 +
1.98 +/**
1.99 + * Unicode String literals in C++.
1.100 + * Depending on the platform properties, different UnicodeString
1.101 + * constructors are used to create a UnicodeString object from
1.102 + * a string literal.
1.103 + * The macros are defined for improved performance.
1.104 + * They work only for strings that contain "invariant characters", i.e.,
1.105 + * only Latin letters, digits, and some punctuation.
1.106 + * See utypes.h for details.
1.107 + *
1.108 + * The string parameter must be a C string literal; unlike UNICODE_STRING, no length is given (the macro passes -1 as the length argument).
1.109 + * @stable ICU 2.0
1.110 + */
1.111 +#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
1.112 +# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
1.113 +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY /* one-byte UChar, ASCII family: cast the narrow literal */
1.114 +# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
1.115 +#else /* otherwise: pass the char * literal to the invariant-character (US_INV) constructor */
1.116 +# define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
1.117 +#endif /* UNICODE_STRING_SIMPLE */
1.118 +
1.119 +/**
1.120 + * UnicodeString is a string class that stores Unicode characters directly and provides
1.121 + * functionality similar to that of the Java String and StringBuffer classes.
1.122 + * It is a concrete implementation of the abstract class Replaceable (for transliteration).
1.123 + *
1.124 + * The UnicodeString class is not suitable for subclassing.
1.125 + *
1.126 + * <p>For an overview of Unicode strings in C and C++ see the
1.127 + * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>
1.128 + *
1.129 + * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
1.130 + * A Unicode character may be stored with either one code unit
1.131 + * (the most common case) or with a matched pair of special code units
1.132 + * ("surrogates"). The data type for code units is UChar.
1.133 + * For single-character handling, a Unicode character code <em>point</em> is a value
1.134 + * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
1.135 + *
1.136 + * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
1.137 + * This is the same as with multi-byte char* strings in traditional string handling.
1.138 + * Operations on partial strings typically do not test for code point boundaries.
1.139 + * If necessary, the user needs to take care of such boundaries by testing for the code unit
1.140 + * values or by using functions like
1.141 + * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
1.142 + * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
1.143 + *
1.144 + * UnicodeString methods are more lenient with regard to input parameter values
1.145 + * than other ICU APIs. In particular:
1.146 + * - If indexes are out of bounds for a UnicodeString object
1.147 + * (<0 or >length()) then they are "pinned" to the nearest boundary.
1.148 + * - If primitive string pointer values (e.g., const UChar * or char *)
1.149 + * for input strings are NULL, then those input string parameters are treated
1.150 + * as if they pointed to an empty string.
1.151 + * However, this is <em>not</em> the case for char * parameters for charset names
1.152 + * or other IDs.
1.153 + * - Most UnicodeString methods do not take a UErrorCode parameter because
1.154 + * there are usually very few opportunities for failure other than a shortage
1.155 + * of memory, error codes in low-level C++ string methods would be inconvenient,
1.156 + * and the error code as the last parameter (ICU convention) would prevent
1.157 + * the use of default parameter values.
1.158 + * Instead, such methods set the UnicodeString into a "bogus" state
1.159 + * (see isBogus()) if an error occurs.
1.160 + *
1.161 + * In string comparisons, two UnicodeString objects that are both "bogus"
1.162 + * compare equal (to be transitive and prevent endless loops in sorting),
1.163 + * and a "bogus" string compares less than any non-"bogus" one.
1.164 + *
1.165 + * Const UnicodeString methods are thread-safe. Multiple threads can use
1.166 + * const methods on the same UnicodeString object simultaneously,
1.167 + * but non-const methods must not be called concurrently (in multiple threads)
1.168 + * with any other (const or non-const) methods.
1.169 + *
1.170 + * Similarly, const UnicodeString & parameters are thread-safe.
1.171 + * One object may be passed in as such a parameter concurrently in multiple threads.
1.172 + * This includes the const UnicodeString & parameters for
1.173 + * copy construction, assignment, and cloning.
1.174 + *
1.175 + * <p>UnicodeString uses several storage methods.
1.176 + * String contents can be stored inside the UnicodeString object itself,
1.177 + * in an allocated and shared buffer, or in an outside buffer that is "aliased".
1.178 + * Most of this is done transparently, but careful aliasing in particular provides
1.179 + * significant performance improvements.
1.180 + * Also, the internal buffer is accessible via special functions.
1.181 + * For details see the
1.182 + * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>
1.183 + *
1.184 + * @see utf.h
1.185 + * @see CharacterIterator
1.186 + * @stable ICU 2.0
1.187 + */
1.188 +class U_COMMON_API UnicodeString : public Replaceable
1.189 +{
1.190 +public:
1.191 +
1.192 + /**
1.193 + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
1.194 + * which constructs a Unicode string from an invariant-character char * string.
1.195 + * Use the macro US_INV instead of the full qualification for this value.
1.196 + *
1.197 + * @see US_INV
1.198 + * @draft ICU 3.2
1.199 + */
1.200 + enum EInvariant {
1.201 + /**
1.202 + * @see EInvariant
1.203 + * @draft ICU 3.2
1.204 + */
1.205 + kInvariant
1.206 + };
1.207 +
1.208 + //========================================
1.209 + // Read-only operations
1.210 + //========================================
1.211 +
1.212 + /* Comparison - bitwise only - for international comparison use collation */
1.213 +
1.214 + /**
1.215 + * Equality operator. Performs only bitwise comparison.
1.216 + * @param text The UnicodeString to compare to this one.
1.217 + * @return TRUE if <TT>text</TT> contains the same characters as this one,
1.218 + * FALSE otherwise.
1.219 + * @stable ICU 2.0
1.220 + */
1.221 + inline UBool operator== (const UnicodeString& text) const;
1.222 +
1.223 + /**
1.224 + * Inequality operator. Performs only bitwise comparison.
1.225 + * @param text The UnicodeString to compare to this one.
1.226 + * @return FALSE if <TT>text</TT> contains the same characters as this one,
1.227 + * TRUE otherwise.
1.228 + * @stable ICU 2.0
1.229 + */
1.230 + inline UBool operator!= (const UnicodeString& text) const;
1.231 +
1.232 + /**
1.233 + * Greater than operator. Performs only bitwise comparison.
1.234 + * @param text The UnicodeString to compare to this one.
1.235 + * @return TRUE if the characters in this are bitwise
1.236 + * greater than the characters in <code>text</code>, FALSE otherwise
1.237 + * @stable ICU 2.0
1.238 + */
1.239 + inline UBool operator> (const UnicodeString& text) const;
1.240 +
1.241 + /**
1.242 + * Less than operator. Performs only bitwise comparison.
1.243 + * @param text The UnicodeString to compare to this one.
1.244 + * @return TRUE if the characters in this are bitwise
1.245 + * less than the characters in <code>text</code>, FALSE otherwise
1.246 + * @stable ICU 2.0
1.247 + */
1.248 + inline UBool operator< (const UnicodeString& text) const;
1.249 +
1.250 + /**
1.251 + * Greater than or equal operator. Performs only bitwise comparison.
1.252 + * @param text The UnicodeString to compare to this one.
1.253 + * @return TRUE if the characters in this are bitwise
1.254 + * greater than or equal to the characters in <code>text</code>, FALSE otherwise
1.255 + * @stable ICU 2.0
1.256 + */
1.257 + inline UBool operator>= (const UnicodeString& text) const;
1.258 +
1.259 + /**
1.260 + * Less than or equal operator. Performs only bitwise comparison.
1.261 + * @param text The UnicodeString to compare to this one.
1.262 + * @return TRUE if the characters in this are bitwise
1.263 + * less than or equal to the characters in <code>text</code>, FALSE otherwise
1.264 + * @stable ICU 2.0
1.265 + */
1.266 + inline UBool operator<= (const UnicodeString& text) const;
1.267 +
1.268 + /**
1.269 + * Compare the characters bitwise in this UnicodeString to
1.270 + * the characters in <code>text</code>.
1.271 + * @param text The UnicodeString to compare to this one.
1.272 + * @return The result of bitwise character comparison: 0 if this
1.273 + * contains the same characters as <code>text</code>, -1 if the characters in
1.274 + * this are bitwise less than the characters in <code>text</code>, +1 if the
1.275 + * characters in this are bitwise greater than the characters
1.276 + * in <code>text</code>.
1.277 + * @stable ICU 2.0
1.278 + */
1.279 + inline int8_t compare(const UnicodeString& text) const;
1.280 +
1.281 + /**
1.282 + * Compare the characters bitwise in the range
1.283 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters
1.284 + * in <TT>text</TT>.
1.285 + * @param start the offset at which the compare operation begins
1.286 + * @param length the number of characters in this to compare.
1.287 + * @param text the other text to be compared against this string.
1.288 + * @return The result of bitwise character comparison: 0 if this
1.289 + * contains the same characters as <code>text</code>, -1 if the characters in
1.290 + * this are bitwise less than the characters in <code>text</code>, +1 if the
1.291 + * characters in this are bitwise greater than the characters
1.292 + * in <code>text</code>.
1.293 + * @stable ICU 2.0
1.294 + */
1.295 + inline int8_t compare(int32_t start,
1.296 + int32_t length,
1.297 + const UnicodeString& text) const;
1.298 +
1.299 + /**
1.300 + * Compare the characters bitwise in the range
1.301 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters
1.302 + * in <TT>srcText</TT> in the range
1.303 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.304 + * @param start the offset at which the compare operation begins
1.305 + * @param length the number of characters in this to compare.
1.306 + * @param srcText the text to be compared
1.307 + * @param srcStart the offset into <TT>srcText</TT> to start comparison
1.308 + * @param srcLength the number of characters in <TT>srcText</TT> to compare
1.309 + * @return The result of bitwise character comparison: 0 if this
1.310 + * contains the same characters as <code>srcText</code>, -1 if the characters in
1.311 + * this are bitwise less than the characters in <code>srcText</code>, +1 if the
1.312 + * characters in this are bitwise greater than the characters
1.313 + * in <code>srcText</code>.
1.314 + * @stable ICU 2.0
1.315 + */
1.316 + inline int8_t compare(int32_t start,
1.317 + int32_t length,
1.318 + const UnicodeString& srcText,
1.319 + int32_t srcStart,
1.320 + int32_t srcLength) const;
1.321 +
1.322 + /**
1.323 + * Compare the characters bitwise in this UnicodeString with the first
1.324 + * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
1.325 + * @param srcChars The characters to compare to this UnicodeString.
1.326 + * @param srcLength the number of characters in <TT>srcChars</TT> to compare
1.327 + * @return The result of bitwise character comparison: 0 if this
1.328 + * contains the same characters as <code>srcChars</code>, -1 if the characters in
1.329 + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
1.330 + * characters in this are bitwise greater than the characters
1.331 + * in <code>srcChars</code>.
1.332 + * @stable ICU 2.0
1.333 + */
1.334 + inline int8_t compare(const UChar *srcChars,
1.335 + int32_t srcLength) const;
1.336 +
1.337 + /**
1.338 + * Compare the characters bitwise in the range
1.339 + * [<TT>start</TT>, <TT>start + length</TT>) with the first
1.340 + * <TT>length</TT> characters in <TT>srcChars</TT>
1.341 + * @param start the offset at which the compare operation begins
1.342 + * @param length the number of characters to compare.
1.343 + * @param srcChars the characters to be compared
1.344 + * @return The result of bitwise character comparison: 0 if this
1.345 + * contains the same characters as <code>srcChars</code>, -1 if the characters in
1.346 + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
1.347 + * characters in this are bitwise greater than the characters
1.348 + * in <code>srcChars</code>.
1.349 + * @stable ICU 2.0
1.350 + */
1.351 + inline int8_t compare(int32_t start,
1.352 + int32_t length,
1.353 + const UChar *srcChars) const;
1.354 +
1.355 + /**
1.356 + * Compare the characters bitwise in the range
1.357 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters
1.358 + * in <TT>srcChars</TT> in the range
1.359 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.360 + * @param start the offset at which the compare operation begins
1.361 + * @param length the number of characters in this to compare
1.362 + * @param srcChars the characters to be compared
1.363 + * @param srcStart the offset into <TT>srcChars</TT> to start comparison
1.364 + * @param srcLength the number of characters in <TT>srcChars</TT> to compare
1.365 + * @return The result of bitwise character comparison: 0 if this
1.366 + * contains the same characters as <code>srcChars</code>, -1 if the characters in
1.367 + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
1.368 + * characters in this are bitwise greater than the characters
1.369 + * in <code>srcChars</code>.
1.370 + * @stable ICU 2.0
1.371 + */
1.372 + inline int8_t compare(int32_t start,
1.373 + int32_t length,
1.374 + const UChar *srcChars,
1.375 + int32_t srcStart,
1.376 + int32_t srcLength) const;
1.377 +
1.378 + /**
1.379 + * Compare the characters bitwise in the range
1.380 + * [<TT>start</TT>, <TT>limit</TT>) with the characters
1.381 + * in <TT>srcText</TT> in the range
1.382 + * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
1.383 + * @param start the offset at which the compare operation begins
1.384 + * @param limit the offset immediately following the compare operation
1.385 + * @param srcText the text to be compared
1.386 + * @param srcStart the offset into <TT>srcText</TT> to start comparison
1.387 + * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
1.388 + * @return The result of bitwise character comparison: 0 if this
1.389 + * contains the same characters as <code>srcText</code>, -1 if the characters in
1.390 + * this are bitwise less than the characters in <code>srcText</code>, +1 if the
1.391 + * characters in this are bitwise greater than the characters
1.392 + * in <code>srcText</code>.
1.393 + * @stable ICU 2.0
1.394 + */
1.395 + inline int8_t compareBetween(int32_t start,
1.396 + int32_t limit,
1.397 + const UnicodeString& srcText,
1.398 + int32_t srcStart,
1.399 + int32_t srcLimit) const;
1.400 +
1.401 + /**
1.402 + * Compare two Unicode strings in code point order.
1.403 + * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
1.404 + * if supplementary characters are present:
1.405 + *
1.406 + * In UTF-16, supplementary characters (with code points U+10000 and above) are
1.407 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
1.408 + * which means that they compare as less than some other BMP characters like U+feff.
1.409 + * This function compares Unicode strings in code point order.
1.410 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
1.411 + *
1.412 + * @param text Another string to compare this one to.
1.413 + * @return a negative/zero/positive integer corresponding to whether
1.414 + * this string is less than/equal to/greater than the second one
1.415 + * in code point order
1.416 + * @stable ICU 2.0
1.417 + */
1.418 + inline int8_t compareCodePointOrder(const UnicodeString& text) const;
1.419 +
1.420 + /**
1.421 + * Compare two Unicode strings in code point order.
1.422 + * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
1.423 + * if supplementary characters are present:
1.424 + *
1.425 + * In UTF-16, supplementary characters (with code points U+10000 and above) are
1.426 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
1.427 + * which means that they compare as less than some other BMP characters like U+feff.
1.428 + * This function compares Unicode strings in code point order.
1.429 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
1.430 + *
1.431 + * @param start The start offset in this string at which the compare operation begins.
1.432 + * @param length The number of code units from this string to compare.
1.433 + * @param srcText Another string to compare this one to.
1.434 + * @return a negative/zero/positive integer corresponding to whether
1.435 + * this string is less than/equal to/greater than the second one
1.436 + * in code point order
1.437 + * @stable ICU 2.0
1.438 + */
1.439 + inline int8_t compareCodePointOrder(int32_t start,
1.440 + int32_t length,
1.441 + const UnicodeString& srcText) const;
1.442 +
1.443 + /**
1.444 + * Compare two Unicode strings in code point order.
1.445 + * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
1.446 + * if supplementary characters are present:
1.447 + *
1.448 + * In UTF-16, supplementary characters (with code points U+10000 and above) are
1.449 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
1.450 + * which means that they compare as less than some other BMP characters like U+feff.
1.451 + * This function compares Unicode strings in code point order.
1.452 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
1.453 + *
1.454 + * @param start The start offset in this string at which the compare operation begins.
1.455 + * @param length The number of code units from this string to compare.
1.456 + * @param srcText Another string to compare this one to.
1.457 + * @param srcStart The start offset in that string at which the compare operation begins.
1.458 + * @param srcLength The number of code units from that string to compare.
1.459 + * @return a negative/zero/positive integer corresponding to whether
1.460 + * this string is less than/equal to/greater than the second one
1.461 + * in code point order
1.462 + * @stable ICU 2.0
1.463 + */
1.464 + inline int8_t compareCodePointOrder(int32_t start,
1.465 + int32_t length,
1.466 + const UnicodeString& srcText,
1.467 + int32_t srcStart,
1.468 + int32_t srcLength) const;
1.469 +
1.470 + /**
1.471 + * Compare two Unicode strings in code point order.
1.472 + * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
1.473 + * if supplementary characters are present:
1.474 + *
1.475 + * In UTF-16, supplementary characters (with code points U+10000 and above) are
1.476 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
1.477 + * which means that they compare as less than some other BMP characters like U+feff.
1.478 + * This function compares Unicode strings in code point order.
1.479 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
1.480 + *
1.481 + * @param srcChars A pointer to another string to compare this one to.
1.482 + * @param srcLength The number of code units from that string to compare.
1.483 + * @return a negative/zero/positive integer corresponding to whether
1.484 + * this string is less than/equal to/greater than the second one
1.485 + * in code point order
1.486 + * @stable ICU 2.0
1.487 + */
1.488 + inline int8_t compareCodePointOrder(const UChar *srcChars,
1.489 + int32_t srcLength) const;
1.490 +
1.491 + /**
1.492 + * Compare two Unicode strings in code point order.
1.493 + * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
1.494 + * if supplementary characters are present:
1.495 + *
1.496 + * In UTF-16, supplementary characters (with code points U+10000 and above) are
1.497 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
1.498 + * which means that they compare as less than some other BMP characters like U+feff.
1.499 + * This function compares Unicode strings in code point order.
1.500 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
1.501 + *
1.502 + * @param start The start offset in this string at which the compare operation begins.
1.503 + * @param length The number of code units from this string to compare.
1.504 + * @param srcChars A pointer to another string to compare this one to.
1.505 + * @return a negative/zero/positive integer corresponding to whether
1.506 + * this string is less than/equal to/greater than the second one
1.507 + * in code point order
1.508 + * @stable ICU 2.0
1.509 + */
1.510 + inline int8_t compareCodePointOrder(int32_t start,
1.511 + int32_t length,
1.512 + const UChar *srcChars) const;
1.513 +
1.514 + /**
1.515 + * Compare two Unicode strings in code point order.
1.516 + * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
1.517 + * if supplementary characters are present:
1.518 + *
1.519 + * In UTF-16, supplementary characters (with code points U+10000 and above) are
1.520 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
1.521 + * which means that they compare as less than some other BMP characters like U+feff.
1.522 + * This function compares Unicode strings in code point order.
1.523 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
1.524 + *
1.525 + * @param start The start offset in this string at which the compare operation begins.
1.526 + * @param length The number of code units from this string to compare.
1.527 + * @param srcChars A pointer to another string to compare this one to.
1.528 + * @param srcStart The start offset in that string at which the compare operation begins.
1.529 + * @param srcLength The number of code units from that string to compare.
1.530 + * @return a negative/zero/positive integer corresponding to whether
1.531 + * this string is less than/equal to/greater than the second one
1.532 + * in code point order
1.533 + * @stable ICU 2.0
1.534 + */
1.535 + inline int8_t compareCodePointOrder(int32_t start,
1.536 + int32_t length,
1.537 + const UChar *srcChars,
1.538 + int32_t srcStart,
1.539 + int32_t srcLength) const;
1.540 +
1.541 + /**
1.542 + * Compare two Unicode strings in code point order.
1.543 + * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
1.544 + * if supplementary characters are present:
1.545 + *
1.546 + * In UTF-16, supplementary characters (with code points U+10000 and above) are
1.547 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
1.548 + * which means that they compare as less than some other BMP characters like U+feff.
1.549 + * This function compares Unicode strings in code point order.
1.550 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
1.551 + *
1.552 + * @param start The start offset in this string at which the compare operation begins.
1.553 + * @param limit The offset after the last code unit from this string to compare.
1.554 + * @param srcText Another string to compare this one to.
1.555 + * @param srcStart The start offset in that string at which the compare operation begins.
1.556 + * @param srcLimit The offset after the last code unit from that string to compare.
1.557 + * @return a negative/zero/positive integer corresponding to whether
1.558 + * this string is less than/equal to/greater than the second one
1.559 + * in code point order
1.560 + * @stable ICU 2.0
1.561 + */
1.562 + inline int8_t compareCodePointOrderBetween(int32_t start,
1.563 + int32_t limit,
1.564 + const UnicodeString& srcText,
1.565 + int32_t srcStart,
1.566 + int32_t srcLimit) const;
1.567 +
1.568 + /**
1.569 + * Compare two strings case-insensitively using full case folding.
1.570 + * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
1.571 + *
1.572 + * @param text Another string to compare this one to.
1.573 + * @param options A bit set of options:
1.574 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
1.575 + * Comparison in code unit order with default case folding.
1.576 + *
1.577 + * - U_COMPARE_CODE_POINT_ORDER
1.578 + * Set to choose code point order instead of code unit order
1.579 + * (see u_strCompare for details).
1.580 + *
1.581 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.582 + *
1.583 + * @return A negative, zero, or positive integer indicating the comparison result.
1.584 + * @stable ICU 2.0
1.585 + */
1.586 + inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
1.587 +
1.588 + /**
1.589 + * Compare two strings case-insensitively using full case folding.
1.590 + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
1.591 + *
1.592 + * @param start The start offset in this string at which the compare operation begins.
1.593 + * @param length The number of code units from this string to compare.
1.594 + * @param srcText Another string to compare this one to.
1.595 + * @param options A bit set of options:
1.596 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
1.597 + * Comparison in code unit order with default case folding.
1.598 + *
1.599 + * - U_COMPARE_CODE_POINT_ORDER
1.600 + * Set to choose code point order instead of code unit order
1.601 + * (see u_strCompare for details).
1.602 + *
1.603 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.604 + *
1.605 + * @return A negative, zero, or positive integer indicating the comparison result.
1.606 + * @stable ICU 2.0
1.607 + */
1.608 + inline int8_t caseCompare(int32_t start,
1.609 + int32_t length,
1.610 + const UnicodeString& srcText,
1.611 + uint32_t options) const;
1.612 +
1.613 + /**
1.614 + * Compare two strings case-insensitively using full case folding.
1.615 + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
1.616 + *
1.617 + * @param start The start offset in this string at which the compare operation begins.
1.618 + * @param length The number of code units from this string to compare.
1.619 + * @param srcText Another string to compare this one to.
1.620 + * @param srcStart The start offset in that string at which the compare operation begins.
1.621 + * @param srcLength The number of code units from that string to compare.
1.622 + * @param options A bit set of options:
1.623 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
1.624 + * Comparison in code unit order with default case folding.
1.625 + *
1.626 + * - U_COMPARE_CODE_POINT_ORDER
1.627 + * Set to choose code point order instead of code unit order
1.628 + * (see u_strCompare for details).
1.629 + *
1.630 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.631 + *
1.632 + * @return A negative, zero, or positive integer indicating the comparison result.
1.633 + * @stable ICU 2.0
1.634 + */
1.635 + inline int8_t caseCompare(int32_t start,
1.636 + int32_t length,
1.637 + const UnicodeString& srcText,
1.638 + int32_t srcStart,
1.639 + int32_t srcLength,
1.640 + uint32_t options) const;
1.641 +
1.642 + /**
1.643 + * Compare two strings case-insensitively using full case folding.
1.644 + * This is equivalent to this->foldCase(options).compare(s.foldCase(options)), where s is the string of srcLength code units that srcChars points to.
1.645 + *
1.646 + * @param srcChars A pointer to another string to compare this one to.
1.647 + * @param srcLength The number of code units from that string to compare.
1.648 + * @param options A bit set of options:
1.649 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
1.650 + * Comparison in code unit order with default case folding.
1.651 + *
1.652 + * - U_COMPARE_CODE_POINT_ORDER
1.653 + * Set to choose code point order instead of code unit order
1.654 + * (see u_strCompare for details).
1.655 + *
1.656 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.657 + *
1.658 + * @return A negative, zero, or positive integer indicating the comparison result.
1.659 + * @stable ICU 2.0
1.660 + */
1.661 + inline int8_t caseCompare(const UChar *srcChars,
1.662 + int32_t srcLength,
1.663 + uint32_t options) const;
1.664 +
1.665 + /**
1.666 + * Compare two strings case-insensitively using full case folding.
1.667 + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
1.668 + *
1.669 + * @param start The start offset in this string at which the compare operation begins.
1.670 + * @param length The number of code units from this string to compare.
1.671 + * @param srcChars A pointer to another string to compare this one to.
1.672 + * @param options A bit set of options:
1.673 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
1.674 + * Comparison in code unit order with default case folding.
1.675 + *
1.676 + * - U_COMPARE_CODE_POINT_ORDER
1.677 + * Set to choose code point order instead of code unit order
1.678 + * (see u_strCompare for details).
1.679 + *
1.680 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.681 + *
1.682 + * @return A negative, zero, or positive integer indicating the comparison result.
1.683 + * @stable ICU 2.0
1.684 + */
1.685 + inline int8_t caseCompare(int32_t start,
1.686 + int32_t length,
1.687 + const UChar *srcChars,
1.688 + uint32_t options) const;
1.689 +
1.690 + /**
1.691 + * Compare two strings case-insensitively using full case folding.
1.692 + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
1.693 + *
1.694 + * @param start The start offset in this string at which the compare operation begins.
1.695 + * @param length The number of code units from this string to compare.
1.696 + * @param srcChars A pointer to another string to compare this one to.
1.697 + * @param srcStart The start offset in that string at which the compare operation begins.
1.698 + * @param srcLength The number of code units from that string to compare.
1.699 + * @param options A bit set of options:
1.700 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
1.701 + * Comparison in code unit order with default case folding.
1.702 + *
1.703 + * - U_COMPARE_CODE_POINT_ORDER
1.704 + * Set to choose code point order instead of code unit order
1.705 + * (see u_strCompare for details).
1.706 + *
1.707 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.708 + *
1.709 + * @return A negative, zero, or positive integer indicating the comparison result.
1.710 + * @stable ICU 2.0
1.711 + */
1.712 + inline int8_t caseCompare(int32_t start,
1.713 + int32_t length,
1.714 + const UChar *srcChars,
1.715 + int32_t srcStart,
1.716 + int32_t srcLength,
1.717 + uint32_t options) const;
1.718 +
1.719 + /**
1.720 + * Compare two strings case-insensitively using full case folding.
1.721 + * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
1.722 + *
1.723 + * @param start The start offset in this string at which the compare operation begins.
1.724 + * @param limit The offset after the last code unit from this string to compare.
1.725 + * @param srcText Another string to compare this one to.
1.726 + * @param srcStart The start offset in that string at which the compare operation begins.
1.727 + * @param srcLimit The offset after the last code unit from that string to compare.
1.728 + * @param options A bit set of options:
1.729 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
1.730 + * Comparison in code unit order with default case folding.
1.731 + *
1.732 + * - U_COMPARE_CODE_POINT_ORDER
1.733 + * Set to choose code point order instead of code unit order
1.734 + * (see u_strCompare for details).
1.735 + *
1.736 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.737 + *
1.738 + * @return A negative, zero, or positive integer indicating the comparison result.
1.739 + * @stable ICU 2.0
1.740 + */
1.741 + inline int8_t caseCompareBetween(int32_t start,
1.742 + int32_t limit,
1.743 + const UnicodeString& srcText,
1.744 + int32_t srcStart,
1.745 + int32_t srcLimit,
1.746 + uint32_t options) const;
1.747 +
1.748 + /**
1.749 + * Determine if this starts with the characters in <TT>text</TT>
1.750 + * @param text The text to match.
1.751 + * @return TRUE if this starts with the characters in <TT>text</TT>,
1.752 + * FALSE otherwise
1.753 + * @stable ICU 2.0
1.754 + */
1.755 + inline UBool startsWith(const UnicodeString& text) const;
1.756 +
1.757 + /**
1.758 + * Determine if this starts with the characters in <TT>srcText</TT>
1.759 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.760 + * @param srcText The text to match.
1.761 + * @param srcStart the offset into <TT>srcText</TT> to start matching
1.762 + * @param srcLength the number of characters in <TT>srcText</TT> to match
1.763 + * @return TRUE if this starts with the characters in <TT>srcText</TT>,
1.764 + * FALSE otherwise
1.765 + * @stable ICU 2.0
1.766 + */
1.767 + inline UBool startsWith(const UnicodeString& srcText,
1.768 + int32_t srcStart,
1.769 + int32_t srcLength) const;
1.770 +
1.771 + /**
1.772 + * Determine if this starts with the characters in <TT>srcChars</TT>
1.773 + * @param srcChars The characters to match.
1.774 + * @param srcLength the number of characters in <TT>srcChars</TT>
1.775 + * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
1.776 + * FALSE otherwise
1.777 + * @stable ICU 2.0
1.778 + */
1.779 + inline UBool startsWith(const UChar *srcChars,
1.780 + int32_t srcLength) const;
1.781 +
1.782 + /**
1.783 + * Determine if this starts with the characters in <TT>srcChars</TT>
1.784 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.785 + * @param srcChars The characters to match.
1.786 + * @param srcStart the offset into <TT>srcChars</TT> to start matching
1.787 + * @param srcLength the number of characters in <TT>srcChars</TT> to match
1.788 + * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
1.789 + * @stable ICU 2.0
1.790 + */
1.791 + inline UBool startsWith(const UChar *srcChars,
1.792 + int32_t srcStart,
1.793 + int32_t srcLength) const;
1.794 +
1.795 + /**
1.796 + * Determine if this ends with the characters in <TT>text</TT>
1.797 + * @param text The text to match.
1.798 + * @return TRUE if this ends with the characters in <TT>text</TT>,
1.799 + * FALSE otherwise
1.800 + * @stable ICU 2.0
1.801 + */
1.802 + inline UBool endsWith(const UnicodeString& text) const;
1.803 +
1.804 + /**
1.805 + * Determine if this ends with the characters in <TT>srcText</TT>
1.806 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.807 + * @param srcText The text to match.
1.808 + * @param srcStart the offset into <TT>srcText</TT> to start matching
1.809 + * @param srcLength the number of characters in <TT>srcText</TT> to match
1.810 + * @return TRUE if this ends with the characters in <TT>srcText</TT>,
1.811 + * FALSE otherwise
1.812 + * @stable ICU 2.0
1.813 + */
1.814 + inline UBool endsWith(const UnicodeString& srcText,
1.815 + int32_t srcStart,
1.816 + int32_t srcLength) const;
1.817 +
1.818 + /**
1.819 + * Determine if this ends with the characters in <TT>srcChars</TT>
1.820 + * @param srcChars The characters to match.
1.821 + * @param srcLength the number of characters in <TT>srcChars</TT>
1.822 + * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
1.823 + * FALSE otherwise
1.824 + * @stable ICU 2.0
1.825 + */
1.826 + inline UBool endsWith(const UChar *srcChars,
1.827 + int32_t srcLength) const;
1.828 +
1.829 + /**
1.830 + * Determine if this ends with the characters in <TT>srcChars</TT>
1.831 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.832 + * @param srcChars The characters to match.
1.833 + * @param srcStart the offset into <TT>srcChars</TT> to start matching
1.834 + * @param srcLength the number of characters in <TT>srcChars</TT> to match
1.835 + * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
1.836 + * FALSE otherwise
1.837 + * @stable ICU 2.0
1.838 + */
1.839 + inline UBool endsWith(const UChar *srcChars,
1.840 + int32_t srcStart,
1.841 + int32_t srcLength) const;
1.842 +
1.843 +
1.844 + /* Searching - bitwise only */
1.845 +
1.846 + /**
1.847 + * Locate in this the first occurrence of the characters in <TT>text</TT>,
1.848 + * using bitwise comparison.
1.849 + * @param text The text to search for.
1.850 + * @return The offset into this of the start of <TT>text</TT>,
1.851 + * or -1 if not found.
1.852 + * @stable ICU 2.0
1.853 + */
1.854 + inline int32_t indexOf(const UnicodeString& text) const;
1.855 +
1.856 + /**
1.857 + * Locate in this the first occurrence of the characters in <TT>text</TT>
1.858 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.859 + * @param text The text to search for.
1.860 + * @param start The offset at which searching will start.
1.861 + * @return The offset into this of the start of <TT>text</TT>,
1.862 + * or -1 if not found.
1.863 + * @stable ICU 2.0
1.864 + */
1.865 + inline int32_t indexOf(const UnicodeString& text,
1.866 + int32_t start) const;
1.867 +
1.868 + /**
1.869 + * Locate in this the first occurrence in the range
1.870 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.871 + * in <TT>text</TT>, using bitwise comparison.
1.872 + * @param text The text to search for.
1.873 + * @param start The offset at which searching will start.
1.874 + * @param length The number of characters to search
1.875 + * @return The offset into this of the start of <TT>text</TT>,
1.876 + * or -1 if not found.
1.877 + * @stable ICU 2.0
1.878 + */
1.879 + inline int32_t indexOf(const UnicodeString& text,
1.880 + int32_t start,
1.881 + int32_t length) const;
1.882 +
1.883 + /**
1.884 + * Locate in this the first occurrence in the range
1.885 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.886 + * in <TT>srcText</TT> in the range
1.887 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1.888 + * using bitwise comparison.
1.889 + * @param srcText The text to search for.
1.890 + * @param srcStart the offset into <TT>srcText</TT> at which
1.891 + * to start matching
1.892 + * @param srcLength the number of characters in <TT>srcText</TT> to match
1.893 + * @param start the offset into this at which to start matching
1.894 + * @param length the number of characters in this to search
1.895 + * @return The offset into this of the start of <TT>srcText</TT>,
1.896 + * or -1 if not found.
1.897 + * @stable ICU 2.0
1.898 + */
1.899 + inline int32_t indexOf(const UnicodeString& srcText,
1.900 + int32_t srcStart,
1.901 + int32_t srcLength,
1.902 + int32_t start,
1.903 + int32_t length) const;
1.904 +
1.905 + /**
1.906 + * Locate in this the first occurrence of the characters in
1.907 + * <TT>srcChars</TT>
1.908 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.909 + * @param srcChars The text to search for.
1.910 + * @param srcLength the number of characters in <TT>srcChars</TT> to match
1.911 + * @param start the offset into this at which to start matching
1.912 + * @return The offset into this of the start of <TT>srcChars</TT>,
1.913 + * or -1 if not found.
1.914 + * @stable ICU 2.0
1.915 + */
1.916 + inline int32_t indexOf(const UChar *srcChars,
1.917 + int32_t srcLength,
1.918 + int32_t start) const;
1.919 +
1.920 + /**
1.921 + * Locate in this the first occurrence in the range
1.922 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.923 + * in <TT>srcChars</TT>, using bitwise comparison.
1.924 + * @param srcChars The text to search for.
1.925 + * @param srcLength the number of characters in <TT>srcChars</TT>
1.926 + * @param start The offset at which searching will start.
1.927 + * @param length The number of characters to search
1.928 + * @return The offset into this of the start of <TT>srcChars</TT>,
1.929 + * or -1 if not found.
1.930 + * @stable ICU 2.0
1.931 + */
1.932 + inline int32_t indexOf(const UChar *srcChars,
1.933 + int32_t srcLength,
1.934 + int32_t start,
1.935 + int32_t length) const;
1.936 +
1.937 + /**
1.938 + * Locate in this the first occurrence in the range
1.939 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.940 + * in <TT>srcChars</TT> in the range
1.941 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1.942 + * using bitwise comparison.
1.943 + * @param srcChars The text to search for.
1.944 + * @param srcStart the offset into <TT>srcChars</TT> at which
1.945 + * to start matching
1.946 + * @param srcLength the number of characters in <TT>srcChars</TT> to match
1.947 + * @param start the offset into this at which to start matching
1.948 + * @param length the number of characters in this to search
1.949 + * @return The offset into this of the start of <TT>srcChars</TT>,
1.950 + * or -1 if not found.
1.951 + * @stable ICU 2.0
1.952 + */
1.953 + int32_t indexOf(const UChar *srcChars,
1.954 + int32_t srcStart,
1.955 + int32_t srcLength,
1.956 + int32_t start,
1.957 + int32_t length) const;
1.958 +
1.959 + /**
1.960 + * Locate in this the first occurrence of the BMP code point <code>c</code>,
1.961 + * using bitwise comparison.
1.962 + * @param c The code unit to search for.
1.963 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.964 + * @stable ICU 2.0
1.965 + */
1.966 + inline int32_t indexOf(UChar c) const;
1.967 +
1.968 + /**
1.969 + * Locate in this the first occurrence of the code point <TT>c</TT>,
1.970 + * using bitwise comparison.
1.971 + *
1.972 + * @param c The code point to search for.
1.973 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.974 + * @stable ICU 2.0
1.975 + */
1.976 + inline int32_t indexOf(UChar32 c) const;
1.977 +
1.978 + /**
1.979 + * Locate in this the first occurrence of the BMP code point <code>c</code>,
1.980 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.981 + * @param c The code unit to search for.
1.982 + * @param start The offset at which searching will start.
1.983 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.984 + * @stable ICU 2.0
1.985 + */
1.986 + inline int32_t indexOf(UChar c,
1.987 + int32_t start) const;
1.988 +
1.989 + /**
1.990 + * Locate in this the first occurrence of the code point <TT>c</TT>
1.991 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.992 + *
1.993 + * @param c The code point to search for.
1.994 + * @param start The offset at which searching will start.
1.995 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.996 + * @stable ICU 2.0
1.997 + */
1.998 + inline int32_t indexOf(UChar32 c,
1.999 + int32_t start) const;
1.1000 +
1.1001 + /**
1.1002 + * Locate in this the first occurrence of the BMP code point <code>c</code>
1.1003 + * in the range [<TT>start</TT>, <TT>start + length</TT>),
1.1004 + * using bitwise comparison.
1.1005 + * @param c The code unit to search for.
1.1006 + * @param start the offset into this at which to start matching
1.1007 + * @param length the number of characters in this to search
1.1008 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1009 + * @stable ICU 2.0
1.1010 + */
1.1011 + inline int32_t indexOf(UChar c,
1.1012 + int32_t start,
1.1013 + int32_t length) const;
1.1014 +
1.1015 + /**
1.1016 + * Locate in this the first occurrence of the code point <TT>c</TT>
1.1017 + * in the range [<TT>start</TT>, <TT>start + length</TT>),
1.1018 + * using bitwise comparison.
1.1019 + *
1.1020 + * @param c The code point to search for.
1.1021 + * @param start the offset into this at which to start matching
1.1022 + * @param length the number of characters in this to search
1.1023 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1024 + * @stable ICU 2.0
1.1025 + */
1.1026 + inline int32_t indexOf(UChar32 c,
1.1027 + int32_t start,
1.1028 + int32_t length) const;
1.1029 +
1.1030 + /**
1.1031 + * Locate in this the last occurrence of the characters in <TT>text</TT>,
1.1032 + * using bitwise comparison.
1.1033 + * @param text The text to search for.
1.1034 + * @return The offset into this of the start of <TT>text</TT>,
1.1035 + * or -1 if not found.
1.1036 + * @stable ICU 2.0
1.1037 + */
1.1038 + inline int32_t lastIndexOf(const UnicodeString& text) const;
1.1039 +
1.1040 + /**
1.1041 + * Locate in this the last occurrence of the characters in <TT>text</TT>
1.1042 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.1043 + * @param text The text to search for.
1.1044 + * @param start The offset at which searching will start.
1.1045 + * @return The offset into this of the start of <TT>text</TT>,
1.1046 + * or -1 if not found.
1.1047 + * @stable ICU 2.0
1.1048 + */
1.1049 + inline int32_t lastIndexOf(const UnicodeString& text,
1.1050 + int32_t start) const;
1.1051 +
1.1052 + /**
1.1053 + * Locate in this the last occurrence in the range
1.1054 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.1055 + * in <TT>text</TT>, using bitwise comparison.
1.1056 + * @param text The text to search for.
1.1057 + * @param start The offset at which searching will start.
1.1058 + * @param length The number of characters to search
1.1059 + * @return The offset into this of the start of <TT>text</TT>,
1.1060 + * or -1 if not found.
1.1061 + * @stable ICU 2.0
1.1062 + */
1.1063 + inline int32_t lastIndexOf(const UnicodeString& text,
1.1064 + int32_t start,
1.1065 + int32_t length) const;
1.1066 +
1.1067 + /**
1.1068 + * Locate in this the last occurrence in the range
1.1069 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.1070 + * in <TT>srcText</TT> in the range
1.1071 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1.1072 + * using bitwise comparison.
1.1073 + * @param srcText The text to search for.
1.1074 + * @param srcStart the offset into <TT>srcText</TT> at which
1.1075 + * to start matching
1.1076 + * @param srcLength the number of characters in <TT>srcText</TT> to match
1.1077 + * @param start the offset into this at which to start matching
1.1078 + * @param length the number of characters in this to search
1.1079 + * @return The offset into this of the start of <TT>srcText</TT>,
1.1080 + * or -1 if not found.
1.1081 + * @stable ICU 2.0
1.1082 + */
1.1083 + inline int32_t lastIndexOf(const UnicodeString& srcText,
1.1084 + int32_t srcStart,
1.1085 + int32_t srcLength,
1.1086 + int32_t start,
1.1087 + int32_t length) const;
1.1088 +
1.1089 + /**
1.1090 + * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1.1091 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.1092 + * @param srcChars The text to search for.
1.1093 + * @param srcLength the number of characters in <TT>srcChars</TT> to match
1.1094 + * @param start the offset into this at which to start matching
1.1095 + * @return The offset into this of the start of <TT>srcChars</TT>,
1.1096 + * or -1 if not found.
1.1097 + * @stable ICU 2.0
1.1098 + */
1.1099 + inline int32_t lastIndexOf(const UChar *srcChars,
1.1100 + int32_t srcLength,
1.1101 + int32_t start) const;
1.1102 +
1.1103 + /**
1.1104 + * Locate in this the last occurrence in the range
1.1105 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.1106 + * in <TT>srcChars</TT>, using bitwise comparison.
1.1107 + * @param srcChars The text to search for.
1.1108 + * @param srcLength the number of characters in <TT>srcChars</TT>
1.1109 + * @param start The offset at which searching will start.
1.1110 + * @param length The number of characters to search
1.1111 + * @return The offset into this of the start of <TT>srcChars</TT>,
1.1112 + * or -1 if not found.
1.1113 + * @stable ICU 2.0
1.1114 + */
1.1115 + inline int32_t lastIndexOf(const UChar *srcChars,
1.1116 + int32_t srcLength,
1.1117 + int32_t start,
1.1118 + int32_t length) const;
1.1119 +
1.1120 + /**
1.1121 + * Locate in this the last occurrence in the range
1.1122 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1.1123 + * in <TT>srcChars</TT> in the range
1.1124 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1.1125 + * using bitwise comparison.
1.1126 + * @param srcChars The text to search for.
1.1127 + * @param srcStart the offset into <TT>srcChars</TT> at which
1.1128 + * to start matching
1.1129 + * @param srcLength the number of characters in <TT>srcChars</TT> to match
1.1130 + * @param start the offset into this at which to start matching
1.1131 + * @param length the number of characters in this to search
1.1132 + * @return The offset into this of the start of <TT>srcChars</TT>,
1.1133 + * or -1 if not found.
1.1134 + * @stable ICU 2.0
1.1135 + */
1.1136 + int32_t lastIndexOf(const UChar *srcChars,
1.1137 + int32_t srcStart,
1.1138 + int32_t srcLength,
1.1139 + int32_t start,
1.1140 + int32_t length) const;
1.1141 +
1.1142 + /**
1.1143 + * Locate in this the last occurrence of the BMP code point <code>c</code>,
1.1144 + * using bitwise comparison.
1.1145 + * @param c The code unit to search for.
1.1146 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1147 + * @stable ICU 2.0
1.1148 + */
1.1149 + inline int32_t lastIndexOf(UChar c) const;
1.1150 +
1.1151 + /**
1.1152 + * Locate in this the last occurrence of the code point <TT>c</TT>,
1.1153 + * using bitwise comparison.
1.1154 + *
1.1155 + * @param c The code point to search for.
1.1156 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1157 + * @stable ICU 2.0
1.1158 + */
1.1159 + inline int32_t lastIndexOf(UChar32 c) const;
1.1160 +
1.1161 + /**
1.1162 + * Locate in this the last occurrence of the BMP code point <code>c</code>
1.1163 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.1164 + * @param c The code unit to search for.
1.1165 + * @param start The offset at which searching will start.
1.1166 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1167 + * @stable ICU 2.0
1.1168 + */
1.1169 + inline int32_t lastIndexOf(UChar c,
1.1170 + int32_t start) const;
1.1171 +
1.1172 + /**
1.1173 + * Locate in this the last occurrence of the code point <TT>c</TT>
1.1174 + * starting at offset <TT>start</TT>, using bitwise comparison.
1.1175 + *
1.1176 + * @param c The code point to search for.
1.1177 + * @param start The offset at which searching will start.
1.1178 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1179 + * @stable ICU 2.0
1.1180 + */
1.1181 + inline int32_t lastIndexOf(UChar32 c,
1.1182 + int32_t start) const;
1.1183 +
1.1184 + /**
1.1185 + * Locate in this the last occurrence of the BMP code point <code>c</code>
1.1186 + * in the range [<TT>start</TT>, <TT>start + length</TT>),
1.1187 + * using bitwise comparison.
1.1188 + * @param c The code unit to search for.
1.1189 + * @param start the offset into this at which to start matching
1.1190 + * @param length the number of characters in this to search
1.1191 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1192 + * @stable ICU 2.0
1.1193 + */
1.1194 + inline int32_t lastIndexOf(UChar c,
1.1195 + int32_t start,
1.1196 + int32_t length) const;
1.1197 +
1.1198 + /**
1.1199 + * Locate in this the last occurrence of the code point <TT>c</TT>
1.1200 + * in the range [<TT>start</TT>, <TT>start + length</TT>),
1.1201 + * using bitwise comparison.
1.1202 + *
1.1203 + * @param c The code point to search for.
1.1204 + * @param start the offset into this at which to start matching
1.1205 + * @param length the number of characters in this to search
1.1206 + * @return The offset into this of <TT>c</TT>, or -1 if not found.
1.1207 + * @stable ICU 2.0
1.1208 + */
1.1209 + inline int32_t lastIndexOf(UChar32 c,
1.1210 + int32_t start,
1.1211 + int32_t length) const;
1.1212 +
1.1213 +
1.1214 + /* Character access */
1.1215 +
1.1216 + /**
1.1217 + * Return the code unit at offset <tt>offset</tt>.
1.1218 + * If the offset is not valid (0..length()-1) then U+ffff is returned.
1.1219 + * @param offset a valid offset into the text
1.1220 + * @return the code unit at offset <tt>offset</tt>
1.1221 + * or 0xffff if the offset is not valid for this string
1.1222 + * @stable ICU 2.0
1.1223 + */
1.1224 + inline UChar charAt(int32_t offset) const;
1.1225 +
1.1226 + /**
1.1227 + * Return the code unit at offset <tt>offset</tt>.
1.1228 + * If the offset is not valid (0..length()-1) then U+ffff is returned.
1.1229 + * @param offset a valid offset into the text
1.1230 + * @return the code unit at offset <tt>offset</tt> or 0xffff if the offset is not valid for this string
1.1231 + * @stable ICU 2.0
1.1232 + */
1.1233 + inline UChar operator[] (int32_t offset) const;
1.1234 +
1.1235 + /**
1.1236 + * Return the code point that contains the code unit
1.1237 + * at offset <tt>offset</tt>.
1.1238 + * If the offset is not valid (0..length()-1) then U+ffff is returned.
1.1239 + * @param offset a valid offset into the text
1.1240 + * that indicates the text offset of any of the code units
1.1241 + * that will be assembled into a code point (21-bit value) and returned
1.1242 + * @return the code point of text at <tt>offset</tt>
1.1243 + * or 0xffff if the offset is not valid for this string
1.1244 + * @stable ICU 2.0
1.1245 + */
1.1246 + inline UChar32 char32At(int32_t offset) const;
1.1247 +
1.1248 + /**
1.1249 + * Adjust a random-access offset so that
1.1250 + * it points to the beginning of a Unicode character.
1.1251 + * The offset that is passed in points to
1.1252 + * any code unit of a code point,
1.1253 + * while the returned offset will point to the first code unit
1.1254 + * of the same code point.
1.1255 + * In UTF-16, if the input offset points to a second surrogate
1.1256 + * of a surrogate pair, then the returned offset will point
1.1257 + * to the first surrogate.
1.1258 + * @param offset a valid offset into one code point of the text
1.1259 + * @return offset of the first code unit of the same code point
1.1260 + * @see U16_SET_CP_START
1.1261 + * @stable ICU 2.0
1.1262 + */
1.1263 + inline int32_t getChar32Start(int32_t offset) const;
1.1264 +
1.1265 + /**
1.1266 + * Adjust a random-access offset so that
1.1267 + * it points behind a Unicode character.
1.1268 + * The offset that is passed in points behind
1.1269 + * any code unit of a code point,
1.1270 + * while the returned offset will point behind the last code unit
1.1271 + * of the same code point.
1.1272 + * In UTF-16, if the input offset points behind the first surrogate
1.1273 + * (i.e., to the second surrogate)
1.1274 + * of a surrogate pair, then the returned offset will point
1.1275 + * behind the second surrogate (i.e., to the first code unit after the pair).
1.1276 + * @param offset a valid offset after any code unit of a code point of the text
1.1277 + * @return offset of the first code unit after the same code point
1.1278 + * @see U16_SET_CP_LIMIT
1.1279 + * @stable ICU 2.0
1.1280 + */
1.1281 + inline int32_t getChar32Limit(int32_t offset) const;
1.1282 +
1.1283 + /**
1.1284 + * Move the code unit index along the string by delta code points.
1.1285 + * Interpret the input index as a code unit-based offset into the string,
1.1286 + * move the index forward or backward by delta code points, and
1.1287 + * return the resulting index.
1.1288 + * The input index should point to the first code unit of a code point,
1.1289 + * if there is more than one.
1.1290 + *
1.1291 + * Both input and output indexes are code unit-based as for all
1.1292 + * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1.1293 + * If delta<0 then the index is moved backward (toward the start of the string).
1.1294 + * If delta>0 then the index is moved forward (toward the end of the string).
1.1295 + *
1.1296 + * This behaves like CharacterIterator::move32(delta, kCurrent).
1.1297 + *
1.1298 + * Behavior for out-of-bounds indexes:
1.1299 + * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1.1300 + * if the input index<0 then it is pinned to 0;
1.1301 + * if it is index>length() then it is pinned to length().
1.1302 + * Afterwards, the index is moved by <code>delta</code> code points
1.1303 + * forward or backward,
1.1304 + * but no further backward than to 0 and no further forward than to length().
1.1305 + * The resulting index return value will be in between 0 and length(), inclusively.
1.1306 + *
1.1307 + * Examples:
1.1308 + * <pre>
1.1309 + * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1.1310 + * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1.1311 + *
1.1312 + * // initial index: position of U+10000
1.1313 + * int32_t index=1;
1.1314 + *
1.1315 + * // the following examples will all result in index==4, position of U+10ffff
1.1316 + *
1.1317 + * // skip 2 code points from some position in the string
1.1318 + * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1.1319 + *
1.1320 + * // go to the 3rd code point from the start of s (0-based)
1.1321 + * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1.1322 + *
1.1323 + * // go to the next-to-last code point of s
1.1324 + * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1.1325 + * </pre>
1.1326 + *
1.1327 + * @param index input code unit index
1.1328 + * @param delta (signed) code point count to move the index forward or backward
1.1329 + * in the string
1.1330 + * @return the resulting code unit index
1.1331 + * @stable ICU 2.0
1.1332 + */
1.1333 + int32_t moveIndex32(int32_t index, int32_t delta) const;
1.1334 +
1.1335 + /* Substring extraction */
1.1336 +
1.1337 + /**
1.1338 + * Copy the characters in the range
1.1339 + * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1.1340 + * beginning at <tt>dstStart</tt>.
1.1341 + * If the string aliases to <code>dst</code> itself as an external buffer,
1.1342 + * then extract() will not copy the contents.
1.1343 + *
1.1344 + * @param start offset of first character which will be copied into the array
1.1345 + * @param length the number of characters to extract
1.1346 + * @param dst array in which to copy characters. The length of <tt>dst</tt>
1.1347 + * must be at least (<tt>dstStart + length</tt>).
1.1348 + * @param dstStart the offset in <TT>dst</TT> where the first character
1.1349 + * will be extracted
1.1350 + * @stable ICU 2.0
1.1351 + */
1.1352 + inline void extract(int32_t start,
1.1353 + int32_t length,
1.1354 + UChar *dst,
1.1355 + int32_t dstStart = 0) const;
1.1356 +
1.1357 + /**
1.1358 + * Copy the contents of the string into dest.
1.1359 + * This is a convenience function that
1.1360 + * checks if there is enough space in dest,
1.1361 + * extracts the entire string if possible,
1.1362 + * and NUL-terminates dest if possible.
1.1363 + *
1.1364 + * If the string fits into dest but cannot be NUL-terminated
1.1365 + * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1.1366 + * If the string itself does not fit into dest
1.1367 + * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1.1368 + *
1.1369 + * If the string aliases to <code>dest</code> itself as an external buffer,
1.1370 + * then extract() will not copy the contents.
1.1371 + *
1.1372 + * @param dest Destination string buffer.
1.1373 + * @param destCapacity Number of UChars available at dest.
1.1374 + * @param errorCode ICU error code.
1.1375 + * @return length()
1.1376 + * @stable ICU 2.0
1.1377 + */
1.1378 + int32_t
1.1379 + extract(UChar *dest, int32_t destCapacity,
1.1380 + UErrorCode &errorCode) const;
1.1381 +
1.1382 + /**
1.1383 + * Copy the characters in the range
1.1384 + * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1.1385 + * <tt>target</tt>.
1.1386 + * @param start offset of first character which will be copied
1.1387 + * @param length the number of characters to extract
1.1388 + * @param target UnicodeString into which to copy characters.
1.1389 + * This function returns nothing; the result is delivered in <TT>target</TT>.
1.1390 + * @stable ICU 2.0
1.1391 + */
1.1392 + inline void extract(int32_t start,
1.1393 + int32_t length,
1.1394 + UnicodeString& target) const;
1.1395 +
1.1396 + /**
1.1397 + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1.1398 + * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1.1399 + * @param start offset of first character which will be copied into the array
1.1400 + * @param limit offset immediately following the last character to be copied
1.1401 + * @param dst array in which to copy characters. The length of <tt>dst</tt>
1.1402 + * must be at least (<tt>dstStart + (limit - start)</tt>).
1.1403 + * @param dstStart the offset in <TT>dst</TT> where the first character
1.1404 + * will be extracted
1.1405 + * @stable ICU 2.0
1.1406 + */
1.1407 + inline void extractBetween(int32_t start,
1.1408 + int32_t limit,
1.1409 + UChar *dst,
1.1410 + int32_t dstStart = 0) const;
1.1411 +
1.1412 + /**
1.1413 + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1.1414 + * into the UnicodeString <tt>target</tt>. Replaceable API.
1.1415 + * @param start offset of first character which will be copied
1.1416 + * @param limit offset immediately following the last character to be copied
1.1417 + * @param target UnicodeString into which to copy characters.
1.1418 + * This function returns nothing; the result is delivered in <TT>target</TT>.
1.1419 + * @stable ICU 2.0
1.1420 + */
1.1421 + virtual void extractBetween(int32_t start,
1.1422 + int32_t limit,
1.1423 + UnicodeString& target) const;
1.1424 +
1.1425 + /**
1.1426 + * Copy the characters in the range
1.1427 + * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1.1428 + * All characters must be invariant (see utypes.h).
1.1429 + * Use US_INV as the last, signature-distinguishing parameter.
1.1430 + *
1.1431 + * This function does not write any more than <code>targetCapacity</code>
1.1432 + * characters but returns the length of the entire output string
1.1433 + * so that one can allocate a larger buffer and call the function again
1.1434 + * if necessary.
1.1435 + * The output string is NUL-terminated if possible.
1.1436 + *
1.1437 + * @param start offset of first character which will be copied
1.1438 + * @param startLength the number of characters to extract
1.1439 + * @param target the target buffer for extraction, can be NULL
1.1440 + * if targetCapacity is 0
1.1441 + * @param targetCapacity the length of the target buffer
1.1442 + * @param inv Signature-distinguishing parameter, use US_INV.
1.1443 + * @return the output string length, not including the terminating NUL
1.1444 + * @draft ICU 3.2
1.1445 + */
1.1446 + int32_t extract(int32_t start,
1.1447 + int32_t startLength,
1.1448 + char *target,
1.1449 + int32_t targetCapacity,
1.1450 + enum EInvariant inv) const;
1.1451 +
1.1452 +#if !UCONFIG_NO_CONVERSION
1.1453 +
1.1454 + /**
1.1455 + * Copy the characters in the range
1.1456 + * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1.1457 + * in a specified codepage.
1.1458 + * The output string is NUL-terminated.
1.1459 + *
1.1460 + * Recommendation: For invariant-character strings use
1.1461 + * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1.1462 + * because it avoids object code dependencies of UnicodeString on
1.1463 + * the conversion code.
1.1464 + *
1.1465 + * @param start offset of first character which will be copied
1.1466 + * @param startLength the number of characters to extract
1.1467 + * @param target the target buffer for extraction
1.1468 + * @param codepage the desired codepage for the characters. 0 has
1.1469 + * the special meaning of the default codepage
1.1470 + * If <code>codepage</code> is an empty string (<code>""</code>),
1.1471 + * then a simple conversion is performed on the codepage-invariant
1.1472 + * subset ("invariant characters") of the platform encoding. See utypes.h.
1.1473 + * If <TT>target</TT> is NULL, then the number of bytes required for
1.1474 + * <TT>target</TT> is returned. It is assumed that the target is big enough
1.1475 + * to fit all of the characters.
1.1476 + * @return the output string length, not including the terminating NUL
1.1477 + * @stable ICU 2.0
1.1478 + */
1.1479 + inline int32_t extract(int32_t start,
1.1480 + int32_t startLength,
1.1481 + char *target,
1.1482 + const char *codepage = 0) const;
1.1483 +
1.1484 + /**
1.1485 + * Copy the characters in the range
1.1486 + * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1.1487 + * in a specified codepage.
1.1488 + * This function does not write any more than <code>targetLength</code>
1.1489 + * characters but returns the length of the entire output string
1.1490 + * so that one can allocate a larger buffer and call the function again
1.1491 + * if necessary.
1.1492 + * The output string is NUL-terminated if possible.
1.1493 + *
1.1494 + * Recommendation: For invariant-character strings use
1.1495 + * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1.1496 + * because it avoids object code dependencies of UnicodeString on
1.1497 + * the conversion code.
1.1498 + *
1.1499 + * @param start offset of first character which will be copied
1.1500 + * @param startLength the number of characters to extract
1.1501 + * @param target the target buffer for extraction
1.1502 + * @param targetLength the length of the target buffer
1.1503 + * @param codepage the desired codepage for the characters. 0 has
1.1504 + * the special meaning of the default codepage
1.1505 + * If <code>codepage</code> is an empty string (<code>""</code>),
1.1506 + * then a simple conversion is performed on the codepage-invariant
1.1507 + * subset ("invariant characters") of the platform encoding. See utypes.h.
1.1508 + * If <TT>target</TT> is NULL, then the number of bytes required for
1.1509 + * <TT>target</TT> is returned.
1.1510 + * @return the output string length, not including the terminating NUL
1.1511 + * @stable ICU 2.0
1.1512 + */
1.1513 + int32_t extract(int32_t start,
1.1514 + int32_t startLength,
1.1515 + char *target,
1.1516 + uint32_t targetLength,
1.1517 + const char *codepage = 0) const;
1.1518 +
1.1519 + /**
1.1520 + * Convert the UnicodeString into a codepage string using an existing UConverter.
1.1521 + * The output string is NUL-terminated if possible.
1.1522 + *
1.1523 + * This function avoids the overhead of opening and closing a converter if
1.1524 + * multiple strings are extracted.
1.1525 + *
1.1526 + * @param dest destination string buffer, can be NULL if destCapacity==0
1.1527 + * @param destCapacity the number of chars available at dest
1.1528 + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1.1529 + * or NULL for the default converter
1.1530 + * @param errorCode normal ICU error code
1.1531 + * @return the length of the output string, not counting the terminating NUL;
1.1532 + * if the length is greater than destCapacity, then the string will not fit
1.1533 + * and a buffer of the indicated length would need to be passed in
1.1534 + * @stable ICU 2.0
1.1535 + */
1.1536 + int32_t extract(char *dest, int32_t destCapacity,
1.1537 + UConverter *cnv,
1.1538 + UErrorCode &errorCode) const;
1.1539 +
1.1540 +#endif
1.1541 +
1.1542 + /* Length operations */
1.1543 +
1.1544 + /**
1.1545 + * Return the length of the UnicodeString object.
1.1546 + * The length is the number of UChar code units that are in the UnicodeString.
1.1547 + * If you want the number of code points, please use countChar32().
1.1548 + * @return the length of the UnicodeString object
1.1549 + * @see countChar32
1.1550 + * @stable ICU 2.0
1.1551 + */
1.1552 + inline int32_t length(void) const;
1.1553 +
1.1554 + /**
1.1555 + * Count Unicode code points in the length UChar code units of the string.
1.1556 + * A code point may occupy either one or two UChar code units.
1.1557 + * Counting code points involves reading all code units.
1.1558 + *
1.1559 + * This function is basically the inverse of moveIndex32().
1.1560 + *
1.1561 + * @param start the index of the first code unit to check
1.1562 + * @param length the number of UChar code units to check
1.1563 + * @return the number of code points in the specified code units
1.1564 + * @see length
1.1565 + * @stable ICU 2.0
1.1566 + */
1.1567 + int32_t
1.1568 + countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1.1569 +
1.1570 + /**
1.1571 + * Check if the length UChar code units of the string
1.1572 + * contain more Unicode code points than a certain number.
1.1573 + * This is more efficient than counting all code points in this part of the string
1.1574 + * and comparing that number with a threshold.
1.1575 + * This function may not need to scan the string at all if the length
1.1576 + * falls within a certain range, and
1.1577 + * never needs to count more than 'number+1' code points.
1.1578 + * Logically equivalent to (countChar32(start, length)>number).
1.1579 + * A Unicode code point may occupy either one or two UChar code units.
1.1580 + *
1.1581 + * @param start the index of the first code unit to check (0 for the entire string)
1.1582 + * @param length the number of UChar code units to check
1.1583 + * (use INT32_MAX for the entire string; remember that start/length
1.1584 + * values are pinned)
1.1585 + * @param number The number of code points in the (sub)string is compared against
1.1586 + * the 'number' parameter.
1.1587 + * @return Boolean value for whether the string contains more Unicode code points
1.1588 + * than 'number'. Same as (u_countChar32(s, length)>number).
1.1589 + * @see countChar32
1.1590 + * @see u_strHasMoreChar32Than
1.1591 + * @stable ICU 2.4
1.1592 + */
1.1593 + UBool
1.1594 + hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1.1595 +
1.1596 + /**
1.1597 + * Determine if this string is empty.
1.1598 + * @return TRUE if this string contains 0 characters, FALSE otherwise.
1.1599 + * @stable ICU 2.0
1.1600 + */
1.1601 + inline UBool isEmpty(void) const;
1.1602 +
1.1603 + /**
1.1604 + * Return the capacity of the internal buffer of the UnicodeString object.
1.1605 + * This is useful together with the getBuffer functions.
1.1606 + * See there for details.
1.1607 + *
1.1608 + * @return the number of UChars available in the internal buffer
1.1609 + * @see getBuffer
1.1610 + * @stable ICU 2.0
1.1611 + */
1.1612 + inline int32_t getCapacity(void) const;
1.1613 +
1.1614 + /* Other operations */
1.1615 +
1.1616 + /**
1.1617 + * Generate a hash code for this object.
1.1618 + * @return The hash code of this UnicodeString.
1.1619 + * @stable ICU 2.0
1.1620 + */
1.1621 + inline int32_t hashCode(void) const;
1.1622 +
1.1623 + /**
1.1624 + * Determine if this object contains a valid string.
1.1625 + * A bogus string has no value. It is different from an empty string.
1.1626 + * It can be used to indicate that no string value is available.
1.1627 + * getBuffer() and getTerminatedBuffer() return NULL, and
1.1628 + * length() returns 0.
1.1629 + *
1.1630 + * @return TRUE if the string is bogus/invalid, FALSE otherwise
1.1631 + * @see setToBogus()
1.1632 + * @stable ICU 2.0
1.1633 + */
1.1634 + inline UBool isBogus(void) const;
1.1635 +
1.1636 +
1.1637 + //========================================
1.1638 + // Write operations
1.1639 + //========================================
1.1640 +
1.1641 + /* Assignment operations */
1.1642 +
1.1643 + /**
1.1644 + * Assignment operator. Replace the characters in this UnicodeString
1.1645 + * with the characters from <TT>srcText</TT>.
1.1646 + * @param srcText The text containing the characters to replace
1.1647 + * @return a reference to this
1.1648 + * @stable ICU 2.0
1.1649 + */
1.1650 + UnicodeString &operator=(const UnicodeString &srcText);
1.1651 +
1.1652 + /**
1.1653 + * Almost the same as the assignment operator.
1.1654 + * Replace the characters in this UnicodeString
1.1655 + * with the characters from <code>srcText</code>.
1.1656 + *
1.1657 + * This function works the same for all strings except for ones that
1.1658 + * are readonly aliases.
1.1659 + * Starting with ICU 2.4, the assignment operator and the copy constructor
1.1660 + * allocate a new buffer and copy the buffer contents even for readonly aliases.
1.1661 + * This function implements the old, more efficient but less safe behavior
1.1662 + * of making this string also a readonly alias to the same buffer.
1.1663 + * The fastCopyFrom function must be used only if it is known that the lifetime of
1.1664 + * this UnicodeString is at least as long as the lifetime of the aliased buffer
1.1665 + * including its contents, for example for strings from resource bundles
1.1666 + * or aliases to string contents.
1.1667 + *
1.1668 + * @param src The text containing the characters to replace.
1.1669 + * @return a reference to this
1.1670 + * @stable ICU 2.4
1.1671 + */
1.1672 + UnicodeString &fastCopyFrom(const UnicodeString &src);
1.1673 +
1.1674 + /**
1.1675 + * Assignment operator. Replace the characters in this UnicodeString
1.1676 + * with the code unit <TT>ch</TT>.
1.1677 + * @param ch the code unit to replace
1.1678 + * @return a reference to this
1.1679 + * @stable ICU 2.0
1.1680 + */
1.1681 + inline UnicodeString& operator= (UChar ch);
1.1682 +
1.1683 + /**
1.1684 + * Assignment operator. Replace the characters in this UnicodeString
1.1685 + * with the code point <TT>ch</TT>.
1.1686 + * @param ch the code point to replace
1.1687 + * @return a reference to this
1.1688 + * @stable ICU 2.0
1.1689 + */
1.1690 + inline UnicodeString& operator= (UChar32 ch);
1.1691 +
1.1692 + /**
1.1693 + * Set the text in the UnicodeString object to the characters
1.1694 + * in <TT>srcText</TT> in the range
1.1695 + * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1.1696 + * <TT>srcText</TT> is not modified.
1.1697 + * @param srcText the source for the new characters
1.1698 + * @param srcStart the offset into <TT>srcText</TT> where new characters
1.1699 + * will be obtained
1.1700 + * @return a reference to this
1.1701 + * @stable ICU 2.2
1.1702 + */
1.1703 + inline UnicodeString& setTo(const UnicodeString& srcText,
1.1704 + int32_t srcStart);
1.1705 +
1.1706 + /**
1.1707 + * Set the text in the UnicodeString object to the characters
1.1708 + * in <TT>srcText</TT> in the range
1.1709 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.1710 + * <TT>srcText</TT> is not modified.
1.1711 + * @param srcText the source for the new characters
1.1712 + * @param srcStart the offset into <TT>srcText</TT> where new characters
1.1713 + * will be obtained
1.1714 + * @param srcLength the number of characters in <TT>srcText</TT> in the
1.1715 + * replace string.
1.1716 + * @return a reference to this
1.1717 + * @stable ICU 2.0
1.1718 + */
1.1719 + inline UnicodeString& setTo(const UnicodeString& srcText,
1.1720 + int32_t srcStart,
1.1721 + int32_t srcLength);
1.1722 +
1.1723 + /**
1.1724 + * Set the text in the UnicodeString object to the characters in
1.1725 + * <TT>srcText</TT>.
1.1726 + * <TT>srcText</TT> is not modified.
1.1727 + * @param srcText the source for the new characters
1.1728 + * @return a reference to this
1.1729 + * @stable ICU 2.0
1.1730 + */
1.1731 + inline UnicodeString& setTo(const UnicodeString& srcText);
1.1732 +
1.1733 + /**
1.1734 + * Set the characters in the UnicodeString object to the characters
1.1735 + * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1.1736 + * @param srcChars the source for the new characters
1.1737 + * @param srcLength the number of Unicode characters in srcChars.
1.1738 + * @return a reference to this
1.1739 + * @stable ICU 2.0
1.1740 + */
1.1741 + inline UnicodeString& setTo(const UChar *srcChars,
1.1742 + int32_t srcLength);
1.1743 +
1.1744 + /**
1.1745 + * Set the characters in the UnicodeString object to the code unit
1.1746 + * <TT>srcChar</TT>.
1.1747 + * @param srcChar the code unit which becomes the UnicodeString's character
1.1748 + * content
1.1749 + * @return a reference to this
1.1750 + * @stable ICU 2.0
1.1751 + */
1.1752 + UnicodeString& setTo(UChar srcChar);
1.1753 +
1.1754 + /**
1.1755 + * Set the characters in the UnicodeString object to the code point
1.1756 + * <TT>srcChar</TT>.
1.1757 + * @param srcChar the code point which becomes the UnicodeString's character
1.1758 + * content
1.1759 + * @return a reference to this
1.1760 + * @stable ICU 2.0
1.1761 + */
1.1762 + UnicodeString& setTo(UChar32 srcChar);
1.1763 +
1.1764 + /**
1.1765 + * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1.1766 + * The text will be used for the UnicodeString object, but
1.1767 + * it will not be released when the UnicodeString is destroyed.
1.1768 + * This has copy-on-write semantics:
1.1769 + * When the string is modified, then the buffer is first copied into
1.1770 + * newly allocated memory.
1.1771 + * The aliased buffer is never modified.
1.1772 + * In an assignment to another UnicodeString, the text will be copied (since ICU 2.4);
1.1773 + * only fastCopyFrom() makes both strings alias the same readonly-text.
1.1774 + *
1.1775 + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1.1776 + * This must be true if <code>textLength==-1</code>.
1.1777 + * @param text The characters to alias for the UnicodeString.
1.1778 + * @param textLength The number of Unicode characters in <code>text</code> to alias.
1.1779 + * If -1, then this function will determine the length
1.1780 + * by calling <code>u_strlen()</code>.
1.1781 + * @return a reference to this
1.1782 + * @stable ICU 2.0
1.1783 + */
1.1784 + UnicodeString &setTo(UBool isTerminated,
1.1785 + const UChar *text,
1.1786 + int32_t textLength);
1.1787 +
1.1788 + /**
1.1789 + * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1.1790 + * The text will be used for the UnicodeString object, but
1.1791 + * it will not be released when the UnicodeString is destroyed.
1.1792 + * This has write-through semantics:
1.1793 + * For as long as the capacity of the buffer is sufficient, write operations
1.1794 + * will directly affect the buffer. When more capacity is necessary, then
1.1795 + * a new buffer will be allocated and the contents copied as with regularly
1.1796 + * constructed strings.
1.1797 + * In an assignment to another UnicodeString, the buffer will be copied.
1.1798 + * The extract(UChar *dst) function detects whether the dst pointer is the same
1.1799 + * as the string buffer itself and will in this case not copy the contents.
1.1800 + *
1.1801 + * @param buffer The characters to alias for the UnicodeString.
1.1802 + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1.1803 + * @param buffCapacity The size of <code>buffer</code> in UChars.
1.1804 + * @return a reference to this
1.1805 + * @stable ICU 2.0
1.1806 + */
1.1807 + UnicodeString &setTo(UChar *buffer,
1.1808 + int32_t buffLength,
1.1809 + int32_t buffCapacity);
1.1810 +
1.1811 + /**
1.1812 + * Make this UnicodeString object invalid.
1.1813 + * The string will test TRUE with isBogus().
1.1814 + *
1.1815 + * A bogus string has no value. It is different from an empty string.
1.1816 + * It can be used to indicate that no string value is available.
1.1817 + * getBuffer() and getTerminatedBuffer() return NULL, and
1.1818 + * length() returns 0.
1.1819 + *
1.1820 + * This utility function is used throughout the UnicodeString
1.1821 + * implementation to indicate that a UnicodeString operation failed,
1.1822 + * and may be used in other functions,
1.1823 + * especially but not exclusively when such functions do not
1.1824 + * take a UErrorCode for simplicity.
1.1825 + *
1.1826 + * The following methods, and no others, will clear a string object's bogus flag:
1.1827 + * - remove()
1.1828 + * - remove(0, INT32_MAX)
1.1829 + * - truncate(0)
1.1830 + * - operator=() (assignment operator)
1.1831 + * - setTo(...)
1.1832 + *
1.1833 + * The simplest way to turn a bogus string into an empty one
1.1834 + * is to use the remove() function.
1.1835 + * Examples for other functions that are equivalent to "set to empty string":
1.1836 + * \code
1.1837 + * if(s.isBogus()) {
1.1838 + * s.remove(); // set to an empty string (remove all), or
1.1839 + * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1.1840 + * s.truncate(0); // set to an empty string (complete truncation), or
1.1841 + * s=UnicodeString(); // assign an empty string, or
1.1842 + * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1.1843 + * static const UChar nul=0;
1.1844 + * s.setTo(&nul, 0); // set to an empty C Unicode string
1.1845 + * }
1.1846 + * \endcode
1.1847 + *
1.1848 + * @see isBogus()
1.1849 + * @stable ICU 2.0
1.1850 + */
1.1851 + void setToBogus();
1.1852 +
1.1853 + /**
1.1854 + * Set the character at the specified offset to the specified character.
1.1855 + * @param offset A valid offset into the text of the character to set
1.1856 + * @param ch The new character
1.1857 + * @return A reference to this
1.1858 + * @stable ICU 2.0
1.1859 + */
1.1860 + UnicodeString& setCharAt(int32_t offset,
1.1861 + UChar ch);
1.1862 +
1.1863 +
1.1864 + /* Append operations */
1.1865 +
1.1866 + /**
1.1867 + * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1.1868 + * object.
1.1869 + * @param ch the code unit to be appended
1.1870 + * @return a reference to this
1.1871 + * @stable ICU 2.0
1.1872 + */
1.1873 + inline UnicodeString& operator+= (UChar ch);
1.1874 +
1.1875 + /**
1.1876 + * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1.1877 + * object.
1.1878 + * @param ch the code point to be appended
1.1879 + * @return a reference to this
1.1880 + * @stable ICU 2.0
1.1881 + */
1.1882 + inline UnicodeString& operator+= (UChar32 ch);
1.1883 +
1.1884 + /**
1.1885 + * Append operator. Append the characters in <TT>srcText</TT> to the
1.1886 + * UnicodeString object. <TT>srcText</TT> is
1.1887 + * not modified.
1.1888 + * @param srcText the source for the new characters
1.1889 + * @return a reference to this
1.1890 + * @stable ICU 2.0
1.1891 + */
1.1892 + inline UnicodeString& operator+= (const UnicodeString& srcText);
1.1893 +
1.1894 + /**
1.1895 + * Append the characters
1.1896 + * in <TT>srcText</TT> in the range
1.1897 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
1.1898 + * UnicodeString object. <TT>srcText</TT>
1.1899 + * is not modified.
1.1900 + * @param srcText the source for the new characters
1.1901 + * @param srcStart the offset into <TT>srcText</TT> where new characters
1.1902 + * will be obtained
1.1903 + * @param srcLength the number of characters in <TT>srcText</TT> in
1.1904 + * the append string
1.1905 + * @return a reference to this
1.1906 + * @stable ICU 2.0
1.1907 + */
1.1908 + inline UnicodeString& append(const UnicodeString& srcText,
1.1909 + int32_t srcStart,
1.1910 + int32_t srcLength);
1.1911 +
1.1912 + /**
1.1913 + * Append the characters in <TT>srcText</TT> to the UnicodeString object.
1.1914 + * <TT>srcText</TT> is not modified.
1.1915 + * @param srcText the source for the new characters
1.1916 + * @return a reference to this
1.1917 + * @stable ICU 2.0
1.1918 + */
1.1919 + inline UnicodeString& append(const UnicodeString& srcText);
1.1920 +
1.1921 + /**
1.1922 + * Append the characters in <TT>srcChars</TT> in the range
1.1923 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
1.1924 + * object.
1.1925 + * <TT>srcChars</TT> is not modified.
1.1926 + * @param srcChars the source for the new characters
1.1927 + * @param srcStart the offset into <TT>srcChars</TT> where new characters
1.1928 + * will be obtained
1.1929 + * @param srcLength the number of characters in <TT>srcChars</TT> in
1.1930 + * the append string
1.1931 + * @return a reference to this
1.1932 + * @stable ICU 2.0
1.1933 + */
1.1934 + inline UnicodeString& append(const UChar *srcChars,
1.1935 + int32_t srcStart,
1.1936 + int32_t srcLength);
1.1937 +
1.1938 + /**
1.1939 + * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
1.1940 + * <TT>srcChars</TT> is not modified.
1.1941 + * @param srcChars the source for the new characters
1.1942 + * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
1.1943 + * @return a reference to this
1.1944 + * @stable ICU 2.0
1.1945 + */
1.1946 + inline UnicodeString& append(const UChar *srcChars,
1.1947 + int32_t srcLength);
1.1948 +
1.1949 + /**
1.1950 + * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
1.1951 + * @param srcChar the code unit to append
1.1952 + * @return a reference to this
1.1953 + * @stable ICU 2.0
1.1954 + */
1.1955 + inline UnicodeString& append(UChar srcChar);
1.1956 +
1.1957 + /**
1.1958 + * Append the code point <TT>srcChar</TT> to the UnicodeString object.
1.1959 + * @param srcChar the code point to append
1.1960 + * @return a reference to this
1.1961 + * @stable ICU 2.0
1.1962 + */
1.1963 + inline UnicodeString& append(UChar32 srcChar);
1.1964 +
1.1965 +
1.1966 + /* Insert operations */
1.1967 +
1.1968 + /**
1.1969 + * Insert the characters in <TT>srcText</TT> in the range
1.1970 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
1.1971 + * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1.1972 + * @param start the offset where the insertion begins
1.1973 + * @param srcText the source for the new characters
1.1974 + * @param srcStart the offset into <TT>srcText</TT> where new characters
1.1975 + * will be obtained
1.1976 + * @param srcLength the number of characters in <TT>srcText</TT> in
1.1977 + * the insert string
1.1978 + * @return a reference to this
1.1979 + * @stable ICU 2.0
1.1980 + */
1.1981 + inline UnicodeString& insert(int32_t start,
1.1982 + const UnicodeString& srcText,
1.1983 + int32_t srcStart,
1.1984 + int32_t srcLength);
1.1985 +
1.1986 + /**
1.1987 + * Insert the characters in <TT>srcText</TT> into the UnicodeString object
1.1988 + * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1.1989 + * @param start the offset where the insertion begins
1.1990 + * @param srcText the source for the new characters
1.1991 + * @return a reference to this
1.1992 + * @stable ICU 2.0
1.1993 + */
1.1994 + inline UnicodeString& insert(int32_t start,
1.1995 + const UnicodeString& srcText);
1.1996 +
1.1997 + /**
1.1998 + * Insert the characters in <TT>srcChars</TT> in the range
1.1999 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
1.2000 + * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
1.2001 + * @param start the offset at which the insertion begins
1.2002 + * @param srcChars the source for the new characters
1.2003 + * @param srcStart the offset into <TT>srcChars</TT> where new characters
1.2004 + * will be obtained
1.2005 + * @param srcLength the number of characters in <TT>srcChars</TT>
1.2006 + * in the insert string
1.2007 + * @return a reference to this
1.2008 + * @stable ICU 2.0
1.2009 + */
1.2010 + inline UnicodeString& insert(int32_t start,
1.2011 + const UChar *srcChars,
1.2012 + int32_t srcStart,
1.2013 + int32_t srcLength);
1.2014 +
1.2015 + /**
1.2016 + * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
1.2017 + * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
1.2018 + * @param start the offset where the insertion begins
1.2019 + * @param srcChars the source for the new characters
1.2020 + * @param srcLength the number of Unicode characters in srcChars.
1.2021 + * @return a reference to this
1.2022 + * @stable ICU 2.0
1.2023 + */
1.2024 + inline UnicodeString& insert(int32_t start,
1.2025 + const UChar *srcChars,
1.2026 + int32_t srcLength);
1.2027 +
1.2028 + /**
1.2029 + * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
1.2030 + * offset <TT>start</TT>.
1.2031 + * @param start the offset at which the insertion occurs
1.2032 + * @param srcChar the code unit to insert
1.2033 + * @return a reference to this
1.2034 + * @stable ICU 2.0
1.2035 + */
1.2036 + inline UnicodeString& insert(int32_t start,
1.2037 + UChar srcChar);
1.2038 +
1.2039 + /**
1.2040 + * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
1.2041 + * offset <TT>start</TT>.
1.2042 + * @param start the offset at which the insertion occurs
1.2043 + * @param srcChar the code point to insert
1.2044 + * @return a reference to this
1.2045 + * @stable ICU 2.0
1.2046 + */
1.2047 + inline UnicodeString& insert(int32_t start,
1.2048 + UChar32 srcChar);
1.2049 +
1.2050 +
1.2051 + /* Replace operations */
1.2052 +
1.2053 + /**
1.2054 + * Replace the characters in the range
1.2055 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
1.2056 + * <TT>srcText</TT> in the range
1.2057 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.2058 + * <TT>srcText</TT> is not modified.
1.2059 + * @param start the offset at which the replace operation begins
1.2060 + * @param length the number of characters to replace. The character at
1.2061 + * <TT>start + length</TT> is not modified.
1.2062 + * @param srcText the source for the new characters
1.2063 + * @param srcStart the offset into <TT>srcText</TT> where new characters
1.2064 + * will be obtained
1.2065 + * @param srcLength the number of characters in <TT>srcText</TT> in
1.2066 + * the replace string
1.2067 + * @return a reference to this
1.2068 + * @stable ICU 2.0
1.2069 + */
1.2070 + UnicodeString& replace(int32_t start,
1.2071 + int32_t length,
1.2072 + const UnicodeString& srcText,
1.2073 + int32_t srcStart,
1.2074 + int32_t srcLength);
1.2075 +
1.2076 + /**
1.2077 + * Replace the characters in the range
1.2078 + * [<TT>start</TT>, <TT>start + length</TT>)
1.2079 + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
1.2080 + * not modified.
1.2081 + * @param start the offset at which the replace operation begins
1.2082 + * @param length the number of characters to replace. The character at
1.2083 + * <TT>start + length</TT> is not modified.
1.2084 + * @param srcText the source for the new characters
1.2085 + * @return a reference to this
1.2086 + * @stable ICU 2.0
1.2087 + */
1.2088 + UnicodeString& replace(int32_t start,
1.2089 + int32_t length,
1.2090 + const UnicodeString& srcText);
1.2091 +
1.2092 + /**
1.2093 + * Replace the characters in the range
1.2094 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
1.2095 + * <TT>srcChars</TT> in the range
1.2096 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
1.2097 + * is not modified.
1.2098 + * @param start the offset at which the replace operation begins
1.2099 + * @param length the number of characters to replace. The character at
1.2100 + * <TT>start + length</TT> is not modified.
1.2101 + * @param srcChars the source for the new characters
1.2102 + * @param srcStart the offset into <TT>srcChars</TT> where new characters
1.2103 + * will be obtained
1.2104 + * @param srcLength the number of characters in <TT>srcChars</TT>
1.2105 + * in the replace string
1.2106 + * @return a reference to this
1.2107 + * @stable ICU 2.0
1.2108 + */
1.2109 + UnicodeString& replace(int32_t start,
1.2110 + int32_t length,
1.2111 + const UChar *srcChars,
1.2112 + int32_t srcStart,
1.2113 + int32_t srcLength);
1.2114 +
1.2115 + /**
1.2116 + * Replace the characters in the range
1.2117 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
1.2118 + * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1.2119 + * @param start the offset at which the replace operation begins
1.2120 + * @param length number of characters to replace. The character at
1.2121 + * <TT>start + length</TT> is not modified.
1.2122 + * @param srcChars the source for the new characters
1.2123 + * @param srcLength the number of Unicode characters in srcChars
1.2124 + * @return a reference to this
1.2125 + * @stable ICU 2.0
1.2126 + */
1.2127 + inline UnicodeString& replace(int32_t start,
1.2128 + int32_t length,
1.2129 + const UChar *srcChars,
1.2130 + int32_t srcLength);
1.2131 +
1.2132 + /**
1.2133 + * Replace the characters in the range
1.2134 + * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
1.2135 + * <TT>srcChar</TT>.
1.2136 + * @param start the offset at which the replace operation begins
1.2137 + * @param length the number of characters to replace. The character at
1.2138 + * <TT>start + length</TT> is not modified.
1.2139 + * @param srcChar the new code unit
1.2140 + * @return a reference to this
1.2141 + * @stable ICU 2.0
1.2142 + */
1.2143 + inline UnicodeString& replace(int32_t start,
1.2144 + int32_t length,
1.2145 + UChar srcChar);
1.2146 +
1.2147 + /**
1.2148 + * Replace the characters in the range
1.2149 + * [<TT>start</TT>, <TT>start + length</TT>) with the code point
1.2150 + * <TT>srcChar</TT>.
1.2151 + * @param start the offset at which the replace operation begins
1.2152 + * @param length the number of characters to replace. The character at
1.2153 + * <TT>start + length</TT> is not modified.
1.2154 + * @param srcChar the new code point
1.2155 + * @return a reference to this
1.2156 + * @stable ICU 2.0
1.2157 + */
1.2158 + inline UnicodeString& replace(int32_t start,
1.2159 + int32_t length,
1.2160 + UChar32 srcChar);
1.2161 +
1.2162 + /**
1.2163 + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
1.2164 + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
1.2165 + * @param start the offset at which the replace operation begins
1.2166 + * @param limit the offset immediately following the replace range
1.2167 + * @param srcText the source for the new characters
1.2168 + * @return a reference to this
1.2169 + * @stable ICU 2.0
1.2170 + */
1.2171 + inline UnicodeString& replaceBetween(int32_t start,
1.2172 + int32_t limit,
1.2173 + const UnicodeString& srcText);
1.2174 +
1.2175 + /**
1.2176 + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
1.2177 + * with the characters in <TT>srcText</TT> in the range
1.2178 + * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
1.2179 + * @param start the offset at which the replace operation begins
1.2180 + * @param limit the offset immediately following the replace range
1.2181 + * @param srcText the source for the new characters
1.2182 + * @param srcStart the offset into <TT>srcText</TT> where new characters
1.2183 + * will be obtained
1.2184 + * @param srcLimit the offset immediately following the range to copy
1.2185 + * in <TT>srcText</TT>
1.2186 + * @return a reference to this
1.2187 + * @stable ICU 2.0
1.2188 + */
1.2189 + inline UnicodeString& replaceBetween(int32_t start,
1.2190 + int32_t limit,
1.2191 + const UnicodeString& srcText,
1.2192 + int32_t srcStart,
1.2193 + int32_t srcLimit);
1.2194 +
1.2195 + /**
1.2196 + * Replace a substring of this object with the given text.
1.2197 + * @param start the beginning index, inclusive; <code>0 <= start
1.2198 + * <= limit</code>.
1.2199 + * @param limit the ending index, exclusive; <code>start <= limit
1.2200 + * <= length()</code>.
1.2201 + * @param text the text to replace characters <code>start</code>
1.2202 + * to <code>limit - 1</code>
1.2203 + * @stable ICU 2.0
1.2204 + */
1.2205 + virtual void handleReplaceBetween(int32_t start,
1.2206 + int32_t limit,
1.2207 + const UnicodeString& text);
1.2208 +
1.2209 + /**
1.2210 + * Replaceable API
1.2211 + * @return TRUE if it has MetaData
1.2212 + * @stable ICU 2.4
1.2213 + */
1.2214 + virtual UBool hasMetaData() const;
1.2215 +
1.2216 + /**
1.2217 + * Copy a substring of this object, retaining attribute (out-of-band)
1.2218 + * information. This method is used to duplicate or reorder substrings.
1.2219 + * The destination index must not overlap the source range.
1.2220 + *
1.2221 + * @param start the beginning index, inclusive; <code>0 <= start <=
1.2222 + * limit</code>.
1.2223 + * @param limit the ending index, exclusive; <code>start <= limit <=
1.2224 + * length()</code>.
1.2225 + * @param dest the destination index. The characters from
1.2226 + * <code>start..limit-1</code> will be copied to <code>dest</code>.
1.2227 + * Implementations of this method may assume that <code>dest <= start ||
1.2228 + * dest >= limit</code>.
1.2229 + * @stable ICU 2.0
1.2230 + */
1.2231 + virtual void copy(int32_t start, int32_t limit, int32_t dest);
1.2232 +
1.2233 + /* Search and replace operations */
1.2234 +
1.2235 + /**
1.2236 + * Replace all occurrences of characters in oldText with the characters
1.2237 + * in newText
1.2238 + * @param oldText the text containing the search text
1.2239 + * @param newText the text containing the replacement text
1.2240 + * @return a reference to this
1.2241 + * @stable ICU 2.0
1.2242 + */
1.2243 + inline UnicodeString& findAndReplace(const UnicodeString& oldText,
1.2244 + const UnicodeString& newText);
1.2245 +
1.2246 + /**
1.2247 + * Replace all occurrences of characters in oldText with characters
1.2248 + * in newText
1.2249 + * in the range [<TT>start</TT>, <TT>start + length</TT>).
1.2250 + * @param start the start of the range in which replace will be performed
1.2251 + * @param length the length of the range in which replace will be performed
1.2252 + * @param oldText the text containing the search text
1.2253 + * @param newText the text containing the replacement text
1.2254 + * @return a reference to this
1.2255 + * @stable ICU 2.0
1.2256 + */
1.2257 + inline UnicodeString& findAndReplace(int32_t start,
1.2258 + int32_t length,
1.2259 + const UnicodeString& oldText,
1.2260 + const UnicodeString& newText);
1.2261 +
1.2262 + /**
1.2263 + * Replace all occurrences of characters in oldText in the range
1.2264 + * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
1.2265 + * in newText in the range
1.2266 + * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
1.2267 + * in the range [<TT>start</TT>, <TT>start + length</TT>).
1.2268 + * @param start the start of the range in which replace will be performed
1.2269 + * @param length the length of the range in which replace will be performed
1.2270 + * @param oldText the text containing the search text
1.2271 + * @param oldStart the start of the search range in <TT>oldText</TT>
1.2272 + * @param oldLength the length of the search range in <TT>oldText</TT>
1.2273 + * @param newText the text containing the replacement text
1.2274 + * @param newStart the start of the replacement range in <TT>newText</TT>
1.2275 + * @param newLength the length of the replacement range in <TT>newText</TT>
1.2276 + * @return a reference to this
1.2277 + * @stable ICU 2.0
1.2278 + */
1.2279 + UnicodeString& findAndReplace(int32_t start,
1.2280 + int32_t length,
1.2281 + const UnicodeString& oldText,
1.2282 + int32_t oldStart,
1.2283 + int32_t oldLength,
1.2284 + const UnicodeString& newText,
1.2285 + int32_t newStart,
1.2286 + int32_t newLength);
1.2287 +
1.2288 +
1.2289 + /* Remove operations */
1.2290 +
1.2291 + /**
1.2292 + * Remove all characters from the UnicodeString object.
1.2293 + * @return a reference to this
1.2294 + * @stable ICU 2.0
1.2295 + */
1.2296 + inline UnicodeString& remove(void);
1.2297 +
1.2298 + /**
1.2299 + * Remove the characters in the range
1.2300 + * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
1.2301 + * @param start the offset of the first character to remove
1.2302 + * @param length the number of characters to remove
1.2303 + * @return a reference to this
1.2304 + * @stable ICU 2.0
1.2305 + */
1.2306 + inline UnicodeString& remove(int32_t start,
1.2307 + int32_t length = (int32_t)INT32_MAX);
1.2308 +
1.2309 + /**
1.2310 + * Remove the characters in the range
1.2311 + * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
1.2312 + * @param start the offset of the first character to remove
1.2313 + * @param limit the offset immediately following the range to remove
1.2314 + * @return a reference to this
1.2315 + * @stable ICU 2.0
1.2316 + */
1.2317 + inline UnicodeString& removeBetween(int32_t start,
1.2318 + int32_t limit = (int32_t)INT32_MAX);
1.2319 +
1.2320 +
1.2321 + /* Length operations */
1.2322 +
1.2323 + /**
1.2324 + * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
1.2325 + * If the length of this UnicodeString is less than targetLength,
1.2326 + * targetLength - length() copies of padChar will be added to the
1.2327 + * beginning of this UnicodeString.
1.2328 + * @param targetLength the desired length of the string
1.2329 + * @param padChar the character to use for padding. Defaults to
1.2330 + * space (U+0020)
1.2331 + * @return TRUE if the text was padded, FALSE otherwise.
1.2332 + * @stable ICU 2.0
1.2333 + */
1.2334 + UBool padLeading(int32_t targetLength,
1.2335 + UChar padChar = 0x0020);
1.2336 +
1.2337 + /**
1.2338 + * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
1.2339 + * If the length of this UnicodeString is less than targetLength,
1.2340 + * targetLength - length() copies of padChar will be added to the
1.2341 + * end of this UnicodeString.
1.2342 + * @param targetLength the desired length of the string
1.2343 + * @param padChar the character to use for padding. Defaults to
1.2344 + * space (U+0020)
1.2345 + * @return TRUE if the text was padded, FALSE otherwise.
1.2346 + * @stable ICU 2.0
1.2347 + */
1.2348 + UBool padTrailing(int32_t targetLength,
1.2349 + UChar padChar = 0x0020);
1.2350 +
1.2351 + /**
1.2352 + * Truncate this UnicodeString to the <TT>targetLength</TT>.
1.2353 + * @param targetLength the desired length of this UnicodeString.
1.2354 + * @return TRUE if the text was truncated, FALSE otherwise
1.2355 + * @stable ICU 2.0
1.2356 + */
1.2357 + inline UBool truncate(int32_t targetLength);
1.2358 +
1.2359 + /**
1.2360 + * Trims leading and trailing whitespace from this UnicodeString.
1.2361 + * @return a reference to this
1.2362 + * @stable ICU 2.0
1.2363 + */
1.2364 + UnicodeString& trim(void);
1.2365 +
1.2366 +
1.2367 + /* Miscellaneous operations */
1.2368 +
1.2369 + /**
1.2370 + * Reverse this UnicodeString in place.
1.2371 + * @return a reference to this
1.2372 + * @stable ICU 2.0
1.2373 + */
1.2374 + inline UnicodeString& reverse(void);
1.2375 +
1.2376 + /**
1.2377 + * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
1.2378 + * this UnicodeString.
1.2379 + * @param start the start of the range to reverse
1.2380 + * @param length the number of characters to reverse
1.2381 + * @return a reference to this
1.2382 + * @stable ICU 2.0
1.2383 + */
1.2384 + inline UnicodeString& reverse(int32_t start,
1.2385 + int32_t length);
1.2386 +
1.2387 + /**
1.2388 + * Convert the characters in this to UPPER CASE following the conventions of
1.2389 + * the default locale.
1.2390 + * @return A reference to this.
1.2391 + * @stable ICU 2.0
1.2392 + */
1.2393 + UnicodeString& toUpper(void);
1.2394 +
1.2395 + /**
1.2396 + * Convert the characters in this to UPPER CASE following the conventions of
1.2397 + * a specific locale.
1.2398 + * @param locale The locale containing the conventions to use.
1.2399 + * @return A reference to this.
1.2400 + * @stable ICU 2.0
1.2401 + */
1.2402 + UnicodeString& toUpper(const Locale& locale);
1.2403 +
1.2404 + /**
1.2405 + * Convert the characters in this to lower case following the conventions of
1.2406 + * the default locale.
1.2407 + * @return A reference to this.
1.2408 + * @stable ICU 2.0
1.2409 + */
1.2410 + UnicodeString& toLower(void);
1.2411 +
1.2412 + /**
1.2413 + * Convert the characters in this to lower case following the conventions of
1.2414 + * a specific locale.
1.2415 + * @param locale The locale containing the conventions to use.
1.2416 + * @return A reference to this.
1.2417 + * @stable ICU 2.0
1.2418 + */
1.2419 + UnicodeString& toLower(const Locale& locale);
1.2420 +
1.2421 +#if !UCONFIG_NO_BREAK_ITERATION
1.2422 +
1.2423 + /**
1.2424 + * Titlecase this string, convenience function using the default locale.
1.2425 + *
1.2426 + * Casing is locale-dependent and context-sensitive.
1.2427 + * Titlecasing uses a break iterator to find the first characters of words
1.2428 + * that are to be titlecased. It titlecases those characters and lowercases
1.2429 + * all others.
1.2430 + *
1.2431 + * The titlecase break iterator can be provided to customize for arbitrary
1.2432 + * styles, using rules and dictionaries beyond the standard iterators.
1.2433 + * It may be more efficient to always provide an iterator to avoid
1.2434 + * opening and closing one for each string.
1.2435 + * The standard titlecase iterator for the root locale implements the
1.2436 + * algorithm of Unicode TR 21.
1.2437 + *
1.2438 + * This function uses only the first() and next() methods of the
1.2439 + * provided break iterator.
1.2440 + *
1.2441 + * @param titleIter A break iterator to find the first characters of words
1.2442 + * that are to be titlecased.
1.2443 + * If none is provided (0), then a standard titlecase
1.2444 + * break iterator is opened.
1.2445 + * Otherwise the provided iterator is set to the string's text.
1.2446 + * @return A reference to this.
1.2447 + * @stable ICU 2.1
1.2448 + */
1.2449 + UnicodeString &toTitle(BreakIterator *titleIter);
1.2450 +
1.2451 + /**
1.2452 + * Titlecase this string.
1.2453 + *
1.2454 + * Casing is locale-dependent and context-sensitive.
1.2455 + * Titlecasing uses a break iterator to find the first characters of words
1.2456 + * that are to be titlecased. It titlecases those characters and lowercases
1.2457 + * all others.
1.2458 + *
1.2459 + * The titlecase break iterator can be provided to customize for arbitrary
1.2460 + * styles, using rules and dictionaries beyond the standard iterators.
1.2461 + * It may be more efficient to always provide an iterator to avoid
1.2462 + * opening and closing one for each string.
1.2463 + * The standard titlecase iterator for the root locale implements the
1.2464 + * algorithm of Unicode TR 21.
1.2465 + *
1.2466 + * This function uses only the first() and next() methods of the
1.2467 + * provided break iterator.
1.2468 + *
1.2469 + * @param titleIter A break iterator to find the first characters of words
1.2470 + * that are to be titlecased.
1.2471 + * If none is provided (0), then a standard titlecase
1.2472 + * break iterator is opened.
1.2473 + * Otherwise the provided iterator is set to the string's text.
1.2474 + * @param locale The locale to consider.
1.2475 + * @return A reference to this.
1.2476 + * @stable ICU 2.1
1.2477 + */
1.2478 + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
1.2479 +
1.2480 +#endif
1.2481 +
1.2482 + /**
1.2483 + * Case-fold the characters in this string.
1.2484 + * Case-folding is locale-independent and not context-sensitive,
1.2485 + * but there is an option for whether to include or exclude mappings for dotted I
1.2486 + * and dotless i that are marked with 'I' in CaseFolding.txt.
1.2487 + * The result may be longer or shorter than the original.
1.2488 + *
1.2489 + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
1.2490 + * @return A reference to this.
1.2491 + * @stable ICU 2.0
1.2492 + */
1.2493 + UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
1.2494 +
1.2495 + //========================================
1.2496 + // Access to the internal buffer
1.2497 + //========================================
1.2498 +
1.2499 + /**
1.2500 + * Get a read/write pointer to the internal buffer.
1.2501 + * The buffer is guaranteed to be large enough for at least minCapacity UChars,
1.2502 + * writable, and is still owned by the UnicodeString object.
1.2503 + * Calls to getBuffer(minCapacity) must not be nested, and
1.2504 + * must be matched with calls to releaseBuffer(newLength).
1.2505 + * If the string buffer was read-only or shared,
1.2506 + * then it will be reallocated and copied.
1.2507 + *
1.2508 + * An attempted nested call will return 0, and will not further modify the
1.2509 + * state of the UnicodeString object.
1.2510 + * It also returns 0 if the string is bogus.
1.2511 + *
1.2512 + * The actual capacity of the string buffer may be larger than minCapacity.
1.2513 + * getCapacity() returns the actual capacity.
1.2514 + * For many operations, the full capacity should be used to avoid reallocations.
1.2515 + *
1.2516 + * While the buffer is "open" between getBuffer(minCapacity)
1.2517 + * and releaseBuffer(newLength), the following applies:
1.2518 + * - The string length is set to 0.
1.2519 + * - Any read API call on the UnicodeString object will behave like on a 0-length string.
1.2520 + * - Any write API call on the UnicodeString object is disallowed and will have no effect.
1.2521 + * - You can read from and write to the returned buffer.
1.2522 + * - The previous string contents will still be in the buffer;
1.2523 + * if you want to use it, then you need to call length() before getBuffer(minCapacity).
1.2524 + * If the length() was greater than minCapacity, then any contents after minCapacity
1.2525 + * may be lost.
1.2526 + * The buffer contents is not NUL-terminated by getBuffer().
1.2527 + * If length()<getCapacity() then you can terminate it by writing a NUL
1.2528 + * at index length().
1.2529 + * - You must call releaseBuffer(newLength) in order to
1.2530 + * return to normal UnicodeString operation.
1.2531 + *
1.2532 + * @param minCapacity the minimum number of UChars that are to be available
1.2533 + * in the buffer, starting at the returned pointer;
1.2534 + * default to the current string capacity if minCapacity==-1
1.2535 + * @return a writable pointer to the internal string buffer,
1.2536 + * or 0 if an error occurs (nested calls, out of memory)
1.2537 + *
1.2538 + * @see releaseBuffer
1.2539 + * @see getTerminatedBuffer()
1.2540 + * @stable ICU 2.0
1.2541 + */
1.2542 + UChar *getBuffer(int32_t minCapacity);
1.2543 +
1.2544 + /**
1.2545 + * Release a read/write buffer on a UnicodeString object with an
1.2546 + * "open" getBuffer(minCapacity).
1.2547 + * This function must be called in a matched pair with getBuffer(minCapacity).
1.2548 + * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
1.2549 + *
1.2550 + * It will set the string length to newLength, at most to the current capacity.
1.2551 + * If newLength==-1 then it will set the length according to the
1.2552 + * first NUL in the buffer, or to the capacity if there is no NUL.
1.2553 + *
1.2554 + * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
1.2555 + *
1.2556 + * @param newLength the new length of the UnicodeString object;
1.2557 + * defaults to the current capacity if newLength is greater than that;
1.2558 + * if newLength==-1, it defaults to u_strlen(buffer) but not more than
1.2559 + * the current capacity of the string
1.2560 + *
1.2561 + * @see getBuffer(int32_t minCapacity)
1.2562 + * @stable ICU 2.0
1.2563 + */
1.2564 + void releaseBuffer(int32_t newLength=-1);
1.2565 +
1.2566 + /**
1.2567 + * Get a read-only pointer to the internal buffer.
1.2568 + * This can be called at any time on a valid UnicodeString.
1.2569 + *
1.2570 + * It returns 0 if the string is bogus, or
1.2571 + * during an "open" getBuffer(minCapacity).
1.2572 + *
1.2573 + * It can be called as many times as desired.
1.2574 + * The pointer that it returns will remain valid until the UnicodeString object is modified,
1.2575 + * at which time the pointer is semantically invalidated and must not be used any more.
1.2576 + *
1.2577 + * The capacity of the buffer can be determined with getCapacity().
1.2578 + * The part after length() may or may not be initialized and valid,
1.2579 + * depending on the history of the UnicodeString object.
1.2580 + *
1.2581 + * The buffer contents is (probably) not NUL-terminated.
1.2582 + * You can check if it is with
1.2583 + * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
1.2584 + * (See getTerminatedBuffer().)
1.2585 + *
1.2586 + * The buffer may reside in read-only memory. Its contents must not
1.2587 + * be modified.
1.2588 + *
1.2589 + * @return a read-only pointer to the internal string buffer,
1.2590 + * or 0 if the string is empty or bogus
1.2591 + *
1.2592 + * @see getBuffer(int32_t minCapacity)
1.2593 + * @see getTerminatedBuffer()
1.2594 + * @stable ICU 2.0
1.2595 + */
1.2596 + inline const UChar *getBuffer() const;
1.2597 +
1.2598 + /**
1.2599 + * Get a read-only pointer to the internal buffer,
1.2600 + * making sure that it is NUL-terminated.
1.2601 + * This can be called at any time on a valid UnicodeString.
1.2602 + *
1.2603 + * It returns 0 if the string is bogus, or
1.2604 + * during an "open" getBuffer(minCapacity), or if the buffer cannot
1.2605 + * be NUL-terminated (because memory allocation failed).
1.2606 + *
1.2607 + * It can be called as many times as desired.
1.2608 + * The pointer that it returns will remain valid until the UnicodeString object is modified,
1.2609 + * at which time the pointer is semantically invalidated and must not be used any more.
1.2610 + *
1.2611 + * The capacity of the buffer can be determined with getCapacity().
1.2612 + * The part after length()+1 may or may not be initialized and valid,
1.2613 + * depending on the history of the UnicodeString object.
1.2614 + *
1.2615 + * The buffer contents is guaranteed to be NUL-terminated.
1.2616 + * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
1.2617 + * is written.
1.2618 + * For this reason, this function is not const, unlike getBuffer().
1.2619 + * Note that a UnicodeString may also contain NUL characters as part of its contents.
1.2620 + *
1.2621 + * The buffer may reside in read-only memory. Its contents must not
1.2622 + * be modified.
1.2623 + *
1.2624 + * @return a read-only pointer to the internal string buffer,
1.2625 + * or 0 if the string is empty or bogus
1.2626 + *
1.2627 + * @see getBuffer(int32_t minCapacity)
1.2628 + * @see getBuffer()
1.2629 + * @stable ICU 2.2
1.2630 + */
1.2631 + inline const UChar *getTerminatedBuffer();
1.2632 +
1.2633 + //========================================
1.2634 + // Constructors
1.2635 + //========================================
1.2636 +
1.2637 + /** Construct an empty UnicodeString.
1.2638 + * @stable ICU 2.0
1.2639 + */
1.2640 + UnicodeString();
1.2641 +
1.2642 + /**
1.2643 + * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
1.2644 + * @param capacity the number of UChars this UnicodeString should hold
1.2645 + * before a resize is necessary; if count is greater than 0 and count
1.2646 + * code points c take up more space than capacity, then capacity is adjusted
1.2647 + * accordingly.
1.2648 + * @param c is used to initially fill the string
1.2649 + * @param count specifies how many code points c are to be written in the
1.2650 + * string
1.2651 + * @stable ICU 2.0
1.2652 + */
1.2653 + UnicodeString(int32_t capacity, UChar32 c, int32_t count);
1.2654 +
1.2655 + /**
1.2656 + * Single UChar (code unit) constructor.
1.2657 + * @param ch the character to place in the UnicodeString
1.2658 + * @stable ICU 2.0
1.2659 + */
1.2660 + UnicodeString(UChar ch);
1.2661 +
1.2662 + /**
1.2663 + * Single UChar32 (code point) constructor.
1.2664 + * @param ch the character to place in the UnicodeString
1.2665 + * @stable ICU 2.0
1.2666 + */
1.2667 + UnicodeString(UChar32 ch);
1.2668 +
1.2669 + /**
1.2670 + * UChar* constructor.
1.2671 + * @param text The characters to place in the UnicodeString. <TT>text</TT>
1.2672 + * must be NUL (U+0000) terminated.
1.2673 + * @stable ICU 2.0
1.2674 + */
1.2675 + UnicodeString(const UChar *text);
1.2676 +
1.2677 + /**
1.2678 + * UChar* constructor.
1.2679 + * @param text The characters to place in the UnicodeString.
1.2680 + * @param textLength The number of Unicode characters in <TT>text</TT>
1.2681 + * to copy.
1.2682 + * @stable ICU 2.0
1.2683 + */
1.2684 + UnicodeString(const UChar *text,
1.2685 + int32_t textLength);
1.2686 +
1.2687 + /**
1.2688 + * Readonly-aliasing UChar* constructor.
1.2689 + * The text will be used for the UnicodeString object, but
1.2690 + * it will not be released when the UnicodeString is destroyed.
1.2691 + * This has copy-on-write semantics:
1.2692 + * When the string is modified, then the buffer is first copied into
1.2693 + * newly allocated memory.
1.2694 + * The aliased buffer is never modified.
1.2695 + * In an assignment to another UnicodeString, the text will be aliased again,
1.2696 + * so that both strings then alias the same readonly-text.
1.2697 + *
1.2698 + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1.2699 + * This must be true if <code>textLength==-1</code>.
1.2700 + * @param text The characters to alias for the UnicodeString.
1.2701 + * @param textLength The number of Unicode characters in <code>text</code> to alias.
1.2702 + * If -1, then this constructor will determine the length
1.2703 + * by calling <code>u_strlen()</code>.
1.2704 + * @stable ICU 2.0
1.2705 + */
1.2706 + UnicodeString(UBool isTerminated,
1.2707 + const UChar *text,
1.2708 + int32_t textLength);
1.2709 +
1.2710 + /**
1.2711 + * Writable-aliasing UChar* constructor.
1.2712 + * The text will be used for the UnicodeString object, but
1.2713 + * it will not be released when the UnicodeString is destroyed.
1.2714 + * This has write-through semantics:
1.2715 + * For as long as the capacity of the buffer is sufficient, write operations
1.2716 + * will directly affect the buffer. When more capacity is necessary, then
1.2717 + * a new buffer will be allocated and the contents copied as with regularly
1.2718 + * constructed strings.
1.2719 + * In an assignment to another UnicodeString, the buffer will be copied.
1.2720 + * The extract(UChar *dst) function detects whether the dst pointer is the same
1.2721 + * as the string buffer itself and will in this case not copy the contents.
1.2722 + *
1.2723 + * @param buffer The characters to alias for the UnicodeString.
1.2724 + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1.2725 + * @param buffCapacity The size of <code>buffer</code> in UChars.
1.2726 + * @stable ICU 2.0
1.2727 + */
1.2728 + UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
1.2729 +
1.2730 +#if !UCONFIG_NO_CONVERSION
1.2731 +
1.2732 + /**
1.2733 + * char* constructor.
1.2734 + * @param codepageData an array of bytes, null-terminated
1.2735 + * @param codepage the encoding of <TT>codepageData</TT>. The special
1.2736 + * value 0 for <TT>codepage</TT> indicates that the text is in the
1.2737 + * platform's default codepage.
1.2738 + *
1.2739 + * If <code>codepage</code> is an empty string (<code>""</code>),
1.2740 + * then a simple conversion is performed on the codepage-invariant
1.2741 + * subset ("invariant characters") of the platform encoding. See utypes.h.
1.2742 + * Recommendation: For invariant-character strings use the constructor
1.2743 + * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
1.2744 + * because it avoids object code dependencies of UnicodeString on
1.2745 + * the conversion code.
1.2746 + *
1.2747 + * @stable ICU 2.0
1.2748 + */
1.2749 + UnicodeString(const char *codepageData,
1.2750 + const char *codepage = 0);
1.2751 +
1.2752 + /**
1.2753 + * char* constructor.
1.2754 + * @param codepageData an array of bytes.
1.2755 + * @param dataLength The number of bytes in <TT>codepageData</TT>.
1.2756 + * @param codepage the encoding of <TT>codepageData</TT>. The special
1.2757 + * value 0 for <TT>codepage</TT> indicates that the text is in the
1.2758 + * platform's default codepage.
1.2759 + * If <code>codepage</code> is an empty string (<code>""</code>),
1.2760 + * then a simple conversion is performed on the codepage-invariant
1.2761 + * subset ("invariant characters") of the platform encoding. See utypes.h.
1.2762 + * Recommendation: For invariant-character strings use the constructor
1.2763 + * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
1.2764 + * because it avoids object code dependencies of UnicodeString on
1.2765 + * the conversion code.
1.2766 + *
1.2767 + * @stable ICU 2.0
1.2768 + */
1.2769 + UnicodeString(const char *codepageData,
1.2770 + int32_t dataLength,
1.2771 + const char *codepage = 0);
1.2772 +
1.2773 + /**
1.2774 + * char * / UConverter constructor.
1.2775 + * This constructor uses an existing UConverter object to
1.2776 + * convert the codepage string to Unicode and construct a UnicodeString
1.2777 + * from that.
1.2778 + *
1.2779 + * The converter is reset at first.
1.2780 + * If the error code indicates a failure before this constructor is called,
1.2781 + * or if an error occurs during conversion or construction,
1.2782 + * then the string will be bogus.
1.2783 + *
1.2784 + * This function avoids the overhead of opening and closing a converter if
1.2785 + * multiple strings are constructed.
1.2786 + *
1.2787 + * @param src input codepage string
1.2788 + * @param srcLength length of the input string, can be -1 for NUL-terminated strings
1.2789 + * @param cnv converter object (ucnv_resetToUnicode() will be called),
1.2790 + * can be NULL for the default converter
1.2791 + * @param errorCode normal ICU error code
1.2792 + * @stable ICU 2.0
1.2793 + */
1.2794 + UnicodeString(
1.2795 + const char *src, int32_t srcLength,
1.2796 + UConverter *cnv,
1.2797 + UErrorCode &errorCode);
1.2798 +
1.2799 +#endif
1.2800 +
1.2801 + /**
1.2802 + * Constructs a Unicode string from an invariant-character char * string.
1.2803 + * About invariant characters see utypes.h.
1.2804 + * This constructor has no runtime dependency on conversion code and is
1.2805 + * therefore recommended over ones taking a charset name string
1.2806 + * (where the empty string "" indicates invariant-character conversion).
1.2807 + *
1.2808 + * Use the macro US_INV as the third, signature-distinguishing parameter.
1.2809 + *
1.2810 + * For example:
1.2811 + * \code
1.2812 + * void fn(const char *s) {
1.2813 + * UnicodeString ustr(s, -1, US_INV);
1.2814 + * // use ustr ...
1.2815 + * }
1.2816 + * \endcode
1.2817 + *
1.2818 + * @param src String using only invariant characters.
1.2819 + * @param length Length of src, or -1 if NUL-terminated.
1.2820 + * @param inv Signature-distinguishing parameter, use US_INV.
1.2821 + *
1.2822 + * @see US_INV
1.2823 + * @draft ICU 3.2
1.2824 + */
1.2825 + UnicodeString(const char *src, int32_t length, enum EInvariant inv);
1.2826 +
1.2827 +
1.2828 + /**
1.2829 + * Copy constructor.
1.2830 + * @param that The UnicodeString object to copy.
1.2831 + * @stable ICU 2.0
1.2832 + */
1.2833 + UnicodeString(const UnicodeString& that);
1.2834 +
1.2835 + /**
1.2836 + * 'Substring' constructor from tail of source string.
1.2837 + * @param src The UnicodeString object to copy.
1.2838 + * @param srcStart The offset into <tt>src</tt> at which to start copying.
1.2839 + * @stable ICU 2.2
1.2840 + */
1.2841 + UnicodeString(const UnicodeString& src, int32_t srcStart);
1.2842 +
1.2843 + /**
1.2844 + * 'Substring' constructor from subrange of source string.
1.2845 + * @param src The UnicodeString object to copy.
1.2846 + * @param srcStart The offset into <tt>src</tt> at which to start copying.
1.2847 + * @param srcLength The number of characters from <tt>src</tt> to copy.
1.2848 + * @stable ICU 2.2
1.2849 + */
1.2850 + UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
1.2851 +
1.2852 + /**
1.2853 + * Clone this object, an instance of a subclass of Replaceable.
1.2854 + * Clones can be used concurrently in multiple threads.
1.2855 + * If a subclass does not implement clone(), or if an error occurs,
1.2856 + * then NULL is returned.
1.2857 + * The clone functions in all subclasses return a pointer to a Replaceable
1.2858 + * because some compilers do not support covariant (same-as-this)
1.2859 + * return types; cast to the appropriate subclass if necessary.
1.2860 + * The caller must delete the clone.
1.2861 + *
1.2862 + * @return a clone of this object
1.2863 + *
1.2864 + * @see Replaceable::clone
1.2865 + * @see getDynamicClassID
1.2866 + * @stable ICU 2.6
1.2867 + */
1.2868 + virtual Replaceable *clone() const;
1.2869 +
1.2870 + /** Destructor.
1.2871 + * @stable ICU 2.0
1.2872 + */
1.2873 + virtual ~UnicodeString();
1.2874 +
1.2875 +
1.2876 + /* Miscellaneous operations */
1.2877 +
1.2878 + /**
1.2879 + * Unescape a string of characters and return a string containing
1.2880 + * the result. The following escape sequences are recognized:
1.2881 + *
1.2882 + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
1.2883 + * \\Uhhhhhhhh 8 hex digits
1.2884 + * \\xhh 1-2 hex digits
1.2885 + * \\ooo 1-3 octal digits; o in [0-7]
1.2886 + * \\cX control-X; X is masked with 0x1F
1.2887 + *
1.2888 + * as well as the standard ANSI C escapes:
1.2889 + *
1.2890 + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
1.2891 + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
1.2892 + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
1.2893 + *
1.2894 + * Anything else following a backslash is generically escaped. For
1.2895 + * example, "[a\\-z]" returns "[a-z]".
1.2896 + *
1.2897 + * If an escape sequence is ill-formed, this method returns an empty
1.2898 + * string. An example of an ill-formed sequence is "\\u" followed by
1.2899 + * fewer than 4 hex digits.
1.2900 + *
1.2901 + * This function is similar to u_unescape() but not identical to it.
1.2902 + * The latter takes a source char*, so it does escape recognition
1.2903 + * and also invariant conversion.
1.2904 + *
1.2905 + * @return a string with backslash escapes interpreted, or an
1.2906 + * empty string on error.
1.2907 + * @see UnicodeString#unescapeAt()
1.2908 + * @see u_unescape()
1.2909 + * @see u_unescapeAt()
1.2910 + * @stable ICU 2.0
1.2911 + */
1.2912 + UnicodeString unescape() const;
1.2913 +
1.2914 + /**
1.2915 + * Unescape a single escape sequence and return the represented
1.2916 + * character. See unescape() for a listing of the recognized escape
1.2917 + * sequences. The character at offset-1 is assumed (without
1.2918 + * checking) to be a backslash. If the escape sequence is
1.2919 + * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
1.2920 + * returned.
1.2921 + *
1.2922 + * @param offset an input output parameter. On input, it is the
1.2923 + * offset into this string where the escape sequence is located,
1.2924 + * after the initial backslash. On output, it is advanced after the
1.2925 + * last character parsed. On error, it is not advanced at all.
1.2926 + * @return the character represented by the escape sequence at
1.2927 + * offset, or (UChar32)0xFFFFFFFF on error.
1.2928 + * @see UnicodeString#unescape()
1.2929 + * @see u_unescape()
1.2930 + * @see u_unescapeAt()
1.2931 + * @stable ICU 2.0
1.2932 + */
1.2933 + UChar32 unescapeAt(int32_t &offset) const;
1.2934 +
1.2935 + /**
1.2936 + * ICU "poor man's RTTI", returns a UClassID for this class.
1.2937 + *
1.2938 + * @stable ICU 2.2
1.2939 + */
1.2940 + static UClassID U_EXPORT2 getStaticClassID();
1.2941 +
1.2942 + /**
1.2943 + * ICU "poor man's RTTI", returns a UClassID for the actual class.
1.2944 + *
1.2945 + * @stable ICU 2.2
1.2946 + */
1.2947 + virtual UClassID getDynamicClassID() const;
1.2948 +
1.2949 + //========================================
1.2950 + // Implementation methods
1.2951 + //========================================
1.2952 +
1.2953 +protected:
1.2954 + /**
1.2955 + * Implement Replaceable::getLength() (see jitterbug 1027).
1.2956 + * @stable ICU 2.4
1.2957 + */
1.2958 + virtual int32_t getLength() const;
1.2959 +
1.2960 + /**
1.2961 + * The change in Replaceable to use virtual getCharAt() allows
1.2962 + * UnicodeString::charAt() to be inline again (see jitterbug 709).
1.2963 + * @stable ICU 2.4
1.2964 + */
1.2965 + virtual UChar getCharAt(int32_t offset) const;
1.2966 +
1.2967 + /**
1.2968 + * The change in Replaceable to use virtual getChar32At() allows
1.2969 + * UnicodeString::char32At() to be inline again (see jitterbug 709).
1.2970 + * @stable ICU 2.4
1.2971 + */
1.2972 + virtual UChar32 getChar32At(int32_t offset) const;
1.2973 +
1.2974 +private:
1.2975 +
1.2976 + inline int8_t
1.2977 + doCompare(int32_t start,
1.2978 + int32_t length,
1.2979 + const UnicodeString& srcText,
1.2980 + int32_t srcStart,
1.2981 + int32_t srcLength) const;
1.2982 +
1.2983 + int8_t doCompare(int32_t start,
1.2984 + int32_t length,
1.2985 + const UChar *srcChars,
1.2986 + int32_t srcStart,
1.2987 + int32_t srcLength) const;
1.2988 +
1.2989 + inline int8_t
1.2990 + doCompareCodePointOrder(int32_t start,
1.2991 + int32_t length,
1.2992 + const UnicodeString& srcText,
1.2993 + int32_t srcStart,
1.2994 + int32_t srcLength) const;
1.2995 +
1.2996 + int8_t doCompareCodePointOrder(int32_t start,
1.2997 + int32_t length,
1.2998 + const UChar *srcChars,
1.2999 + int32_t srcStart,
1.3000 + int32_t srcLength) const;
1.3001 +
1.3002 + inline int8_t
1.3003 + doCaseCompare(int32_t start,
1.3004 + int32_t length,
1.3005 + const UnicodeString &srcText,
1.3006 + int32_t srcStart,
1.3007 + int32_t srcLength,
1.3008 + uint32_t options) const;
1.3009 +
1.3010 + int8_t
1.3011 + doCaseCompare(int32_t start,
1.3012 + int32_t length,
1.3013 + const UChar *srcChars,
1.3014 + int32_t srcStart,
1.3015 + int32_t srcLength,
1.3016 + uint32_t options) const;
1.3017 +
1.3018 + int32_t doIndexOf(UChar c,
1.3019 + int32_t start,
1.3020 + int32_t length) const;
1.3021 +
1.3022 + int32_t doIndexOf(UChar32 c,
1.3023 + int32_t start,
1.3024 + int32_t length) const;
1.3025 +
1.3026 + int32_t doLastIndexOf(UChar c,
1.3027 + int32_t start,
1.3028 + int32_t length) const;
1.3029 +
1.3030 + int32_t doLastIndexOf(UChar32 c,
1.3031 + int32_t start,
1.3032 + int32_t length) const;
1.3033 +
1.3034 + void doExtract(int32_t start,
1.3035 + int32_t length,
1.3036 + UChar *dst,
1.3037 + int32_t dstStart) const;
1.3038 +
1.3039 + inline void doExtract(int32_t start,
1.3040 + int32_t length,
1.3041 + UnicodeString& target) const;
1.3042 +
1.3043 + inline UChar doCharAt(int32_t offset) const;
1.3044 +
1.3045 + UnicodeString& doReplace(int32_t start,
1.3046 + int32_t length,
1.3047 + const UnicodeString& srcText,
1.3048 + int32_t srcStart,
1.3049 + int32_t srcLength);
1.3050 +
1.3051 + UnicodeString& doReplace(int32_t start,
1.3052 + int32_t length,
1.3053 + const UChar *srcChars,
1.3054 + int32_t srcStart,
1.3055 + int32_t srcLength);
1.3056 +
1.3057 + UnicodeString& doReverse(int32_t start,
1.3058 + int32_t length);
1.3059 +
1.3060 + // calculate hash code
1.3061 + int32_t doHashCode(void) const;
1.3062 +
1.3063 + // get pointer to start of array
1.3064 + inline UChar* getArrayStart(void);
1.3065 + inline const UChar* getArrayStart(void) const;
1.3066 +
1.3067 + // allocate the array; result may be fStackBuffer
1.3068 + // sets refCount to 1 if appropriate
1.3069 + // sets fArray, fCapacity, and fFlags
1.3070 + // returns boolean for success or failure
1.3071 + UBool allocate(int32_t capacity);
1.3072 +
1.3073 + // release the array if owned
1.3074 + void releaseArray(void);
1.3075 +
1.3076 + // turn a bogus string into an empty one
1.3077 + void unBogus();
1.3078 +
1.3079 + // implements assigment operator, copy constructor, and fastCopyFrom()
1.3080 + UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE);
1.3081 +
1.3082 + // Pin start and limit to acceptable values.
1.3083 + inline void pinIndex(int32_t& start) const;
1.3084 + inline void pinIndices(int32_t& start,
1.3085 + int32_t& length) const;
1.3086 +
1.3087 +#if !UCONFIG_NO_CONVERSION
1.3088 +
1.3089 + /* Internal extract() using UConverter. */
1.3090 + int32_t doExtract(int32_t start, int32_t length,
1.3091 + char *dest, int32_t destCapacity,
1.3092 + UConverter *cnv,
1.3093 + UErrorCode &errorCode) const;
1.3094 +
1.3095 + /*
1.3096 + * Real constructor for converting from codepage data.
1.3097 + * It assumes that it is called with !fRefCounted.
1.3098 + *
1.3099 + * If <code>codepage==0</code>, then the default converter
1.3100 + * is used for the platform encoding.
1.3101 + * If <code>codepage</code> is an empty string (<code>""</code>),
1.3102 + * then a simple conversion is performed on the codepage-invariant
1.3103 + * subset ("invariant characters") of the platform encoding. See utypes.h.
1.3104 + */
1.3105 + void doCodepageCreate(const char *codepageData,
1.3106 + int32_t dataLength,
1.3107 + const char *codepage);
1.3108 +
1.3109 + /*
1.3110 + * Worker function for creating a UnicodeString from
1.3111 + * a codepage string using a UConverter.
1.3112 + */
1.3113 + void
1.3114 + doCodepageCreate(const char *codepageData,
1.3115 + int32_t dataLength,
1.3116 + UConverter *converter,
1.3117 + UErrorCode &status);
1.3118 +
1.3119 +#endif
1.3120 +
1.3121 + /*
1.3122 + * This function is called when write access to the array
1.3123 + * is necessary.
1.3124 + *
1.3125 + * We need to make a copy of the array if
1.3126 + * the buffer is read-only, or
1.3127 + * the buffer is refCounted (shared), and refCount>1, or
1.3128 + * the buffer is too small.
1.3129 + *
1.3130 + * Return FALSE if memory could not be allocated.
1.3131 + */
1.3132 + UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
1.3133 + int32_t growCapacity = -1,
1.3134 + UBool doCopyArray = TRUE,
1.3135 + int32_t **pBufferToDelete = 0,
1.3136 + UBool forceClone = FALSE);
1.3137 +
1.3138 + // common function for case mappings
1.3139 + UnicodeString &
1.3140 + caseMap(BreakIterator *titleIter,
1.3141 + const char *locale,
1.3142 + uint32_t options,
1.3143 + int32_t toWhichCase);
1.3144 +
1.3145 + // ref counting
1.3146 + void addRef(void);
1.3147 + int32_t removeRef(void);
1.3148 + int32_t refCount(void) const;
1.3149 +
1.3150 + // constants
1.3151 + enum {
1.3152 + US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
1.3153 + kInvalidUChar=0xffff, // invalid UChar index
1.3154 + kGrowSize=128, // grow size for this buffer
1.3155 + kInvalidHashCode=0, // invalid hash code
1.3156 + kEmptyHashCode=1, // hash code for empty string
1.3157 +
1.3158 + // bit flag values for fFlags
1.3159 + kIsBogus=1, // this string is bogus, i.e., not valid or NULL
1.3160 + kUsingStackBuffer=2,// fArray==fStackBuffer
1.3161 + kRefCounted=4, // there is a refCount field before the characters in fArray
1.3162 + kBufferIsReadonly=8,// do not write to this buffer
1.3163 + kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
1.3164 + // and releaseBuffer(newLength) must be called
1.3165 +
1.3166 + // combined values for convenience
1.3167 + kShortString=kUsingStackBuffer,
1.3168 + kLongString=kRefCounted,
1.3169 + kReadonlyAlias=kBufferIsReadonly,
1.3170 + kWritableAlias=0
1.3171 + };
1.3172 +
1.3173 + friend class StringCharacterIterator;
1.3174 + friend class StringThreadTest;
1.3175 +
1.3176 + /*
1.3177 + * The following are all the class fields that are stored
1.3178 + * in each UnicodeString object.
1.3179 + * Note that UnicodeString has virtual functions,
1.3180 + * therefore there is an implicit vtable pointer
1.3181 + * as the first real field.
1.3182 + * The fields should be aligned such that no padding is
1.3183 + * necessary, mostly by having larger types first.
1.3184 + * On 32-bit machines, the size should be 32 bytes,
1.3185 + * on 64-bit machines (8-byte pointers), it should be 40 bytes.
1.3186 + */
1.3187 + // (implicit) *vtable;
1.3188 + int32_t fLength; // number of characters in fArray
1.3189 + int32_t fCapacity; // sizeof fArray
1.3190 + UChar *fArray; // the Unicode data
1.3191 + uint16_t fFlags; // bit flags: see constants above
1.3192 + UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
1.3193 +
1.3194 +};
1.3195 +
1.3196 +/**
1.3197 + * Create a new UnicodeString with the concatenation of two others.
1.3198 + *
1.3199 + * @param s1 The first string to be copied to the new one.
1.3200 + * @param s2 The second string to be copied to the new one, after s1.
1.3201 + * @return UnicodeString(s1).append(s2)
1.3202 + * @stable ICU 2.8
1.3203 + */
1.3204 +U_COMMON_API UnicodeString U_EXPORT2
1.3205 +operator+ (const UnicodeString &s1, const UnicodeString &s2);
1.3206 +
1.3207 +U_NAMESPACE_END
1.3208 +
1.3209 +// inline implementations -------------------------------------------------- ***
1.3210 +
1.3211 +//========================================
1.3212 +// Array copying
1.3213 +//========================================
1.3214 +/**
1.3215 + * Copy an array of UnicodeString OBJECTS (not pointers).
1.3216 + * @internal
1.3217 + */
1.3218 +inline void
1.3219 +uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
1.3220 +{ while(count-- > 0) *dst++ = *src++; }
1.3221 +
1.3222 +/**
1.3223 + * Copy an array of UnicodeString OBJECTS (not pointers).
1.3224 + * @internal
1.3225 + */
1.3226 +inline void
1.3227 +uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
1.3228 + U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
1.3229 +{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
1.3230 +
1.3231 +U_NAMESPACE_BEGIN
1.3232 +
1.3233 +//========================================
1.3234 +// Inline members
1.3235 +//========================================
1.3236 +
1.3237 +//========================================
1.3238 +// Privates
1.3239 +//========================================
1.3240 +
1.3241 +inline void
1.3242 +UnicodeString::pinIndex(int32_t& start) const
1.3243 +{
1.3244 + // pin index
1.3245 + if(start < 0) {
1.3246 + start = 0;
1.3247 + } else if(start > fLength) {
1.3248 + start = fLength;
1.3249 + }
1.3250 +}
1.3251 +
1.3252 +inline void
1.3253 +UnicodeString::pinIndices(int32_t& start,
1.3254 + int32_t& _length) const
1.3255 +{
1.3256 + // pin indices
1.3257 + if(start < 0) {
1.3258 + start = 0;
1.3259 + } else if(start > fLength) {
1.3260 + start = fLength;
1.3261 + }
1.3262 + if(_length < 0) {
1.3263 + _length = 0;
1.3264 + } else if(_length > (fLength - start)) {
1.3265 + _length = (fLength - start);
1.3266 + }
1.3267 +}
1.3268 +
1.3269 +inline UChar*
1.3270 +UnicodeString::getArrayStart()
1.3271 +{ return fArray; }
1.3272 +
1.3273 +inline const UChar*
1.3274 +UnicodeString::getArrayStart() const
1.3275 +{ return fArray; }
1.3276 +
1.3277 +//========================================
1.3278 +// Read-only implementation methods
1.3279 +//========================================
1.3280 +inline int32_t
1.3281 +UnicodeString::length() const
1.3282 +{ return fLength; }
1.3283 +
1.3284 +inline int32_t
1.3285 +UnicodeString::getCapacity() const
1.3286 +{ return fCapacity; }
1.3287 +
1.3288 +inline int32_t
1.3289 +UnicodeString::hashCode() const
1.3290 +{ return doHashCode(); }
1.3291 +
1.3292 +inline UBool
1.3293 +UnicodeString::isBogus() const
1.3294 +{ return (UBool)(fFlags & kIsBogus); }
1.3295 +
1.3296 +inline const UChar *
1.3297 +UnicodeString::getBuffer() const {
1.3298 + if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
1.3299 + return fArray;
1.3300 + } else {
1.3301 + return 0;
1.3302 + }
1.3303 +}
1.3304 +
1.3305 +//========================================
1.3306 +// Read-only alias methods
1.3307 +//========================================
1.3308 +inline int8_t
1.3309 +UnicodeString::doCompare(int32_t start,
1.3310 + int32_t length,
1.3311 + const UnicodeString& srcText,
1.3312 + int32_t srcStart,
1.3313 + int32_t srcLength) const
1.3314 +{
1.3315 + if(srcText.isBogus()) {
1.3316 + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
1.3317 + } else {
1.3318 + srcText.pinIndices(srcStart, srcLength);
1.3319 + return doCompare(start, length, srcText.fArray, srcStart, srcLength);
1.3320 + }
1.3321 +}
1.3322 +
1.3323 +inline UBool
1.3324 +UnicodeString::operator== (const UnicodeString& text) const
1.3325 +{
1.3326 + if(isBogus()) {
1.3327 + return text.isBogus();
1.3328 + } else {
1.3329 + return
1.3330 + !text.isBogus() &&
1.3331 + fLength == text.fLength &&
1.3332 + doCompare(0, fLength, text, 0, text.fLength) == 0;
1.3333 + }
1.3334 +}
1.3335 +
1.3336 +inline UBool
1.3337 +UnicodeString::operator!= (const UnicodeString& text) const
1.3338 +{ return (! operator==(text)); }
1.3339 +
1.3340 +inline UBool
1.3341 +UnicodeString::operator> (const UnicodeString& text) const
1.3342 +{ return doCompare(0, fLength, text, 0, text.fLength) == 1; }
1.3343 +
1.3344 +inline UBool
1.3345 +UnicodeString::operator< (const UnicodeString& text) const
1.3346 +{ return doCompare(0, fLength, text, 0, text.fLength) == -1; }
1.3347 +
1.3348 +inline UBool
1.3349 +UnicodeString::operator>= (const UnicodeString& text) const
1.3350 +{ return doCompare(0, fLength, text, 0, text.fLength) != -1; }
1.3351 +
1.3352 +inline UBool
1.3353 +UnicodeString::operator<= (const UnicodeString& text) const
1.3354 +{ return doCompare(0, fLength, text, 0, text.fLength) != 1; }
1.3355 +
1.3356 +inline int8_t
1.3357 +UnicodeString::compare(const UnicodeString& text) const
1.3358 +{ return doCompare(0, fLength, text, 0, text.fLength); }
1.3359 +
1.3360 +inline int8_t
1.3361 +UnicodeString::compare(int32_t start,
1.3362 + int32_t _length,
1.3363 + const UnicodeString& srcText) const
1.3364 +{ return doCompare(start, _length, srcText, 0, srcText.fLength); }
1.3365 +
1.3366 +inline int8_t
1.3367 +UnicodeString::compare(const UChar *srcChars,
1.3368 + int32_t srcLength) const
1.3369 +{ return doCompare(0, fLength, srcChars, 0, srcLength); }
1.3370 +
1.3371 +inline int8_t
1.3372 +UnicodeString::compare(int32_t start,
1.3373 + int32_t _length,
1.3374 + const UnicodeString& srcText,
1.3375 + int32_t srcStart,
1.3376 + int32_t srcLength) const
1.3377 +{ return doCompare(start, _length, srcText, srcStart, srcLength); }
1.3378 +
1.3379 +inline int8_t
1.3380 +UnicodeString::compare(int32_t start,
1.3381 + int32_t _length,
1.3382 + const UChar *srcChars) const
1.3383 +{ return doCompare(start, _length, srcChars, 0, _length); }
1.3384 +
1.3385 +inline int8_t
1.3386 +UnicodeString::compare(int32_t start,
1.3387 + int32_t _length,
1.3388 + const UChar *srcChars,
1.3389 + int32_t srcStart,
1.3390 + int32_t srcLength) const
1.3391 +{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
1.3392 +
1.3393 +inline int8_t
1.3394 +UnicodeString::compareBetween(int32_t start,
1.3395 + int32_t limit,
1.3396 + const UnicodeString& srcText,
1.3397 + int32_t srcStart,
1.3398 + int32_t srcLimit) const
1.3399 +{ return doCompare(start, limit - start,
1.3400 + srcText, srcStart, srcLimit - srcStart); }
1.3401 +
1.3402 +inline int8_t
1.3403 +UnicodeString::doCompareCodePointOrder(int32_t start,
1.3404 + int32_t length,
1.3405 + const UnicodeString& srcText,
1.3406 + int32_t srcStart,
1.3407 + int32_t srcLength) const
1.3408 +{
1.3409 + if(srcText.isBogus()) {
1.3410 + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
1.3411 + } else {
1.3412 + srcText.pinIndices(srcStart, srcLength);
1.3413 + return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
1.3414 + }
1.3415 +}
1.3416 +
1.3417 +inline int8_t
1.3418 +UnicodeString::compareCodePointOrder(const UnicodeString& text) const
1.3419 +{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
1.3420 +
1.3421 +inline int8_t
1.3422 +UnicodeString::compareCodePointOrder(int32_t start,
1.3423 + int32_t _length,
1.3424 + const UnicodeString& srcText) const
1.3425 +{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
1.3426 +
1.3427 +inline int8_t
1.3428 +UnicodeString::compareCodePointOrder(const UChar *srcChars,
1.3429 + int32_t srcLength) const
1.3430 +{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
1.3431 +
1.3432 +inline int8_t
1.3433 +UnicodeString::compareCodePointOrder(int32_t start,
1.3434 + int32_t _length,
1.3435 + const UnicodeString& srcText,
1.3436 + int32_t srcStart,
1.3437 + int32_t srcLength) const
1.3438 +{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
1.3439 +
1.3440 +inline int8_t
1.3441 +UnicodeString::compareCodePointOrder(int32_t start,
1.3442 + int32_t _length,
1.3443 + const UChar *srcChars) const
1.3444 +{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
1.3445 +
1.3446 +inline int8_t
1.3447 +UnicodeString::compareCodePointOrder(int32_t start,
1.3448 + int32_t _length,
1.3449 + const UChar *srcChars,
1.3450 + int32_t srcStart,
1.3451 + int32_t srcLength) const
1.3452 +{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
1.3453 +
1.3454 +inline int8_t
1.3455 +UnicodeString::compareCodePointOrderBetween(int32_t start,
1.3456 + int32_t limit,
1.3457 + const UnicodeString& srcText,
1.3458 + int32_t srcStart,
1.3459 + int32_t srcLimit) const
1.3460 +{ return doCompareCodePointOrder(start, limit - start,
1.3461 + srcText, srcStart, srcLimit - srcStart); }
1.3462 +
1.3463 +inline int8_t
1.3464 +UnicodeString::doCaseCompare(int32_t start,
1.3465 + int32_t length,
1.3466 + const UnicodeString &srcText,
1.3467 + int32_t srcStart,
1.3468 + int32_t srcLength,
1.3469 + uint32_t options) const
1.3470 +{
1.3471 + if(srcText.isBogus()) {
1.3472 + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
1.3473 + } else {
1.3474 + srcText.pinIndices(srcStart, srcLength);
1.3475 + return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
1.3476 + }
1.3477 +}
1.3478 +
1.3479 +inline int8_t
1.3480 +UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
1.3481 + return doCaseCompare(0, fLength, text, 0, text.fLength, options);
1.3482 +}
1.3483 +
1.3484 +inline int8_t
1.3485 +UnicodeString::caseCompare(int32_t start,
1.3486 + int32_t _length,
1.3487 + const UnicodeString &srcText,
1.3488 + uint32_t options) const {
1.3489 + return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
1.3490 +}
1.3491 +
1.3492 +inline int8_t
1.3493 +UnicodeString::caseCompare(const UChar *srcChars,
1.3494 + int32_t srcLength,
1.3495 + uint32_t options) const {
1.3496 + return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
1.3497 +}
1.3498 +
1.3499 +inline int8_t
1.3500 +UnicodeString::caseCompare(int32_t start,
1.3501 + int32_t _length,
1.3502 + const UnicodeString &srcText,
1.3503 + int32_t srcStart,
1.3504 + int32_t srcLength,
1.3505 + uint32_t options) const {
1.3506 + return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
1.3507 +}
1.3508 +
1.3509 +inline int8_t
1.3510 +UnicodeString::caseCompare(int32_t start,
1.3511 + int32_t _length,
1.3512 + const UChar *srcChars,
1.3513 + uint32_t options) const {
1.3514 + return doCaseCompare(start, _length, srcChars, 0, _length, options);
1.3515 +}
1.3516 +
1.3517 +inline int8_t
1.3518 +UnicodeString::caseCompare(int32_t start,
1.3519 + int32_t _length,
1.3520 + const UChar *srcChars,
1.3521 + int32_t srcStart,
1.3522 + int32_t srcLength,
1.3523 + uint32_t options) const {
1.3524 + return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
1.3525 +}
1.3526 +
1.3527 +inline int8_t
1.3528 +UnicodeString::caseCompareBetween(int32_t start,
1.3529 + int32_t limit,
1.3530 + const UnicodeString &srcText,
1.3531 + int32_t srcStart,
1.3532 + int32_t srcLimit,
1.3533 + uint32_t options) const {
1.3534 + return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
1.3535 +}
1.3536 +
1.3537 +inline int32_t
1.3538 +UnicodeString::indexOf(const UnicodeString& srcText,
1.3539 + int32_t srcStart,
1.3540 + int32_t srcLength,
1.3541 + int32_t start,
1.3542 + int32_t _length) const
1.3543 +{
1.3544 + if(!srcText.isBogus()) {
1.3545 + srcText.pinIndices(srcStart, srcLength);
1.3546 + if(srcLength > 0) {
1.3547 + return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
1.3548 + }
1.3549 + }
1.3550 + return -1;
1.3551 +}
1.3552 +
1.3553 +inline int32_t
1.3554 +UnicodeString::indexOf(const UnicodeString& text) const
1.3555 +{ return indexOf(text, 0, text.fLength, 0, fLength); }
1.3556 +
1.3557 +inline int32_t
1.3558 +UnicodeString::indexOf(const UnicodeString& text,
1.3559 + int32_t start) const {
1.3560 + pinIndex(start);
1.3561 + return indexOf(text, 0, text.fLength, start, fLength - start);
1.3562 +}
1.3563 +
1.3564 +inline int32_t
1.3565 +UnicodeString::indexOf(const UnicodeString& text,
1.3566 + int32_t start,
1.3567 + int32_t _length) const
1.3568 +{ return indexOf(text, 0, text.fLength, start, _length); }
1.3569 +
1.3570 +inline int32_t
1.3571 +UnicodeString::indexOf(const UChar *srcChars,
1.3572 + int32_t srcLength,
1.3573 + int32_t start) const {
1.3574 + pinIndex(start);
1.3575 + return indexOf(srcChars, 0, srcLength, start, fLength - start);
1.3576 +}
1.3577 +
1.3578 +inline int32_t
1.3579 +UnicodeString::indexOf(const UChar *srcChars,
1.3580 + int32_t srcLength,
1.3581 + int32_t start,
1.3582 + int32_t _length) const
1.3583 +{ return indexOf(srcChars, 0, srcLength, start, _length); }
1.3584 +
1.3585 +inline int32_t
1.3586 +UnicodeString::indexOf(UChar c,
1.3587 + int32_t start,
1.3588 + int32_t _length) const
1.3589 +{ return doIndexOf(c, start, _length); }
1.3590 +
1.3591 +inline int32_t
1.3592 +UnicodeString::indexOf(UChar32 c,
1.3593 + int32_t start,
1.3594 + int32_t _length) const
1.3595 +{ return doIndexOf(c, start, _length); }
1.3596 +
1.3597 +inline int32_t
1.3598 +UnicodeString::indexOf(UChar c) const
1.3599 +{ return doIndexOf(c, 0, fLength); }
1.3600 +
1.3601 +inline int32_t
1.3602 +UnicodeString::indexOf(UChar32 c) const
1.3603 +{ return indexOf(c, 0, fLength); }
1.3604 +
1.3605 +inline int32_t
1.3606 +UnicodeString::indexOf(UChar c,
1.3607 + int32_t start) const {
1.3608 + pinIndex(start);
1.3609 + return doIndexOf(c, start, fLength - start);
1.3610 +}
1.3611 +
1.3612 +inline int32_t
1.3613 +UnicodeString::indexOf(UChar32 c,
1.3614 + int32_t start) const {
1.3615 + pinIndex(start);
1.3616 + return indexOf(c, start, fLength - start);
1.3617 +}
1.3618 +
1.3619 +inline int32_t
1.3620 +UnicodeString::lastIndexOf(const UChar *srcChars,
1.3621 + int32_t srcLength,
1.3622 + int32_t start,
1.3623 + int32_t _length) const
1.3624 +{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
1.3625 +
1.3626 +inline int32_t
1.3627 +UnicodeString::lastIndexOf(const UChar *srcChars,
1.3628 + int32_t srcLength,
1.3629 + int32_t start) const {
1.3630 + pinIndex(start);
1.3631 + return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
1.3632 +}
1.3633 +
1.3634 +inline int32_t
1.3635 +UnicodeString::lastIndexOf(const UnicodeString& srcText,
1.3636 + int32_t srcStart,
1.3637 + int32_t srcLength,
1.3638 + int32_t start,
1.3639 + int32_t _length) const
1.3640 +{
1.3641 + if(!srcText.isBogus()) {
1.3642 + srcText.pinIndices(srcStart, srcLength);
1.3643 + if(srcLength > 0) {
1.3644 + return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
1.3645 + }
1.3646 + }
1.3647 + return -1;
1.3648 +}
1.3649 +
1.3650 +inline int32_t
1.3651 +UnicodeString::lastIndexOf(const UnicodeString& text,
1.3652 + int32_t start,
1.3653 + int32_t _length) const
1.3654 +{ return lastIndexOf(text, 0, text.fLength, start, _length); }
1.3655 +
1.3656 +inline int32_t
1.3657 +UnicodeString::lastIndexOf(const UnicodeString& text,
1.3658 + int32_t start) const {
1.3659 + pinIndex(start);
1.3660 + return lastIndexOf(text, 0, text.fLength, start, fLength - start);
1.3661 +}
1.3662 +
1.3663 +inline int32_t
1.3664 +UnicodeString::lastIndexOf(const UnicodeString& text) const
1.3665 +{ return lastIndexOf(text, 0, text.fLength, 0, fLength); }
1.3666 +
1.3667 +inline int32_t
1.3668 +UnicodeString::lastIndexOf(UChar c,
1.3669 + int32_t start,
1.3670 + int32_t _length) const
1.3671 +{ return doLastIndexOf(c, start, _length); }
1.3672 +
1.3673 +inline int32_t
1.3674 +UnicodeString::lastIndexOf(UChar32 c,
1.3675 + int32_t start,
1.3676 + int32_t _length) const {
1.3677 + return doLastIndexOf(c, start, _length);
1.3678 +}
1.3679 +
1.3680 +inline int32_t
1.3681 +UnicodeString::lastIndexOf(UChar c) const
1.3682 +{ return doLastIndexOf(c, 0, fLength); }
1.3683 +
1.3684 +inline int32_t
1.3685 +UnicodeString::lastIndexOf(UChar32 c) const {
1.3686 + return lastIndexOf(c, 0, fLength);
1.3687 +}
1.3688 +
1.3689 +inline int32_t
1.3690 +UnicodeString::lastIndexOf(UChar c,
1.3691 + int32_t start) const {
1.3692 + pinIndex(start);
1.3693 + return doLastIndexOf(c, start, fLength - start);
1.3694 +}
1.3695 +
1.3696 +inline int32_t
1.3697 +UnicodeString::lastIndexOf(UChar32 c,
1.3698 + int32_t start) const {
1.3699 + pinIndex(start);
1.3700 + return lastIndexOf(c, start, fLength - start);
1.3701 +}
1.3702 +
1.3703 +inline UBool
1.3704 +UnicodeString::startsWith(const UnicodeString& text) const
1.3705 +{ return compare(0, text.fLength, text, 0, text.fLength) == 0; }
1.3706 +
1.3707 +inline UBool
1.3708 +UnicodeString::startsWith(const UnicodeString& srcText,
1.3709 + int32_t srcStart,
1.3710 + int32_t srcLength) const
1.3711 +{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
1.3712 +
1.3713 +inline UBool
1.3714 +UnicodeString::startsWith(const UChar *srcChars,
1.3715 + int32_t srcLength) const
1.3716 +{ return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
1.3717 +
1.3718 +inline UBool
1.3719 +UnicodeString::startsWith(const UChar *srcChars,
1.3720 + int32_t srcStart,
1.3721 + int32_t srcLength) const
1.3722 +{ return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
1.3723 +
1.3724 +inline UBool
1.3725 +UnicodeString::endsWith(const UnicodeString& text) const
1.3726 +{ return doCompare(fLength - text.fLength, text.fLength,
1.3727 + text, 0, text.fLength) == 0; }
1.3728 +
1.3729 +inline UBool
1.3730 +UnicodeString::endsWith(const UnicodeString& srcText,
1.3731 + int32_t srcStart,
1.3732 + int32_t srcLength) const {
1.3733 + srcText.pinIndices(srcStart, srcLength);
1.3734 + return doCompare(fLength - srcLength, srcLength,
1.3735 + srcText, srcStart, srcLength) == 0;
1.3736 +}
1.3737 +
1.3738 +inline UBool
1.3739 +UnicodeString::endsWith(const UChar *srcChars,
1.3740 + int32_t srcLength) const {
1.3741 + if(srcLength < 0) {
1.3742 + srcLength = u_strlen(srcChars);
1.3743 + }
1.3744 + return doCompare(fLength - srcLength, srcLength,
1.3745 + srcChars, 0, srcLength) == 0;
1.3746 +}
1.3747 +
1.3748 +inline UBool
1.3749 +UnicodeString::endsWith(const UChar *srcChars,
1.3750 + int32_t srcStart,
1.3751 + int32_t srcLength) const {
1.3752 + if(srcLength < 0) {
1.3753 + srcLength = u_strlen(srcChars + srcStart);
1.3754 + }
1.3755 + return doCompare(fLength - srcLength, srcLength,
1.3756 + srcChars, srcStart, srcLength) == 0;
1.3757 +}
1.3758 +
1.3759 +//========================================
1.3760 +// replace
1.3761 +//========================================
1.3762 +inline UnicodeString&
1.3763 +UnicodeString::replace(int32_t start,
1.3764 + int32_t _length,
1.3765 + const UnicodeString& srcText)
1.3766 +{ return doReplace(start, _length, srcText, 0, srcText.fLength); }
1.3767 +
1.3768 +inline UnicodeString&
1.3769 +UnicodeString::replace(int32_t start,
1.3770 + int32_t _length,
1.3771 + const UnicodeString& srcText,
1.3772 + int32_t srcStart,
1.3773 + int32_t srcLength)
1.3774 +{ return doReplace(start, _length, srcText, srcStart, srcLength); }
1.3775 +
1.3776 +inline UnicodeString&
1.3777 +UnicodeString::replace(int32_t start,
1.3778 + int32_t _length,
1.3779 + const UChar *srcChars,
1.3780 + int32_t srcLength)
1.3781 +{ return doReplace(start, _length, srcChars, 0, srcLength); }
1.3782 +
1.3783 +inline UnicodeString&
1.3784 +UnicodeString::replace(int32_t start,
1.3785 + int32_t _length,
1.3786 + const UChar *srcChars,
1.3787 + int32_t srcStart,
1.3788 + int32_t srcLength)
1.3789 +{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
1.3790 +
1.3791 +inline UnicodeString& // Forwards to doReplace() with the single code unit srcChar as the replacement text
1.3792 +UnicodeString::replace(int32_t start,
1.3793 + int32_t _length,
1.3794 + UChar srcChar)
1.3795 +{ return doReplace(start, _length, &srcChar, 0, 1); } // the by-value parameter's address serves as a one-element source array
1.3796 +
1.3797 +inline UnicodeString& // Passes srcChar through U16_APPEND into a local buffer, then forwards that buffer to doReplace()
1.3798 +UnicodeString::replace(int32_t start,
1.3799 + int32_t _length,
1.3800 + UChar32 srcChar) {
1.3801 + UChar buffer[U16_MAX_LENGTH]; // local scratch array handed to U16_APPEND
1.3802 + int32_t count = 0; // passed to U16_APPEND as the offset, then used as the replacement length
1.3803 + UBool isError = FALSE; // handed to U16_APPEND; never read afterwards in this function
1.3804 + U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1.3805 + return doReplace(start, _length, buffer, 0, count); // NOTE(review): if U16_APPEND leaves count at 0 the replacement is empty - confirm intended
1.3806 +}
1.3807 +
1.3808 +inline UnicodeString& // Start/limit flavour of replace(): forwards to doReplace() using all of srcText
1.3809 +UnicodeString::replaceBetween(int32_t start,
1.3810 + int32_t limit,
1.3811 + const UnicodeString& srcText)
1.3812 +{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); } // limit is converted to a length as limit - start
1.3813 +
1.3814 +inline UnicodeString& // Start/limit flavour of replace() for both the target range and the source range
1.3815 +UnicodeString::replaceBetween(int32_t start,
1.3816 + int32_t limit,
1.3817 + const UnicodeString& srcText,
1.3818 + int32_t srcStart,
1.3819 + int32_t srcLimit)
1.3820 +{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } // both limits are converted to lengths by subtracting their starts
1.3821 +
1.3822 +inline UnicodeString& // Forwards to the eight-argument findAndReplace() with full ranges for this string, oldText and newText
1.3823 +UnicodeString::findAndReplace(const UnicodeString& oldText,
1.3824 + const UnicodeString& newText)
1.3825 +{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength, // search range (0, fLength); pattern range (0, oldText.fLength)
1.3826 + newText, 0, newText.fLength); } // replacement range (0, newText.fLength)
1.3827 +
1.3828 +inline UnicodeString& // Forwards to the eight-argument findAndReplace(), limiting the search to (start, _length)
1.3829 +UnicodeString::findAndReplace(int32_t start,
1.3830 + int32_t _length,
1.3831 + const UnicodeString& oldText,
1.3832 + const UnicodeString& newText)
1.3833 +{ return findAndReplace(start, _length, oldText, 0, oldText.fLength, // oldText is passed with its full range
1.3834 + newText, 0, newText.fLength); } // newText is passed with its full range
1.3835 +
1.3836 +// ============================
1.3837 +// extract
1.3838 +// ============================
1.3839 +inline void // Copies the range (start, _length) of this string into target by calling target.replace()
1.3840 +UnicodeString::doExtract(int32_t start,
1.3841 + int32_t _length,
1.3842 + UnicodeString& target) const
1.3843 +{ target.replace(0, target.fLength, *this, start, _length); } // the replaced range (0, target.fLength) spans target's whole current contents
1.3844 +
1.3845 +inline void // Public wrapper: forwards unchanged to the UChar* overload of doExtract()
1.3846 +UnicodeString::extract(int32_t start,
1.3847 + int32_t _length,
1.3848 + UChar *target,
1.3849 + int32_t targetStart) const
1.3850 +{ doExtract(start, _length, target, targetStart); } // that doExtract() overload is defined outside this section
1.3851 +
1.3852 +inline void // Public wrapper: forwards unchanged to the UnicodeString& overload of doExtract()
1.3853 +UnicodeString::extract(int32_t start,
1.3854 + int32_t _length,
1.3855 + UnicodeString& target) const
1.3856 +{ doExtract(start, _length, target); } // target receives the extracted range via doExtract()
1.3857 +
1.3858 +#if !UCONFIG_NO_CONVERSION
1.3859 +
1.3860 +inline int32_t // Forwards to the five-argument extract() overload, supplying a destination size chosen from dst
1.3861 +UnicodeString::extract(int32_t start,
1.3862 + int32_t _length,
1.3863 + char *dst,
1.3864 + const char *codepage) const
1.3865 +
1.3866 +{
1.3867 + // This dstSize value will be checked explicitly
1.3868 + return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); // size argument is 0xffffffff for a non-null dst, 0 for a null dst
1.3869 +}
1.3870 +
1.3871 +#endif
1.3872 +
1.3873 +inline void // Start/limit flavour of extract(): pins both indices, then forwards to the UChar* doExtract()
1.3874 +UnicodeString::extractBetween(int32_t start,
1.3875 + int32_t limit,
1.3876 + UChar *dst,
1.3877 + int32_t dstStart) const {
1.3878 + pinIndex(start); // presumably clamps start into this string's valid range - confirm in pinIndex()
1.3879 + pinIndex(limit); // same treatment for limit
1.3880 + doExtract(start, limit - start, dst, dstStart); // length is derived from the pinned indices
1.3881 +}
1.3882 +
1.3883 +inline UChar // Bounds-checked read of one code unit: fArray[offset] or kInvalidUChar
1.3884 +UnicodeString::doCharAt(int32_t offset) const
1.3885 +{
1.3886 + if((uint32_t)offset < (uint32_t)fLength) { // unsigned compare: a negative offset becomes a large value, so one test covers both bounds
1.3887 + return fArray[offset];
1.3888 + } else {
1.3889 + return kInvalidUChar; // offset is outside 0..fLength-1
1.3890 + }
1.3891 +}
1.3892 +
1.3893 +inline UChar // Public accessor: returns doCharAt(offset), which yields kInvalidUChar for an out-of-range offset
1.3894 +UnicodeString::charAt(int32_t offset) const
1.3895 +{ return doCharAt(offset); }
1.3896 +
1.3897 +inline UChar // Read-only subscript: returns a UChar by value through doCharAt(), not a reference
1.3898 +UnicodeString::operator[] (int32_t offset) const
1.3899 +{ return doCharAt(offset); }
1.3900 +
1.3901 +inline UChar32 // Bounds-checked read of a UChar32 via U16_GET, or kInvalidUChar when offset is out of range
1.3902 +UnicodeString::char32At(int32_t offset) const
1.3903 +{
1.3904 + if((uint32_t)offset < (uint32_t)fLength) { // unsigned compare also rejects negative offsets
1.3905 + UChar32 c;
1.3906 + U16_GET(fArray, 0, offset, fLength, c); // macro is given the bounds 0 and fLength and stores its result in c
1.3907 + return c;
1.3908 + } else {
1.3909 + return kInvalidUChar; // same sentinel that doCharAt() returns, converted to UChar32
1.3910 + }
1.3911 +}
1.3912 +
1.3913 +inline int32_t // Returns offset after U16_SET_CP_START has processed it, or 0 when offset is out of range
1.3914 +UnicodeString::getChar32Start(int32_t offset) const {
1.3915 + if((uint32_t)offset < (uint32_t)fLength) { // unsigned compare also rejects negative offsets
1.3916 + U16_SET_CP_START(fArray, 0, offset); // macro receives lower bound 0; presumably adjusts offset in place - see unicode/utf16.h
1.3917 + return offset;
1.3918 + } else {
1.3919 + return 0; // out-of-range offsets map to the start of the string
1.3920 + }
1.3921 +}
1.3922 +
1.3923 +inline int32_t // Returns offset after U16_SET_CP_LIMIT has processed it, or fLength when offset is out of range
1.3924 +UnicodeString::getChar32Limit(int32_t offset) const {
1.3925 + if((uint32_t)offset < (uint32_t)fLength) { // unsigned compare also rejects negative offsets
1.3926 + U16_SET_CP_LIMIT(fArray, 0, offset, fLength); // macro receives bounds 0 and fLength; presumably adjusts offset in place - see unicode/utf16.h
1.3927 + return offset;
1.3928 + } else {
1.3929 + return fLength; // out-of-range offsets map to the end of the string
1.3930 + }
1.3931 +}
1.3932 +
1.3933 +inline UBool // TRUE exactly when fLength is 0
1.3934 +UnicodeString::isEmpty() const {
1.3935 + return fLength == 0; // only the length is examined; no flags are consulted here
1.3936 +}
1.3937 +
1.3938 +//========================================
1.3939 +// Write implementation methods
1.3940 +//========================================
1.3941 +inline const UChar * // Returns fArray with a 0 code unit at index fLength, or 0 when that cannot be provided
1.3942 +UnicodeString::getTerminatedBuffer() {
1.3943 + if(fFlags&(kIsBogus|kOpenGetBuffer)) { // refuse when either the kIsBogus or the kOpenGetBuffer flag is set
1.3944 + return 0;
1.3945 + } else if(fLength<fCapacity && fArray[fLength]==0) { // a terminator already sits just past the contents: nothing to write
1.3946 + return fArray;
1.3947 + } else if(cloneArrayIfNeeded(fLength+1)) { // ask for room for one extra unit; a false result means failure
1.3948 + fArray[fLength]=0; // write the terminator; fLength itself is not changed
1.3949 + return fArray;
1.3950 + } else {
1.3951 + return 0; // cloneArrayIfNeeded() reported failure
1.3952 + }
1.3953 +}
1.3954 +
1.3955 +inline UnicodeString& // Assigns a single code unit: doReplace() over the whole current range (0, fLength)
1.3956 +UnicodeString::operator= (UChar ch)
1.3957 +{ return doReplace(0, fLength, &ch, 0, 1); } // unlike setTo(UChar), unBogus() is not called first
1.3958 +
1.3959 +inline UnicodeString& // Assigns a single code point by delegating to replace(int32_t, int32_t, UChar32)
1.3960 +UnicodeString::operator= (UChar32 ch)
1.3961 +{ return replace(0, fLength, ch); } // replaced range (0, fLength) is the whole current contents
1.3962 +
1.3963 +inline UnicodeString& // Sets this string to the range (srcStart, srcLength) of srcText
1.3964 +UnicodeString::setTo(const UnicodeString& srcText,
1.3965 + int32_t srcStart,
1.3966 + int32_t srcLength)
1.3967 +{
1.3968 + unBogus(); // called before the replacement so that fLength is read afterwards
1.3969 + return doReplace(0, fLength, srcText, srcStart, srcLength); // replaces the whole current range (0, fLength)
1.3970 +}
1.3971 +
1.3972 +inline UnicodeString& // Sets this string to the part of srcText from srcStart up to srcText.fLength
1.3973 +UnicodeString::setTo(const UnicodeString& srcText,
1.3974 + int32_t srcStart)
1.3975 +{
1.3976 + unBogus(); // called before the replacement so that fLength is read afterwards
1.3977 + srcText.pinIndex(srcStart); // presumably clamps srcStart into srcText's range before it is used below - confirm in pinIndex()
1.3978 + return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart); // source length = units remaining after srcStart
1.3979 +}
1.3980 +
1.3981 +inline UnicodeString& // Sets this string to all of srcText
1.3982 +UnicodeString::setTo(const UnicodeString& srcText)
1.3983 +{
1.3984 + unBogus(); // called before the replacement so that fLength is read afterwards
1.3985 + return doReplace(0, fLength, srcText, 0, srcText.fLength); // whole current range replaced by the whole of srcText
1.3986 +}
1.3987 +
1.3988 +inline UnicodeString& // Sets this string from a UChar array, reading srcChars from offset 0
1.3989 +UnicodeString::setTo(const UChar *srcChars,
1.3990 + int32_t srcLength)
1.3991 +{
1.3992 + unBogus(); // called before the replacement so that fLength is read afterwards
1.3993 + return doReplace(0, fLength, srcChars, 0, srcLength); // srcLength is passed through unchanged
1.3994 +}
1.3995 +
1.3996 +inline UnicodeString& // Sets this string to the single code unit srcChar
1.3997 +UnicodeString::setTo(UChar srcChar)
1.3998 +{
1.3999 + unBogus(); // called before the replacement so that fLength is read afterwards
1.4000 + return doReplace(0, fLength, &srcChar, 0, 1); // the parameter's address serves as a one-element source array
1.4001 +}
1.4002 +
1.4003 +inline UnicodeString& // Sets this string to the single code point srcChar
1.4004 +UnicodeString::setTo(UChar32 srcChar)
1.4005 +{
1.4006 + unBogus(); // called before the replacement so that fLength is read afterwards
1.4007 + return replace(0, fLength, srcChar); // delegates to replace(int32_t, int32_t, UChar32)
1.4008 +}
1.4009 +
1.4010 +inline UnicodeString& // Appends one code unit: doReplace() on the zero-length range at fLength
1.4011 +UnicodeString::operator+= (UChar ch)
1.4012 +{ return doReplace(fLength, 0, &ch, 0, 1); } // the parameter's address serves as a one-element source array
1.4013 +
1.4014 +inline UnicodeString& // Appends one code point: passes ch through U16_APPEND, then forwards the buffer to doReplace()
1.4015 +UnicodeString::operator+= (UChar32 ch) {
1.4016 + UChar buffer[U16_MAX_LENGTH]; // local scratch array handed to U16_APPEND
1.4017 + int32_t _length = 0; // passed to U16_APPEND as the offset, then used as the source length
1.4018 + UBool isError = FALSE; // handed to U16_APPEND; never read afterwards in this function
1.4019 + U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
1.4020 + return doReplace(fLength, 0, buffer, 0, _length); // zero-length target range at fLength, i.e. the current end
1.4021 +}
1.4022 +
1.4023 +inline UnicodeString& // Appends all of srcText: doReplace() on the zero-length range at fLength
1.4024 +UnicodeString::operator+= (const UnicodeString& srcText)
1.4025 +{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); } // source range is (0, srcText.fLength)
1.4026 +
1.4027 +inline UnicodeString& // Appends the range (srcStart, srcLength) of srcText
1.4028 +UnicodeString::append(const UnicodeString& srcText,
1.4029 + int32_t srcStart,
1.4030 + int32_t srcLength)
1.4031 +{ return doReplace(fLength, 0, srcText, srcStart, srcLength); } // zero-length target range at fLength, i.e. the current end
1.4032 +
1.4033 +inline UnicodeString& // Appends all of srcText; the body is the same call as operator+=(const UnicodeString&)
1.4034 +UnicodeString::append(const UnicodeString& srcText)
1.4035 +{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); } // zero-length target range at fLength, i.e. the current end
1.4036 +
1.4037 +inline UnicodeString& // Appends the range (srcStart, srcLength) of the UChar array srcChars
1.4038 +UnicodeString::append(const UChar *srcChars,
1.4039 + int32_t srcStart,
1.4040 + int32_t srcLength)
1.4041 +{ return doReplace(fLength, 0, srcChars, srcStart, srcLength); } // zero-length target range at fLength, i.e. the current end
1.4042 +
1.4043 +inline UnicodeString& // Appends srcLength units of srcChars, reading from offset 0
1.4044 +UnicodeString::append(const UChar *srcChars,
1.4045 + int32_t srcLength)
1.4046 +{ return doReplace(fLength, 0, srcChars, 0, srcLength); } // zero-length target range at fLength, i.e. the current end
1.4047 +
1.4048 +inline UnicodeString& // Appends one code unit; the body is the same call as operator+=(UChar)
1.4049 +UnicodeString::append(UChar srcChar)
1.4050 +{ return doReplace(fLength, 0, &srcChar, 0, 1); } // the parameter's address serves as a one-element source array
1.4051 +
1.4052 +inline UnicodeString& // Appends one code point: passes srcChar through U16_APPEND, then forwards the buffer to doReplace()
1.4053 +UnicodeString::append(UChar32 srcChar) {
1.4054 + UChar buffer[U16_MAX_LENGTH]; // local scratch array handed to U16_APPEND
1.4055 + int32_t _length = 0; // passed to U16_APPEND as the offset, then used as the source length
1.4056 + UBool isError = FALSE; // handed to U16_APPEND; never read afterwards in this function
1.4057 + U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1.4058 + return doReplace(fLength, 0, buffer, 0, _length); // zero-length target range at fLength, i.e. the current end
1.4059 +}
1.4060 +
1.4061 +inline UnicodeString& // Inserts the range (srcStart, srcLength) of srcText at index start
1.4062 +UnicodeString::insert(int32_t start,
1.4063 + const UnicodeString& srcText,
1.4064 + int32_t srcStart,
1.4065 + int32_t srcLength)
1.4066 +{ return doReplace(start, 0, srcText, srcStart, srcLength); } // target length 0: doReplace() is asked to replace an empty range
1.4067 +
1.4068 +inline UnicodeString& // Inserts all of srcText at index start
1.4069 +UnicodeString::insert(int32_t start,
1.4070 + const UnicodeString& srcText)
1.4071 +{ return doReplace(start, 0, srcText, 0, srcText.fLength); } // target length 0; source range is (0, srcText.fLength)
1.4072 +
1.4073 +inline UnicodeString& // Inserts the range (srcStart, srcLength) of the UChar array srcChars at index start
1.4074 +UnicodeString::insert(int32_t start,
1.4075 + const UChar *srcChars,
1.4076 + int32_t srcStart,
1.4077 + int32_t srcLength)
1.4078 +{ return doReplace(start, 0, srcChars, srcStart, srcLength); } // target length 0: doReplace() is asked to replace an empty range
1.4079 +
1.4080 +inline UnicodeString& // Inserts srcLength units of srcChars at index start, reading from offset 0
1.4081 +UnicodeString::insert(int32_t start,
1.4082 + const UChar *srcChars,
1.4083 + int32_t srcLength)
1.4084 +{ return doReplace(start, 0, srcChars, 0, srcLength); } // target length 0: doReplace() is asked to replace an empty range
1.4085 +
1.4086 +inline UnicodeString& // Inserts the single code unit srcChar at index start
1.4087 +UnicodeString::insert(int32_t start,
1.4088 + UChar srcChar)
1.4089 +{ return doReplace(start, 0, &srcChar, 0, 1); } // the parameter's address serves as a one-element source array
1.4090 +
1.4091 +inline UnicodeString& // Inserts the single code point srcChar at index start
1.4092 +UnicodeString::insert(int32_t start,
1.4093 + UChar32 srcChar)
1.4094 +{ return replace(start, 0, srcChar); } // delegates to replace(int32_t, int32_t, UChar32) with a zero-length target range
1.4095 +
1.4096 +
1.4097 +inline UnicodeString& // Empties the string and returns *this
1.4098 +UnicodeString::remove()
1.4099 +{
1.4100 + // remove() of a bogus string makes the string empty and non-bogus
1.4101 + if(isBogus()) {
1.4102 + unBogus(); // bogus case: handled entirely by unBogus()
1.4103 + } else {
1.4104 + fLength = 0; // normal case: only the length is reset; fArray and fCapacity are not touched here
1.4105 + }
1.4106 + return *this;
1.4107 +}
1.4108 +
1.4109 +inline UnicodeString& // Removes a range; the "everything" case is routed to the no-argument remove()
1.4110 +UnicodeString::remove(int32_t start,
1.4111 + int32_t _length)
1.4112 +{
1.4113 + if(start <= 0 && _length == INT32_MAX) { // non-positive start with the maximum length is treated as "remove everything"
1.4114 + // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
1.4115 + return remove();
1.4116 + } else {
1.4117 + return doReplace(start, _length, NULL, 0, 0); // NULL source with zero length: the range is replaced by nothing
1.4118 + }
1.4119 +}
1.4120 +
1.4121 +inline UnicodeString& // Start/limit flavour of remove(): always forwards to doReplace(), with no special "everything" case
1.4122 +UnicodeString::removeBetween(int32_t start,
1.4123 + int32_t limit)
1.4124 +{ return doReplace(start, limit - start, NULL, 0, 0); } // limit becomes a length (limit - start); NULL source with zero length
1.4125 +
1.4126 +inline UBool // Shortens the string to targetLength; returns TRUE only when fLength was actually reduced
1.4127 +UnicodeString::truncate(int32_t targetLength)
1.4128 +{
1.4129 + if(isBogus() && targetLength == 0) {
1.4130 + // truncate(0) of a bogus string makes the string empty and non-bogus
1.4131 + unBogus();
1.4132 + return FALSE; // this path reports FALSE even though the bogus state was cleared
1.4133 + } else if((uint32_t)targetLength < (uint32_t)fLength) { // unsigned compare: a negative targetLength becomes a large value and fails the test
1.4134 + fLength = targetLength; // only the length changes; fArray is not modified here
1.4135 + return TRUE;
1.4136 + } else {
1.4137 + return FALSE; // targetLength is negative or not smaller than fLength: nothing changes
1.4138 + }
1.4139 +}
1.4140 +
1.4141 +inline UnicodeString& // Reverses the whole string by forwarding the range (0, fLength) to doReverse()
1.4142 +UnicodeString::reverse()
1.4143 +{ return doReverse(0, fLength); } // returns whatever doReverse() returns
1.4144 +
1.4145 +inline UnicodeString& // Reverses a sub-range by forwarding (start, _length) unchanged to doReverse()
1.4146 +UnicodeString::reverse(int32_t start,
1.4147 + int32_t _length)
1.4148 +{ return doReverse(start, _length); } // no index pinning is done in this wrapper
1.4149 +
1.4150 +U_NAMESPACE_END
1.4151 +
1.4152 +#endif