Symaptic: os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/ustring.h@260cb5ec6c19 (annotated)

sl@0	1	/*
sl@0	2	**********************************************************************
sl@0	3	* Copyright (C) 1998-2005, International Business Machines
sl@0	4	* Corporation and others. All Rights Reserved.
sl@0	5	**********************************************************************
sl@0	6	*
sl@0	7	* File ustring.h
sl@0	8	*
sl@0	9	* Modification History:
sl@0	10	*
sl@0	11	* Date Name Description
sl@0	12	* 12/07/98 bertrand Creation.
sl@0	13	******************************************************************************
sl@0	14	*/
sl@0	15
sl@0	16	#ifndef USTRING_H
sl@0	17	#define USTRING_H
sl@0	18
sl@0	19	#include "unicode/utypes.h"
sl@0	20	#include "unicode/putil.h"
sl@0	21	#include "unicode/uiter.h"
sl@0	22
sl@0	23	/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
sl@0	24	#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
sl@0	25	# define UBRK_TYPEDEF_UBREAK_ITERATOR
sl@0	26	typedef void UBreakIterator;
sl@0	27	#endif
sl@0	28
sl@0	29	/**
sl@0	30	* \file
sl@0	31	* \brief C API: Unicode string handling functions
sl@0	32	*
sl@0	33	* These C API functions provide general Unicode string handling.
sl@0	34	*
sl@0	35	* Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
sl@0	36	* functions. (For example, they do not check for bad arguments like NULL string pointers.)
sl@0	37	* In some cases, only the thread-safe variant of such a function is implemented here
sl@0	38	* (see u_strtok_r()).
sl@0	39	*
sl@0	40	* Other functions provide more Unicode-specific functionality like locale-specific
sl@0	41	* upper/lower-casing and string comparison in code point order.
sl@0	42	*
sl@0	43	* ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
sl@0	44	* UTF-16 encodes each Unicode code point with either one or two UChar code units.
sl@0	45	* (This is the default form of Unicode, and a forward-compatible extension of the original,
sl@0	46	* fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
sl@0	47	* in 1996.)
sl@0	48	*
sl@0	49	* Some APIs accept a 32-bit UChar32 value for a single code point.
sl@0	50	*
sl@0	51	* ICU also handles 16-bit Unicode text with unpaired surrogates.
sl@0	52	* Such text is not well-formed UTF-16.
sl@0	53	* Code-point-related functions treat unpaired surrogates as surrogate code points,
sl@0	54	* i.e., as separate units.
sl@0	55	*
sl@0	56	* Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
sl@0	57	* it is much more efficient even for random access because the code unit values
sl@0	58	* for single-unit characters vs. lead units vs. trail units are completely disjoint.
sl@0	59	* This means that it is easy to determine character (code point) boundaries from
sl@0	60	* random offsets in the string.
sl@0	61	*
sl@0	62	* Unicode (UTF-16) string processing is optimized for the single-unit case.
sl@0	63	* Although it is important to support supplementary characters
sl@0	64	* (which use pairs of lead/trail code units called "surrogates"),
sl@0	65	* their occurrence is rare. Almost all characters in modern use require only
sl@0	66	* a single UChar code unit (i.e., their code point values are <=0xffff).
sl@0	67	*
sl@0	68	* For more details see the User Guide Strings chapter (http://icu.sourceforge.net/userguide/strings.html).
sl@0	69	* For a discussion of the handling of unpaired surrogates see also
sl@0	70	* Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
sl@0	71	*/
sl@0	72
sl@0	73	/**
sl@0	74	* Determine the length of an array of UChar.
sl@0	75	*
sl@0	76	* @param s The array of UChars, NUL (U+0000) terminated.
sl@0	77	* @return The number of UChars in <code>s</code>, minus the terminator.
sl@0	78	* @stable ICU 2.0
sl@0	79	*/
sl@0	80	U_STABLE int32_t U_EXPORT2
sl@0	81	u_strlen(const UChar *s);
sl@0	82
sl@0	83	/**
sl@0	84	* Count Unicode code points in the length UChar code units of the string.
sl@0	85	* A code point may occupy either one or two UChar code units.
sl@0	86	* Counting code points involves reading all code units.
sl@0	87	*
sl@0	88	* This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
sl@0	89	*
sl@0	90	* @param s The input string.
sl@0	91	* @param length The number of UChar code units to be checked, or -1 to count all
sl@0	92	* code points before the first NUL (U+0000).
sl@0	93	* @return The number of code points in the specified code units.
sl@0	94	* @stable ICU 2.0
sl@0	95	*/
sl@0	96	U_STABLE int32_t U_EXPORT2
sl@0	97	u_countChar32(const UChar *s, int32_t length);
sl@0	98
sl@0	99	/**
sl@0	100	* Check if the string contains more Unicode code points than a certain number.
sl@0	101	* This is more efficient than counting all code points in the entire string
sl@0	102	* and comparing that number with a threshold.
sl@0	103	* This function may not need to scan the string at all if the length is known
sl@0	104	* (not -1 for NUL-termination) and falls within a certain range, and
sl@0	105	* never needs to count more than 'number+1' code points.
sl@0	106	* Logically equivalent to (u_countChar32(s, length)>number).
sl@0	107	* A Unicode code point may occupy either one or two UChar code units.
sl@0	108	*
sl@0	109	* @param s The input string.
sl@0	110	* @param length The length of the string, or -1 if it is NUL-terminated.
sl@0	111	* @param number The number of code points in the string is compared against
sl@0	112	* the 'number' parameter.
sl@0	113	* @return Boolean value for whether the string contains more Unicode code points
sl@0	114	* than 'number'. Same as (u_countChar32(s, length)>number).
sl@0	115	* @stable ICU 2.4
sl@0	116	*/
sl@0	117	U_STABLE UBool U_EXPORT2
sl@0	118	u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
sl@0	119
sl@0	120	/**
sl@0	121	* Concatenate two ustrings. Appends a copy of <code>src</code>,
sl@0	122	* including the null terminator, to <code>dst</code>. The initial copied
sl@0	123	* character from <code>src</code> overwrites the null terminator in <code>dst</code>.
sl@0	124	*
sl@0	125	* @param dst The destination string.
sl@0	126	* @param src The source string.
sl@0	127	* @return A pointer to <code>dst</code>.
sl@0	128	* @stable ICU 2.0
sl@0	129	*/
sl@0	130	U_STABLE UChar* U_EXPORT2
sl@0	131	u_strcat(UChar *dst,
sl@0	132	const UChar *src);
sl@0	133
sl@0	134	/**
sl@0	135	* Concatenate two ustrings.
sl@0	136	* Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
sl@0	137	* Adds a terminating NUL.
sl@0	138	* If src is too long, then only <code>n-1</code> characters will be copied
sl@0	139	* before the terminating NUL.
sl@0	140	* If <code>n<=0</code> then dst is not modified.
sl@0	141	*
sl@0	142	* @param dst The destination string.
sl@0	143	* @param src The source string.
sl@0	144	* @param n The maximum number of characters to append.
sl@0	145	* @return A pointer to <code>dst</code>.
sl@0	146	* @stable ICU 2.0
sl@0	147	*/
sl@0	148	U_STABLE UChar* U_EXPORT2
sl@0	149	u_strncat(UChar *dst,
sl@0	150	const UChar *src,
sl@0	151	int32_t n);
sl@0	152
sl@0	153	/**
sl@0	154	* Find the first occurrence of a substring in a string.
sl@0	155	* The substring is found at code point boundaries.
sl@0	156	* That means that if the substring begins with
sl@0	157	* a trail surrogate or ends with a lead surrogate,
sl@0	158	* then it is found only if these surrogates stand alone in the text.
sl@0	159	* Otherwise, the substring edge units would be matched against
sl@0	160	* halves of surrogate pairs.
sl@0	161	*
sl@0	162	* @param s The string to search (NUL-terminated).
sl@0	163	* @param substring The substring to find (NUL-terminated).
sl@0	164	* @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
sl@0	165	* or <code>s</code> itself if the <code>substring</code> is empty,
sl@0	166	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
sl@0	167	* @stable ICU 2.0
sl@0	168	*
sl@0	169	* @see u_strrstr
sl@0	170	* @see u_strFindFirst
sl@0	171	* @see u_strFindLast
sl@0	172	*/
sl@0	173	U_STABLE UChar * U_EXPORT2
sl@0	174	u_strstr(const UChar *s, const UChar *substring);
sl@0	175
sl@0	176	/**
sl@0	177	* Find the first occurrence of a substring in a string.
sl@0	178	* The substring is found at code point boundaries.
sl@0	179	* That means that if the substring begins with
sl@0	180	* a trail surrogate or ends with a lead surrogate,
sl@0	181	* then it is found only if these surrogates stand alone in the text.
sl@0	182	* Otherwise, the substring edge units would be matched against
sl@0	183	* halves of surrogate pairs.
sl@0	184	*
sl@0	185	* @param s The string to search.
sl@0	186	* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
sl@0	187	* @param substring The substring to find (NUL-terminated if subLength is -1).
sl@0	188	* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
sl@0	189	* @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
sl@0	190	* or <code>s</code> itself if the <code>substring</code> is empty,
sl@0	191	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
sl@0	192	* @stable ICU 2.4
sl@0	193	*
sl@0	194	* @see u_strstr
sl@0	195	* @see u_strFindLast
sl@0	196	*/
sl@0	197	U_STABLE UChar * U_EXPORT2
sl@0	198	u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
sl@0	199
sl@0	200	/**
sl@0	201	* Find the first occurrence of a BMP code point in a string.
sl@0	202	* A surrogate code point is found only if its match in the text is not
sl@0	203	* part of a surrogate pair.
sl@0	204	* A NUL character is found at the string terminator.
sl@0	205	*
sl@0	206	* @param s The string to search (NUL-terminated).
sl@0	207	* @param c The BMP code point to find.
sl@0	208	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
sl@0	209	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	210	* @stable ICU 2.0
sl@0	211	*
sl@0	212	* @see u_strchr32
sl@0	213	* @see u_memchr
sl@0	214	* @see u_strstr
sl@0	215	* @see u_strFindFirst
sl@0	216	*/
sl@0	217	U_STABLE UChar * U_EXPORT2
sl@0	218	u_strchr(const UChar *s, UChar c);
sl@0	219
sl@0	220	/**
sl@0	221	* Find the first occurrence of a code point in a string.
sl@0	222	* A surrogate code point is found only if its match in the text is not
sl@0	223	* part of a surrogate pair.
sl@0	224	* A NUL character is found at the string terminator.
sl@0	225	*
sl@0	226	* @param s The string to search (NUL-terminated).
sl@0	227	* @param c The code point to find.
sl@0	228	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
sl@0	229	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	230	* @stable ICU 2.0
sl@0	231	*
sl@0	232	* @see u_strchr
sl@0	233	* @see u_memchr32
sl@0	234	* @see u_strstr
sl@0	235	* @see u_strFindFirst
sl@0	236	*/
sl@0	237	U_STABLE UChar * U_EXPORT2
sl@0	238	u_strchr32(const UChar *s, UChar32 c);
sl@0	239
sl@0	240	/**
sl@0	241	* Find the last occurrence of a substring in a string.
sl@0	242	* The substring is found at code point boundaries.
sl@0	243	* That means that if the substring begins with
sl@0	244	* a trail surrogate or ends with a lead surrogate,
sl@0	245	* then it is found only if these surrogates stand alone in the text.
sl@0	246	* Otherwise, the substring edge units would be matched against
sl@0	247	* halves of surrogate pairs.
sl@0	248	*
sl@0	249	* @param s The string to search (NUL-terminated).
sl@0	250	* @param substring The substring to find (NUL-terminated).
sl@0	251	* @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
sl@0	252	* or <code>s</code> itself if the <code>substring</code> is empty,
sl@0	253	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
sl@0	254	* @stable ICU 2.4
sl@0	255	*
sl@0	256	* @see u_strstr
sl@0	257	* @see u_strFindFirst
sl@0	258	* @see u_strFindLast
sl@0	259	*/
sl@0	260	U_STABLE UChar * U_EXPORT2
sl@0	261	u_strrstr(const UChar *s, const UChar *substring);
sl@0	262
sl@0	263	/**
sl@0	264	* Find the last occurrence of a substring in a string.
sl@0	265	* The substring is found at code point boundaries.
sl@0	266	* That means that if the substring begins with
sl@0	267	* a trail surrogate or ends with a lead surrogate,
sl@0	268	* then it is found only if these surrogates stand alone in the text.
sl@0	269	* Otherwise, the substring edge units would be matched against
sl@0	270	* halves of surrogate pairs.
sl@0	271	*
sl@0	272	* @param s The string to search.
sl@0	273	* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
sl@0	274	* @param substring The substring to find (NUL-terminated if subLength is -1).
sl@0	275	* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
sl@0	276	* @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
sl@0	277	* or <code>s</code> itself if the <code>substring</code> is empty,
sl@0	278	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
sl@0	279	* @stable ICU 2.4
sl@0	280	*
sl@0	281	* @see u_strrstr
sl@0	282	* @see u_strFindFirst
sl@0	283	*/
sl@0	284	U_STABLE UChar * U_EXPORT2
sl@0	285	u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
sl@0	286
sl@0	287	/**
sl@0	288	* Find the last occurrence of a BMP code point in a string.
sl@0	289	* A surrogate code point is found only if its match in the text is not
sl@0	290	* part of a surrogate pair.
sl@0	291	* A NUL character is found at the string terminator.
sl@0	292	*
sl@0	293	* @param s The string to search (NUL-terminated).
sl@0	294	* @param c The BMP code point to find.
sl@0	295	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
sl@0	296	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	297	* @stable ICU 2.4
sl@0	298	*
sl@0	299	* @see u_strrchr32
sl@0	300	* @see u_memrchr
sl@0	301	* @see u_strrstr
sl@0	302	* @see u_strFindLast
sl@0	303	*/
sl@0	304	U_STABLE UChar * U_EXPORT2
sl@0	305	u_strrchr(const UChar *s, UChar c);
sl@0	306
sl@0	307	/**
sl@0	308	* Find the last occurrence of a code point in a string.
sl@0	309	* A surrogate code point is found only if its match in the text is not
sl@0	310	* part of a surrogate pair.
sl@0	311	* A NUL character is found at the string terminator.
sl@0	312	*
sl@0	313	* @param s The string to search (NUL-terminated).
sl@0	314	* @param c The code point to find.
sl@0	315	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
sl@0	316	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	317	* @stable ICU 2.4
sl@0	318	*
sl@0	319	* @see u_strrchr
sl@0	320	* @see u_memrchr32
sl@0	321	* @see u_strrstr
sl@0	322	* @see u_strFindLast
sl@0	323	*/
sl@0	324	U_STABLE UChar * U_EXPORT2
sl@0	325	u_strrchr32(const UChar *s, UChar32 c);
sl@0	326
sl@0	327	/**
sl@0	328	* Locates the first occurrence in the string <code>string</code> of any of the characters
sl@0	329	* in the string <code>matchSet</code>.
sl@0	330	* Works just like C's strpbrk but with Unicode.
sl@0	331	*
sl@0	332	* @param string The string in which to search, NUL-terminated.
sl@0	333	* @param matchSet A NUL-terminated string defining a set of code points
sl@0	334	* for which to search in the text string.
sl@0	335	* @return A pointer to the character in <code>string</code> that matches one of the
sl@0	336	* characters in <code>matchSet</code>, or NULL if no such character is found.
sl@0	337	* @stable ICU 2.0
sl@0	338	*/
sl@0	339	U_STABLE UChar * U_EXPORT2
sl@0	340	u_strpbrk(const UChar *string, const UChar *matchSet);
sl@0	341
sl@0	342	/**
sl@0	343	* Returns the number of consecutive characters in <code>string</code>,
sl@0	344	* beginning with the first, that do not occur somewhere in <code>matchSet</code>.
sl@0	345	* Works just like C's strcspn but with Unicode.
sl@0	346	*
sl@0	347	* @param string The string in which to search, NUL-terminated.
sl@0	348	* @param matchSet A NUL-terminated string defining a set of code points
sl@0	349	* for which to search in the text string.
sl@0	350	* @return The number of initial characters in <code>string</code> that do not
sl@0	351	* occur in <code>matchSet</code>.
sl@0	352	* @see u_strspn
sl@0	353	* @stable ICU 2.0
sl@0	354	*/
sl@0	355	U_STABLE int32_t U_EXPORT2
sl@0	356	u_strcspn(const UChar *string, const UChar *matchSet);
sl@0	357
sl@0	358	/**
sl@0	359	* Returns the number of consecutive characters in <code>string</code>,
sl@0	360	* beginning with the first, that occur somewhere in <code>matchSet</code>.
sl@0	361	* Works just like C's strspn but with Unicode.
sl@0	362	*
sl@0	363	* @param string The string in which to search, NUL-terminated.
sl@0	364	* @param matchSet A NUL-terminated string defining a set of code points
sl@0	365	* for which to search in the text string.
sl@0	366	* @return The number of initial characters in <code>string</code> that do
sl@0	367	* occur in <code>matchSet</code>.
sl@0	368	* @see u_strcspn
sl@0	369	* @stable ICU 2.0
sl@0	370	*/
sl@0	371	U_STABLE int32_t U_EXPORT2
sl@0	372	u_strspn(const UChar *string, const UChar *matchSet);
sl@0	373
sl@0	374	/**
sl@0	375	* The string tokenizer API allows an application to break a string into
sl@0	376	* tokens. Unlike strtok(), the saveState (the current pointer within the
sl@0	377	* original string) is maintained in saveState. In the first call, the
sl@0	378	* argument src is a pointer to the string. In subsequent calls to
sl@0	379	* return successive tokens of that string, src must be specified as
sl@0	380	* NULL. The value saveState is set by this function to maintain the
sl@0	381	* function's position within the string, and on each subsequent call
sl@0	382	* you must give this argument the same variable. This function does
sl@0	383	* handle surrogate pairs. This function is similar to strtok_r() from
sl@0	384	* the POSIX Threads Extension (1003.1c-1995).
sl@0	385	*
sl@0	386	* @param src String containing token(s). This string will be modified.
sl@0	387	* After the first call to u_strtok_r(), this argument must
sl@0	388	* be NULL to get to the next token.
sl@0	389	* @param delim Set of delimiter characters (Unicode code points).
sl@0	390	* @param saveState The current pointer within the original string,
sl@0	391	* which is set by this function. The saveState
sl@0	392	* parameter should be the address of a local variable of type
sl@0	393	* UChar *. (i.e. define "UChar *myLocalSaveState" and use
sl@0	394	* &myLocalSaveState for this parameter).
sl@0	395	* @return A pointer to the next token found in src, or NULL
sl@0	396	* when there are no more tokens.
sl@0	397	* @stable ICU 2.0
sl@0	398	*/
sl@0	399	U_STABLE UChar * U_EXPORT2
sl@0	400	u_strtok_r(UChar *src,
sl@0	401	const UChar *delim,
sl@0	402	UChar **saveState);
sl@0	403
sl@0	404	/**
sl@0	405	* Compare two Unicode strings for bitwise equality (code unit order).
sl@0	406	*
sl@0	407	* @param s1 A string to compare.
sl@0	408	* @param s2 A string to compare.
sl@0	409	* @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
sl@0	410	* value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
sl@0	411	* value if <code>s1</code> is bitwise greater than <code>s2</code>.
sl@0	412	* @stable ICU 2.0
sl@0	413	*/
sl@0	414	U_STABLE int32_t U_EXPORT2
sl@0	415	u_strcmp(const UChar *s1,
sl@0	416	const UChar *s2);
sl@0	417
sl@0	418	/**
sl@0	419	* Compare two Unicode strings in code point order.
sl@0	420	* See u_strCompare for details.
sl@0	421	*
sl@0	422	* @param s1 A string to compare.
sl@0	423	* @param s2 A string to compare.
sl@0	424	* @return a negative/zero/positive integer corresponding to whether
sl@0	425	* the first string is less than/equal to/greater than the second one
sl@0	426	* in code point order
sl@0	427	* @stable ICU 2.0
sl@0	428	*/
sl@0	429	U_STABLE int32_t U_EXPORT2
sl@0	430	u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
sl@0	431
sl@0	432	/**
sl@0	433	* Compare two Unicode strings (binary order).
sl@0	434	*
sl@0	435	* The comparison can be done in code unit order or in code point order.
sl@0	436	* They differ only in UTF-16 when
sl@0	437	* comparing supplementary code points (U+10000..U+10ffff)
sl@0	438	* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
sl@0	439	* In code unit order, high BMP code points sort after supplementary code points
sl@0	440	* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
sl@0	441	*
sl@0	442	* This function works with strings of different explicitly specified lengths
sl@0	443	* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
sl@0	444	* NUL-terminated strings are possible with length arguments of -1.
sl@0	445	*
sl@0	446	* @param s1 First source string.
sl@0	447	* @param length1 Length of first source string, or -1 if NUL-terminated.
sl@0	448	*
sl@0	449	* @param s2 Second source string.
sl@0	450	* @param length2 Length of second source string, or -1 if NUL-terminated.
sl@0	451	*
sl@0	452	* @param codePointOrder Choose between code unit order (FALSE)
sl@0	453	* and code point order (TRUE).
sl@0	454	*
sl@0	455	* @return <0 or 0 or >0 as usual for string comparisons
sl@0	456	*
sl@0	457	* @stable ICU 2.2
sl@0	458	*/
sl@0	459	U_STABLE int32_t U_EXPORT2
sl@0	460	u_strCompare(const UChar *s1, int32_t length1,
sl@0	461	const UChar *s2, int32_t length2,
sl@0	462	UBool codePointOrder);
sl@0	463
sl@0	464	/**
sl@0	465	* Compare two Unicode strings (binary order)
sl@0	466	* as presented by UCharIterator objects.
sl@0	467	* Works otherwise just like u_strCompare().
sl@0	468	*
sl@0	469	* Both iterators are reset to their start positions.
sl@0	470	* When the function returns, it is undefined where the iterators
sl@0	471	* have stopped.
sl@0	472	*
sl@0	473	* @param iter1 First source string iterator.
sl@0	474	* @param iter2 Second source string iterator.
sl@0	475	* @param codePointOrder Choose between code unit order (FALSE)
sl@0	476	* and code point order (TRUE).
sl@0	477	*
sl@0	478	* @return <0 or 0 or >0 as usual for string comparisons
sl@0	479	*
sl@0	480	* @see u_strCompare
sl@0	481	*
sl@0	482	* @stable ICU 2.6
sl@0	483	*/
sl@0	484	U_STABLE int32_t U_EXPORT2
sl@0	485	u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
sl@0	486
sl@0	487	#ifndef U_COMPARE_CODE_POINT_ORDER
sl@0	488	/* see also unistr.h and unorm.h */
sl@0	489	/**
sl@0	490	* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
sl@0	491	* Compare strings in code point order instead of code unit order.
sl@0	492	* @stable ICU 2.2
sl@0	493	*/
sl@0	494	#define U_COMPARE_CODE_POINT_ORDER 0x8000
sl@0	495	#endif
sl@0	496
sl@0	497	/**
sl@0	498	* Compare two strings case-insensitively using full case folding.
sl@0	499	* This is equivalent to
sl@0	500	* u_strCompare(u_strFoldCase(s1, options),
sl@0	501	* u_strFoldCase(s2, options),
sl@0	502	* (options&U_COMPARE_CODE_POINT_ORDER)!=0).
sl@0	503	*
sl@0	504	* The comparison can be done in UTF-16 code unit order or in code point order.
sl@0	505	* They differ only when comparing supplementary code points (U+10000..U+10ffff)
sl@0	506	* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
sl@0	507	* In code unit order, high BMP code points sort after supplementary code points
sl@0	508	* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
sl@0	509	*
sl@0	510	* This function works with strings of different explicitly specified lengths
sl@0	511	* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
sl@0	512	* NUL-terminated strings are possible with length arguments of -1.
sl@0	513	*
sl@0	514	* @param s1 First source string.
sl@0	515	* @param length1 Length of first source string, or -1 if NUL-terminated.
sl@0	516	*
sl@0	517	* @param s2 Second source string.
sl@0	518	* @param length2 Length of second source string, or -1 if NUL-terminated.
sl@0	519	*
sl@0	520	* @param options A bit set of options:
sl@0	521	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
sl@0	522	* Comparison in code unit order with default case folding.
sl@0	523	*
sl@0	524	* - U_COMPARE_CODE_POINT_ORDER
sl@0	525	* Set to choose code point order instead of code unit order
sl@0	526	* (see u_strCompare for details).
sl@0	527	*
sl@0	528	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
sl@0	529	*
sl@0	530	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	531	* which must not indicate a failure before the function call.
sl@0	532	*
sl@0	533	* @return <0 or 0 or >0 as usual for string comparisons
sl@0	534	*
sl@0	535	* @stable ICU 2.2
sl@0	536	*/
sl@0	537	U_STABLE int32_t U_EXPORT2
sl@0	538	u_strCaseCompare(const UChar *s1, int32_t length1,
sl@0	539	const UChar *s2, int32_t length2,
sl@0	540	uint32_t options,
sl@0	541	UErrorCode *pErrorCode);
sl@0	542
sl@0	543	/**
sl@0	544	* Compare two ustrings for bitwise equality.
sl@0	545	* Compares at most <code>n</code> characters.
sl@0	546	*
sl@0	547	* @param ucs1 A string to compare.
sl@0	548	* @param ucs2 A string to compare.
sl@0	549	* @param n The maximum number of characters to compare.
sl@0	550	* @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
sl@0	551	* value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
sl@0	552	* value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
sl@0	553	* @stable ICU 2.0
sl@0	554	*/
sl@0	555	U_STABLE int32_t U_EXPORT2
sl@0	556	u_strncmp(const UChar *ucs1,
sl@0	557	const UChar *ucs2,
sl@0	558	int32_t n);
sl@0	559
sl@0	560	/**
sl@0	561	* Compare two Unicode strings in code point order.
sl@0	562	* This is different in UTF-16 from u_strncmp() if supplementary characters are present.
sl@0	563	* For details, see u_strCompare().
sl@0	564	*
sl@0	565	* @param s1 A string to compare.
sl@0	566	* @param s2 A string to compare.
sl@0	567	* @param n The maximum number of characters to compare.
sl@0	568	* @return a negative/zero/positive integer corresponding to whether
sl@0	569	* the first string is less than/equal to/greater than the second one
sl@0	570	* in code point order
sl@0	571	* @stable ICU 2.0
sl@0	572	*/
sl@0	573	U_STABLE int32_t U_EXPORT2
sl@0	574	u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
sl@0	575
sl@0	576	/**
sl@0	577	* Compare two strings case-insensitively using full case folding.
sl@0	578	* This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
sl@0	579	*
sl@0	580	* @param s1 A string to compare.
sl@0	581	* @param s2 A string to compare.
sl@0	582	* @param options A bit set of options:
sl@0	583	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
sl@0	584	* Comparison in code unit order with default case folding.
sl@0	585	*
sl@0	586	* - U_COMPARE_CODE_POINT_ORDER
sl@0	587	* Set to choose code point order instead of code unit order
sl@0	588	* (see u_strCompare for details).
sl@0	589	*
sl@0	590	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
sl@0	591	*
sl@0	592	* @return A negative, zero, or positive integer indicating the comparison result.
sl@0	593	* @stable ICU 2.0
sl@0	594	*/
sl@0	595	U_STABLE int32_t U_EXPORT2
sl@0	596	u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
sl@0	597
sl@0	598	/**
sl@0	599	* Compare two strings case-insensitively using full case folding.
sl@0	600	* This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
sl@0	601	* u_strFoldCase(s2, at most n, options)).
sl@0	602	*
sl@0	603	* @param s1 A string to compare.
sl@0	604	* @param s2 A string to compare.
sl@0	605	* @param n The maximum number of characters in each string to case-fold and then compare.
sl@0	606	* @param options A bit set of options:
sl@0	607	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
sl@0	608	* Comparison in code unit order with default case folding.
sl@0	609	*
sl@0	610	* - U_COMPARE_CODE_POINT_ORDER
sl@0	611	* Set to choose code point order instead of code unit order
sl@0	612	* (see u_strCompare for details).
sl@0	613	*
sl@0	614	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
sl@0	615	*
sl@0	616	* @return A negative, zero, or positive integer indicating the comparison result.
sl@0	617	* @stable ICU 2.0
sl@0	618	*/
sl@0	619	U_STABLE int32_t U_EXPORT2
sl@0	620	u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
sl@0	621
sl@0	622	/**
sl@0	623	* Compare two strings case-insensitively using full case folding.
sl@0	624	* This is equivalent to u_strcmp(u_strFoldCase(s1, length, options),
sl@0	625	* u_strFoldCase(s2, length, options)).
sl@0	626	*
sl@0	627	* @param s1 A string to compare.
sl@0	628	* @param s2 A string to compare.
sl@0	629	* @param length The number of characters in each string to case-fold and then compare.
sl@0	630	* @param options A bit set of options:
sl@0	631	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
sl@0	632	* Comparison in code unit order with default case folding.
sl@0	633	*
sl@0	634	* - U_COMPARE_CODE_POINT_ORDER
sl@0	635	* Set to choose code point order instead of code unit order
sl@0	636	* (see u_strCompare for details).
sl@0	637	*
sl@0	638	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
sl@0	639	*
sl@0	640	* @return A negative, zero, or positive integer indicating the comparison result.
sl@0	641	* @stable ICU 2.0
sl@0	642	*/
sl@0	643	U_STABLE int32_t U_EXPORT2
sl@0	644	u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
sl@0	645
sl@0	646	/**
sl@0	647	* Copy a ustring. Adds a null terminator.
sl@0	648	*
sl@0	649	* @param dst The destination string.
sl@0	650	* @param src The source string.
sl@0	651	* @return A pointer to <code>dst</code>.
sl@0	652	* @stable ICU 2.0
sl@0	653	*/
sl@0	654	U_STABLE UChar* U_EXPORT2
sl@0	655	u_strcpy(UChar *dst,
sl@0	656	const UChar *src);
sl@0	657
sl@0	658	/**
sl@0	659	* Copy a ustring.
sl@0	660	* Copies at most <code>n</code> characters. The result will be null terminated
sl@0	661	* if the length of <code>src</code> is less than <code>n</code>.
sl@0	662	*
sl@0	663	* @param dst The destination string.
sl@0	664	* @param src The source string.
sl@0	665	* @param n The maximum number of characters to copy.
sl@0	666	* @return A pointer to <code>dst</code>.
sl@0	667	* @stable ICU 2.0
sl@0	668	*/
sl@0	669	U_STABLE UChar* U_EXPORT2
sl@0	670	u_strncpy(UChar *dst,
sl@0	671	const UChar *src,
sl@0	672	int32_t n);
sl@0	673
sl@0	674	#if !UCONFIG_NO_CONVERSION
sl@0	675
sl@0	676	/**
sl@0	677	* Copy a byte string encoded in the default codepage to a ustring.
sl@0	678	* Adds a null terminator.
sl@0	679	* Performs a host byte to UChar conversion
sl@0	680	*
sl@0	681	* @param dst The destination string.
sl@0	682	* @param src The source string.
sl@0	683	* @return A pointer to <code>dst</code>.
sl@0	684	* @stable ICU 2.0
sl@0	685	*/
sl@0	686	U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
sl@0	687	const char *src );
sl@0	688
sl@0	689	/**
sl@0	690	* Copy a byte string encoded in the default codepage to a ustring.
sl@0	691	* Copies at most <code>n</code> characters. The result will be null terminated
sl@0	692	* if the length of <code>src</code> is less than <code>n</code>.
sl@0	693	* Performs a host byte to UChar conversion
sl@0	694	*
sl@0	695	* @param dst The destination string.
sl@0	696	* @param src The source string.
sl@0	697	* @param n The maximum number of characters to copy.
sl@0	698	* @return A pointer to <code>dst</code>.
sl@0	699	* @stable ICU 2.0
sl@0	700	*/
sl@0	701	U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
sl@0	702	const char *src,
sl@0	703	int32_t n);
sl@0	704
sl@0	705	/**
sl@0	706	* Copy ustring to a byte string encoded in the default codepage.
sl@0	707	* Adds a null terminator.
sl@0	708	* Performs a UChar to host byte conversion
sl@0	709	*
sl@0	710	* @param dst The destination string.
sl@0	711	* @param src The source string.
sl@0	712	* @return A pointer to <code>dst</code>.
sl@0	713	* @stable ICU 2.0
sl@0	714	*/
sl@0	715	U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
sl@0	716	const UChar *src );
sl@0	717
sl@0	718	/**
sl@0	719	* Copy ustring to a byte string encoded in the default codepage.
sl@0	720	* Copies at most <code>n</code> characters. The result will be null terminated
sl@0	721	* if the length of <code>src</code> is less than <code>n</code>.
sl@0	722	* Performs a UChar to host byte conversion
sl@0	723	*
sl@0	724	* @param dst The destination string.
sl@0	725	* @param src The source string.
sl@0	726	* @param n The maximum number of characters to copy.
sl@0	727	* @return A pointer to <code>dst</code>.
sl@0	728	* @stable ICU 2.0
sl@0	729	*/
sl@0	730	U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
sl@0	731	const UChar *src,
sl@0	732	int32_t n );
sl@0	733
sl@0	734	#endif
sl@0	735
sl@0	736	/**
sl@0	737	* Synonym for memcpy(), but with UChars only.
sl@0	738	* @param dest The destination string
sl@0	739	* @param src The source string
sl@0	740	* @param count The number of characters to copy
sl@0	741	* @return A pointer to <code>dest</code>
sl@0	742	* @stable ICU 2.0
sl@0	743	*/
sl@0	744	U_STABLE UChar* U_EXPORT2
sl@0	745	u_memcpy(UChar *dest, const UChar *src, int32_t count);
sl@0	746
sl@0	747	/**
sl@0	748	* Synonym for memmove(), but with UChars only.
sl@0	749	* @param dest The destination string
sl@0	750	* @param src The source string
sl@0	751	* @param count The number of characters to move
sl@0	752	* @return A pointer to <code>dest</code>
sl@0	753	* @stable ICU 2.0
sl@0	754	*/
sl@0	755	U_STABLE UChar* U_EXPORT2
sl@0	756	u_memmove(UChar *dest, const UChar *src, int32_t count);
sl@0	757
sl@0	758	/**
sl@0	759	* Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
sl@0	760	*
sl@0	761	* @param dest The destination string.
sl@0	762	* @param c The character to initialize the string.
sl@0	763	* @param count The maximum number of characters to set.
sl@0	764	* @return A pointer to <code>dest</code>.
sl@0	765	* @stable ICU 2.0
sl@0	766	*/
sl@0	767	U_STABLE UChar* U_EXPORT2
sl@0	768	u_memset(UChar *dest, UChar c, int32_t count);
sl@0	769
sl@0	770	/**
sl@0	771	* Compare the first <code>count</code> UChars of each buffer.
sl@0	772	*
sl@0	773	* @param buf1 The first string to compare.
sl@0	774	* @param buf2 The second string to compare.
sl@0	775	* @param count The maximum number of UChars to compare.
sl@0	776	* @return When buf1 < buf2, a negative number is returned.
sl@0	777	* When buf1 == buf2, 0 is returned.
sl@0	778	* When buf1 > buf2, a positive number is returned.
sl@0	779	* @stable ICU 2.0
sl@0	780	*/
sl@0	781	U_STABLE int32_t U_EXPORT2
sl@0	782	u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
sl@0	783
sl@0	784	/**
sl@0	785	* Compare two Unicode strings in code point order.
sl@0	786	* This is different in UTF-16 from u_memcmp() if supplementary characters are present.
sl@0	787	* For details, see u_strCompare().
sl@0	788	*
sl@0	789	* @param s1 A string to compare.
sl@0	790	* @param s2 A string to compare.
sl@0	791	* @param count The maximum number of characters to compare.
sl@0	792	* @return a negative/zero/positive integer corresponding to whether
sl@0	793	* the first string is less than/equal to/greater than the second one
sl@0	794	* in code point order
sl@0	795	* @stable ICU 2.0
sl@0	796	*/
sl@0	797	U_STABLE int32_t U_EXPORT2
sl@0	798	u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
sl@0	799
sl@0	800	/**
sl@0	801	* Find the first occurrence of a BMP code point in a string.
sl@0	802	* A surrogate code point is found only if its match in the text is not
sl@0	803	* part of a surrogate pair.
sl@0	804	* A NUL character is found at the string terminator.
sl@0	805	*
sl@0	806	* @param s The string to search (contains <code>count</code> UChars).
sl@0	807	* @param c The BMP code point to find.
sl@0	808	* @param count The length of the string.
sl@0	809	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
sl@0	810	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	811	* @stable ICU 2.0
sl@0	812	*
sl@0	813	* @see u_strchr
sl@0	814	* @see u_memchr32
sl@0	815	* @see u_strFindFirst
sl@0	816	*/
sl@0	817	U_STABLE UChar* U_EXPORT2
sl@0	818	u_memchr(const UChar *s, UChar c, int32_t count);
sl@0	819
sl@0	820	/**
sl@0	821	* Find the first occurrence of a code point in a string.
sl@0	822	* A surrogate code point is found only if its match in the text is not
sl@0	823	* part of a surrogate pair.
sl@0	824	* A NUL character is found at the string terminator.
sl@0	825	*
sl@0	826	* @param s The string to search (contains <code>count</code> UChars).
sl@0	827	* @param c The code point to find.
sl@0	828	* @param count The length of the string.
sl@0	829	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
sl@0	830	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	831	* @stable ICU 2.0
sl@0	832	*
sl@0	833	* @see u_strchr32
sl@0	834	* @see u_memchr
sl@0	835	* @see u_strFindFirst
sl@0	836	*/
sl@0	837	U_STABLE UChar* U_EXPORT2
sl@0	838	u_memchr32(const UChar *s, UChar32 c, int32_t count);
sl@0	839
sl@0	840	/**
sl@0	841	* Find the last occurrence of a BMP code point in a string.
sl@0	842	* A surrogate code point is found only if its match in the text is not
sl@0	843	* part of a surrogate pair.
sl@0	844	* A NUL character is found at the string terminator.
sl@0	845	*
sl@0	846	* @param s The string to search (contains <code>count</code> UChars).
sl@0	847	* @param c The BMP code point to find.
sl@0	848	* @param count The length of the string.
sl@0	849	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
sl@0	850	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	851	* @stable ICU 2.4
sl@0	852	*
sl@0	853	* @see u_strrchr
sl@0	854	* @see u_memrchr32
sl@0	855	* @see u_strFindLast
sl@0	856	*/
sl@0	857	U_STABLE UChar* U_EXPORT2
sl@0	858	u_memrchr(const UChar *s, UChar c, int32_t count);
sl@0	859
sl@0	860	/**
sl@0	861	* Find the last occurrence of a code point in a string.
sl@0	862	* A surrogate code point is found only if its match in the text is not
sl@0	863	* part of a surrogate pair.
sl@0	864	* A NUL character is found at the string terminator.
sl@0	865	*
sl@0	866	* @param s The string to search (contains <code>count</code> UChars).
sl@0	867	* @param c The code point to find.
sl@0	868	* @param count The length of the string.
sl@0	869	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
sl@0	870	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
sl@0	871	* @stable ICU 2.4
sl@0	872	*
sl@0	873	* @see u_strrchr32
sl@0	874	* @see u_memrchr
sl@0	875	* @see u_strFindLast
sl@0	876	*/
sl@0	877	U_STABLE UChar* U_EXPORT2
sl@0	878	u_memrchr32(const UChar *s, UChar32 c, int32_t count);
sl@0	879
sl@0	880	/**
sl@0	881	* Unicode String literals in C.
sl@0	882	* We need one macro to declare a variable for the string
sl@0	883	* and to statically preinitialize it if possible,
sl@0	884	* and a second macro to dynamically initialize such a string variable if necessary.
sl@0	885	*
sl@0	886	* The macros are defined for maximum performance.
sl@0	887	* They work only for strings that contain "invariant characters", i.e.,
sl@0	888	* only latin letters, digits, and some punctuation.
sl@0	889	* See utypes.h for details.
sl@0	890	*
sl@0	891	* A pair of macros for a single string must be used with the same
sl@0	892	* parameters.
sl@0	893	* The string parameter must be a C string literal.
sl@0	894	* The length of the string, not including the terminating
sl@0	895	* <code>NUL</code>, must be specified as a constant.
sl@0	896	* The U_STRING_DECL macro should be invoked exactly once for one
sl@0	897	* such string variable before it is used.
sl@0	898	*
sl@0	899	* Usage:
sl@0	900	* <pre>
sl@0	901	* U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
sl@0	902	* U_STRING_DECL(ustringVar2, "jumps 5%", 8);
sl@0	903	* static UBool didInit=FALSE;
sl@0	904	*
sl@0	905	* int32_t function() {
sl@0	906	* if(!didInit) {
sl@0	907	* U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
sl@0	908	* U_STRING_INIT(ustringVar2, "jumps 5%", 8);
sl@0	909	* didInit=TRUE;
sl@0	910	* }
sl@0	911	* return u_strcmp(ustringVar1, ustringVar2);
sl@0	912	* }
sl@0	913	* </pre>
sl@0	914	* @stable ICU 2.0
sl@0	915	*/
sl@0	916	#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
sl@0	917	# define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs }
sl@0	918	/**@stable ICU 2.0 */
sl@0	919	# define U_STRING_INIT(var, cs, length)
sl@0	920	#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
sl@0	921	# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs }
sl@0	922	/**@stable ICU 2.0 */
sl@0	923	# define U_STRING_INIT(var, cs, length)
sl@0	924	#else
sl@0	925	# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
sl@0	926	/**@stable ICU 2.0 */
sl@0	927	# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
sl@0	928	#endif
sl@0	929
sl@0	930	/**
sl@0	931	* Unescape a string of characters and write the resulting
sl@0	932	* Unicode characters to the destination buffer. The following escape
sl@0	933	* sequences are recognized:
sl@0	934	*
sl@0	935	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
sl@0	936	* \\Uhhhhhhhh 8 hex digits
sl@0	937	* \\xhh 1-2 hex digits
sl@0	938	* \\x{h...} 1-8 hex digits
sl@0	939	* \\ooo 1-3 octal digits; o in [0-7]
sl@0	940	* \\cX control-X; X is masked with 0x1F
sl@0	941	*
sl@0	942	* as well as the standard ANSI C escapes:
sl@0	943	*
sl@0	944	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
sl@0	945	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
sl@0	946	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
sl@0	947	*
sl@0	948	* Anything else following a backslash is generically escaped. For
sl@0	949	* example, "[a\\-z]" returns "[a-z]".
sl@0	950	*
sl@0	951	* If an escape sequence is ill-formed, this method returns an empty
sl@0	952	* string. An example of an ill-formed sequence is "\\u" followed by
sl@0	953	* fewer than 4 hex digits.
sl@0	954	*
sl@0	955	* The above characters are recognized in the compiler's codepage,
sl@0	956	* that is, they are coded as 'u', '\\', etc. Characters that are
sl@0	957	* not parts of escape sequences are converted using u_charsToUChars().
sl@0	958	*
sl@0	959	* This function is similar to UnicodeString::unescape() but not
sl@0	960	* identical to it. The latter takes a source UnicodeString, so it
sl@0	961	* does escape recognition but no conversion.
sl@0	962	*
sl@0	963	* @param src a zero-terminated string of invariant characters
sl@0	964	* @param dest pointer to buffer to receive converted and unescaped
sl@0	965	* text and, if there is room, a zero terminator. May be NULL for
sl@0	966	* preflighting, in which case no UChars will be written, but the
sl@0	967	* return value will still be valid. On error, an empty string is
sl@0	968	* stored here (if possible).
sl@0	969	* @param destCapacity the number of UChars that may be written at
sl@0	970	* dest. Ignored if dest == NULL.
sl@0	971	* @return the length of unescaped string.
sl@0	972	* @see u_unescapeAt
sl@0	973	* @see UnicodeString#unescape()
sl@0	974	* @see UnicodeString#unescapeAt()
sl@0	975	* @stable ICU 2.0
sl@0	976	*/
sl@0	977	U_STABLE int32_t U_EXPORT2
sl@0	978	u_unescape(const char *src,
sl@0	979	UChar *dest, int32_t destCapacity);
sl@0	980
sl@0	981	U_CDECL_BEGIN
sl@0	982	/**
sl@0	983	* Callback function for u_unescapeAt() that returns a character of
sl@0	984	* the source text given an offset and a context pointer. The context
sl@0	985	* pointer will be whatever is passed into u_unescapeAt().
sl@0	986	*
sl@0	987	* @param offset the offset of the source-text character to return.
sl@0	988	* @param context an opaque pointer passed directly into u_unescapeAt()
sl@0	989	* @return the UChar of the source text at the given offset
sl@0	990	* (see the charAt parameter of u_unescapeAt())
sl@0	991	* @see u_unescapeAt
sl@0	992	* @stable ICU 2.0
sl@0	993	*/
sl@0	994	typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
sl@0	995	U_CDECL_END
sl@0	996
sl@0	997	/**
sl@0	998	* Unescape a single sequence. The character at offset-1 is assumed
sl@0	999	* (without checking) to be a backslash. This method takes a callback
sl@0	1000	* pointer to a function that returns the UChar at a given offset. By
sl@0	1001	* varying this callback, ICU functions are able to unescape char*
sl@0	1002	* strings, UnicodeString objects, and UFILE pointers.
sl@0	1003	*
sl@0	1004	* If offset is out of range, or if the escape sequence is ill-formed,
sl@0	1005	* (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
sl@0	1006	* for a list of recognized sequences.
sl@0	1007	*
sl@0	1008	* @param charAt callback function that returns a UChar of the source
sl@0	1009	* text given an offset and a context pointer.
sl@0	1010	* @param offset pointer to the offset that will be passed to charAt.
sl@0	1011	* The offset value will be updated upon return to point after the
sl@0	1012	* last parsed character of the escape sequence. On error the offset
sl@0	1013	* is unchanged.
sl@0	1014	* @param length the number of characters in the source text. The
sl@0	1015	* last character of the source text is considered to be at offset
sl@0	1016	* length-1.
sl@0	1017	* @param context an opaque pointer passed directly into charAt.
sl@0	1018	* @return the character represented by the escape sequence at
sl@0	1019	* offset, or (UChar32)0xFFFFFFFF on error.
sl@0	1020	* @see u_unescape()
sl@0	1021	* @see UnicodeString#unescape()
sl@0	1022	* @see UnicodeString#unescapeAt()
sl@0	1023	* @stable ICU 2.0
sl@0	1024	*/
sl@0	1025	U_STABLE UChar32 U_EXPORT2
sl@0	1026	u_unescapeAt(UNESCAPE_CHAR_AT charAt,
sl@0	1027	int32_t *offset,
sl@0	1028	int32_t length,
sl@0	1029	void *context);
sl@0	1030
sl@0	1031	/**
sl@0	1032	* Uppercase the characters in a string.
sl@0	1033	* Casing is locale-dependent and context-sensitive.
sl@0	1034	* The result may be longer or shorter than the original.
sl@0	1035	* The source string and the destination buffer are allowed to overlap.
sl@0	1036	*
sl@0	1037	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1038	* the buffer is large enough.
sl@0	1039	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
sl@0	1040	* dest may be NULL and the function will only return the length of the result
sl@0	1041	* without writing any of the result string.
sl@0	1042	* @param src The original string
sl@0	1043	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1044	* @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
sl@0	1045	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1046	* which must not indicate a failure before the function call.
sl@0	1047	* @return The length of the result string. It may be greater than destCapacity. In that case,
sl@0	1048	* only some of the result was written to the destination buffer.
sl@0	1049	* @stable ICU 2.0
sl@0	1050	*/
sl@0	1051	U_STABLE int32_t U_EXPORT2
sl@0	1052	u_strToUpper(UChar *dest, int32_t destCapacity,
sl@0	1053	const UChar *src, int32_t srcLength,
sl@0	1054	const char *locale,
sl@0	1055	UErrorCode *pErrorCode);
sl@0	1056
sl@0	1057	/**
sl@0	1058	* Lowercase the characters in a string.
sl@0	1059	* Casing is locale-dependent and context-sensitive.
sl@0	1060	* The result may be longer or shorter than the original.
sl@0	1061	* The source string and the destination buffer are allowed to overlap.
sl@0	1062	*
sl@0	1063	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1064	* the buffer is large enough.
sl@0	1065	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
sl@0	1066	* dest may be NULL and the function will only return the length of the result
sl@0	1067	* without writing any of the result string.
sl@0	1068	* @param src The original string
sl@0	1069	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1070	* @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
sl@0	1071	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1072	* which must not indicate a failure before the function call.
sl@0	1073	* @return The length of the result string. It may be greater than destCapacity. In that case,
sl@0	1074	* only some of the result was written to the destination buffer.
sl@0	1075	* @stable ICU 2.0
sl@0	1076	*/
sl@0	1077	U_STABLE int32_t U_EXPORT2
sl@0	1078	u_strToLower(UChar *dest, int32_t destCapacity,
sl@0	1079	const UChar *src, int32_t srcLength,
sl@0	1080	const char *locale,
sl@0	1081	UErrorCode *pErrorCode);
sl@0	1082
sl@0	1083	#if !UCONFIG_NO_BREAK_ITERATION
sl@0	1084
sl@0	1085	/**
sl@0	1086	* Titlecase a string.
sl@0	1087	* Casing is locale-dependent and context-sensitive.
sl@0	1088	* Titlecasing uses a break iterator to find the first characters of words
sl@0	1089	* that are to be titlecased. It titlecases those characters and lowercases
sl@0	1090	* all others.
sl@0	1091	*
sl@0	1092	* The titlecase break iterator can be provided to customize for arbitrary
sl@0	1093	* styles, using rules and dictionaries beyond the standard iterators.
sl@0	1094	* It may be more efficient to always provide an iterator to avoid
sl@0	1095	* opening and closing one for each string.
sl@0	1096	* The standard titlecase iterator for the root locale implements the
sl@0	1097	* algorithm of Unicode TR 21.
sl@0	1098	*
sl@0	1099	* This function uses only the first() and next() methods of the
sl@0	1100	* provided break iterator.
sl@0	1101	*
sl@0	1102	* The result may be longer or shorter than the original.
sl@0	1103	* The source string and the destination buffer are allowed to overlap.
sl@0	1104	*
sl@0	1105	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1106	* the buffer is large enough.
sl@0	1107	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
sl@0	1108	* dest may be NULL and the function will only return the length of the result
sl@0	1109	* without writing any of the result string.
sl@0	1110	* @param src The original string
sl@0	1111	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1112	* @param titleIter A break iterator to find the first characters of words
sl@0	1113	* that are to be titlecased.
sl@0	1114	* If none is provided (NULL), then a standard titlecase
sl@0	1115	* break iterator is opened.
sl@0	1116	* @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
sl@0	1117	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1118	* which must not indicate a failure before the function call.
sl@0	1119	* @return The length of the result string. It may be greater than destCapacity. In that case,
sl@0	1120	* only some of the result was written to the destination buffer.
sl@0	1121	* @stable ICU 2.1
sl@0	1122	*/
sl@0	1123	U_STABLE int32_t U_EXPORT2
sl@0	1124	u_strToTitle(UChar *dest, int32_t destCapacity,
sl@0	1125	const UChar *src, int32_t srcLength,
sl@0	1126	UBreakIterator *titleIter,
sl@0	1127	const char *locale,
sl@0	1128	UErrorCode *pErrorCode);
sl@0	1129
sl@0	1130	#endif
sl@0	1131
sl@0	1132	/**
sl@0	1133	* Case-fold the characters in a string.
sl@0	1134	* Case-folding is locale-independent and not context-sensitive,
sl@0	1135	* but there is an option for whether to include or exclude mappings for dotted I
sl@0	1136	* and dotless i that are marked with 'I' in CaseFolding.txt.
sl@0	1137	* The result may be longer or shorter than the original.
sl@0	1138	* The source string and the destination buffer are allowed to overlap.
sl@0	1139	*
sl@0	1140	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1141	* the buffer is large enough.
sl@0	1142	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
sl@0	1143	* dest may be NULL and the function will only return the length of the result
sl@0	1144	* without writing any of the result string.
sl@0	1145	* @param src The original string
sl@0	1146	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1147	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
sl@0	1148	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1149	* which must not indicate a failure before the function call.
sl@0	1150	* @return The length of the result string. It may be greater than destCapacity. In that case,
sl@0	1151	* only some of the result was written to the destination buffer.
sl@0	1152	* @stable ICU 2.0
sl@0	1153	*/
sl@0	1154	U_STABLE int32_t U_EXPORT2
sl@0	1155	u_strFoldCase(UChar *dest, int32_t destCapacity,
sl@0	1156	const UChar *src, int32_t srcLength,
sl@0	1157	uint32_t options,
sl@0	1158	UErrorCode *pErrorCode);
sl@0	1159
sl@0	1160	/**
sl@0	1161	* Converts a sequence of UChars to wchar_t units.
sl@0	1162	*
sl@0	1163	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1164	* the buffer is large enough.
sl@0	1165	* @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
sl@0	1166	* dest may be NULL and the function will only return the length of the
sl@0	1167	* result without writing any of the result string (pre-flighting).
sl@0	1168	* @param pDestLength A pointer to receive the number of units written to the destination. If
sl@0	1169	* pDestLength!=NULL then *pDestLength is always set to the
sl@0	1170	* number of output units corresponding to the transformation of
sl@0	1171	* all the input units, even in case of a buffer overflow.
sl@0	1172	* @param src The original source string
sl@0	1173	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1174	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1175	* which must not indicate a failure before the function call.
sl@0	1176	* @return The pointer to destination buffer.
sl@0	1177	* @stable ICU 2.0
sl@0	1178	*/
sl@0	1179	U_STABLE wchar_t* U_EXPORT2
sl@0	1180	u_strToWCS(wchar_t *dest,
sl@0	1181	int32_t destCapacity,
sl@0	1182	int32_t *pDestLength,
sl@0	1183	const UChar *src,
sl@0	1184	int32_t srcLength,
sl@0	1185	UErrorCode *pErrorCode);
sl@0	1186	/**
sl@0	1187	* Converts a sequence of wchar_t units to UChars
sl@0	1188	*
sl@0	1189	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1190	* the buffer is large enough.
sl@0	1191	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
sl@0	1192	* dest may be NULL and the function will only return the length of the
sl@0	1193	* result without writing any of the result string (pre-flighting).
sl@0	1194	* @param pDestLength A pointer to receive the number of units written to the destination. If
sl@0	1195	* pDestLength!=NULL then *pDestLength is always set to the
sl@0	1196	* number of output units corresponding to the transformation of
sl@0	1197	* all the input units, even in case of a buffer overflow.
sl@0	1198	* @param src The original source string
sl@0	1199	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1200	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1201	* which must not indicate a failure before the function call.
sl@0	1202	* @return The pointer to destination buffer.
sl@0	1203	* @stable ICU 2.0
sl@0	1204	*/
sl@0	1205	U_STABLE UChar* U_EXPORT2
sl@0	1206	u_strFromWCS(UChar *dest,
sl@0	1207	int32_t destCapacity,
sl@0	1208	int32_t *pDestLength,
sl@0	1209	const wchar_t *src,
sl@0	1210	int32_t srcLength,
sl@0	1211	UErrorCode *pErrorCode);
sl@0	1212	/**
sl@0	1213	* Converts a sequence of UChars (UTF-16) to UTF-8 bytes
sl@0	1214	*
sl@0	1215	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1216	* the buffer is large enough.
sl@0	1217	* @param destCapacity The size of the buffer (number of chars). If it is 0, then
sl@0	1218	* dest may be NULL and the function will only return the length of the
sl@0	1219	* result without writing any of the result string (pre-flighting).
sl@0	1220	* @param pDestLength A pointer to receive the number of units written to the destination. If
sl@0	1221	* pDestLength!=NULL then *pDestLength is always set to the
sl@0	1222	* number of output units corresponding to the transformation of
sl@0	1223	* all the input units, even in case of a buffer overflow.
sl@0	1224	* @param src The original source string
sl@0	1225	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1226	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1227	* which must not indicate a failure before the function call.
sl@0	1228	* @return The pointer to destination buffer.
sl@0	1229	* @stable ICU 2.0
sl@0	1230	*/
sl@0	1231	U_STABLE char* U_EXPORT2
sl@0	1232	u_strToUTF8(char *dest,
sl@0	1233	int32_t destCapacity,
sl@0	1234	int32_t *pDestLength,
sl@0	1235	const UChar *src,
sl@0	1236	int32_t srcLength,
sl@0	1237	UErrorCode *pErrorCode);
sl@0	1238
sl@0	1239	/**
sl@0	1240	* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
sl@0	1241	*
sl@0	1242	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1243	* the buffer is large enough.
sl@0	1244	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
sl@0	1245	* dest may be NULL and the function will only return the length of the
sl@0	1246	* result without writing any of the result string (pre-flighting).
sl@0	1247	* @param pDestLength A pointer to receive the number of units written to the destination. If
sl@0	1248	* pDestLength!=NULL then *pDestLength is always set to the
sl@0	1249	* number of output units corresponding to the transformation of
sl@0	1250	* all the input units, even in case of a buffer overflow.
sl@0	1251	* @param src The original source string
sl@0	1252	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1253	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1254	* which must not indicate a failure before the function call.
sl@0	1255	* @return The pointer to destination buffer.
sl@0	1256	* @stable ICU 2.0
sl@0	1257	*/
sl@0	1258	U_STABLE UChar* U_EXPORT2
sl@0	1259	u_strFromUTF8(UChar *dest,
sl@0	1260	int32_t destCapacity,
sl@0	1261	int32_t *pDestLength,
sl@0	1262	const char *src,
sl@0	1263	int32_t srcLength,
sl@0	1264	UErrorCode *pErrorCode);
sl@0	1265
sl@0	1266	/**
sl@0	1267	* Converts a sequence of UChars (UTF-16) to UTF32 units.
sl@0	1268	*
sl@0	1269	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1270	* the buffer is large enough.
sl@0	1271	* @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
sl@0	1272	* dest may be NULL and the function will only return the length of the
sl@0	1273	* result without writing any of the result string (pre-flighting).
sl@0	1274	* @param pDestLength A pointer to receive the number of units written to the destination. If
sl@0	1275	* pDestLength!=NULL then *pDestLength is always set to the
sl@0	1276	* number of output units corresponding to the transformation of
sl@0	1277	* all the input units, even in case of a buffer overflow.
sl@0	1278	* @param src The original source string
sl@0	1279	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1280	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1281	* which must not indicate a failure before the function call.
sl@0	1282	* @return The pointer to destination buffer.
sl@0	1283	* @stable ICU 2.0
sl@0	1284	*/
sl@0	1285	U_STABLE UChar32* U_EXPORT2
sl@0	1286	u_strToUTF32(UChar32 *dest,
sl@0	1287	int32_t destCapacity,
sl@0	1288	int32_t *pDestLength,
sl@0	1289	const UChar *src,
sl@0	1290	int32_t srcLength,
sl@0	1291	UErrorCode *pErrorCode);
sl@0	1292
sl@0	1293	/**
sl@0	1294	* Converts a sequence of UTF32 units to UChars (UTF-16)
sl@0	1295	*
sl@0	1296	* @param dest A buffer for the result string. The result will be zero-terminated if
sl@0	1297	* the buffer is large enough.
sl@0	1298	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
sl@0	1299	* dest may be NULL and the function will only return the length of the
sl@0	1300	* result without writing any of the result string (pre-flighting).
sl@0	1301	* @param pDestLength A pointer to receive the number of units written to the destination. If
sl@0	1302	* pDestLength!=NULL then *pDestLength is always set to the
sl@0	1303	* number of output units corresponding to the transformation of
sl@0	1304	* all the input units, even in case of a buffer overflow.
sl@0	1305	* @param src The original source string
sl@0	1306	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
sl@0	1307	* @param pErrorCode Must be a valid pointer to an error code value,
sl@0	1308	* which must not indicate a failure before the function call.
sl@0	1309	* @return The pointer to destination buffer.
sl@0	1310	* @stable ICU 2.0
sl@0	1311	*/
sl@0	1312	U_STABLE UChar* U_EXPORT2
sl@0	1313	u_strFromUTF32(UChar *dest,
sl@0	1314	int32_t destCapacity,
sl@0	1315	int32_t *pDestLength,
sl@0	1316	const UChar32 *src,
sl@0	1317	int32_t srcLength,
sl@0	1318	UErrorCode *pErrorCode);
sl@0	1319
sl@0	1320	#endif

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--