sl@0
|
1 |
/*
|
sl@0
|
2 |
*******************************************************************************
|
sl@0
|
3 |
*
|
sl@0
|
4 |
* Copyright (C) 2002-2005, International Business Machines
|
sl@0
|
5 |
* Corporation and others. All Rights Reserved.
|
sl@0
|
6 |
*
|
sl@0
|
7 |
*******************************************************************************
|
sl@0
|
8 |
* file name: uset.h
|
sl@0
|
9 |
* encoding: US-ASCII
|
sl@0
|
10 |
* tab size: 8 (not used)
|
sl@0
|
11 |
* indentation:4
|
sl@0
|
12 |
*
|
sl@0
|
13 |
* created on: 2002mar07
|
sl@0
|
14 |
* created by: Markus W. Scherer
|
sl@0
|
15 |
*
|
sl@0
|
16 |
* C version of UnicodeSet.
|
sl@0
|
17 |
*/
|
sl@0
|
18 |
|
sl@0
|
19 |
|
sl@0
|
20 |
/**
|
sl@0
|
21 |
* \file
|
sl@0
|
22 |
* \brief C API: Unicode Set
|
sl@0
|
23 |
*
|
sl@0
|
24 |
* <p>This is a C wrapper around the C++ UnicodeSet class.</p>
|
sl@0
|
25 |
*/
|
sl@0
|
26 |
|
sl@0
|
27 |
#ifndef __USET_H__
|
sl@0
|
28 |
#define __USET_H__
|
sl@0
|
29 |
|
sl@0
|
30 |
#include "unicode/utypes.h"
|
sl@0
|
31 |
#include "unicode/uchar.h"
|
sl@0
|
32 |
|
sl@0
|
33 |
#ifndef UCNV_H
|
sl@0
|
34 |
struct USet;
|
sl@0
|
35 |
/**
|
sl@0
|
36 |
* A UnicodeSet. Use the uset_* API to manipulate. Create with
|
sl@0
|
37 |
* uset_open*, and destroy with uset_close.
|
sl@0
|
38 |
* @stable ICU 2.4
|
sl@0
|
39 |
*/
|
sl@0
|
40 |
typedef struct USet USet;
|
sl@0
|
41 |
#endif
|
sl@0
|
42 |
|
sl@0
|
43 |
/**
|
sl@0
|
44 |
* Bitmask values to be passed to uset_openPatternOptions() or
|
sl@0
|
45 |
* uset_applyPattern() taking an option parameter.
|
sl@0
|
46 |
* @stable ICU 2.4
|
sl@0
|
47 |
*/
|
sl@0
|
48 |
enum {
|
sl@0
|
49 |
/**
|
sl@0
|
50 |
* Ignore white space within patterns unless quoted or escaped.
|
sl@0
|
51 |
* @stable ICU 2.4
|
sl@0
|
52 |
*/
|
sl@0
|
53 |
USET_IGNORE_SPACE = 1,
|
sl@0
|
54 |
|
sl@0
|
55 |
/**
|
sl@0
|
56 |
* Enable case-insensitive matching. E.g., "[ab]" with this flag
|
sl@0
|
57 |
* will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
|
sl@0
|
58 |
* match all except 'a', 'A', 'b', and 'B'. This performs a full
|
sl@0
|
59 |
* closure over case mappings, e.g. U+017F for s.
|
sl@0
|
60 |
*
|
sl@0
|
61 |
* The resulting set is a superset of the input for the code points but
|
sl@0
|
62 |
* not for the strings.
|
sl@0
|
63 |
* It performs a case mapping closure of the code points and adds
|
sl@0
|
64 |
* full case folding strings for the code points, and reduces strings of
|
sl@0
|
65 |
* the original set to their full case folding equivalents.
|
sl@0
|
66 |
*
|
sl@0
|
67 |
* This is designed for case-insensitive matches, for example
|
sl@0
|
68 |
* in regular expressions. The full code point case closure allows checking of
|
sl@0
|
69 |
* an input character directly against the closure set.
|
sl@0
|
70 |
* Strings are matched by comparing the case-folded form from the closure
|
sl@0
|
71 |
* set with an incremental case folding of the string in question.
|
sl@0
|
72 |
*
|
sl@0
|
73 |
* The closure set will also contain single code points if the original
|
sl@0
|
74 |
* set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
|
sl@0
|
75 |
* This is not necessary (that is, redundant) for the above matching method
|
sl@0
|
76 |
* but results in the same closure sets regardless of whether the original
|
sl@0
|
77 |
* set contained the code point or a string.
|
sl@0
|
78 |
*
|
sl@0
|
79 |
* @stable ICU 2.4
|
sl@0
|
80 |
*/
|
sl@0
|
81 |
USET_CASE_INSENSITIVE = 2,
|
sl@0
|
82 |
|
sl@0
|
83 |
/**
|
sl@0
|
84 |
* Bitmask for UnicodeSet::closeOver() indicating letter case.
|
sl@0
|
85 |
* This may be ORed together with other selectors. It has the same numeric value (2) as USET_CASE_INSENSITIVE.
|
sl@0
|
86 |
* @internal
|
sl@0
|
87 |
*/
|
sl@0
|
88 |
USET_CASE = 2,
|
sl@0
|
89 |
|
sl@0
|
90 |
/**
|
sl@0
|
91 |
* Enable case-insensitive matching. E.g., "[ab]" with this flag
|
sl@0
|
92 |
* will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
|
sl@0
|
93 |
* match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
|
sl@0
|
94 |
* title-, and uppercase mappings as well as the case folding
|
sl@0
|
95 |
* of each existing element in the set.
|
sl@0
|
96 |
* @draft ICU 3.2
|
sl@0
|
97 |
*/
|
sl@0
|
98 |
USET_ADD_CASE_MAPPINGS = 4,
|
sl@0
|
99 |
|
sl@0
|
100 |
/**
|
sl@0
|
101 |
* Element count of USerializedSet.staticArray; enough for any single-code-point set.
|
sl@0
|
102 |
* @internal
|
sl@0
|
103 |
*/
|
sl@0
|
104 |
USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
|
sl@0
|
105 |
};
|
sl@0
|
106 |
|
sl@0
|
107 |
/**
|
sl@0
|
108 |
* A serialized form of a Unicode set. Limited manipulations are
|
sl@0
|
109 |
* possible directly on a serialized set. See below.
|
sl@0
|
110 |
* @stable ICU 2.4
|
sl@0
|
111 |
*/
|
sl@0
|
112 |
typedef struct USerializedSet {
|
sl@0
|
113 |
/**
|
sl@0
|
114 |
* Pointer to the serialized Unicode Set, stored as read-only 16-bit units.
|
sl@0
|
115 |
* @stable ICU 2.4
|
sl@0
|
116 |
*/
|
sl@0
|
117 |
const uint16_t *array;
|
sl@0
|
118 |
/**
|
sl@0
|
119 |
* The length of the array that contains BMP characters.
|
sl@0
|
120 |
* @stable ICU 2.4
|
sl@0
|
121 |
*/
|
sl@0
|
122 |
int32_t bmpLength;
|
sl@0
|
123 |
/**
|
sl@0
|
124 |
* The total length of the array.
|
sl@0
|
125 |
* @stable ICU 2.4
|
sl@0
|
126 |
*/
|
sl@0
|
127 |
int32_t length;
|
sl@0
|
128 |
/**
|
sl@0
|
129 |
* A small inline buffer of USET_SERIALIZED_STATIC_ARRAY_CAPACITY 16-bit units for the array, to reduce memory allocations.
|
sl@0
|
130 |
* @stable ICU 2.4
|
sl@0
|
131 |
*/
|
sl@0
|
132 |
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
|
sl@0
|
133 |
} USerializedSet;
|
sl@0
|
134 |
|
sl@0
|
135 |
/*********************************************************************
|
sl@0
|
136 |
* USet API
|
sl@0
|
137 |
*********************************************************************/
|
sl@0
|
138 |
|
sl@0
|
139 |
/**
|
sl@0
|
140 |
* Creates a USet object that contains the range of characters
|
sl@0
|
141 |
* start..end, inclusive.
|
sl@0
|
142 |
* @param start first character of the range, inclusive
|
sl@0
|
143 |
* @param end last character of the range, inclusive
|
sl@0
|
144 |
* @return a newly created USet. The caller must call uset_close() on
|
sl@0
|
145 |
* it when done.
|
sl@0
|
146 |
* @stable ICU 2.4
|
sl@0
|
147 |
*/
|
sl@0
|
148 |
U_STABLE USet* U_EXPORT2
|
sl@0
|
149 |
uset_open(UChar32 start, UChar32 end);
|
sl@0
|
150 |
|
sl@0
|
151 |
/**
|
sl@0
|
152 |
* Creates a set from the given pattern. See the UnicodeSet class
|
sl@0
|
153 |
* description for the syntax of the pattern language.
|
sl@0
|
154 |
* @param pattern a string specifying what characters are in the set
|
sl@0
|
155 |
* @param patternLength the length of the pattern, or -1 if null
|
sl@0
|
156 |
* terminated
|
sl@0
|
157 |
* @param ec the error code
|
sl@0
|
158 |
* @stable ICU 2.4
|
sl@0
|
159 |
*/
|
sl@0
|
160 |
U_STABLE USet* U_EXPORT2
|
sl@0
|
161 |
uset_openPattern(const UChar* pattern, int32_t patternLength,
|
sl@0
|
162 |
UErrorCode* ec);
|
sl@0
|
163 |
|
sl@0
|
164 |
/**
|
sl@0
|
165 |
* Creates a set from the given pattern. See the UnicodeSet class
|
sl@0
|
166 |
* description for the syntax of the pattern language.
|
sl@0
|
167 |
* @param pattern a string specifying what characters are in the set
|
sl@0
|
168 |
* @param patternLength the length of the pattern, or -1 if null
|
sl@0
|
169 |
* terminated
|
sl@0
|
170 |
* @param options bitmask for options to apply to the pattern.
|
sl@0
|
171 |
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
|
sl@0
|
172 |
* @param ec the error code
|
sl@0
|
173 |
* @stable ICU 2.4
|
sl@0
|
174 |
*/
|
sl@0
|
175 |
U_STABLE USet* U_EXPORT2
|
sl@0
|
176 |
uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
|
sl@0
|
177 |
uint32_t options,
|
sl@0
|
178 |
UErrorCode* ec);
|
sl@0
|
179 |
|
sl@0
|
180 |
/**
|
sl@0
|
181 |
* Disposes of the storage used by a USet object. This function should
|
sl@0
|
182 |
* be called exactly once for each object returned by a uset_open*() function.
|
sl@0
|
183 |
* @param set the object to dispose of
|
sl@0
|
184 |
* @stable ICU 2.4
|
sl@0
|
185 |
*/
|
sl@0
|
186 |
U_STABLE void U_EXPORT2
|
sl@0
|
187 |
uset_close(USet* set);
|
sl@0
|
188 |
|
sl@0
|
189 |
/**
|
sl@0
|
190 |
* Causes the USet object to represent the range <code>start - end</code>.
|
sl@0
|
191 |
* If <code>start > end</code> then this USet is set to an empty range.
|
sl@0
|
192 |
* @param set the object to set to the given range
|
sl@0
|
193 |
* @param start first character in the set, inclusive
|
sl@0
|
194 |
* @param end last character in the set, inclusive
|
sl@0
|
195 |
* @draft ICU 3.2
|
sl@0
|
196 |
*/
|
sl@0
|
197 |
U_DRAFT void U_EXPORT2
|
sl@0
|
198 |
uset_set(USet* set,
|
sl@0
|
199 |
UChar32 start, UChar32 end);
|
sl@0
|
200 |
|
sl@0
|
201 |
/**
|
sl@0
|
202 |
* Modifies the set to represent the set specified by the given
|
sl@0
|
203 |
* pattern. See the UnicodeSet class description for the syntax of
|
sl@0
|
204 |
* the pattern language. See also the User Guide chapter about UnicodeSet.
|
sl@0
|
205 |
* <em>Empties the set passed before applying the pattern.</em>
|
sl@0
|
206 |
* @param set The set to which the pattern is to be applied.
|
sl@0
|
207 |
* @param pattern A pointer to UChar string specifying what characters are in the set.
|
sl@0
|
208 |
* The character at pattern[0] must be a '['.
|
sl@0
|
209 |
* @param patternLength The length of the UChar string. -1 if NUL terminated.
|
sl@0
|
210 |
* @param options A bitmask for options to apply to the pattern.
|
sl@0
|
211 |
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
|
sl@0
|
212 |
* @param status Returns an error if the pattern cannot be parsed.
|
sl@0
|
213 |
* @return Upon successful parse, the value is
|
sl@0
|
214 |
* the index of the character after the closing ']'
|
sl@0
|
215 |
* of the parsed pattern.
|
sl@0
|
216 |
* If the status code indicates failure, then the return value
|
sl@0
|
217 |
* is the index of the error in the source.
|
sl@0
|
218 |
*
|
sl@0
|
219 |
* @stable ICU 2.8
|
sl@0
|
220 |
*/
|
sl@0
|
221 |
U_STABLE int32_t U_EXPORT2
|
sl@0
|
222 |
uset_applyPattern(USet *set,
|
sl@0
|
223 |
const UChar *pattern, int32_t patternLength,
|
sl@0
|
224 |
uint32_t options,
|
sl@0
|
225 |
UErrorCode *status);
|
sl@0
|
226 |
|
sl@0
|
227 |
/**
|
sl@0
|
228 |
* Modifies the set to contain those code points which have the given value
|
sl@0
|
229 |
* for the given binary or enumerated property, as returned by
|
sl@0
|
230 |
* u_getIntPropertyValue. Prior contents of this set are lost.
|
sl@0
|
231 |
*
|
sl@0
|
232 |
* @param set the object to contain the code points defined by the property
|
sl@0
|
233 |
*
|
sl@0
|
234 |
* @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
|
sl@0
|
235 |
* or UCHAR_INT_START..UCHAR_INT_LIMIT-1
|
sl@0
|
236 |
* or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
|
sl@0
|
237 |
*
|
sl@0
|
238 |
* @param value a value in the range u_getIntPropertyMinValue(prop)..
|
sl@0
|
239 |
* u_getIntPropertyMaxValue(prop), with one exception. If prop is
|
sl@0
|
240 |
* UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
|
sl@0
|
241 |
* rather a mask value produced by U_GET_GC_MASK(). This allows grouped
|
sl@0
|
242 |
* categories such as [:L:] to be represented.
|
sl@0
|
243 |
*
|
sl@0
|
244 |
* @param ec error code input/output parameter
|
sl@0
|
245 |
*
|
sl@0
|
246 |
* @draft ICU 3.2
|
sl@0
|
247 |
*/
|
sl@0
|
248 |
U_DRAFT void U_EXPORT2
|
sl@0
|
249 |
uset_applyIntPropertyValue(USet* set,
|
sl@0
|
250 |
UProperty prop, int32_t value, UErrorCode* ec);
|
sl@0
|
251 |
|
sl@0
|
252 |
/**
|
sl@0
|
253 |
* Modifies the set to contain those code points which have the
|
sl@0
|
254 |
* given value for the given property. Prior contents of this
|
sl@0
|
255 |
* set are lost.
|
sl@0
|
256 |
*
|
sl@0
|
257 |
* @param set the object to contain the code points defined by the given
|
sl@0
|
258 |
* property and value alias
|
sl@0
|
259 |
*
|
sl@0
|
260 |
* @param prop a string specifying a property alias, either short or long.
|
sl@0
|
261 |
* The name is matched loosely. See PropertyAliases.txt for names and a
|
sl@0
|
262 |
* description of loose matching. If the value string is empty, then this
|
sl@0
|
263 |
* string is interpreted as either a General_Category value alias, a Script
|
sl@0
|
264 |
* value alias, a binary property alias, or a special ID. Special IDs are
|
sl@0
|
265 |
* matched loosely and correspond to the following sets:
|
sl@0
|
266 |
*
|
sl@0
|
267 |
* "ANY" = [\\u0000-\\U0010FFFF],
|
sl@0
|
268 |
* "ASCII" = [\\u0000-\\u007F],
|
sl@0
|
269 |
* "Assigned" = [:^Cn:].
|
sl@0
|
270 |
*
|
sl@0
|
271 |
* @param propLength the length of the prop, or -1 if NUL-terminated
|
sl@0
|
272 |
*
|
sl@0
|
273 |
* @param value a string specifying a value alias, either short or long.
|
sl@0
|
274 |
* The name is matched loosely. See PropertyValueAliases.txt for names
|
sl@0
|
275 |
* and a description of loose matching. In addition to aliases listed,
|
sl@0
|
276 |
* numeric values and canonical combining classes may be expressed
|
sl@0
|
277 |
* numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string
|
sl@0
|
278 |
* may also be empty.
|
sl@0
|
279 |
*
|
sl@0
|
280 |
* @param valueLength the length of the value, or -1 if NUL-terminated
|
sl@0
|
281 |
*
|
sl@0
|
282 |
* @param ec error code input/output parameter
|
sl@0
|
283 |
*
|
sl@0
|
284 |
* @draft ICU 3.2
|
sl@0
|
285 |
*/
|
sl@0
|
286 |
U_DRAFT void U_EXPORT2
|
sl@0
|
287 |
uset_applyPropertyAlias(USet* set,
|
sl@0
|
288 |
const UChar *prop, int32_t propLength,
|
sl@0
|
289 |
const UChar *value, int32_t valueLength,
|
sl@0
|
290 |
UErrorCode* ec);
|
sl@0
|
291 |
|
sl@0
|
292 |
/**
|
sl@0
|
293 |
* Return true if the given position, in the given pattern, appears
|
sl@0
|
294 |
* to be the start of a UnicodeSet pattern.
|
sl@0
|
295 |
*
|
sl@0
|
296 |
* @param pattern a string specifying the pattern
|
sl@0
|
297 |
* @param patternLength the length of the pattern, or -1 if NUL-terminated
|
sl@0
|
298 |
* @param pos the given position
|
sl@0
|
299 |
* @draft ICU 3.2
|
sl@0
|
300 |
*/
|
sl@0
|
301 |
U_DRAFT UBool U_EXPORT2
|
sl@0
|
302 |
uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
|
sl@0
|
303 |
int32_t pos);
|
sl@0
|
304 |
|
sl@0
|
305 |
/**
|
sl@0
|
306 |
* Returns a string representation of this set. If the result of
|
sl@0
|
307 |
* calling this function is passed to a uset_openPattern(), it
|
sl@0
|
308 |
* will produce another set that is equal to this one.
|
sl@0
|
309 |
* @param set the set
|
sl@0
|
310 |
* @param result the string to receive the rules, may be NULL
|
sl@0
|
311 |
* @param resultCapacity the capacity of result, may be 0 if result is NULL
|
sl@0
|
312 |
* @param escapeUnprintable if TRUE then convert unprintable
|
sl@0
|
313 |
* characters to their hex escape representations, \\uxxxx or
|
sl@0
|
314 |
* \\Uxxxxxxxx. Unprintable characters are those other than
|
sl@0
|
315 |
* U+000A, U+0020..U+007E.
|
sl@0
|
316 |
* @param ec error code.
|
sl@0
|
317 |
* @return length of string, possibly larger than resultCapacity
|
sl@0
|
318 |
* @stable ICU 2.4
|
sl@0
|
319 |
*/
|
sl@0
|
320 |
U_STABLE int32_t U_EXPORT2
|
sl@0
|
321 |
uset_toPattern(const USet* set,
|
sl@0
|
322 |
UChar* result, int32_t resultCapacity,
|
sl@0
|
323 |
UBool escapeUnprintable,
|
sl@0
|
324 |
UErrorCode* ec);
|
sl@0
|
325 |
|
sl@0
|
326 |
/**
|
sl@0
|
327 |
* Adds the given character to the given USet. After this call,
|
sl@0
|
328 |
* uset_contains(set, c) will return TRUE.
|
sl@0
|
329 |
* @param set the object to which to add the character
|
sl@0
|
330 |
* @param c the character to add
|
sl@0
|
331 |
* @stable ICU 2.4
|
sl@0
|
332 |
*/
|
sl@0
|
333 |
U_STABLE void U_EXPORT2
|
sl@0
|
334 |
uset_add(USet* set, UChar32 c);
|
sl@0
|
335 |
|
sl@0
|
336 |
/**
|
sl@0
|
337 |
* Adds all of the elements in the specified set to this set if
|
sl@0
|
338 |
* they're not already present. This operation effectively
|
sl@0
|
339 |
* modifies this set so that its value is the <i>union</i> of the two
|
sl@0
|
340 |
* sets. The behavior of this operation is unspecified if the specified
|
sl@0
|
341 |
* collection is modified while the operation is in progress.
|
sl@0
|
342 |
*
|
sl@0
|
343 |
* @param set the object to which to add the set
|
sl@0
|
344 |
* @param additionalSet the source set whose elements are to be added to this set.
|
sl@0
|
345 |
* @stable ICU 2.6
|
sl@0
|
346 |
*/
|
sl@0
|
347 |
U_STABLE void U_EXPORT2
|
sl@0
|
348 |
uset_addAll(USet* set, const USet *additionalSet);
|
sl@0
|
349 |
|
sl@0
|
350 |
/**
|
sl@0
|
351 |
* Adds the given range of characters to the given USet. After this call,
|
sl@0
|
352 |
* uset_containsRange(set, start, end) will return TRUE.
|
sl@0
|
353 |
* @param set the object to which to add the character
|
sl@0
|
354 |
* @param start the first character of the range to add, inclusive
|
sl@0
|
355 |
* @param end the last character of the range to add, inclusive
|
sl@0
|
356 |
* @stable ICU 2.2
|
sl@0
|
357 |
*/
|
sl@0
|
358 |
U_STABLE void U_EXPORT2
|
sl@0
|
359 |
uset_addRange(USet* set, UChar32 start, UChar32 end);
|
sl@0
|
360 |
|
sl@0
|
361 |
/**
|
sl@0
|
362 |
* Adds the given string to the given USet. After this call,
|
sl@0
|
363 |
* uset_containsString(set, str, strLen) will return TRUE.
|
sl@0
|
364 |
* @param set the object to which to add the character
|
sl@0
|
365 |
* @param str the string to add
|
sl@0
|
366 |
* @param strLen the length of the string or -1 if null terminated.
|
sl@0
|
367 |
* @stable ICU 2.4
|
sl@0
|
368 |
*/
|
sl@0
|
369 |
U_STABLE void U_EXPORT2
|
sl@0
|
370 |
uset_addString(USet* set, const UChar* str, int32_t strLen);
|
sl@0
|
371 |
|
sl@0
|
372 |
/**
|
sl@0
|
373 |
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
|
sl@0
|
374 |
* If this set already contains any particular character, it has no effect on that character.
|
sl@0
|
375 |
* @param set the object to which to add the character
|
sl@0
|
376 |
* @param str the source string
|
sl@0
|
377 |
* @param strLen the length of the string or -1 if null terminated.
|
sl@0
|
378 |
* @draft ICU 3.4
|
sl@0
|
379 |
*/
|
sl@0
|
380 |
U_DRAFT void U_EXPORT2
|
sl@0
|
381 |
uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
|
sl@0
|
382 |
|
sl@0
|
383 |
/**
|
sl@0
|
384 |
* Removes the given character from the given USet. After this call,
|
sl@0
|
385 |
* uset_contains(set, c) will return FALSE.
|
sl@0
|
386 |
* @param set the object from which to remove the character
|
sl@0
|
387 |
* @param c the character to remove
|
sl@0
|
388 |
* @stable ICU 2.4
|
sl@0
|
389 |
*/
|
sl@0
|
390 |
U_STABLE void U_EXPORT2
|
sl@0
|
391 |
uset_remove(USet* set, UChar32 c);
|
sl@0
|
392 |
|
sl@0
|
393 |
/**
|
sl@0
|
394 |
* Removes the given range of characters from the given USet. After this call,
|
sl@0
|
395 |
* uset_containsRange(set, start, end) will return FALSE.
|
sl@0
|
396 |
* @param set the object from which to remove the range
|
sl@0
|
397 |
* @param start the first character of the range to remove, inclusive
|
sl@0
|
398 |
* @param end the last character of the range to remove, inclusive
|
sl@0
|
399 |
* @stable ICU 2.2
|
sl@0
|
400 |
*/
|
sl@0
|
401 |
U_STABLE void U_EXPORT2
|
sl@0
|
402 |
uset_removeRange(USet* set, UChar32 start, UChar32 end);
|
sl@0
|
403 |
|
sl@0
|
404 |
/**
|
sl@0
|
405 |
* Removes the given string from the given USet. After this call,
|
sl@0
|
406 |
* uset_containsString(set, str, strLen) will return FALSE.
|
sl@0
|
407 |
* @param set the object from which to remove the string
|
sl@0
|
408 |
* @param str the string to remove
|
sl@0
|
409 |
* @param strLen the length of the string or -1 if null terminated.
|
sl@0
|
410 |
* @stable ICU 2.4
|
sl@0
|
411 |
*/
|
sl@0
|
412 |
U_STABLE void U_EXPORT2
|
sl@0
|
413 |
uset_removeString(USet* set, const UChar* str, int32_t strLen);
|
sl@0
|
414 |
|
sl@0
|
415 |
/**
|
sl@0
|
416 |
* Removes from this set all of its elements that are contained in the
|
sl@0
|
417 |
* specified set. This operation effectively modifies this
|
sl@0
|
418 |
* set so that its value is the <i>asymmetric set difference</i> of
|
sl@0
|
419 |
* the two sets.
|
sl@0
|
420 |
* @param set the object from which the elements are to be removed
|
sl@0
|
421 |
* @param removeSet the object that defines which elements will be
|
sl@0
|
422 |
* removed from this set
|
sl@0
|
423 |
* @draft ICU 3.2
|
sl@0
|
424 |
*/
|
sl@0
|
425 |
U_DRAFT void U_EXPORT2
|
sl@0
|
426 |
uset_removeAll(USet* set, const USet* removeSet);
|
sl@0
|
427 |
|
sl@0
|
428 |
/**
|
sl@0
|
429 |
* Retain only the elements in this set that are contained in the
|
sl@0
|
430 |
* specified range. If <code>start > end</code> then an empty range is
|
sl@0
|
431 |
* retained, leaving the set empty. This is equivalent to
|
sl@0
|
432 |
* a boolean logic AND, or a set INTERSECTION.
|
sl@0
|
433 |
*
|
sl@0
|
434 |
* @param set the object for which to retain only the specified range
|
sl@0
|
435 |
* @param start first character, inclusive, of range to be retained
|
sl@0
|
436 |
* to this set.
|
sl@0
|
437 |
* @param end last character, inclusive, of range to be retained
|
sl@0
|
438 |
* to this set.
|
sl@0
|
439 |
* @draft ICU 3.2
|
sl@0
|
440 |
*/
|
sl@0
|
441 |
U_DRAFT void U_EXPORT2
|
sl@0
|
442 |
uset_retain(USet* set, UChar32 start, UChar32 end);
|
sl@0
|
443 |
|
sl@0
|
444 |
/**
|
sl@0
|
445 |
* Retains only the elements in this set that are contained in the
|
sl@0
|
446 |
* specified set. In other words, removes from this set all of
|
sl@0
|
447 |
* its elements that are not contained in the specified set. This
|
sl@0
|
448 |
* operation effectively modifies this set so that its value is
|
sl@0
|
449 |
* the <i>intersection</i> of the two sets.
|
sl@0
|
450 |
*
|
sl@0
|
451 |
* @param set the object on which to perform the retain
|
sl@0
|
452 |
* @param retain set that defines which elements this set will retain
|
sl@0
|
453 |
* @draft ICU 3.2
|
sl@0
|
454 |
*/
|
sl@0
|
455 |
U_DRAFT void U_EXPORT2
|
sl@0
|
456 |
uset_retainAll(USet* set, const USet* retain);
|
sl@0
|
457 |
|
sl@0
|
458 |
/**
|
sl@0
|
459 |
* Reallocate this object's internal structures to take up the least
|
sl@0
|
460 |
* possible space, without changing this object's value.
|
sl@0
|
461 |
*
|
sl@0
|
462 |
* @param set the object on which to perform the compact
|
sl@0
|
463 |
* @draft ICU 3.2
|
sl@0
|
464 |
*/
|
sl@0
|
465 |
U_DRAFT void U_EXPORT2
|
sl@0
|
466 |
uset_compact(USet* set);
|
sl@0
|
467 |
|
sl@0
|
468 |
/**
|
sl@0
|
469 |
* Inverts this set. This operation modifies this set so that
|
sl@0
|
470 |
* its value is its complement. This operation does not affect
|
sl@0
|
471 |
* the multicharacter strings, if any.
|
sl@0
|
472 |
* @param set the set
|
sl@0
|
473 |
* @stable ICU 2.4
|
sl@0
|
474 |
*/
|
sl@0
|
475 |
U_STABLE void U_EXPORT2
|
sl@0
|
476 |
uset_complement(USet* set);
|
sl@0
|
477 |
|
sl@0
|
478 |
/**
|
sl@0
|
479 |
* Complements in this set all elements contained in the specified
|
sl@0
|
480 |
* set. Any character in the other set will be removed if it is
|
sl@0
|
481 |
* in this set, or will be added if it is not in this set.
|
sl@0
|
482 |
*
|
sl@0
|
483 |
* @param set the set with which to complement
|
sl@0
|
484 |
* @param complement set that defines which elements will be xor'ed
|
sl@0
|
485 |
* from this set.
|
sl@0
|
486 |
* @draft ICU 3.2
|
sl@0
|
487 |
*/
|
sl@0
|
488 |
U_DRAFT void U_EXPORT2
|
sl@0
|
489 |
uset_complementAll(USet* set, const USet* complement);
|
sl@0
|
490 |
|
sl@0
|
491 |
/**
|
sl@0
|
492 |
* Removes all of the elements from this set. This set will be
|
sl@0
|
493 |
* empty after this call returns.
|
sl@0
|
494 |
* @param set the set
|
sl@0
|
495 |
* @stable ICU 2.4
|
sl@0
|
496 |
*/
|
sl@0
|
497 |
U_STABLE void U_EXPORT2
|
sl@0
|
498 |
uset_clear(USet* set);
|
sl@0
|
499 |
|
sl@0
|
500 |
/**
|
sl@0
|
501 |
* Returns TRUE if the given USet contains no characters and no
|
sl@0
|
502 |
* strings.
|
sl@0
|
503 |
* @param set the set
|
sl@0
|
504 |
* @return true if set is empty
|
sl@0
|
505 |
* @stable ICU 2.4
|
sl@0
|
506 |
*/
|
sl@0
|
507 |
U_STABLE UBool U_EXPORT2
|
sl@0
|
508 |
uset_isEmpty(const USet* set);
|
sl@0
|
509 |
|
sl@0
|
510 |
/**
|
sl@0
|
511 |
* Returns TRUE if the given USet contains the given character.
|
sl@0
|
512 |
* @param set the set
|
sl@0
|
513 |
* @param c The codepoint to check for within the set
|
sl@0
|
514 |
* @return true if set contains c
|
sl@0
|
515 |
* @stable ICU 2.4
|
sl@0
|
516 |
*/
|
sl@0
|
517 |
U_STABLE UBool U_EXPORT2
|
sl@0
|
518 |
uset_contains(const USet* set, UChar32 c);
|
sl@0
|
519 |
|
sl@0
|
520 |
/**
|
sl@0
|
521 |
* Returns TRUE if the given USet contains all characters c
|
sl@0
|
522 |
* where start <= c && c <= end.
|
sl@0
|
523 |
* @param set the set
|
sl@0
|
524 |
* @param start the first character of the range to test, inclusive
|
sl@0
|
525 |
* @param end the last character of the range to test, inclusive
|
sl@0
|
526 |
* @return TRUE if set contains the range
|
sl@0
|
527 |
* @stable ICU 2.2
|
sl@0
|
528 |
*/
|
sl@0
|
529 |
U_STABLE UBool U_EXPORT2
|
sl@0
|
530 |
uset_containsRange(const USet* set, UChar32 start, UChar32 end);
|
sl@0
|
531 |
|
sl@0
|
532 |
/**
|
sl@0
|
533 |
* Returns TRUE if the given USet contains the given string.
|
sl@0
|
534 |
* @param set the set
|
sl@0
|
535 |
* @param str the string
|
sl@0
|
536 |
* @param strLen the length of the string or -1 if null terminated.
|
sl@0
|
537 |
* @return true if set contains str
|
sl@0
|
538 |
* @stable ICU 2.4
|
sl@0
|
539 |
*/
|
sl@0
|
540 |
U_STABLE UBool U_EXPORT2
|
sl@0
|
541 |
uset_containsString(const USet* set, const UChar* str, int32_t strLen);
|
sl@0
|
542 |
|
sl@0
|
543 |
/**
|
sl@0
|
544 |
* Returns the index of the given character within this set, where
|
sl@0
|
545 |
* the set is ordered by ascending code point. If the character
|
sl@0
|
546 |
* is not in this set, return -1. The inverse of this method is
|
sl@0
|
547 |
* <code>uset_charAt()</code>.
|
sl@0
|
548 |
* @param set the set
|
sl@0
|
549 |
* @param c the character to obtain the index for
|
sl@0
|
550 |
* @return an index from 0..size()-1, or -1
|
sl@0
|
551 |
* @draft ICU 3.2
|
sl@0
|
552 |
*/
|
sl@0
|
553 |
U_DRAFT int32_t U_EXPORT2
|
sl@0
|
554 |
uset_indexOf(const USet* set, UChar32 c);
|
sl@0
|
555 |
|
sl@0
|
556 |
/**
|
sl@0
|
557 |
* Returns the character at the given index within this set, where
|
sl@0
|
558 |
* the set is ordered by ascending code point. If the index is
|
sl@0
|
559 |
* out of range, return (UChar32)-1. The inverse of this method is
|
sl@0
|
560 |
* <code>uset_indexOf()</code>.
|
sl@0
|
561 |
* @param set the set
|
sl@0
|
562 |
* @param index an index from 0..size()-1 to obtain the char for
|
sl@0
|
563 |
* @return the character at the given index, or (UChar32)-1.
|
sl@0
|
564 |
* @draft ICU 3.2
|
sl@0
|
565 |
*/
|
sl@0
|
566 |
U_DRAFT UChar32 U_EXPORT2
|
sl@0
|
567 |
uset_charAt(const USet* set, int32_t index);
|
sl@0
|
568 |
|
sl@0
|
569 |
/**
|
sl@0
|
570 |
* Returns the number of characters and strings contained in the given
|
sl@0
|
571 |
* USet.
|
sl@0
|
572 |
* @param set the set
|
sl@0
|
573 |
* @return a non-negative integer counting the characters and strings
|
sl@0
|
574 |
* contained in set
|
sl@0
|
575 |
* @stable ICU 2.4
|
sl@0
|
576 |
*/
|
sl@0
|
577 |
U_STABLE int32_t U_EXPORT2
|
sl@0
|
578 |
uset_size(const USet* set);
|
sl@0
|
579 |
|
sl@0
|
580 |
/**
|
sl@0
|
581 |
* Returns the number of items in this set. An item is either a range
|
sl@0
|
582 |
* of characters or a single multicharacter string.
|
sl@0
|
583 |
* @param set the set
|
sl@0
|
584 |
* @return a non-negative integer counting the character ranges
|
sl@0
|
585 |
* and/or strings contained in set
|
sl@0
|
586 |
* @stable ICU 2.4
|
sl@0
|
587 |
*/
|
sl@0
|
588 |
U_STABLE int32_t U_EXPORT2
|
sl@0
|
589 |
uset_getItemCount(const USet* set);
|
sl@0
|
590 |
|
sl@0
|
591 |
/**
|
sl@0
|
592 |
* Returns an item of this set. An item is either a range of
|
sl@0
|
593 |
* characters or a single multicharacter string.
|
sl@0
|
594 |
* @param set the set
|
sl@0
|
595 |
* @param itemIndex a non-negative integer in the range 0..
|
sl@0
|
596 |
* uset_getItemCount(set)-1
|
sl@0
|
597 |
* @param start pointer to variable to receive first character
|
sl@0
|
598 |
* in range, inclusive
|
sl@0
|
599 |
* @param end pointer to variable to receive last character in range,
|
sl@0
|
600 |
* inclusive
|
sl@0
|
601 |
* @param str buffer to receive the string, may be NULL
|
sl@0
|
602 |
* @param strCapacity capacity of str, or 0 if str is NULL
|
sl@0
|
603 |
* @param ec error code
|
sl@0
|
604 |
* @return the length of the string (>= 2), or 0 if the item is a
|
sl@0
|
605 |
* range, in which case it is the range *start..*end, or -1 if
|
sl@0
|
606 |
* itemIndex is out of range
|
sl@0
|
607 |
* @stable ICU 2.4
|
sl@0
|
608 |
*/
|
sl@0
|
609 |
U_STABLE int32_t U_EXPORT2
|
sl@0
|
610 |
uset_getItem(const USet* set, int32_t itemIndex,
|
sl@0
|
611 |
UChar32* start, UChar32* end,
|
sl@0
|
612 |
UChar* str, int32_t strCapacity,
|
sl@0
|
613 |
UErrorCode* ec);
|
sl@0
|
614 |
|
sl@0
|
615 |
/**
|
sl@0
|
616 |
* Returns true if set1 contains all the characters and strings
|
sl@0
|
617 |
* of set2. It answers the question, 'Is set1 a superset of set2?'
|
sl@0
|
618 |
* @param set1 set to be checked for containment
|
sl@0
|
619 |
* @param set2 set to be checked for containment
|
sl@0
|
620 |
* @return true if the test condition is met
|
sl@0
|
621 |
* @draft ICU 3.2
|
sl@0
|
622 |
*/
|
sl@0
|
623 |
U_DRAFT UBool U_EXPORT2
|
sl@0
|
624 |
uset_containsAll(const USet* set1, const USet* set2);
|
sl@0
|
625 |
|
sl@0
|
626 |
/**
|
sl@0
|
627 |
* Returns true if this set contains all the characters
|
sl@0
|
628 |
* of the given string. This does not check containment of grapheme
|
sl@0
|
629 |
* clusters, like uset_containsString.
|
sl@0
|
630 |
* @param set set of characters to be checked for containment
|
sl@0
|
631 |
* @param str string containing codepoints to be checked for containment
|
sl@0
|
632 |
* @param strLen the length of the string or -1 if null terminated.
|
sl@0
|
633 |
* @return true if the test condition is met
|
sl@0
|
634 |
* @draft ICU 3.4
|
sl@0
|
635 |
*/
|
sl@0
|
636 |
U_DRAFT UBool U_EXPORT2
|
sl@0
|
637 |
uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
|
sl@0
|
638 |
|
sl@0
|
639 |
/**
|
sl@0
|
640 |
* Returns true if set1 contains none of the characters and strings
|
sl@0
|
641 |
* of set2. It answers the question, 'Are set1 and set2 disjoint?'
|
sl@0
|
642 |
* @param set1 set to be checked for containment
|
sl@0
|
643 |
* @param set2 set to be checked for containment
|
sl@0
|
644 |
* @return true if the test condition is met
|
sl@0
|
645 |
* @draft ICU 3.2
|
sl@0
|
646 |
*/
|
sl@0
|
647 |
U_DRAFT UBool U_EXPORT2
|
sl@0
|
648 |
uset_containsNone(const USet* set1, const USet* set2);
|
sl@0
|
649 |
|
sl@0
|
650 |
/**
|
sl@0
|
651 |
* Returns true if set1 contains some of the characters and strings
|
sl@0
|
652 |
* of set2. It answers the question, 'Do set1 and set2 have an intersection?'
|
sl@0
|
653 |
* @param set1 set to be checked for containment
|
sl@0
|
654 |
* @param set2 set to be checked for containment
|
sl@0
|
655 |
* @return true if the test condition is met
|
sl@0
|
656 |
* @draft ICU 3.2
|
sl@0
|
657 |
*/
|
sl@0
|
658 |
U_DRAFT UBool U_EXPORT2
|
sl@0
|
659 |
uset_containsSome(const USet* set1, const USet* set2);
|
sl@0
|
660 |
|
sl@0
|
661 |
/**
|
sl@0
|
662 |
* Returns true if set1 contains all of the characters and strings
|
sl@0
|
663 |
* of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
|
sl@0
|
664 |
* @param set1 set to be checked for containment
|
sl@0
|
665 |
* @param set2 set to be checked for containment
|
sl@0
|
666 |
* @return true if the test condition is met
|
sl@0
|
667 |
* @draft ICU 3.2
|
sl@0
|
668 |
*/
|
sl@0
|
669 |
U_DRAFT UBool U_EXPORT2
|
sl@0
|
670 |
uset_equals(const USet* set1, const USet* set2);
|
sl@0
|
671 |
|
sl@0
|
672 |
/*********************************************************************
|
sl@0
|
673 |
* Serialized set API
|
sl@0
|
674 |
*********************************************************************/
|
sl@0
|
675 |
|
sl@0
|
676 |
/**
|
sl@0
|
677 |
* Serializes this set into an array of 16-bit integers. Serialization
|
sl@0
|
678 |
* (currently) only records the characters in the set; multicharacter
|
sl@0
|
679 |
* strings are ignored.
|
sl@0
|
680 |
*
|
sl@0
|
681 |
* The array
|
sl@0
|
682 |
* has the following format (each line is one 16-bit integer):
|
sl@0
|
683 |
*
|
sl@0
|
684 |
* length = (n+2*m) | (m!=0?0x8000:0)
|
sl@0
|
685 |
* bmpLength = n; present if m!=0
|
sl@0
|
686 |
* bmp[0]
|
sl@0
|
687 |
* bmp[1]
|
sl@0
|
688 |
* ...
|
sl@0
|
689 |
* bmp[n-1]
|
sl@0
|
690 |
* supp-high[0]
|
sl@0
|
691 |
* supp-low[0]
|
sl@0
|
692 |
* supp-high[1]
|
sl@0
|
693 |
* supp-low[1]
|
sl@0
|
694 |
* ...
|
sl@0
|
695 |
* supp-high[m-1]
|
sl@0
|
696 |
* supp-low[m-1]
|
sl@0
|
697 |
*
|
sl@0
|
698 |
* The array starts with a header. After the header come n BMP
|
sl@0
|
699 |
* code points, then m supplementary code points. Either n or m
|
sl@0
|
700 |
* or both may be zero. n+2*m is always <= 0x7FFF.
|
sl@0
|
701 |
*
|
sl@0
|
702 |
* If there are no supplementary characters (if m==0) then the
|
sl@0
|
703 |
* header is one 16-bit integer, 'length', with value n.
|
sl@0
|
704 |
*
|
sl@0
|
705 |
* If there are supplementary characters (if m!=0) then the header
|
sl@0
|
706 |
* is two 16-bit integers. The first, 'length', has value
|
sl@0
|
707 |
* (n+2*m)|0x8000. The second, 'bmpLength', has value n.
|
sl@0
|
708 |
*
|
sl@0
|
709 |
* After the header the code points are stored in ascending order.
|
sl@0
|
710 |
* Supplementary code points are stored as most significant 16
|
sl@0
|
711 |
* bits followed by least significant 16 bits.
|
sl@0
|
712 |
*
|
sl@0
|
713 |
* @param set the set to be serialized
|
sl@0
|
714 |
* @param dest pointer to buffer of destCapacity 16-bit integers.
|
sl@0
|
715 |
* May be NULL only if destCapacity is zero.
|
sl@0
|
716 |
* @param destCapacity size of dest, or zero. Must not be negative.
|
sl@0
|
717 |
* @param pErrorCode pointer to the error code. Will be set to
|
sl@0
|
718 |
* U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
|
sl@0
|
719 |
* U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
|
sl@0
|
720 |
* @return the total length of the serialized format, including
|
sl@0
|
721 |
* the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
|
sl@0
|
722 |
* than U_BUFFER_OVERFLOW_ERROR.
|
sl@0
|
723 |
* @stable ICU 2.4
|
sl@0
|
724 |
*/
|
sl@0
|
725 |
U_STABLE int32_t U_EXPORT2
|
sl@0
|
726 |
uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
|
sl@0
|
727 |
|
sl@0
|
728 |
/**
|
sl@0
|
729 |
* Given a serialized array, fills in the given serialized set object.
|
sl@0
|
730 |
* @param fillSet pointer to the USerializedSet that receives the result
|
sl@0
|
731 |
* @param src pointer to the start of the array of 16-bit integers
|
sl@0
|
732 |
* @param srcLength length of the array at src
|
sl@0
|
733 |
* @return true if the given array is valid, otherwise false
|
sl@0
|
734 |
* @stable ICU 2.4
|
sl@0
|
735 |
*/
|
sl@0
|
736 |
U_STABLE UBool U_EXPORT2
|
sl@0
|
737 |
uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
|
sl@0
|
738 |
|
sl@0
|
739 |
/**
|
sl@0
|
740 |
* Sets the USerializedSet to contain the given character (and nothing
|
sl@0
|
741 |
* else).
|
sl@0
|
742 |
* @param fillSet pointer to the USerializedSet that receives the result
|
sl@0
|
743 |
* @param c The code point that the set is to contain
|
sl@0
|
744 |
* @stable ICU 2.4
|
sl@0
|
745 |
*/
|
sl@0
|
746 |
U_STABLE void U_EXPORT2
|
sl@0
|
747 |
uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
|
sl@0
|
748 |
|
sl@0
|
749 |
/**
|
sl@0
|
750 |
* Returns true if the given USerializedSet contains the given
|
sl@0
|
751 |
* character.
|
sl@0
|
752 |
* @param set the serialized set
|
sl@0
|
753 |
* @param c The code point to check for within the set
|
sl@0
|
754 |
* @return true if set contains c
|
sl@0
|
755 |
* @stable ICU 2.4
|
sl@0
|
756 |
*/
|
sl@0
|
757 |
U_STABLE UBool U_EXPORT2
|
sl@0
|
758 |
uset_serializedContains(const USerializedSet* set, UChar32 c);
|
sl@0
|
759 |
|
sl@0
|
760 |
/**
|
sl@0
|
761 |
* Returns the number of disjoint ranges of characters contained in
|
sl@0
|
762 |
* the given serialized set. Any strings contained in the set are
|
sl@0
|
763 |
* ignored.
|
sl@0
|
764 |
* @param set the serialized set
|
sl@0
|
765 |
* @return a non-negative integer counting the character ranges
|
sl@0
|
766 |
* contained in set
|
sl@0
|
767 |
* @stable ICU 2.4
|
sl@0
|
768 |
*/
|
sl@0
|
769 |
U_STABLE int32_t U_EXPORT2
|
sl@0
|
770 |
uset_getSerializedRangeCount(const USerializedSet* set);
|
sl@0
|
771 |
|
sl@0
|
772 |
/**
|
sl@0
|
773 |
* Retrieves one range of characters contained in the given serialized
|
sl@0
|
774 |
* set, selected by rangeIndex.
|
sl@0
|
775 |
* @param set the serialized set
|
sl@0
|
776 |
* @param rangeIndex a non-negative integer in the range 0..
|
sl@0
|
777 |
* uset_getSerializedRangeCount(set)-1
|
sl@0
|
778 |
* @param pStart pointer to variable to receive the first character
|
sl@0
|
779 |
* in the range, inclusive
|
sl@0
|
780 |
* @param pEnd pointer to variable to receive the last character in the range,
|
sl@0
|
781 |
* inclusive
|
sl@0
|
782 |
* @return true if rangeIndex is valid, otherwise false
|
sl@0
|
783 |
* @stable ICU 2.4
|
sl@0
|
784 |
*/
|
sl@0
|
785 |
U_STABLE UBool U_EXPORT2
|
sl@0
|
786 |
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
|
sl@0
|
787 |
UChar32* pStart, UChar32* pEnd);
|
sl@0
|
788 |
|
sl@0
|
789 |
#endif
|