os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/uset.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
*******************************************************************************
sl@0
     3
*
sl@0
     4
*   Copyright (C) 2002-2005, International Business Machines
sl@0
     5
*   Corporation and others.  All Rights Reserved.
sl@0
     6
*
sl@0
     7
*******************************************************************************
sl@0
     8
*   file name:  uset.h
sl@0
     9
*   encoding:   US-ASCII
sl@0
    10
*   tab size:   8 (not used)
sl@0
    11
*   indentation:4
sl@0
    12
*
sl@0
    13
*   created on: 2002mar07
sl@0
    14
*   created by: Markus W. Scherer
sl@0
    15
*
sl@0
    16
*   C version of UnicodeSet.
sl@0
    17
*/
sl@0
    18
sl@0
    19
sl@0
    20
/**
sl@0
    21
 * \file
sl@0
    22
 * \brief C API: Unicode Set
sl@0
    23
 *
sl@0
    24
 * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
sl@0
    25
 */
sl@0
    26
sl@0
    27
#ifndef __USET_H__
sl@0
    28
#define __USET_H__
sl@0
    29
sl@0
    30
#include "unicode/utypes.h"
sl@0
    31
#include "unicode/uchar.h"
sl@0
    32
sl@0
    33
#ifndef UCNV_H
sl@0
    34
struct USet;
sl@0
    35
/**
sl@0
    36
 * A UnicodeSet.  Use the uset_* API to manipulate.  Create with
sl@0
    37
 * uset_open*, and destroy with uset_close.
sl@0
    38
 * @stable ICU 2.4
sl@0
    39
 */
sl@0
    40
typedef struct USet USet;
sl@0
    41
#endif
sl@0
    42
sl@0
    43
/**
sl@0
    44
 * Bitmask values to be passed to uset_openPatternOptions() or
sl@0
    45
 * uset_applyPattern() taking an option parameter.
sl@0
    46
 * @stable ICU 2.4
sl@0
    47
 */
sl@0
    48
enum {
sl@0
    49
    /**
sl@0
    50
     * Ignore white space within patterns unless quoted or escaped.
sl@0
    51
     * @stable ICU 2.4
sl@0
    52
     */
sl@0
    53
    USET_IGNORE_SPACE = 1,  
sl@0
    54
sl@0
    55
    /**
sl@0
    56
     * Enable case insensitive matching.  E.g., "[ab]" with this flag
sl@0
    57
     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
sl@0
    58
     * match all except 'a', 'A', 'b', and 'B'. This performs a full
sl@0
    59
     * closure over case mappings, e.g. U+017F for s.
sl@0
    60
     *
sl@0
    61
     * The resulting set is a superset of the input for the code points but
sl@0
    62
     * not for the strings.
sl@0
    63
     * It performs a case mapping closure of the code points and adds
sl@0
    64
     * full case folding strings for the code points, and reduces strings of
sl@0
    65
     * the original set to their full case folding equivalents.
sl@0
    66
     *
sl@0
    67
     * This is designed for case-insensitive matches, for example
sl@0
    68
     * in regular expressions. The full code point case closure allows checking of
sl@0
    69
     * an input character directly against the closure set.
sl@0
    70
     * Strings are matched by comparing the case-folded form from the closure
sl@0
    71
     * set with an incremental case folding of the string in question.
sl@0
    72
     *
sl@0
    73
     * The closure set will also contain single code points if the original
sl@0
    74
     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
sl@0
    75
     * This is not necessary (that is, redundant) for the above matching method
sl@0
    76
     * but results in the same closure sets regardless of whether the original
sl@0
    77
     * set contained the code point or a string.
sl@0
    78
     *
sl@0
    79
     * @stable ICU 2.4
sl@0
    80
     */
sl@0
    81
    USET_CASE_INSENSITIVE = 2,  
sl@0
    82
sl@0
    83
    /**
sl@0
    84
     * Bitmask for UnicodeSet::closeOver() indicating letter case.
sl@0
    85
     * This may be ORed together with other selectors.
sl@0
    86
     * @internal
sl@0
    87
     */
sl@0
    88
    USET_CASE = 2,
sl@0
    89
sl@0
    90
    /**
sl@0
    91
     * Enable case insensitive matching.  E.g., "[ab]" with this flag
sl@0
    92
     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
sl@0
    93
     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
sl@0
    94
     * title-, and uppercase mappings as well as the case folding
sl@0
    95
     * of each existing element in the set.
sl@0
    96
     * @draft ICU 3.2
sl@0
    97
     */
sl@0
    98
    USET_ADD_CASE_MAPPINGS = 4,
sl@0
    99
sl@0
   100
    /**
sl@0
   101
     * Enough for any single-code point set
sl@0
   102
     * @internal
sl@0
   103
     */
sl@0
   104
    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
sl@0
   105
};
sl@0
   106
sl@0
   107
/**
sl@0
   108
 * A serialized form of a Unicode set.  Limited manipulations are
sl@0
   109
 * possible directly on a serialized set.  See below.
sl@0
   110
 * @stable ICU 2.4
sl@0
   111
 */
sl@0
   112
typedef struct USerializedSet {
sl@0
   113
    /**
sl@0
   114
     * The serialized Unicode Set.
sl@0
   115
     * @stable ICU 2.4
sl@0
   116
     */
sl@0
   117
    const uint16_t *array;
sl@0
   118
    /**
sl@0
   119
     * The length of the array that contains BMP characters.
sl@0
   120
     * @stable ICU 2.4
sl@0
   121
     */
sl@0
   122
    int32_t bmpLength;
sl@0
   123
    /**
sl@0
   124
     * The total length of the array.
sl@0
   125
     * @stable ICU 2.4
sl@0
   126
     */
sl@0
   127
    int32_t length;
sl@0
   128
    /**
sl@0
   129
     * A small buffer for the array to reduce memory allocations.
sl@0
   130
     * @stable ICU 2.4
sl@0
   131
     */
sl@0
   132
    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
sl@0
   133
} USerializedSet;
sl@0
   134
sl@0
   135
/*********************************************************************
sl@0
   136
 * USet API
sl@0
   137
 *********************************************************************/
sl@0
   138
sl@0
   139
/**
sl@0
   140
 * Creates a USet object that contains the range of characters
sl@0
   141
 * start..end, inclusive.
sl@0
   142
 * @param start first character of the range, inclusive
sl@0
   143
 * @param end last character of the range, inclusive
sl@0
   144
 * @return a newly created USet.  The caller must call uset_close() on
sl@0
   145
 * it when done.
sl@0
   146
 * @stable ICU 2.4
sl@0
   147
 */
sl@0
   148
U_STABLE USet* U_EXPORT2
sl@0
   149
uset_open(UChar32 start, UChar32 end);
sl@0
   150
sl@0
   151
/**
sl@0
   152
 * Creates a set from the given pattern.  See the UnicodeSet class
sl@0
   153
 * description for the syntax of the pattern language.
sl@0
   154
 * @param pattern a string specifying what characters are in the set
sl@0
   155
 * @param patternLength the length of the pattern, or -1 if null
sl@0
   156
 * terminated
sl@0
   157
 * @param ec the error code
sl@0
   158
 * @stable ICU 2.4
sl@0
   159
 */
sl@0
   160
U_STABLE USet* U_EXPORT2
sl@0
   161
uset_openPattern(const UChar* pattern, int32_t patternLength,
sl@0
   162
                 UErrorCode* ec);
sl@0
   163
sl@0
   164
/**
sl@0
   165
 * Creates a set from the given pattern.  See the UnicodeSet class
sl@0
   166
 * description for the syntax of the pattern language.
sl@0
   167
 * @param pattern a string specifying what characters are in the set
sl@0
   168
 * @param patternLength the length of the pattern, or -1 if null
sl@0
   169
 * terminated
sl@0
   170
 * @param options bitmask for options to apply to the pattern.
sl@0
   171
 * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
sl@0
   172
 * @param ec the error code
sl@0
   173
 * @stable ICU 2.4
sl@0
   174
 */
sl@0
   175
U_STABLE USet* U_EXPORT2
sl@0
   176
uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
sl@0
   177
                 uint32_t options,
sl@0
   178
                 UErrorCode* ec);
sl@0
   179
sl@0
   180
/**
sl@0
   181
 * Disposes of the storage used by a USet object.  This function should
sl@0
   182
 * be called exactly once for each object returned by a uset_open*() function.
sl@0
   183
 * @param set the object to dispose of
sl@0
   184
 * @stable ICU 2.4
sl@0
   185
 */
sl@0
   186
U_STABLE void U_EXPORT2
sl@0
   187
uset_close(USet* set);
sl@0
   188
sl@0
   189
/**
sl@0
   190
 * Causes the USet object to represent the range <code>start - end</code>.
sl@0
   191
 * If <code>start > end</code> then this USet is set to an empty range.
sl@0
   192
 * @param set the object to set to the given range
sl@0
   193
 * @param start first character in the set, inclusive
sl@0
   194
 * @param end last character in the set, inclusive
sl@0
   195
 * @draft ICU 3.2
sl@0
   196
 */
sl@0
   197
U_DRAFT void U_EXPORT2
sl@0
   198
uset_set(USet* set,
sl@0
   199
         UChar32 start, UChar32 end);
sl@0
   200
sl@0
   201
/**
sl@0
   202
 * Modifies the set to represent the set specified by the given
sl@0
   203
 * pattern. See the UnicodeSet class description for the syntax of 
sl@0
   204
 * the pattern language. See also the User Guide chapter about UnicodeSet.
sl@0
   205
 * <em>Empties the set passed before applying the pattern.</em>
sl@0
   206
 * @param set               The set to which the pattern is to be applied. 
sl@0
   207
 * @param pattern           A pointer to UChar string specifying what characters are in the set.
sl@0
   208
 *                          The character at pattern[0] must be a '['.
sl@0
   209
 * @param patternLength     The length of the UChar string. -1 if NUL terminated.
sl@0
   210
 * @param options           A bitmask for options to apply to the pattern.
sl@0
   211
 *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
sl@0
   212
 * @param status            Returns an error if the pattern cannot be parsed.
sl@0
   213
 * @return                  Upon successful parse, the value is
sl@0
   214
 *                          the index of the character after the closing ']' 
sl@0
   215
 *                          of the parsed pattern.
sl@0
   216
 *                          If the status code indicates failure, then the return value 
sl@0
   217
 *                          is the index of the error in the source.
sl@0
   218
 *                                  
sl@0
   219
 * @stable ICU 2.8
sl@0
   220
 */
sl@0
   221
U_STABLE int32_t U_EXPORT2 
sl@0
   222
uset_applyPattern(USet *set,
sl@0
   223
                  const UChar *pattern, int32_t patternLength,
sl@0
   224
                  uint32_t options,
sl@0
   225
                  UErrorCode *status);
sl@0
   226
sl@0
   227
/**
sl@0
   228
 * Modifies the set to contain those code points which have the given value
sl@0
   229
 * for the given binary or enumerated property, as returned by
sl@0
   230
 * u_getIntPropertyValue.  Prior contents of this set are lost.
sl@0
   231
 *
sl@0
   232
 * @param set the object to contain the code points defined by the property
sl@0
   233
 *
sl@0
   234
 * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
sl@0
   235
 * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
sl@0
   236
 * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
sl@0
   237
 *
sl@0
   238
 * @param value a value in the range u_getIntPropertyMinValue(prop)..
sl@0
   239
 * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
sl@0
   240
 * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
sl@0
   241
 * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
sl@0
   242
 * categories such as [:L:] to be represented.
sl@0
   243
 *
sl@0
   244
 * @param ec error code input/output parameter
sl@0
   245
 *
sl@0
   246
 * @draft ICU 3.2
sl@0
   247
 */
sl@0
   248
U_DRAFT void U_EXPORT2
sl@0
   249
uset_applyIntPropertyValue(USet* set,
sl@0
   250
                           UProperty prop, int32_t value, UErrorCode* ec);
sl@0
   251
sl@0
   252
/**
sl@0
   253
 * Modifies the set to contain those code points which have the
sl@0
   254
 * given value for the given property.  Prior contents of this
sl@0
   255
 * set are lost.
sl@0
   256
 *
sl@0
   257
 * @param set the object to contain the code points defined by the given
sl@0
   258
 * property and value alias
sl@0
   259
 *
sl@0
   260
 * @param prop a string specifying a property alias, either short or long.
sl@0
   261
 * The name is matched loosely.  See PropertyAliases.txt for names and a
sl@0
   262
 * description of loose matching.  If the value string is empty, then this
sl@0
   263
 * string is interpreted as either a General_Category value alias, a Script
sl@0
   264
 * value alias, a binary property alias, or a special ID.  Special IDs are
sl@0
   265
 * matched loosely and correspond to the following sets:
sl@0
   266
 *
sl@0
   267
 * "ANY" = [\\u0000-\\U0010FFFF],
sl@0
   268
 * "ASCII" = [\\u0000-\\u007F],
sl@0
   269
 * "Assigned" = [:^Cn:].
sl@0
   270
 *
sl@0
   271
 * @param propLength the length of the prop, or -1 if NUL-terminated
sl@0
   272
 *
sl@0
   273
 * @param value a string specifying a value alias, either short or long.
sl@0
   274
 * The name is matched loosely.  See PropertyValueAliases.txt for names
sl@0
   275
 * and a description of loose matching.  In addition to aliases listed,
sl@0
   276
 * numeric values and canonical combining classes may be expressed
sl@0
   277
 * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
sl@0
   278
 * may also be empty.
sl@0
   279
 *
sl@0
   280
 * @param valueLength the length of the value, or -1 if NUL-terminated
sl@0
   281
 *
sl@0
   282
 * @param ec error code input/output parameter
sl@0
   283
 *
sl@0
   284
 * @draft ICU 3.2
sl@0
   285
 */
sl@0
   286
U_DRAFT void U_EXPORT2
sl@0
   287
uset_applyPropertyAlias(USet* set,
sl@0
   288
                        const UChar *prop, int32_t propLength,
sl@0
   289
                        const UChar *value, int32_t valueLength,
sl@0
   290
                        UErrorCode* ec);
sl@0
   291
sl@0
   292
/**
sl@0
   293
 * Return true if the given position, in the given pattern, appears
sl@0
   294
 * to be the start of a UnicodeSet pattern.
sl@0
   295
 *
sl@0
   296
 * @param pattern a string specifying the pattern
sl@0
   297
 * @param patternLength the length of the pattern, or -1 if NUL-terminated
sl@0
   298
 * @param pos the given position
sl@0
   299
 * @draft ICU 3.2
sl@0
   300
 */
sl@0
   301
U_DRAFT UBool U_EXPORT2
sl@0
   302
uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
sl@0
   303
                      int32_t pos);
sl@0
   304
sl@0
   305
/**
sl@0
   306
 * Returns a string representation of this set.  If the result of
sl@0
   307
 * calling this function is passed to a uset_openPattern(), it
sl@0
   308
 * will produce another set that is equal to this one.
sl@0
   309
 * @param set the set
sl@0
   310
 * @param result the string to receive the rules, may be NULL
sl@0
   311
 * @param resultCapacity the capacity of result, may be 0 if result is NULL
sl@0
   312
 * @param escapeUnprintable if TRUE then convert unprintable
sl@0
   313
 * character to their hex escape representations, \\uxxxx or
sl@0
   314
 * \\Uxxxxxxxx.  Unprintable characters are those other than
sl@0
   315
 * U+000A, U+0020..U+007E.
sl@0
   316
 * @param ec error code.
sl@0
   317
 * @return length of string, possibly larger than resultCapacity
sl@0
   318
 * @stable ICU 2.4
sl@0
   319
 */
sl@0
   320
U_STABLE int32_t U_EXPORT2
sl@0
   321
uset_toPattern(const USet* set,
sl@0
   322
               UChar* result, int32_t resultCapacity,
sl@0
   323
               UBool escapeUnprintable,
sl@0
   324
               UErrorCode* ec);
sl@0
   325
sl@0
   326
/**
sl@0
   327
 * Adds the given character to the given USet.  After this call,
sl@0
   328
 * uset_contains(set, c) will return TRUE.
sl@0
   329
 * @param set the object to which to add the character
sl@0
   330
 * @param c the character to add
sl@0
   331
 * @stable ICU 2.4
sl@0
   332
 */
sl@0
   333
U_STABLE void U_EXPORT2
sl@0
   334
uset_add(USet* set, UChar32 c);
sl@0
   335
sl@0
   336
/**
sl@0
   337
 * Adds all of the elements in the specified set to this set if
sl@0
   338
 * they're not already present.  This operation effectively
sl@0
   339
 * modifies this set so that its value is the <i>union</i> of the two
sl@0
   340
 * sets.  The behavior of this operation is unspecified if the specified
sl@0
   341
 * collection is modified while the operation is in progress.
sl@0
   342
 *
sl@0
   343
 * @param set the object to which to add the set
sl@0
   344
 * @param additionalSet the source set whose elements are to be added to this set.
sl@0
   345
 * @stable ICU 2.6
sl@0
   346
 */
sl@0
   347
U_STABLE void U_EXPORT2
sl@0
   348
uset_addAll(USet* set, const USet *additionalSet);
sl@0
   349
sl@0
   350
/**
sl@0
   351
 * Adds the given range of characters to the given USet.  After this call,
sl@0
   352
 * uset_containsRange(set, start, end) will return TRUE.
sl@0
   353
 * @param set the object to which to add the range
sl@0
   354
 * @param start the first character of the range to add, inclusive
sl@0
   355
 * @param end the last character of the range to add, inclusive
sl@0
   356
 * @stable ICU 2.2
sl@0
   357
 */
sl@0
   358
U_STABLE void U_EXPORT2
sl@0
   359
uset_addRange(USet* set, UChar32 start, UChar32 end);
sl@0
   360
sl@0
   361
/**
sl@0
   362
 * Adds the given string to the given USet.  After this call,
sl@0
   363
 * uset_containsString(set, str, strLen) will return TRUE.
sl@0
   364
 * @param set the object to which to add the string
sl@0
   365
 * @param str the string to add
sl@0
   366
 * @param strLen the length of the string or -1 if null terminated.
sl@0
   367
 * @stable ICU 2.4
sl@0
   368
 */
sl@0
   369
U_STABLE void U_EXPORT2
sl@0
   370
uset_addString(USet* set, const UChar* str, int32_t strLen);
sl@0
   371
sl@0
   372
/**
sl@0
   373
 * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
sl@0
   374
 * If this set already contains any particular character, it has no effect on that character.
sl@0
   375
 * @param set the object to which to add the characters
sl@0
   376
 * @param str the source string
sl@0
   377
 * @param strLen the length of the string or -1 if null terminated.
sl@0
   378
 * @draft ICU 3.4
sl@0
   379
 */
sl@0
   380
U_DRAFT void U_EXPORT2
sl@0
   381
uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
sl@0
   382
sl@0
   383
/**
sl@0
   384
 * Removes the given character from the given USet.  After this call,
sl@0
   385
 * uset_contains(set, c) will return FALSE.
sl@0
   386
 * @param set the object from which to remove the character
sl@0
   387
 * @param c the character to remove
sl@0
   388
 * @stable ICU 2.4
sl@0
   389
 */
sl@0
   390
U_STABLE void U_EXPORT2
sl@0
   391
uset_remove(USet* set, UChar32 c);
sl@0
   392
sl@0
   393
/**
sl@0
   394
 * Removes the given range of characters from the given USet.  After this call,
sl@0
   395
 * uset_containsRange(set, start, end) will return FALSE.
sl@0
   396
 * @param set the object from which to remove the range
sl@0
   397
 * @param start the first character of the range to remove, inclusive
sl@0
   398
 * @param end the last character of the range to remove, inclusive
sl@0
   399
 * @stable ICU 2.2
sl@0
   400
 */
sl@0
   401
U_STABLE void U_EXPORT2
sl@0
   402
uset_removeRange(USet* set, UChar32 start, UChar32 end);
sl@0
   403
sl@0
   404
/**
sl@0
   405
 * Removes the given string from the given USet.  After this call,
sl@0
   406
 * uset_containsString(set, str, strLen) will return FALSE.
sl@0
   407
 * @param set the object from which to remove the string
sl@0
   408
 * @param str the string to remove
sl@0
   409
 * @param strLen the length of the string or -1 if null terminated.
sl@0
   410
 * @stable ICU 2.4
sl@0
   411
 */
sl@0
   412
U_STABLE void U_EXPORT2
sl@0
   413
uset_removeString(USet* set, const UChar* str, int32_t strLen);
sl@0
   414
sl@0
   415
/**
sl@0
   416
 * Removes from this set all of its elements that are contained in the
sl@0
   417
 * specified set.  This operation effectively modifies this
sl@0
   418
 * set so that its value is the <i>asymmetric set difference</i> of
sl@0
   419
 * the two sets.
sl@0
   420
 * @param set the object from which the elements are to be removed
sl@0
   421
 * @param removeSet the object that defines which elements will be
sl@0
   422
 * removed from this set
sl@0
   423
 * @draft ICU 3.2
sl@0
   424
 */
sl@0
   425
U_DRAFT void U_EXPORT2
sl@0
   426
uset_removeAll(USet* set, const USet* removeSet);
sl@0
   427
sl@0
   428
/**
sl@0
   429
 * Retain only the elements in this set that are contained in the
sl@0
   430
 * specified range.  If <code>start > end</code> then an empty range is
sl@0
   431
 * retained, leaving the set empty.  This is equivalent to
sl@0
   432
 * a boolean logic AND, or a set INTERSECTION.
sl@0
   433
 *
sl@0
   434
 * @param set the object for which to retain only the specified range
sl@0
   435
 * @param start first character, inclusive, of range to be retained
sl@0
   436
 * to this set.
sl@0
   437
 * @param end last character, inclusive, of range to be retained
sl@0
   438
 * to this set.
sl@0
   439
 * @draft ICU 3.2
sl@0
   440
 */
sl@0
   441
U_DRAFT void U_EXPORT2
sl@0
   442
uset_retain(USet* set, UChar32 start, UChar32 end);
sl@0
   443
sl@0
   444
/**
sl@0
   445
 * Retains only the elements in this set that are contained in the
sl@0
   446
 * specified set.  In other words, removes from this set all of
sl@0
   447
 * its elements that are not contained in the specified set.  This
sl@0
   448
 * operation effectively modifies this set so that its value is
sl@0
   449
 * the <i>intersection</i> of the two sets.
sl@0
   450
 *
sl@0
   451
 * @param set the object on which to perform the retain
sl@0
   452
 * @param retain set that defines which elements this set will retain
sl@0
   453
 * @draft ICU 3.2
sl@0
   454
 */
sl@0
   455
U_DRAFT void U_EXPORT2
sl@0
   456
uset_retainAll(USet* set, const USet* retain);
sl@0
   457
sl@0
   458
/**
sl@0
   459
 * Reallocate this object's internal structures to take up the least
sl@0
   460
 * possible space, without changing this object's value.
sl@0
   461
 *
sl@0
   462
 * @param set the object on which to perform the compact
sl@0
   463
 * @draft ICU 3.2
sl@0
   464
 */
sl@0
   465
U_DRAFT void U_EXPORT2
sl@0
   466
uset_compact(USet* set);
sl@0
   467
sl@0
   468
/**
sl@0
   469
 * Inverts this set.  This operation modifies this set so that
sl@0
   470
 * its value is its complement.  This operation does not affect
sl@0
   471
 * the multicharacter strings, if any.
sl@0
   472
 * @param set the set
sl@0
   473
 * @stable ICU 2.4
sl@0
   474
 */
sl@0
   475
U_STABLE void U_EXPORT2
sl@0
   476
uset_complement(USet* set);
sl@0
   477
sl@0
   478
/**
sl@0
   479
 * Complements in this set all elements contained in the specified
sl@0
   480
 * set.  Any character in the other set will be removed if it is
sl@0
   481
 * in this set, or will be added if it is not in this set.
sl@0
   482
 *
sl@0
   483
 * @param set the set with which to complement
sl@0
   484
 * @param complement set that defines which elements will be xor'ed
sl@0
   485
 * from this set.
sl@0
   486
 * @draft ICU 3.2
sl@0
   487
 */
sl@0
   488
U_DRAFT void U_EXPORT2
sl@0
   489
uset_complementAll(USet* set, const USet* complement);
sl@0
   490
sl@0
   491
/**
sl@0
   492
 * Removes all of the elements from this set.  This set will be
sl@0
   493
 * empty after this call returns.
sl@0
   494
 * @param set the set
sl@0
   495
 * @stable ICU 2.4
sl@0
   496
 */
sl@0
   497
U_STABLE void U_EXPORT2
sl@0
   498
uset_clear(USet* set);
sl@0
   499
sl@0
   500
/**
sl@0
   501
 * Returns TRUE if the given USet contains no characters and no
sl@0
   502
 * strings.
sl@0
   503
 * @param set the set
sl@0
   504
 * @return true if set is empty
sl@0
   505
 * @stable ICU 2.4
sl@0
   506
 */
sl@0
   507
U_STABLE UBool U_EXPORT2
sl@0
   508
uset_isEmpty(const USet* set);
sl@0
   509
sl@0
   510
/**
sl@0
   511
 * Returns TRUE if the given USet contains the given character.
sl@0
   512
 * @param set the set
sl@0
   513
 * @param c The codepoint to check for within the set
sl@0
   514
 * @return true if set contains c
sl@0
   515
 * @stable ICU 2.4
sl@0
   516
 */
sl@0
   517
U_STABLE UBool U_EXPORT2
sl@0
   518
uset_contains(const USet* set, UChar32 c);
sl@0
   519
sl@0
   520
/**
sl@0
   521
 * Returns TRUE if the given USet contains all characters c
sl@0
   522
 * where start <= c && c <= end.
sl@0
   523
 * @param set the set
sl@0
   524
 * @param start the first character of the range to test, inclusive
sl@0
   525
 * @param end the last character of the range to test, inclusive
sl@0
   526
 * @return TRUE if set contains the range
sl@0
   527
 * @stable ICU 2.2
sl@0
   528
 */
sl@0
   529
U_STABLE UBool U_EXPORT2
sl@0
   530
uset_containsRange(const USet* set, UChar32 start, UChar32 end);
sl@0
   531
sl@0
   532
/**
sl@0
   533
 * Returns TRUE if the given USet contains the given string.
sl@0
   534
 * @param set the set
sl@0
   535
 * @param str the string
sl@0
   536
 * @param strLen the length of the string or -1 if null terminated.
sl@0
   537
 * @return true if set contains str
sl@0
   538
 * @stable ICU 2.4
sl@0
   539
 */
sl@0
   540
U_STABLE UBool U_EXPORT2
sl@0
   541
uset_containsString(const USet* set, const UChar* str, int32_t strLen);
sl@0
   542
sl@0
   543
/**
sl@0
   544
 * Returns the index of the given character within this set, where
sl@0
   545
 * the set is ordered by ascending code point.  If the character
sl@0
   546
 * is not in this set, return -1.  The inverse of this method is
sl@0
   547
 * <code>uset_charAt()</code>.
sl@0
   548
 * @param set the set
sl@0
   549
 * @param c the character to obtain the index for
sl@0
   550
 * @return an index from 0..uset_size(set)-1, or -1
sl@0
   551
 * @draft ICU 3.2
sl@0
   552
 */
sl@0
   553
U_DRAFT int32_t U_EXPORT2
sl@0
   554
uset_indexOf(const USet* set, UChar32 c);
sl@0
   555
sl@0
   556
/**
sl@0
   557
 * Returns the character at the given index within this set, where
sl@0
   558
 * the set is ordered by ascending code point.  If the index is
sl@0
   559
 * out of range, return (UChar32)-1.  The inverse of this method is
sl@0
   560
 * <code>uset_indexOf()</code>.
sl@0
   561
 * @param set the set
sl@0
   562
 * @param index an index from 0..uset_size(set)-1 to obtain the char for
sl@0
   563
 * @return the character at the given index, or (UChar32)-1.
sl@0
   564
 * @draft ICU 3.2
sl@0
   565
 */
sl@0
   566
U_DRAFT UChar32 U_EXPORT2
sl@0
   567
uset_charAt(const USet* set, int32_t index);
sl@0
   568
sl@0
   569
/**
sl@0
   570
 * Returns the number of characters and strings contained in the given
sl@0
   571
 * USet.
sl@0
   572
 * @param set the set
sl@0
   573
 * @return a non-negative integer counting the characters and strings
sl@0
   574
 * contained in set
sl@0
   575
 * @stable ICU 2.4
sl@0
   576
 */
sl@0
   577
U_STABLE int32_t U_EXPORT2
sl@0
   578
uset_size(const USet* set);
sl@0
   579
sl@0
   580
/**
sl@0
   581
 * Returns the number of items in this set.  An item is either a range
sl@0
   582
 * of characters or a single multicharacter string.
sl@0
   583
 * @param set the set
sl@0
   584
 * @return a non-negative integer counting the character ranges
sl@0
   585
 * and/or strings contained in set
sl@0
   586
 * @stable ICU 2.4
sl@0
   587
 */
sl@0
   588
U_STABLE int32_t U_EXPORT2
sl@0
   589
uset_getItemCount(const USet* set);
sl@0
   590
sl@0
   591
/**
sl@0
   592
 * Returns an item of this set.  An item is either a range of
sl@0
   593
 * characters or a single multicharacter string.
sl@0
   594
 * @param set the set
sl@0
   595
 * @param itemIndex a non-negative integer in the range 0..
sl@0
   596
 * uset_getItemCount(set)-1
sl@0
   597
 * @param start pointer to variable to receive first character
sl@0
   598
 * in range, inclusive
sl@0
   599
 * @param end pointer to variable to receive last character in range,
sl@0
   600
 * inclusive
sl@0
   601
 * @param str buffer to receive the string, may be NULL
sl@0
   602
 * @param strCapacity capacity of str, or 0 if str is NULL
sl@0
   603
 * @param ec error code
sl@0
   604
 * @return the length of the string (>= 2), or 0 if the item is a
sl@0
   605
 * range, in which case it is the range *start..*end, or -1 if
sl@0
   606
 * itemIndex is out of range
sl@0
   607
 * @stable ICU 2.4
sl@0
   608
 */
sl@0
   609
U_STABLE int32_t U_EXPORT2
sl@0
   610
uset_getItem(const USet* set, int32_t itemIndex,
sl@0
   611
             UChar32* start, UChar32* end,
sl@0
   612
             UChar* str, int32_t strCapacity,
sl@0
   613
             UErrorCode* ec);
sl@0
   614
sl@0
   615
/**
sl@0
   616
 * Returns true if set1 contains all the characters and strings
sl@0
   617
 * of set2. It answers the question, 'Is set1 a superset of set2?'
sl@0
   618
 * @param set1 set to be checked for containment
sl@0
   619
 * @param set2 set to be checked for containment
sl@0
   620
 * @return true if the test condition is met
sl@0
   621
 * @draft ICU 3.2
sl@0
   622
 */
sl@0
   623
U_DRAFT UBool U_EXPORT2
sl@0
   624
uset_containsAll(const USet* set1, const USet* set2);
sl@0
   625
sl@0
   626
/**
sl@0
   627
 * Returns true if this set contains all the characters
sl@0
   628
 * of the given string. This does not check containment of grapheme
sl@0
   629
 * clusters, like uset_containsString.
sl@0
   630
 * @param set set of characters to be checked for containment
sl@0
   631
 * @param str string containing codepoints to be checked for containment
sl@0
   632
 * @param strLen the length of the string or -1 if null terminated.
sl@0
   633
 * @return true if the test condition is met
sl@0
   634
 * @draft ICU 3.4
sl@0
   635
 */
sl@0
   636
U_DRAFT UBool U_EXPORT2
sl@0
   637
uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
sl@0
   638
sl@0
   639
/**
sl@0
   640
 * Returns true if set1 contains none of the characters and strings
sl@0
   641
 * of set2. It answers the question, 'Are set1 and set2 disjoint?'
sl@0
   642
 * @param set1 set to be checked for containment
sl@0
   643
 * @param set2 set to be checked for containment
sl@0
   644
 * @return true if the test condition is met
sl@0
   645
 * @draft ICU 3.2
sl@0
   646
 */
sl@0
   647
U_DRAFT UBool U_EXPORT2
sl@0
   648
uset_containsNone(const USet* set1, const USet* set2);
sl@0
   649
sl@0
   650
/**
sl@0
   651
 * Returns true if set1 contains some of the characters and strings
sl@0
   652
 * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
sl@0
   653
 * @param set1 set to be checked for containment
sl@0
   654
 * @param set2 set to be checked for containment
sl@0
   655
 * @return true if the test condition is met
sl@0
   656
 * @draft ICU 3.2
sl@0
   657
 */
sl@0
   658
U_DRAFT UBool U_EXPORT2
sl@0
   659
uset_containsSome(const USet* set1, const USet* set2);
sl@0
   660
sl@0
   661
/**
sl@0
   662
 * Returns true if set1 contains all of the characters and strings
sl@0
   663
 * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
sl@0
   664
 * @param set1 set to be checked for containment
sl@0
   665
 * @param set2 set to be checked for containment
sl@0
   666
 * @return true if the test condition is met
sl@0
   667
 * @draft ICU 3.2
sl@0
   668
 */
sl@0
   669
U_DRAFT UBool U_EXPORT2
sl@0
   670
uset_equals(const USet* set1, const USet* set2);
sl@0
   671
sl@0
   672
/*********************************************************************
sl@0
   673
 * Serialized set API
sl@0
   674
 *********************************************************************/
sl@0
   675
sl@0
   676
/**
sl@0
   677
 * Serializes the given set into an array of 16-bit integers.  Serialization
sl@0
   678
 * (currently) only records the characters in the set; multicharacter
sl@0
   679
 * strings are ignored.
sl@0
   680
 *
sl@0
   681
 * The array
sl@0
   682
 * has the following format (each line is one 16-bit integer):
sl@0
   683
 *
sl@0
   684
 *  length     = (n+2*m) | (m!=0?0x8000:0)
sl@0
   685
 *  bmpLength  = n; present if m!=0
sl@0
   686
 *  bmp[0]
sl@0
   687
 *  bmp[1]
sl@0
   688
 *  ...
sl@0
   689
 *  bmp[n-1]
sl@0
   690
 *  supp-high[0]
sl@0
   691
 *  supp-low[0]
sl@0
   692
 *  supp-high[1]
sl@0
   693
 *  supp-low[1]
sl@0
   694
 *  ...
sl@0
   695
 *  supp-high[m-1]
sl@0
   696
 *  supp-low[m-1]
sl@0
   697
 *
sl@0
   698
 * The array starts with a header.  After the header are n BMP
sl@0
   699
 * code points, then m supplementary code points.  Either n or m
sl@0
   700
 * or both may be zero.  n+2*m is always <= 0x7FFF.
sl@0
   701
 *
sl@0
   702
 * If there are no supplementary characters (if m==0) then the
sl@0
   703
 * header is one 16-bit integer, 'length', with value n.
sl@0
   704
 *
sl@0
   705
 * If there are supplementary characters (if m!=0) then the header
sl@0
   706
 * is two 16-bit integers.  The first, 'length', has value
sl@0
   707
 * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
sl@0
   708
 *
sl@0
   709
 * After the header the code points are stored in ascending order.
sl@0
   710
 * Supplementary code points are stored as most significant 16
sl@0
   711
 * bits followed by least significant 16 bits.
sl@0
   712
 *
sl@0
   713
 * @param set the set to serialize; passed as const
sl@0
   714
 * @param dest pointer to the output buffer of destCapacity 16-bit integers.
sl@0
   715
 * May be NULL only if destCapacity is zero.
sl@0
   716
 * @param destCapacity size of dest, or zero.  Must not be negative.
sl@0
   717
 * @param pErrorCode pointer to the error code.  Will be set to
sl@0
   718
 * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
sl@0
   719
 * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
sl@0
   720
 * @return the total length of the serialized format, including
sl@0
   721
 * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
sl@0
   722
 * than U_BUFFER_OVERFLOW_ERROR.
sl@0
   723
 * @stable ICU 2.4
sl@0
   724
 */
sl@0
   725
U_STABLE int32_t U_EXPORT2
sl@0
   726
uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
sl@0
   727
sl@0
   728
/**
sl@0
   729
 * Given a serialized array, fill in the given serialized set object.
sl@0
   730
 * @param fillSet pointer to the USerializedSet that receives the result
sl@0
   731
 * @param src pointer to the start of the serialized array; passed as const
sl@0
   732
 * @param srcLength length of the array at src (presumably counted in 16-bit units; confirm against the implementation)
sl@0
   733
 * @return true if the given array is valid, otherwise false
sl@0
   734
 * @stable ICU 2.4
sl@0
   735
 */
sl@0
   736
U_STABLE UBool U_EXPORT2
sl@0
   737
uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
sl@0
   738
sl@0
   739
/**
sl@0
   740
 * Set the USerializedSet to contain the given character (and nothing
sl@0
   741
 * else).
sl@0
   742
 * @param fillSet pointer to the USerializedSet that receives the result
sl@0
   743
 * @param c the single code point the set is to contain
sl@0
   744
 * @stable ICU 2.4
sl@0
   745
 */
sl@0
   746
U_STABLE void U_EXPORT2
sl@0
   747
uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
sl@0
   748
sl@0
   749
/**
sl@0
   750
 * Returns true if the given USerializedSet contains the given
sl@0
   751
 * character.
sl@0
   752
 * @param set the serialized set to search; passed as const
sl@0
   753
 * @param c the code point to check for within the set
sl@0
   754
 * @return true if set contains c, otherwise false
sl@0
   755
 * @stable ICU 2.4
sl@0
   756
 */
sl@0
   757
U_STABLE UBool U_EXPORT2
sl@0
   758
uset_serializedContains(const USerializedSet* set, UChar32 c);
sl@0
   759
sl@0
   760
/**
sl@0
   761
 * Returns the number of disjoint ranges of characters contained in
sl@0
   762
 * the given serialized set.  Ignores any strings contained in the
sl@0
   763
 * set.
sl@0
   764
 * @param set the serialized set to inspect; passed as const
sl@0
   765
 * @return a non-negative integer counting the character ranges
sl@0
   766
 * contained in set
sl@0
   767
 * @stable ICU 2.4
sl@0
   768
 */
sl@0
   769
U_STABLE int32_t U_EXPORT2
sl@0
   770
uset_getSerializedRangeCount(const USerializedSet* set);
sl@0
   771
sl@0
   772
/**
sl@0
   773
 * Retrieves the range of characters selected by rangeIndex from the
sl@0
   774
 * given serialized set.
sl@0
   775
 * @param set the serialized set to read from; passed as const
sl@0
   776
 * @param rangeIndex a non-negative integer in the range 0..
sl@0
   777
 * uset_getSerializedRangeCount(set)-1
sl@0
   778
 * @param pStart output: pointer to variable to receive first character
sl@0
   779
 * in range, inclusive
sl@0
   780
 * @param pEnd output: pointer to variable to receive last character in range,
sl@0
   781
 * inclusive
sl@0
   782
 * @return true if rangeIndex is valid, otherwise false (whether pStart/pEnd are written in that case is not stated here; confirm against the implementation)
sl@0
   783
 * @stable ICU 2.4
sl@0
   784
 */
sl@0
   785
U_STABLE UBool U_EXPORT2
sl@0
   786
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
sl@0
   787
                        UChar32* pStart, UChar32* pEnd);
sl@0
   788
sl@0
   789
#endif