sl@0
|
1 |
/*
*******************************************************************************
*
*   Copyright (C) 2002-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uprops.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb24
*   created by: Markus W. Scherer
*
*   Constants for mostly non-core Unicode character properties
*   stored in uprops.icu.
*/
|
sl@0
|
19 |
|
sl@0
|
20 |
#ifndef __UPROPS_H__
|
sl@0
|
21 |
#define __UPROPS_H__
|
sl@0
|
22 |
|
sl@0
|
23 |
#include "unicode/utypes.h"
|
sl@0
|
24 |
#include "unicode/uset.h"
|
sl@0
|
25 |
#include "uset_imp.h"
|
sl@0
|
26 |
#include "udataswp.h"
|
sl@0
|
27 |
|
sl@0
|
28 |
/*
 * indexes[] entries.
 * Enumerators without an explicit initializer continue from the previous
 * one; the resulting numeric values are noted in the trailing comments.
 */
enum {
    UPROPS_PROPS32_INDEX,                    /* 0 */
    UPROPS_EXCEPTIONS_INDEX,                 /* 1 */
    UPROPS_EXCEPTIONS_TOP_INDEX,             /* 2 */

    UPROPS_ADDITIONAL_TRIE_INDEX,            /* 3 */
    UPROPS_ADDITIONAL_VECTORS_INDEX,         /* 4 */
    UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX, /* 5 */

    UPROPS_RESERVED_INDEX,                   /* 6 */

    /* maximum values for code values in vector word 0 */
    UPROPS_MAX_VALUES_INDEX=10,
    /* maximum values for code values in vector word 2 */
    UPROPS_MAX_VALUES_2_INDEX,               /* 11 */

    UPROPS_INDEX_COUNT=16
};
|
sl@0
|
47 |
|
sl@0
|
48 |
/*
 * Definitions for the main properties words:
 * bit positions (shift counts) of the fields in a 32-bit properties word.
 */
enum {
    /* general category shift==0                   0 (5 bits) */
    UPROPS_NUMERIC_TYPE_SHIFT=5,                /* 5 (3 bits) */
    UPROPS_NUMERIC_VALUE_SHIFT=8                /* 8 (8 bits) */
};
|
sl@0
|
54 |
|
sl@0
|
55 |
/*
 * Accessors for the fields of a main properties word.
 * Each macro evaluates its argument exactly once.
 */

/* General category: the low 5 bits of the properties word. */
#define GET_CATEGORY(props) ((props)&0x1f)
/* Single-bit mask for the general category; U_MASK is defined outside this header. */
#define CAT_MASK(props) U_MASK(GET_CATEGORY(props))

/* Numeric type: 3 bits starting at UPROPS_NUMERIC_TYPE_SHIFT. */
#define GET_NUMERIC_TYPE(props) (((props)>>UPROPS_NUMERIC_TYPE_SHIFT)&7)
/* Numeric value: 8 bits starting at UPROPS_NUMERIC_VALUE_SHIFT. */
#define GET_NUMERIC_VALUE(props) (((props)>>UPROPS_NUMERIC_VALUE_SHIFT)&0xff)
|
sl@0
|
60 |
|
sl@0
|
61 |
/* internal numeric pseudo-types for special encodings of numeric values */
enum {
    UPROPS_NT_FRACTION=4, /* ==U_NT_COUNT, must not change unless binary format version changes */
    UPROPS_NT_LARGE,      /* 5 */
    UPROPS_NT_COUNT       /* 6: one more than the highest pseudo-type */
};
|
sl@0
|
67 |
|
sl@0
|
68 |
/*
 * Encoding of fractional and large numbers.
 * The MIN and MAX constants are derived from the offsets and masks so
 * that they stay consistent with the bit layout.
 */
enum {
    UPROPS_MAX_SMALL_NUMBER=0xff,

    UPROPS_FRACTION_NUM_SHIFT=3,        /* numerator: bits 7..3 */
    UPROPS_FRACTION_DEN_MASK=7,         /* denominator: bits 2..0 */

    UPROPS_FRACTION_MAX_NUM=31,
    UPROPS_FRACTION_DEN_OFFSET=2,       /* denominator values are 2..9 */

    UPROPS_FRACTION_MIN_DEN=UPROPS_FRACTION_DEN_OFFSET,
    UPROPS_FRACTION_MAX_DEN=UPROPS_FRACTION_MIN_DEN+UPROPS_FRACTION_DEN_MASK,

    UPROPS_LARGE_MANT_SHIFT=4,          /* mantissa: bits 7..4 */
    UPROPS_LARGE_EXP_MASK=0xf,          /* exponent: bits 3..0 */
    UPROPS_LARGE_EXP_OFFSET=2,          /* regular exponents 2..17 */
    UPROPS_LARGE_EXP_OFFSET_EXTRA=18,   /* extra large exponents 18..33 */

    UPROPS_LARGE_MIN_EXP=UPROPS_LARGE_EXP_OFFSET,
    UPROPS_LARGE_MAX_EXP=UPROPS_LARGE_MIN_EXP+UPROPS_LARGE_EXP_MASK,
    UPROPS_LARGE_MAX_EXP_EXTRA=UPROPS_LARGE_EXP_OFFSET_EXTRA+UPROPS_LARGE_EXP_MASK
};
|
sl@0
|
90 |
|
sl@0
|
91 |
/* number of properties vector words (words 0, 1 and 2 are described below) */
#define UPROPS_VECTOR_WORDS     3
|
sl@0
|
93 |
|
sl@0
|
94 |
/*
 * Properties in vector word 0
 * Bits
 * 31..24   DerivedAge version major/minor one nibble each
 * 23..18   Line Break
 * 17..15   East Asian Width
 * 14.. 7   UBlockCode
 *  6.. 0   UScriptCode
 *
 * Each field has a mask and, except for the lowest field, a shift count.
 * The five masks are disjoint and together cover all 32 bits.
 */

/* derived age: one nibble each for major and minor version numbers */
#define UPROPS_AGE_MASK         0xff000000
#define UPROPS_AGE_SHIFT        24

#define UPROPS_LB_MASK          0x00FC0000
#define UPROPS_LB_SHIFT         18

#define UPROPS_EA_MASK          0x00038000
#define UPROPS_EA_SHIFT         15

#define UPROPS_BLOCK_MASK       0x00007f80
#define UPROPS_BLOCK_SHIFT      7

/* script code occupies the lowest bits, so no shift is needed */
#define UPROPS_SCRIPT_MASK      0x0000007f
|
sl@0
|
118 |
|
sl@0
|
119 |
/*
 * Properties in vector word 1
 * Each bit encodes one binary property.
 * The following constants represent the bit number, use 1<<UPROPS_XYZ.
 * UPROPS_BINARY_1_TOP<=32!
 *
 * Keep this list of property enums in sync with
 * propListNames[] in icu/source/tools/genprops/props2.c!
 *
 * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
 *
 * The trailing numbers are the bit numbers that result from the
 * enumeration order; do not reorder or insert entries.
 */
enum {
    UPROPS_WHITE_SPACE,                     /*  0 */
    UPROPS_WAS_BIDI_CONTROL,                /*  1 reserved, was used in format version 3 */
    UPROPS_WAS_JOIN_CONTROL,                /*  2 */
    UPROPS_DASH,                            /*  3 */
    UPROPS_HYPHEN,                          /*  4 */
    UPROPS_QUOTATION_MARK,                  /*  5 */
    UPROPS_TERMINAL_PUNCTUATION,            /*  6 */
    UPROPS_MATH,                            /*  7 */
    UPROPS_HEX_DIGIT,                       /*  8 */
    UPROPS_ASCII_HEX_DIGIT,                 /*  9 */
    UPROPS_ALPHABETIC,                      /* 10 */
    UPROPS_IDEOGRAPHIC,                     /* 11 */
    UPROPS_DIACRITIC,                       /* 12 */
    UPROPS_EXTENDER,                        /* 13 */
    UPROPS_WAS_LOWERCASE,                   /* 14 reserved, was used in format version 3 */
    UPROPS_WAS_UPPERCASE,                   /* 15 */
    UPROPS_NONCHARACTER_CODE_POINT,         /* 16 */
    UPROPS_GRAPHEME_EXTEND,                 /* 17 */
    UPROPS_GRAPHEME_LINK,                   /* 18 */
    UPROPS_IDS_BINARY_OPERATOR,             /* 19 */
    UPROPS_IDS_TRINARY_OPERATOR,            /* 20 */
    UPROPS_RADICAL,                         /* 21 */
    UPROPS_UNIFIED_IDEOGRAPH,               /* 22 */
    UPROPS_DEFAULT_IGNORABLE_CODE_POINT,    /* 23 */
    UPROPS_DEPRECATED,                      /* 24 */
    UPROPS_WAS_SOFT_DOTTED,                 /* 25 reserved, was used in format version 3 */
    UPROPS_LOGICAL_ORDER_EXCEPTION,         /* 26 */
    UPROPS_XID_START,                       /* 27 */
    UPROPS_XID_CONTINUE,                    /* 28 */
    UPROPS_ID_START,                        /* 29 ICU 2.6, uprops format version 3.2 */
    UPROPS_ID_CONTINUE,                     /* 30 */
    UPROPS_GRAPHEME_BASE,                   /* 31 */
    UPROPS_BINARY_1_TOP                     /* ==32 - full! */
};
|
sl@0
|
165 |
|
sl@0
|
166 |
/*
 * Properties in vector word 2
 * Bits
 * 31..24   More binary properties
 * 23..19   reserved
 * 18..14   Sentence Break
 * 13..10   Word Break
 *  9.. 5   Grapheme Cluster Break
 *  4.. 0   Decomposition Type
 *
 * The masks below cover the enumerated fields in bits 18..0.
 */
#define UPROPS_SB_MASK          0x0007c000
#define UPROPS_SB_SHIFT         14

#define UPROPS_WB_MASK          0x00003c00
#define UPROPS_WB_SHIFT         10

#define UPROPS_GCB_MASK         0x000003e0
#define UPROPS_GCB_SHIFT        5

/* decomposition type occupies the lowest bits, so no shift is needed */
#define UPROPS_DT_MASK          0x0000001f
|
sl@0
|
186 |
|
sl@0
|
187 |
/*
 * Bit numbers of the binary properties stored in vector word 2,
 * starting at bit 24.
 */
enum {
    UPROPS_V2_S_TERM=24,            /* 24 new in ICU 3.0 and Unicode 4.0.1 */
    UPROPS_V2_VARIATION_SELECTOR,   /* 25 */
    UPROPS_V2_PATTERN_SYNTAX,       /* 26 new in ICU 3.4 and Unicode 4.1 */
    UPROPS_V2_PATTERN_WHITE_SPACE,  /* 27 */
    UPROPS_V2_TOP                   /* 28 must be <=32 */
};
|
sl@0
|
194 |
|
sl@0
|
195 |
/**
|
sl@0
|
196 |
* Get a properties vector word for a code point.
|
sl@0
|
197 |
* Implemented in uchar.c for uprops.c.
|
sl@0
|
198 |
* column==-1 gets the 32-bit main properties word instead.
|
sl@0
|
199 |
* @return 0 if no data or illegal argument
|
sl@0
|
200 |
*/
|
sl@0
|
201 |
U_CFUNC uint32_t
|
sl@0
|
202 |
u_getUnicodeProperties(UChar32 c, int32_t column);
|
sl@0
|
203 |
|
sl@0
|
204 |
/**
|
sl@0
|
205 |
* Get the the maximum values for some enum/int properties.
|
sl@0
|
206 |
* Use the same column numbers as for u_getUnicodeProperties().
|
sl@0
|
207 |
* The returned value will contain maximum values stored in the same bit fields
|
sl@0
|
208 |
* as where the enum values are stored in the u_getUnicodeProperties()
|
sl@0
|
209 |
* return values for the same columns.
|
sl@0
|
210 |
*
|
sl@0
|
211 |
* Valid columns are those for properties words that contain enumerated values.
|
sl@0
|
212 |
* (ICU 2.6: columns 0 and 2)
|
sl@0
|
213 |
* For other column numbers, this function will return 0.
|
sl@0
|
214 |
*
|
sl@0
|
215 |
* @internal
|
sl@0
|
216 |
*/
|
sl@0
|
217 |
U_CFUNC int32_t
|
sl@0
|
218 |
uprv_getMaxValues(int32_t column);
|
sl@0
|
219 |
|
sl@0
|
220 |
/**
|
sl@0
|
221 |
* Get the Hangul Syllable Type for c.
|
sl@0
|
222 |
* @internal
|
sl@0
|
223 |
*/
|
sl@0
|
224 |
U_CFUNC UHangulSyllableType
|
sl@0
|
225 |
uchar_getHST(UChar32 c);
|
sl@0
|
226 |
|
sl@0
|
227 |
/**
|
sl@0
|
228 |
* Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
|
sl@0
|
229 |
* @internal
|
sl@0
|
230 |
*/
|
sl@0
|
231 |
U_CFUNC UBool
|
sl@0
|
232 |
u_isalnumPOSIX(UChar32 c);
|
sl@0
|
233 |
|
sl@0
|
234 |
/**
|
sl@0
|
235 |
* Checks if c is in
|
sl@0
|
236 |
* [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
|
sl@0
|
237 |
* with space=\p{Whitespace} and Control=Cc.
|
sl@0
|
238 |
* Implements UCHAR_POSIX_GRAPH.
|
sl@0
|
239 |
* @internal
|
sl@0
|
240 |
*/
|
sl@0
|
241 |
U_CFUNC UBool
|
sl@0
|
242 |
u_isgraphPOSIX(UChar32 c);
|
sl@0
|
243 |
|
sl@0
|
244 |
/**
|
sl@0
|
245 |
* Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
|
sl@0
|
246 |
* Implements UCHAR_POSIX_PRINT.
|
sl@0
|
247 |
* @internal
|
sl@0
|
248 |
*/
|
sl@0
|
249 |
U_CFUNC UBool
|
sl@0
|
250 |
u_isprintPOSIX(UChar32 c);
|
sl@0
|
251 |
|
sl@0
|
252 |
/**
 * Turn a bit index into a bit flag.
 * The result has type uint32_t, so n must be in the range 0..31.
 * @internal
 */
#define FLAG(n) ((uint32_t)1<<(n))
|
sl@0
|
254 |
|
sl@0
|
255 |
/**
 * Flags for general categories in the order of UCharCategory.
 * Each macro is FLAG() applied to the corresponding U_... category
 * constant, which is declared outside this header.
 * @internal
 */
#define _Cn     FLAG(U_GENERAL_OTHER_TYPES)
#define _Lu     FLAG(U_UPPERCASE_LETTER)
#define _Ll     FLAG(U_LOWERCASE_LETTER)
#define _Lt     FLAG(U_TITLECASE_LETTER)
#define _Lm     FLAG(U_MODIFIER_LETTER)
#define _Lo     FLAG(U_OTHER_LETTER)
#define _Mn     FLAG(U_NON_SPACING_MARK)
#define _Me     FLAG(U_ENCLOSING_MARK)
#define _Mc     FLAG(U_COMBINING_SPACING_MARK)
#define _Nd     FLAG(U_DECIMAL_DIGIT_NUMBER)
#define _Nl     FLAG(U_LETTER_NUMBER)
#define _No     FLAG(U_OTHER_NUMBER)
#define _Zs     FLAG(U_SPACE_SEPARATOR)
#define _Zl     FLAG(U_LINE_SEPARATOR)
#define _Zp     FLAG(U_PARAGRAPH_SEPARATOR)
#define _Cc     FLAG(U_CONTROL_CHAR)
#define _Cf     FLAG(U_FORMAT_CHAR)
#define _Co     FLAG(U_PRIVATE_USE_CHAR)
#define _Cs     FLAG(U_SURROGATE)
#define _Pd     FLAG(U_DASH_PUNCTUATION)
#define _Ps     FLAG(U_START_PUNCTUATION)
#define _Pe     FLAG(U_END_PUNCTUATION)
#define _Pc     FLAG(U_CONNECTOR_PUNCTUATION)
#define _Po     FLAG(U_OTHER_PUNCTUATION)
#define _Sm     FLAG(U_MATH_SYMBOL)
#define _Sc     FLAG(U_CURRENCY_SYMBOL)
#define _Sk     FLAG(U_MODIFIER_SYMBOL)
#define _So     FLAG(U_OTHER_SYMBOL)
#define _Pi     FLAG(U_INITIAL_PUNCTUATION)
#define _Pf     FLAG(U_FINAL_PUNCTUATION)
|
sl@0
|
286 |
|
sl@0
|
287 |
/** Some code points, named for use in property implementations. @internal */
enum {
    TAB     =0x0009,
    LF      =0x000a,
    FF      =0x000c,
    CR      =0x000d,
    U_A     =0x0041,
    U_F     =0x0046,
    U_Z     =0x005a,
    U_a     =0x0061,
    U_f     =0x0066,
    U_z     =0x007a,
    DEL     =0x007f,
    NL      =0x0085,
    NBSP    =0x00a0,
    CGJ     =0x034f,
    FIGURESP=0x2007,
    HAIRSP  =0x200a,
    ZWNJ    =0x200c,
    ZWJ     =0x200d,
    RLM     =0x200f,
    NNBSP   =0x202f,
    WJ      =0x2060,
    INHSWAP =0x206a,
    NOMDIG  =0x206f,
    U_FW_A  =0xff21,    /* U_FW_*: counterparts of U_A..U_z in the U+FFxx range */
    U_FW_F  =0xff26,
    U_FW_Z  =0xff3a,
    U_FW_a  =0xff41,
    U_FW_f  =0xff46,
    U_FW_z  =0xff5a,
    ZWNBSP  =0xfeff
};
|
sl@0
|
320 |
|
sl@0
|
321 |
/**
|
sl@0
|
322 |
* Get the maximum length of a (regular/1.0/extended) character name.
|
sl@0
|
323 |
* @return 0 if no character names available.
|
sl@0
|
324 |
*/
|
sl@0
|
325 |
U_CAPI int32_t U_EXPORT2
|
sl@0
|
326 |
uprv_getMaxCharNameLength(void);
|
sl@0
|
327 |
|
sl@0
|
328 |
#if 0
/*
Currently not used but left for future use. Probably by UnicodeSet.
urename.h and unames.c changed accordingly.
*/
/**
 * Get the maximum length of an ISO comment.
 * @return 0 if no ISO comments available.
 */
U_CAPI int32_t U_EXPORT2
uprv_getMaxISOCommentLength(void);
#endif
|
sl@0
|
340 |
|
sl@0
|
341 |
/**
|
sl@0
|
342 |
* Fills set with characters that are used in Unicode character names.
|
sl@0
|
343 |
* Includes all characters that are used in regular/Unicode 1.0/extended names.
|
sl@0
|
344 |
* Just empties the set if no character names are available.
|
sl@0
|
345 |
* @param sa USetAdder to receive characters.
|
sl@0
|
346 |
*/
|
sl@0
|
347 |
U_CAPI void U_EXPORT2
|
sl@0
|
348 |
uprv_getCharNameCharacters(const USetAdder *sa);
|
sl@0
|
349 |
|
sl@0
|
350 |
#if 0
/*
Currently not used but left for future use. Probably by UnicodeSet.
urename.h and unames.c changed accordingly.
*/
/**
 * Fills set with characters that are used in ISO comments.
 * Just empties the set if no ISO comments are available.
 * @param sa USetAdder to receive characters.
 */
U_CAPI void U_EXPORT2
uprv_getISOCommentCharacters(const USetAdder *sa);
#endif
|
sl@0
|
364 |
|
sl@0
|
365 |
/**
 * Constants for which data and implementation files provide which properties.
 * Used by UnicodeSet for service-specific property enumeration.
 * Values are consecutive, starting at 0 for UPROPS_SRC_NONE.
 * @internal
 */
enum UPropertySource {
    /** No source, not a supported property. */
    UPROPS_SRC_NONE,
    /** From uchar.c/uprops.icu main trie */
    UPROPS_SRC_CHAR,
    /** From uchar.c/uprops.icu properties vectors trie */
    UPROPS_SRC_PROPSVEC,
    /** Hangul_Syllable_Type, from uchar.c/uprops.icu */
    UPROPS_SRC_HST,
    /** From unames.c/unames.icu */
    UPROPS_SRC_NAMES,
    /** From unorm.cpp/unorm.icu */
    UPROPS_SRC_NORM,
    /** From ucase.c/ucase.icu */
    UPROPS_SRC_CASE,
    /** From ubidi_props.c/ubidi.icu */
    UPROPS_SRC_BIDI,
    /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
    UPROPS_SRC_CHAR_AND_PROPSVEC,
    /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
    UPROPS_SRC_COUNT
};
typedef enum UPropertySource UPropertySource;
|
sl@0
|
393 |
|
sl@0
|
394 |
/**
|
sl@0
|
395 |
* @see UPropertySource
|
sl@0
|
396 |
* @internal
|
sl@0
|
397 |
*/
|
sl@0
|
398 |
U_CAPI UPropertySource U_EXPORT2
|
sl@0
|
399 |
uprops_getSource(UProperty which);
|
sl@0
|
400 |
|
sl@0
|
401 |
/**
|
sl@0
|
402 |
* Enumerate uprops.icu's main data trie and add the
|
sl@0
|
403 |
* start of each range of same properties to the set.
|
sl@0
|
404 |
* @internal
|
sl@0
|
405 |
*/
|
sl@0
|
406 |
U_CAPI void U_EXPORT2
|
sl@0
|
407 |
uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
|
sl@0
|
408 |
|
sl@0
|
409 |
/**
|
sl@0
|
410 |
* Enumerate uprops.icu's properties vectors trie and add the
|
sl@0
|
411 |
* start of each range of same properties to the set.
|
sl@0
|
412 |
* @internal
|
sl@0
|
413 |
*/
|
sl@0
|
414 |
U_CAPI void U_EXPORT2
|
sl@0
|
415 |
upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
|
sl@0
|
416 |
|
sl@0
|
417 |
/**
|
sl@0
|
418 |
* Same as uchar_addPropertyStarts() but only for Hangul_Syllable_Type.
|
sl@0
|
419 |
* @internal
|
sl@0
|
420 |
*/
|
sl@0
|
421 |
U_CAPI void U_EXPORT2
|
sl@0
|
422 |
uhst_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
|
sl@0
|
423 |
|
sl@0
|
424 |
/**
|
sl@0
|
425 |
* Return a set of characters for property enumeration.
|
sl@0
|
426 |
* For each two consecutive characters (start, limit) in the set,
|
sl@0
|
427 |
* all of the properties for start..limit-1 are all the same.
|
sl@0
|
428 |
*
|
sl@0
|
429 |
* @param sa USetAdder to receive result. Existing contents are lost.
|
sl@0
|
430 |
* @internal
|
sl@0
|
431 |
*/
|
sl@0
|
432 |
U_CAPI void U_EXPORT2
|
sl@0
|
433 |
uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
|
sl@0
|
434 |
|
sl@0
|
435 |
/**
|
sl@0
|
436 |
* Swap the ICU Unicode properties file. See uchar.c.
|
sl@0
|
437 |
* @internal
|
sl@0
|
438 |
*/
|
sl@0
|
439 |
U_CAPI int32_t U_EXPORT2
|
sl@0
|
440 |
uprops_swap(const UDataSwapper *ds,
|
sl@0
|
441 |
const void *inData, int32_t length, void *outData,
|
sl@0
|
442 |
UErrorCode *pErrorCode);
|
sl@0
|
443 |
|
sl@0
|
444 |
/**
|
sl@0
|
445 |
* Swap the ICU Unicode character names file. See uchar.c.
|
sl@0
|
446 |
* @internal
|
sl@0
|
447 |
*/
|
sl@0
|
448 |
U_CAPI int32_t U_EXPORT2
|
sl@0
|
449 |
uchar_swapNames(const UDataSwapper *ds,
|
sl@0
|
450 |
const void *inData, int32_t length, void *outData,
|
sl@0
|
451 |
UErrorCode *pErrorCode);
|
sl@0
|
452 |
|
sl@0
|
453 |
#endif
|