os/textandloc/fontservices/textshaperplugin/IcuSource/common/utrie.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/utrie.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,741 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2001-2005, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  utrie.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2001nov08
    1.17 +*   created by: Markus W. Scherer
    1.18 +*/
    1.19 +
    1.20 +#ifndef __UTRIE_H__
    1.21 +#define __UTRIE_H__
    1.22 +
    1.23 +#include "unicode/utypes.h"
    1.24 +#include "udataswp.h"
    1.25 +
    1.26 +U_CDECL_BEGIN
    1.27 +
    1.28 +/**
    1.29 + * \file
    1.30 + *
    1.31 + * This is a common implementation of a "folded" trie.
    1.32 + * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
    1.33 + * Unicode code points (0..0x10ffff).
    1.34 + *
    1.35 + * This implementation is optimized for getting values while walking forward
    1.36 + * through a UTF-16 string.
    1.37 + * Therefore, the simplest and fastest access macros are the
    1.38 + * _FROM_LEAD() and _FROM_OFFSET_TRAIL() macros.
    1.39 + *
    1.40 + * The _FROM_BMP() macros are a little more complicated; they get values
    1.41 + * even for lead surrogate code _points_, while the _FROM_LEAD() macros
    1.42 + * get special "folded" values for lead surrogate code _units_ if
    1.43 + * there is relevant data associated with them.
    1.44 + * From such a folded value, an offset needs to be extracted to supply
    1.45 + * to the _FROM_OFFSET_TRAIL() macros.
    1.46 + *
    1.47 + * Most of the more complex (and more convenient) functions/macros call a callback function
    1.48 + * to get that offset from the folded value for a lead surrogate unit.
    1.49 + */
    1.50 +
    1.51 +/**
    1.52 + * Trie constants: shift widths, masks, block sizes and index array lengths.
    1.53 + */
    1.54 +enum {
    1.55 +    /** Number of bits by which an input code unit is shifted right to get its stage 1 index. Valid values: 1..9 */
    1.56 +    UTRIE_SHIFT=5,
    1.57 +
    1.58 +    /** Number of data values in a stage 2 (data array) block: 2, 4, 8, .., 0x200 */
    1.59 +    UTRIE_DATA_BLOCK_LENGTH=1<<UTRIE_SHIFT,
    1.60 +
    1.61 +    /** Mask for the low UTRIE_SHIFT bits of the input, which select the value inside a data block. */
    1.62 +    UTRIE_MASK=UTRIE_DATA_BLOCK_LENGTH-1,
    1.63 +
    1.64 +    /**
    1.65 +     * Displacement, in index array entries, of the indexes for lead surrogate code points.
    1.66 +     * (0x10000-0xd800=0x2800 code points, shifted right by UTRIE_SHIFT)
    1.67 +     */
    1.68 +    UTRIE_LEAD_INDEX_DISP=0x2800>>UTRIE_SHIFT,
    1.69 +
    1.70 +    /**
    1.71 +     * Shift size for shifting left the index array values.
    1.72 +     * Increases possible data size with 16-bit index values at the cost
    1.73 +     * of compactability.
    1.74 +     * This requires blocks of stage 2 data to be aligned by UTRIE_DATA_GRANULARITY.
    1.75 +     * Valid values: 0..UTRIE_SHIFT
    1.76 +     */
    1.77 +    UTRIE_INDEX_SHIFT=2,
    1.78 +
    1.79 +    /** The alignment size of a stage 2 data block. Also the granularity for compaction. */
    1.80 +    UTRIE_DATA_GRANULARITY=1<<UTRIE_INDEX_SHIFT,
    1.81 +
    1.82 +    /** Number of bits of a trail surrogate that are used in index table lookups. */
    1.83 +    UTRIE_SURROGATE_BLOCK_BITS=10-UTRIE_SHIFT,
    1.84 +
    1.85 +    /**
    1.86 +     * Number of index (stage 1) entries per lead surrogate.
    1.87 +     * Same as the number of index entries for 1024 trail surrogates,
    1.88 +     * ==0x400>>UTRIE_SHIFT
    1.89 +     */
    1.90 +    UTRIE_SURROGATE_BLOCK_COUNT=(1<<UTRIE_SURROGATE_BLOCK_BITS),
    1.91 +
    1.92 +    /** Length of the BMP portion of the index (stage 1) array. */
    1.93 +    UTRIE_BMP_INDEX_LENGTH=0x10000>>UTRIE_SHIFT
    1.94 +};
    1.95 +
    1.96 +/**
    1.97 + * Length of the index (stage 1) array before folding.
    1.98 + * This is the number of Unicode code points (0x110000) shifted right by UTRIE_SHIFT.
    1.99 + */
   1.100 +#define UTRIE_MAX_INDEX_LENGTH (0x110000>>UTRIE_SHIFT)
   1.101 +
   1.102 +/**
   1.103 + * Maximum length of the runtime data (stage 2) array, in data entries.
   1.104 + * A 16-bit index value, left-shifted by UTRIE_INDEX_SHIFT, cannot address more than this.
   1.105 + */
   1.106 +#define UTRIE_MAX_DATA_LENGTH (0x10000<<UTRIE_INDEX_SHIFT)
   1.107 +
   1.108 +/**
   1.109 + * Maximum length of the build-time data (stage 2) array, in data entries.
   1.110 + * The maximum length is 0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400:
   1.111 + * the number of Unicode code points + one all-initial-value block +
   1.112 + * possible duplicate entries for 1024 lead surrogates.
   1.113 + */
   1.114 +#define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400)
   1.115 +
   1.116 +/**
   1.117 + * Number of bytes for a dummy trie.
   1.118 + * A dummy trie is an empty runtime trie, used when a real data trie cannot
   1.119 + * be loaded.
   1.120 + * The number of bytes works for Latin-1-linear tries with 32-bit data
   1.121 + * (worst case).
   1.122 + *
   1.123 + * Calculation (2 bytes per index entry, 4 bytes per data entry):
   1.124 + *   BMP index + 1 index block for lead surrogate code points +
   1.125 + *   Latin-1-linear array + 1 data block for lead surrogate code points
   1.126 + *
   1.127 + * Latin-1: if(UTRIE_SHIFT<=8) { 256 } else { included in first data block }
   1.128 + *
   1.129 + * @see utrie_unserializeDummy
   1.130 + */
   1.131 +#define UTRIE_DUMMY_SIZE ((UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT)*2+(UTRIE_SHIFT<=8?256:UTRIE_DATA_BLOCK_LENGTH)*4+UTRIE_DATA_BLOCK_LENGTH*4)
   1.132 +
   1.133 +/**
   1.134 + * Runtime UTrie callback function.
   1.135 + * Extracts, from the trie value of a lead surrogate,
   1.136 + * the index array offset of the index entries for that lead surrogate.
   1.137 + *
   1.138 + * @param data trie value for a lead surrogate, including the folding offset
   1.139 + * @return offset>=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate
   1.140 + */
   1.141 +typedef int32_t U_CALLCONV
   1.142 +UTrieGetFoldingOffset(uint32_t data);
   1.143 +
   1.144 +/**
   1.145 + * Run-time Trie structure.
   1.146 + *
   1.147 + * Either the data table is 16 bits wide and accessed via the index
   1.148 + * pointer, with each index item increased by indexLength;
   1.149 + * in this case, data32==NULL.
   1.150 + *
   1.151 + * Or the data table is 32 bits wide and accessed via the data32 pointer.
   1.152 + */
   1.153 +struct UTrie {
   1.154 +    const uint16_t *index; /* stage 1 index array; the 16-bit macros also read their data through it */
   1.155 +    const uint32_t *data32; /* NULL if 16b data is used via index */
   1.156 +
   1.157 +    /**
   1.158 +     * This function is not used in _FROM_LEAD, _FROM_BMP, and _FROM_OFFSET_TRAIL macros.
   1.159 +     * If convenience macros like _GET16 or _NEXT32 are used, this function must be set.
   1.160 +     *
   1.161 +     * utrie_unserialize() sets a default function which simply returns
   1.162 +     * the lead surrogate's value itself - which is the inverse of the default
   1.163 +     * folding function used by utrie_serialize().
   1.164 +     *
   1.165 +     * @see UTrieGetFoldingOffset
   1.166 +     */
   1.167 +    UTrieGetFoldingOffset *getFoldingOffset;
   1.168 +
   1.169 +    int32_t indexLength, dataLength; /* UTRIE_GET16_LATIN1 locates the 16-bit data at index+indexLength */
   1.170 +    uint32_t initialValue; /* result of _UTRIE_GET_FROM_PAIR/_UTRIE_GET when there is no folded data or c32>0x10ffff */
   1.171 +    UBool isLatin1Linear; /* must be TRUE for UTRIE_GET16_LATIN1/UTRIE_GET32_LATIN1 to be used */
   1.172 +};
   1.173 +
   1.174 +typedef struct UTrie UTrie;
   1.175 +
   1.176 +/** Internal trie getter from an index offset (0 if c16 is a BMP or lead surrogate code unit) and a 16-bit unit; trie and c16 are evaluated twice */
   1.177 +#define _UTRIE_GET_RAW(trie, data, offset, c16) \
   1.178 +    (trie)->data[ \
   1.179 +        ((int32_t)((trie)->index[(offset)+((c16)>>UTRIE_SHIFT)])<<UTRIE_INDEX_SHIFT)+ \
   1.180 +        ((c16)&UTRIE_MASK) \
   1.181 +    ]
   1.182 +
   1.183 +/** Internal trie getter from a pair of surrogates (c=lead, c2=trail); sets result, using initialValue if the folding offset is not >0 */
   1.184 +#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) { \
   1.185 +    int32_t __offset; \
   1.186 +\
   1.187 +    /* get the (folded) value for the lead surrogate code unit */ \
   1.188 +    (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
   1.189 +    __offset=(trie)->getFoldingOffset(result); \
   1.190 +\
   1.191 +    /* get the real data from the folding offset and the low 10 bits of the trail unit */ \
   1.192 +    if(__offset>0) { \
   1.193 +        (result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \
   1.194 +    } else { \
   1.195 +        (result)=(resultType)((trie)->initialValue); \
   1.196 +    } \
   1.197 +}
   1.198 +
   1.199 +/** Internal trie getter from a BMP code point, treating a lead surrogate as a normal code point; expands to an expression, so there is no trailing semicolon */
   1.200 +#define _UTRIE_GET_FROM_BMP(trie, data, c16) \
   1.201 +    _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16)
   1.202 +
   1.203 +/**
   1.204 + * Internal trie getter from a code point; expands to a bare if/else chain that sets result.
   1.205 + * Could be faster(?) but longer with a leading special case
   1.206 + *   if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); }
   1.207 + */
   1.208 +#define _UTRIE_GET(trie, data, c32, result, resultType) \
   1.209 +    if((uint32_t)(c32)<=0xffff) { \
   1.210 +        /* BMP code point; lead surrogates are looked up as code points */ \
   1.211 +        (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
   1.212 +    } else if((uint32_t)(c32)<=0x10ffff) { \
   1.213 +        /* supplementary code point: look up via its lead surrogate and low 10 bits */ \
   1.214 +        UChar __lead16=UTF16_LEAD(c32); \
   1.215 +        _UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
   1.216 +    } else { \
   1.217 +        /* out of range (above U+10ffff, or negative before the unsigned cast) */ \
   1.218 +        (result)=(resultType)((trie)->initialValue); \
   1.219 +    }
   1.220 +
   1.221 +/** Internal next, post-incrementing src: read the next code point as (c, c2) and get its trie value; src is dereferenced before limit is checked */
   1.222 +#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \
   1.223 +    (c)=*(src)++; \
   1.224 +    if(!UTF_IS_LEAD(c)) { \
   1.225 +        (c2)=0; \
   1.226 +        (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
   1.227 +    } else if((src)!=(limit) && UTF_IS_TRAIL((c2)=*(src))) { \
   1.228 +        ++(src); \
   1.229 +        _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
   1.230 +    } else { \
   1.231 +        /* unpaired lead surrogate: look it up as a code point via UTRIE_LEAD_INDEX_DISP */ \
   1.232 +        (c2)=0; \
   1.233 +        (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
   1.234 +    } \
   1.235 +}
   1.236 +
   1.237 +/** Internal previous, pre-decrementing src: read the previous code point as (c, c2) and get its trie value; src is decremented before start is checked */
   1.238 +#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \
   1.239 +    (c)=*--(src); \
   1.240 +    if(!UTF_IS_SURROGATE(c)) { \
   1.241 +        (c2)=0; \
   1.242 +        (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
   1.243 +    } else if(!UTF_IS_SURROGATE_FIRST(c)) { \
   1.244 +        /* trail surrogate: check for a preceding lead surrogate */ \
   1.245 +        if((start)!=(src) && UTF_IS_LEAD((c2)=*((src)-1))) { \
   1.246 +            --(src); \
   1.247 +            (result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 using result as the temporary */ \
   1.248 +            _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
   1.249 +        } else { \
   1.250 +            /* unpaired trail surrogate code point */ \
   1.251 +            (c2)=0; \
   1.252 +            (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
   1.253 +        } \
   1.254 +    } else { \
   1.255 +        /* unpaired lead surrogate: look it up as a code point via UTRIE_LEAD_INDEX_DISP */ \
   1.256 +        (c2)=0; \
   1.257 +        (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
   1.258 +    } \
   1.259 +}
   1.260 +
   1.261 +/* Public UTrie API ---------------------------------------------------------*/
   1.262 +
   1.263 +/**
   1.264 + * Get a pointer to the contiguous part of the 16-bit data array
   1.265 + * for the Latin-1 range (U+0000..U+00ff); it is located one data block
   1.266 + * past index+indexLength.
   1.267 + * Must be used only if the Latin-1 range is in fact linear (trie->isLatin1Linear).
   1.268 + *
   1.269 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.270 + * @return (const uint16_t *) pointer to values for Latin-1 code points
   1.271 + */
   1.272 +#define UTRIE_GET16_LATIN1(trie) ((trie)->index+(trie)->indexLength+UTRIE_DATA_BLOCK_LENGTH)
   1.273 +
   1.274 +/**
   1.275 + * Get a pointer to the contiguous part of the 32-bit data array
   1.276 + * for the Latin-1 range (U+0000..U+00ff); it is located one data block
   1.277 + * past the start of data32.
   1.278 + * Must be used only if the Latin-1 range is in fact linear (trie->isLatin1Linear).
   1.279 + *
   1.280 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.281 + * @return (const uint32_t *) pointer to values for Latin-1 code points
   1.282 + */
   1.283 +#define UTRIE_GET32_LATIN1(trie) ((trie)->data32+UTRIE_DATA_BLOCK_LENGTH)
   1.284 +
   1.285 +/**
   1.286 + * Get a 16-bit trie value from a BMP code unit (UChar, <=U+ffff).
   1.287 + * For a lead surrogate code unit, this is its folded value, which may include a folding offset.
   1.288 + *
   1.289 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.290 + * @param c16 (UChar, in) the input BMP code unit
   1.291 + * @return (uint16_t) trie lookup result
   1.292 + */
   1.293 +#define UTRIE_GET16_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, index, 0, c16)
   1.294 +
   1.295 +/**
   1.296 + * Get a 32-bit trie value from a BMP code unit (UChar, <=U+ffff).
   1.297 + * For a lead surrogate code unit, this is its folded value, which may include a folding offset.
   1.298 + *
   1.299 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.300 + * @param c16 (UChar, in) the input BMP code unit
   1.301 + * @return (uint32_t) trie lookup result
   1.302 + */
   1.303 +#define UTRIE_GET32_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, data32, 0, c16)
   1.304 +
   1.305 +/**
   1.306 + * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff).
   1.307 + * Lead surrogates (U+d800..U+dbff) are looked up as code points, displaced by UTRIE_LEAD_INDEX_DISP,
   1.308 + * so their unfolded values may differ from the _FROM_LEAD() macro results for them.
   1.309 + *
   1.310 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.311 + * @param c16 (UChar, in) the input BMP code point
   1.312 + * @return (uint16_t) trie lookup result
   1.313 + */
   1.314 +#define UTRIE_GET16_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, index, c16)
   1.315 +
   1.316 +/**
   1.317 + * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff).
   1.318 + * Lead surrogates (U+d800..U+dbff) are looked up as code points, displaced by UTRIE_LEAD_INDEX_DISP,
   1.319 + * so their unfolded values may differ from the _FROM_LEAD() macro results for them.
   1.320 + *
   1.321 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.322 + * @param c16 (UChar, in) the input BMP code point
   1.323 + * @return (uint32_t) trie lookup result
   1.324 + */
   1.325 +#define UTRIE_GET32_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, data32, c16)
   1.326 +
   1.327 +/**
   1.328 + * Get a 16-bit trie value from a code point; input above U+10ffff yields trie->initialValue.
   1.329 + * Even lead surrogate code points are treated as normal code points,
   1.330 + * with unfolded values that may differ from _FROM_LEAD() macro results for them.
   1.331 + *
   1.332 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.333 + * @param c32 (UChar32, in) the input code point
   1.334 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.335 + */
   1.336 +#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t)
   1.337 +
   1.338 +/**
   1.339 + * Get a 32-bit trie value from a code point; input above U+10ffff yields trie->initialValue.
   1.340 + * Even lead surrogate code points are treated as normal code points,
   1.341 + * with unfolded values that may differ from _FROM_LEAD() macro results for them.
   1.342 + *
   1.343 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.344 + * @param c32 (UChar32, in) the input code point
   1.345 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.346 + */
   1.347 +#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t)
   1.348 +
   1.349 +/**
   1.350 + * Get the next code point (c, c2), post-increment src past it,
   1.351 + * and get its 16-bit value from the trie. src must be before limit on entry.
   1.352 + *
   1.353 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.354 + * @param src (const UChar *, in/out) the source text pointer
   1.355 + * @param limit (const UChar *, in) the limit pointer for the text, or NULL
   1.356 + * @param c (UChar, out) variable for the BMP or lead code unit
   1.357 + * @param c2 (UChar, out) variable for 0 or the trail code unit
   1.358 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.359 + */
   1.360 +#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t)
   1.361 +
   1.362 +/**
   1.363 + * Get the next code point (c, c2), post-increment src past it,
   1.364 + * and get its 32-bit value from the trie. src must be before limit on entry.
   1.365 + *
   1.366 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.367 + * @param src (const UChar *, in/out) the source text pointer
   1.368 + * @param limit (const UChar *, in) the limit pointer for the text, or NULL
   1.369 + * @param c (UChar, out) variable for the BMP or lead code unit
   1.370 + * @param c2 (UChar, out) variable for 0 or the trail code unit
   1.371 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.372 + */
   1.373 +#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t)
   1.374 +
   1.375 +/**
   1.376 + * Get the previous code point (c, c2), pre-decrement src to its start,
   1.377 + * and get its 16-bit value from the trie. src must be after start on entry.
   1.378 + *
   1.379 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.380 + * @param start (const UChar *, in) the start pointer for the text, or NULL
   1.381 + * @param src (const UChar *, in/out) the source text pointer
   1.382 + * @param c (UChar, out) variable for the BMP or lead code unit
   1.383 + * @param c2 (UChar, out) variable for 0 or the trail code unit
   1.384 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.385 + */
   1.386 +#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t)
   1.387 +
   1.388 +/**
   1.389 + * Get the previous code point (c, c2), pre-decrement src to its start,
   1.390 + * and get its 32-bit value from the trie. src must be after start on entry.
   1.391 + *
   1.392 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.393 + * @param start (const UChar *, in) the start pointer for the text, or NULL
   1.394 + * @param src (const UChar *, in/out) the source text pointer
   1.395 + * @param c (UChar, out) variable for the BMP or lead code unit
   1.396 + * @param c2 (UChar, out) variable for 0 or the trail code unit
   1.397 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.398 + */
   1.399 +#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t)
   1.400 +
   1.401 +/**
   1.402 + * Get a 16-bit trie value from a pair of surrogates, or trie->initialValue if getFoldingOffset() returns no offset >0 for the lead.
   1.403 + *
   1.404 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.405 + * @param c (UChar, in) a lead surrogate
   1.406 + * @param c2 (UChar, in) a trail surrogate
   1.407 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.408 + */
   1.409 +#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t)
   1.410 +
   1.411 +/**
   1.412 + * Get a 32-bit trie value from a pair of surrogates, or trie->initialValue if getFoldingOffset() returns no offset >0 for the lead.
   1.413 + *
   1.414 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.415 + * @param c (UChar, in) a lead surrogate
   1.416 + * @param c2 (UChar, in) a trail surrogate
   1.417 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.418 + */
   1.419 +#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t)
   1.420 +
   1.421 +/**
   1.422 + * Get a 16-bit trie value from a folding offset (from the value of a lead surrogate)
   1.423 + * and a trail surrogate. The offset is used as is, without any check.
   1.424 + *
   1.425 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.426 + * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
   1.427 + * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
   1.428 + * @return (uint16_t) trie lookup result
   1.429 + */
   1.430 +#define UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, index, offset, (c2)&0x3ff)
   1.431 +
   1.432 +/**
   1.433 + * Get a 32-bit trie value from a folding offset (from the value of a lead surrogate)
   1.434 + * and a trail surrogate. The offset is used as is, without any check.
   1.435 + *
   1.436 + * @param trie (const UTrie *, in) a pointer to the runtime trie structure
   1.437 + * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
   1.438 + * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
   1.439 + * @return (uint32_t) trie lookup result
   1.440 + */
   1.441 +#define UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, data32, offset, (c2)&0x3ff)
   1.442 +
   1.443 +/* enumeration callback types */
   1.444 +
   1.445 +/**
   1.446 + * Callback from utrie_enum(); extracts a uint32_t value from a
   1.447 + * trie value. The extracted value is passed on to the UTrieEnumRange function.
   1.448 + *
   1.449 + * @param context an opaque pointer, as passed into utrie_enum()
   1.450 + * @param value a value from the trie
   1.451 + * @return the value that is to be passed on to the UTrieEnumRange function
   1.452 + */
   1.453 +typedef uint32_t U_CALLCONV
   1.454 +UTrieEnumValue(const void *context, uint32_t value);
   1.455 +
   1.456 +/**
   1.457 + * Callback from utrie_enum(); it is called for each contiguous range
   1.458 + * of code points with the same value as retrieved from the trie and
   1.459 + * transformed by the UTrieEnumValue function.
   1.460 + *
   1.461 + * The callback function can stop the enumeration by returning FALSE.
   1.462 + *
   1.463 + * @param context an opaque pointer, as passed into utrie_enum()
   1.464 + * @param start the first code point in a contiguous range with value
   1.465 + * @param limit one past the last code point in a contiguous range with value
   1.466 + * @param value the value that is set for all code points in the half-open range [start..limit)
   1.467 + * @return FALSE to stop the enumeration
   1.468 + */
   1.469 +typedef UBool U_CALLCONV
   1.470 +UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value);
   1.471 +
   1.472 +/**
   1.473 + * Enumerate efficiently all values in a trie.
   1.474 + * For each entry in the trie, the value to be delivered is passed through
   1.475 + * the UTrieEnumValue function.
   1.476 + * The value is unchanged if that function pointer is NULL.
   1.477 + *
   1.478 + * For each contiguous range of code points with a given value,
   1.479 + * the UTrieEnumRange function is called.
   1.480 + *
   1.481 + * @param trie a pointer to the runtime trie structure
   1.482 + * @param enumValue a pointer to a function that may transform the trie entry value,
   1.483 + *                  or NULL if the values from the trie are to be used directly
   1.484 + * @param enumRange a pointer to a function that is called for each contiguous range
   1.485 + *                  of code points with the same value; it stops the enumeration by returning FALSE
   1.486 + * @param context an opaque pointer that is passed on to the callback functions
   1.487 + */
   1.488 +U_CAPI void U_EXPORT2
   1.489 +utrie_enum(const UTrie *trie,
   1.490 +           UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context);
   1.491 +
   1.492 +/**
   1.493 + * Unserialize a trie from 32-bit-aligned memory.
   1.494 + * Inverse of utrie_serialize().
   1.495 + * Fills the UTrie runtime trie structure with the settings for the trie data.
   1.496 + *
   1.497 + * @param trie a pointer to the runtime trie structure to be filled in
   1.498 + * @param data a pointer to 32-bit-aligned memory containing trie data
   1.499 + * @param length the number of bytes available at data
   1.500 + * @param pErrorCode an in/out ICU UErrorCode
   1.501 + * @return the number of bytes at data taken up by the trie data
   1.502 + */
   1.503 +U_CAPI int32_t U_EXPORT2
   1.504 +utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode);
   1.505 +
   1.506 +/**
   1.507 + * "Unserialize" a dummy trie.
   1.508 + * A dummy trie is an empty runtime trie, used when a real data trie cannot
   1.509 + * be loaded.
   1.510 + *
   1.511 + * The input memory is filled so that the trie always returns the initialValue,
   1.512 + * or the leadUnitValue for lead surrogate code _units_.
   1.513 + * The Latin-1 part is always set up to be linear.
   1.514 + *
   1.515 + * @param trie a pointer to the runtime trie structure
   1.516 + * @param data a pointer to 32-bit-aligned memory to be filled with the dummy trie data
   1.517 + * @param length the number of bytes available at data (recommended to use UTRIE_DUMMY_SIZE)
   1.518 + * @param initialValue the initial value that is set for all code points
   1.519 + * @param leadUnitValue the value for lead surrogate code _units_ that do not have associated supplementary data
   1.520 + * @param make16BitTrie presumably TRUE for a trie with 16-bit data, FALSE for 32-bit data (TODO confirm in utrie.c)
   1.521 + * @param pErrorCode an in/out ICU UErrorCode
   1.522 + *
   1.523 + * @see UTRIE_DUMMY_SIZE
   1.524 + * @see utrie_open
   1.525 + */
   1.526 +U_CAPI int32_t U_EXPORT2
   1.527 +utrie_unserializeDummy(UTrie *trie,
   1.528 +                       void *data, int32_t length,
   1.529 +                       uint32_t initialValue, uint32_t leadUnitValue,
   1.530 +                       UBool make16BitTrie,
   1.531 +                       UErrorCode *pErrorCode);
   1.532 +
   1.533 +/**
   1.534 + * Default implementation for UTrie.getFoldingOffset, set automatically by
   1.535 + * utrie_unserialize().
   1.536 + * Simply returns the lead surrogate's value itself - which is the inverse
   1.537 + * of the default folding function used by utrie_serialize().
   1.538 + * Exported so that static const UTrie structures can refer to it.
   1.539 + *
   1.540 + * @see UTrieGetFoldingOffset
   1.541 + */
   1.542 +U_CAPI int32_t U_EXPORT2
   1.543 +utrie_defaultGetFoldingOffset(uint32_t data);
   1.544 +
   1.545 +/* Building a trie ----------------------------------------------------------*/
   1.546 +
   1.547 +/**
   1.548 + * Build-time trie structure.
   1.549 + * To be treated as opaque; it is defined here only to make fillIn parameters
   1.550 + * possible for utrie_open() and utrie_clone().
   1.551 + */
   1.552 +struct UNewTrie {
   1.553 +    /**
   1.554 +     * Index values at build-time are 32 bits wide for easier processing.
   1.555 +     * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange32()).
   1.556 +     */
   1.557 +    int32_t index[UTRIE_MAX_INDEX_LENGTH];
   1.558 +    uint32_t *data; /* build-time data array, see utrie_getData() */
   1.559 +
   1.560 +    uint32_t leadUnitValue; /* see the leadUnitValue parameter of utrie_open() */
   1.561 +    int32_t indexLength, dataCapacity, dataLength;
   1.562 +    UBool isAllocated, isDataAllocated;
   1.563 +    UBool isLatin1Linear, isCompacted;
   1.564 +
   1.565 +    /**
   1.566 +     * Map of adjusted indexes, used in utrie_compact().
   1.567 +     * Maps from original indexes to new ones.
   1.568 +     */
   1.569 +    int32_t map[UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT];
   1.570 +};
   1.571 +
   1.572 +typedef struct UNewTrie UNewTrie;
   1.573 +
   1.574 +/**
   1.575 + * Build-time trie callback function, used with utrie_serialize().
   1.576 + * This function calculates a lead surrogate's value including a folding offset
   1.577 + * from the 1024 supplementary code points [start..start+1024[ .
   1.578 + * It always holds that U+10000 <= start <= U+10fc00 and (start&0x3ff)==0.
   1.579 + *
   1.580 + * The folding offset is provided by the caller.
   1.581 + * It is offset=UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
   1.582 + * Instead of the offset itself, n can be stored in 10 bits -
   1.583 + * or fewer if it can be assumed that few lead surrogates have associated data.
   1.584 + *
   1.585 + * The returned value must be
   1.586 + * - not zero if and only if there is relevant data
   1.587 + *   for the corresponding 1024 supplementary code points
   1.588 + * - such that UTrie.getFoldingOffset(UNewTrieGetFoldedValue(..., offset))==offset
   1.589 + *
   1.590 + * @return a folded value, or 0 if there is no relevant data for the lead surrogate.
   1.591 + */
   1.592 +typedef uint32_t U_CALLCONV
   1.593 +UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
   1.594 +
   1.595 +/**
   1.596 + * Open a build-time trie structure.
   1.597 + * The size of the build-time data array is specified to avoid allocating a large
   1.598 + * array in all cases. The array itself can also be passed in.
   1.599 + *
   1.600 + * Although the trie is never fully expanded to a linear array, especially when
   1.601 + * utrie_setRange32() is used, the data array could be large during build time.
   1.602 + * The maximum length is
   1.603 + * UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
   1.604 + * (Number of Unicode code points + one all-initial-value block +
   1.605 + *  possible duplicate entries for 1024 lead surrogates.)
   1.606 + * (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.)
   1.607 + *
   1.608 + * @param fillIn a pointer to a caller-provided UNewTrie structure to be initialized
   1.609 + *               (will not be released), or NULL if one is to be allocated
   1.610 + * @param aliasData a pointer to a caller-provided data array to be used
   1.611 + *                  (will not be released), or NULL if one is to be allocated
   1.612 + * @param maxDataLength the capacity of aliasData (if not NULL) or
   1.613 + *                      the length of the data array to be allocated
   1.614 + * @param initialValue the initial value that is set for all code points
   1.615 + * @param leadUnitValue the value for lead surrogate code _units_ that do not
   1.616 + *                      have associated supplementary data
   1.617 + * @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
   1.618 + *                     kept in a linear, contiguous part of the data array
   1.619 + * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
   1.620 + */
   1.621 +U_CAPI UNewTrie * U_EXPORT2
   1.622 +utrie_open(UNewTrie *fillIn,
   1.623 +           uint32_t *aliasData, int32_t maxDataLength,
   1.624 +           uint32_t initialValue, uint32_t leadUnitValue,
   1.625 +           UBool latin1Linear);
   1.626 +
   1.627 +/**
   1.628 + * Clone a build-time trie structure with all of its entries.
   1.629 + *
   1.630 + * @param fillIn like in utrie_open()
   1.631 + * @param other the build-time trie structure to clone
   1.632 + * @param aliasData like in utrie_open(); it is
   1.633 + *                  used only if aliasDataLength>=(capacity of other's data array)
   1.634 + * @param aliasDataLength the length of aliasData
   1.635 + * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
   1.636 + */
   1.637 +U_CAPI UNewTrie * U_EXPORT2
   1.638 +utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataLength);
   1.639 +
   1.640 +/**
   1.641 + * Close a build-time trie structure, and release memory
   1.642 + * that was allocated by utrie_open() or utrie_clone(); storage supplied by the caller as fillIn or aliasData is documented in utrie_open() as "will not be released".
   1.643 + *
   1.644 + * @param trie the build-time trie to close
   1.645 + */
   1.646 +U_CAPI void U_EXPORT2
   1.647 +utrie_close(UNewTrie *trie);
   1.648 +
   1.649 +/**
   1.650 + * Get the data array of a build-time trie, together with its length.
   1.651 + * The data may be modified by the caller, but entries that are equal before
   1.652 + * must still be equal after modification.
   1.653 + *
   1.654 + * @param trie the build-time trie
   1.655 + * @param pLength (out) a pointer to a variable that receives the number
   1.656 + *                of entries (not bytes) in the data array
   1.657 + * @return a pointer to the data array (uint32_t entries)
   1.658 + */
   1.659 +U_CAPI uint32_t * U_EXPORT2
   1.660 +utrie_getData(UNewTrie *trie, int32_t *pLength);
   1.661 +
   1.662 +/**
   1.663 + * Set a 32-bit value for a single code point in a build-time trie.
   1.664 + *
   1.665 + * @param trie the build-time trie
   1.666 + * @param c the code point whose value is to be set
   1.667 + * @param value the value to store for c
   1.668 + * @return FALSE if a failure occurred (illegal argument or data array overrun)
   1.669 + */
   1.670 +U_CAPI UBool U_EXPORT2
   1.671 +utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value);
   1.672 +
   1.673 +/**
   1.674 + * Get a value from a code point as stored in the build-time trie.
   1.675 + *
   1.676 + * @param trie the build-time trie
   1.677 + * @param c the code point to look up
   1.678 + * @param pInBlockZero if not NULL, then *pInBlockZero is set to TRUE
   1.679 + *                     iff the value is retrieved from block 0;
   1.680 + *                     block 0 is the all-initial-value initial block
   1.681 + * @return the value stored for c in the build-time trie
   1.682 + */
   1.683 +U_CAPI uint32_t U_EXPORT2
   1.684 +utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero);
   1.685 +
   1.686 +/**
   1.687 + * Set a value in a range of code points [start..limit[ (start inclusive, limit exclusive).
   1.688 + * All code points c with start<=c<limit will get the value if
   1.689 + * overwrite is TRUE or if the old value is the initial value (see initialValue in utrie_open()).
   1.690 + *
   1.691 + * @param trie the build-time trie
   1.692 + * @param start the first code point to get the value
   1.693 + * @param limit one past the last code point to get the value
   1.694 + * @param value the value
   1.695 + * @param overwrite flag for whether old non-initial values are to be overwritten
   1.696 + * @return FALSE if a failure occurred (illegal argument or data array overrun)
   1.697 + */
   1.698 +U_CAPI UBool U_EXPORT2
   1.699 +utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite);
   1.700 +
   1.701 +/**
   1.702 + * Compact the build-time trie after all values are set, and then
   1.703 + * serialize it into 32-bit aligned memory.
   1.704 + *
   1.705 + * After this, the trie can only be serialized again and/or closed;
   1.706 + * no further values can be added.
   1.707 + *
   1.708 + * @see utrie_unserialize()
   1.709 + *
   1.710 + * @param trie the build-time trie
   1.711 + * @param data a pointer to 32-bit-aligned memory that receives the serialized trie data
   1.712 + * @param capacity the number of bytes available at data
   1.713 + * @param getFoldedValue a callback function that calculates the value for
   1.714 + *                       a lead surrogate from all of its supplementary code points
   1.715 + *                       and the folding offset;
   1.716 + *                       if NULL, then a default function is used which returns just
   1.717 + *                       the input offset when there are any non-initial-value entries
   1.718 + * @param reduceTo16Bits flag for whether the values are to be reduced to a
   1.719 + *                       width of 16 bits for serialization and runtime
   1.720 + * @param pErrorCode a UErrorCode argument; among other possible error codes:
   1.721 + * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
   1.722 + * - U_MEMORY_ALLOCATION_ERROR if the trie data array is too small
   1.723 + * - U_INDEX_OUTOFBOUNDS_ERROR if the index or data arrays are too long after compaction for serialization
   1.724 + *
   1.725 + * @return the number of bytes written for the trie
   1.726 + */
   1.727 +U_CAPI int32_t U_EXPORT2
   1.728 +utrie_serialize(UNewTrie *trie, void *data, int32_t capacity,
   1.729 +                UNewTrieGetFoldedValue *getFoldedValue,
   1.730 +                UBool reduceTo16Bits,
   1.731 +                UErrorCode *pErrorCode);
   1.732 +
   1.733 +/**
   1.734 + * Swap a serialized UTrie. NOTE(review): the parameters and return value are not documented here; presumably they follow the UDataSwapper conventions of udataswp.h - confirm.
   1.735 + * @internal
   1.736 + */
   1.737 +U_CAPI int32_t U_EXPORT2
   1.738 +utrie_swap(const UDataSwapper *ds,
   1.739 +           const void *inData, int32_t length, void *outData,
   1.740 +           UErrorCode *pErrorCode);
   1.741 +
   1.742 +U_CDECL_END
   1.743 +
   1.744 +#endif