os/textandloc/fontservices/textshaperplugin/IcuSource/common/util.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/util.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,248 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (c) 2001-2005, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   Date        Name        Description
    1.10 +*   11/19/2001  aliu        Creation.
    1.11 +**********************************************************************
    1.12 +*/
    1.13 +#ifndef ICU_UTIL_H
    1.14 +#define ICU_UTIL_H
    1.15 +
    1.16 +#include "unicode/utypes.h"
    1.17 +#include "unicode/uobject.h"
    1.18 +#include "unicode/unistr.h"
    1.19 +
    1.20 +//--------------------------------------------------------------------
    1.21 +// class ICU_Utility
    1.22 +// i18n utility functions, scoped into the class ICU_Utility.
    1.23 +//--------------------------------------------------------------------
    1.24 +
    1.25 +U_NAMESPACE_BEGIN
    1.26 +
    1.27 +class UnicodeMatcher;
    1.28 +
    1.29 +class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
    1.30 + public:
    1.31 +
    1.32 +    /**
    1.33 +     * Append a number to the given UnicodeString in the given radix.
    1.34 +     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
    1.35 +     * radices 11 through 36.
    1.36 +     * @param result the digits of the number are appended here
    1.37 +     * @param n the number to be converted to digits; may be negative.
    1.38 +     * If negative, a '-' is prepended to the digits.
    1.39 +     * @param radix a radix from 2 to 36 inclusive.
    1.40 +     * @param minDigits the minimum number of digits, not including
    1.41 +     * any '-', to produce.  Values less than 2 have no effect.  One
    1.42 +     * digit is always emitted regardless of this parameter.
    1.43 +     * @return a reference to result
    1.44 +     */
    1.45 +    static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
    1.46 +                                       int32_t radix = 10,
    1.47 +                                       int32_t minDigits = 1);
    1.48 +
    1.49 +    /**
    1.50 +     * Return true if the character is NOT printable ASCII.
    1.51 +     *
    1.52 +     * This method should really be in UnicodeString (or similar).  For
    1.53 +     * now, we implement it here and share it with friend classes.
    1.54 +     */
    1.55 +    static UBool isUnprintable(UChar32 c);
    1.56 +
    1.57 +    /**
    1.58 +     * Escape unprintable characters using \uxxxx notation for U+0000 to
    1.59 +     * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
    1.60 +     * printable ASCII, then do nothing and return FALSE.  Otherwise,
    1.61 +     * append the escaped notation and return TRUE.
    1.62 +     */
    1.63 +    static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
    1.64 +
    1.65 +    /**
    1.66 +     * Returns the index of a character, ignoring quoted text.
    1.67 +     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
    1.68 +     * found by a search for 'h'.
    1.69 +     * @param text text to be searched
    1.70 +     * @param start the beginning index, inclusive; <code>0 <= start
    1.71 +     * <= limit</code>.
    1.72 +     * @param limit the ending index, exclusive; <code>start <= limit
    1.73 +     * <= text.length()</code>.
    1.74 +     * @param c character to search for
    1.75 +     * @return Offset of the first instance of c, or -1 if not found.
    1.76 +     */
    1.77 +//?FOR FUTURE USE.  DISABLED FOR NOW for coverage reasons.
    1.78 +//    static int32_t quotedIndexOf(const UnicodeString& text,
    1.79 +//                                 int32_t start, int32_t limit,
    1.80 +//                                 UChar c);
    1.81 +
    1.82 +    /**
    1.83 +     * Skip over a sequence of zero or more white space characters at pos.
    1.84 +     * @param advance if true, advance pos to the first non-white-space
    1.85 +     * character at or after pos, or str.length(), if there is none.
    1.86 +     * Otherwise leave pos unchanged.
    1.87 +     * @return the index of the first non-white-space character at or
    1.88 +     * after pos, or str.length(), if there is none.
    1.89 +     */
    1.90 +    static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
    1.91 +                                  UBool advance = FALSE);
    1.92 +
    1.93 +    /**
    1.94 +     * Skip over whitespace in a Replaceable.  Whitespace is defined by
    1.95 +     * uprv_isRuleWhiteSpace().  Skipping may be done in the forward or
    1.96 +     * reverse direction.  In either case, the leftmost index will be
    1.97 +     * inclusive, and the rightmost index will be exclusive.  That is,
    1.98 +     * given a range defined as [start, limit), the call
    1.99 +     * skipWhitespace(text, start, limit) will advance start past leading
   1.100 +     * whitespace, whereas the call skipWhitespace(text, limit, start),
   1.101 +     * will back up limit past trailing whitespace.
   1.102 +     * @param text the text to be analyzed
   1.103 +     * @param pos either the start or limit of a range of 'text', to skip
   1.104 +     * leading or trailing whitespace, respectively
   1.105 +     * @param stop either the limit or start of a range of 'text', to skip
   1.106 +     * leading or trailing whitespace, respectively
   1.107 +     * @return the new start or limit, depending on what was passed in to
   1.108 +     * 'pos'
   1.109 +     */
   1.110 +//?FOR FUTURE USE.  DISABLED FOR NOW for coverage reasons.
   1.111 +//?    static int32_t skipWhitespace(const Replaceable& text,
   1.112 +//?                                  int32_t pos, int32_t stop);
   1.113 +
   1.114 +    /**
   1.115 +     * Parse a single non-whitespace character 'ch', optionally
   1.116 +     * preceded by whitespace.
   1.117 +     * @param id the string to be parsed
   1.118 +     * @param pos INPUT-OUTPUT parameter.  On input, pos is the
   1.119 +     * offset of the first character to be parsed.  On output, pos
   1.120 +     * is the index after the last parsed character.  If the parse
   1.121 +     * fails, pos will be unchanged.
   1.122 +     * @param ch the non-whitespace character to be parsed.
   1.123 +     * @return true if 'ch' is seen preceded by zero or more
   1.124 +     * whitespace characters.
   1.125 +     */
   1.126 +    static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);
   1.127 +
   1.128 +    /**
   1.129 +     * Parse a pattern string starting at offset pos.  Keywords are
   1.130 +     * matched case-insensitively.  Spaces may be skipped and may be
   1.131 +     * optional or required.  Integer values may be parsed, and if
   1.132 +     * they are, they will be returned in the given array.  If
   1.133 +     * successful, the offset of the next non-space character is
   1.134 +     * returned.  On failure, -1 is returned.
   1.135 +     * @param pattern must only contain lowercase characters, which
   1.136 +     * will match their uppercase equivalents as well.  A space
   1.137 +     * character matches one or more required spaces.  A '~' character
   1.138 +     * matches zero or more optional spaces.  A '#' character matches
   1.139 +     * an integer and stores it in parsedInts, which the caller must
   1.140 +     * ensure has enough capacity.
   1.141 +     * @param parsedInts array to receive parsed integers.  Caller
   1.142 +     * must ensure that it has room for at least as many integers as
   1.143 +     * there are '#' signs in 'pattern'.
   1.144 +     * @return the position after the last character parsed, or -1 if
   1.145 +     * the parse failed
   1.146 +     */
   1.147 +    static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
   1.148 +                                const UnicodeString& pattern, int32_t* parsedInts);
   1.149 +        
   1.150 +    /**
   1.151 +     * Parse a pattern string within the given Replaceable and a parsing
   1.152 +     * pattern.  Characters are matched literally and case-sensitively
   1.153 +     * except for the following special characters:
   1.154 +     *
   1.155 +     * ~  zero or more uprv_isRuleWhiteSpace chars
   1.156 +     *
   1.157 +     * If end of pattern is reached with all matches along the way,
   1.158 +     * the first unparsed index is returned ('index' itself is passed
   1.159 +     * by value and is not modified).  Otherwise -1 is returned.
   1.160 +     * @param pat pattern that controls parsing
   1.161 +     * @param text text to be parsed, starting at index
   1.162 +     * @param index offset to first character to parse
   1.163 +     * @param limit offset after last character to parse
   1.164 +     * @return index after last parsed character, or -1 on parse failure.
   1.165 +     */
   1.166 +    static int32_t parsePattern(const UnicodeString& pat,
   1.167 +                                const Replaceable& text,
   1.168 +                                int32_t index,
   1.169 +                                int32_t limit);
   1.170 +
   1.171 +    /**
   1.172 +     * Parse an integer at pos, either of the form \d+ or of the form
   1.173 +     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
   1.174 +     * or octal format.
   1.175 +     * @param pos INPUT-OUTPUT parameter.  On input, the first
   1.176 +     * character to parse.  On output, the character after the last
   1.177 +     * parsed character.
   1.178 +     */
   1.179 +    static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
   1.180 +
   1.181 +    /**
   1.182 +     * Parse a Unicode identifier from the given string at the given
   1.183 +     * position.  Return the identifier, or an empty string if there
   1.184 +     * is no identifier.
   1.185 +     * @param str the string to parse
   1.186 +     * @param pos INPUT-OUTPUT parameter.  On INPUT, pos is the
   1.187 +     * first character to examine.  It must be less than str.length(),
   1.188 +     * and it must not point to a whitespace character.  That is, must
   1.189 +     * have pos < str.length() and
   1.190 +     * !UCharacter::isWhitespace(str.char32At(pos)).  On
   1.191 +     * OUTPUT, the position after the last parsed character.
   1.192 +     * @return the Unicode identifier, or an empty string if there is
   1.193 +     * no valid identifier at pos.
   1.194 +     */
   1.195 +    static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
   1.196 +
   1.197 +    /**
   1.198 +     * Parse an unsigned 31-bit integer at the given offset.  Use
   1.199 +     * UCharacter.digit() to parse individual characters into digits.
   1.200 +     * @param text the text to be parsed
   1.201 +     * @param pos INPUT-OUTPUT parameter.  On entry, pos is the
   1.202 +     * offset within text at which to start parsing; it should point
   1.203 +     * to a valid digit.  On exit, pos is the offset after the last
   1.204 +     * parsed character.  If the parse failed, it will be unchanged on
   1.205 +     * exit.  Must be >= 0 on entry.
   1.206 +     * @param radix the radix in which to parse; must be >= 2 and <=
   1.207 +     * 36.
   1.208 +     * @return a non-negative parsed number, or -1 upon parse failure.
   1.209 +     * Parse fails if there are no digits, that is, if pos does not
   1.210 +     * point to a valid digit on entry, or if the number to be parsed
   1.211 +     * does not fit into a 31-bit unsigned integer.
   1.212 +     */
   1.213 +    static int32_t parseNumber(const UnicodeString& text,
   1.214 +                               int32_t& pos, int8_t radix);
   1.215 +
   1.216 +    static void appendToRule(UnicodeString& rule,  // overload taking a single UChar32
   1.217 +                             UChar32 c,
   1.218 +                             UBool isLiteral,
   1.219 +                             UBool escapeUnprintable,
   1.220 +                             UnicodeString& quoteBuf);
   1.221 +    
   1.222 +    static void appendToRule(UnicodeString& rule,  // overload taking a UnicodeString
   1.223 +                             const UnicodeString& text,
   1.224 +                             UBool isLiteral,
   1.225 +                             UBool escapeUnprintable,
   1.226 +                             UnicodeString& quoteBuf);
   1.227 +
   1.228 +    static void appendToRule(UnicodeString& rule,  // overload taking a UnicodeMatcher pointer
   1.229 +                             const UnicodeMatcher* matcher,
   1.230 +                             UBool escapeUnprintable,
   1.231 +                             UnicodeString& quoteBuf);
   1.232 +
   1.233 +private:
   1.234 +    // private constructor: all methods are static, so do not instantiate
   1.235 +    ICU_Utility();
   1.236 +};
   1.237 +
   1.238 +U_NAMESPACE_END
   1.239 +
   1.240 +/**
   1.241 + * Is this character a "white space" in the sense of ICU rule parsers?
   1.242 + * Equivalent to testing for the Pattern_White_Space Unicode property.
   1.243 + * This is a stable set of characters that will not change.
   1.244 + * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
   1.245 + * @internal
   1.246 + */
   1.247 +U_CAPI UBool U_EXPORT2
   1.248 +uprv_isRuleWhiteSpace(UChar32 c);
   1.249 +
   1.250 +#endif
   1.251 +//eof