os/textandloc/fontservices/textshaperplugin/IcuSource/common/ruleiter.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
**********************************************************************
sl@0
     3
* Copyright (c) 2003-2005, International Business Machines
sl@0
     4
* Corporation and others.  All Rights Reserved.
sl@0
     5
**********************************************************************
sl@0
     6
* Author: Alan Liu
sl@0
     7
* Created: September 24 2003
sl@0
     8
* Since: ICU 2.8
sl@0
     9
**********************************************************************
sl@0
    10
*/
sl@0
    11
#ifndef _RULEITER_H_
sl@0
    12
#define _RULEITER_H_
sl@0
    13
sl@0
    14
#include "unicode/utypes.h"
sl@0
    15
sl@0
    16
U_NAMESPACE_BEGIN
sl@0
    17
sl@0
    18
// Forward declarations: this header only uses these types through
// references and pointers, so their full definitions are not needed here.
class UnicodeString;
class ParsePosition;
class SymbolTable;
sl@0
    21
sl@0
    22
/**
sl@0
    23
 * An iterator that returns 32-bit code points.  This class is deliberately
sl@0
    24
 * <em>not</em> related to any of the ICU character iterator classes
sl@0
    25
 * in order to minimize complexity.
sl@0
    26
 * @author Alan Liu
sl@0
    27
 * @since ICU 2.8
sl@0
    28
 */
sl@0
    29
class U_COMMON_API RuleCharacterIterator {
sl@0
    30
sl@0
    31
    // TODO: Ideas for later.  (Do not implement if not needed, lest the
sl@0
    32
    // code coverage numbers go down due to unused methods.)
sl@0
    33
    // 1. Add a copy constructor, operator==() method.
sl@0
    34
    // 2. Rather than return DONE, throw an exception if the end
sl@0
    35
    // is reached -- this is an alternate usage model, probably not useful.
sl@0
    36
sl@0
    37
private:
sl@0
    38
    /**
sl@0
    39
     * Text being iterated.
sl@0
    40
     */    
sl@0
    41
    const UnicodeString& text;
sl@0
    42
sl@0
    43
    /**
sl@0
    44
     * Position of iterator.
sl@0
    45
     */
sl@0
    46
    ParsePosition& pos;
sl@0
    47
sl@0
    48
    /**
sl@0
    49
     * Symbol table used to parse and dereference variables.  May be 0.
sl@0
    50
     */
sl@0
    51
    const SymbolTable* sym;
sl@0
    52
    
sl@0
    53
    /**
sl@0
    54
     * Current variable expansion, or 0 if none.
sl@0
    55
     */
sl@0
    56
    const UnicodeString* buf;
sl@0
    57
sl@0
    58
    /**
sl@0
    59
     * Position within buf.  Meaningless if buf == 0.
sl@0
    60
     */
sl@0
    61
    int32_t bufPos;
sl@0
    62
sl@0
    63
public:
sl@0
    64
    /**
sl@0
    65
     * Value returned when there are no more characters to iterate.
sl@0
    66
     */
sl@0
    67
    enum { DONE = -1 };
sl@0
    68
sl@0
    69
    /**
sl@0
    70
     * Bitmask option to enable parsing of variable names.  If (options &
sl@0
    71
     * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
sl@0
    72
     * its value.  Variables are parsed using the SymbolTable API.
sl@0
    73
     */
sl@0
    74
    enum { PARSE_VARIABLES = 1 };
sl@0
    75
sl@0
    76
    /**
sl@0
    77
     * Bitmask option to enable parsing of escape sequences.  If (options &
sl@0
    78
     * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
sl@0
    79
     * to its value.  Escapes are parsed using Utility.unescapeAt().
sl@0
    80
     */
sl@0
    81
    enum { PARSE_ESCAPES   = 2 };
sl@0
    82
sl@0
    83
    /**
sl@0
    84
     * Bitmask option to enable skipping of whitespace.  If (options &
sl@0
    85
     * SKIP_WHITESPACE) != 0, then whitespace characters will be silently
sl@0
    86
     * skipped, as if they were not present in the input.  Whitespace
sl@0
    87
     * characters are defined by UCharacterProperty.isRuleWhiteSpace().
sl@0
    88
     */
sl@0
    89
    enum { SKIP_WHITESPACE = 4 };
sl@0
    90
sl@0
    91
    /**
sl@0
    92
     * Constructs an iterator over the given text, starting at the given
sl@0
    93
     * position.
sl@0
    94
     * @param text the text to be iterated
sl@0
    95
     * @param sym the symbol table, or null if there is none.  If sym is null,
sl@0
    96
     * then variables will not be deferenced, even if the PARSE_VARIABLES
sl@0
    97
     * option is set.
sl@0
    98
     * @param pos upon input, the index of the next character to return.  If a
sl@0
    99
     * variable has been dereferenced, then pos will <em>not</em> increment as
sl@0
   100
     * characters of the variable value are iterated.
sl@0
   101
     */
sl@0
   102
    RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
sl@0
   103
                          ParsePosition& pos);
sl@0
   104
    
sl@0
   105
    /**
sl@0
   106
     * Returns true if this iterator has no more characters to return.
sl@0
   107
     */
sl@0
   108
    UBool atEnd() const;
sl@0
   109
sl@0
   110
    /**
sl@0
   111
     * Returns the next character using the given options, or DONE if there
sl@0
   112
     * are no more characters, and advance the position to the next
sl@0
   113
     * character.
sl@0
   114
     * @param options one or more of the following options, bitwise-OR-ed
sl@0
   115
     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
sl@0
   116
     * @param isEscaped output parameter set to TRUE if the character
sl@0
   117
     * was escaped
sl@0
   118
     * @param ec input-output error code.  An error will only be set by
sl@0
   119
     * this routing if options includes PARSE_VARIABLES and an unknown
sl@0
   120
     * variable name is seen, or if options includes PARSE_ESCAPES and
sl@0
   121
     * an invalid escape sequence is seen.
sl@0
   122
     * @return the current 32-bit code point, or DONE
sl@0
   123
     */
sl@0
   124
    UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
sl@0
   125
sl@0
   126
    /**
sl@0
   127
     * Returns true if this iterator is currently within a variable expansion.
sl@0
   128
     */
sl@0
   129
    inline UBool inVariable() const;
sl@0
   130
sl@0
   131
    /**
sl@0
   132
     * An opaque object representing the position of a RuleCharacterIterator.
sl@0
   133
     */
sl@0
   134
    struct Pos {
sl@0
   135
    private:
sl@0
   136
        const UnicodeString* buf;
sl@0
   137
        int32_t pos;
sl@0
   138
        int32_t bufPos;
sl@0
   139
        friend class RuleCharacterIterator;
sl@0
   140
    };
sl@0
   141
sl@0
   142
    /**
sl@0
   143
     * Sets an object which, when later passed to setPos(), will
sl@0
   144
     * restore this iterator's position.  Usage idiom:
sl@0
   145
     *
sl@0
   146
     * RuleCharacterIterator iterator = ...;
sl@0
   147
     * RuleCharacterIterator::Pos pos;
sl@0
   148
     * iterator.getPos(pos);
sl@0
   149
     * for (;;) {
sl@0
   150
     *   iterator.getPos(pos);
sl@0
   151
     *   int c = iterator.next(...);
sl@0
   152
     *   ...
sl@0
   153
     * }
sl@0
   154
     * iterator.setPos(pos);
sl@0
   155
     *
sl@0
   156
     * @param p a position object to be set to this iterator's
sl@0
   157
     * current position.
sl@0
   158
     */
sl@0
   159
    void getPos(Pos& p) const;
sl@0
   160
sl@0
   161
    /**
sl@0
   162
     * Restores this iterator to the position it had when getPos()
sl@0
   163
     * set the given object.
sl@0
   164
     * @param p a position object previously set by getPos()
sl@0
   165
     */
sl@0
   166
    void setPos(const Pos& p);
sl@0
   167
sl@0
   168
    /**
sl@0
   169
     * Skips ahead past any ignored characters, as indicated by the given
sl@0
   170
     * options.  This is useful in conjunction with the lookahead() method.
sl@0
   171
     *
sl@0
   172
     * Currently, this only has an effect for SKIP_WHITESPACE.
sl@0
   173
     * @param options one or more of the following options, bitwise-OR-ed
sl@0
   174
     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
sl@0
   175
     */
sl@0
   176
    void skipIgnored(int32_t options);
sl@0
   177
sl@0
   178
    /**
sl@0
   179
     * Returns a string containing the remainder of the characters to be
sl@0
   180
     * returned by this iterator, without any option processing.  If the
sl@0
   181
     * iterator is currently within a variable expansion, this will only
sl@0
   182
     * extend to the end of the variable expansion.  This method is provided
sl@0
   183
     * so that iterators may interoperate with string-based APIs.  The typical
sl@0
   184
     * sequence of calls is to call skipIgnored(), then call lookahead(), then
sl@0
   185
     * parse the string returned by lookahead(), then call jumpahead() to
sl@0
   186
     * resynchronize the iterator.
sl@0
   187
     * @param result a string to receive the characters to be returned
sl@0
   188
     * by future calls to next()
sl@0
   189
     * @return a reference to result
sl@0
   190
     */
sl@0
   191
    UnicodeString& lookahead(UnicodeString& result) const;
sl@0
   192
sl@0
   193
    /**
sl@0
   194
     * Advances the position by the given number of 16-bit code units.
sl@0
   195
     * This is useful in conjunction with the lookahead() method.
sl@0
   196
     * @param count the number of 16-bit code units to jump over
sl@0
   197
     */
sl@0
   198
    void jumpahead(int32_t count);
sl@0
   199
sl@0
   200
    /**
sl@0
   201
     * Returns a string representation of this object, consisting of the
sl@0
   202
     * characters being iterated, with a '|' marking the current position.
sl@0
   203
     * Position within an expanded variable is <em>not</em> indicated.
sl@0
   204
     * @param result output parameter to receive a string
sl@0
   205
     * representation of this object
sl@0
   206
     */
sl@0
   207
//    UnicodeString& toString(UnicodeString& result) const;
sl@0
   208
    
sl@0
   209
private:
sl@0
   210
    /**
sl@0
   211
     * Returns the current 32-bit code point without parsing escapes, parsing
sl@0
   212
     * variables, or skipping whitespace.
sl@0
   213
     * @return the current 32-bit code point
sl@0
   214
     */
sl@0
   215
    UChar32 _current() const;
sl@0
   216
    
sl@0
   217
    /**
sl@0
   218
     * Advances the position by the given amount.
sl@0
   219
     * @param count the number of 16-bit code units to advance past
sl@0
   220
     */
sl@0
   221
    void _advance(int32_t count);
sl@0
   222
};
sl@0
   223
sl@0
   224
/**
 * Returns true if this iterator is currently within a variable expansion,
 * i.e. if the current-expansion pointer (buf) is non-null.
 */
inline UBool RuleCharacterIterator::inVariable() const {
    return buf != 0;
}
sl@0
   227
sl@0
   228
U_NAMESPACE_END
sl@0
   229
sl@0
   230
#endif // _RULEITER_H_
sl@0
   231
//eof