os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbiscan.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
//
sl@0
     2
//  rbbiscan.h
sl@0
     3
//
sl@0
     4
//  Copyright (C) 2002-2003, International Business Machines Corporation and others.
sl@0
     5
//  All Rights Reserved.
sl@0
     6
//
sl@0
     7
//  This file contains declarations for class RBBIRuleScanner
sl@0
     8
//
sl@0
     9
sl@0
    10
sl@0
    11
#ifndef RBBISCAN_H
sl@0
    12
#define RBBISCAN_H
sl@0
    13
sl@0
    14
#include "unicode/utypes.h"
sl@0
    15
#include "unicode/uobject.h"
sl@0
    16
#include "unicode/rbbi.h"
sl@0
    17
#include "unicode/uniset.h"
sl@0
    18
#include "unicode/parseerr.h"
sl@0
    19
#include "uhash.h"
sl@0
    20
#include "uvector.h"
sl@0
    21
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
sl@0
    22
                          //    looks up references to $variables within a set.
sl@0
    23
#include "rbbinode.h"
sl@0
    24
//#include "rbbitblb.h"
sl@0
    25
sl@0
    26
sl@0
    27
sl@0
    28
U_NAMESPACE_BEGIN
sl@0
    29
sl@0
    30
class   RBBIRuleBuilder;
sl@0
    31
class   RBBISymbolTable;
sl@0
    32
sl@0
    33
sl@0
    34
//--------------------------------------------------------------------------------
sl@0
    35
//
sl@0
    36
//  class RBBIRuleScanner does the lowest level, character-at-a-time
sl@0
    37
//                        scanning of break iterator rules.  
sl@0
    38
//
sl@0
    39
//                        The output of the scanner is parse trees for
sl@0
    40
//                        the rule expressions and a list of all Unicode Sets
sl@0
    41
//                        encountered.
sl@0
    42
//
sl@0
    43
//--------------------------------------------------------------------------------
sl@0
    44
static const int    kStackSize = 100;               // Capacity of the rule parsing stacks: sizes
sl@0
    45
                                                    //   both the state stack (fStack) and the node
sl@0
    46
                                                    //   stack (fNodeStack).  Corresponds roughly to
sl@0
    47
                                                    //   the allowed depth of parentheses nesting.
sl@0
    48
sl@0
    49
enum EParseAction {dummy01, dummy02};               // Placeholder enum for the specifier for
sl@0
    50
                                                    //   actions that are specified in the rule
sl@0
    51
                                                    //   parsing state table; it is the parameter
                                                    //   type of RBBIRuleScanner::doParseActions().
                                                    //   NOTE(review): the real action values are
                                                    //   presumably declared with the state table
                                                    //   (rbbirpt.h) - confirm.
sl@0
    52
sl@0
    53
// Character-at-a-time scanner for break iterator rules.  Derives from UMemory.
// The copy constructor and assignment operator are declared private at the
// end of the class so that instances cannot be copied by outside code.
class RBBIRuleScanner : public UMemory {
sl@0
    54
public:
sl@0
    55
sl@0
    56
    struct RBBIRuleChar {
sl@0
    57
        UChar32             fChar;                  // The character (code point) itself.
sl@0
    58
        UBool               fEscaped;               // NOTE(review): presumably TRUE when the char
                                                    //   was escaped in the rule text - confirm.
sl@0
    59
    };
sl@0
    60
sl@0
    61
    RBBIRuleScanner(RBBIRuleBuilder  *rb);
sl@0
    62
sl@0
    63
sl@0
    64
    virtual    ~RBBIRuleScanner();
sl@0
    65
sl@0
    66
    void        nextChar(RBBIRuleChar &c);          // Get the next char from the input stream;
sl@0
    67
                                                    //   the result is delivered through c.
                                                    //   NOTE(review): declared void, so end of
                                                    //   input is not reported by a return value;
                                                    //   confirm how it is signalled via c.
sl@0
    68
sl@0
    69
    UBool       push(const RBBIRuleChar &c);        // Push (unget) one character.
sl@0
    70
                                                    //   Only a single character may be pushed.
sl@0
    71
sl@0
    72
    void        parse();                            // Parse the rules, generating two parse
sl@0
    73
                                                    //   trees, one each for the forward and
sl@0
    74
                                                    //   reverse rules,
sl@0
    75
                                                    //   and a list of UnicodeSets encountered.
sl@0
    76
sl@0
    77
    /**
sl@0
    78
     * Return a rules string without unnecessary
sl@0
    79
     * characters.
sl@0
    80
     */
sl@0
    81
    static UnicodeString stripRules(const UnicodeString &rules);
sl@0
    82
private:
sl@0
    83
sl@0
    84
    UBool       doParseActions(EParseAction a);
sl@0
    85
    void        error(UErrorCode e);                   // error reporting convenience function.
sl@0
    86
    void        fixOpStack(RBBINode::OpPrecedence p);
sl@0
    87
                                                       // NOTE(review): only a stray comment fragment
                                                       //   ("a character.") was attached here; the
                                                       //   behavior is defined in the implementation.
sl@0
    88
    void        findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
sl@0
    89
sl@0
    90
    UChar32     nextCharLL();                          // NOTE(review): presumably the low-level fetch
                                                       //   underneath nextChar() - confirm.
sl@0
    91
#ifdef RBBI_DEBUG
sl@0
    92
    void        printNodeStack(const char *title);     // Only declared when RBBI_DEBUG is defined.
sl@0
    93
#endif
sl@0
    94
    RBBINode    *pushNewNode(RBBINode::NodeType  t);   // NOTE(review): presumably creates a node of
                                                       //   type t and pushes it on fNodeStack - confirm.
sl@0
    95
    void        scanSet();
sl@0
    96
sl@0
    97
sl@0
    98
    RBBIRuleBuilder               *fRB;              // The rule builder that we are part of.
sl@0
    99
sl@0
   100
    int32_t                       fScanIndex;        // Index of current character being processed
sl@0
   101
                                                     //   in the rule input string.
sl@0
   102
    int32_t                       fNextIndex;        // Index of the next character, which
sl@0
   103
                                                     //   is the first character not yet scanned.
sl@0
   104
    UBool                         fQuoteMode;        // Scan is in a 'quoted region'
sl@0
   105
    int                           fLineNum;          // Line number in input file.
sl@0
   106
    int                           fCharNum;          // Char position within the line.
sl@0
   107
    UChar32                       fLastChar;         // Previous char, needed to count CR-LF
sl@0
   108
                                                     //   as a single line, not two.
sl@0
   109
sl@0
   110
    RBBIRuleChar                  fC;                // Current char for parse state machine
sl@0
   111
                                                     //   processing.
sl@0
   112
    UnicodeString                 fVarName;          // $variableName, valid when we've just
sl@0
   113
                                                     //   scanned one.
sl@0
   114
sl@0
   115
    RBBIRuleTableEl               **fStateTable;     // State Transition Table for RBBI Rule
sl@0
   116
                                                     //   parsing.  index by p[state][char-class]
                                                     //   NOTE(review): RBBIRuleTableEl is not declared
                                                     //   in the visible part of this header; presumably
                                                     //   it comes from rbbirpt.h - confirm.
sl@0
   117
sl@0
   118
    uint16_t                      fStack[kStackSize];  // State stack, holds state pushes
sl@0
   119
    int                           fStackPtr;           //  and pops as specified in the state
sl@0
   120
                                                       //  transition rules.
sl@0
   121
sl@0
   122
    RBBINode                      *fNodeStack[kStackSize]; // Node stack, holds nodes created
sl@0
   123
                                                           //  during the parse of a rule
sl@0
   124
    int                            fNodeStackPtr;          // NOTE(review): presumably the index of the
                                                           //  top entry of fNodeStack - confirm.
sl@0
   125
sl@0
   126
sl@0
   127
    UBool                          fReverseRule;     // True if the rule currently being scanned
sl@0
   128
                                                     //  is a reverse direction rule (if it
sl@0
   129
                                                     //  starts with a '!')
sl@0
   130
sl@0
   131
    UBool                          fLookAheadRule;   // True if the rule includes a '/'
sl@0
   132
                                                     //   somewhere within it.
sl@0
   133
sl@0
   134
    RBBISymbolTable               *fSymbolTable;     // symbol table, holds definitions of
sl@0
   135
                                                     //   $variable symbols.
sl@0
   136
sl@0
   137
    UHashtable                    *fSetTable;        // UnicodeSet hash table, holds indexes to
sl@0
   138
                                                     //   the sets created while parsing rules.
sl@0
   139
                                                     //   The key is the string used for creating
sl@0
   140
                                                     //   the set.
sl@0
   141
sl@0
   142
    UnicodeSet                    *fRuleSets[10];    // Unicode Sets that are needed during
sl@0
   143
                                                     //  the scanning of RBBI rules.  The
sl@0
   144
                                                     //  indices for these are assigned by the
sl@0
   145
                                                     //  perl script that builds the state tables.
sl@0
   146
                                                     //  See rbbirpt.h.
sl@0
   147
sl@0
   148
    int32_t                        fRuleNum;         // Counts each rule as it is scanned.
sl@0
   149
sl@0
   150
    int32_t                        fOptionStart;     // Input index of start of a !!option
sl@0
   151
                                                     //   keyword, while being scanned.
sl@0
   152
sl@0
   153
    UnicodeSet *gRuleSet_rule_char;                  // Despite the 'g' prefix, these four set
sl@0
   154
    UnicodeSet *gRuleSet_white_space;                //   pointers are per-instance members, not
sl@0
   155
    UnicodeSet *gRuleSet_name_char;                  //   globals.  NOTE(review): ownership is not
sl@0
   156
    UnicodeSet *gRuleSet_name_start_char;            //   visible here - confirm in the implementation.
sl@0
   157
sl@0
   158
    RBBIRuleScanner(const RBBIRuleScanner &other); // declared private: forbid copying of this class
sl@0
   159
    RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // declared private: forbid copying of this class
sl@0
   160
};
sl@0
   161
sl@0
   162
U_NAMESPACE_END
sl@0
   163
sl@0
   164
#endif