os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbiscan.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 //
     2 //  rbbiscan.h
     3 //
     4 //  Copyright (C) 2002-2003, International Business Machines Corporation and others.
     5 //  All Rights Reserved.
     6 //
     7 //  This file contains declarations for class RBBIRuleScanner
     8 //
     9 
    10 
    11 #ifndef RBBISCAN_H
    12 #define RBBISCAN_H
    13 
    14 #include "unicode/utypes.h"
    15 #include "unicode/uobject.h"
    16 #include "unicode/rbbi.h"
    17 #include "unicode/uniset.h"
    18 #include "unicode/parseerr.h"
    19 #include "uhash.h"
    20 #include "uvector.h"
    21 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
    22                           //    looks up references to $variables within a set.
    23 #include "rbbinode.h"
    24 //#include "rbbitblb.h"
    25 
    26 
    27 
    28 U_NAMESPACE_BEGIN
    29 
    30 class   RBBIRuleBuilder;
    31 class   RBBISymbolTable;
    32 
    33 
    34 //--------------------------------------------------------------------------------
    35 //
    36 //  class RBBIRuleScanner does the lowest level, character-at-a-time
    37 //                        scanning of break iterator rules.  
    38 //
    39 //                        The output of the scanner is parse trees for
    40 //                        the rule expressions and a list of all Unicode Sets
    41 //                        encountered.
    42 //
    43 //--------------------------------------------------------------------------------
    44 static const int    kStackSize = 100;               // Number of entries in each of the scanner's
    45                                                     //   fixed-size parse stacks (fStack and fNodeStack
    46                                                     //   in RBBIRuleScanner).  Corresponds roughly to the
    47                                                     //   depth of parentheses nesting allowed in the rules.
    48 
    49 enum EParseAction {dummy01, dummy02};               // Placeholder type for the action codes that are
    50                                                     //   named in the rule parsing state table; dummy01
    51                                                     //   and dummy02 are filler enumerators only.
    52 
    53 class RBBIRuleScanner : public UMemory {
    54 public:
    55                                                     // RBBIRuleChar: a code point plus an "escaped" flag.
    56     struct RBBIRuleChar {                           //   This is the type of fC and of the nextChar()/push() argument.
    57         UChar32             fChar;                  // The code point.
    58         UBool               fEscaped;               // NOTE(review): presumably set for escaped/quoted chars - confirm.
    59     };
    60 
    61     RBBIRuleScanner(RBBIRuleBuilder  *rb);          // rb: the owning rule builder (presumably kept in fRB - confirm).
    62 
    63 
    64     virtual    ~RBBIRuleScanner();
    65 
    66     void        nextChar(RBBIRuleChar &c);          // Get the next char from the input stream.
    67                                                     //   The result is stored in c; nothing is returned (void).
    68                                                     //   NOTE(review): how end of input is reported is not visible here.
    69     UBool       push(const RBBIRuleChar &c);        // Push (unget) one character.
    70                                                     //   Only a single character may be pushed.
    71 
    72     void        parse();                            // Parse the rules, generating two parse
    73                                                     //   trees, one each for the forward and
    74                                                     //   reverse rules,
    75                                                     //   and a list of UnicodeSets encountered.
    76 
    77     /**
    78      * Return a rules string without unnecessary
    79      * characters.  Static: no scanner instance is needed.
    80      */
    81     static UnicodeString stripRules(const UnicodeString &rules);
    82 private:
    83 
    84     UBool       doParseActions(EParseAction a);        // NOTE(review): presumably runs state-table action a - confirm.
    85     void        error(UErrorCode e);                   // error reporting convenience function.
    86     void        fixOpStack(RBBINode::OpPrecedence p);
    87                                                        //   NOTE(review): presumably resolves pending operators up to precedence p - confirm.
    88     void        findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
    89                                                        //   setToAdopt is optional and defaults to NULL.
    90     UChar32     nextCharLL();                          // NOTE(review): presumably the low-level fetch behind nextChar() - confirm.
    91 #ifdef RBBI_DEBUG
    92     void        printNodeStack(const char *title);     // Declared only when RBBI_DEBUG is defined.
    93 #endif
    94     RBBINode    *pushNewNode(RBBINode::NodeType  t);   // NOTE(review): presumably pushes a new node of type t on fNodeStack - confirm.
    95     void        scanSet();
    96 
    97 
    98     RBBIRuleBuilder               *fRB;              // The rule builder that we are part of.
    99 
   100     int32_t                       fScanIndex;        // Index of current character being processed
   101                                                      //   in the rule input string.
   102     int32_t                       fNextIndex;        // Index of the next character, which
   103                                                      //   is the first character not yet scanned.
   104     UBool                         fQuoteMode;        // Scan is in a 'quoted region'
   105     int                           fLineNum;          // Line number in input file.
   106     int                           fCharNum;          // Char position within the line.
   107     UChar32                       fLastChar;         // Previous char, needed to count CR-LF
   108                                                      //   as a single line, not two.
   109 
   110     RBBIRuleChar                  fC;                // Current char for parse state machine
   111                                                      //   processing.
   112     UnicodeString                 fVarName;          // $variableName, valid when we've just
   113                                                      //   scanned one.
   114 
   115     RBBIRuleTableEl               **fStateTable;     // State Transition Table for RBBI Rule
   116                                                      //   parsing.  index by p[state][char-class]
   117 
   118     uint16_t                      fStack[kStackSize];  // State stack, holds state pushes
   119     int                           fStackPtr;           //  and pops as specified in the state
   120                                                        //  transition rules.
   121 
   122     RBBINode                      *fNodeStack[kStackSize]; // Node stack, holds nodes created
   123                                                            //  during the parse of a rule
   124     int                            fNodeStackPtr;
   125 
   126 
   127     UBool                          fReverseRule;     // True if the rule currently being scanned
   128                                                      //  is a reverse direction rule (if it
   129                                                      //  starts with a '!')
   130 
   131     UBool                          fLookAheadRule;   // True if the rule includes a '/'
   132                                                      //   somewhere within it.
   133 
   134     RBBISymbolTable               *fSymbolTable;     // symbol table, holds definitions of
   135                                                      //   $variable symbols.
   136 
   137     UHashtable                    *fSetTable;        // UnicodeSet hash table, holds indexes to
   138                                                      //   the sets created while parsing rules.
   139                                                      //   The key is the string used for creating
   140                                                      //   the set.
   141 
   142     UnicodeSet                    *fRuleSets[10];    // Unicode Sets that are needed during
   143                                                      //  the scanning of RBBI rules.  The
   144                                                      //  indices for these are assigned by the
   145                                                      //  perl script that builds the state tables.
   146                                                      //  See rbbirpt.h.
   147 
   148     int32_t                        fRuleNum;         // Counts each rule as it is scanned.
   149 
   150     int32_t                        fOptionStart;     // Input index of start of a !!option
   151                                                      //   keyword, while being scanned.
   152                                                      // The four pointers below are per-instance (non-static) members despite the 'g' prefix.
   153     UnicodeSet *gRuleSet_rule_char;
   154     UnicodeSet *gRuleSet_white_space;
   155     UnicodeSet *gRuleSet_name_char;
   156     UnicodeSet *gRuleSet_name_start_char;
   157 
   158     RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
   159     RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
   160 };
   161 
   162 U_NAMESPACE_END
   163 
   164 #endif