sl@0: //
sl@0: //  rbbiscan.h
sl@0: //
sl@0: //  Copyright (C) 2002-2003, International Business Machines Corporation and others.
sl@0: //  All Rights Reserved.
sl@0: //
sl@0: //  This file contains declarations for class RBBIRuleScanner
sl@0: //
sl@0: 
sl@0: 
sl@0: #ifndef RBBISCAN_H
sl@0: #define RBBISCAN_H
sl@0: 
sl@0: #include "unicode/utypes.h"
sl@0: #include "unicode/uobject.h"
sl@0: #include "unicode/rbbi.h"
sl@0: #include "unicode/uniset.h"
sl@0: #include "unicode/parseerr.h"
sl@0: #include "uhash.h"
sl@0: #include "uvector.h"
sl@0: #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
sl@0:                           //    looks up references to $variables within a set.
sl@0: #include "rbbinode.h"
sl@0: //#include "rbbitblb.h"
sl@0: 
sl@0: 
sl@0: 
sl@0: U_NAMESPACE_BEGIN
sl@0: 
sl@0: class   RBBIRuleBuilder;
sl@0: class   RBBISymbolTable;
sl@0: 
sl@0: 
sl@0: //--------------------------------------------------------------------------------
sl@0: //
sl@0: //  class RBBIRuleScanner does the lowest level, character-at-a-time
sl@0: //                        scanning of break iterator rules.  
sl@0: //
sl@0: //                        The output of the scanner is parse trees for
sl@0: //                        the rule expressions and a list of all Unicode Sets
sl@0: //                        encountered.
sl@0: //
sl@0: //--------------------------------------------------------------------------------
sl@0: static const int    kStackSize = 100;               // Capacity of the parse state stack
sl@0:                                                     //   (fStack) and of the node stack
sl@0:                                                     //   (fNodeStack).  Corresponds roughly to the
sl@0:                                                     //   allowed depth of parentheses nesting.
sl@0: 
sl@0: enum EParseAction {dummy01, dummy02};               // Placeholder enum for the specifier of the
sl@0:                                                     //   actions named in the rule parsing state
sl@0:                                                     //   table; consumed by doParseActions().
sl@0: 
sl@0: class RBBIRuleScanner : public UMemory {            // Character-at-a-time scanner for break rules.
sl@0: public:
sl@0:     // One scanned rule character: the character value plus an "escaped" flag.
sl@0:     struct RBBIRuleChar {
sl@0:         UChar32             fChar;
sl@0:         UBool               fEscaped;
sl@0:     };
sl@0: 
sl@0:     RBBIRuleScanner(RBBIRuleBuilder  *rb);          // rb: rule builder; presumably kept in fRB - confirm.
sl@0: 
sl@0: 
sl@0:     virtual    ~RBBIRuleScanner();
sl@0: 
sl@0:     void        nextChar(RBBIRuleChar &c);          // Get the next char from the input stream.
sl@0:                                                     //   The result is stored in c; returns void.
sl@0: 
sl@0:     UBool       push(const RBBIRuleChar &c);        // Push (unget) one character.
sl@0:                                                     //   Only a single character may be pushed.
sl@0: 
sl@0:     void        parse();                            // Parse the rules, generating two parse
sl@0:                                                     //   trees, one each for the forward and
sl@0:                                                     //   reverse rules,
sl@0:                                                     //   and a list of UnicodeSets encountered.
sl@0: 
sl@0:     /**
sl@0:      * Return a rules string without unnecessary
sl@0:      * characters.
sl@0:      */
sl@0:     static UnicodeString stripRules(const UnicodeString &rules);
sl@0: private:
sl@0: 
sl@0:     UBool       doParseActions(EParseAction a);        // Carry out a parse action named by the state table.
sl@0:     void        error(UErrorCode e);                   // error reporting convenience function.
sl@0:     void        fixOpStack(RBBINode::OpPrecedence p);
sl@0:                                                        //   p is an operator precedence level.
sl@0:     void        findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
sl@0: 
sl@0:     UChar32     nextCharLL();
sl@0: #ifdef RBBI_DEBUG
sl@0:     void        printNodeStack(const char *title);
sl@0: #endif
sl@0:     RBBINode    *pushNewNode(RBBINode::NodeType  t);
sl@0:     void        scanSet();
sl@0: 
sl@0: 
sl@0:     RBBIRuleBuilder               *fRB;              // The rule builder that we are part of.
sl@0: 
sl@0:     int32_t                       fScanIndex;        // Index of current character being processed
sl@0:                                                      //   in the rule input string.
sl@0:     int32_t                       fNextIndex;        // Index of the next character, which
sl@0:                                                      //   is the first character not yet scanned.
sl@0:     UBool                         fQuoteMode;        // Scan is in a 'quoted region'
sl@0:     int                           fLineNum;          // Line number in input file.
sl@0:     int                           fCharNum;          // Char position within the line.
sl@0:     UChar32                       fLastChar;         // Previous char, needed to count CR-LF
sl@0:                                                      //   as a single line, not two.
sl@0: 
sl@0:     RBBIRuleChar                  fC;                // Current char for parse state machine
sl@0:                                                      //   processing.
sl@0:     UnicodeString                 fVarName;          // $variableName, valid when we've just
sl@0:                                                      //   scanned one.
sl@0: 
sl@0:     RBBIRuleTableEl               **fStateTable;     // State Transition Table for RBBI Rule
sl@0:                                                      //   parsing.  index by p[state][char-class]
sl@0:     // NOTE(review): RBBIRuleTableEl is not declared in this header; presumably an include provides it - confirm.
sl@0:     uint16_t                      fStack[kStackSize];  // State stack, holds state pushes
sl@0:     int                           fStackPtr;           //  and pops as specified in the state
sl@0:                                                        //  transition rules.
sl@0: 
sl@0:     RBBINode                      *fNodeStack[kStackSize]; // Node stack, holds nodes created
sl@0:                                                            //  during the parse of a rule
sl@0:     int                            fNodeStackPtr;          // Presumably the index of the top of fNodeStack - confirm.
sl@0: 
sl@0: 
sl@0:     UBool                          fReverseRule;     // True if the rule currently being scanned
sl@0:                                                      //  is a reverse direction rule (if it
sl@0:                                                      //  starts with a '!')
sl@0: 
sl@0:     UBool                          fLookAheadRule;   // True if the rule includes a '/'
sl@0:                                                      //   somewhere within it.
sl@0: 
sl@0:     RBBISymbolTable               *fSymbolTable;     // symbol table, holds definitions of
sl@0:                                                      //   $variable symbols.
sl@0: 
sl@0:     UHashtable                    *fSetTable;        // UnicodeSet hash table, holds indexes to
sl@0:                                                      //   the sets created while parsing rules.
sl@0:                                                      //   The key is the string used for creating
sl@0:                                                      //   the set.
sl@0: 
sl@0:     UnicodeSet                    *fRuleSets[10];    // Unicode Sets that are needed during
sl@0:                                                      //  the scanning of RBBI rules.  The
sl@0:                                                      //  indices for these are assigned by the
sl@0:                                                      //  perl script that builds the state tables.
sl@0:                                                      //  See rbbirpt.h.
sl@0: 
sl@0:     int32_t                        fRuleNum;         // Counts each rule as it is scanned.
sl@0: 
sl@0:     int32_t                        fOptionStart;     // Input index of start of a !!option
sl@0:                                                      //   keyword, while being scanned.
sl@0:     // Per-instance UnicodeSet pointers (despite the "g" prefix); presumably character categories - confirm.
sl@0:     UnicodeSet *gRuleSet_rule_char;
sl@0:     UnicodeSet *gRuleSet_white_space;
sl@0:     UnicodeSet *gRuleSet_name_char;
sl@0:     UnicodeSet *gRuleSet_name_start_char;
sl@0: 
sl@0:     RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
sl@0:     RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
sl@0: };
sl@0: 
sl@0: U_NAMESPACE_END
sl@0: 
sl@0: #endif