os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbirb.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbirb.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,211 @@
     1.4 +//
     1.5 +//  rbbirb.h
     1.6 +//
     1.7 +//  Copyright (C) 2002-2004, International Business Machines Corporation and others.
     1.8 +//  All Rights Reserved.
     1.9 +//
    1.10 +//  This file contains declarations for several classes from the
    1.11 +//    Rule Based Break Iterator rule builder.
    1.12 +//
    1.13 +
    1.14 +
    1.15 +#ifndef RBBIRB_H
    1.16 +#define RBBIRB_H
    1.17 +
    1.18 +#include "unicode/utypes.h"
    1.19 +#include "unicode/uobject.h"
    1.20 +#include "unicode/rbbi.h"
    1.21 +#include "unicode/uniset.h"
    1.22 +#include "unicode/parseerr.h"
    1.23 +#include "uhash.h"
    1.24 +#include "uvector.h"
    1.25 +#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
    1.26 +                          //    looks up references to $variables within a set.
    1.27 +
    1.28 +
    1.29 +
    1.30 +U_NAMESPACE_BEGIN
    1.31 +
    1.32 +class               RBBIRuleScanner;     // Forward declarations.  Where this header uses
    1.33 +struct              RBBIRuleTableEl;     //   these types it does so only through pointers,
    1.34 +class               RBBISetBuilder;      //   so their complete definitions are not needed
    1.35 +class               RBBINode;            //   here.
    1.36 +class               RBBITableBuilder;
    1.37 +
    1.38 +
    1.39 +
    1.40 +//--------------------------------------------------------------------------------
    1.41 +//
    1.42 +//   RBBISymbolTable.    Implements SymbolTable interface that is used by the
    1.43 +//                       UnicodeSet parser to resolve references to $variables.
    1.44 +//
    1.45 +//--------------------------------------------------------------------------------
    1.46 +class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
    1.47 +public:                                       //   of these structs for each entry.
    1.48 +    RBBISymbolTableEntry();
    1.49 +    UnicodeString          key;               // Key of this entry (presumably the $variable name - TODO confirm).
    1.50 +    RBBINode               *val;              // Node stored under key.
    1.51 +    ~RBBISymbolTableEntry();                  // NOTE(review): confirm in the implementation whether this deletes val.
    1.52 +
    1.53 +private:
    1.54 +    RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
    1.55 +    RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
    1.56 +};
    1.57 +
    1.58 +
    1.59 +class RBBISymbolTable : public UMemory, public SymbolTable {
    1.60 +private:
    1.61 +    const UnicodeString      &fRules;          // Held by reference, not copied.
    1.62 +    UHashtable               *fHashTable;      // Hash table of RBBISymbolTableEntry, one per entry.
    1.63 +    RBBIRuleScanner          *fRuleScanner;
    1.64 +
    1.65 +    // These next two fields are part of the mechanism for passing references to
    1.66 +    //   already-constructed UnicodeSets back to the UnicodeSet constructor
    1.67 +    //   when the pattern includes $variable references.
    1.68 +    const UnicodeString      ffffString;      // presumably the single char U+FFFF; this note used to read "/uffff" - TODO confirm
    1.69 +    UnicodeSet              *fCachedSetLookup;
    1.70 +
    1.71 +public:
    1.72 +    //  API inherited from class SymbolTable
    1.73 +    virtual const UnicodeString*  lookup(const UnicodeString& s) const;
    1.74 +    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
    1.75 +    virtual UnicodeString parseReference(const UnicodeString& text,
    1.76 +                                         ParsePosition& pos, int32_t limit) const;
    1.77 +
    1.78 +    //  Additional Functions
    1.79 +    RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
    1.80 +    virtual ~RBBISymbolTable();
    1.81 +
    1.82 +    virtual RBBINode *lookupNode(const UnicodeString &key) const;
    1.83 +    virtual void      addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err);
    1.84 +
    1.85 +#ifdef RBBI_DEBUG
    1.86 +    virtual void      rbbiSymtablePrint() const;
    1.87 +#else
    1.88 +    // Non-debug builds: rbbiSymtablePrint() is a macro rather than a member function.  It
    1.89 +    //  expands to a dummy assignment to fFakeField so that the call sites still compile.
    1.90 +    int  fFakeField;
    1.91 +    #define rbbiSymtablePrint() fFakeField=0; 
    1.92 +#endif
    1.93 +
    1.94 +private:
    1.95 +    RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
    1.96 +    RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
    1.97 +};
    1.98 +
    1.99 +
   1.100 +//--------------------------------------------------------------------------------
   1.101 +//
   1.102 +//  class RBBIRuleBuilder       The top-level class handling RBBI rule compiling.
   1.103 +//
   1.104 +//--------------------------------------------------------------------------------
   1.105 +class RBBIRuleBuilder : public UMemory {
   1.106 +public:
   1.107 +
   1.108 +    //  Create a rule based break iterator from a set of rules.
   1.109 +    //  This function is the main entry point into the rule builder.  The
   1.110 +    //   public ICU API for creating RBBIs uses this function to do the actual work.
   1.111 +    //
   1.112 +    static BreakIterator * createRuleBasedBreakIterator( const UnicodeString    &rules,
   1.113 +                                    UParseError      &parseError,
   1.114 +                                    UErrorCode       &status);
   1.115 +
   1.116 +public:
   1.117 +    // The "public" functions and data members that appear below are accessed
   1.118 +    //  (and shared) by the various parts that make up the rule builder.  They
   1.119 +    //  are NOT intended to be accessed by anything outside of the
   1.120 +    //  rule builder implementation.
   1.121 +    RBBIRuleBuilder(const UnicodeString  &rules,
   1.122 +                    UParseError          &parseErr,
   1.123 +                    UErrorCode           &status
   1.124 +        );
   1.125 +
   1.126 +    virtual    ~RBBIRuleBuilder();
   1.127 +    char                          *fDebugEnv;        // controls debug trace output
   1.128 +    UErrorCode                    *fStatus;          // Error reporting.  Keeping status
   1.129 +    UParseError                   *fParseError;      //   here avoids passing it everywhere.
   1.130 +    const UnicodeString           &fRules;           // The rule string that we are compiling (held by reference)
   1.131 +
   1.132 +    RBBIRuleScanner               *fScanner;         // The scanner.
   1.133 +    RBBINode                      *fForwardTree;     // The parse trees, generated by the scanner,
   1.134 +    RBBINode                      *fReverseTree;     //   then manipulated by subsequent steps.
   1.135 +    RBBINode                      *fSafeFwdTree;
   1.136 +    RBBINode                      *fSafeRevTree;
   1.137 +
   1.138 +    RBBINode                      **fDefaultTree;    // For rules not qualified with a !
   1.139 +                                                     //   the tree to which they belong.
   1.140 +
   1.141 +    UBool                         fChainRules;       // True for chained Unicode TR style rules.
   1.142 +                                                     // False for traditional regexp rules.
   1.143 +
   1.144 +    UBool                         fLBCMNoChain;      // True:  suppress chaining of rules on
   1.145 +                                                     //   chars with LineBreak property == CM.
   1.146 +
   1.147 +    UBool                         fLookAheadHardBreak;  // True:  Look ahead matches cause an
   1.148 +                                                     // immediate break, no continuing for the
   1.149 +                                                     // longest match.
   1.150 +
   1.151 +    RBBISetBuilder                *fSetBuilder;      // Set and Character Category builder.
   1.152 +    UVector                       *fUSetNodes;       // Vector of all uset nodes.
   1.153 +
   1.154 +    RBBITableBuilder              *fForwardTables;   // State transition tables
   1.155 +    RBBITableBuilder              *fReverseTables;
   1.156 +    RBBITableBuilder              *fSafeFwdTables;
   1.157 +    RBBITableBuilder              *fSafeRevTables;
   1.158 +
   1.159 +    UVector                       *fRuleStatusVals;  // The values that can be returned
   1.160 +                                                     //   from getRuleStatus().
   1.161 +
   1.162 +    RBBIDataHeader                *flattenData();    // Create the flattened (runtime format)
   1.163 +                                                     // data tables.
   1.164 +private:
   1.165 +    RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
   1.166 +    RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
   1.167 +};
   1.168 +
   1.169 +
   1.170 +
   1.171 +
   1.172 +//----------------------------------------------------------------------------
   1.173 +//
   1.174 +//   RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
   1.175 +//                    been encountered.  The val Node will be of nodetype uset
   1.176 +//                    and contain pointers to the actual UnicodeSets.
   1.177 +//                    The Key is the source string for initializing the set.
   1.178 +//
   1.179 +//                    The hash table is used to avoid creating duplicate
   1.180 +//                    unnamed (not $var references) UnicodeSets.
   1.181 +//
   1.182 +//                    Memory Management:
   1.183 +//                       The Hash Table owns these RBBISetTableEl structs and
   1.184 +//                            the key strings.  It does NOT own the val nodes.
   1.185 +//
   1.186 +//----------------------------------------------------------------------------
   1.187 +struct RBBISetTableEl {
   1.188 +    UnicodeString *key;    // Source string the set was initialized from; owned by the hash table.
   1.189 +    RBBINode      *val;    // Node of type uset; NOT owned by the hash table.
   1.190 +};
   1.191 +
   1.192 +
   1.193 +//----------------------------------------------------------------------------
   1.194 +//
   1.195 +//   RBBIDebugPrintf    Printf equivalent, for debugging output.
   1.196 +//                      Conditional compilation of the implementation lets us
   1.197 +//                      get rid of the stdio dependency in environments where it
   1.198 +//                      is unavailable.
   1.199 +//
   1.200 +//----------------------------------------------------------------------------
   1.201 +#ifdef RBBI_DEBUG
   1.202 +#include <stdio.h>
   1.203 +#define RBBIDebugPrintf printf
   1.204 +#define RBBIDebugPuts puts
   1.205 +#else   // !RBBI_DEBUG: RBBIDebugPrintf is left undefined; RBBIDebugPuts(arg) expands to nothing.
   1.206 +#undef RBBIDebugPrintf 
   1.207 +#define RBBIDebugPuts(arg)
   1.208 +#endif  // RBBI_DEBUG
   1.209 +
   1.210 +U_NAMESPACE_END
   1.211 +#endif
   1.212 +
   1.213 +
   1.214 +