os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbirb.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
//
//  rbbirb.h
//
//  Copyright (C) 2002-2004, International Business Machines Corporation and others.
//  All Rights Reserved.
//
//  This file contains declarations for several classes from the
//    Rule Based Break Iterator rule builder.
//
sl@0
    10
sl@0
    11
sl@0
    12
#ifndef RBBIRB_H
sl@0
    13
#define RBBIRB_H
sl@0
    14
sl@0
    15
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
                          //    looks up references to $variables within a set.

// Debug builds map RBBIDebugPrintf / RBBIDebugPuts onto printf / puts.  The C
// header has to be pulled in here, at file scope, rather than between
// U_NAMESPACE_BEGIN and U_NAMESPACE_END further down.
#ifdef RBBI_DEBUG
#include <stdio.h>
#endif
sl@0
    24
sl@0
    25
sl@0
    26
sl@0
    27
U_NAMESPACE_BEGIN
sl@0
    28
sl@0
    29
// Forward declarations.  These rule-builder types are only named in this
// header (mostly through pointers); none of them is defined here.
class               RBBIRuleScanner;
struct              RBBIRuleTableEl;
class               RBBISetBuilder;
class               RBBINode;
class               RBBITableBuilder;
sl@0
    34
sl@0
    35
sl@0
    36
sl@0
    37
//--------------------------------------------------------------------------------
sl@0
    38
//
sl@0
    39
//   RBBISymbolTable.    Implements SymbolTable interface that is used by the
sl@0
    40
//                       UnicodeSet parser to resolve references to $variables.
sl@0
    41
//
sl@0
    42
//--------------------------------------------------------------------------------
sl@0
    43
class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
sl@0
    44
public:                                       //   of these structs for each entry.
sl@0
    45
    RBBISymbolTableEntry();
sl@0
    46
    UnicodeString          key;
sl@0
    47
    RBBINode               *val;
sl@0
    48
    ~RBBISymbolTableEntry();
sl@0
    49
sl@0
    50
private:
sl@0
    51
    RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
sl@0
    52
    RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
sl@0
    53
};
sl@0
    54
sl@0
    55
sl@0
    56
class RBBISymbolTable : public UMemory, public SymbolTable {
sl@0
    57
private:
sl@0
    58
    const UnicodeString      &fRules;
sl@0
    59
    UHashtable               *fHashTable;
sl@0
    60
    RBBIRuleScanner          *fRuleScanner;
sl@0
    61
sl@0
    62
    // These next two fields are part of the mechanism for passing references to
sl@0
    63
    //   already-constructed UnicodeSets back to the UnicodeSet constructor
sl@0
    64
    //   when the pattern includes $variable references.
sl@0
    65
    const UnicodeString      ffffString;      // = "/uffff"
sl@0
    66
    UnicodeSet              *fCachedSetLookup;
sl@0
    67
sl@0
    68
public:
sl@0
    69
    //  API inherited from class SymbolTable
sl@0
    70
    virtual const UnicodeString*  lookup(const UnicodeString& s) const;
sl@0
    71
    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
sl@0
    72
    virtual UnicodeString parseReference(const UnicodeString& text,
sl@0
    73
                                         ParsePosition& pos, int32_t limit) const;
sl@0
    74
sl@0
    75
    //  Additional Functions
sl@0
    76
    RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
sl@0
    77
    virtual ~RBBISymbolTable();
sl@0
    78
sl@0
    79
    virtual RBBINode *lookupNode(const UnicodeString &key) const;
sl@0
    80
    virtual void      addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err);
sl@0
    81
sl@0
    82
#ifdef RBBI_DEBUG
sl@0
    83
    virtual void      rbbiSymtablePrint() const;
sl@0
    84
#else
sl@0
    85
    // A do-nothing inline function for non-debug builds.  Member funcs can't be empty
sl@0
    86
    //  or the call sites won't compile.
sl@0
    87
    int  fFakeField;
sl@0
    88
    #define rbbiSymtablePrint() fFakeField=0; 
sl@0
    89
#endif
sl@0
    90
sl@0
    91
private:
sl@0
    92
    RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
sl@0
    93
    RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
sl@0
    94
};
sl@0
    95
sl@0
    96
sl@0
    97
//--------------------------------------------------------------------------------
sl@0
    98
//
sl@0
    99
//  class RBBIRuleBuilder       The top-level class handling RBBI rule compiling.
sl@0
   100
//
sl@0
   101
//--------------------------------------------------------------------------------
sl@0
   102
class RBBIRuleBuilder : public UMemory {
sl@0
   103
public:
sl@0
   104
sl@0
   105
    //  Create a rule based break iterator from a set of rules.
sl@0
   106
    //  This function is the main entry point into the rule builder.  The
sl@0
   107
    //   public ICU API for creating RBBIs uses this function to do the actual work.
sl@0
   108
    //
sl@0
   109
    static BreakIterator * createRuleBasedBreakIterator( const UnicodeString    &rules,
sl@0
   110
                                    UParseError      &parseError,
sl@0
   111
                                    UErrorCode       &status);
sl@0
   112
sl@0
   113
public:
sl@0
   114
    // The "public" functions and data members that appear below are accessed
sl@0
   115
    //  (and shared) by the various parts that make up the rule builder.  They
sl@0
   116
    //  are NOT intended to be accessed by anything outside of the
sl@0
   117
    //  rule builder implementation.
sl@0
   118
    RBBIRuleBuilder(const UnicodeString  &rules,
sl@0
   119
                    UParseError          &parseErr,
sl@0
   120
                    UErrorCode           &status
sl@0
   121
        );
sl@0
   122
sl@0
   123
    virtual    ~RBBIRuleBuilder();
sl@0
   124
    char                          *fDebugEnv;        // controls debug trace output
sl@0
   125
    UErrorCode                    *fStatus;          // Error reporting.  Keeping status
sl@0
   126
    UParseError                   *fParseError;      //   here avoids passing it everywhere.
sl@0
   127
    const UnicodeString           &fRules;           // The rule string that we are compiling
sl@0
   128
sl@0
   129
    RBBIRuleScanner               *fScanner;         // The scanner.
sl@0
   130
    RBBINode                      *fForwardTree;     // The parse trees, generated by the scanner,
sl@0
   131
    RBBINode                      *fReverseTree;     //   then manipulated by subsequent steps.
sl@0
   132
    RBBINode                      *fSafeFwdTree;
sl@0
   133
    RBBINode                      *fSafeRevTree;
sl@0
   134
sl@0
   135
    RBBINode                      **fDefaultTree;    // For rules not qualified with a !
sl@0
   136
                                                     //   the tree to which they belong to.
sl@0
   137
sl@0
   138
    UBool                         fChainRules;       // True for chained Unicode TR style rules.
sl@0
   139
                                                     // False for traditional regexp rules.
sl@0
   140
sl@0
   141
    UBool                         fLBCMNoChain;      // True:  suppress chaining of rules on
sl@0
   142
                                                     //   chars with LineBreak property == CM.
sl@0
   143
sl@0
   144
    UBool                         fLookAheadHardBreak;  // True:  Look ahead matches cause an
sl@0
   145
                                                     // immediate break, no continuing for the
sl@0
   146
                                                     // longest match.
sl@0
   147
sl@0
   148
    RBBISetBuilder                *fSetBuilder;      // Set and Character Category builder.
sl@0
   149
    UVector                       *fUSetNodes;       // Vector of all uset nodes.
sl@0
   150
sl@0
   151
    RBBITableBuilder              *fForwardTables;   // State transition tables
sl@0
   152
    RBBITableBuilder              *fReverseTables;
sl@0
   153
    RBBITableBuilder              *fSafeFwdTables;
sl@0
   154
    RBBITableBuilder              *fSafeRevTables;
sl@0
   155
sl@0
   156
    UVector                       *fRuleStatusVals;  // The values that can be returned
sl@0
   157
                                                     //   from getRuleStatus().
sl@0
   158
sl@0
   159
    RBBIDataHeader                *flattenData();    // Create the flattened (runtime format)
sl@0
   160
                                                     // data tables..
sl@0
   161
private:
sl@0
   162
    RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
sl@0
   163
    RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
sl@0
   164
};
sl@0
   165
sl@0
   166
sl@0
   167
sl@0
   168
sl@0
   169
//----------------------------------------------------------------------------
sl@0
   170
//
sl@0
   171
//   RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
sl@0
   172
//                    been encountered.  The val Node will be of nodetype uset
sl@0
   173
//                    and contain pointers to the actual UnicodeSets.
sl@0
   174
//                    The Key is the source string for initializing the set.
sl@0
   175
//
sl@0
   176
//                    The hash table is used to avoid creating duplicate
sl@0
   177
//                    unnamed (not $var references) UnicodeSets.
sl@0
   178
//
sl@0
   179
//                    Memory Management:
sl@0
   180
//                       The Hash Table owns these RBBISetTableEl structs and
sl@0
   181
//                            the key strings.  It does NOT own the val nodes.
sl@0
   182
//
sl@0
   183
//----------------------------------------------------------------------------
sl@0
   184
struct RBBISetTableEl {
sl@0
   185
    UnicodeString *key;
sl@0
   186
    RBBINode      *val;
sl@0
   187
};
sl@0
   188
sl@0
   189
sl@0
   190
//----------------------------------------------------------------------------
//
//   RBBIDebugPrintf    Printf equivalent, for debugging output.
//                      Conditional compilation of the implementation lets us
//                      get rid of the stdio dependency in environments where it
//                      is unavailable.
//
//                      <stdio.h> itself is included with the other headers at
//                      the top of this file (debug builds only); a header must
//                      not be included from inside the namespace that is open
//                      at this point.
//
//----------------------------------------------------------------------------
#ifdef RBBI_DEBUG
#define RBBIDebugPrintf printf
#define RBBIDebugPuts puts
#else
// RBBIDebugPrintf is left undefined in non-debug builds, so every use of it
//   must itself be guarded by #ifdef RBBI_DEBUG.
// RBBIDebugPuts(arg) expands to nothing; its argument is never evaluated.
#undef RBBIDebugPrintf
#define RBBIDebugPuts(arg)
#endif
sl@0
   206
sl@0
   207
U_NAMESPACE_END
sl@0
   208
#endif
sl@0
   209
sl@0
   210
sl@0
   211