1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbirb.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,211 @@
1.4 +//
1.5 +// rbbirb.h
1.6 +//
1.7 +// Copyright (C) 2002-2004, International Business Machines Corporation and others.
1.8 +// All Rights Reserved.
1.9 +//
1.10 +// This file contains declarations for several classes from the
1.11 +// Rule Based Break Iterator rule builder.
1.12 +//
1.13 +
1.14 +
1.15 +#ifndef RBBIRB_H
1.16 +#define RBBIRB_H
1.17 +
1.18 +#include "unicode/utypes.h"
1.19 +#include "unicode/uobject.h"
1.20 +#include "unicode/rbbi.h"
1.21 +#include "unicode/uniset.h"
1.22 +#include "unicode/parseerr.h"
1.23 +#include "uhash.h"
1.24 +#include "uvector.h"
1.25 +#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
1.26 + // looks up references to $variables within a set.
1.27 +
1.28 +
1.29 +
1.30 +U_NAMESPACE_BEGIN
1.31 +
1.32 +class RBBIRuleScanner; // Used below by RBBISymbolTable::fRuleScanner and RBBIRuleBuilder::fScanner.
1.33 +struct RBBIRuleTableEl; // Forward declaration only; not otherwise referenced in this header.
1.34 +class RBBISetBuilder; // Used below by RBBIRuleBuilder::fSetBuilder.
1.35 +class RBBINode; // Parse tree node type; held only by pointer in the classes below.
1.36 +class RBBITableBuilder; // Used below by the RBBIRuleBuilder state-table members.
1.37 +
1.38 +
1.39 +
1.40 +//--------------------------------------------------------------------------------
1.41 +//
1.42 +// RBBISymbolTable. Implements SymbolTable interface that is used by the
1.43 +// UnicodeSet parser to resolve references to $variables.
1.44 +//
1.45 +//--------------------------------------------------------------------------------
1.46 +class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
1.47 +public: // of these structs for each entry.
1.48 + RBBISymbolTableEntry();
1.49 + UnicodeString key; // Key string of this entry, stored by value.
1.50 + RBBINode *val; // Node stored under the key. NOTE(review): ownership is not visible in this header.
1.51 + ~RBBISymbolTableEntry();
1.52 +
1.53 +private:
1.54 + RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // declared private to forbid copying of this class
1.55 + RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // declared private to forbid copying of this class
1.56 +};
1.57 +
1.58 +
1.59 +class RBBISymbolTable : public UMemory, public SymbolTable { // Resolves $variable references for the UnicodeSet parser.
1.60 +private:
1.61 + const UnicodeString &fRules; // Reference member: the referenced string must outlive this object.
1.62 + UHashtable *fHashTable; // Hash table holding the symbol table entries.
1.63 + RBBIRuleScanner *fRuleScanner; // Scanner pointer supplied to the constructor (presumably; confirm in the .cpp).
1.64 +
1.65 + // These next two fields are part of the mechanism for passing references to
1.66 + // already-constructed UnicodeSets back to the UnicodeSet constructor
1.67 + // when the pattern includes $variable references.
1.68 + const UnicodeString ffffString; // = "/uffff" (sic); presumably the one-character string U+FFFF - confirm in the constructor.
1.69 + UnicodeSet *fCachedSetLookup;
1.70 +
1.71 +public:
1.72 + // API inherited from class SymbolTable
1.73 + virtual const UnicodeString* lookup(const UnicodeString& s) const;
1.74 + virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
1.75 + virtual UnicodeString parseReference(const UnicodeString& text,
1.76 + ParsePosition& pos, int32_t limit) const;
1.77 +
1.78 + // Additional Functions
1.79 + RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status); // NOTE: parameter fRules shadows the member of the same name.
1.80 + virtual ~RBBISymbolTable();
1.81 +
1.82 + virtual RBBINode *lookupNode(const UnicodeString &key) const; // Look up the node stored under key.
1.83 + virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); // Add a key/node pair; problems are reported through err.
1.84 +
1.85 +#ifdef RBBI_DEBUG
1.86 + virtual void rbbiSymtablePrint() const; // Debug builds only: a real member function.
1.87 +#else
1.88 + // Non-debug builds: rbbiSymtablePrint() is a preprocessor macro rather than a member function;
1.89 + // it expands to an assignment to fFakeField so that existing call sites still compile.
1.90 + int fFakeField; // Exists only as the assignment target of the macro below.
1.91 + #define rbbiSymtablePrint() fFakeField=0;
1.92 +#endif
1.93 +
1.94 +private:
1.95 + RBBISymbolTable(const RBBISymbolTable &other); // declared private to forbid copying of this class
1.96 + RBBISymbolTable &operator=(const RBBISymbolTable &other); // declared private to forbid copying of this class
1.97 +};
1.98 +
1.99 +
1.100 +//--------------------------------------------------------------------------------
1.101 +//
1.102 +// class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
1.103 +//
1.104 +//--------------------------------------------------------------------------------
1.105 +class RBBIRuleBuilder : public UMemory {
1.106 +public:
1.107 +
1.108 + // Create a rule based break iterator from a set of rules.
1.109 + // This function is the main entry point into the rule builder. The
1.110 + // public ICU API for creating RBBIs uses this function to do the actual work.
1.111 + //
1.112 + static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
1.113 + UParseError &parseError,
1.114 + UErrorCode &status);
1.115 +
1.116 +public:
1.117 + // The "public" functions and data members that appear below are accessed
1.118 + // (and shared) by the various parts that make up the rule builder. They
1.119 + // are NOT intended to be accessed by anything outside of the
1.120 + // rule builder implementation.
1.121 + RBBIRuleBuilder(const UnicodeString &rules,
1.122 + UParseError &parseErr,
1.123 + UErrorCode &status
1.124 + );
1.125 +
1.126 + virtual ~RBBIRuleBuilder();
1.127 + char *fDebugEnv; // controls debug trace output
1.128 + UErrorCode *fStatus; // Error reporting. Keeping status
1.129 + UParseError *fParseError; // here avoids passing it everywhere.
1.130 + const UnicodeString &fRules; // The rule string that we are compiling (reference: must outlive the builder)
1.131 +
1.132 + RBBIRuleScanner *fScanner; // The scanner.
1.133 + RBBINode *fForwardTree; // The parse trees, generated by the scanner,
1.134 + RBBINode *fReverseTree; // then manipulated by subsequent steps.
1.135 + RBBINode *fSafeFwdTree; // NOTE(review): presumably the "safe forward" rule tree - confirm.
1.136 + RBBINode *fSafeRevTree; // NOTE(review): presumably the "safe reverse" rule tree - confirm.
1.137 +
1.138 + RBBINode **fDefaultTree; // For rules not qualified with a !,
1.139 + // the tree to which they belong.
1.140 +
1.141 + UBool fChainRules; // True for chained Unicode TR style rules.
1.142 + // False for traditional regexp rules.
1.143 +
1.144 + UBool fLBCMNoChain; // True: suppress chaining of rules on
1.145 + // chars with LineBreak property == CM.
1.146 +
1.147 + UBool fLookAheadHardBreak; // True: Look ahead matches cause an
1.148 + // immediate break, no continuing for the
1.149 + // longest match.
1.150 +
1.151 + RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
1.152 + UVector *fUSetNodes; // Vector of all uset nodes.
1.153 +
1.154 + RBBITableBuilder *fForwardTables; // State transition tables
1.155 + RBBITableBuilder *fReverseTables; // (one table builder per rule tree,
1.156 + RBBITableBuilder *fSafeFwdTables; // judging by the parallel names;
1.157 + RBBITableBuilder *fSafeRevTables; // confirm in the implementation).
1.158 +
1.159 + UVector *fRuleStatusVals; // The values that can be returned
1.160 + // from getRuleStatus().
1.161 +
1.162 + RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
1.163 + // data tables.
1.164 +private:
1.165 + RBBIRuleBuilder(const RBBIRuleBuilder &other); // declared private to forbid copying of this class
1.166 + RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // declared private to forbid copying of this class
1.167 +};
1.168 +
1.169 +
1.170 +
1.171 +
1.172 +//----------------------------------------------------------------------------
1.173 +//
1.174 +// RBBISetTableEl is an entry in the hash table of UnicodeSets that have
1.175 +// been encountered. The val Node will be of nodetype uset
1.176 +// and contain pointers to the actual UnicodeSets.
1.177 +// The Key is the source string for initializing the set.
1.178 +//
1.179 +// The hash table is used to avoid creating duplicate
1.180 +// unnamed (not $var references) UnicodeSets.
1.181 +//
1.182 +// Memory Management:
1.183 +// The Hash Table owns these RBBISetTableEl structs and
1.184 +// the key strings. It does NOT own the val nodes.
1.185 +//
1.186 +//----------------------------------------------------------------------------
1.187 +struct RBBISetTableEl {
1.188 + UnicodeString *key; // Source string used to initialize the set; owned by the hash table.
1.189 + RBBINode *val; // Node of type uset; NOT owned by the hash table.
1.190 +};
1.191 +
1.192 +
1.193 +//----------------------------------------------------------------------------
1.194 +//
1.195 +// RBBIDebugPrintf Printf equivalent, for debugging output.
1.196 +// Conditional compilation of the implementation lets us
1.197 +// get rid of the stdio dependency in environments where it
1.198 +// is unavailable.
1.199 +//
1.200 +//----------------------------------------------------------------------------
1.201 +#ifdef RBBI_DEBUG
1.202 +#include <stdio.h> // NOTE(review): included between U_NAMESPACE_BEGIN and U_NAMESPACE_END; confirm this is intended.
1.203 +#define RBBIDebugPrintf printf // Debug builds: expands directly to printf.
1.204 +#define RBBIDebugPuts puts // Debug builds: expands directly to puts.
1.205 +#else
1.206 +#undef RBBIDebugPrintf // Non-debug builds: left undefined; no no-op replacement is provided.
1.207 +#define RBBIDebugPuts(arg) // Non-debug builds: expands to nothing.
1.208 +#endif
1.209 +
1.210 +U_NAMESPACE_END
1.211 +#endif
1.212 +
1.213 +
1.214 +