1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbisetb.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,126 @@
1.4 +//
1.5 +// rbbisetb.h
1.6 +/*
1.7 +**********************************************************************
1.8 +* Copyright (c) 2001-2005, International Business Machines
1.9 +* Corporation and others. All Rights Reserved.
1.10 +**********************************************************************
1.11 +*/
1.12 +
1.13 +#ifndef RBBISETB_H
1.14 +#define RBBISETB_H
1.15 +
1.16 +#include "unicode/utypes.h"
1.17 +#include "unicode/uobject.h"
1.18 +#include "rbbirb.h"
1.19 +#include "uvector.h"
1.20 +
1.21 +struct UNewTrie;
1.22 +
1.23 +U_NAMESPACE_BEGIN
1.24 +
1.25 +//
1.26 +// RBBISetBuilder Derives the character categories used by the runtime RBBI engine
1.27 +// from the Unicode Sets appearing in the source RBBI rules, and
1.28 +// creates the TRIE table used to map from Unicode to the
1.29 +// character categories.
1.30 +//
1.31 +
1.32 +
1.33 +//
1.34 +// RangeDescriptor
1.35 +//
1.36 +// Each of the non-overlapping character ranges gets one of these descriptors.
1.37 +// All of them are strung together in a linked list, which is kept in order
1.38 +// (by character)
1.39 +//
1.40 +class RangeDescriptor : public UMemory {
1.41 +public:
1.42 + UChar32 fStartChar; // Start of range, unicode 32 bit value.
1.43 + UChar32 fEndChar; // End of range, unicode 32 bit value.
1.44 + int32_t fNum; // runtime-mapped input value for this range.
1.45 + UVector *fIncludesSets; // vector of the original
1.46 + // Unicode sets that include this range.
1.47 + // (Contains ptrs to uset nodes)
1.48 + RangeDescriptor *fNext; // Next RangeDescriptor in the linked list.
1.49 +
1.50 + RangeDescriptor(UErrorCode &status); // NOTE(review): presumably reports failures through status - confirm in the .cpp
1.51 + RangeDescriptor(const RangeDescriptor &other, UErrorCode &status); // Status-taking copy constructor; the plain one below is private.
1.52 + ~RangeDescriptor();
1.53 + void split(UChar32 where, UErrorCode &status); // Split this range in two at "where", with
1.54 + // where appearing in the second (higher) part.
1.55 + void setDictionaryFlag(); // Check whether this range appears as part of
1.56 + // the Unicode set named "dictionary"
1.57 +
1.58 +private:
1.59 + RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
1.60 + RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
1.61 +};
1.62 +
1.63 +
1.64 +//
1.65 +// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
1.66 +//
1.67 +// Starting with the rules parse tree from the scanner,
1.68 +//
1.69 +// - Enumerate the set of UnicodeSets that are referenced
1.70 +// by the RBBI rules.
1.71 +// - Compute a derived set of non-overlapping UnicodeSets
1.72 +// that will correspond to columns in the state table for
1.73 +// the RBBI execution engine.
1.74 +// - Construct the trie table that maps input characters
1.75 +// to set numbers in the non-overlapping set of sets.
1.76 +//
1.77 +
1.78 +
1.79 +class RBBISetBuilder : public UMemory {
1.80 +public:
1.81 + RBBISetBuilder(RBBIRuleBuilder *rb); // NOTE(review): rb is presumably kept in fRB - confirm in the .cpp
1.82 + ~RBBISetBuilder();
1.83 +
1.84 + void build();
1.85 + void addValToSets(UVector *sets, uint32_t val);
1.86 + void addValToSet (RBBINode *usetNode, uint32_t val);
1.87 + int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
1.88 + // runtime state machine, which are the same as
1.89 + // columns in the DFA state table
1.90 + int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie.
1.91 + void serializeTrie(uint8_t *where); // Write out the serialized Trie.
1.92 + UChar32 getFirstChar(int32_t val) const;
1.93 +#ifdef RBBI_DEBUG
1.94 + void printSets();
1.95 + void printRanges();
1.96 + void printRangeGroups();
1.97 +#else // Without RBBI_DEBUG the print functions are empty macros, so calls to them expand to nothing.
1.98 + #define printSets()
1.99 + #define printRanges()
1.100 + #define printRangeGroups()
1.101 +#endif
1.102 +
1.103 +private:
1.104 + void numberSets();
1.105 +
1.106 + RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
1.107 + UErrorCode *fStatus; // NOTE(review): presumably points at the rule builder's error status - confirm
1.108 +
1.109 + RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
1.110 +
1.111 + UNewTrie *fTrie; // The mapping TRIE that is the end result of processing
1.112 + uint32_t fTrieSize; // the Unicode Sets.
1.113 +
1.114 + // Groups correspond to character categories -
1.115 + // groups of ranges that are in the same original UnicodeSets.
1.116 + // fGroupCount is the index of the last used group.
1.117 + // fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
1.118 + // State table column 0 is not used. Column 1 is for end-of-input.
1.119 + // Column 2 is for group 0. Funny counting.
1.120 + int32_t fGroupCount;
1.121 +
1.122 + RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
1.123 + RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
1.124 +};
1.125 +
1.126 +
1.127 +
1.128 +U_NAMESPACE_END
1.129 +#endif