os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbisetb.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 //
     2 //  rbbisetb.h
     3 /*
     4 **********************************************************************
     5 *   Copyright (c) 2001-2005, International Business Machines
     6 *   Corporation and others.  All Rights Reserved.
     7 **********************************************************************
     8 */
     9 
    10 #ifndef RBBISETB_H
    11 #define RBBISETB_H
    12 
    13 #include "unicode/utypes.h"
    14 #include "unicode/uobject.h"
    15 #include "rbbirb.h"
    16 #include "uvector.h"
    17 
    18 struct  UNewTrie;
    19 
    20 U_NAMESPACE_BEGIN
    21 
    22 //
    23 //  RBBISetBuilder   Derives the character categories used by the runtime RBBI engine
    24 //                   from the Unicode Sets appearing in the source  RBBI rules, and
    25 //                   creates the TRIE table used to map from Unicode to the
    26 //                   character categories.
    27 //
    28 
    29 
    30 //
    31 //  RangeDescriptor
    32 //
    33 //     Each of the non-overlapping character ranges gets one of these descriptors.
    34 //     All of them are strung together in a linked list, which is kept in order
    35 //     (by character)
    36 //
    37 class RangeDescriptor : public UMemory {   // One node in the ordered linked list of non-overlapping character ranges.
    38 public:
    39     UChar32            fStartChar;      // Start of range, unicode 32 bit value.
    40     UChar32            fEndChar;        // End of range, unicode 32 bit value.
    41     int32_t            fNum;            // runtime-mapped input value for this range.
    42     UVector           *fIncludesSets;   // vector of the original
    43                                         //   Unicode sets that include this range.
    44                                         //    (Contains ptrs to uset nodes)
    45     RangeDescriptor   *fNext;           // Next RangeDescriptor in the linked list.
    46 
    47     RangeDescriptor(UErrorCode &status);   // status: error code passed by reference (NOTE(review): presumably set on failure - confirm in the .cpp)
    48     RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);   // NOTE(review): looks like a copying constructor that reports errors through status - confirm
    49     ~RangeDescriptor();
    50     void split(UChar32 where, UErrorCode &status);   // Split this range in two at "where", with
    51                                         //   where appearing in the second (higher) part.
    52     void setDictionaryFlag();           // Check whether this range appears as part of
    53                                         //   the Unicode set named "dictionary"
    54 
    55 private:
    56     RangeDescriptor(const RangeDescriptor &other); // private: forbid plain copy construction of this class
    57     RangeDescriptor &operator=(const RangeDescriptor &other); // private: forbid assignment of this class
    58 };
    59 
    60 
    61 //
    62 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules.
    63 //
    64 //      Starting with the rules parse tree from the scanner,
    65 //
    66 //                   -  Enumerate the set of UnicodeSets that are referenced
    67 //                      by the RBBI rules.
    68 //                   -  compute a derived set of non-overlapping UnicodeSets
    69 //                      that will correspond to columns in the state table for
    70 //                      the RBBI execution engine.
    71 //                   -  construct the trie table that maps input characters
    72 //                      to set numbers in the non-overlapping set of sets.
    73 //
    74 
    75 
    76 class RBBISetBuilder : public UMemory {   // Processes the Unicode Sets from RBBI rules into character categories and a mapping trie.
    77 public:
    78     RBBISetBuilder(RBBIRuleBuilder *rb);   // rb: NOTE(review): presumably the owning rule builder kept in fRB - confirm
    79     ~RBBISetBuilder();
    80 
    81     void     build();                        // NOTE(review): presumably runs the set processing described for this class - confirm
    82     void     addValToSets(UVector *sets,      uint32_t val);
    83     void     addValToSet (RBBINode *usetNode, uint32_t val);
    84     int32_t  getNumCharCategories() const;   // CharCategories are the same as input symbol set to the
    85                                    //    runtime state machine, which are the same as
    86                                    //    columns in the DFA state table
    87     int32_t  getTrieSize() /*const*/;        // Size in bytes of the serialized Trie.
    88     void     serializeTrie(uint8_t *where);  // Write out the serialized Trie to "where".
    89     UChar32  getFirstChar(int32_t  val) const;   // NOTE(review): presumably the first character mapped to category val - confirm
    90 #ifdef RBBI_DEBUG
    91     void     printSets();
    92     void     printRanges();
    93     void     printRangeGroups();
    94 #else   // RBBI_DEBUG not defined: the print calls are defined as empty macros and expand to nothing.
    95     #define printSets()
    96     #define printRanges()
    97     #define printRangeGroups()
    98 #endif
    99 
   100 private:
   101     void           numberSets();
   102 
   103     RBBIRuleBuilder       *fRB;             // The RBBI Rule Compiler that owns us.
   104     UErrorCode            *fStatus;         // NOTE(review): presumably points at the owning builder's error status - confirm
   105 
   106     RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors
   107 
   108     UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing
   109     uint32_t              fTrieSize;        //  the Unicode Sets.
   110 
   111     // Groups correspond to character categories -
   112     //       groups of ranges that are in the same original UnicodeSets.
   113     //       fGroupCount is the index of the last used group.
   114     //       fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
   115     //       State table column 0 is not used.  Column 1 is for end-of-input.
   116     //       column 2 is for group 0.  Funny counting.  (NOTE(review): verify these counts against the .cpp)
   117     int32_t               fGroupCount;
   118 
   119     RBBISetBuilder(const RBBISetBuilder &other); // private: forbid copy construction of this class
   120     RBBISetBuilder &operator=(const RBBISetBuilder &other); // private: forbid assignment of this class
   121 };
   122 
   123 
   124 
   125 U_NAMESPACE_END
   126 #endif