os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbisetb.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
//
sl@0
     2
//  rbbisetb.h
sl@0
     3
/*
sl@0
     4
**********************************************************************
sl@0
     5
*   Copyright (c) 2001-2005, International Business Machines
sl@0
     6
*   Corporation and others.  All Rights Reserved.
sl@0
     7
**********************************************************************
sl@0
     8
*/
sl@0
     9
sl@0
    10
#ifndef RBBISETB_H
sl@0
    11
#define RBBISETB_H
sl@0
    12
sl@0
    13
#include "unicode/utypes.h"
sl@0
    14
#include "unicode/uobject.h"
sl@0
    15
#include "rbbirb.h"
sl@0
    16
#include "uvector.h"
sl@0
    17
sl@0
    18
struct  UNewTrie;
sl@0
    19
sl@0
    20
U_NAMESPACE_BEGIN
sl@0
    21
sl@0
    22
//
sl@0
    23
//  RBBISetBuilder   Derives the character categories used by the runtime RBBI engine
sl@0
    24
//                   from the Unicode Sets appearing in the source  RBBI rules, and
sl@0
    25
//                   creates the TRIE table used to map from Unicode to the
sl@0
    26
//                   character categories.
sl@0
    27
//
sl@0
    28
sl@0
    29
sl@0
    30
//
sl@0
    31
//  RangeDescriptor
sl@0
    32
//
sl@0
    33
//     Each of the non-overlapping character ranges gets one of these descriptors.
sl@0
    34
//     All of them are strung together in a linked list, which is kept in order
sl@0
    35
//     (by character)
sl@0
    36
//
sl@0
    37
class RangeDescriptor : public UMemory {
sl@0
    38
public:
sl@0
    39
    UChar32            fStartChar;      // Start of range, unicode 32 bit value.
sl@0
    40
    UChar32            fEndChar;        // End of range, unicode 32 bit value.
sl@0
    41
    int32_t            fNum;            // runtime-mapped input value for this range.
sl@0
    42
    UVector           *fIncludesSets;   // vector of the original
sl@0
    43
                                        //   Unicode sets that include this range.
sl@0
    44
                                        //    (Contains ptrs to uset nodes)
sl@0
    45
    RangeDescriptor   *fNext;           // Next RangeDescriptor in the linked list.
sl@0
    46
sl@0
    47
    // Both public constructors take a UErrorCode by reference; presumably failures
    //   (e.g. allocating fIncludesSets) are reported through it - confirm in rbbisetb.cpp.
    RangeDescriptor(UErrorCode &status);
sl@0
    48
    // Copy-style constructor with an error code.  This is the only public way to
    //   duplicate a descriptor; the plain copy constructor below is private.
    RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
sl@0
    49
    ~RangeDescriptor();
sl@0
    50
    void split(UChar32 where, UErrorCode &status);   // Split this range in two at "where", with
sl@0
    51
                                        //   where appearing in the second (higher) part.
sl@0
    52
    void setDictionaryFlag();           // Check whether this range appears as part of
sl@0
    53
                                        //   the Unicode set named "dictionary"
sl@0
    54
sl@0
    55
private:
sl@0
    56
    RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
sl@0
    57
    RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
sl@0
    58
};
sl@0
    59
sl@0
    60
sl@0
    61
//
sl@0
    62
//  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules.
sl@0
    63
//
sl@0
    64
//      Starting with the rules parse tree from the scanner,
sl@0
    65
//
sl@0
    66
//                   -  Enumerate the set of UnicodeSets that are referenced
sl@0
    67
//                      by the RBBI rules.
sl@0
    68
//                   -  compute a derived set of non-overlapping UnicodeSets
sl@0
    69
//                      that will correspond to columns in the state table for
sl@0
    70
//                      the RBBI execution engine.
sl@0
    71
//                   -  construct the trie table that maps input characters
sl@0
    72
//                      to set numbers in the non-overlapping set of sets.
sl@0
    73
//
sl@0
    74
sl@0
    75
sl@0
    76
class RBBISetBuilder : public UMemory {
sl@0
    77
public:
sl@0
    78
    RBBISetBuilder(RBBIRuleBuilder *rb);
sl@0
    79
    ~RBBISetBuilder();
sl@0
    80
sl@0
    81
    // NOTE(review): build() takes no status argument; errors are presumably
    //   reported through fStatus - confirm in rbbisetb.cpp.
    void     build();
sl@0
    82
    // Overloads for a vector of sets and for a single uset node.
    void     addValToSets(UVector *sets,      uint32_t val);
sl@0
    83
    void     addValToSet (RBBINode *usetNode, uint32_t val);
sl@0
    84
    int32_t  getNumCharCategories() const;   // CharCategories are the same as input symbol set to the
sl@0
    85
                                   //    runtime state machine, which are the same as
sl@0
    86
                                   //    columns in the DFA state table
sl@0
    87
    // Note: the const qualifier is commented out, so this cannot be called on a
    //   const RBBISetBuilder, unlike the neighbouring getters.
    int32_t  getTrieSize() /*const*/;        // Size in bytes of the serialized Trie.
sl@0
    88
    // NOTE(review): no buffer length is passed; presumably "where" must have room
    //   for at least getTrieSize() bytes - confirm against the caller.
    void     serializeTrie(uint8_t *where);  // write out the serialized Trie.
sl@0
    89
    UChar32  getFirstChar(int32_t  val) const;
sl@0
    90
#ifdef RBBI_DEBUG
sl@0
    91
    void     printSets();
sl@0
    92
    void     printRanges();
sl@0
    93
    void     printRangeGroups();
sl@0
    94
#else
sl@0
    95
    // Without RBBI_DEBUG the debug printers are replaced by empty function-like
    //   macros, so call sites expand to nothing.  Macros ignore class scope:
    //   these three names stay defined for the rest of the translation unit.
    #define printSets()
sl@0
    96
    #define printRanges()
sl@0
    97
    #define printRangeGroups()
sl@0
    98
#endif
sl@0
    99
sl@0
   100
private:
sl@0
   101
    void           numberSets();
sl@0
   102
sl@0
   103
    RBBIRuleBuilder       *fRB;             // The RBBI Rule Compiler that owns us.
sl@0
   104
    UErrorCode            *fStatus;         // NOTE(review): presumably the owning rule builder's
                                            //   error status - confirm in rbbisetb.cpp.
sl@0
   105
sl@0
   106
    RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors
sl@0
   107
sl@0
   108
    UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing
sl@0
   109
    uint32_t              fTrieSize;        //  the Unicode Sets.
sl@0
   110
sl@0
   111
    // Groups correspond to character categories -
sl@0
   112
    //       groups of ranges that are in the same original UnicodeSets.
sl@0
   113
    //       fGroupCount is the index of the last used group.
sl@0
   114
    //       fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
sl@0
   115
    //       State table column 0 is not used.  Column 1 is for end-of-input.
sl@0
   116
    //       column 2 is for group 0.  Funny counting.
sl@0
   117
    int32_t               fGroupCount;
sl@0
   118
sl@0
   119
    RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
sl@0
   120
    RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
sl@0
   121
};
sl@0
   122
sl@0
   123
sl@0
   124
sl@0
   125
U_NAMESPACE_END
sl@0
   126
#endif