| sl@0 |      1 | /*
 | 
| sl@0 |      2 | *******************************************************************************
 | 
| sl@0 |      3 | *
 | 
| sl@0 |      4 | *   Copyright (C) 1999-2005, International Business Machines
 | 
| sl@0 |      5 | *   Corporation and others.  All Rights Reserved.
 | 
| sl@0 |      6 | *
 | 
| sl@0 |      7 | *******************************************************************************
 | 
| sl@0 |      8 | *   file name:  rbbidata.h
 | 
| sl@0 |      9 | *   encoding:   US-ASCII
 | 
| sl@0 |     10 | *   tab size:   8 (not used)
 | 
| sl@0 |     11 | *   indentation:4
 | 
| sl@0 |     12 | *
 | 
| sl@0 |     13 | *   RBBI data formats  Includes
 | 
| sl@0 |     14 | *
 | 
| sl@0 |     15 | *                          Structs that describe the format of the Binary RBBI data,
 | 
| sl@0 |     16 | *                          as it is stored in ICU's data file.
 | 
| sl@0 |     17 | *
 | 
| sl@0 |     18 | *      RBBIDataWrapper  -  Instances of this class sit between the
 | 
| sl@0 |     19 | *                          raw data structs and the RulesBasedBreakIterator objects
 | 
| sl@0 |     20 | *                          that are created by applications.  The wrapper class
 | 
| sl@0 |     21 | *                          provides reference counting for the underlying data,
 | 
| sl@0 |     22 | *                          and direct pointers to data that would not otherwise
 | 
| sl@0 |     23 | *                          be accessible without ugly pointer arithmetic.  The
 | 
| sl@0 |     24 | *                          wrapper does not attempt to provide any higher level
 | 
| sl@0 |     25 | *                          abstractions for the data itself.
 | 
| sl@0 |     26 | *
 | 
| sl@0 |     27 | *                          There will be only one instance of RBBIDataWrapper for any
 | 
| sl@0 |     28 | *                          set of RBBI run time data being shared by instances
 | 
| sl@0 |     29 | *                          (clones) of RulesBasedBreakIterator.
 | 
| sl@0 |     30 | */
 | 
| sl@0 |     31 | 
 | 
| sl@0 |     32 | #ifndef __RBBIDATA_H__
 | 
| sl@0 |     33 | #define __RBBIDATA_H__
 | 
| sl@0 |     34 | 
 | 
| sl@0 |     35 | #include "unicode/utypes.h"
 | 
| sl@0 |     36 | #include "unicode/udata.h"
 | 
| sl@0 |     37 | #include "udataswp.h"
 | 
| sl@0 |     38 | 
 | 
| sl@0 |     39 | /**
 | 
| sl@0 |     40 |  * Swap RBBI data. See udataswp.h.  NOTE(review): the parameter and return-value semantics are not documented in this header; presumably they follow the swapper conventions of udataswp.h - confirm.
 | 
| sl@0 |     41 |  * @internal
 | 
| sl@0 |     42 |  */
 | 
| sl@0 |     43 | U_CAPI int32_t U_EXPORT2
 | 
| sl@0 |     44 | ubrk_swap(const UDataSwapper *ds,
 | 
| sl@0 |     45 |           const void *inData, int32_t length, void *outData,
 | 
| sl@0 |     46 |           UErrorCode *pErrorCode);
 | 
| sl@0 |     47 | 
 | 
| sl@0 |     48 | #ifdef XP_CPLUSPLUS
 | 
| sl@0 |     49 | 
 | 
| sl@0 |     50 | #include "unicode/uobject.h"
 | 
| sl@0 |     51 | #include "unicode/unistr.h"
 | 
| sl@0 |     52 | #include "utrie.h"
 | 
| sl@0 |     53 | 
 | 
| sl@0 |     54 | U_NAMESPACE_BEGIN
 | 
| sl@0 |     55 | 
 | 
| sl@0 |     56 | /*  
 | 
| sl@0 |     57 |  *   The following structs map exactly onto the raw data from ICU common data file. 
 | 
| sl@0 |     58 |  */
 | 
| sl@0 |     59 | struct RBBIDataHeader {                /*  Header of the binary RBBI data; the section offsets below are relative to its start. */
 | 
| sl@0 |     60 |     uint32_t         fMagic;           /*  == 0xb1a0  (the comment used to read "0xbla0", which is not valid hex; TODO confirm against the implementation) */
 | 
| sl@0 |     61 |     uint8_t          fFormatVersion[4]; /* Data Format.  Same as the value in struct UDataInfo      */
 | 
| sl@0 |     62 |                                        /*   if there is one associated with this data.             */
 | 
| sl@0 |     63 |                                        /*     (version originates in rbbi, is copied to UDataInfo) */
 | 
| sl@0 |     64 |                                        /*   For ICU 3.2 and earlier, this field was                */
 | 
| sl@0 |     65 |                                        /*       uint32_t  fVersion                                 */
 | 
| sl@0 |     66 |                                        /*   with a value of 1.                                     */
 | 
| sl@0 |     67 |     uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,                */
 | 
| sl@0 |     68 |                                        /*      including all sections, not just the header.        */
 | 
| sl@0 |     69 |     uint32_t         fCatCount;        /*  Number of character categories.                         */
 | 
| sl@0 |     70 | 
 | 
| sl@0 |     71 |     /*                                                                        */
 | 
| sl@0 |     72 |     /*  Offsets and sizes of each of the subsections within the RBBI data.    */
 | 
| sl@0 |     73 |     /*  All offsets are bytes from the start of the RBBIDataHeader.           */
 | 
| sl@0 |     74 |     /*  All sizes are in bytes.                                               */
 | 
| sl@0 |     75 |     /*                                                                        */
 | 
| sl@0 |     76 |     uint32_t         fFTable;         /*  Offset to the forward state transition table. */
 | 
| sl@0 |     77 |     uint32_t         fFTableLen;      /*  Size in bytes of the forward table. */
 | 
| sl@0 |     78 |     uint32_t         fRTable;         /*  Offset to the reverse state transition table. */
 | 
| sl@0 |     79 |     uint32_t         fRTableLen;      /*  Size in bytes of the reverse table. */
 | 
| sl@0 |     80 |     uint32_t         fSFTable;        /*  Offset to the safe point forward transition table. */
 | 
| sl@0 |     81 |     uint32_t         fSFTableLen;     /*  Size in bytes of the safe point forward table. */
 | 
| sl@0 |     82 |     uint32_t         fSRTable;        /*  Offset to the safe point reverse transition table. */
 | 
| sl@0 |     83 |     uint32_t         fSRTableLen;     /*  Size in bytes of the safe point reverse table. */
 | 
| sl@0 |     84 |     uint32_t         fTrie;           /*  Offset to Trie data for character categories */
 | 
| sl@0 |     85 |     uint32_t         fTrieLen;        /*  Size in bytes of the Trie data. */
 | 
| sl@0 |     86 |     uint32_t         fRuleSource;     /*  Offset to the source for the break */
 | 
| sl@0 |     87 |     uint32_t         fRuleSourceLen;  /*    rules.  Stored UChar *.  (Len is the size in bytes.) */
 | 
| sl@0 |     88 |     uint32_t         fStatusTable;    /* Offset to the table of rule status values */
 | 
| sl@0 |     89 |     uint32_t         fStatusTableLen; /* Size in bytes of the rule status table. */
 | 
| sl@0 |     90 | 
 | 
| sl@0 |     91 |     uint32_t         fReserved[6];    /*  Reserved for expansion */
 | 
| sl@0 |     92 | 
 | 
| sl@0 |     93 | };
 | 
| sl@0 |     94 | 
 | 
| sl@0 |     95 | 
 | 
| sl@0 |     96 | 
 | 
| sl@0 |     97 | struct  RBBIStateTableRow {         /*  One row (one state) of a state transition table.  */
 | 
| sl@0 |     98 |     int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
 | 
| sl@0 |     99 |                                     /*  Value 0: not an accepting state.                  */
 | 
| sl@0 |    100 |                                     /*       -1: Unconditional Accepting state.           */
 | 
| sl@0 |    101 |                                     /*    positive:  Look-ahead match has completed.      */
 | 
| sl@0 |    102 |                                     /*           Actual boundary position happened earlier */
 | 
| sl@0 |    103 |                                     /*           Value here == fLookAhead in earlier      */
 | 
| sl@0 |    104 |                                     /*              state, at actual boundary pos.        */
 | 
| sl@0 |    105 |     int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
 | 
| sl@0 |    106 |                                     /*    corresponds to a '/' in the rule source.        */
 | 
| sl@0 |    107 |                                     /*    Value is the same as the fAccepting             */
 | 
| sl@0 |    108 |                                     /*      value for the rule (which will appear         */
 | 
| sl@0 |    109 |                                     /*      in a different state).                        */
 | 
| sl@0 |    110 |     int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
 | 
| sl@0 |    111 |                                     /*     from a rule.  Value is the index in the        */
 | 
| sl@0 |    112 |                                     /*     StatusTable of the set of matching             */
 | 
| sl@0 |    113 |                                     /*     tags (rule status values)                      */
 | 
| sl@0 |    114 |     int16_t          fReserved;     /*  Reserved; not described further in this header.   */
 | 
| sl@0 |    115 |     uint16_t         fNextState[2]; /*  Next State, indexed by char category.             */
 | 
| sl@0 |    116 |                                     /*    Array Size is fNumCols from the                 */
 | 
| sl@0 |    117 |                                     /*    state table header, so the [2] above is only a placeholder size.  NOTE(review): no fNumCols field is visible in RBBIStateTable in this file (it has fRowLen) - confirm. */
 | 
| sl@0 |    118 |                                     /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
 | 
| sl@0 |    119 |                                     /*              before changing anything here.        */
 | 
| sl@0 |    120 | };
 | 
| sl@0 |    121 | 
 | 
| sl@0 |    122 | 
 | 
| sl@0 |    123 | struct RBBIStateTable {             /*  Fixed header of one state table; the rows follow in fTableData. */
 | 
| sl@0 |    124 |     uint32_t         fNumStates;    /*  Number of states.                                 */
 | 
| sl@0 |    125 |     uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
 | 
| sl@0 |    126 |     uint32_t         fFlags;        /*  Option Flags for this state table (presumably RBBIStateTableFlags values - TODO confirm) */
 | 
| sl@0 |    127 |     uint32_t         fReserved;     /*  reserved                                          */
 | 
| sl@0 |    128 |     char             fTableData[4]; /*  First RBBIStateTableRow begins here.              */
 | 
| sl@0 |    129 |                                     /*    (making it char[] simplifies ugly address       */
 | 
| sl@0 |    130 |                                     /*     arithmetic for indexing variable length rows.) */
 | 
| sl@0 |    131 | };
 | 
| sl@0 |    132 | 
 | 
| sl@0 |    133 | typedef enum {                      /*  State table option flags; presumably stored in RBBIStateTable::fFlags - TODO confirm. */
 | 
| sl@0 |    134 |     RBBI_LOOKAHEAD_HARD_BREAK = 1   /*  NOTE(review): meaning is not documented in this header; see the code that tests this flag. */
 | 
| sl@0 |    135 | } RBBIStateTableFlags;
 | 
| sl@0 |    136 | 
 | 
| sl@0 |    137 | 
 | 
| sl@0 |    138 | /*                                        */
 | 
| sl@0 |    139 | /*   The reference counting wrapper class */
 | 
| sl@0 |    140 | /*                                        */
 | 
| sl@0 |    141 | class RBBIDataWrapper : public UMemory {
 | 
| sl@0 |    142 | public:
 | 
| sl@0 |    143 |     RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);   /*  Construct from a pointer to an RBBI data header.  */
 | 
| sl@0 |    144 |     RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);             /*  Construct from a UDataMemory (presumably kept in fUDataMem - TODO confirm).  */
 | 
| sl@0 |    145 |     ~RBBIDataWrapper();
 | 
| sl@0 |    146 | 
 | 
| sl@0 |    147 |     void                  init(const RBBIDataHeader *data, UErrorCode &status);   /*  NOTE(review): presumably shared set-up used by the constructors - confirm.  */
 | 
| sl@0 |    148 |     RBBIDataWrapper      *addReference();      /*  Reference counting, see fRefCount; exact behavior is in the implementation.  */
 | 
| sl@0 |    149 |     void                  removeReference();   /*  Counterpart of addReference(); exact behavior is in the implementation.  */
 | 
| sl@0 |    150 |     UBool                 operator ==(const RBBIDataWrapper &other) const;
 | 
| sl@0 |    151 |     int32_t               hashCode();          /*  Note: not declared const, unlike operator == above.  */
 | 
| sl@0 |    152 |     const UnicodeString  &getRuleSourceString() const;
 | 
| sl@0 |    153 | #ifdef RBBI_DEBUG
 | 
| sl@0 |    154 |     void                  printData();
 | 
| sl@0 |    155 |     void                  printTable(const char *heading, const RBBIStateTable *table);
 | 
| sl@0 |    156 | #else   /*  Without RBBI_DEBUG, printData() and printTable() are macros that expand to nothing; being macros, they are not scoped to this class.  */
 | 
| sl@0 |    157 |     #define printData()
 | 
| sl@0 |    158 |     #define printTable(heading, table)
 | 
| sl@0 |    159 | #endif
 | 
| sl@0 |    160 | 
 | 
| sl@0 |    161 |     /*                                     */
 | 
| sl@0 |    162 |     /*   Pointers to items within the data */
 | 
| sl@0 |    163 |     /*                                     */
 | 
| sl@0 |    164 |     const RBBIDataHeader     *fHeader;
 | 
| sl@0 |    165 |     const RBBIStateTable     *fForwardTable;
 | 
| sl@0 |    166 |     const RBBIStateTable     *fReverseTable;
 | 
| sl@0 |    167 |     const RBBIStateTable     *fSafeFwdTable;
 | 
| sl@0 |    168 |     const RBBIStateTable     *fSafeRevTable;
 | 
| sl@0 |    169 |     const UChar              *fRuleSource;
 | 
| sl@0 |    170 |     const int32_t            *fRuleStatusTable; 
 | 
| sl@0 |    171 | 
 | 
| sl@0 |    172 |     /* number of int32_t values in the rule status table.   Used to sanity check indexing */
 | 
| sl@0 |    173 |     int32_t             fStatusMaxIdx;
 | 
| sl@0 |    174 | 
 | 
| sl@0 |    175 |     UTrie               fTrie;          /*  Trie object held by value (the raw header only stores an offset, RBBIDataHeader::fTrie).  */
 | 
| sl@0 |    176 | 
 | 
| sl@0 |    177 | private:
 | 
| sl@0 |    178 |     int32_t             fRefCount;      /*  Reference count; see addReference() / removeReference().  */
 | 
| sl@0 |    179 |     UDataMemory        *fUDataMem;      /*  NOTE(review): presumably the udm given to the UDataMemory constructor - confirm.  */
 | 
| sl@0 |    180 |     UnicodeString       fRuleString;    /*  NOTE(review): presumably what getRuleSourceString() returns - confirm.  */
 | 
| sl@0 |    181 | 
 | 
| sl@0 |    182 |     RBBIDataWrapper(const RBBIDataWrapper &other); /*  forbid copying of this class */
 | 
| sl@0 |    183 |     RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /*  forbid copying of this class */
 | 
| sl@0 |    184 | };
 | 
| sl@0 |    185 | 
 | 
| sl@0 |    186 | 
 | 
| sl@0 |    187 | 
 | 
| sl@0 |    188 | U_NAMESPACE_END
 | 
| sl@0 |    189 | 
 | 
| sl@0 |    190 | #endif /* C++ */
 | 
| sl@0 |    191 | 
 | 
| sl@0 |    192 | #endif
 |