os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbidata.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
*******************************************************************************
sl@0
     3
*
sl@0
     4
*   Copyright (C) 1999-2005, International Business Machines
sl@0
     5
*   Corporation and others.  All Rights Reserved.
sl@0
     6
*
sl@0
     7
*******************************************************************************
sl@0
     8
*   file name:  rbbidata.h
sl@0
     9
*   encoding:   US-ASCII
sl@0
    10
*   tab size:   8 (not used)
sl@0
    11
*   indentation:4
sl@0
    12
*
sl@0
    13
*   RBBI data formats  Includes
sl@0
    14
*
sl@0
    15
*                          Structs that describe the format of the Binary RBBI data,
sl@0
    16
*                          as it is stored in ICU's data file.
sl@0
    17
*
sl@0
    18
*      RBBIDataWrapper  -  Instances of this class sit between the
sl@0
    19
*                          raw data structs and the RulesBasedBreakIterator objects
sl@0
    20
*                          that are created by applications.  The wrapper class
sl@0
    21
*                          provides reference counting for the underlying data,
sl@0
    22
*                          and direct pointers to data that would not otherwise
sl@0
    23
*                          be accessible without ugly pointer arithmetic.  The
sl@0
    24
*                          wrapper does not attempt to provide any higher level
sl@0
    25
*                          abstractions for the data itself.
sl@0
    26
*
sl@0
    27
*                          There will be only one instance of RBBIDataWrapper for any
sl@0
    28
*                          set of RBBI run time data being shared by instances
sl@0
    29
*                          (clones) of RulesBasedBreakIterator.
sl@0
    30
*/
sl@0
    31
sl@0
    32
#ifndef __RBBIDATA_H__
sl@0
    33
#define __RBBIDATA_H__
sl@0
    34
sl@0
    35
#include "unicode/utypes.h"
sl@0
    36
#include "unicode/udata.h"
sl@0
    37
#include "udataswp.h"
sl@0
    38
sl@0
    39
/**
sl@0
    40
 * Swap RBBI data. See udataswp.h.
sl@0
    41
 * @internal
sl@0
    42
 */
sl@0
    43
U_CAPI int32_t U_EXPORT2
sl@0
    44
ubrk_swap(const UDataSwapper *ds,
sl@0
    45
          const void *inData, int32_t length, void *outData,
sl@0
    46
          UErrorCode *pErrorCode);
sl@0
    47
sl@0
    48
#ifdef XP_CPLUSPLUS
sl@0
    49
sl@0
    50
#include "unicode/uobject.h"
sl@0
    51
#include "unicode/unistr.h"
sl@0
    52
#include "utrie.h"
sl@0
    53
sl@0
    54
U_NAMESPACE_BEGIN
sl@0
    55
sl@0
    56
/*
 *   The following structs map exactly onto the raw data from ICU common data file.
 */

/*
 *   RBBIDataHeader
 *
 *   Header of a block of binary RBBI data.  It identifies the data (magic
 *   number, format version), gives its total length, and locates every
 *   subsection by an (offset, length) pair.
 *
 *   The field order and sizes are the data format itself; do not reorder,
 *   resize or insert fields.
 */
struct RBBIDataHeader {
    uint32_t         fMagic;            /*  == 0xb1a0                                               */
    uint8_t          fFormatVersion[4]; /*  Data Format.  Same as the value in struct UDataInfo     */
                                        /*    if there is one associated with this data.            */
                                        /*      (version originates in rbbi, is copied to UDataInfo) */
                                        /*    For ICU 3.2 and earlier, this field was               */
                                        /*        uint32_t  fVersion                                */
                                        /*    with a value of 1.                                    */
    uint32_t         fLength;           /*  Total length in bytes of this RBBI Data,                */
                                        /*      including all sections, not just the header.        */
    uint32_t         fCatCount;         /*  Number of character categories.                         */

    /*                                                                        */
    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
    /*  All sizes are in bytes.                                               */
    /*                                                                        */
    uint32_t         fFTable;           /*  Offset to the forward state transition table.           */
    uint32_t         fFTableLen;
    uint32_t         fRTable;           /*  Offset to the reverse state transition table.           */
    uint32_t         fRTableLen;
    uint32_t         fSFTable;          /*  Offset to the safe point forward transition table.      */
    uint32_t         fSFTableLen;
    uint32_t         fSRTable;          /*  Offset to the safe point reverse transition table.      */
    uint32_t         fSRTableLen;
    uint32_t         fTrie;             /*  Offset to Trie data for character categories.           */
    uint32_t         fTrieLen;
    uint32_t         fRuleSource;       /*  Offset to the source for the break                      */
    uint32_t         fRuleSourceLen;    /*    rules.  Stored UChar *.                               */
    uint32_t         fStatusTable;      /*  Offset to the table of rule status values.              */
    uint32_t         fStatusTableLen;

    uint32_t         fReserved[6];      /*  Reserved for expansion.                                 */

};
sl@0
    94
sl@0
    95
sl@0
    96
sl@0
    97
/*
 *   RBBIStateTableRow
 *
 *   One row of a state transition table: three per-state attributes, a
 *   reserved slot, then the next-state array indexed by character category.
 *
 *   fNextState is declared with two elements, but per the field comment its
 *   real length is fNumCols from the state table header, so rows are variable
 *   length and sizeof(RBBIStateTableRow) is not the size of a stored row.
 */
struct  RBBIStateTableRow {
    int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
                                    /*  Value 0: not an accepting state.                  */
                                    /*       -1: Unconditional Accepting state.           */
                                    /*    positive:  Look-ahead match has completed.      */
                                    /*           Actual boundary position happened        */
                                    /*           earlier.  Value here == fLookAhead in    */
                                    /*           earlier state, at actual boundary pos.   */
    int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
                                    /*    corresponds to a '/' in the rule source.        */
                                    /*    Value is the same as the fAccepting             */
                                    /*      value for the rule (which will appear         */
                                    /*      in a different state).                        */
    int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
                                    /*     from a rule.  Value is the index in the        */
                                    /*     StatusTable of the set of matching             */
                                    /*     tags (rule status values)                      */
    int16_t          fReserved;
    uint16_t         fNextState[2]; /*  Next State, indexed by char category.             */
                                    /*    Array Size is fNumCols from the                 */
                                    /*    state table header.                             */
                                    /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
                                    /*              before changing anything here.        */
};
sl@0
   121
sl@0
   122
sl@0
   123
/*
 *   RBBIStateTable
 *
 *   Header of one state transition table, immediately followed by the row
 *   data.  fTableData marks where the first row starts; rows are fRowLen
 *   bytes each, so row addresses are computed in bytes from fTableData.
 */
struct RBBIStateTable {
    uint32_t         fNumStates;    /*  Number of states.                                 */
    uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
    uint32_t         fFlags;        /*  Option Flags for this state table                 */
    uint32_t         fReserved;     /*  reserved                                          */
    char             fTableData[4]; /*  First RBBIStateTableRow begins here.              */
                                    /*    (making it char[] simplifies ugly address       */
                                    /*     arithmetic for indexing variable length rows.) */
};
sl@0
   132
sl@0
   133
/*
 *   RBBIStateTableFlags
 *
 *   Named flag values for a state table.
 *   NOTE(review): judging by the type name these are the values stored in
 *   RBBIStateTable::fFlags - confirm against the code that reads fFlags.
 */
typedef enum {
    RBBI_LOOKAHEAD_HARD_BREAK = 1
} RBBIStateTableFlags;
sl@0
   136
sl@0
   137
sl@0
   138
/*                                        */
sl@0
   139
/*   The reference counting wrapper class */
sl@0
   140
/*                                        */
sl@0
   141
class RBBIDataWrapper : public UMemory {
sl@0
   142
public:
sl@0
   143
    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
sl@0
   144
    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
sl@0
   145
    ~RBBIDataWrapper();
sl@0
   146
sl@0
   147
    void                  init(const RBBIDataHeader *data, UErrorCode &status);
sl@0
   148
    RBBIDataWrapper      *addReference();
sl@0
   149
    void                  removeReference();
sl@0
   150
    UBool                 operator ==(const RBBIDataWrapper &other) const;
sl@0
   151
    int32_t               hashCode();
sl@0
   152
    const UnicodeString  &getRuleSourceString() const;
sl@0
   153
#ifdef RBBI_DEBUG
sl@0
   154
    void                  printData();
sl@0
   155
    void                  printTable(const char *heading, const RBBIStateTable *table);
sl@0
   156
#else
sl@0
   157
    #define printData()
sl@0
   158
    #define printTable(heading, table)
sl@0
   159
#endif
sl@0
   160
sl@0
   161
    /*                                     */
sl@0
   162
    /*   Pointers to items within the data */
sl@0
   163
    /*                                     */
sl@0
   164
    const RBBIDataHeader     *fHeader;
sl@0
   165
    const RBBIStateTable     *fForwardTable;
sl@0
   166
    const RBBIStateTable     *fReverseTable;
sl@0
   167
    const RBBIStateTable     *fSafeFwdTable;
sl@0
   168
    const RBBIStateTable     *fSafeRevTable;
sl@0
   169
    const UChar              *fRuleSource;
sl@0
   170
    const int32_t            *fRuleStatusTable; 
sl@0
   171
sl@0
   172
    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
sl@0
   173
    int32_t             fStatusMaxIdx;
sl@0
   174
sl@0
   175
    UTrie               fTrie;
sl@0
   176
sl@0
   177
private:
sl@0
   178
    int32_t             fRefCount;
sl@0
   179
    UDataMemory        *fUDataMem;
sl@0
   180
    UnicodeString       fRuleString;
sl@0
   181
sl@0
   182
    RBBIDataWrapper(const RBBIDataWrapper &other); /*  forbid copying of this class */
sl@0
   183
    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /*  forbid copying of this class */
sl@0
   184
};
sl@0
   185
sl@0
   186
sl@0
   187
sl@0
   188
U_NAMESPACE_END
sl@0
   189
sl@0
   190
#endif /* C++ */
sl@0
   191
sl@0
   192
#endif