/*
*******************************************************************************
*
*   Copyright (C) 1999-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  rbbidata.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   RBBI data formats.  Includes:
*
*       Structs that describe the format of the Binary RBBI data,
*       as it is stored in ICU's data file.
*
*       RBBIDataWrapper - Instances of this class sit between the
*       raw data structs and the RulesBasedBreakIterator objects
*       that are created by applications.  The wrapper class
*       provides reference counting for the underlying data,
*       and direct pointers to data that would not otherwise
*       be accessible without ugly pointer arithmetic.  The
*       wrapper does not attempt to provide any higher level
*       abstractions for the data itself.
*
*       There will be only one instance of RBBIDataWrapper for any
*       set of RBBI run time data being shared by instances
*       (clones) of RulesBasedBreakIterator.
*/
sl@0
|
31 |
|
sl@0
|
32 |
#ifndef __RBBIDATA_H__
|
sl@0
|
33 |
#define __RBBIDATA_H__
|
sl@0
|
34 |
|
sl@0
|
35 |
#include "unicode/utypes.h"
|
sl@0
|
36 |
#include "unicode/udata.h"
|
sl@0
|
37 |
#include "udataswp.h"
|
sl@0
|
38 |
|
sl@0
|
39 |
/**
|
sl@0
|
40 |
* Swap RBBI data. See udataswp.h.
|
sl@0
|
41 |
* @internal
|
sl@0
|
42 |
*/
|
sl@0
|
43 |
U_CAPI int32_t U_EXPORT2
|
sl@0
|
44 |
ubrk_swap(const UDataSwapper *ds,
|
sl@0
|
45 |
const void *inData, int32_t length, void *outData,
|
sl@0
|
46 |
UErrorCode *pErrorCode);
|
sl@0
|
47 |
|
sl@0
|
48 |
#ifdef XP_CPLUSPLUS
|
sl@0
|
49 |
|
sl@0
|
50 |
#include "unicode/uobject.h"
|
sl@0
|
51 |
#include "unicode/unistr.h"
|
sl@0
|
52 |
#include "utrie.h"
|
sl@0
|
53 |
|
sl@0
|
54 |
U_NAMESPACE_BEGIN
|
sl@0
|
55 |
|
/*
 *   The following structs map exactly onto the raw data from ICU common data file.
 *   Because of that, the order, type and size of every field is part of the
 *   binary data format.
 */
struct RBBIDataHeader {
    uint32_t         fMagic;           /*  Magic number.  The comment here originally read      */
                                       /*     "== 0xbla0", which is not a valid hex literal;    */
                                       /*     presumably 0xb1a0 is meant -- TODO confirm        */
                                       /*     against the code that validates this field.       */
    uint8_t          fFormatVersion[4]; /* Data Format.  Same as the value in struct UDataInfo  */
                                       /*     if there is one associated with this data.        */
                                       /*     (version originates in rbbi, is copied to UDataInfo) */
                                       /*     For ICU 3.2 and earlier, this field was           */
                                       /*         uint32_t  fVersion                            */
                                       /*     with a value of 1.                                */
    uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,             */
                                       /*     including all sections, not just the header.      */
    uint32_t         fCatCount;        /*  Number of character categories.                      */

    /*                                                                        */
    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
    /*  All sizes are in bytes.                                               */
    /*                                                                        */
    uint32_t         fFTable;          /*  Offset to the forward state transition table.        */
    uint32_t         fFTableLen;       /*  Size of the forward state transition table.          */
    uint32_t         fRTable;          /*  Offset to the reverse state transition table.        */
    uint32_t         fRTableLen;       /*  Size of the reverse state transition table.          */
    uint32_t         fSFTable;         /*  Offset to the safe point forward transition table.   */
    uint32_t         fSFTableLen;      /*  Size of the safe point forward transition table.     */
    uint32_t         fSRTable;         /*  Offset to the safe point reverse transition table.   */
    uint32_t         fSRTableLen;      /*  Size of the safe point reverse transition table.     */
    uint32_t         fTrie;            /*  Offset to Trie data for character categories.        */
    uint32_t         fTrieLen;         /*  Size of the Trie data.                               */
    uint32_t         fRuleSource;      /*  Offset to the source for the break                   */
    uint32_t         fRuleSourceLen;   /*    rules.  Stored UChar *.                            */
    uint32_t         fStatusTable;     /*  Offset to the table of rule status values.           */
    uint32_t         fStatusTableLen;  /*  Size of the table of rule status values.             */

    uint32_t         fReserved[6];     /*  Reserved for expansion.                              */

};
sl@0
|
94 |
|
sl@0
|
95 |
|
sl@0
|
96 |
|
/*
 *  One row of a state transition table.  The declared size of fNextState is
 *  only a placeholder; see the comments on that field.
 */
struct  RBBIStateTableRow {
    int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
                                    /*  Value 0: not an accepting state.                  */
                                    /*       -1: Unconditional Accepting state.           */
                                    /*    positive:  Look-ahead match has completed.      */
                                    /*           Actual boundary position happened earlier */
                                    /*           Value here == fLookAhead in earlier      */
                                    /*              state, at actual boundary pos.        */
    int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
                                    /*    corresponds to a '/' in the rule source.        */
                                    /*    Value is the same as the fAccepting             */
                                    /*      value for the rule (which will appear         */
                                    /*      in a different state).                        */
    int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
                                    /*     from a rule.  Value is the index in the        */
                                    /*     StatusTable of the set of matching             */
                                    /*     tags (rule status values)                      */
    int16_t          fReserved;
    uint16_t         fNextState[2]; /*  Next State, indexed by char category.             */
                                    /*    Array Size is fNumCols from the                 */
                                    /*    state table header.                             */
                                    /*    NOTE(review): the RBBIStateTable struct in this */
                                    /*    header declares no fNumCols field (it has       */
                                    /*    fRowLen) -- confirm which value is meant.       */
                                    /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
                                    /*              before changing anything here.        */
};
sl@0
|
121 |
|
sl@0
|
122 |
|
/*
 *  Header of one state transition table, followed directly by the row data.
 */
struct RBBIStateTable {
    uint32_t         fNumStates;    /*  Number of states.                                 */
    uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
    uint32_t         fFlags;        /*  Option Flags for this state table                 */
    uint32_t         fReserved;     /*  reserved                                          */
    char             fTableData[4]; /*  First RBBIStateTableRow begins here.              */
                                    /*    (making it char[] simplifies ugly address       */
                                    /*     arithmetic for indexing variable length rows.) */
};
sl@0
|
132 |
|
/*
 *  Flag values for a state table.
 *  NOTE(review): presumably these are the bits stored in
 *  RBBIStateTable::fFlags ("Option Flags for this state table") -- confirm
 *  against the code that reads that field.
 */
typedef enum {
    RBBI_LOOKAHEAD_HARD_BREAK = 1
} RBBIStateTableFlags;
sl@0
|
136 |
|
sl@0
|
137 |
|
sl@0
|
138 |
/* */
|
sl@0
|
139 |
/* The reference counting wrapper class */
|
sl@0
|
140 |
/* */
|
sl@0
|
141 |
class RBBIDataWrapper : public UMemory {
|
sl@0
|
142 |
public:
|
sl@0
|
143 |
RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
|
sl@0
|
144 |
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
|
sl@0
|
145 |
~RBBIDataWrapper();
|
sl@0
|
146 |
|
sl@0
|
147 |
void init(const RBBIDataHeader *data, UErrorCode &status);
|
sl@0
|
148 |
RBBIDataWrapper *addReference();
|
sl@0
|
149 |
void removeReference();
|
sl@0
|
150 |
UBool operator ==(const RBBIDataWrapper &other) const;
|
sl@0
|
151 |
int32_t hashCode();
|
sl@0
|
152 |
const UnicodeString &getRuleSourceString() const;
|
sl@0
|
153 |
#ifdef RBBI_DEBUG
|
sl@0
|
154 |
void printData();
|
sl@0
|
155 |
void printTable(const char *heading, const RBBIStateTable *table);
|
sl@0
|
156 |
#else
|
sl@0
|
157 |
#define printData()
|
sl@0
|
158 |
#define printTable(heading, table)
|
sl@0
|
159 |
#endif
|
sl@0
|
160 |
|
sl@0
|
161 |
/* */
|
sl@0
|
162 |
/* Pointers to items within the data */
|
sl@0
|
163 |
/* */
|
sl@0
|
164 |
const RBBIDataHeader *fHeader;
|
sl@0
|
165 |
const RBBIStateTable *fForwardTable;
|
sl@0
|
166 |
const RBBIStateTable *fReverseTable;
|
sl@0
|
167 |
const RBBIStateTable *fSafeFwdTable;
|
sl@0
|
168 |
const RBBIStateTable *fSafeRevTable;
|
sl@0
|
169 |
const UChar *fRuleSource;
|
sl@0
|
170 |
const int32_t *fRuleStatusTable;
|
sl@0
|
171 |
|
sl@0
|
172 |
/* number of int32_t values in the rule status table. Used to sanity check indexing */
|
sl@0
|
173 |
int32_t fStatusMaxIdx;
|
sl@0
|
174 |
|
sl@0
|
175 |
UTrie fTrie;
|
sl@0
|
176 |
|
sl@0
|
177 |
private:
|
sl@0
|
178 |
int32_t fRefCount;
|
sl@0
|
179 |
UDataMemory *fUDataMem;
|
sl@0
|
180 |
UnicodeString fRuleString;
|
sl@0
|
181 |
|
sl@0
|
182 |
RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
|
sl@0
|
183 |
RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
|
sl@0
|
184 |
};
|
sl@0
|
185 |
|
sl@0
|
186 |
|
sl@0
|
187 |
|
sl@0
|
188 |
U_NAMESPACE_END
|
sl@0
|
189 |
|
sl@0
|
190 |
#endif /* C++ */
|
sl@0
|
191 |
|
sl@0
|
192 |
#endif
|