sl@0
|
1 |
/*
|
sl@0
|
2 |
******************************************************************************
|
sl@0
|
3 |
*
|
sl@0
|
4 |
* Copyright (C) 1999-2005, International Business Machines
|
sl@0
|
5 |
* Corporation and others. All Rights Reserved.
|
sl@0
|
6 |
*
|
sl@0
|
7 |
******************************************************************************
|
sl@0
|
8 |
* file name: ubidiimp.h
|
sl@0
|
9 |
* encoding: US-ASCII
|
sl@0
|
10 |
* tab size: 8 (not used)
|
sl@0
|
11 |
* indentation:4
|
sl@0
|
12 |
*
|
sl@0
|
13 |
* created on: 1999aug06
|
sl@0
|
14 |
* created by: Markus W. Scherer
|
sl@0
|
15 |
*/
|
sl@0
|
16 |
|
sl@0
|
17 |
#ifndef UBIDIIMP_H
|
sl@0
|
18 |
#define UBIDIIMP_H
|
sl@0
|
19 |
|
sl@0
|
20 |
/* set import/export definitions */
|
sl@0
|
21 |
#ifdef U_COMMON_IMPLEMENTATION
|
sl@0
|
22 |
|
sl@0
|
23 |
#include "unicode/utypes.h"
|
sl@0
|
24 |
#include "unicode/uchar.h"
|
sl@0
|
25 |
#include "ubidi_props.h"
|
sl@0
|
26 |
|
sl@0
|
27 |
/* miscellaneous definitions ---------------------------------------------- */
|
sl@0
|
28 |
|
sl@0
|
29 |
/*
 * DirProp holds the directional property of one text character (see the
 * dirProps array in struct UBiDi).  The value is one of the enum constants
 * defined below, optionally ORed with CONTEXT_RTL (0x80).
 */
typedef uint8_t DirProp;

/*
 * Flags is a bit set of directional properties; bits are built with
 * DIRPROP_FLAG(), plus the special DIRPROP_FLAG_MULTI_RUNS bit (bit 31).
 */
typedef uint32_t Flags;
|
sl@0
|
31 |
|
sl@0
|
32 |
/* Comparing the description of the BiDi algorithm with this implementation
|
sl@0
|
33 |
is easier with the same names for the BiDi types in the code as there.
|
sl@0
|
34 |
See UCharDirection in uchar.h .
|
sl@0
|
35 |
*/
|
sl@0
|
36 |
enum {
|
sl@0
|
37 |
L= U_LEFT_TO_RIGHT,
|
sl@0
|
38 |
R= U_RIGHT_TO_LEFT,
|
sl@0
|
39 |
EN= U_EUROPEAN_NUMBER,
|
sl@0
|
40 |
ES= U_EUROPEAN_NUMBER_SEPARATOR,
|
sl@0
|
41 |
ET= U_EUROPEAN_NUMBER_TERMINATOR,
|
sl@0
|
42 |
AN= U_ARABIC_NUMBER,
|
sl@0
|
43 |
CS= U_COMMON_NUMBER_SEPARATOR,
|
sl@0
|
44 |
B= U_BLOCK_SEPARATOR,
|
sl@0
|
45 |
S= U_SEGMENT_SEPARATOR,
|
sl@0
|
46 |
WS= U_WHITE_SPACE_NEUTRAL,
|
sl@0
|
47 |
ON= U_OTHER_NEUTRAL,
|
sl@0
|
48 |
LRE=U_LEFT_TO_RIGHT_EMBEDDING,
|
sl@0
|
49 |
LRO=U_LEFT_TO_RIGHT_OVERRIDE,
|
sl@0
|
50 |
AL= U_RIGHT_TO_LEFT_ARABIC,
|
sl@0
|
51 |
RLE=U_RIGHT_TO_LEFT_EMBEDDING,
|
sl@0
|
52 |
RLO=U_RIGHT_TO_LEFT_OVERRIDE,
|
sl@0
|
53 |
PDF=U_POP_DIRECTIONAL_FORMAT,
|
sl@0
|
54 |
NSM=U_DIR_NON_SPACING_MARK,
|
sl@0
|
55 |
BN= U_BOUNDARY_NEUTRAL,
|
sl@0
|
56 |
dirPropCount
|
sl@0
|
57 |
};
|
sl@0
|
58 |
|
sl@0
|
59 |
/*
|
sl@0
|
60 |
* Sometimes, bit values are more appropriate
|
sl@0
|
61 |
* to deal with directionality properties.
|
sl@0
|
62 |
* Abbreviations in these macro names refer to names
|
sl@0
|
63 |
* used in the BiDi algorithm.
|
sl@0
|
64 |
*/
|
sl@0
|
65 |
/* the bit for one directional property: bit number == property value */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
|
sl@0
|
101 |
|
sl@0
|
102 |
/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
|
sl@0
|
103 |
/* the low bit of a level, cast to DirProp: 0 for even levels, 1 for odd */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/*
 * True for level values 0xfe and above.
 * NOTE(review): presumably these are the UBIDI_DEFAULT_xxx pseudo-levels
 * mentioned at UBiDi.defaultParaLevel - confirm against ubidi.h.
 */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)

/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at text position `index`.
 * If (ubidi)->defaultParaLevel is non-zero, the result is bit 7 (the
 * CONTEXT_RTL bit) of dirProps[index], i.e. 0 or 1; otherwise it is
 * (ubidi)->paraLevel.
 * The whole expansion is parenthesized so that it stays a single operand
 * in any context, and `index` is parenthesized like the other argument.
 * `ubidi` is evaluated more than once.
 */
#define GET_PARALEVEL(ubidi, index) \
        ((UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[(index)]>>7 \
                                                : (ubidi)->paraLevel))
|
sl@0
|
121 |
|
sl@0
|
122 |
/* Paragraph type for multiple paragraph support --------------------------- */
|
sl@0
|
123 |
/* element type of the UBiDi.paras array, which holds the limits of paragraphs */
typedef int32_t Para;

/* code points U+000D and U+000A */
#define CR 0x000D
#define LF 0x000A
|
sl@0
|
127 |
|
sl@0
|
128 |
/* Run structure for reordering --------------------------------------------- */
|
sl@0
|
129 |
|
sl@0
|
130 |
/*
 * One entry of the runs array used for reordering.
 * Bit 31 of logicalStart is a flag, not part of the index; use the
 * INDEX_ODD_BIT macro family to read or modify it.
 */
typedef struct Run {
    int32_t logicalStart;   /* first character of the run; b31 indicates even/odd level */
    int32_t visualLimit;    /* last visual position of the run +1 */
} Run;
|
sl@0
|
134 |
|
sl@0
|
135 |
/* in a Run, logicalStart will get this bit set if the run level is odd */
|
sl@0
|
136 |
/*
 * Bit 31 of Run.logicalStart flags a run with an odd level; the low 31 bits
 * hold the logical index.
 *
 * All bit manipulation below is done in uint32_t:
 *  - shifting a signed 1 into bit 31 is undefined behavior in C, while the
 *    unsigned shift is well defined and keeps only the low bit of `level`;
 *  - INDEX_ODD_BIT has type unsigned long, which is 64 bits wide on LP64
 *    platforms; masking a sign-extended negative int32_t with
 *    ~INDEX_ODD_BIT there leaves the upper 32 bits set, so the result
 *    does not compare equal to the plain index.
 * Converting a uint32_t with bit 31 set back to int32_t is
 * implementation-defined (not undefined); it wraps on two's-complement
 * implementations.
 * Every macro argument is parenthesized, so compound expressions are safe.
 */
#define INDEX_ODD_BIT (1UL<<31)

/* combine an index and a level into a logicalStart value (int32_t) */
#define MAKE_INDEX_ODD_PAIR(index, level) \
        ((int32_t)((uint32_t)(index)|((uint32_t)(level)<<31)))
/* set bit 31 of lvalue x if level is odd; x is evaluated once */
#define ADD_ODD_BIT_FROM_LEVEL(x, level) \
        ((x)|=(int32_t)((uint32_t)(level)<<31))
/* clear bit 31 of lvalue x; x is evaluated once */
#define REMOVE_ODD_BIT(x)   ((x)&=(int32_t)~(uint32_t)INDEX_ODD_BIT)

/* the index without the odd bit, as a non-negative int32_t */
#define GET_INDEX(x)    ((int32_t)((uint32_t)(x)&~(uint32_t)INDEX_ODD_BIT))
/* the odd bit as 0 or 1 */
#define GET_ODD_BIT(x)  ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)   (((uint32_t)(x)&(uint32_t)INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x)  (((uint32_t)(x)&(uint32_t)INDEX_ODD_BIT)==0)
|
sl@0
|
146 |
|
sl@0
|
147 |
U_CFUNC UBool
|
sl@0
|
148 |
ubidi_getRuns(UBiDi *pBiDi);
|
sl@0
|
149 |
|
sl@0
|
150 |
/* UBiDi structure ----------------------------------------------------------- */
|
sl@0
|
151 |
|
sl@0
|
152 |
struct UBiDi {
|
sl@0
|
153 |
/* pointer to parent paragraph object (pointer to self if this object is
|
sl@0
|
154 |
* a paragraph object); set to NULL in a newly opened object; set to a
|
sl@0
|
155 |
* real value after a successful execution of ubidi_setPara or ubidi_setLine
|
sl@0
|
156 |
*/
|
sl@0
|
157 |
const UBiDi * pParaBiDi;
|
sl@0
|
158 |
|
sl@0
|
159 |
const UBiDiProps *bdp;
|
sl@0
|
160 |
|
sl@0
|
161 |
/* alias pointer to the current text */
|
sl@0
|
162 |
const UChar *text;
|
sl@0
|
163 |
|
sl@0
|
164 |
/* length of the current text */
|
sl@0
|
165 |
int32_t length;
|
sl@0
|
166 |
|
sl@0
|
167 |
/* memory sizes in bytes */
|
sl@0
|
168 |
int32_t dirPropsSize, levelsSize, parasSize, runsSize;
|
sl@0
|
169 |
|
sl@0
|
170 |
/* allocated memory */
|
sl@0
|
171 |
DirProp *dirPropsMemory;
|
sl@0
|
172 |
UBiDiLevel *levelsMemory;
|
sl@0
|
173 |
Para *parasMemory;
|
sl@0
|
174 |
Run *runsMemory;
|
sl@0
|
175 |
|
sl@0
|
176 |
/* indicators for whether memory may be allocated after ubidi_open() */
|
sl@0
|
177 |
UBool mayAllocateText, mayAllocateRuns;
|
sl@0
|
178 |
|
sl@0
|
179 |
/* arrays with one value per text-character */
|
sl@0
|
180 |
const DirProp *dirProps;
|
sl@0
|
181 |
UBiDiLevel *levels;
|
sl@0
|
182 |
|
sl@0
|
183 |
/* are we performing an approximation of the "inverse BiDi" algorithm? */
|
sl@0
|
184 |
UBool isInverse;
|
sl@0
|
185 |
UBool isInverse2;
|
sl@0
|
186 |
|
sl@0
|
187 |
/* must block separators receive level 0? */
|
sl@0
|
188 |
UBool orderParagraphsLTR;
|
sl@0
|
189 |
|
sl@0
|
190 |
/* the paragraph level */
|
sl@0
|
191 |
UBiDiLevel paraLevel;
|
sl@0
|
192 |
/* original paraLevel when contextual */
|
sl@0
|
193 |
/* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
|
sl@0
|
194 |
UBiDiLevel defaultParaLevel;
|
sl@0
|
195 |
|
sl@0
|
196 |
/* the following is set in ubidi_setPara, used in processPropertySeq */
|
sl@0
|
197 |
const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */
|
sl@0
|
198 |
|
sl@0
|
199 |
/* the overall paragraph or line directionality - see UBiDiDirection */
|
sl@0
|
200 |
UBiDiDirection direction;
|
sl@0
|
201 |
|
sl@0
|
202 |
/* flags is a bit set for which directional properties are in the text */
|
sl@0
|
203 |
Flags flags;
|
sl@0
|
204 |
|
sl@0
|
205 |
/* characters after trailingWSStart are WS and are */
|
sl@0
|
206 |
/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
|
sl@0
|
207 |
int32_t trailingWSStart;
|
sl@0
|
208 |
|
sl@0
|
209 |
/* fields for paragraph handling */
|
sl@0
|
210 |
int32_t paraCount; /* set in getDirProps() */
|
sl@0
|
211 |
Para *paras; /* limits of paragraphs, filled in
|
sl@0
|
212 |
ResolveExplicitLevels() or CheckExplicitLevels() */
|
sl@0
|
213 |
|
sl@0
|
214 |
/* for single paragraph text, we only need a tiny array of paras (no malloc()) */
|
sl@0
|
215 |
Para simpleParas[1];
|
sl@0
|
216 |
|
sl@0
|
217 |
/* fields for line reordering */
|
sl@0
|
218 |
int32_t runCount; /* ==-1: runs not set up yet */
|
sl@0
|
219 |
Run *runs;
|
sl@0
|
220 |
|
sl@0
|
221 |
/* for non-mixed text, we only need a tiny array of runs (no malloc()) */
|
sl@0
|
222 |
Run simpleRuns[1];
|
sl@0
|
223 |
};
|
sl@0
|
224 |
|
sl@0
|
225 |
/*
 * Validity checks on a UBiDi pointer x, based only on pParaBiDi.
 * x is evaluated several times, so it must not have side effects.
 *
 * IS_VALID_PARA:          x is non-NULL and is its own parent (x->pParaBiDi==x).
 * IS_VALID_LINE:          x is non-NULL, has a parent, and that parent is its
 *                         own parent.  A valid paragraph object also
 *                         satisfies this test.
 * IS_VALID_PARA_OR_LINE:  either of the above.
 */
#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
#define IS_VALID_LINE(x) ((x) && ((x)->pParaBiDi) && ((x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))
#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
|
sl@0
|
228 |
|
sl@0
|
229 |
/* helper function to (re)allocate memory if allowed */
|
sl@0
|
230 |
U_CFUNC UBool
|
sl@0
|
231 |
ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
|
sl@0
|
232 |
|
sl@0
|
233 |
/* helper macros for each allocated array in UBiDi */
|
sl@0
|
234 |
/*
 * Each macro forwards to ubidi_getMemory() for one of the UBiDi arrays.
 * The sizeNeeded parameter of ubidi_getMemory() is an int32_t, so the
 * size_t products (length)*sizeof(T) are cast explicitly instead of being
 * narrowed implicitly at the call.
 */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (int32_t)((length)*sizeof(Run)))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        TRUE, (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                        TRUE, (int32_t)((length)*sizeof(Para)))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        TRUE, (int32_t)((length)*sizeof(Run)))
|
sl@0
|
262 |
|
sl@0
|
263 |
#endif
|
sl@0
|
264 |
|
sl@0
|
265 |
#endif
|