/*
******************************************************************************
*
*   Copyright (C) 1999-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiimp.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999aug06
*   created by: Markus W. Scherer
*/

#ifndef UBIDIIMP_H
#define UBIDIIMP_H

/* set import/export definitions */
#ifdef U_COMMON_IMPLEMENTATION

/*
 * NOTE(review): UBiDi, UBiDiLevel and UBiDiDirection are used below but are
 * not declared in this file; presumably the including source pulls in
 * unicode/ubidi.h first - confirm.
 */
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "ubidi_props.h"

/* miscellaneous definitions ---------------------------------------------- */

/* one directional property value per text character (see dirProps in UBiDi) */
typedef uint8_t DirProp;
/* bit set of DIRPROP_FLAG() values */
typedef uint32_t Flags;

/*  Comparing the description of the BiDi algorithm with this implementation
    is easier with the same names for the BiDi types in the code as there.
    See UCharDirection in uchar.h .
*/
enum {
    L=  U_LEFT_TO_RIGHT,
    R=  U_RIGHT_TO_LEFT,
    EN= U_EUROPEAN_NUMBER,
    ES= U_EUROPEAN_NUMBER_SEPARATOR,
    ET= U_EUROPEAN_NUMBER_TERMINATOR,
    AN= U_ARABIC_NUMBER,
    CS= U_COMMON_NUMBER_SEPARATOR,
    B=  U_BLOCK_SEPARATOR,
    S=  U_SEGMENT_SEPARATOR,
    WS= U_WHITE_SPACE_NEUTRAL,
    ON= U_OTHER_NEUTRAL,
    LRE=U_LEFT_TO_RIGHT_EMBEDDING,
    LRO=U_LEFT_TO_RIGHT_OVERRIDE,
    AL= U_RIGHT_TO_LEFT_ARABIC,
    RLE=U_RIGHT_TO_LEFT_EMBEDDING,
    RLO=U_RIGHT_TO_LEFT_OVERRIDE,
    PDF=U_POP_DIRECTIONAL_FORMAT,
    NSM=U_DIR_NON_SPACING_MARK,
    BN= U_BOUNDARY_NEUTRAL,
    dirPropCount
};

/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)

/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/*
 * true for level values 0xfe and above
 * (presumably the UBIDI_DEFAULT_xxx pseudo-levels - confirm against ubidi.h)
 */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)

/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at text position index:
 * when defaultParaLevel is non-zero, it is bit 7 (the CONTEXT_RTL bit) of
 * dirProps[index]; otherwise it is the fixed paraLevel.
 * Both arguments are parenthesized so that arbitrary expressions are safe.
 */
#define GET_PARALEVEL(ubidi, index) \
            ((UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[(index)]>>7 \
                                                    : (ubidi)->paraLevel))

/* Paragraph type for multiple paragraph support --------------------------- */
typedef int32_t Para;

#define CR  0x000D
#define LF  0x000A

/* Run structure for reordering --------------------------------------------- */

typedef struct Run {
    int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level */
            visualLimit;    /* last visual position of the run +1 */
} Run;

/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * The level's lowest bit is moved into bit 31 of the index.
 * The shift is done on a uint32_t and only then converted to int32_t:
 * shifting a signed 1 into the sign bit is undefined behavior in C, while
 * the unsigned shift is well-defined and simply drops the higher level bits.
 * Every macro argument is parenthesized so that expressions such as
 * "a|b" or "lvl&1" can be passed without changing the meaning.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)(level)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)(level)<<31))
#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)

/* accessors for the index part and the odd-level bit of a Run's logicalStart */
#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  (((x)&INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)

U_CFUNC UBool
ubidi_getRuns(UBiDi *pBiDi);

/* UBiDi structure ----------------------------------------------------------- */

struct UBiDi {
    /* pointer to parent paragraph object (pointer to self if this object is
     * a paragraph object); set to NULL in a newly opened object; set to a
     * real value after a successful execution of ubidi_setPara or ubidi_setLine
     */
    const UBiDi * pParaBiDi;

    const UBiDiProps *bdp;

    /* alias pointer to the current text */
    const UChar *text;

    /* length of the current text */
    int32_t length;

    /* memory sizes in bytes */
    int32_t dirPropsSize, levelsSize, parasSize, runsSize;

    /* allocated memory */
    DirProp *dirPropsMemory;
    UBiDiLevel *levelsMemory;
    Para *parasMemory;
    Run *runsMemory;

    /* indicators for whether memory may be allocated after ubidi_open() */
    UBool mayAllocateText, mayAllocateRuns;

    /* arrays with one value per text-character */
    const DirProp *dirProps;
    UBiDiLevel *levels;

    /* are we performing an approximation of the "inverse BiDi" algorithm? */
    UBool isInverse;
    UBool isInverse2;

    /* must block separators receive level 0? */
    UBool orderParagraphsLTR;

    /* the paragraph level */
    UBiDiLevel paraLevel;
    /* original paraLevel when contextual */
    /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
    UBiDiLevel defaultParaLevel;

    /* the following is set in ubidi_setPara, used in processPropertySeq */
    const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */

    /* the overall paragraph or line directionality - see UBiDiDirection */
    UBiDiDirection direction;

    /* flags is a bit set for which directional properties are in the text */
    Flags flags;

    /* characters after trailingWSStart are WS and are */
    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
    int32_t trailingWSStart;

    /* fields for paragraph handling */
    int32_t paraCount;                  /* set in getDirProps() */
    Para *paras;                        /* limits of paragraphs, filled in
                            ResolveExplicitLevels() or CheckExplicitLevels() */

    /* for single paragraph text, we only need a tiny array of paras (no malloc()) */
    Para simpleParas[1];

    /* fields for line reordering */
    int32_t runCount;     /* ==-1: runs not set up yet */
    Run *runs;

    /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
    Run simpleRuns[1];
};

/*
 * Validity checks on a UBiDi pointer x:
 * a paragraph object points to itself through pParaBiDi,
 * a line object points to a parent that in turn points to itself.
 */
#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
#define IS_VALID_LINE(x) ((x) && ((x)->pParaBiDi) && ((x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))
#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))

/* helper function to (re)allocate memory if allowed */
U_CFUNC UBool
ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);

/* helper macros for each allocated array in UBiDi */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        TRUE, (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                        TRUE, (length)*sizeof(Para))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        TRUE, (length)*sizeof(Run))

#endif /* U_COMMON_IMPLEMENTATION */

#endif /* UBIDIIMP_H */