1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/rbbidata.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,192 @@
1.4 +/*
1.5 +*******************************************************************************
1.6 +*
1.7 +* Copyright (C) 1999-2005, International Business Machines
1.8 +* Corporation and others. All Rights Reserved.
1.9 +*
1.10 +*******************************************************************************
1.11 +* file name: rbbidata.h
1.12 +* encoding: US-ASCII
1.13 +* tab size: 8 (not used)
1.14 +* indentation:4
1.15 +*
1.16 +* RBBI data formats Includes
1.17 +*
1.18 +* Structs that describe the format of the Binary RBBI data,
1.19 +* as it is stored in ICU's data file.
1.20 +*
1.21 +* RBBIDataWrapper - Instances of this class sit between the
1.22 +* raw data structs and the RulesBasedBreakIterator objects
1.23 +* that are created by applications. The wrapper class
1.24 +* provides reference counting for the underlying data,
1.25 +* and direct pointers to data that would not otherwise
1.26 +* be accessible without ugly pointer arithmetic. The
1.27 +* wrapper does not attempt to provide any higher level
1.28 +* abstractions for the data itself.
1.29 +*
1.30 +* There will be only one instance of RBBIDataWrapper for any
1.31 +* set of RBBI run time data being shared by instances
1.32 +* (clones) of RulesBasedBreakIterator.
1.33 +*/
1.34 +
1.35 +#ifndef __RBBIDATA_H__
1.36 +#define __RBBIDATA_H__
1.37 +
1.38 +#include "unicode/utypes.h"
1.39 +#include "unicode/udata.h"
1.40 +#include "udataswp.h"
1.41 +
1.42 +/**
1.43 + * Swap RBBI (RulesBasedBreakIterator) binary data. See udataswp.h.
1.44 + * @internal
1.45 + */
1.46 +U_CAPI int32_t U_EXPORT2
1.47 +ubrk_swap(const UDataSwapper *ds,
1.48 + const void *inData, int32_t length, void *outData,
1.49 + UErrorCode *pErrorCode);
1.50 +
1.51 +#ifdef XP_CPLUSPLUS
1.52 +
1.53 +#include "unicode/uobject.h"
1.54 +#include "unicode/unistr.h"
1.55 +#include "utrie.h"
1.56 +
1.57 +U_NAMESPACE_BEGIN
1.58 +
1.59 +/*
1.60 + * The following structs map exactly onto the raw data from the ICU common data file.
1.61 + */
1.62 +struct RBBIDataHeader {
1.63 + uint32_t fMagic; /* == 0xb1a0 */
1.64 + uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
1.65 + /* if there is one associated with this data. */
1.66 + /* (version originates in rbbi, is copied to UDataInfo) */
1.67 + /* For ICU 3.2 and earlier, this field was */
1.68 + /* uint32_t fVersion */
1.69 + /* with a value of 1. */
1.70 + uint32_t fLength; /* Total length in bytes of this RBBI Data, */
1.71 + /* including all sections, not just the header. */
1.72 + uint32_t fCatCount; /* Number of character categories. */
1.73 +
1.74 + /* */
1.75 + /* Offsets and sizes of each of the subsections within the RBBI data. */
1.76 + /* All offsets are bytes from the start of the RBBIDataHeader. */
1.77 + /* All sizes are in bytes. */
1.78 + /* */
1.79 + uint32_t fFTable; /* Offset to the forward state transition table. */
1.80 + uint32_t fFTableLen; /* Size of the forward state transition table. */
1.81 + uint32_t fRTable; /* Offset to the reverse state transition table. */
1.82 + uint32_t fRTableLen; /* Size of the reverse state transition table. */
1.83 + uint32_t fSFTable; /* Offset to the safe point forward transition table. */
1.84 + uint32_t fSFTableLen; /* Size of the safe point forward transition table. */
1.85 + uint32_t fSRTable; /* Offset to the safe point reverse transition table. */
1.86 + uint32_t fSRTableLen; /* Size of the safe point reverse transition table. */
1.87 + uint32_t fTrie; /* Offset to Trie data for character categories */
1.88 + uint32_t fTrieLen; /* Size of the Trie data. */
1.89 + uint32_t fRuleSource; /* Offset to the source for the break */
1.90 + uint32_t fRuleSourceLen; /* rules. Stored UChar *. */
1.91 + uint32_t fStatusTable; /* Offset to the table of rule status values */
1.92 + uint32_t fStatusTableLen; /* Size of the rule status table. */
1.93 +
1.94 + uint32_t fReserved[6]; /* Reserved for expansion */
1.95 +
1.96 +};
1.97 +
1.98 +
1.99 +
1.100 +struct RBBIStateTableRow {
1.101 + int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
1.102 + /* Value 0: not an accepting state. */
1.103 + /* -1: Unconditional Accepting state. */
1.104 + /* positive: Look-ahead match has completed. */
1.105 + /* Actual boundary position happened earlier */
1.106 + /* Value here == fLookAhead in earlier */
1.107 + /* state, at actual boundary pos. */
1.108 + int16_t fLookAhead; /* Non-zero if this row is for a state that */
1.109 + /* corresponds to a '/' in the rule source. */
1.110 + /* Value is the same as the fAccepting */
1.111 + /* value for the rule (which will appear */
1.112 + /* in a different state). */
1.113 + int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
1.114 + /* from a rule. Value is the index in the */
1.115 + /* StatusTable of the set of matching */
1.116 + /* tags (rule status values) */
1.117 + int16_t fReserved;
1.118 + uint16_t fNextState[2]; /* Next State, indexed by char category. */
1.119 + /* Array Size is fNumCols from the */
1.120 + /* state table header. NOTE(review): RBBIStateTable in this file declares no fNumCols, only fRowLen (bytes per row) -- confirm. */
1.121 + /* CAUTION: see RBBITableBuilder::getTableSize() */
1.122 + /* before changing anything here. */
1.123 +};
1.124 +
1.125 +
1.126 +struct RBBIStateTable {
1.127 + uint32_t fNumStates; /* Number of states. */
1.128 + uint32_t fRowLen; /* Length of a state table row, in bytes. */
1.129 + uint32_t fFlags; /* Option Flags for this state table (presumably RBBIStateTableFlags values -- confirm) */
1.130 + uint32_t fReserved; /* Reserved. */
1.131 + char fTableData[4]; /* First RBBIStateTableRow begins here. */
1.132 + /* (making it char[] simplifies ugly address */
1.133 + /* arithmetic for indexing variable length rows.) */
1.134 +};
1.135 +
1.136 +typedef enum {
1.137 + RBBI_LOOKAHEAD_HARD_BREAK = 1 /* NOTE(review): presumably a bit for RBBIStateTable::fFlags -- confirm */
1.138 +} RBBIStateTableFlags;
1.139 +
1.140 +
1.141 +/* */
1.142 +/* The reference counting wrapper class */
1.143 +/* */
1.144 +class RBBIDataWrapper : public UMemory {
1.145 +public:
1.146 + RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
1.147 + RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
1.148 + ~RBBIDataWrapper();
1.149 +
1.150 + void init(const RBBIDataHeader *data, UErrorCode &status);
1.151 + RBBIDataWrapper *addReference();
1.152 + void removeReference();
1.153 + UBool operator ==(const RBBIDataWrapper &other) const;
1.154 + int32_t hashCode(); /* NOTE(review): not const, unlike operator== -- confirm whether intentional */
1.155 + const UnicodeString &getRuleSourceString() const;
1.156 +#ifdef RBBI_DEBUG
1.157 + void printData();
1.158 + void printTable(const char *heading, const RBBIStateTable *table);
1.159 +#else /* no RBBI_DEBUG: printData/printTable become macros that expand to nothing */
1.160 + #define printData()
1.161 + #define printTable(heading, table)
1.162 +#endif /* RBBI_DEBUG */
1.163 +
1.164 + /* */
1.165 + /* Pointers to items within the data */
1.166 + /* */
1.167 + const RBBIDataHeader *fHeader;
1.168 + const RBBIStateTable *fForwardTable;
1.169 + const RBBIStateTable *fReverseTable;
1.170 + const RBBIStateTable *fSafeFwdTable;
1.171 + const RBBIStateTable *fSafeRevTable;
1.172 + const UChar *fRuleSource;
1.173 + const int32_t *fRuleStatusTable;
1.174 +
1.175 + /* Number of int32_t values in the rule status table. Used to sanity check indexing. */
1.176 + int32_t fStatusMaxIdx;
1.177 +
1.178 + UTrie fTrie; /* presumably the character-category trie located via fHeader->fTrie -- confirm */
1.179 +
1.180 +private:
1.181 + int32_t fRefCount; /* presumably maintained by addReference()/removeReference() -- confirm */
1.182 + UDataMemory *fUDataMem; /* presumably the UDataMemory passed to the udm constructor -- confirm */
1.183 + UnicodeString fRuleString; /* presumably backs getRuleSourceString() -- confirm */
1.184 +
1.185 + RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
1.186 + RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
1.187 +};
1.188 +
1.189 +
1.190 +
1.191 +U_NAMESPACE_END
1.192 +
1.193 +#endif /* C++ */
1.194 +
1.195 +#endif