1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/ossrv_pub/boost_apis/boost/regex/v4/states.hpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,276 @@
1.4 +/*
1.5 + *
1.6 + * Copyright (c) 1998-2002
1.7 + * John Maddock
1.8 + *
1.9 + * Use, modification and distribution are subject to the
1.10 + * Boost Software License, Version 1.0. (See accompanying file
1.11 + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
1.12 + *
1.13 + */
1.14 +
1.15 + /*
1.16 + * LOCATION: see http://www.boost.org for most recent version.
1.17 + * FILE states.hpp
1.18 + * VERSION see <boost/version.hpp>
1.19 + * DESCRIPTION: Declares internal state machine structures.
1.20 + */
1.21 +
1.22 +#ifndef BOOST_REGEX_V4_STATES_HPP
1.23 +#define BOOST_REGEX_V4_STATES_HPP
1.24 +
1.25 +#ifdef BOOST_HAS_ABI_HEADERS
1.26 +# include BOOST_ABI_PREFIX
1.27 +#endif
1.28 +
1.29 +namespace boost{
1.30 +namespace re_detail{
1.31 +
1.32 +/*** mask_type *******************************************************
1.33 +Whenever we have a choice of two alternatives, we use an array of bytes
1.34 +to indicate which of the two alternatives it is possible to take for any
1.35 +given input character. If mask_take is set, then we can take the next
1.36 +state, and if mask_skip is set then we can take the alternative. The values are bit flags and may be OR-ed together.
1.37 +***********************************************************************/
1.38 +enum mask_type
1.39 +{
1.40 + mask_take = 1, // bit 0: the next state may be taken
1.41 + mask_skip = 2, // bit 1: the alternative may be taken
1.42 + mask_init = 4, // bit 2: NOTE(review): purpose is not stated in this header - confirm against the code that sets it
1.43 + mask_any = mask_skip | mask_take, // either branch is possible (value 3)
1.44 + mask_all = mask_any // synonym for mask_any; note that it does not include mask_init
1.45 +};
1.46 +
1.47 +/*** helpers **********************************************************
1.48 +These helpers let us use function overload resolution to detect whether
1.49 +we have narrow or wide character strings: is_byte<charT>::width_type names one of the two empty tag types below.
1.50 +***********************************************************************/
1.51 +struct _narrow_type{}; // tag: charT is char, unsigned char or signed char
1.52 +struct _wide_type{}; // tag: charT is any other type
1.53 +template <class charT> struct is_byte; // declared first so the explicit specializations can precede the primary definition
1.54 +template<> struct is_byte<char> { typedef _narrow_type width_type; };
1.55 +template<> struct is_byte<unsigned char>{ typedef _narrow_type width_type; };
1.56 +template<> struct is_byte<signed char> { typedef _narrow_type width_type; };
1.57 +template <class charT> struct is_byte { typedef _wide_type width_type; }; // primary template: everything not specialized above counts as wide
1.58 +
1.59 +/*** enum syntax_element_type ******************************************
1.60 +Every record in the state machine falls into one of the following types:
1.61 +***********************************************************************/
1.62 +enum syntax_element_type
1.63 +{
1.64 + // start of a marked sub-expression, or perl-style (?...) extension
1.65 + syntax_element_startmark = 0, // first value; every later enumerator is defined as its predecessor + 1
1.66 + // end of a marked sub-expression, or perl-style (?...) extension
1.67 + syntax_element_endmark = syntax_element_startmark + 1,
1.68 + // any sequence of literal characters
1.69 + syntax_element_literal = syntax_element_endmark + 1,
1.70 + // start of line assertion: ^
1.71 + syntax_element_start_line = syntax_element_literal + 1,
1.72 + // end of line assertion: $
1.73 + syntax_element_end_line = syntax_element_start_line + 1,
1.74 + // match any character: .
1.75 + syntax_element_wild = syntax_element_end_line + 1,
1.76 + // end of expression: we have a match when we get here
1.77 + syntax_element_match = syntax_element_wild + 1,
1.78 + // perl style word boundary: \b
1.79 + syntax_element_word_boundary = syntax_element_match + 1,
1.80 + // perl style within word boundary: \B
1.81 + syntax_element_within_word = syntax_element_word_boundary + 1,
1.82 + // start of word assertion: \<
1.83 + syntax_element_word_start = syntax_element_within_word + 1,
1.84 + // end of word assertion: \>
1.85 + syntax_element_word_end = syntax_element_word_start + 1,
1.86 + // start of buffer assertion: \`
1.87 + syntax_element_buffer_start = syntax_element_word_end + 1,
1.88 + // end of buffer assertion: \'
1.89 + syntax_element_buffer_end = syntax_element_buffer_start + 1,
1.90 + // backreference to previously matched sub-expression
1.91 + syntax_element_backref = syntax_element_buffer_end + 1,
1.92 + // either a wide character set [..] or one with multicharacter collating elements:
1.93 + syntax_element_long_set = syntax_element_backref + 1,
1.94 + // narrow character set: [...]
1.95 + syntax_element_set = syntax_element_long_set + 1,
1.96 + // jump to a new state in the machine:
1.97 + syntax_element_jump = syntax_element_set + 1,
1.98 + // choose between two production states:
1.99 + syntax_element_alt = syntax_element_jump + 1,
1.100 + // a repeat
1.101 + syntax_element_rep = syntax_element_alt + 1,
1.102 + // match a combining character sequence
1.103 + syntax_element_combining = syntax_element_rep + 1,
1.104 + // perl style soft buffer end: \z
1.105 + syntax_element_soft_buffer_end = syntax_element_combining + 1,
1.106 + // perl style continuation: \G
1.107 + syntax_element_restart_continue = syntax_element_soft_buffer_end + 1,
1.108 + // single character repeats (the four enumerators that follow):
1.109 + syntax_element_dot_rep = syntax_element_restart_continue + 1,
1.110 + syntax_element_char_rep = syntax_element_dot_rep + 1,
1.111 + syntax_element_short_set_rep = syntax_element_char_rep + 1,
1.112 + syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
1.113 + // a backstep for lookbehind repeats:
1.114 + syntax_element_backstep = syntax_element_long_set_rep + 1,
1.115 + // an assertion that a mark was matched:
1.116 + syntax_element_assert_backref = syntax_element_backstep + 1,
1.117 + syntax_element_toggle_case = syntax_element_assert_backref + 1 // last value. NOTE(review): presumably the type tag used by re_case states - confirm
1.118 +};
1.119 +
1.120 +#ifdef BOOST_REGEX_DEBUG
1.121 +// Stream inserter for syntax_element_type, declared only when BOOST_REGEX_DEBUG is defined. dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
1.122 +std::ostream& operator<<(std::ostream&, syntax_element_type); // declaration only; NOTE(review): relies on the includer having made std::ostream visible
1.123 +#endif
1.124 +
1.125 +struct re_syntax_base; // forward declaration: offset_type below needs the name before the struct is defined
1.126 +
1.127 +/*** union offset_type ************************************************
1.128 +Points to another state in the machine. During machine construction
1.129 +we use integral offsets, but these are converted to pointers before
1.130 +execution of the machine. Both members share the same storage, so only one form is meaningful at a time.
1.131 +***********************************************************************/
1.132 +union offset_type
1.133 +{
1.134 + re_syntax_base* p; // pointer form: used once offsets have been converted, i.e. while the machine executes
1.135 + std::ptrdiff_t i; // integral offset form: used while the machine is being constructed
1.136 +};
1.137 +
1.138 +/*** struct re_syntax_base ********************************************
1.139 +Base class for all states in the machine: a type tag plus a link to the following state.
1.140 +***********************************************************************/
1.141 +struct re_syntax_base
1.142 +{
1.143 + syntax_element_type type; // what kind of state this is
1.144 + offset_type next; // next state in the machine (offset or pointer, see offset_type)
1.145 +};
1.146 +
1.147 +/*** struct re_brace **************************************************
1.148 +A marked parenthesis.
1.149 +***********************************************************************/
1.150 +struct re_brace : public re_syntax_base
1.151 +{
1.152 + // The index to match, can be zero (don't mark the sub-expression)
1.153 + // or negative (for perl style (?...) extensions):
1.154 + int index;
1.155 +};
1.156 +
1.157 +/*** struct re_dot **************************************************
1.158 +Match anything. The unnamed enum below precedes re_dot; NOTE(review): how its constants relate to re_dot::mask is not shown in this header.
1.159 +***********************************************************************/
1.160 +enum
1.161 +{
1.162 + dont_care = 1,
1.163 + force_not_newline = 0,
1.164 + force_newline = 2, // same numeric value as test_not_newline below
1.165 +
1.166 + test_not_newline = 2, // same numeric value as force_newline above. NOTE(review): looks deliberate (separate force_/test_ groups) - confirm
1.167 + test_newline = 3
1.168 +};
1.169 +struct re_dot : public re_syntax_base
1.170 +{
1.171 + unsigned char mask; // NOTE(review): presumably holds one of the constants from the unnamed enum above - confirm
1.172 +};
1.173 +
1.174 +/*** struct re_literal ************************************************
1.175 +A string of literals, following this structure will be an
1.176 +array of characters: charT[length]
1.177 +***********************************************************************/
1.178 +struct re_literal : public re_syntax_base
1.179 +{
1.180 + unsigned int length; // number of charT elements in the array that follows this structure
1.181 +};
1.182 +
1.183 +/*** struct re_case ************************************************
1.184 +Indicates whether we are moving to a case insensitive block or not
1.185 +***********************************************************************/
1.186 +struct re_case : public re_syntax_base
1.187 +{
1.188 + bool icase; // NOTE(review): presumably true when the block that follows is case insensitive - confirm
1.189 +};
1.190 +
1.191 +/*** struct re_set_long ***********************************************
1.192 +A set of wide characters. Following this structure will be
1.193 +an array of type charT holding, in this order:
1.194 +first csingles null-terminated strings,
1.195 +then 2 * cranges null-terminated strings,
1.196 +then cequivalents null-terminated strings.
1.197 +***********************************************************************/
1.198 +template <class mask_type> // NOTE: this template parameter hides the enum mask_type declared earlier in this namespace
1.199 +struct re_set_long : public re_syntax_base
1.200 +{
1.201 + unsigned int csingles, cranges, cequivalents; // counts of the trailing strings described in the banner comment
1.202 + mask_type cclasses; // NOTE(review): presumably the character classes that match - confirm
1.203 + mask_type cnclasses; // NOTE(review): presumably the character classes that must not match - confirm
1.204 + bool isnot; // NOTE(review): presumably true for a negated set - confirm
1.205 + bool singleton; // NOTE(review): meaning is not stated in this header - confirm
1.206 +};
1.207 +
1.208 +/*** struct re_set ****************************************************
1.209 +A set of narrow-characters, matches any of _map which is non-zero
1.210 +***********************************************************************/
1.211 +struct re_set : public re_syntax_base
1.212 +{
1.213 + unsigned char _map[1 << CHAR_BIT]; // one entry per possible byte value (2^CHAR_BIT entries)
1.214 +};
1.215 +
1.216 +/*** struct re_jump ***************************************************
1.217 +Jump to a new location in the machine (not next). Also the base of re_alt.
1.218 +***********************************************************************/
1.219 +struct re_jump : public re_syntax_base
1.220 +{
1.221 + offset_type alt; // location to jump to (offset or pointer, see offset_type)
1.222 +};
1.223 +
1.224 +/*** struct re_alt ***************************************************
1.225 +Jump to a new location in the machine (possibly next). Extends re_jump with a per-character lookup table.
1.226 +***********************************************************************/
1.227 +struct re_alt : public re_jump
1.228 +{
1.229 + unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump (one entry per possible byte value)
1.230 + unsigned int can_be_null; // true if we match a NULL string. NOTE(review): declared unsigned int, not bool - confirm whether it carries flag bits
1.231 +};
1.232 +
1.233 +/*** struct re_repeat *************************************************
1.234 +Repeat a section of the machine. Inherits the jump target, character map and can_be_null from re_alt.
1.235 +***********************************************************************/
1.236 +struct re_repeat : public re_alt
1.237 +{
1.238 + std::size_t min, max; // min and max allowable repeats
1.239 + int id; // Unique identifier for this repeat
1.240 + bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches)
1.241 + bool greedy; // True if this is a greedy repeat
1.242 +};
1.243 +
1.244 +/*** enum re_jump_size_type *******************************************
1.245 +Provides compiled sizes of the re_jump, re_repeat and re_alt structures (allowing for trailing alignment).
1.246 +We provide this so we know how many bytes to insert when constructing the machine
1.247 +(The value of padding_mask is defined in regex_raw_buffer.hpp).
1.248 +***********************************************************************/
1.249 +enum re_jump_size_type
1.250 +{
1.251 + re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask), // sizeof rounded up; NOTE(review): assumes padding_mask is (power of two) - 1 - confirm
1.252 + re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask), // same rounding applied to re_repeat
1.253 + re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask) // same rounding applied to re_alt
1.254 +};
1.255 +
1.256 +/*** proc re_is_set_member *********************************************
1.257 +Forward declaration: we'll need this one later... (declaration only; no definition is given in this header)
1.258 +***********************************************************************/
1.259 +
1.260 +template<class charT, class traits>
1.261 +struct regex_data; // forward declaration: only used by reference in the signature below
1.262 +
1.263 +template <class iterator, class charT, class traits_type, class char_classT>
1.264 +iterator BOOST_REGEX_CALL re_is_set_member(iterator next, // NOTE(review): presumably the current input position - confirm
1.265 + iterator last, // NOTE(review): presumably the end of the input - confirm
1.266 + const re_set_long<char_classT>* set_, // the long-set state to test against
1.267 + const regex_data<charT, traits_type>& e, bool icase); // NOTE(review): icase presumably selects case insensitive matching - confirm
1.268 +
1.269 +} // namespace re_detail
1.270 +
1.271 +} // namespace boost
1.272 +
1.273 +#ifdef BOOST_HAS_ABI_HEADERS
1.274 +# include BOOST_ABI_SUFFIX
1.275 +#endif
1.276 +
1.277 +#endif
1.278 +
1.279 +