os/ossrv/ossrv_pub/boost_apis/boost/regex/v4/states.hpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/ossrv/ossrv_pub/boost_apis/boost/regex/v4/states.hpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,276 @@
     1.4 +/*
     1.5 + *
     1.6 + * Copyright (c) 1998-2002
     1.7 + * John Maddock
     1.8 + *
     1.9 + * Use, modification and distribution are subject to the 
    1.10 + * Boost Software License, Version 1.0. (See accompanying file 
    1.11 + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
    1.12 + *
    1.13 + */
    1.14 +
    1.15 + /*
    1.16 +  *   LOCATION:    see http://www.boost.org for most recent version.
    1.17 +  *   FILE         states.hpp
    1.18 +  *   VERSION      see <boost/version.hpp>
    1.19 +  *   DESCRIPTION: Declares internal state machine structures.
    1.20 +  */
    1.21 +
    1.22 +#ifndef BOOST_REGEX_V4_STATES_HPP
    1.23 +#define BOOST_REGEX_V4_STATES_HPP
    1.24 +
    1.25 +#ifdef BOOST_HAS_ABI_HEADERS
    1.26 +#  include BOOST_ABI_PREFIX
    1.27 +#endif
    1.28 +
    1.29 +namespace boost{
    1.30 +namespace re_detail{
    1.31 +
    1.32 +/*** mask_type *******************************************************
    1.33 +Whenever we have a choice of two alternatives, we use an array of bytes
    1.34 +to indicate which of the two alternatives it is possible to take for any
    1.35 +given input character.  If mask_take is set, then we can take the next 
    1.36 +state, and if mask_skip is set then we can take the alternative.
    1.37 +***********************************************************************/
    1.38 +enum mask_type   // bit flags; combined with | (see mask_any)
    1.39 +{
    1.40 +   mask_take = 1,   // bit 0: the next state can be taken
    1.41 +   mask_skip = 2,   // bit 1: the alternative can be taken
    1.42 +   mask_init = 4,   // bit 2: separate flag, not included in mask_any/mask_all; its use is not shown in this header
    1.43 +   mask_any = mask_skip | mask_take,   // both low bits set (value 3)
    1.44 +   mask_all = mask_any   // second name for the same value as mask_any
    1.45 +};
    1.46 +
    1.47 +/*** helpers **********************************************************
    1.48 +These helpers let us use function overload resolution to detect whether
    1.49 +we have narrow or wide character strings:
    1.50 +***********************************************************************/
    1.51 +struct _narrow_type{};   // tag type: chosen for char, unsigned char and signed char
    1.52 +struct _wide_type{};   // tag type: chosen for every other character type
    1.53 +template <class charT> struct is_byte;   // primary template declared first so it can be specialized below
    1.54 +template<>             struct is_byte<char>         { typedef _narrow_type width_type; };
    1.55 +template<>             struct is_byte<unsigned char>{ typedef _narrow_type width_type; };
    1.56 +template<>             struct is_byte<signed char>  { typedef _narrow_type width_type; };
    1.57 +template <class charT> struct is_byte               { typedef _wide_type width_type; };   // primary definition: anything not specialized above is wide
    1.58 +
    1.59 +/*** enum syntax_element_type ******************************************
    1.60 +Every record in the state machine falls into one of the following types:
    1.61 +***********************************************************************/
    1.62 +enum syntax_element_type   // each enumerator is defined as the previous one + 1, giving values 0..28
    1.63 +{
    1.64 +   // start of a marked sub-expression, or perl-style (?...) extension
    1.65 +   syntax_element_startmark = 0,
    1.66 +   // end of a marked sub-expression, or perl-style (?...) extension
    1.67 +   syntax_element_endmark = syntax_element_startmark + 1,
    1.68 +   // any sequence of literal characters
    1.69 +   syntax_element_literal = syntax_element_endmark + 1,
    1.70 +   // start of line assertion: ^
    1.71 +   syntax_element_start_line = syntax_element_literal + 1,
    1.72 +   // end of line assertion $
    1.73 +   syntax_element_end_line = syntax_element_start_line + 1,
    1.74 +   // match any character: .
    1.75 +   syntax_element_wild = syntax_element_end_line + 1,
    1.76 +   // end of expression: we have a match when we get here
    1.77 +   syntax_element_match = syntax_element_wild + 1,
    1.78 +   // perl style word boundary: \b
    1.79 +   syntax_element_word_boundary = syntax_element_match + 1,
    1.80 +   // perl style within word boundary: \B
    1.81 +   syntax_element_within_word = syntax_element_word_boundary + 1,
    1.82 +   // start of word assertion: \<
    1.83 +   syntax_element_word_start = syntax_element_within_word + 1,
    1.84 +   // end of word assertion: \>
    1.85 +   syntax_element_word_end = syntax_element_word_start + 1,
    1.86 +   // start of buffer assertion: \`
    1.87 +   syntax_element_buffer_start = syntax_element_word_end + 1,
    1.88 +   // end of buffer assertion: \'
    1.89 +   syntax_element_buffer_end = syntax_element_buffer_start + 1,
    1.90 +   // backreference to previously matched sub-expression
    1.91 +   syntax_element_backref = syntax_element_buffer_end + 1,
    1.92 +   // either a wide character set [..] or one with multicharacter collating elements:
    1.93 +   syntax_element_long_set = syntax_element_backref + 1,
    1.94 +   // narrow character set: [...]
    1.95 +   syntax_element_set = syntax_element_long_set + 1,
    1.96 +   // jump to a new state in the machine:
    1.97 +   syntax_element_jump = syntax_element_set + 1,
    1.98 +   // choose between two production states:
    1.99 +   syntax_element_alt = syntax_element_jump + 1,
   1.100 +   // a repeat
   1.101 +   syntax_element_rep = syntax_element_alt + 1,
   1.102 +   // match a combining character sequence
   1.103 +   syntax_element_combining = syntax_element_rep + 1,
   1.104 +   // perl style soft buffer end: \z
   1.105 +   syntax_element_soft_buffer_end = syntax_element_combining + 1,
   1.106 +   // perl style continuation: \G
   1.107 +   syntax_element_restart_continue = syntax_element_soft_buffer_end + 1,
   1.108 +   // single character repeats (the next four enumerators):
   1.109 +   syntax_element_dot_rep = syntax_element_restart_continue + 1,
   1.110 +   syntax_element_char_rep = syntax_element_dot_rep + 1,
   1.111 +   syntax_element_short_set_rep = syntax_element_char_rep + 1,
   1.112 +   syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
   1.113 +   // a backstep for lookbehind repeats:
   1.114 +   syntax_element_backstep = syntax_element_long_set_rep + 1,
   1.115 +   // an assertion that a mark was matched:
   1.116 +   syntax_element_assert_backref = syntax_element_backstep + 1,
   1.117 +   syntax_element_toggle_case = syntax_element_assert_backref + 1   // NOTE(review): no comment of its own; name suggests a case-sensitivity switch - confirm
   1.118 +};
   1.119 +
   1.120 +#ifdef BOOST_REGEX_DEBUG   // only declared when BOOST_REGEX_DEBUG is defined
   1.121 +// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
   1.122 +std::ostream& operator<<(std::ostream&, syntax_element_type);   // declaration only: no definition in this header, and std::ostream must already be declared by the includer
   1.123 +#endif
   1.124 +
   1.125 +struct re_syntax_base;   // forward declaration so offset_type can hold a pointer to it
   1.126 +
   1.127 +/*** union offset_type ************************************************
   1.128 +Points to another state in the machine.  During machine construction
   1.129 +we use integral offsets, but these are converted to pointers before
   1.130 +execution of the machine.
   1.131 +***********************************************************************/
   1.132 +union offset_type
   1.133 +{
   1.134 +   re_syntax_base*   p;   // pointer form, used once the machine is ready to execute
   1.135 +   std::ptrdiff_t    i;   // integral offset form, used during machine construction
   1.136 +};
   1.137 +
   1.138 +/*** struct re_syntax_base ********************************************
   1.139 +Base class for all states in the machine.
   1.140 +***********************************************************************/
   1.141 +struct re_syntax_base   // every other state struct in this header derives from this, directly or indirectly
   1.142 +{
   1.143 +   syntax_element_type   type;         // what kind of state this is
   1.144 +   offset_type           next;         // next state in the machine (offset or pointer, see offset_type)
   1.145 +};
   1.146 +
   1.147 +/*** struct re_brace **************************************************
   1.148 +A marked parenthesis.
   1.149 +***********************************************************************/
   1.150 +struct re_brace : public re_syntax_base
   1.151 +{
   1.152 +   // The index to match, can be zero (don't mark the sub-expression)
   1.153 +   // or negative (for perl style (?...) extensions):
   1.154 +   int index;
   1.155 +};
   1.156 +
   1.157 +/*** struct re_dot **************************************************
   1.158 +Match anything.
   1.159 +***********************************************************************/
   1.160 +enum   // unnamed enum; NOTE(review): presumably the values for re_dot::mask - confirm against the matcher
   1.161 +{
   1.162 +   dont_care = 1,
   1.163 +   force_not_newline = 0,
   1.164 +   force_newline = 2,   // same numeric value as test_not_newline below
   1.165 +
   1.166 +   test_not_newline = 2,   // same numeric value as force_newline above
   1.167 +   test_newline = 3
   1.168 +};
   1.169 +struct re_dot : public re_syntax_base   // state for "match anything"
   1.170 +{
   1.171 +   unsigned char mask;   // NOTE(review): presumably holds one of the unnamed enum values declared just before this struct - confirm
   1.172 +};
   1.173 +
   1.174 +/*** struct re_literal ************************************************
   1.175 +A string of literals, following this structure will be an 
   1.176 +array of characters: charT[length]
   1.177 +***********************************************************************/
   1.178 +struct re_literal : public re_syntax_base
   1.179 +{
   1.180 +   unsigned int length;   // number of charT elements stored immediately after this structure
   1.181 +};
   1.182 +
   1.183 +/*** struct re_case ************************************************
   1.184 +Indicates whether we are moving to a case insensitive block or not
   1.185 +***********************************************************************/
   1.186 +struct re_case : public re_syntax_base
   1.187 +{
   1.188 +   bool icase;   // NOTE(review): presumably true when the block being entered is case insensitive - confirm
   1.189 +};
   1.190 +
   1.191 +/*** struct re_set_long ***********************************************
   1.192 +A wide character set of characters, following this structure will be
   1.193 +an array of type charT:
   1.194 +First csingles null-terminated strings
   1.195 +Then 2 * cranges null-terminated strings
   1.196 +Then cequivalents null-terminated strings
   1.197 +***********************************************************************/
   1.198 +template <class mask_type>   // NOTE: inside this struct the parameter name hides the enum mask_type declared earlier in this namespace
   1.199 +struct re_set_long : public re_syntax_base
   1.200 +{
   1.201 +   unsigned int            csingles, cranges, cequivalents;   // counts for the three groups of strings that follow this structure
   1.202 +   mask_type               cclasses;    // NOTE(review): presumably the character classes that match - confirm
   1.203 +   mask_type               cnclasses;   // NOTE(review): presumably the negated character classes - confirm
   1.204 +   bool                    isnot;       // NOTE(review): presumably true for a negated set - confirm
   1.205 +   bool                    singleton;   // meaning not shown in this header
   1.206 +};
   1.207 +
   1.208 +/*** struct re_set ****************************************************
   1.209 +A set of narrow characters; matches any character whose _map entry is non-zero
   1.210 +***********************************************************************/
   1.211 +struct re_set : public re_syntax_base
   1.212 +{
   1.213 +   unsigned char _map[1 << CHAR_BIT];   // one entry per possible unsigned char value (1 << CHAR_BIT entries)
   1.214 +};
   1.215 +
   1.216 +/*** struct re_jump ***************************************************
   1.217 +Jump to a new location in the machine (not next).
   1.218 +***********************************************************************/
   1.219 +struct re_jump : public re_syntax_base   // base of re_alt, and through it of re_repeat
   1.220 +{
   1.221 +   offset_type     alt;                 // location to jump to (offset or pointer, see offset_type)
   1.222 +};
   1.223 +
   1.224 +/*** struct re_alt ***************************************************
   1.225 +Jump to a new location in the machine (possibly next).
   1.226 +***********************************************************************/
   1.227 +struct re_alt : public re_jump   // inherits the jump target 'alt' from re_jump
   1.228 +{
   1.229 +   unsigned char   _map[1 << CHAR_BIT]; // which characters can take the jump
   1.230 +   unsigned int    can_be_null;         // true if we match a NULL string (note: declared unsigned int, not bool)
   1.231 +};
   1.232 +
   1.233 +/*** struct re_repeat *************************************************
   1.234 +Repeat a section of the machine
   1.235 +***********************************************************************/
   1.236 +struct re_repeat : public re_alt   // inherits _map and can_be_null from re_alt, and alt from re_jump
   1.237 +{
   1.238 +   std::size_t   min, max;  // min and max allowable repeats
   1.239 +   int           id;        // Unique identifier for this repeat
   1.240 +   bool          leading;   // True if this repeat is at the start of the machine (lets us optimize some searches)
   1.241 +   bool          greedy;    // True if this is a greedy repeat
   1.242 +};
   1.243 +
   1.244 +/*** enum re_jump_size_type *******************************************
   1.245 +Provides compiled size of re_jump structure (allowing for trailing alignment).
   1.246 +We provide this so we know how many bytes to insert when constructing the machine
   1.247 +(The value of padding_mask is defined in regex_raw_buffer.hpp).
   1.248 +***********************************************************************/
   1.249 +enum re_jump_size_type   // each value is (sizeof(T) + padding_mask) & ~padding_mask, i.e. sizeof(T) rounded up, assuming padding_mask is (a power of two) - 1
   1.250 +{
   1.251 +   re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),   // padded size of re_jump
   1.252 +   re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),   // padded size of re_repeat
   1.253 +   re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)   // padded size of re_alt
   1.254 +};
   1.255 +
   1.256 +/*** proc re_is_set_member *********************************************
   1.257 +Forward declaration: we'll need this one later...
   1.258 +***********************************************************************/
   1.259 +
   1.260 +template<class charT, class traits>
   1.261 +struct regex_data;   // forward declaration only; needed by the re_is_set_member signature, defined outside this header
   1.262 +
   1.263 +template <class iterator, class charT, class traits_type, class char_classT>
   1.264 +iterator BOOST_REGEX_CALL re_is_set_member(iterator next,   // declaration only; the definition and the meaning of the returned iterator are not in this header
   1.265 +                          iterator last,   // NOTE(review): next/last presumably delimit the input still to be examined - confirm
   1.266 +                          const re_set_long<char_classT>* set_,   // the long-set state to test against
   1.267 +                          const regex_data<charT, traits_type>& e, bool icase);   // e: regex data object; icase: case-insensitivity flag
   1.268 +
   1.269 +} // namespace re_detail
   1.270 +
   1.271 +} // namespace boost
   1.272 +
   1.273 +#ifdef BOOST_HAS_ABI_HEADERS
   1.274 +#  include BOOST_ABI_SUFFIX
   1.275 +#endif
   1.276 +
   1.277 +#endif
   1.278 +
   1.279 +