sl@0
|
1 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
2 |
/// \file regex_compiler.hpp
|
sl@0
|
3 |
/// Contains the definition of regex_compiler, a factory for building regex objects
|
sl@0
|
4 |
/// from strings.
|
sl@0
|
5 |
//
|
sl@0
|
6 |
// Copyright 2004 Eric Niebler. Distributed under the Boost
|
sl@0
|
7 |
// Software License, Version 1.0. (See accompanying file
|
sl@0
|
8 |
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
sl@0
|
9 |
|
sl@0
|
10 |
#ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
|
sl@0
|
11 |
#define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
|
sl@0
|
12 |
|
sl@0
|
13 |
// MS compatible compilers support #pragma once
|
sl@0
|
14 |
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
|
sl@0
|
15 |
# pragma once
|
sl@0
|
16 |
#endif
|
sl@0
|
17 |
|
sl@0
|
18 |
#include <boost/xpressive/basic_regex.hpp>
|
sl@0
|
19 |
#include <boost/xpressive/detail/dynamic/parser.hpp>
|
sl@0
|
20 |
#include <boost/xpressive/detail/dynamic/parse_charset.hpp>
|
sl@0
|
21 |
#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
|
sl@0
|
22 |
#include <boost/xpressive/detail/dynamic/parser_traits.hpp>
|
sl@0
|
23 |
#include <boost/xpressive/detail/core/linker.hpp>
|
sl@0
|
24 |
#include <boost/xpressive/detail/core/optimize.hpp>
|
sl@0
|
25 |
|
sl@0
|
26 |
namespace boost { namespace xpressive
|
sl@0
|
27 |
{
|
sl@0
|
28 |
|
sl@0
|
29 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
30 |
// regex_compiler
|
sl@0
|
31 |
//
|
sl@0
|
32 |
/// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
|
sl@0
|
33 |
///
|
sl@0
|
34 |
/// Class template regex_compiler is used to construct a basic_regex object from a string. The string
|
sl@0
|
35 |
/// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
|
sl@0
|
36 |
/// after which all basic_regex objects created with that regex_compiler object will use that locale.
|
sl@0
|
37 |
/// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
|
sl@0
|
38 |
/// compile() method to construct a basic_regex object, passing it the string representing the regular
|
sl@0
|
39 |
/// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
|
sl@0
|
40 |
/// objects compiled from the same string will have different regex_id's.
|
sl@0
|
41 |
template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
|
sl@0
|
42 |
struct regex_compiler
|
sl@0
|
43 |
{
|
sl@0
|
44 |
typedef BidiIter iterator_type;
|
sl@0
|
45 |
typedef typename iterator_value<BidiIter>::type char_type;
|
sl@0
|
46 |
typedef std::basic_string<char_type> string_type;
|
sl@0
|
47 |
typedef regex_constants::syntax_option_type flag_type;
|
sl@0
|
48 |
typedef RegexTraits traits_type;
|
sl@0
|
49 |
typedef typename traits_type::char_class_type char_class_type;
|
sl@0
|
50 |
typedef typename traits_type::locale_type locale_type;
|
sl@0
|
51 |
|
sl@0
|
52 |
/// Construct a regex_compiler, initializing its compiler traits from the given regex traits.
///
/// \param traits The regex traits to use; defaults to a default-constructed RegexTraits.
explicit regex_compiler(RegexTraits const &traits = RegexTraits())
  : mark_count_(0), hidden_mark_count_(0), traits_(traits), upper_(0)
{
    // Look up the "upper" character class once and cache it; parse_atom() passes it
    // to isctype() when it builds a character-class escape.
    this->upper_ = lookup_classname(this->rxtraits(), "upper");
    BOOST_ASSERT(0 != this->upper_);
}
|
sl@0
|
61 |
|
sl@0
|
62 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
63 |
// imbue
|
sl@0
|
64 |
/// Specify the locale to be used by a regex_compiler.
|
sl@0
|
65 |
///
|
sl@0
|
66 |
/// \param loc The locale that this regex_compiler should use.
|
sl@0
|
67 |
/// \return The previous locale.
|
sl@0
|
68 |
locale_type imbue(locale_type loc)
{
    // Hand the new locale to the traits and keep whatever they report back.
    locale_type previous = this->traits_.imbue(loc);

    // The cached "upper" character class is looked up again after the locale change.
    this->upper_ = lookup_classname(this->rxtraits(), "upper");
    BOOST_ASSERT(0 != this->upper_);

    return previous;
}
|
sl@0
|
75 |
|
sl@0
|
76 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
77 |
// getloc
|
sl@0
|
78 |
/// Get the locale used by a regex_compiler.
|
sl@0
|
79 |
///
|
sl@0
|
80 |
/// \return The locale that this regex_compiler uses.
|
sl@0
|
81 |
locale_type getloc() const
{
    // Forward the query to the compiler traits.
    return this->traits_.getloc();
}
|
sl@0
|
85 |
|
sl@0
|
86 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
87 |
// compile
|
sl@0
|
88 |
/// Builds a basic_regex object from a std::string.
|
sl@0
|
89 |
///
|
sl@0
|
90 |
/// \param pat A std::string containing the regular expression pattern.
|
sl@0
|
91 |
/// \param flags Optional bitmask that determines how the pat string is interpreted. (See syntax_option_type.)
|
sl@0
|
92 |
/// \return A basic_regex object corresponding to the regular expression represented by the string.
|
sl@0
|
93 |
/// \pre The std::string pat contains a valid string-based representation of a regular expression.
|
sl@0
|
94 |
/// \throw regex_error when the string has invalid regular expression syntax.
|
sl@0
|
95 |
basic_regex<BidiIter> compile(string_type pat, flag_type flags = regex_constants::ECMAScript)
{
    // Start from a clean state, then apply the caller's flags.
    this->reset();
    this->traits_.flags(flags);

    string_iterator cur = pat.begin();
    string_iterator last = pat.end();

    // The top level of a regex is a list of alternates. Anything left over after
    // parsing them is reported as a mismatched parenthesis.
    alternates_list branches;
    this->parse_alternates(cur, last, branches);
    detail::ensure(cur == last, regex_constants::error_paren, "mismatched parenthesis");

    // Turn the alternates into a matcher sequence and terminate it with an end_matcher.
    detail::sequence<BidiIter> top = detail::alternates_to_matchable(branches, alternates_factory());
    top += detail::make_dynamic_xpression<BidiIter>(detail::end_matcher());

    // Visit the parse tree to fill in the back-pointers.
    detail::xpression_linker<char_type> linker(this->rxtraits());
    top.first->link(linker);

    // Bundle everything the regex needs into a regex_impl.
    detail::regex_impl<BidiIter> impl;
    impl.xpr_ = top.first;
    impl.traits_.reset(new RegexTraits(this->rxtraits()));
    impl.mark_count_ = this->mark_count_;
    impl.hidden_mark_count_ = this->hidden_mark_count_;

    // Optimization: get the peek chars OR the boyer-moore search string.
    detail::optimize_regex(impl, this->rxtraits(), detail::is_random<BidiIter>());

    return detail::core_access<BidiIter>::make_regex(impl);
}
|
sl@0
|
127 |
|
sl@0
|
128 |
private:
|
sl@0
|
129 |
|
sl@0
|
130 |
typedef typename string_type::const_iterator string_iterator;
|
sl@0
|
131 |
typedef std::list<detail::sequence<BidiIter> > alternates_list;
|
sl@0
|
132 |
typedef detail::escape_value<char_type, char_class_type> escape_value;
|
sl@0
|
133 |
typedef detail::alternates_factory_impl<BidiIter, traits_type> alternates_factory;
|
sl@0
|
134 |
|
sl@0
|
135 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
136 |
// reset
|
sl@0
|
137 |
/// INTERNAL ONLY
|
sl@0
|
138 |
void reset()
|
sl@0
|
139 |
{
|
sl@0
|
140 |
this->mark_count_ = 0;
|
sl@0
|
141 |
this->hidden_mark_count_ = 0;
|
sl@0
|
142 |
this->traits_.flags(regex_constants::ECMAScript);
|
sl@0
|
143 |
}
|
sl@0
|
144 |
|
sl@0
|
145 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
146 |
// rxtraits
|
sl@0
|
147 |
/// INTERNAL ONLY
|
sl@0
|
148 |
traits_type &rxtraits()
{
    // Mutable access to the regex traits held by the compiler traits.
    return this->traits_.traits();
}
|
sl@0
|
152 |
|
sl@0
|
153 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
154 |
// rxtraits (const overload)
|
sl@0
|
155 |
/// INTERNAL ONLY
|
sl@0
|
156 |
traits_type const &rxtraits() const
{
    // Read-only access to the regex traits held by the compiler traits.
    return this->traits_.traits();
}
|
sl@0
|
160 |
|
sl@0
|
161 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
162 |
// parse_alternates
|
sl@0
|
163 |
/// INTERNAL ONLY
|
sl@0
|
164 |
void parse_alternates(string_iterator &begin, string_iterator end, alternates_list &alternates)
{
    // Appends one parsed sequence to `alternates` for each alternate found. On return,
    // `begin` is where the last sequence ended, i.e. before any token that was examined
    // and turned out not to be an alternation token.
    using namespace regex_constants;
    string_iterator resume_at;

    for(;;)
    {
        alternates.push_back(this->parse_sequence(begin, end));
        resume_at = begin;

        if(begin == end)
        {
            break;
        }

        // get_token() receives `begin` by reference, so remember `resume_at` first.
        if(token_alternate != this->traits_.get_token(begin, end))
        {
            break;
        }
    }

    begin = resume_at;
}
|
sl@0
|
178 |
|
sl@0
|
179 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
180 |
// parse_group
|
sl@0
|
181 |
/// INTERNAL ONLY
|
sl@0
|
182 |
detail::sequence<BidiIter> parse_group(string_iterator &begin, string_iterator end)
{
    // Called by parse_atom() when it sees token_group_begin. Classifies the group,
    // parses the alternates inside it, requires a token_group_end after them, and
    // wraps the result in the matcher that fits the group type.
    using namespace regex_constants;

    int mark_index = 0;
    bool is_keeper = false;
    bool is_lookahead = false;
    bool is_lookbehind = false;
    bool is_negative = false;
    std::size_t marks_on_entry = this->mark_count_;

    detail::sequence<BidiIter> group, terminator;
    string_iterator probe = string_iterator();

    // Remember the flags in effect on entry; they are restored before the normal return.
    syntax_option_type flags_on_entry = this->traits_.flags();

    switch(this->traits_.get_group_type(begin, end))
    {
    case token_no_mark:
        // Don't process empty groups like (?:) or (?i)
        // BUGBUG this doesn't handle the degenerate (?:)+ correctly
        probe = begin;
        if(token_group_end == this->traits_.get_token(probe, end))
        {
            begin = probe;
            return this->parse_atom(begin, end);
        }
        break;

    case token_negative_lookahead:
        is_negative = true; // fall-through
    case token_positive_lookahead:
        is_lookahead = true;
        terminator = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
        break;

    case token_negative_lookbehind:
        is_negative = true; // fall-through
    case token_positive_lookbehind:
        is_lookbehind = true;
        terminator = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
        break;

    case token_independent_sub_expression:
        is_keeper = true;
        terminator = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
        break;

    case token_comment:
        // Skip tokens until the group end, then carry on with the following atom.
        while(detail::ensure(begin != end, error_paren, "mismatched parenthesis"))
        {
            switch(this->traits_.get_token(begin, end))
            {
            case token_group_end:
                return this->parse_atom(begin, end);
            case token_escape:
                detail::ensure(begin != end, error_escape, "incomplete escape sequence");
                // fall-through
            case token_literal:
                ++begin;
                // fall-through
            default:
                ;
            }
        }
        break;

    default:
        // Any other group type becomes a numbered capture.
        mark_index = static_cast<int>(++this->mark_count_);
        group = detail::make_dynamic_xpression<BidiIter>(detail::mark_begin_matcher(mark_index));
        terminator = detail::make_dynamic_xpression<BidiIter>(detail::mark_end_matcher(mark_index));
        break;
    }

    // alternates
    alternates_list branches;
    this->parse_alternates(begin, end, branches);
    detail::ensure
    (
        begin != end && token_group_end == this->traits_.get_token(begin, end)
      , error_paren
      , "mismatched parenthesis"
    );

    group += detail::alternates_to_matchable(branches, alternates_factory());
    group += terminator;

    typedef shared_ptr<detail::matchable<BidiIter> const> xpr_type;

    // True when the mark count changed while this group was being parsed.
    bool do_save = (this->mark_count_ != marks_on_entry);

    if(is_lookahead)
    {
        detail::lookahead_matcher<xpr_type> matcher(group.first, is_negative, do_save);
        group = detail::make_dynamic_xpression<BidiIter>(matcher);
    }
    else if(is_lookbehind)
    {
        detail::lookbehind_matcher<xpr_type> matcher(group.first, is_negative, do_save);
        group = detail::make_dynamic_xpression<BidiIter>(matcher);
    }
    else if(is_keeper) // independent sub-expression
    {
        detail::keeper_matcher<xpr_type> matcher(group.first, do_save);
        group = detail::make_dynamic_xpression<BidiIter>(matcher);
    }

    // restore the modifiers
    this->traits_.flags(flags_on_entry);
    return group;
}
|
sl@0
|
283 |
|
sl@0
|
284 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
285 |
// parse_charset
|
sl@0
|
286 |
/// INTERNAL ONLY
|
sl@0
|
287 |
detail::sequence<BidiIter> parse_charset(string_iterator &begin, string_iterator end)
{
    // A helper does the actual parsing and fills in the compound charset.
    detail::compound_charset<traits_type> charset;
    detail::parse_charset(begin, end, charset, this->traits_);

    // Wrap the charset in a matcher built with the current traits and flags.
    return detail::make_charset_xpression<BidiIter>
    (
        charset
      , this->rxtraits()
      , this->traits_.flags()
    );
}
|
sl@0
|
301 |
|
sl@0
|
302 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
303 |
// parse_atom
|
sl@0
|
304 |
/// INTERNAL ONLY
|
sl@0
|
305 |
detail::sequence<BidiIter> parse_atom(string_iterator &begin, string_iterator end)
{
    // Reads one token and returns the matcher sequence for the atom it introduces.
    // Returns an empty sequence at the end of the pattern, or for a token that does
    // not start an atom; in the latter case `begin` is put back where it was.
    using namespace regex_constants;
    escape_value escape = { 0, 0, 0, detail::escape_char };
    string_iterator start = begin;

    switch(this->traits_.get_token(begin, end))
    {
    case token_literal:
        return detail::make_literal_xpression<BidiIter>
        (
            this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
        );

    case token_any:
        return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());

    case token_assert_begin_sequence:
        return detail::make_dynamic_xpression<BidiIter>(detail::assert_bos_matcher());

    case token_assert_end_sequence:
        return detail::make_dynamic_xpression<BidiIter>(detail::assert_eos_matcher());

    case token_assert_begin_line:
        return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());

    case token_assert_end_line:
        return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());

    case token_assert_word_boundary:
        return detail::make_assert_word<BidiIter>(detail::word_boundary<true>(), this->rxtraits());

    case token_assert_not_word_boundary:
        return detail::make_assert_word<BidiIter>(detail::word_boundary<false>(), this->rxtraits());

    case token_assert_word_begin:
        return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());

    case token_assert_word_end:
        return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());

    case token_escape:
        escape = this->parse_escape(begin, end);
        switch(escape.type_)
        {
        case detail::escape_mark:
            return detail::make_backref_xpression<BidiIter>
            (
                escape.mark_nbr_, this->traits_.flags(), this->rxtraits()
            );
        case detail::escape_char:
            return detail::make_char_xpression<BidiIter>
            (
                escape.ch_, this->traits_.flags(), this->rxtraits()
            );
        case detail::escape_class:
            // The character at `begin` is consumed here; its "upper" classification
            // is passed along as the second argument.
            return detail::make_posix_charset_xpression<BidiIter>
            (
                escape.class_
              , this->rxtraits().isctype(*begin++, this->upper_)
              , this->traits_.flags()
              , this->rxtraits()
            );
        }
        // NOTE: an escape type not listed above falls through into token_group_begin.

    case token_group_begin:
        return this->parse_group(begin, end);

    case token_charset_begin:
        return this->parse_charset(begin, end);

    case token_invalid_quantifier:
        throw regex_error(error_badrepeat, "quantifier not expected");

    case token_quote_meta_begin:
        return detail::make_literal_xpression<BidiIter>
        (
            this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
        );

    case token_quote_meta_end:
        throw regex_error
        (
            error_escape
          , "found quote-meta end without corresponding quote-meta begin"
        );

    case token_end_of_pattern:
        break;

    default:
        // Not the start of an atom: undo whatever get_token() did to `begin`.
        begin = start;
        break;
    }

    return detail::sequence<BidiIter>();
}
|
sl@0
|
402 |
|
sl@0
|
403 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
404 |
// parse_quant
|
sl@0
|
405 |
/// INTERNAL ONLY
|
sl@0
|
406 |
detail::sequence<BidiIter> parse_quant(string_iterator &begin, string_iterator end)
{
    // Parses one atom and, if a quantifier follows it, applies that quantifier.
    // Precondition: begin != end. Returns the resulting sequence, which may be empty.
    BOOST_ASSERT(begin != end);
    detail::quant_spec spec = { 0, 0, false };
    detail::sequence<BidiIter> seq = this->parse_atom(begin, end);

    // BUGBUG this doesn't handle the degenerate (?:)+ correctly
    if(!seq.is_empty() && begin != end && seq.first->is_quantifiable())
    {
        if(this->traits_.get_quant_spec(begin, end, spec))
        {
            BOOST_ASSERT(spec.min_ <= spec.max_);

            if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
            {
                // Discard the atom and use whatever follows in its place. If the
                // quantifier was the last thing in the pattern there is nothing left
                // to parse, and recursing would break this function's own
                // begin != end precondition, so return an empty sequence instead.
                if(begin != end)
                {
                    seq = this->parse_quant(begin, end);
                }
                else
                {
                    seq = detail::sequence<BidiIter>();
                }
            }
            else
            {
                seq = seq.first->quantify(spec, this->hidden_mark_count_, seq, alternates_factory());
            }
        }
    }

    return seq;
}
|
sl@0
|
432 |
|
sl@0
|
433 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
434 |
// parse_sequence
|
sl@0
|
435 |
/// INTERNAL ONLY
|
sl@0
|
436 |
detail::sequence<BidiIter> parse_sequence(string_iterator &begin, string_iterator end)
{
    // Chains quantified atoms together until the input runs out or parse_quant()
    // comes back with an empty sequence.
    detail::sequence<BidiIter> chain;

    for(; begin != end; )
    {
        detail::sequence<BidiIter> piece = this->parse_quant(begin, end);

        // An empty result means no further quantified atom was found.
        if(piece.is_empty())
        {
            break;
        }

        chain += piece;
    }

    return chain;
}
|
sl@0
|
454 |
|
sl@0
|
455 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
456 |
// parse_literal
|
sl@0
|
457 |
// scan ahead looking for char literals to be globbed together into a string literal
|
sl@0
|
458 |
/// INTERNAL ONLY
|
sl@0
|
459 |
string_type parse_literal(string_iterator &begin, string_iterator end)
{
    // Collects a run of literal characters (and character escapes) into one string.
    // `begin` is left just past the last character that was kept.
    using namespace regex_constants;
    BOOST_ASSERT(begin != end);
    BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
    escape_value escape = { 0, 0, 0, detail::escape_char };
    string_type literal(1, *begin);

    // `prev` trails one accepted character behind `begin`; `ahead` is the look-ahead
    // cursor that is handed to the traits and to parse_escape().
    string_iterator prev = begin;
    ++begin;
    string_iterator ahead = begin;

    while(begin != end)
    {
        detail::quant_spec spec;
        if(this->traits_.get_quant_spec(ahead, end, spec))
        {
            // A quantifier follows. If more than one character was collected, give
            // the last one back so that it can be quantified as its own atom.
            if(literal.size() != 1)
            {
                begin = prev;
                literal.erase(literal.size() - 1);
            }
            return literal;
        }

        switch(this->traits_.get_token(ahead, end))
        {
        case token_escape:
            escape = this->parse_escape(ahead, end);
            // Only character escapes can be folded into the literal.
            if(detail::escape_char != escape.type_) return literal;
            literal += escape.ch_;
            break;
        case token_literal:
            literal += *ahead++;
            break;
        default:
            return literal;
        }

        // Accept what the look-ahead consumed and move on.
        prev = begin;
        begin = ahead;
    }

    return literal;
}
|
sl@0
|
496 |
|
sl@0
|
497 |
///////////////////////////////////////////////////////////////////////////
|
sl@0
|
498 |
// parse_quote_meta
|
sl@0
|
499 |
// scan ahead to the quote-meta end token (or the end of the pattern); everything before it is one string literal
|
sl@0
|
500 |
/// INTERNAL ONLY
|
sl@0
|
501 |
string_type parse_quote_meta(string_iterator &begin, string_iterator end)
{
    // Returns the text between the current position and the quote-meta end token,
    // or everything up to `end` if that token never appears.
    using namespace regex_constants;
    string_iterator const first = begin;

    // `token_start` is where the token about to be read begins, so it marks the end
    // of the quoted text when that token turns out to be the quote-meta end.
    for(string_iterator token_start = begin; token_start != end; token_start = begin)
    {
        switch(this->traits_.get_token(begin, end))
        {
        case token_quote_meta_end:
            return string_type(first, token_start);
        case token_escape:
            detail::ensure(begin != end, error_escape, "incomplete escape sequence");
            // fall-through
        case token_literal:
            ++begin;
            // fall-through
        default:
            ;
        }
    }

    return string_type(first, begin);
}
|
sl@0
|
517 |
|
sl@0
|
518 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
519 |
// parse_escape
|
sl@0
|
520 |
/// INTERNAL ONLY
|
sl@0
|
521 |
escape_value parse_escape(string_iterator &begin, string_iterator end)
{
    // Interprets the escape at `begin`: a back-reference when the digits qualify,
    // otherwise whatever the detail::parse_escape helper makes of it.
    detail::ensure(begin != end, regex_constants::error_escape, "incomplete escape sequence");

    // first, check to see if this can be a backreference
    bool const leads_with_nonzero_digit = (0 < this->rxtraits().value(*begin, 10));
    if(leads_with_nonzero_digit)
    {
        // Parse at most 3 decimal digits, using a scratch cursor so that `begin`
        // stays put if this turns out not to be a back-reference.
        string_iterator scan = begin;
        int mark_nbr = detail::toi(scan, end, this->rxtraits(), 10, 999);

        // If the resulting number could conceivably be a backref, then it is.
        if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
        {
            begin = scan;
            escape_value backref = {0, mark_nbr, 0, detail::escape_mark};
            return backref;
        }
    }

    // Not a backreference, defer to the parse_escape helper
    return detail::parse_escape(begin, end, this->traits_);
}
|
sl@0
|
544 |
|
sl@0
|
545 |
std::size_t mark_count_;
|
sl@0
|
546 |
std::size_t hidden_mark_count_;
|
sl@0
|
547 |
CompilerTraits traits_;
|
sl@0
|
548 |
typename RegexTraits::char_class_type upper_;
|
sl@0
|
549 |
};
|
sl@0
|
550 |
|
sl@0
|
551 |
}} // namespace boost::xpressive
|
sl@0
|
552 |
|
sl@0
|
553 |
#endif
|