Update contrib.
1 ///////////////////////////////////////////////////////////////////////////////
2 /// \file regex_primitives.hpp
3 /// Contains the syntax elements for writing static regular expressions.
5 // Copyright 2004 Eric Niebler. Distributed under the Boost
6 // Software License, Version 1.0. (See accompanying file
7 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 #ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
10 #define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
13 #include <boost/mpl/assert.hpp>
14 #include <boost/preprocessor/cat.hpp>
15 #include <boost/xpressive/proto/proto.hpp>
16 #include <boost/xpressive/detail/detail_fwd.hpp>
17 #include <boost/xpressive/detail/core/icase.hpp>
18 #include <boost/xpressive/detail/core/action.hpp>
19 #include <boost/xpressive/detail/core/matchers.hpp>
20 #include <boost/xpressive/detail/static/as_xpr.hpp>
21 #include <boost/xpressive/detail/static/compile.hpp>
22 #include <boost/xpressive/detail/static/modifier.hpp>
23 #include <boost/xpressive/detail/static/regex_operators.hpp>
24 #include <boost/xpressive/detail/static/productions/productions.hpp>
26 namespace boost { namespace xpressive { namespace detail
29 typedef assert_word_placeholder<word_boundary<true> > assert_word_boundary; // placeholder type used by the word-boundary assertion
30 typedef assert_word_placeholder<word_begin> assert_word_begin; // placeholder type used by the beginning-of-word assertion
31 typedef assert_word_placeholder<word_end> assert_word_end; // placeholder type used by the end-of-word assertion
34 ///////////////////////////////////////////////////////////////////////////////
36 // BOOST_XPRESSIVE_GLOBAL
37 // for defining globals that neither violate the One Definition Rule nor
38 // lead to undefined behavior due to global object initialization order.
39 //#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \
42 // template<int Dummy> \
43 // struct BOOST_PP_CAT(global_pod_, name) \
45 // static type const value; \
47 // union type_must_be_pod \
53 // template<int Dummy> \
54 // type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init; \
56 // type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value
61 /// INTERNAL ONLY (for backwards compatibility)
62 unsigned int const repeat_max = UINT_MAX-1;
64 ///////////////////////////////////////////////////////////////////////////////
65 /// \brief For infinite repetition of a sub-expression.
67 /// Magic value used with the repeat\<\>() function template
68 /// to specify an unbounded repeat. Use as: repeat<17, inf>('a').
69 /// The equivalent in perl is /a{17,}/.
70 unsigned int const inf = UINT_MAX-1;
72 /// INTERNAL ONLY (for backwards compatibility)
74 proto::unary_op<detail::epsilon_matcher, proto::noop_tag>
77 ///////////////////////////////////////////////////////////////////////////////
78 /// \brief Successfully matches nothing.
80 /// Successfully matches a zero-width sequence. nil always succeeds and
81 /// never consumes any characters.
83 proto::unary_op<detail::epsilon_matcher, proto::noop_tag>
86 ///////////////////////////////////////////////////////////////////////////////
87 /// \brief Matches an alpha-numeric character.
89 /// The regex traits are used to determine which characters are alpha-numeric.
90 /// To match any character that is not alpha-numeric, use ~alnum.
92 /// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent
93 /// to /[[:^alnum:]]/ in perl.
95 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
97 > const alnum = {"alnum"};
99 ///////////////////////////////////////////////////////////////////////////////
100 /// \brief Matches an alphabetic character.
102 /// The regex traits are used to determine which characters are alphabetic.
103 /// To match any character that is not alphabetic, use ~alpha.
105 /// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent
106 /// to /[[:^alpha:]]/ in perl.
108 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
110 > const alpha = {"alpha"};
112 ///////////////////////////////////////////////////////////////////////////////
113 /// \brief Matches a blank (horizontal white-space) character.
115 /// The regex traits are used to determine which characters are blank characters.
116 /// To match any character that is not blank, use ~blank.
118 /// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent
119 /// to /[[:^blank:]]/ in perl.
121 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
123 > const blank = {"blank"};
125 ///////////////////////////////////////////////////////////////////////////////
126 /// \brief Matches a control character.
128 /// The regex traits are used to determine which characters are control characters.
129 /// To match any character that is not a control character, use ~cntrl.
131 /// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent
132 /// to /[[:^cntrl:]]/ in perl.
134 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
136 > const cntrl = {"cntrl"};
138 ///////////////////////////////////////////////////////////////////////////////
139 /// \brief Matches a digit character.
141 /// The regex traits are used to determine which characters are digits.
142 /// To match any character that is not a digit, use ~digit.
144 /// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent
145 /// to /[[:^digit:]]/ in perl.
147 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
149 > const digit = {"digit"};
151 ///////////////////////////////////////////////////////////////////////////////
152 /// \brief Matches a graph character.
154 /// The regex traits are used to determine which characters are graphable.
155 /// To match any character that is not graphable, use ~graph.
157 /// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent
158 /// to /[[:^graph:]]/ in perl.
160 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
162 > const graph = {"graph"};
164 ///////////////////////////////////////////////////////////////////////////////
165 /// \brief Matches a lower-case character.
167 /// The regex traits are used to determine which characters are lower-case.
168 /// To match any character that is not a lower-case character, use ~lower.
170 /// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent
171 /// to /[[:^lower:]]/ in perl.
173 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
175 > const lower = {"lower"};
177 ///////////////////////////////////////////////////////////////////////////////
178 /// \brief Matches a printable character.
180 /// The regex traits are used to determine which characters are printable.
181 /// To match any character that is not printable, use ~print.
183 /// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent
184 /// to /[[:^print:]]/ in perl.
186 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
188 > const print = {"print"};
190 ///////////////////////////////////////////////////////////////////////////////
191 /// \brief Matches a punctuation character.
193 /// The regex traits are used to determine which characters are punctuation.
194 /// To match any character that is not punctuation, use ~punct.
196 /// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent
197 /// to /[[:^punct:]]/ in perl.
199 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
201 > const punct = {"punct"};
203 ///////////////////////////////////////////////////////////////////////////////
204 /// \brief Matches a space character.
206 /// The regex traits are used to determine which characters are space characters.
207 /// To match any character that is not white-space, use ~space.
209 /// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent
210 /// to /[[:^space:]]/ in perl.
212 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
214 > const space = {"space"};
216 ///////////////////////////////////////////////////////////////////////////////
217 /// \brief Matches an upper-case character.
219 /// The regex traits are used to determine which characters are upper-case.
220 /// To match any character that is not upper-case, use ~upper.
222 /// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent
223 /// to /[[:^upper:]]/ in perl.
225 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
227 > const upper = {"upper"};
229 ///////////////////////////////////////////////////////////////////////////////
230 /// \brief Matches a hexadecimal digit character.
232 /// The regex traits are used to determine which characters are hex digits.
233 /// To match any character that is not a hex digit, use ~xdigit.
235 /// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent
236 /// to /[[:^xdigit:]]/ in perl.
238 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
240 > const xdigit = {"xdigit"};
242 ///////////////////////////////////////////////////////////////////////////////
243 /// \brief Beginning of sequence assertion.
245 /// For the character sequence [begin, end), 'bos' matches the
246 /// zero-width sub-sequence [begin, begin).
248 proto::unary_op<detail::assert_bos_matcher, proto::noop_tag>
251 ///////////////////////////////////////////////////////////////////////////////
252 /// \brief End of sequence assertion.
254 /// For the character sequence [begin, end),
255 /// 'eos' matches the zero-width sub-sequence [end, end).
257 /// \attention Unlike the perl end of sequence assertion \$, 'eos' will
258 /// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To
259 /// get that behavior, use (!_n >> eos).
261 proto::unary_op<detail::assert_eos_matcher, proto::noop_tag>
264 ///////////////////////////////////////////////////////////////////////////////
265 /// \brief Beginning of line assertion.
267 /// 'bol' matches the zero-width sub-sequence
268 /// immediately following a logical newline sequence. The regex traits
269 /// are used to determine what constitutes a logical newline sequence.
271 proto::unary_op<detail::assert_bol_placeholder, proto::noop_tag>
274 ///////////////////////////////////////////////////////////////////////////////
275 /// \brief End of line assertion.
277 /// 'eol' matches the zero-width sub-sequence
278 /// immediately preceding a logical newline sequence. The regex traits
279 /// are used to determine what constitutes a logical newline sequence.
281 proto::unary_op<detail::assert_eol_placeholder, proto::noop_tag>
284 ///////////////////////////////////////////////////////////////////////////////
285 /// \brief Beginning of word assertion.
287 /// 'bow' matches the zero-width sub-sequence
288 /// immediately following a non-word character and preceding a word character.
289 /// The regex traits are used to determine what constitutes a word character.
291 proto::unary_op<detail::assert_word_begin, proto::noop_tag>
294 ///////////////////////////////////////////////////////////////////////////////
295 /// \brief End of word assertion.
297 /// 'eow' matches the zero-width sub-sequence
298 /// immediately following a word character and preceding a non-word character.
299 /// The regex traits are used to determine what constitutes a word character.
301 proto::unary_op<detail::assert_word_end, proto::noop_tag>
304 ///////////////////////////////////////////////////////////////////////////////
305 /// \brief Word boundary assertion.
307 /// '_b' matches the zero-width sub-sequence at the beginning or the end of a word.
308 /// It is equivalent to (bow | eow). The regex traits are used to determine what
309 /// constitutes a word character. To match a non-word boundary, use ~_b.
311 /// \attention _b is like \\b in perl. ~_b is like \\B in perl.
313 proto::unary_op<detail::assert_word_boundary, proto::noop_tag>
316 ///////////////////////////////////////////////////////////////////////////////
317 /// \brief Matches a word character.
319 /// '_w' matches a single word character. The regex traits are used to determine which
320 /// characters are word characters. Use ~_w to match a character that is not a word character.
323 /// \attention _w is like \\w in perl. ~_w is like \\W in perl.
325 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
329 ///////////////////////////////////////////////////////////////////////////////
330 /// \brief Matches a digit character.
332 /// '_d' matches a single digit character. The regex traits are used to determine which
333 /// characters are digits. Use ~_d to match a character that is not a digit character.
336 /// \attention _d is like \\d in perl. ~_d is like \\D in perl.
338 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
342 ///////////////////////////////////////////////////////////////////////////////
343 /// \brief Matches a space character.
345 /// '_s' matches a single space character. The regex traits are used to determine which
346 /// characters are space characters. Use ~_s to match a character that is not a space character.
349 /// \attention _s is like \\s in perl. ~_s is like \\S in perl.
351 proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>
355 ///////////////////////////////////////////////////////////////////////////////
356 /// \brief Matches a literal newline character, '\\n'.
358 /// '_n' matches a single newline character, '\\n'. Use ~_n to match a character
359 /// that is not a newline.
361 /// \attention ~_n is like '.' in perl without the /s modifier.
363 proto::unary_op<detail::literal_placeholder<char>, proto::noop_tag>
367 ///////////////////////////////////////////////////////////////////////////////
368 /// \brief Matches a logical newline sequence.
370 /// '_ln' matches a logical newline sequence. This can be any character in the
371 /// line separator class, as determined by the regex traits, or the '\\r\\n' sequence.
372 /// For the purpose of back-tracking, '\\r\\n' is treated as a unit.
373 /// To match any one character that is not a logical newline, use ~_ln.
375 detail::logical_newline_xpression
378 ///////////////////////////////////////////////////////////////////////////////
379 /// \brief Matches any one character.
381 /// Match any character, similar to '.' in perl syntax with the /s modifier.
382 /// '_' matches any one character, including the newline.
384 /// \attention To match any character except the newline, use ~_n
386 proto::unary_op<detail::any_matcher, proto::noop_tag>
389 ///////////////////////////////////////////////////////////////////////////////
390 /// \brief Reference to the current regex object
392 /// Useful when constructing recursive regular expression objects. The 'self'
393 /// identifier is a short-hand for the current regex object. For instance,
394 /// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that
395 /// matches balanced parens such as "((()))".
397 proto::unary_op<detail::self_placeholder, proto::noop_tag>
400 ///////////////////////////////////////////////////////////////////////////////
401 /// \brief Used to create character sets.
403 /// There are two ways to create character sets with the 'set' identifier. The
404 /// easiest is to create a comma-separated list of the characters in the set,
405 /// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other
406 /// way is to define the set as an argument to the set subscript operator.
407 /// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b',
408 /// 'c' or a digit character.
410 /// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c')
411 /// will match any character that is not an 'a', 'b', or 'c'.
413 /// Sets can be composed of other, possibly complemented, sets. For instance,
414 /// set[ ~digit | ~(set= 'a','b','c') ].
416 detail::set_initializer_type
419 ///////////////////////////////////////////////////////////////////////////////
420 /// \brief Sub-match placeholder, like $& in Perl
421 proto::op_proxy<detail::mark_tag, int> const s0 = {0};
423 ///////////////////////////////////////////////////////////////////////////////
424 /// \brief Sub-match placeholder, like $1 in perl.
426 /// To create a sub-match, assign a sub-expression to the sub-match placeholder.
427 /// For instance, (s1= _) will match any one character and remember which
428 /// character was matched in the 1st sub-match. Later in the pattern, you can
429 /// refer back to the sub-match. For instance, (s1= _) >> s1 will match any
430 /// character, and then match the same character again.
432 /// After a successful regex_match() or regex_search(), the sub-match placeholders
433 /// can be used to index into the match_results\<\> object to retrieve the Nth sub-match.
435 proto::op_proxy<detail::mark_tag, int> const s1 = {1};
436 proto::op_proxy<detail::mark_tag, int> const s2 = {2};
437 proto::op_proxy<detail::mark_tag, int> const s3 = {3};
438 proto::op_proxy<detail::mark_tag, int> const s4 = {4};
439 proto::op_proxy<detail::mark_tag, int> const s5 = {5};
440 proto::op_proxy<detail::mark_tag, int> const s6 = {6};
441 proto::op_proxy<detail::mark_tag, int> const s7 = {7};
442 proto::op_proxy<detail::mark_tag, int> const s8 = {8};
443 proto::op_proxy<detail::mark_tag, int> const s9 = {9};
445 // NOTE: For the purpose of xpressive's documentation, make icase() look like an
446 // ordinary function. In reality, it is a function object defined in detail/icase.hpp
447 // so that it can serve double-duty as regex_constants::icase, the syntax_option_type.
448 // Do the same for as_xpr(), which is actually defined in detail/static/as_xpr.hpp
449 #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
450 ///////////////////////////////////////////////////////////////////////////////
451 /// \brief Makes a literal into a regular expression.
453 /// Use as_xpr() to turn a literal into a regular expression. For instance,
454 /// "foo" >> "bar" will not compile because both operands to the right-shift
455 /// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar" instead.
458 /// You can use as_xpr() with character literals in addition to string literals.
459 /// For instance, as_xpr('a') will match an 'a'. You can also complement a
460 /// character literal, as with ~as_xpr('a'). This will match any one character
461 /// that is not an 'a'.
462 template<typename Literal>
463 inline typename detail::as_xpr_type<Literal>::const_reference
464 as_xpr(Literal const &literal)
466 return detail::as_xpr_type<Literal>::call(literal); // forward this function's own parameter to the conversion trait
469 ///////////////////////////////////////////////////////////////////////////////
470 /// \brief Makes a sub-expression case-insensitive.
472 /// Use icase() to make a sub-expression case-insensitive. For instance,
473 /// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by
474 /// "bar" irrespective of case.
475 template<typename Xpr>
476 inline proto::binary_op<detail::icase_modifier, typename detail::as_xpr_type<Xpr>::type, modifier_tag> const
477 icase(Xpr const &xpr)
479 detail::icase_modifier mod;
480 return proto::make_op<modifier_tag>(mod, as_xpr(xpr));
484 ///////////////////////////////////////////////////////////////////////////////
485 /// \brief Embed a regex object by reference.
487 /// \param rex The basic_regex object to embed by reference.
488 template<typename BidiIter>
489 inline proto::unary_op<detail::regex_placeholder<BidiIter, true>, proto::noop_tag> const
490 by_ref(basic_regex<BidiIter> const &rex)
492 typedef detail::core_access<BidiIter> access;
493 shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
494 return proto::noop(detail::regex_placeholder<BidiIter, true>(impl));
497 ///////////////////////////////////////////////////////////////////////////////
498 /// \brief Match a range of characters.
500 /// Match any character in the range [ch_min, ch_max].
502 /// \param ch_min The lower end of the range to match.
503 /// \param ch_max The upper end of the range to match.
504 template<typename Char>
505 inline proto::unary_op<detail::range_placeholder<Char>, proto::noop_tag> const
506 range(Char ch_min, Char ch_max)
508 return proto::noop(detail::range_placeholder<Char>(ch_min, ch_max));
511 ///////////////////////////////////////////////////////////////////////////////
512 /// \brief Make a sub-expression optional. Equivalent to !as_xpr(xpr).
514 /// \param xpr The sub-expression to make optional.
515 template<typename Xpr>
516 inline proto::unary_op
518 typename detail::as_xpr_type<Xpr>::type
519 , proto::logical_not_tag
521 optional(Xpr const &xpr)
526 ///////////////////////////////////////////////////////////////////////////////
527 /// \brief Repeat a sub-expression multiple times.
529 /// There are two forms of the repeat\<\>() function template. To match a
530 /// sub-expression N times, use repeat\<N\>(xpr). To match a sub-expression
531 /// from M to N times, use repeat\<M,N\>(xpr).
533 /// The repeat\<\>() function creates a greedy quantifier. To make the quantifier
534 /// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(xpr).
536 /// \param xpr The sub-expression to repeat.
537 template<unsigned int Min, unsigned int Max, typename Xpr>
538 inline proto::unary_op
540 typename detail::as_xpr_type<Xpr>::type
541 , detail::generic_quant_tag<Min, Max>
543 repeat(Xpr const &xpr)
545 return proto::make_op<detail::generic_quant_tag<Min, Max> >(as_xpr(xpr));
549 template<unsigned int Count, typename Xpr2>
550 inline proto::unary_op
552 typename detail::as_xpr_type<Xpr2>::type
553 , detail::generic_quant_tag<Count, Count>
555 repeat(Xpr2 const &xpr)
557 return proto::make_op<detail::generic_quant_tag<Count, Count> >(as_xpr(xpr));
560 ///////////////////////////////////////////////////////////////////////////////
561 /// \brief Create an independent sub-expression.
563 /// Turn off back-tracking for a sub-expression. Any branches or repeats within
564 /// the sub-expression will match only one way, and no other alternatives are tried.
567 /// \attention keep(xpr) is equivalent to the perl (?>...) extension.
569 /// \param xpr The sub-expression to modify.
570 template<typename Xpr>
571 inline proto::unary_op
573 typename detail::as_xpr_type<Xpr>::type
578 return proto::make_op<detail::keeper_tag>(as_xpr(xpr));
581 ///////////////////////////////////////////////////////////////////////////////
582 /// \brief Look-ahead assertion.
584 /// before(xpr) succeeds if the xpr sub-expression would match at the current
585 /// position in the sequence, but xpr is not included in the match. For instance,
586 /// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be
587 /// negated with the bit-complement operator.
589 /// \attention before(xpr) is equivalent to the perl (?=...) extension.
590 /// ~before(xpr) is a negative look-ahead assertion, equivalent to the
591 /// perl (?!...) extension.
593 /// \param xpr The sub-expression to put in the look-ahead assertion.
594 template<typename Xpr>
595 inline proto::unary_op
597 typename detail::as_xpr_type<Xpr>::type
598 , detail::lookahead_tag<true>
600 before(Xpr const &xpr)
602 return proto::make_op<detail::lookahead_tag<true> >(as_xpr(xpr));
605 ///////////////////////////////////////////////////////////////////////////////
606 /// \brief Look-behind assertion.
608 /// after(xpr) succeeds if the xpr sub-expression would match at the current
609 /// position minus N in the sequence, where N is the width of xpr. xpr is not included in
610 /// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind
611 /// assertions can be negated with the bit-complement operator.
613 /// \attention after(xpr) is equivalent to the perl (?<=...) extension.
614 /// ~after(xpr) is a negative look-behind assertion, equivalent to the
615 /// perl (?<!...) extension.
617 /// \param xpr The sub-expression to put in the look-behind assertion.
619 /// \pre xpr cannot match a variable number of characters.
620 template<typename Xpr>
621 inline proto::unary_op
623 typename detail::as_xpr_type<Xpr>::type
624 , detail::lookbehind_tag<true>
626 after(Xpr const &xpr)
628 return proto::make_op<detail::lookbehind_tag<true> >(as_xpr(xpr));
631 ///////////////////////////////////////////////////////////////////////////////
632 /// \brief Specify a regex traits or a std::locale.
634 /// imbue() instructs the regex engine to use the specified traits or locale
635 /// when matching the regex. The entire expression must use the same traits/locale.
636 /// For instance, the following specifies a locale for use with a regex:
638 /// sregex rx = imbue(loc)(+digit);
640 /// \param loc The std::locale or regex traits object.
641 template<typename Locale>
642 inline detail::modifier_op<detail::locale_modifier<Locale> > const
643 imbue(Locale const &loc)
645 detail::modifier_op<detail::locale_modifier<Locale> > mod =
647 detail::locale_modifier<Locale>(loc)
648 , regex_constants::ECMAScript
653 }} // namespace boost::xpressive