sl@0
|
1 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
2 |
/// \file regex_primitives.hpp
|
sl@0
|
3 |
/// Contains the syntax elements for writing static regular expressions.
|
sl@0
|
4 |
//
|
sl@0
|
5 |
// Copyright 2004 Eric Niebler. Distributed under the Boost
|
sl@0
|
6 |
// Software License, Version 1.0. (See accompanying file
|
sl@0
|
7 |
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
sl@0
|
8 |
|
sl@0
|
9 |
#ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
|
sl@0
|
10 |
#define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
|
sl@0
|
11 |
|
sl@0
|
12 |
#include <climits>
|
sl@0
|
13 |
#include <boost/mpl/assert.hpp>
|
sl@0
|
14 |
#include <boost/preprocessor/cat.hpp>
|
sl@0
|
15 |
#include <boost/xpressive/proto/proto.hpp>
|
sl@0
|
16 |
#include <boost/xpressive/detail/detail_fwd.hpp>
|
sl@0
|
17 |
#include <boost/xpressive/detail/core/icase.hpp>
|
sl@0
|
18 |
#include <boost/xpressive/detail/core/action.hpp>
|
sl@0
|
19 |
#include <boost/xpressive/detail/core/matchers.hpp>
|
sl@0
|
20 |
#include <boost/xpressive/detail/static/as_xpr.hpp>
|
sl@0
|
21 |
#include <boost/xpressive/detail/static/compile.hpp>
|
sl@0
|
22 |
#include <boost/xpressive/detail/static/modifier.hpp>
|
sl@0
|
23 |
#include <boost/xpressive/detail/static/regex_operators.hpp>
|
sl@0
|
24 |
#include <boost/xpressive/detail/static/productions/productions.hpp>
|
sl@0
|
25 |
|
sl@0
|
26 |
namespace boost { namespace xpressive { namespace detail
|
sl@0
|
27 |
{
|
sl@0
|
28 |
|
sl@0
|
29 |
// Word-assertion placeholders: assert_word_placeholder instantiated with each
// of the three word conditions referenced by the _b, bow and eow primitives.
typedef assert_word_placeholder<word_boundary<true> > assert_word_boundary;
typedef assert_word_placeholder<word_begin>           assert_word_begin;
typedef assert_word_placeholder<word_end>             assert_word_end;
|
sl@0
|
32 |
|
sl@0
|
33 |
/*
|
sl@0
|
34 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
35 |
/// INTERNAL ONLY
|
sl@0
|
36 |
// BOOST_XPRESSIVE_GLOBAL
|
sl@0
|
37 |
// for defining globals that neither violate the One Definition Rule nor
|
sl@0
|
38 |
// lead to undefined behavior due to global object initialization order.
|
sl@0
|
39 |
//#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \
|
sl@0
|
40 |
// namespace detail \
|
sl@0
|
41 |
// { \
|
sl@0
|
42 |
// template<int Dummy> \
|
sl@0
|
43 |
// struct BOOST_PP_CAT(global_pod_, name) \
|
sl@0
|
44 |
// { \
|
sl@0
|
45 |
// static type const value; \
|
sl@0
|
46 |
// private: \
|
sl@0
|
47 |
// union type_must_be_pod \
|
sl@0
|
48 |
// { \
|
sl@0
|
49 |
// type t; \
|
sl@0
|
50 |
// char ch; \
|
sl@0
|
51 |
// } u; \
|
sl@0
|
52 |
// }; \
|
sl@0
|
53 |
// template<int Dummy> \
|
sl@0
|
54 |
// type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init; \
|
sl@0
|
55 |
// } \
|
sl@0
|
56 |
// type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value
|
sl@0
|
57 |
*/
|
sl@0
|
58 |
|
sl@0
|
59 |
} // namespace detail
|
sl@0
|
60 |
|
sl@0
|
61 |
/// INTERNAL ONLY (for backwards compatibility)
|
sl@0
|
62 |
unsigned int const repeat_max = UINT_MAX - 1u; // one below the largest unsigned int
|
sl@0
|
63 |
|
sl@0
|
64 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
65 |
/// \brief For infinite repetition of a sub-expression.
|
sl@0
|
66 |
///
|
sl@0
|
67 |
/// Magic value used with the repeat\<\>() function template
|
sl@0
|
68 |
/// to specify an unbounded repeat. Use as: repeat<17, inf>('a').
|
sl@0
|
69 |
/// The equivalent in perl is /a{17,}/.
|
sl@0
|
70 |
unsigned int const inf = UINT_MAX - 1u; // one below the largest unsigned int
|
sl@0
|
71 |
|
sl@0
|
72 |
/// INTERNAL ONLY (for backwards compatibility)
|
sl@0
|
73 |
// Empty-initialised op_proxy over an epsilon_matcher node (same type as 'nil').
proto::op_proxy<proto::unary_op<detail::epsilon_matcher, proto::noop_tag> > const
    epsilon = {};
|
sl@0
|
76 |
|
sl@0
|
77 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
78 |
/// \brief Successfully matches nothing.
|
sl@0
|
79 |
///
|
sl@0
|
80 |
/// Successfully matches a zero-width sequence. nil always succeeds and
|
sl@0
|
81 |
/// never consumes any characters.
|
sl@0
|
82 |
// Empty-initialised op_proxy over an epsilon_matcher node.
proto::op_proxy<proto::unary_op<detail::epsilon_matcher, proto::noop_tag> > const
    nil = {};
|
sl@0
|
85 |
|
sl@0
|
86 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
87 |
/// \brief Matches an alpha-numeric character.
|
sl@0
|
88 |
///
|
sl@0
|
89 |
/// The regex traits are used to determine which characters are alpha-numeric.
|
sl@0
|
90 |
/// To match any character that is not alpha-numeric, use ~alnum.
|
sl@0
|
91 |
///
|
sl@0
|
92 |
/// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent
|
sl@0
|
93 |
/// to /[[:^alnum:]]/ in perl.
|
sl@0
|
94 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "alnum".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const alnum = {"alnum"};
|
sl@0
|
98 |
|
sl@0
|
99 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
100 |
/// \brief Matches an alphabetic character.
|
sl@0
|
101 |
///
|
sl@0
|
102 |
/// The regex traits are used to determine which characters are alphabetic.
|
sl@0
|
103 |
/// To match any character that is not alphabetic, use ~alpha.
|
sl@0
|
104 |
///
|
sl@0
|
105 |
/// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent
|
sl@0
|
106 |
/// to /[[:^alpha:]]/ in perl.
|
sl@0
|
107 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "alpha".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const alpha = {"alpha"};
|
sl@0
|
111 |
|
sl@0
|
112 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
113 |
/// \brief Matches a blank (horizontal white-space) character.
|
sl@0
|
114 |
///
|
sl@0
|
115 |
/// The regex traits are used to determine which characters are blank characters.
|
sl@0
|
116 |
/// To match any character that is not blank, use ~blank.
|
sl@0
|
117 |
///
|
sl@0
|
118 |
/// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent
|
sl@0
|
119 |
/// to /[[:^blank:]]/ in perl.
|
sl@0
|
120 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "blank".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const blank = {"blank"};
|
sl@0
|
124 |
|
sl@0
|
125 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
126 |
/// \brief Matches a control character.
|
sl@0
|
127 |
///
|
sl@0
|
128 |
/// The regex traits are used to determine which characters are control characters.
|
sl@0
|
129 |
/// To match any character that is not a control character, use ~cntrl.
|
sl@0
|
130 |
///
|
sl@0
|
131 |
/// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent
|
sl@0
|
132 |
/// to /[[:^cntrl:]]/ in perl.
|
sl@0
|
133 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "cntrl".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const cntrl = {"cntrl"};
|
sl@0
|
137 |
|
sl@0
|
138 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
139 |
/// \brief Matches a digit character.
|
sl@0
|
140 |
///
|
sl@0
|
141 |
/// The regex traits are used to determine which characters are digits.
|
sl@0
|
142 |
/// To match any character that is not a digit, use ~digit.
|
sl@0
|
143 |
///
|
sl@0
|
144 |
/// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent
|
sl@0
|
145 |
/// to /[[:^digit:]]/ in perl.
|
sl@0
|
146 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "digit".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const digit = {"digit"};
|
sl@0
|
150 |
|
sl@0
|
151 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
152 |
/// \brief Matches a graph character.
|
sl@0
|
153 |
///
|
sl@0
|
154 |
/// The regex traits are used to determine which characters are graphable.
|
sl@0
|
155 |
/// To match any character that is not graphable, use ~graph.
|
sl@0
|
156 |
///
|
sl@0
|
157 |
/// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent
|
sl@0
|
158 |
/// to /[[:^graph:]]/ in perl.
|
sl@0
|
159 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "graph".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const graph = {"graph"};
|
sl@0
|
163 |
|
sl@0
|
164 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
165 |
/// \brief Matches a lower-case character.
|
sl@0
|
166 |
///
|
sl@0
|
167 |
/// The regex traits are used to determine which characters are lower-case.
|
sl@0
|
168 |
/// To match any character that is not a lower-case character, use ~lower.
|
sl@0
|
169 |
///
|
sl@0
|
170 |
/// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent
|
sl@0
|
171 |
/// to /[[:^lower:]]/ in perl.
|
sl@0
|
172 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "lower".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const lower = {"lower"};
|
sl@0
|
176 |
|
sl@0
|
177 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
178 |
/// \brief Matches a printable character.
|
sl@0
|
179 |
///
|
sl@0
|
180 |
/// The regex traits are used to determine which characters are printable.
|
sl@0
|
181 |
/// To match any character that is not printable, use ~print.
|
sl@0
|
182 |
///
|
sl@0
|
183 |
/// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent
|
sl@0
|
184 |
/// to /[[:^print:]]/ in perl.
|
sl@0
|
185 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "print".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const print = {"print"};
|
sl@0
|
189 |
|
sl@0
|
190 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
191 |
/// \brief Matches a punctuation character.
|
sl@0
|
192 |
///
|
sl@0
|
193 |
/// The regex traits are used to determine which characters are punctuation.
|
sl@0
|
194 |
/// To match any character that is not punctuation, use ~punct.
|
sl@0
|
195 |
///
|
sl@0
|
196 |
/// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent
|
sl@0
|
197 |
/// to /[[:^punct:]]/ in perl.
|
sl@0
|
198 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "punct".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const punct = {"punct"};
|
sl@0
|
202 |
|
sl@0
|
203 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
204 |
/// \brief Matches a space character.
|
sl@0
|
205 |
///
|
sl@0
|
206 |
/// The regex traits are used to determine which characters are space characters.
|
sl@0
|
207 |
/// To match any character that is not white-space, use ~space.
|
sl@0
|
208 |
///
|
sl@0
|
209 |
/// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent
|
sl@0
|
210 |
/// to /[[:^space:]]/ in perl.
|
sl@0
|
211 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "space".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const space = {"space"};
|
sl@0
|
215 |
|
sl@0
|
216 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
217 |
/// \brief Matches an upper-case character.
|
sl@0
|
218 |
///
|
sl@0
|
219 |
/// The regex traits are used to determine which characters are upper-case.
|
sl@0
|
220 |
/// To match any character that is not upper-case, use ~upper.
|
sl@0
|
221 |
///
|
sl@0
|
222 |
/// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent
|
sl@0
|
223 |
/// to /[[:^upper:]]/ in perl.
|
sl@0
|
224 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "upper".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const upper = {"upper"};
|
sl@0
|
228 |
|
sl@0
|
229 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
230 |
/// \brief Matches a hexadecimal digit character.
|
sl@0
|
231 |
///
|
sl@0
|
232 |
/// The regex traits are used to determine which characters are hex digits.
|
sl@0
|
233 |
/// To match any character that is not a hex digit, use ~xdigit.
|
sl@0
|
234 |
///
|
sl@0
|
235 |
/// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent
|
sl@0
|
236 |
/// to /[[:^xdigit:]]/ in perl.
|
sl@0
|
237 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "xdigit".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const xdigit = {"xdigit"};
|
sl@0
|
241 |
|
sl@0
|
242 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
243 |
/// \brief Beginning of sequence assertion.
|
sl@0
|
244 |
///
|
sl@0
|
245 |
/// For the character sequence [begin, end), 'bos' matches the
|
sl@0
|
246 |
/// zero-width sub-sequence [begin, begin).
|
sl@0
|
247 |
// Empty-initialised op_proxy over an assert_bos_matcher node.
proto::op_proxy<proto::unary_op<detail::assert_bos_matcher, proto::noop_tag> > const
    bos = {};
|
sl@0
|
250 |
|
sl@0
|
251 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
252 |
/// \brief End of sequence assertion.
|
sl@0
|
253 |
///
|
sl@0
|
254 |
/// For the character sequence [begin, end),
|
sl@0
|
255 |
/// 'eos' matches the zero-width sub-sequence [end, end).
|
sl@0
|
256 |
///
|
sl@0
|
257 |
/// \attention Unlike the perl end of sequence assertion \$, 'eos' will
|
sl@0
|
258 |
/// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To
|
sl@0
|
259 |
/// get that behavior, use (!_n >> eos).
|
sl@0
|
260 |
// Empty-initialised op_proxy over an assert_eos_matcher node.
proto::op_proxy<proto::unary_op<detail::assert_eos_matcher, proto::noop_tag> > const
    eos = {};
|
sl@0
|
263 |
|
sl@0
|
264 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
265 |
/// \brief Beginning of line assertion.
|
sl@0
|
266 |
///
|
sl@0
|
267 |
/// 'bol' matches the zero-width sub-sequence
|
sl@0
|
268 |
/// immediately following a logical newline sequence. The regex traits
|
sl@0
|
269 |
/// are used to determine what constitutes a logical newline sequence.
|
sl@0
|
270 |
// Empty-initialised op_proxy over an assert_bol_placeholder node.
proto::op_proxy<proto::unary_op<detail::assert_bol_placeholder, proto::noop_tag> > const
    bol = {};
|
sl@0
|
273 |
|
sl@0
|
274 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
275 |
/// \brief End of line assertion.
|
sl@0
|
276 |
///
|
sl@0
|
277 |
/// 'eol' matches the zero-width sub-sequence
|
sl@0
|
278 |
/// immediately preceding a logical newline sequence. The regex traits
|
sl@0
|
279 |
/// are used to determine what constitutes a logical newline sequence.
|
sl@0
|
280 |
// Empty-initialised op_proxy over an assert_eol_placeholder node.
proto::op_proxy<proto::unary_op<detail::assert_eol_placeholder, proto::noop_tag> > const
    eol = {};
|
sl@0
|
283 |
|
sl@0
|
284 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
285 |
/// \brief Beginning of word assertion.
|
sl@0
|
286 |
///
|
sl@0
|
287 |
/// 'bow' matches the zero-width sub-sequence
|
sl@0
|
288 |
/// immediately following a non-word character and preceding a word character.
|
sl@0
|
289 |
/// The regex traits are used to determine what constitutes a word character.
|
sl@0
|
290 |
// Empty-initialised op_proxy over a detail::assert_word_begin node.
proto::op_proxy<proto::unary_op<detail::assert_word_begin, proto::noop_tag> > const
    bow = {};
|
sl@0
|
293 |
|
sl@0
|
294 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
295 |
/// \brief End of word assertion.
|
sl@0
|
296 |
///
|
sl@0
|
297 |
/// 'eow' matches the zero-width sub-sequence
|
sl@0
|
298 |
/// immediately following a word character and preceding a non-word character.
|
sl@0
|
299 |
/// The regex traits are used to determine what constitutes a word character.
|
sl@0
|
300 |
// Empty-initialised op_proxy over a detail::assert_word_end node.
proto::op_proxy<proto::unary_op<detail::assert_word_end, proto::noop_tag> > const
    eow = {};
|
sl@0
|
303 |
|
sl@0
|
304 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
305 |
/// \brief Word boundary assertion.
|
sl@0
|
306 |
///
|
sl@0
|
307 |
/// '_b' matches the zero-width sub-sequence at the beginning or the end of a word.
|
sl@0
|
308 |
/// It is equivalent to (bow | eow). The regex traits are used to determine what
|
sl@0
|
309 |
/// constitutes a word character. To match a non-word boundary, use ~_b.
|
sl@0
|
310 |
///
|
sl@0
|
311 |
/// \attention _b is like \\b in perl. ~_b is like \\B in perl.
|
sl@0
|
312 |
// Empty-initialised op_proxy over a detail::assert_word_boundary node.
proto::op_proxy<proto::unary_op<detail::assert_word_boundary, proto::noop_tag> > const
    _b = {};
|
sl@0
|
315 |
|
sl@0
|
316 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
317 |
/// \brief Matches a word character.
|
sl@0
|
318 |
///
|
sl@0
|
319 |
/// '_w' matches a single word character. The regex traits are used to determine which
|
sl@0
|
320 |
/// characters are word characters. Use ~_w to match a character that is not a word
|
sl@0
|
321 |
/// character.
|
sl@0
|
322 |
///
|
sl@0
|
323 |
/// \attention _w is like \\w in perl. ~_w is like \\W in perl.
|
sl@0
|
324 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "w".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const _w = {"w"};
|
sl@0
|
328 |
|
sl@0
|
329 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
330 |
/// \brief Matches a digit character.
|
sl@0
|
331 |
///
|
sl@0
|
332 |
/// '_d' matches a single digit character. The regex traits are used to determine which
|
sl@0
|
333 |
/// characters are digits. Use ~_d to match a character that is not a digit
|
sl@0
|
334 |
/// character.
|
sl@0
|
335 |
///
|
sl@0
|
336 |
/// \attention _d is like \\d in perl. ~_d is like \\D in perl.
|
sl@0
|
337 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "d".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const _d = {"d"};
|
sl@0
|
341 |
|
sl@0
|
342 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
343 |
/// \brief Matches a space character.
|
sl@0
|
344 |
///
|
sl@0
|
345 |
/// '_s' matches a single space character. The regex traits are used to determine which
|
sl@0
|
346 |
/// characters are space characters. Use ~_s to match a character that is not a space
|
sl@0
|
347 |
/// character.
|
sl@0
|
348 |
///
|
sl@0
|
349 |
/// \attention _s is like \\s in perl. ~_s is like \\S in perl.
|
sl@0
|
350 |
// op_proxy over a posix_charset_placeholder node, initialised with the class name "s".
proto::op_proxy<
    proto::unary_op<detail::posix_charset_placeholder, proto::noop_tag>, char const *
> const _s = {"s"};
|
sl@0
|
354 |
|
sl@0
|
355 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
356 |
/// \brief Matches a literal newline character, '\\n'.
|
sl@0
|
357 |
///
|
sl@0
|
358 |
/// '_n' matches a single newline character, '\\n'. Use ~_n to match a character
|
sl@0
|
359 |
/// that is not a newline.
|
sl@0
|
360 |
///
|
sl@0
|
361 |
/// \attention ~_n is like '.' in perl without the /s modifier.
|
sl@0
|
362 |
// op_proxy over a literal_placeholder<char> node, initialised with '\n'.
proto::op_proxy<
    proto::unary_op<detail::literal_placeholder<char>, proto::noop_tag>, char
> const _n = {'\n'};
|
sl@0
|
366 |
|
sl@0
|
367 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
368 |
/// \brief Matches a logical newline sequence.
|
sl@0
|
369 |
///
|
sl@0
|
370 |
/// '_ln' matches a logical newline sequence. This can be any character in the
|
sl@0
|
371 |
/// line separator class, as determined by the regex traits, or the '\\r\\n' sequence.
|
sl@0
|
372 |
/// For the purpose of back-tracking, '\\r\\n' is treated as a unit.
|
sl@0
|
373 |
/// To match any one character that is not a logical newline, use ~_ln.
|
sl@0
|
374 |
// Empty-initialised op_proxy over detail::logical_newline_xpression.
proto::op_proxy<detail::logical_newline_xpression> const _ln = {};
|
sl@0
|
377 |
|
sl@0
|
378 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
379 |
/// \brief Matches any one character.
|
sl@0
|
380 |
///
|
sl@0
|
381 |
/// Match any character, similar to '.' in perl syntax with the /s modifier.
|
sl@0
|
382 |
/// '_' matches any one character, including the newline.
|
sl@0
|
383 |
///
|
sl@0
|
384 |
/// \attention To match any character except the newline, use ~_n
|
sl@0
|
385 |
// Empty-initialised op_proxy over an any_matcher node.
proto::op_proxy<proto::unary_op<detail::any_matcher, proto::noop_tag> > const
    _ = {};
|
sl@0
|
388 |
|
sl@0
|
389 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
390 |
/// \brief Reference to the current regex object
|
sl@0
|
391 |
///
|
sl@0
|
392 |
/// Useful when constructing recursive regular expression objects. The 'self'
|
sl@0
|
393 |
/// identifier is a short-hand for the current regex object. For instance,
|
sl@0
|
394 |
/// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that
|
sl@0
|
395 |
/// matches balanced parens such as "((()))".
|
sl@0
|
396 |
// Empty-initialised op_proxy over a self_placeholder node.
proto::op_proxy<proto::unary_op<detail::self_placeholder, proto::noop_tag> > const
    self = {};
|
sl@0
|
399 |
|
sl@0
|
400 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
401 |
/// \brief Used to create character sets.
|
sl@0
|
402 |
///
|
sl@0
|
403 |
/// There are two ways to create character sets with the 'set' identifier. The
|
sl@0
|
404 |
/// easiest is to create a comma-separated list of the characters in the set,
|
sl@0
|
405 |
/// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other
|
sl@0
|
406 |
/// way is to define the set as an argument to the set subscript operator.
|
sl@0
|
407 |
/// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b',
|
sl@0
|
408 |
/// 'c' or a digit character.
|
sl@0
|
409 |
///
|
sl@0
|
410 |
/// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c')
|
sl@0
|
411 |
/// will match any character that is not an 'a', 'b', or 'c'.
|
sl@0
|
412 |
///
|
sl@0
|
413 |
/// Sets can be composed of other, possibly complemented, sets. For instance,
|
sl@0
|
414 |
/// set[ ~digit | ~(set= 'a','b','c') ].
|
sl@0
|
415 |
// Empty-initialised op_proxy over detail::set_initializer_type.
proto::op_proxy<detail::set_initializer_type> const set = {};
|
sl@0
|
418 |
|
sl@0
|
419 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
420 |
/// \brief Sub-match placeholder, like $& in Perl
|
sl@0
|
421 |
proto::op_proxy<detail::mark_tag, int> const s0 = { 0 }; // mark_tag proxy holding index 0
|
sl@0
|
422 |
|
sl@0
|
423 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
424 |
/// \brief Sub-match placeholder, like $1 in perl.
|
sl@0
|
425 |
///
|
sl@0
|
426 |
/// To create a sub-match, assign a sub-expression to the sub-match placeholder.
|
sl@0
|
427 |
/// For instance, (s1= _) will match any one character and remember which
|
sl@0
|
428 |
/// character was matched in the 1st sub-match. Later in the pattern, you can
|
sl@0
|
429 |
/// refer back to the sub-match. For instance, (s1= _) >> s1 will match any
|
sl@0
|
430 |
/// character, and then match the same character again.
|
sl@0
|
431 |
///
|
sl@0
|
432 |
/// After a successful regex_match() or regex_search(), the sub-match placeholders
|
sl@0
|
433 |
/// can be used to index into the match_results\<\> object to retrieve the Nth
|
sl@0
|
434 |
/// sub-match.
|
sl@0
|
435 |
// mark_tag proxies s1 through s9, each initialised with its own index (1..9).
proto::op_proxy<detail::mark_tag, int> const s1 = { 1 };
proto::op_proxy<detail::mark_tag, int> const s2 = { 2 };
proto::op_proxy<detail::mark_tag, int> const s3 = { 3 };
proto::op_proxy<detail::mark_tag, int> const s4 = { 4 };
proto::op_proxy<detail::mark_tag, int> const s5 = { 5 };
proto::op_proxy<detail::mark_tag, int> const s6 = { 6 };
proto::op_proxy<detail::mark_tag, int> const s7 = { 7 };
proto::op_proxy<detail::mark_tag, int> const s8 = { 8 };
proto::op_proxy<detail::mark_tag, int> const s9 = { 9 };
|
sl@0
|
444 |
|
sl@0
|
445 |
// NOTE: For the purpose of xpressive's documentation, make icase() look like an
|
sl@0
|
446 |
// ordinary function. In reality, it is a function object defined in detail/icase.hpp
|
sl@0
|
447 |
// so that it can serve double-duty as regex_constants::icase, the syntax_option_type.
|
sl@0
|
448 |
// Do the same for as_xpr(), which is actually defined in detail/static/as_xpr.hpp
|
sl@0
|
449 |
#ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
|
sl@0
|
450 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
451 |
/// \brief Makes a literal into a regular expression.
|
sl@0
|
452 |
///
|
sl@0
|
453 |
/// Use as_xpr() to turn a literal into a regular expression. For instance,
|
sl@0
|
454 |
/// "foo" >> "bar" will not compile because both operands to the right-shift
|
sl@0
|
455 |
/// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar"
|
sl@0
|
456 |
/// instead.
|
sl@0
|
457 |
///
|
sl@0
|
458 |
/// You can use as_xpr() with character literals in addition to string literals.
|
sl@0
|
459 |
/// For instance, as_xpr('a') will match an 'a'. You can also complement a
|
sl@0
|
460 |
/// character literal, as with ~as_xpr('a'). This will match any one character
|
sl@0
|
461 |
/// that is not an 'a'.
|
sl@0
|
462 |
template<typename Literal>
|
sl@0
|
463 |
inline typename detail::as_xpr_type<Literal>::const_reference
|
sl@0
|
464 |
as_xpr(Literal const &literal)
|
sl@0
|
465 |
{
|
sl@0
|
466 |
return detail::as_xpr_type<Literal>::call(xpr);
|
sl@0
|
467 |
}
|
sl@0
|
468 |
|
sl@0
|
469 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
470 |
/// \brief Makes a sub-expression case-insensitive.
|
sl@0
|
471 |
///
|
sl@0
|
472 |
/// Use icase() to make a sub-expression case-insensitive. For instance,
|
sl@0
|
473 |
/// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by
|
sl@0
|
474 |
/// "bar" irrespective of case.
|
sl@0
|
475 |
template<typename Xpr>
|
sl@0
|
476 |
inline proto::binary_op<detail::icase_modifier, typename detail::as_xpr_type<Xpr>::type, modifier_tag> const
|
sl@0
|
477 |
icase(Xpr const &xpr)
|
sl@0
|
478 |
{
|
sl@0
|
479 |
detail::icase_modifier mod;
|
sl@0
|
480 |
return proto::make_op<modifier_tag>(mod, as_xpr(xpr));
|
sl@0
|
481 |
}
|
sl@0
|
482 |
#endif
|
sl@0
|
483 |
|
sl@0
|
484 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
485 |
/// \brief Embed a regex object by reference.
|
sl@0
|
486 |
///
|
sl@0
|
487 |
/// \param rex The basic_regex object to embed by reference.
|
sl@0
|
488 |
template<typename BidiIter>
|
sl@0
|
489 |
inline proto::unary_op<detail::regex_placeholder<BidiIter, true>, proto::noop_tag> const
|
sl@0
|
490 |
by_ref(basic_regex<BidiIter> const &rex)
|
sl@0
|
491 |
{
|
sl@0
|
492 |
typedef detail::core_access<BidiIter> access;
|
sl@0
|
493 |
shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
|
sl@0
|
494 |
return proto::noop(detail::regex_placeholder<BidiIter, true>(impl));
|
sl@0
|
495 |
}
|
sl@0
|
496 |
|
sl@0
|
497 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
498 |
/// \brief Match a range of characters.
|
sl@0
|
499 |
///
|
sl@0
|
500 |
/// Match any character in the range [ch_min, ch_max].
|
sl@0
|
501 |
///
|
sl@0
|
502 |
/// \param ch_min The lower end of the range to match.
|
sl@0
|
503 |
/// \param ch_max The upper end of the range to match.
|
sl@0
|
504 |
template<typename Char>
|
sl@0
|
505 |
inline proto::unary_op<detail::range_placeholder<Char>, proto::noop_tag> const
|
sl@0
|
506 |
range(Char ch_min, Char ch_max)
|
sl@0
|
507 |
{
|
sl@0
|
508 |
return proto::noop(detail::range_placeholder<Char>(ch_min, ch_max));
|
sl@0
|
509 |
}
|
sl@0
|
510 |
|
sl@0
|
511 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
512 |
/// \brief Make a sub-expression optional. Equivalent to !as_xpr(xpr).
|
sl@0
|
513 |
///
|
sl@0
|
514 |
/// \param xpr The sub-expression to make optional.
|
sl@0
|
515 |
template<typename Xpr>
|
sl@0
|
516 |
inline proto::unary_op
|
sl@0
|
517 |
<
|
sl@0
|
518 |
typename detail::as_xpr_type<Xpr>::type
|
sl@0
|
519 |
, proto::logical_not_tag
|
sl@0
|
520 |
> const
|
sl@0
|
521 |
optional(Xpr const &xpr)
|
sl@0
|
522 |
{
|
sl@0
|
523 |
return !as_xpr(xpr);
|
sl@0
|
524 |
}
|
sl@0
|
525 |
|
sl@0
|
526 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
527 |
/// \brief Repeat a sub-expression multiple times.
|
sl@0
|
528 |
///
|
sl@0
|
529 |
/// There are two forms of the repeat\<\>() function template. To match a
|
sl@0
|
530 |
/// sub-expression N times, use repeat\<N\>(xpr). To match a sub-expression
|
sl@0
|
531 |
/// from M to N times, use repeat\<M,N\>(xpr).
|
sl@0
|
532 |
///
|
sl@0
|
533 |
/// The repeat\<\>() function creates a greedy quantifier. To make the quantifier
|
sl@0
|
534 |
/// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(xpr).
|
sl@0
|
535 |
///
|
sl@0
|
536 |
/// \param xpr The sub-expression to repeat.
|
sl@0
|
537 |
template<unsigned int Min, unsigned int Max, typename Xpr>
|
sl@0
|
538 |
inline proto::unary_op
|
sl@0
|
539 |
<
|
sl@0
|
540 |
typename detail::as_xpr_type<Xpr>::type
|
sl@0
|
541 |
, detail::generic_quant_tag<Min, Max>
|
sl@0
|
542 |
> const
|
sl@0
|
543 |
repeat(Xpr const &xpr)
|
sl@0
|
544 |
{
|
sl@0
|
545 |
return proto::make_op<detail::generic_quant_tag<Min, Max> >(as_xpr(xpr));
|
sl@0
|
546 |
}
|
sl@0
|
547 |
|
sl@0
|
548 |
/// \overload
|
sl@0
|
549 |
template<unsigned int Count, typename Xpr2>
|
sl@0
|
550 |
inline proto::unary_op
|
sl@0
|
551 |
<
|
sl@0
|
552 |
typename detail::as_xpr_type<Xpr2>::type
|
sl@0
|
553 |
, detail::generic_quant_tag<Count, Count>
|
sl@0
|
554 |
> const
|
sl@0
|
555 |
repeat(Xpr2 const &xpr)
|
sl@0
|
556 |
{
|
sl@0
|
557 |
return proto::make_op<detail::generic_quant_tag<Count, Count> >(as_xpr(xpr));
|
sl@0
|
558 |
}
|
sl@0
|
559 |
|
sl@0
|
560 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
561 |
/// \brief Create an independent sub-expression.
|
sl@0
|
562 |
///
|
sl@0
|
563 |
/// Turn off back-tracking for a sub-expression. Any branches or repeats within
|
sl@0
|
564 |
/// the sub-expression will match only one way, and no other alternatives are
|
sl@0
|
565 |
/// tried.
|
sl@0
|
566 |
///
|
sl@0
|
567 |
/// \attention keep(xpr) is equivalent to the perl (?>...) extension.
|
sl@0
|
568 |
///
|
sl@0
|
569 |
/// \param xpr The sub-expression to modify.
|
sl@0
|
570 |
template<typename Xpr>
|
sl@0
|
571 |
inline proto::unary_op
|
sl@0
|
572 |
<
|
sl@0
|
573 |
typename detail::as_xpr_type<Xpr>::type
|
sl@0
|
574 |
, detail::keeper_tag
|
sl@0
|
575 |
> const
|
sl@0
|
576 |
keep(Xpr const &xpr)
|
sl@0
|
577 |
{
|
sl@0
|
578 |
return proto::make_op<detail::keeper_tag>(as_xpr(xpr));
|
sl@0
|
579 |
}
|
sl@0
|
580 |
|
sl@0
|
581 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
582 |
/// \brief Look-ahead assertion.
|
sl@0
|
583 |
///
|
sl@0
|
584 |
/// before(xpr) succeeds if the xpr sub-expression would match at the current
|
sl@0
|
585 |
/// position in the sequence, but xpr is not included in the match. For instance,
|
sl@0
|
586 |
/// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be
|
sl@0
|
587 |
/// negated with the bit-complement operator.
|
sl@0
|
588 |
///
|
sl@0
|
589 |
/// \attention before(xpr) is equivalent to the perl (?=...) extension.
|
sl@0
|
590 |
/// ~before(xpr) is a negative look-ahead assertion, equivalent to the
|
sl@0
|
591 |
/// perl (?!...) extension.
|
sl@0
|
592 |
///
|
sl@0
|
593 |
/// \param xpr The sub-expression to put in the look-ahead assertion.
|
sl@0
|
594 |
template<typename Xpr>
|
sl@0
|
595 |
inline proto::unary_op
|
sl@0
|
596 |
<
|
sl@0
|
597 |
typename detail::as_xpr_type<Xpr>::type
|
sl@0
|
598 |
, detail::lookahead_tag<true>
|
sl@0
|
599 |
> const
|
sl@0
|
600 |
before(Xpr const &xpr)
|
sl@0
|
601 |
{
|
sl@0
|
602 |
return proto::make_op<detail::lookahead_tag<true> >(as_xpr(xpr));
|
sl@0
|
603 |
}
|
sl@0
|
604 |
|
sl@0
|
605 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
606 |
/// \brief Look-behind assertion.
|
sl@0
|
607 |
///
|
sl@0
|
608 |
/// after(xpr) succeeds if the xpr sub-expression would match at the current
|
sl@0
|
609 |
/// position minus N in the sequence, where N is the width of xpr. xpr is not included in
|
sl@0
|
610 |
/// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind
|
sl@0
|
611 |
/// assertions can be negated with the bit-complement operator.
|
sl@0
|
612 |
///
|
sl@0
|
613 |
/// \attention after(xpr) is equivalent to the perl (?<=...) extension.
|
sl@0
|
614 |
/// ~after(xpr) is a negative look-behind assertion, equivalent to the
|
sl@0
|
615 |
/// perl (?<!...) extension.
|
sl@0
|
616 |
///
|
sl@0
|
617 |
/// \param xpr The sub-expression to put in the look-behind assertion.
|
sl@0
|
618 |
///
|
sl@0
|
619 |
/// \pre xpr cannot match a variable number of characters.
|
sl@0
|
620 |
template<typename Xpr>
|
sl@0
|
621 |
inline proto::unary_op
|
sl@0
|
622 |
<
|
sl@0
|
623 |
typename detail::as_xpr_type<Xpr>::type
|
sl@0
|
624 |
, detail::lookbehind_tag<true>
|
sl@0
|
625 |
> const
|
sl@0
|
626 |
after(Xpr const &xpr)
|
sl@0
|
627 |
{
|
sl@0
|
628 |
return proto::make_op<detail::lookbehind_tag<true> >(as_xpr(xpr));
|
sl@0
|
629 |
}
|
sl@0
|
630 |
|
sl@0
|
631 |
///////////////////////////////////////////////////////////////////////////////
|
sl@0
|
632 |
/// \brief Specify a regex traits or a std::locale.
|
sl@0
|
633 |
///
|
sl@0
|
634 |
/// imbue() instructs the regex engine to use the specified traits or locale
|
sl@0
|
635 |
/// when matching the regex. The entire expression must use the same traits/locale.
|
sl@0
|
636 |
/// For instance, the following specifies a locale for use with a regex:
|
sl@0
|
637 |
/// std::locale loc;
|
sl@0
|
638 |
/// sregex rx = imbue(loc)(+digit);
|
sl@0
|
639 |
///
|
sl@0
|
640 |
/// \param loc The std::locale or regex traits object.
|
sl@0
|
641 |
template<typename Locale>
|
sl@0
|
642 |
inline detail::modifier_op<detail::locale_modifier<Locale> > const
|
sl@0
|
643 |
imbue(Locale const &loc)
|
sl@0
|
644 |
{
|
sl@0
|
645 |
detail::modifier_op<detail::locale_modifier<Locale> > mod =
|
sl@0
|
646 |
{
|
sl@0
|
647 |
detail::locale_modifier<Locale>(loc)
|
sl@0
|
648 |
, regex_constants::ECMAScript
|
sl@0
|
649 |
};
|
sl@0
|
650 |
return mod;
|
sl@0
|
651 |
}
|
sl@0
|
652 |
|
sl@0
|
653 |
}} // namespace boost::xpressive
|
sl@0
|
654 |
|
sl@0
|
655 |
#endif
|