os/ossrv/ossrv_pub/boost_apis/boost/xpressive/regex_compiler.hpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
///////////////////////////////////////////////////////////////////////////////
sl@0
     2
/// \file regex_compiler.hpp
sl@0
     3
/// Contains the definition of regex_compiler, a factory for building regex objects
sl@0
     4
/// from strings.
sl@0
     5
//
sl@0
     6
//  Copyright 2004 Eric Niebler. Distributed under the Boost
sl@0
     7
//  Software License, Version 1.0. (See accompanying file
sl@0
     8
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
sl@0
     9
sl@0
    10
#ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
sl@0
    11
#define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
sl@0
    12
sl@0
    13
// MS compatible compilers support #pragma once
sl@0
    14
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
sl@0
    15
# pragma once
sl@0
    16
#endif
sl@0
    17
sl@0
    18
#include <boost/xpressive/basic_regex.hpp>
sl@0
    19
#include <boost/xpressive/detail/dynamic/parser.hpp>
sl@0
    20
#include <boost/xpressive/detail/dynamic/parse_charset.hpp>
sl@0
    21
#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
sl@0
    22
#include <boost/xpressive/detail/dynamic/parser_traits.hpp>
sl@0
    23
#include <boost/xpressive/detail/core/linker.hpp>
sl@0
    24
#include <boost/xpressive/detail/core/optimize.hpp>
sl@0
    25
sl@0
    26
namespace boost { namespace xpressive
sl@0
    27
{
sl@0
    28
sl@0
    29
///////////////////////////////////////////////////////////////////////////////
sl@0
    30
// regex_compiler
sl@0
    31
//
sl@0
    32
/// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
sl@0
    33
///
sl@0
    34
/// Class template regex_compiler is used to construct a basic_regex object from a string. The string
sl@0
    35
/// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
sl@0
    36
/// after which all basic_regex objects created with that regex_compiler object will use that locale.
sl@0
    37
/// After creating a regex_compiler object, and optionally imbueing it with a locale, you can call the
sl@0
    38
/// compile() method to construct a basic_regex object, passing it the string representing the regular
sl@0
    39
/// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
sl@0
    40
/// objects compiled from the same string will have different regex_id's.
sl@0
    41
template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
sl@0
    42
struct regex_compiler
sl@0
    43
{
sl@0
    44
    // Public member types.
    // iterator_type is the BidiIter template argument.
    typedef BidiIter iterator_type;
sl@0
    45
    // char_type is the value type of BidiIter, obtained through iterator_value.
    typedef typename iterator_value<BidiIter>::type char_type;
sl@0
    46
    // string_type is the std::basic_string over char_type; compile() takes its pattern as one.
    typedef std::basic_string<char_type> string_type;
sl@0
    47
    // flag_type is the syntax option type accepted by compile().
    typedef regex_constants::syntax_option_type flag_type;
sl@0
    48
    // traits_type is the RegexTraits template argument.
    typedef RegexTraits traits_type;
sl@0
    49
    // char_class_type and locale_type are taken from the regex traits.
    typedef typename traits_type::char_class_type char_class_type;
sl@0
    50
    typedef typename traits_type::locale_type locale_type;
sl@0
    51
sl@0
    52
    /// Construct a regex_compiler, initialising its compiler traits from \c traits.
    ///
    /// \param traits The regex traits used to initialise the compiler traits.
    explicit regex_compiler(RegexTraits const &traits = RegexTraits())
      : mark_count_(0), hidden_mark_count_(0), traits_(traits), upper_(0)
    {
        // look up and cache the "upper" character class; parse_atom uses it
        // when it builds the matcher for a character-class escape
        traits_type &rx = this->rxtraits();
        this->upper_ = lookup_classname(rx, "upper");
        BOOST_ASSERT(0 != this->upper_);
    }
sl@0
    61
sl@0
    62
    ///////////////////////////////////////////////////////////////////////////
sl@0
    63
    // imbue
sl@0
    64
    /// Specify the locale to be used by a regex_compiler.
sl@0
    65
    ///
sl@0
    66
    /// \param loc The locale that this regex_compiler should use.
sl@0
    67
    /// \return The previous locale.
sl@0
    68
    locale_type imbue(locale_type loc)
sl@0
    69
    {
sl@0
    70
        locale_type oldloc = this->traits_.imbue(loc);
sl@0
    71
        this->upper_ = lookup_classname(this->rxtraits(), "upper");
sl@0
    72
        BOOST_ASSERT(0 != this->upper_);
sl@0
    73
        return oldloc;
sl@0
    74
    }
sl@0
    75
sl@0
    76
    ///////////////////////////////////////////////////////////////////////////
sl@0
    77
    // getloc
sl@0
    78
    /// Get the locale used by a regex_compiler.
sl@0
    79
    ///
sl@0
    80
    /// \return The locale that this regex_compiler uses.
sl@0
    81
    locale_type getloc() const
    {
        // the query is forwarded to the compiler traits
        locale_type current = this->traits_.getloc();
        return current;
    }
sl@0
    85
sl@0
    86
    ///////////////////////////////////////////////////////////////////////////
sl@0
    87
    // compile
sl@0
    88
    /// Builds a basic_regex object from a std::string.
sl@0
    89
    ///
sl@0
    90
    /// \param  pat A std::string containing the regular expression pattern.
sl@0
    91
    /// \param  flags Optional bitmask that determines how the pat string is interpreted. (See syntax_option_type.)
sl@0
    92
    /// \return A basic_regex object corresponding to the regular expression represented by the string.
sl@0
    93
    /// \pre    The std::string pat contains a valid string-based representation of a regular expression.
sl@0
    94
    /// \throw  regex_error when the string has invalid regular expression syntax.
sl@0
    95
    basic_regex<BidiIter> compile(string_type pat, flag_type flags = regex_constants::ECMAScript)
    {
        // clear the state left over from any previous compile, then apply the caller's flags
        this->reset();
        this->traits_.flags(flags);

        string_iterator cur = pat.begin();
        string_iterator const last = pat.end();

        // the top level of a regex is a list of alternates; if parsing stops
        // before the end of the pattern, that is reported as error_paren
        alternates_list top_level;
        this->parse_alternates(cur, last, top_level);
        detail::ensure(cur == last, regex_constants::error_paren, "mismatched parenthesis");

        // turn the alternates into one matcher sequence and append an end_matcher
        detail::sequence<BidiIter> xpr = detail::alternates_to_matchable(top_level, alternates_factory());
        xpr += detail::make_dynamic_xpression<BidiIter>(detail::end_matcher());

        // visit the regex parse tree so the linker can fill in the back-pointers
        detail::xpression_linker<char_type> linker(this->rxtraits());
        xpr.first->link(linker);

        // gather everything the finished regex needs into a regex_impl object
        detail::regex_impl<BidiIter> impl;
        impl.xpr_ = xpr.first;
        impl.traits_.reset(new RegexTraits(this->rxtraits()));
        impl.mark_count_ = this->mark_count_;
        impl.hidden_mark_count_ = this->hidden_mark_count_;

        // optimization: get the peek chars OR the boyer-moore search string
        detail::optimize_regex(impl, this->rxtraits(), detail::is_random<BidiIter>());

        return detail::core_access<BidiIter>::make_regex(impl);
    }
sl@0
   127
sl@0
   128
private:
sl@0
   129
sl@0
   130
    // Iterator over the pattern string; every parse_* member takes its cursor as one.
    typedef typename string_type::const_iterator string_iterator;
sl@0
   131
    // List of sequences filled in by parse_alternates, one entry per alternate.
    typedef std::list<detail::sequence<BidiIter> > alternates_list;
sl@0
   132
    // Result type of parse_escape.
    typedef detail::escape_value<char_type, char_class_type> escape_value;
sl@0
   133
    // Factory type handed to alternates_to_matchable() and quantify().
    typedef detail::alternates_factory_impl<BidiIter, traits_type> alternates_factory;
sl@0
   134
sl@0
   135
    ///////////////////////////////////////////////////////////////////////////
sl@0
   136
    // reset
sl@0
   137
    /// INTERNAL ONLY
sl@0
   138
    void reset()
sl@0
   139
    {
sl@0
   140
        this->mark_count_ = 0;
sl@0
   141
        this->hidden_mark_count_ = 0;
sl@0
   142
        this->traits_.flags(regex_constants::ECMAScript);
sl@0
   143
    }
sl@0
   144
sl@0
   145
    ///////////////////////////////////////////////////////////////////////////
sl@0
   146
    // regex_traits
sl@0
   147
    /// INTERNAL ONLY
sl@0
   148
    traits_type &rxtraits()
sl@0
   149
    {
sl@0
   150
        return this->traits_.traits();
sl@0
   151
    }
sl@0
   152
sl@0
   153
    ///////////////////////////////////////////////////////////////////////////
sl@0
   154
    // regex_traits
sl@0
   155
    /// INTERNAL ONLY
sl@0
   156
    traits_type const &rxtraits() const
sl@0
   157
    {
sl@0
   158
        return this->traits_.traits();
sl@0
   159
    }
sl@0
   160
sl@0
   161
    ///////////////////////////////////////////////////////////////////////////
sl@0
   162
    // parse_alternates
sl@0
   163
    /// INTERNAL ONLY
sl@0
   164
    void parse_alternates(string_iterator &begin, string_iterator end, alternates_list &alternates)
sl@0
   165
    {
sl@0
   166
        using namespace regex_constants;
sl@0
   167
        string_iterator old_begin;
sl@0
   168
sl@0
   169
        do
sl@0
   170
        {
sl@0
   171
            alternates.push_back(this->parse_sequence(begin, end));
sl@0
   172
            old_begin = begin;
sl@0
   173
        }
sl@0
   174
        while(begin != end && token_alternate == this->traits_.get_token(begin, end));
sl@0
   175
sl@0
   176
        begin = old_begin;
sl@0
   177
    }
sl@0
   178
sl@0
   179
    ///////////////////////////////////////////////////////////////////////////
sl@0
   180
    // parse_group
sl@0
   181
    /// INTERNAL ONLY
sl@0
   182
    detail::sequence<BidiIter> parse_group(string_iterator &begin, string_iterator end)
sl@0
   183
    {
sl@0
   184
        using namespace regex_constants;
sl@0
   185
        int mark_nbr = 0;
sl@0
   186
        bool keeper = false;
sl@0
   187
        bool lookahead = false;
sl@0
   188
        bool lookbehind = false;
sl@0
   189
        bool negative = false;
sl@0
   190
        std::size_t old_mark_count = this->mark_count_;
sl@0
   191
sl@0
   192
        detail::sequence<BidiIter> seq, seq_end;
sl@0
   193
        string_iterator tmp = string_iterator();
sl@0
   194
sl@0
   195
        syntax_option_type old_flags = this->traits_.flags();
sl@0
   196
sl@0
   197
        switch(this->traits_.get_group_type(begin, end))
sl@0
   198
        {
sl@0
   199
        case token_no_mark:
sl@0
   200
            // Don't process empty groups like (?:) or (?i)
sl@0
   201
            // BUGBUG this doesn't handle the degenerate (?:)+ correctly
sl@0
   202
            if(token_group_end == this->traits_.get_token(tmp = begin, end))
sl@0
   203
            {
sl@0
   204
                return this->parse_atom(begin = tmp, end);
sl@0
   205
            }
sl@0
   206
            break;
sl@0
   207
sl@0
   208
        case token_negative_lookahead:
sl@0
   209
            negative = true; // fall-through
sl@0
   210
        case token_positive_lookahead:
sl@0
   211
            lookahead = true;
sl@0
   212
            seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
sl@0
   213
            break;
sl@0
   214
sl@0
   215
        case token_negative_lookbehind:
sl@0
   216
            negative = true; // fall-through
sl@0
   217
        case token_positive_lookbehind:
sl@0
   218
            lookbehind = true;
sl@0
   219
            seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
sl@0
   220
            break;
sl@0
   221
sl@0
   222
        case token_independent_sub_expression:
sl@0
   223
            keeper = true;
sl@0
   224
            seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
sl@0
   225
            break;
sl@0
   226
sl@0
   227
        case token_comment:
sl@0
   228
            while(detail::ensure(begin != end, error_paren, "mismatched parenthesis"))
sl@0
   229
            {
sl@0
   230
                switch(this->traits_.get_token(begin, end))
sl@0
   231
                {
sl@0
   232
                case token_group_end: return this->parse_atom(begin, end);
sl@0
   233
                case token_escape: detail::ensure(begin != end, error_escape, "incomplete escape sequence");
sl@0
   234
                case token_literal: ++begin;
sl@0
   235
                default:;
sl@0
   236
                }
sl@0
   237
            }
sl@0
   238
            break;
sl@0
   239
sl@0
   240
        default:
sl@0
   241
            mark_nbr = static_cast<int>(++this->mark_count_);
sl@0
   242
            seq = detail::make_dynamic_xpression<BidiIter>(detail::mark_begin_matcher(mark_nbr));
sl@0
   243
            seq_end = detail::make_dynamic_xpression<BidiIter>(detail::mark_end_matcher(mark_nbr));
sl@0
   244
            break;
sl@0
   245
        }
sl@0
   246
sl@0
   247
        // alternates
sl@0
   248
        alternates_list alternates;
sl@0
   249
        this->parse_alternates(begin, end, alternates);
sl@0
   250
        detail::ensure
sl@0
   251
        (
sl@0
   252
            begin != end && token_group_end == this->traits_.get_token(begin, end)
sl@0
   253
          , error_paren
sl@0
   254
          , "mismatched parenthesis"
sl@0
   255
        );
sl@0
   256
sl@0
   257
        seq += detail::alternates_to_matchable(alternates, alternates_factory());
sl@0
   258
        seq += seq_end;
sl@0
   259
sl@0
   260
        typedef shared_ptr<detail::matchable<BidiIter> const> xpr_type;
sl@0
   261
        bool do_save = (this->mark_count_ != old_mark_count);
sl@0
   262
sl@0
   263
        if(lookahead)
sl@0
   264
        {
sl@0
   265
            detail::lookahead_matcher<xpr_type> lookahead(seq.first, negative, do_save);
sl@0
   266
            seq = detail::make_dynamic_xpression<BidiIter>(lookahead);
sl@0
   267
        }
sl@0
   268
        else if(lookbehind)
sl@0
   269
        {
sl@0
   270
            detail::lookbehind_matcher<xpr_type> lookbehind(seq.first, negative, do_save);
sl@0
   271
            seq = detail::make_dynamic_xpression<BidiIter>(lookbehind);
sl@0
   272
        }
sl@0
   273
        else if(keeper) // independent sub-expression
sl@0
   274
        {
sl@0
   275
            detail::keeper_matcher<xpr_type> keeper(seq.first, do_save);
sl@0
   276
            seq = detail::make_dynamic_xpression<BidiIter>(keeper);
sl@0
   277
        }
sl@0
   278
sl@0
   279
        // restore the modifiers
sl@0
   280
        this->traits_.flags(old_flags);
sl@0
   281
        return seq;
sl@0
   282
    }
sl@0
   283
sl@0
   284
    ///////////////////////////////////////////////////////////////////////////
sl@0
   285
    // parse_charset
sl@0
   286
    /// INTERNAL ONLY
sl@0
   287
    detail::sequence<BidiIter> parse_charset(string_iterator &begin, string_iterator end)
sl@0
   288
    {
sl@0
   289
        detail::compound_charset<traits_type> chset;
sl@0
   290
sl@0
   291
        // call out to a helper to actually parse the character set
sl@0
   292
        detail::parse_charset(begin, end, chset, this->traits_);
sl@0
   293
sl@0
   294
        return detail::make_charset_xpression<BidiIter>
sl@0
   295
        (
sl@0
   296
            chset
sl@0
   297
          , this->rxtraits()
sl@0
   298
          , this->traits_.flags()
sl@0
   299
        );
sl@0
   300
    }
sl@0
   301
sl@0
   302
    ///////////////////////////////////////////////////////////////////////////
sl@0
   303
    // parse_atom
sl@0
   304
    /// INTERNAL ONLY
sl@0
   305
    detail::sequence<BidiIter> parse_atom(string_iterator &begin, string_iterator end)
sl@0
   306
    {
sl@0
   307
        using namespace regex_constants;
sl@0
   308
        escape_value esc = { 0, 0, 0, detail::escape_char };
sl@0
   309
        string_iterator old_begin = begin;
sl@0
   310
sl@0
   311
        switch(this->traits_.get_token(begin, end))
sl@0
   312
        {
sl@0
   313
        case token_literal:
sl@0
   314
            return detail::make_literal_xpression<BidiIter>
sl@0
   315
            (
sl@0
   316
                this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
sl@0
   317
            );
sl@0
   318
sl@0
   319
        case token_any:
sl@0
   320
            return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());
sl@0
   321
sl@0
   322
        case token_assert_begin_sequence:
sl@0
   323
            return detail::make_dynamic_xpression<BidiIter>(detail::assert_bos_matcher());
sl@0
   324
sl@0
   325
        case token_assert_end_sequence:
sl@0
   326
            return detail::make_dynamic_xpression<BidiIter>(detail::assert_eos_matcher());
sl@0
   327
sl@0
   328
        case token_assert_begin_line:
sl@0
   329
            return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());
sl@0
   330
sl@0
   331
        case token_assert_end_line:
sl@0
   332
            return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());
sl@0
   333
sl@0
   334
        case token_assert_word_boundary:
sl@0
   335
            return detail::make_assert_word<BidiIter>(detail::word_boundary<true>(), this->rxtraits());
sl@0
   336
sl@0
   337
        case token_assert_not_word_boundary:
sl@0
   338
            return detail::make_assert_word<BidiIter>(detail::word_boundary<false>(), this->rxtraits());
sl@0
   339
sl@0
   340
        case token_assert_word_begin:
sl@0
   341
            return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());
sl@0
   342
sl@0
   343
        case token_assert_word_end:
sl@0
   344
            return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());
sl@0
   345
sl@0
   346
        case token_escape:
sl@0
   347
            esc = this->parse_escape(begin, end);
sl@0
   348
            switch(esc.type_)
sl@0
   349
            {
sl@0
   350
            case detail::escape_mark:
sl@0
   351
                return detail::make_backref_xpression<BidiIter>
sl@0
   352
                (
sl@0
   353
                    esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
sl@0
   354
                );
sl@0
   355
            case detail::escape_char:
sl@0
   356
                return detail::make_char_xpression<BidiIter>
sl@0
   357
                (
sl@0
   358
                    esc.ch_, this->traits_.flags(), this->rxtraits()
sl@0
   359
                );
sl@0
   360
            case detail::escape_class:
sl@0
   361
                return detail::make_posix_charset_xpression<BidiIter>
sl@0
   362
                (
sl@0
   363
                    esc.class_
sl@0
   364
                  , this->rxtraits().isctype(*begin++, this->upper_)
sl@0
   365
                  , this->traits_.flags()
sl@0
   366
                  , this->rxtraits()
sl@0
   367
                );
sl@0
   368
            }
sl@0
   369
sl@0
   370
        case token_group_begin:
sl@0
   371
            return this->parse_group(begin, end);
sl@0
   372
sl@0
   373
        case token_charset_begin:
sl@0
   374
            return this->parse_charset(begin, end);
sl@0
   375
sl@0
   376
        case token_invalid_quantifier:
sl@0
   377
            throw regex_error(error_badrepeat, "quantifier not expected");
sl@0
   378
sl@0
   379
        case token_quote_meta_begin:
sl@0
   380
            return detail::make_literal_xpression<BidiIter>
sl@0
   381
            (
sl@0
   382
                this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
sl@0
   383
            );
sl@0
   384
sl@0
   385
        case token_quote_meta_end:
sl@0
   386
            throw regex_error
sl@0
   387
            (
sl@0
   388
                error_escape
sl@0
   389
              , "found quote-meta end without corresponding quote-meta begin"
sl@0
   390
            );
sl@0
   391
sl@0
   392
        case token_end_of_pattern:
sl@0
   393
            break;
sl@0
   394
sl@0
   395
        default:
sl@0
   396
            begin = old_begin;
sl@0
   397
            break;
sl@0
   398
        }
sl@0
   399
sl@0
   400
        return detail::sequence<BidiIter>();
sl@0
   401
    }
sl@0
   402
sl@0
   403
    ///////////////////////////////////////////////////////////////////////////
sl@0
   404
    // parse_quant
sl@0
   405
    /// INTERNAL ONLY
sl@0
   406
    detail::sequence<BidiIter> parse_quant(string_iterator &begin, string_iterator end)
sl@0
   407
    {
sl@0
   408
        BOOST_ASSERT(begin != end);
sl@0
   409
        detail::quant_spec spec = { 0, 0, false };
sl@0
   410
        detail::sequence<BidiIter> seq = this->parse_atom(begin, end);
sl@0
   411
sl@0
   412
        // BUGBUG this doesn't handle the degenerate (?:)+ correctly
sl@0
   413
        if(!seq.is_empty() && begin != end && seq.first->is_quantifiable())
sl@0
   414
        {
sl@0
   415
            if(this->traits_.get_quant_spec(begin, end, spec))
sl@0
   416
            {
sl@0
   417
                BOOST_ASSERT(spec.min_ <= spec.max_);
sl@0
   418
sl@0
   419
                if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
sl@0
   420
                {
sl@0
   421
                    seq = this->parse_quant(begin, end);
sl@0
   422
                }
sl@0
   423
                else
sl@0
   424
                {
sl@0
   425
                    seq = seq.first->quantify(spec, this->hidden_mark_count_, seq, alternates_factory());
sl@0
   426
                }
sl@0
   427
            }
sl@0
   428
        }
sl@0
   429
sl@0
   430
        return seq;
sl@0
   431
    }
sl@0
   432
sl@0
   433
    ///////////////////////////////////////////////////////////////////////////
sl@0
   434
    // parse_sequence
sl@0
   435
    /// INTERNAL ONLY
sl@0
   436
    detail::sequence<BidiIter> parse_sequence(string_iterator &begin, string_iterator end)
sl@0
   437
    {
sl@0
   438
        detail::sequence<BidiIter> seq;
sl@0
   439
sl@0
   440
        while(begin != end)
sl@0
   441
        {
sl@0
   442
            detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end);
sl@0
   443
sl@0
   444
            // did we find a quantified atom?
sl@0
   445
            if(seq_quant.is_empty())
sl@0
   446
                break;
sl@0
   447
sl@0
   448
            // chain it to the end of the xpression sequence
sl@0
   449
            seq += seq_quant;
sl@0
   450
        }
sl@0
   451
sl@0
   452
        return seq;
sl@0
   453
    }
sl@0
   454
sl@0
   455
    ///////////////////////////////////////////////////////////////////////////
sl@0
   456
    // parse_literal
sl@0
   457
    //  scan ahead looking for char literals to be globbed together into a string literal
sl@0
   458
    /// INTERNAL ONLY
sl@0
   459
    string_type parse_literal(string_iterator &begin, string_iterator end)
sl@0
   460
    {
sl@0
   461
        using namespace regex_constants;
sl@0
   462
        BOOST_ASSERT(begin != end);
sl@0
   463
        BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
sl@0
   464
        escape_value esc = { 0, 0, 0, detail::escape_char };
sl@0
   465
        string_type literal(1, *begin);
sl@0
   466
sl@0
   467
        for(string_iterator prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp)
sl@0
   468
        {
sl@0
   469
            detail::quant_spec spec;
sl@0
   470
            if(this->traits_.get_quant_spec(tmp, end, spec))
sl@0
   471
            {
sl@0
   472
                if(literal.size() != 1)
sl@0
   473
                {
sl@0
   474
                    begin = prev;
sl@0
   475
                    literal.erase(literal.size() - 1);
sl@0
   476
                }
sl@0
   477
                return literal;
sl@0
   478
            }
sl@0
   479
            else switch(this->traits_.get_token(tmp, end))
sl@0
   480
            {
sl@0
   481
            case token_escape:
sl@0
   482
                esc = this->parse_escape(tmp, end);
sl@0
   483
                if(detail::escape_char != esc.type_) return literal;
sl@0
   484
                literal += esc.ch_;
sl@0
   485
                break;
sl@0
   486
            case token_literal:
sl@0
   487
                literal += *tmp++;
sl@0
   488
                break;
sl@0
   489
            default:
sl@0
   490
                return literal;
sl@0
   491
            }
sl@0
   492
        }
sl@0
   493
sl@0
   494
        return literal;
sl@0
   495
    }
sl@0
   496
sl@0
   497
    ///////////////////////////////////////////////////////////////////////////
sl@0
   498
    // parse_quote_meta
sl@0
   499
    //  scan ahead looking for char literals to be globbed together into a string literal
sl@0
   500
    /// INTERNAL ONLY
sl@0
   501
    string_type parse_quote_meta(string_iterator &begin, string_iterator end)
sl@0
   502
    {
sl@0
   503
        using namespace regex_constants;
sl@0
   504
        string_iterator old_begin = begin, old_end;
sl@0
   505
        while(end != (old_end = begin))
sl@0
   506
        {
sl@0
   507
            switch(this->traits_.get_token(begin, end))
sl@0
   508
            {
sl@0
   509
            case token_quote_meta_end: return string_type(old_begin, old_end);
sl@0
   510
            case token_escape: detail::ensure(begin != end, error_escape, "incomplete escape sequence");
sl@0
   511
            case token_literal: ++begin;
sl@0
   512
            default:;
sl@0
   513
            }
sl@0
   514
        }
sl@0
   515
        return string_type(old_begin, begin);
sl@0
   516
    }
sl@0
   517
sl@0
   518
    ///////////////////////////////////////////////////////////////////////////////
sl@0
   519
    // parse_escape
sl@0
   520
    /// INTERNAL ONLY
sl@0
   521
    escape_value parse_escape(string_iterator &begin, string_iterator end)
sl@0
   522
    {
sl@0
   523
        detail::ensure(begin != end, regex_constants::error_escape, "incomplete escape sequence");
sl@0
   524
sl@0
   525
        // first, check to see if this can be a backreference
sl@0
   526
        if(0 < this->rxtraits().value(*begin, 10))
sl@0
   527
        {
sl@0
   528
            // Parse at most 3 decimal digits.
sl@0
   529
            string_iterator tmp = begin;
sl@0
   530
            int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999);
sl@0
   531
sl@0
   532
            // If the resulting number could conceivably be a backref, then it is.
sl@0
   533
            if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
sl@0
   534
            {
sl@0
   535
                begin = tmp;
sl@0
   536
                escape_value esc = {0, mark_nbr, 0, detail::escape_mark};
sl@0
   537
                return esc;
sl@0
   538
            }
sl@0
   539
        }
sl@0
   540
sl@0
   541
        // Not a backreference, defer to the parse_escape helper
sl@0
   542
        return detail::parse_escape(begin, end, this->traits_);
sl@0
   543
    }
sl@0
   544
sl@0
   545
    // Incremented by parse_group each time it assigns a mark number to a group;
    // zeroed by reset() and copied into the regex_impl by compile().
    std::size_t mark_count_;
sl@0
   546
    // Passed to quantify() by parse_quant; zeroed by reset() and copied into
    // the regex_impl by compile().
    std::size_t hidden_mark_count_;
sl@0
   547
    // Compiler traits: supplies get_token / get_group_type / get_quant_spec,
    // holds the current syntax flags, and gives access to the regex traits via traits().
    CompilerTraits traits_;
sl@0
   548
    // Character class looked up under the name "upper" by the constructor and by
    // imbue(); parse_atom tests the character of a class escape against it.
    typename RegexTraits::char_class_type upper_;
sl@0
   549
};
sl@0
   550
sl@0
   551
}} // namespace boost::xpressive
sl@0
   552
sl@0
   553
#endif