os/ossrv/ossrv_pub/boost_apis/boost/regex/icu.hpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  *
     3  * Copyright (c) 2004
     4  * John Maddock
     5  *
     6  * Use, modification and distribution are subject to the 
     7  * Boost Software License, Version 1.0. (See accompanying file 
     8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
     9  *
    10  */
    11 
    12  /*
    13   *   LOCATION:    see http://www.boost.org for most recent version.
    14   *   FILE         icu.hpp
    15   *   VERSION      see <boost/version.hpp>
    16   *   DESCRIPTION: Unicode regular expressions on top of the ICU Library.
    17   */
    18 
    19 #ifndef BOOST_REGEX_ICU_HPP
    20 #define BOOST_REGEX_ICU_HPP
    21 
    22 #include <unicode/utypes.h>
    23 #include <unicode/uchar.h>
    24 #include <unicode/coll.h>
    25 #include <boost/regex.hpp>
    26 #include <boost/regex/pending/unicode_iterator.hpp>
    27 #include <boost/mpl/int_fwd.hpp>
    28 #include <bitset>
    29 
    30 
    31 namespace boost{
    32 
    33 namespace re_detail{
    34 
    35 // 
    36 // Implementation details:
    37 //
    38 class BOOST_REGEX_DECL icu_regex_traits_implementation
    39 {
    40    typedef UChar32                      char_type;
    41    typedef std::size_t                  size_type;
    42    typedef std::vector<char_type>       string_type;
    43    typedef U_NAMESPACE_QUALIFIER Locale locale_type;
    44    typedef boost::uint_least32_t        char_class_type;
    45 public:
    46    icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l)
    47       : m_locale(l)
    48    {
    49       UErrorCode success = U_ZERO_ERROR;
    50       m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
    51       if(U_SUCCESS(success) == 0)
    52          init_error();
    53       m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL);
    54       success = U_ZERO_ERROR;
    55       m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
    56       if(U_SUCCESS(success) == 0)
    57          init_error();
    58       m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY);
    59    }
    60    U_NAMESPACE_QUALIFIER Locale getloc()const
    61    {
    62       return m_locale;
    63    }
    64    string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const;
    65    string_type transform(const char_type* p1, const char_type* p2) const
    66    {
    67       return do_transform(p1, p2, m_collator.get());
    68    }
    69    string_type transform_primary(const char_type* p1, const char_type* p2) const
    70    {
    71       return do_transform(p1, p2, m_primary_collator.get());
    72    }
    73 private:
    74    void init_error()
    75    {
    76       std::runtime_error e("Could not initialize ICU resources");
    77       boost::throw_exception(e);
    78    }
    79    U_NAMESPACE_QUALIFIER Locale m_locale;                                  // The ICU locale that we're using
    80    boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator;          // The full collation object
    81    boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator;  // The primary collation object
    82 };
    83 
    84 inline boost::shared_ptr<icu_regex_traits_implementation> get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc)
    85 {
    86    return boost::shared_ptr<icu_regex_traits_implementation>(new icu_regex_traits_implementation(loc));
    87 }
    88 
    89 }
    90 
    91 class BOOST_REGEX_DECL icu_regex_traits
    92 {
    93 public:
    94    typedef UChar32                      char_type;
    95    typedef std::size_t                  size_type;
    96    typedef std::vector<char_type>       string_type;
    97    typedef U_NAMESPACE_QUALIFIER Locale locale_type;
    98 #ifdef BOOST_NO_INT64_T
    99    typedef std::bitset<64>              char_class_type;
   100 #else
   101    typedef boost::uint64_t              char_class_type;
   102 #endif
   103 
   104    struct boost_extensions_tag{};
   105 
   106    icu_regex_traits()
   107       : m_pimpl(re_detail::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale()))
   108    {
   109    }
   110    static size_type length(const char_type* p);
   111 
   112    ::boost::regex_constants::syntax_type syntax_type(char_type c)const
   113    {
   114       return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
   115    }
   116    ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const
   117    {
   118       return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_escape_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
   119    }
   120    char_type translate(char_type c) const
   121    {
   122       return c;
   123    }
   124    char_type translate_nocase(char_type c) const
   125    {
   126       return ::u_tolower(c);
   127    }
   128    char_type translate(char_type c, bool icase) const
   129    {
   130       return icase ? translate_nocase(c) : translate(c);
   131    }
   132    char_type tolower(char_type c) const
   133    {
   134       return ::u_tolower(c);
   135    }
   136    char_type toupper(char_type c) const
   137    {
   138       return ::u_toupper(c);
   139    }
   140    string_type transform(const char_type* p1, const char_type* p2) const
   141    {
   142       return m_pimpl->transform(p1, p2);
   143    }
   144    string_type transform_primary(const char_type* p1, const char_type* p2) const
   145    {
   146       return m_pimpl->transform_primary(p1, p2);
   147    }
   148    char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
   149    string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
   150    bool isctype(char_type c, char_class_type f) const;
   151    int toi(const char_type*& p1, const char_type* p2, int radix)const
   152    {
   153       return re_detail::global_toi(p1, p2, radix, *this);
   154    }
   155    int value(char_type c, int radix)const
   156    {
   157       return u_digit(c, static_cast< ::int8_t>(radix));
   158    }
   159    locale_type imbue(locale_type l)
   160    {
   161       locale_type result(m_pimpl->getloc());
   162       m_pimpl = re_detail::get_icu_regex_traits_implementation(l);
   163       return result;
   164    }
   165    locale_type getloc()const
   166    {
   167       return locale_type();
   168    }
   169    std::string error_string(::boost::regex_constants::error_type n) const
   170    {
   171       return re_detail::get_default_error_string(n);
   172    }
   173 private:
   174    icu_regex_traits(const icu_regex_traits&);
   175    icu_regex_traits& operator=(const icu_regex_traits&);
   176 
   177    //
   178    // define the bitmasks offsets we need for additional character properties:
   179    //
   180    enum{
   181       offset_blank = U_CHAR_CATEGORY_COUNT,
   182       offset_space = U_CHAR_CATEGORY_COUNT+1,
   183       offset_xdigit = U_CHAR_CATEGORY_COUNT+2,
   184       offset_underscore = U_CHAR_CATEGORY_COUNT+3,
   185       offset_unicode = U_CHAR_CATEGORY_COUNT+4,
   186       offset_any = U_CHAR_CATEGORY_COUNT+5,
   187       offset_ascii = U_CHAR_CATEGORY_COUNT+6
   188    };
   189 
   190    //
   191    // and now the masks:
   192    //
   193    static const char_class_type mask_blank;
   194    static const char_class_type mask_space;
   195    static const char_class_type mask_xdigit;
   196    static const char_class_type mask_underscore;
   197    static const char_class_type mask_unicode;
   198    static const char_class_type mask_any;
   199    static const char_class_type mask_ascii;
   200 
   201    static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);
   202 
   203    boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl;
   204 };
   205 
   206 } // namespace boost
   207 
   208 //
   209 // template instances:
   210 //
// Set up the macros consumed by instances.hpp for the UChar32 /
// icu_regex_traits combination.  NOTE(review): the exact effect depends on
// instances.hpp, which is not visible here.
#define BOOST_REGEX_CHAR_T UChar32
#undef BOOST_REGEX_TRAITS_T
#define BOOST_REGEX_TRAITS_T , icu_regex_traits
#define BOOST_REGEX_ICU_INSTANCES
// BOOST_REGEX_ICU_INSTANTIATE is mapped onto BOOST_REGEX_INSTANTIATE for the
// duration of the include below.
#ifdef BOOST_REGEX_ICU_INSTANTIATE
#  define BOOST_REGEX_INSTANTIATE
#endif
#include <boost/regex/v4/instances.hpp>
// Clean up so the macros do not leak into the rest of the translation unit.
#undef BOOST_REGEX_CHAR_T
#undef BOOST_REGEX_TRAITS_T
#undef BOOST_REGEX_ICU_INSTANCES
#ifdef BOOST_REGEX_INSTANTIATE
#  undef BOOST_REGEX_INSTANTIATE
#endif
   225 
   226 namespace boost{
   227 
   228 // types:
// regular expression over UChar32 characters using the ICU traits class:
typedef basic_regex< ::UChar32, icu_regex_traits> u32regex;
// match results for a range delimited by const UChar32* pointers:
typedef match_results<const ::UChar32*> u32match;
// match results for a range delimited by const UChar* pointers:
typedef match_results<const ::UChar*> u16match;
   232 
   233 //
   234 // Construction of 32-bit regex types from UTF-8 and UTF-16 primitives:
   235 //
   236 namespace re_detail{
   237 
   238 #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
   239 template <class InputIterator>
   240 inline u32regex do_make_u32regex(InputIterator i, 
   241                               InputIterator j, 
   242                               boost::regex_constants::syntax_option_type opt, 
   243                               const boost::mpl::int_<1>*)
   244 {
   245    typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
   246    return u32regex(conv_type(i), conv_type(j), opt);
   247 }
   248 
   249 template <class InputIterator>
   250 inline u32regex do_make_u32regex(InputIterator i, 
   251                               InputIterator j, 
   252                               boost::regex_constants::syntax_option_type opt, 
   253                               const boost::mpl::int_<2>*)
   254 {
   255    typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
   256    return u32regex(conv_type(i), conv_type(j), opt);
   257 }
   258 
   259 template <class InputIterator>
   260 inline u32regex do_make_u32regex(InputIterator i, 
   261                               InputIterator j, 
   262                               boost::regex_constants::syntax_option_type opt, 
   263                               const boost::mpl::int_<4>*)
   264 {
   265    return u32regex(i, j, opt);
   266 }
   267 #else
   268 template <class InputIterator>
   269 inline u32regex do_make_u32regex(InputIterator i, 
   270                               InputIterator j, 
   271                               boost::regex_constants::syntax_option_type opt, 
   272                               const boost::mpl::int_<1>*)
   273 {
   274    typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
   275    typedef std::vector<UChar32> vector_type;
   276    vector_type v;
   277    conv_type a(i), b(j);
   278    while(a != b)
   279    {
   280       v.push_back(*a);
   281       ++a;
   282    }
   283    if(v.size())
   284       return u32regex(&*v.begin(), v.size(), opt);
   285    return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
   286 }
   287 
   288 template <class InputIterator>
   289 inline u32regex do_make_u32regex(InputIterator i, 
   290                               InputIterator j, 
   291                               boost::regex_constants::syntax_option_type opt, 
   292                               const boost::mpl::int_<2>*)
   293 {
   294    typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
   295    typedef std::vector<UChar32> vector_type;
   296    vector_type v;
   297    conv_type a(i), b(j);
   298    while(a != b)
   299    {
   300       v.push_back(*a);
   301       ++a;
   302    }
   303    if(v.size())
   304       return u32regex(&*v.begin(), v.size(), opt);
   305    return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
   306 }
   307 
   308 template <class InputIterator>
   309 inline u32regex do_make_u32regex(InputIterator i, 
   310                               InputIterator j, 
   311                               boost::regex_constants::syntax_option_type opt, 
   312                               const boost::mpl::int_<4>*)
   313 {
   314    typedef std::vector<UCHAR32> vector_type;
   315    vector_type v;
   316    while(i != j)
   317    {
   318       v.push_back((UCHAR32)(*i));
   319       ++a;
   320    }
   321    if(v.size())
   322       return u32regex(&*v.begin(), v.size(), opt);
   323    return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
   324 }
   325 #endif
   326 }
   327 
   328 //
   329 // Construction from an iterator pair:
   330 //
   331 template <class InputIterator>
   332 inline u32regex make_u32regex(InputIterator i, 
   333                               InputIterator j, 
   334                               boost::regex_constants::syntax_option_type opt)
   335 {
   336    return re_detail::do_make_u32regex(i, j, opt, static_cast<boost::mpl::int_<sizeof(*i)> const*>(0));
   337 }
   338 //
   339 // construction from UTF-8 nul-terminated strings:
   340 //
   341 inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
   342 {
   343    return re_detail::do_make_u32regex(p, p + std::strlen(p), opt, static_cast<boost::mpl::int_<1> const*>(0));
   344 }
   345 inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
   346 {
   347    return re_detail::do_make_u32regex(p, p + std::strlen(reinterpret_cast<const char*>(p)), opt, static_cast<boost::mpl::int_<1> const*>(0));
   348 }
   349 //
   350 // construction from UTF-16 nul-terminated strings:
   351 //
   352 #ifndef BOOST_NO_WREGEX
   353 inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
   354 {
   355    return re_detail::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast<boost::mpl::int_<sizeof(wchar_t)> const*>(0));
   356 }
   357 #endif
   358 #ifndef U_WCHAR_IS_UTF16
   359 inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
   360 {
   361    return re_detail::do_make_u32regex(p, p + u_strlen(p), opt, static_cast<boost::mpl::int_<2> const*>(0));
   362 }
   363 #endif
   364 //
   365 // construction from basic_string class-template:
   366 //
   367 template<class C, class T, class A>
   368 inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
   369 {
   370    return re_detail::do_make_u32regex(s.begin(), s.end(), opt, static_cast<boost::mpl::int_<sizeof(C)> const*>(0));
   371 }
   372 //
   373 // Construction from ICU string type:
   374 //
   375 inline u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
   376 {
   377    return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0));
   378 }
   379 
   380 //
   381 // regex_match overloads that widen the character type as appropriate:
   382 //
   383 namespace re_detail{
   384 template<class MR1, class MR2>
   385 void copy_results(MR1& out, MR2 const& in)
   386 {
   387    // copy results from an adapted MR2 match_results:
   388    out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base());
   389    out.set_base(in.base().base());
   390    for(int i = 0; i < (int)in.size(); ++i)
   391    {
   392       if(in[i].matched)
   393       {
   394          out.set_first(in[i].first.base(), i);
   395          out.set_second(in[i].second.base(), i);
   396       }
   397    }
   398 }
   399 
   400 template <class BidiIterator, class Allocator>
   401 inline bool do_regex_match(BidiIterator first, BidiIterator last, 
   402                  match_results<BidiIterator, Allocator>& m, 
   403                  const u32regex& e, 
   404                  match_flag_type flags,
   405                  boost::mpl::int_<4> const*)
   406 {
   407    return ::boost::regex_match(first, last, m, e, flags);
   408 }
   409 template <class BidiIterator, class Allocator>
   410 bool do_regex_match(BidiIterator first, BidiIterator last, 
   411                  match_results<BidiIterator, Allocator>& m, 
   412                  const u32regex& e, 
   413                  match_flag_type flags,
   414                  boost::mpl::int_<2> const*)
   415 {
   416    typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
   417    typedef match_results<conv_type>                   match_type;
   418    typedef typename match_type::allocator_type        alloc_type;
   419    match_type what;
   420    bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
   421    // copy results across to m:
   422    if(result) copy_results(m, what);
   423    return result;
   424 }
   425 template <class BidiIterator, class Allocator>
   426 bool do_regex_match(BidiIterator first, BidiIterator last, 
   427                  match_results<BidiIterator, Allocator>& m, 
   428                  const u32regex& e, 
   429                  match_flag_type flags,
   430                  boost::mpl::int_<1> const*)
   431 {
   432    typedef u8_to_u32_iterator<BidiIterator, UChar32>  conv_type;
   433    typedef match_results<conv_type>                   match_type;
   434    typedef typename match_type::allocator_type        alloc_type;
   435    match_type what;
   436    bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
   437    // copy results across to m:
   438    if(result) copy_results(m, what);
   439    return result;
   440 }
   441 } // namespace re_detail
   442 
   443 template <class BidiIterator, class Allocator>
   444 inline bool u32regex_match(BidiIterator first, BidiIterator last, 
   445                  match_results<BidiIterator, Allocator>& m, 
   446                  const u32regex& e, 
   447                  match_flag_type flags = match_default)
   448 {
   449    return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
   450 }
   451 inline bool u32regex_match(const UChar* p, 
   452                  match_results<const UChar*>& m, 
   453                  const u32regex& e, 
   454                  match_flag_type flags = match_default)
   455 {
   456    return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
   457 }
   458 #if !defined(U_WCHAR_IS_UTF16) && !defined(BOOST_NO_WREGEX)
   459 inline bool u32regex_match(const wchar_t* p, 
   460                  match_results<const wchar_t*>& m, 
   461                  const u32regex& e, 
   462                  match_flag_type flags = match_default)
   463 {
   464    return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   465 }
   466 #endif
   467 inline bool u32regex_match(const char* p, 
   468                  match_results<const char*>& m, 
   469                  const u32regex& e, 
   470                  match_flag_type flags = match_default)
   471 {
   472    return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
   473 }
   474 inline bool u32regex_match(const unsigned char* p, 
   475                  match_results<const unsigned char*>& m, 
   476                  const u32regex& e, 
   477                  match_flag_type flags = match_default)
   478 {
   479    return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
   480 }
   481 inline bool u32regex_match(const std::string& s, 
   482                         match_results<std::string::const_iterator>& m, 
   483                         const u32regex& e, 
   484                         match_flag_type flags = match_default)
   485 {
   486    return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
   487 }
   488 #ifndef BOOST_NO_STD_WSTRING
   489 inline bool u32regex_match(const std::wstring& s, 
   490                         match_results<std::wstring::const_iterator>& m, 
   491                         const u32regex& e, 
   492                         match_flag_type flags = match_default)
   493 {
   494    return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   495 }
   496 #endif
   497 inline bool u32regex_match(const UnicodeString& s, 
   498                         match_results<const UChar*>& m, 
   499                         const u32regex& e, 
   500                         match_flag_type flags = match_default)
   501 {
   502    return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   503 }
   504 //
   505 // regex_match overloads that do not return what matched:
   506 //
   507 template <class BidiIterator>
   508 inline bool u32regex_match(BidiIterator first, BidiIterator last, 
   509                  const u32regex& e, 
   510                  match_flag_type flags = match_default)
   511 {
   512    match_results<BidiIterator> m;
   513    return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
   514 }
   515 inline bool u32regex_match(const UChar* p, 
   516                  const u32regex& e, 
   517                  match_flag_type flags = match_default)
   518 {
   519    match_results<const UChar*> m;
   520    return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
   521 }
   522 #if !defined(U_WCHAR_IS_UTF16) && !defined(BOOST_NO_WREGEX)
   523 inline bool u32regex_match(const wchar_t* p, 
   524                  const u32regex& e, 
   525                  match_flag_type flags = match_default)
   526 {
   527    match_results<const wchar_t*> m;
   528    return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   529 }
   530 #endif
   531 inline bool u32regex_match(const char* p, 
   532                  const u32regex& e, 
   533                  match_flag_type flags = match_default)
   534 {
   535    match_results<const char*> m;
   536    return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
   537 }
   538 inline bool u32regex_match(const unsigned char* p, 
   539                  const u32regex& e, 
   540                  match_flag_type flags = match_default)
   541 {
   542    match_results<const unsigned char*> m;
   543    return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
   544 }
   545 inline bool u32regex_match(const std::string& s, 
   546                         const u32regex& e, 
   547                         match_flag_type flags = match_default)
   548 {
   549    match_results<std::string::const_iterator> m;
   550    return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
   551 }
   552 #ifndef BOOST_NO_STD_WSTRING
   553 inline bool u32regex_match(const std::wstring& s, 
   554                         const u32regex& e, 
   555                         match_flag_type flags = match_default)
   556 {
   557    match_results<std::wstring::const_iterator> m;
   558    return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   559 }
   560 #endif
   561 inline bool u32regex_match(const UnicodeString& s, 
   562                         const u32regex& e, 
   563                         match_flag_type flags = match_default)
   564 {
   565    match_results<const UChar*> m;
   566    return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   567 }
   568 
   569 //
   570 // regex_search overloads that widen the character type as appropriate:
   571 //
   572 namespace re_detail{
   573 template <class BidiIterator, class Allocator>
   574 inline bool do_regex_search(BidiIterator first, BidiIterator last, 
   575                  match_results<BidiIterator, Allocator>& m, 
   576                  const u32regex& e, 
   577                  match_flag_type flags,
   578                  BidiIterator base,
   579                  boost::mpl::int_<4> const*)
   580 {
   581    return ::boost::regex_search(first, last, m, e, flags, base);
   582 }
   583 template <class BidiIterator, class Allocator>
   584 bool do_regex_search(BidiIterator first, BidiIterator last, 
   585                  match_results<BidiIterator, Allocator>& m, 
   586                  const u32regex& e, 
   587                  match_flag_type flags,
   588                  BidiIterator base,
   589                  boost::mpl::int_<2> const*)
   590 {
   591    typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
   592    typedef match_results<conv_type>                   match_type;
   593    typedef typename match_type::allocator_type        alloc_type;
   594    match_type what;
   595    bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
   596    // copy results across to m:
   597    if(result) copy_results(m, what);
   598    return result;
   599 }
   600 template <class BidiIterator, class Allocator>
   601 bool do_regex_search(BidiIterator first, BidiIterator last, 
   602                  match_results<BidiIterator, Allocator>& m, 
   603                  const u32regex& e, 
   604                  match_flag_type flags,
   605                  BidiIterator base,
   606                  boost::mpl::int_<1> const*)
   607 {
   608    typedef u8_to_u32_iterator<BidiIterator, UChar32>  conv_type;
   609    typedef match_results<conv_type>                   match_type;
   610    typedef typename match_type::allocator_type        alloc_type;
   611    match_type what;
   612    bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
   613    // copy results across to m:
   614    if(result) copy_results(m, what);
   615    return result;
   616 }
   617 }
   618 
   619 template <class BidiIterator, class Allocator>
   620 inline bool u32regex_search(BidiIterator first, BidiIterator last, 
   621                  match_results<BidiIterator, Allocator>& m, 
   622                  const u32regex& e, 
   623                  match_flag_type flags = match_default)
   624 {
   625    return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
   626 }
   627 template <class BidiIterator, class Allocator>
   628 inline bool u32regex_search(BidiIterator first, BidiIterator last, 
   629                  match_results<BidiIterator, Allocator>& m, 
   630                  const u32regex& e, 
   631                  match_flag_type flags,
   632                  BidiIterator base)
   633 {
   634    return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast<mpl::int_<sizeof(*first)> const*>(0));
   635 }
   636 inline bool u32regex_search(const UChar* p, 
   637                  match_results<const UChar*>& m, 
   638                  const u32regex& e, 
   639                  match_flag_type flags = match_default)
   640 {
   641    return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
   642 }
   643 #if !defined(U_WCHAR_IS_UTF16) && !defined(BOOST_NO_WREGEX)
   644 inline bool u32regex_search(const wchar_t* p, 
   645                  match_results<const wchar_t*>& m, 
   646                  const u32regex& e, 
   647                  match_flag_type flags = match_default)
   648 {
   649    return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   650 }
   651 #endif
   652 inline bool u32regex_search(const char* p, 
   653                  match_results<const char*>& m, 
   654                  const u32regex& e, 
   655                  match_flag_type flags = match_default)
   656 {
   657    return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
   658 }
   659 inline bool u32regex_search(const unsigned char* p, 
   660                  match_results<const unsigned char*>& m, 
   661                  const u32regex& e, 
   662                  match_flag_type flags = match_default)
   663 {
   664    return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
   665 }
   666 inline bool u32regex_search(const std::string& s, 
   667                         match_results<std::string::const_iterator>& m, 
   668                         const u32regex& e, 
   669                         match_flag_type flags = match_default)
   670 {
   671    return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
   672 }
   673 #ifndef BOOST_NO_STD_WSTRING
   674 inline bool u32regex_search(const std::wstring& s, 
   675                         match_results<std::wstring::const_iterator>& m, 
   676                         const u32regex& e, 
   677                         match_flag_type flags = match_default)
   678 {
   679    return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   680 }
   681 #endif
   682 inline bool u32regex_search(const UnicodeString& s, 
   683                         match_results<const UChar*>& m, 
   684                         const u32regex& e, 
   685                         match_flag_type flags = match_default)
   686 {
   687    return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   688 }
   689 template <class BidiIterator>
   690 inline bool u32regex_search(BidiIterator first, BidiIterator last, 
   691                  const u32regex& e, 
   692                  match_flag_type flags = match_default)
   693 {
   694    match_results<BidiIterator> m;
   695    return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
   696 }
   697 inline bool u32regex_search(const UChar* p, 
   698                  const u32regex& e, 
   699                  match_flag_type flags = match_default)
   700 {
   701    match_results<const UChar*> m;
   702    return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
   703 }
   704 #if !defined(U_WCHAR_IS_UTF16) && !defined(BOOST_NO_WREGEX)
   705 inline bool u32regex_search(const wchar_t* p, 
   706                  const u32regex& e, 
   707                  match_flag_type flags = match_default)
   708 {
   709    match_results<const wchar_t*> m;
   710    return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   711 }
   712 #endif
   713 inline bool u32regex_search(const char* p, 
   714                  const u32regex& e, 
   715                  match_flag_type flags = match_default)
   716 {
   717    match_results<const char*> m;
   718    return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
   719 }
   720 inline bool u32regex_search(const unsigned char* p, 
   721                  const u32regex& e, 
   722                  match_flag_type flags = match_default)
   723 {
   724    match_results<const unsigned char*> m;
   725    return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
   726 }
   727 inline bool u32regex_search(const std::string& s, 
   728                         const u32regex& e, 
   729                         match_flag_type flags = match_default)
   730 {
   731    match_results<std::string::const_iterator> m;
   732    return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
   733 }
   734 #ifndef BOOST_NO_STD_WSTRING
   735 inline bool u32regex_search(const std::wstring& s, 
   736                         const u32regex& e, 
   737                         match_flag_type flags = match_default)
   738 {
   739    match_results<std::wstring::const_iterator> m;
   740    return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   741 }
   742 #endif
   743 inline bool u32regex_search(const UnicodeString& s, 
   744                         const u32regex& e, 
   745                         match_flag_type flags = match_default)
   746 {
   747    match_results<const UChar*> m;
   748    return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
   749 }
   750 
   751 //
   752 // overloads for regex_replace with utf-8 and utf-16 data types:
   753 //
   754 namespace re_detail{
   755 template <class I>
   756 inline std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >
   757    make_utf32_seq(I i, I j, mpl::int_<1> const*)
   758 {
   759    return std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >(boost::u8_to_u32_iterator<I>(i), boost::u8_to_u32_iterator<I>(j));
   760 }
   761 template <class I>
   762 inline std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >
   763    make_utf32_seq(I i, I j, mpl::int_<2> const*)
   764 {
   765    return std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >(boost::u16_to_u32_iterator<I>(i), boost::u16_to_u32_iterator<I>(j));
   766 }
   767 template <class I>
   768 inline std::pair< I, I >
   769    make_utf32_seq(I i, I j, mpl::int_<4> const*)
   770 {
   771    return std::pair< I, I >(i, j);
   772 }
   773 template <class charT>
   774 inline std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >
   775    make_utf32_seq(const charT* p, mpl::int_<1> const*)
   776 {
   777    return std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >(boost::u8_to_u32_iterator<const charT*>(p), boost::u8_to_u32_iterator<const charT*>(p+std::strlen((const char*)p)));
   778 }
   779 template <class charT>
   780 inline std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >
   781    make_utf32_seq(const charT* p, mpl::int_<2> const*)
   782 {
   783    return std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >(boost::u16_to_u32_iterator<const charT*>(p), boost::u16_to_u32_iterator<const charT*>(p+u_strlen((const UChar*)p)));
   784 }
   785 template <class charT>
   786 inline std::pair< const charT*, const charT* >
   787    make_utf32_seq(const charT* p, mpl::int_<4> const*)
   788 {
   789    return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p));
   790 }
   791 template <class OutputIterator>
   792 inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*)
   793 {
   794    return o;
   795 }
   796 template <class OutputIterator>
   797 inline utf16_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<2> const*)
   798 {
   799    return o;
   800 }
   801 template <class OutputIterator>
   802 inline utf8_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<1> const*)
   803 {
   804    return o;
   805 }
   806 
   807 template <class OutputIterator, class I1, class I2>
   808 OutputIterator do_regex_replace(OutputIterator out,
   809                                  std::pair<I1, I1> const& in,
   810                                  const u32regex& e, 
   811                                  const std::pair<I2, I2>& fmt, 
   812                                  match_flag_type flags
   813                                  )
   814 {
   815    // unfortunately we have to copy the format string in order to pass in onward:
   816    std::vector<UChar32> f;
   817 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
   818    f.assign(fmt.first, fmt.second);
   819 #else
   820    f.clear();
   821    I2 pos = fmt.first;
   822    while(pos != fmt.second)
   823       f.push_back(*pos++);
   824 #endif
   825    
   826    regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags);
   827    regex_iterator<I1, UChar32, icu_regex_traits> j;
   828    if(i == j)
   829    {
   830       if(!(flags & regex_constants::format_no_copy))
   831          out = re_detail::copy(in.first, in.second, out);
   832    }
   833    else
   834    {
   835       I1 last_m = in.first;
   836       while(i != j)
   837       {
   838          if(!(flags & regex_constants::format_no_copy))
   839             out = re_detail::copy(i->prefix().first, i->prefix().second, out); 
   840          if(f.size())
   841             out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits());
   842          else
   843             out = ::boost::re_detail::regex_format_imp(out, *i, static_cast<UChar32 const*>(0), static_cast<UChar32 const*>(0), flags, e.get_traits());
   844          last_m = (*i)[0].second;
   845          if(flags & regex_constants::format_first_only)
   846             break;
   847          ++i;
   848       }
   849       if(!(flags & regex_constants::format_no_copy))
   850          out = re_detail::copy(last_m, in.second, out);
   851    }
   852    return out;
   853 }
   // An iterator that is not one of the adaptors handled by the overloads
   // below is its own base: hand back the very same object.
   template <typename BaseIterator>
   inline const BaseIterator& extract_output_base(const BaseIterator& b)
   {
      return (b);
   }
   859 template <class BaseIterator>
   860 inline BaseIterator extract_output_base(const utf8_output_iterator<BaseIterator>& b)
   861 {
   862    return b.base();
   863 }
   864 template <class BaseIterator>
   865 inline BaseIterator extract_output_base(const utf16_output_iterator<BaseIterator>& b)
   866 {
   867    return b.base();
   868 }
   869 }  // re_detail
   870 
   871 template <class OutputIterator, class BidirectionalIterator, class charT>
   872 inline OutputIterator u32regex_replace(OutputIterator out,
   873                          BidirectionalIterator first,
   874                          BidirectionalIterator last,
   875                          const u32regex& e, 
   876                          const charT* fmt, 
   877                          match_flag_type flags = match_default)
   878 {
   879    return re_detail::extract_output_base
   880 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
   881    <OutputIterator>
   882 #endif
   883     (
   884       re_detail::do_regex_replace(
   885          re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
   886          re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
   887          e,
   888          re_detail::make_utf32_seq(fmt, static_cast<mpl::int_<sizeof(*fmt)> const*>(0)),
   889          flags)
   890       );
   891 }
   892 
   893 template <class OutputIterator, class Iterator, class charT>
   894 inline OutputIterator u32regex_replace(OutputIterator out,
   895                          Iterator first,
   896                          Iterator last,
   897                          const u32regex& e, 
   898                          const std::basic_string<charT>& fmt,
   899                          match_flag_type flags = match_default)
   900 {
   901    return re_detail::extract_output_base
   902 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
   903    <OutputIterator>
   904 #endif
   905     (
   906       re_detail::do_regex_replace(
   907          re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
   908          re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
   909          e,
   910          re_detail::make_utf32_seq(fmt.begin(), fmt.end(), static_cast<mpl::int_<sizeof(charT)> const*>(0)),
   911          flags)
   912       );
   913 }
   914 
   915 template <class OutputIterator, class Iterator>
   916 inline OutputIterator u32regex_replace(OutputIterator out,
   917                          Iterator first,
   918                          Iterator last,
   919                          const u32regex& e, 
   920                          const UnicodeString& fmt,
   921                          match_flag_type flags = match_default)
   922 {
   923    return re_detail::extract_output_base
   924 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
   925    <OutputIterator>
   926 #endif
   927    (
   928       re_detail::do_regex_replace(
   929          re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
   930          re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
   931          e,
   932          re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
   933          flags)
   934       );
   935 }
   936 
   937 template <class charT>
   938 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
   939                          const u32regex& e, 
   940                          const charT* fmt,
   941                          match_flag_type flags = match_default)
   942 {
   943    std::basic_string<charT> result;
   944    re_detail::string_out_iterator<std::basic_string<charT> > i(result);
   945    u32regex_replace(i, s.begin(), s.end(), e, fmt, flags);
   946    return result;
   947 }
   948 
   949 template <class charT>
   950 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
   951                          const u32regex& e, 
   952                          const std::basic_string<charT>& fmt,
   953                          match_flag_type flags = match_default)
   954 {
   955    std::basic_string<charT> result;
   956    re_detail::string_out_iterator<std::basic_string<charT> > i(result);
   957    u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags);
   958    return result;
   959 }
   960 
   961 namespace re_detail{
   962 
   963 class unicode_string_out_iterator
   964 {
   965    UnicodeString* out;
   966 public:
   967    unicode_string_out_iterator(UnicodeString& s) : out(&s) {}
   968    unicode_string_out_iterator& operator++() { return *this; }
   969    unicode_string_out_iterator& operator++(int) { return *this; }
   970    unicode_string_out_iterator& operator*() { return *this; }
   971    unicode_string_out_iterator& operator=(UChar v) 
   972    { 
   973       *out += v; 
   974       return *this; 
   975    }
   976    typedef std::ptrdiff_t difference_type;
   977    typedef UChar value_type;
   978    typedef value_type* pointer;
   979    typedef value_type& reference;
   980    typedef std::output_iterator_tag iterator_category;
   981 };
   982 
   983 }
   984 
   985 inline UnicodeString u32regex_replace(const UnicodeString& s,
   986                          const u32regex& e, 
   987                          const UChar* fmt,
   988                          match_flag_type flags = match_default)
   989 {
   990    UnicodeString result;
   991    re_detail::unicode_string_out_iterator i(result);
   992    u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags);
   993    return result;
   994 }
   995 
   996 inline UnicodeString u32regex_replace(const UnicodeString& s,
   997                          const u32regex& e, 
   998                          const UnicodeString& fmt,
   999                          match_flag_type flags = match_default)
  1000 {
  1001    UnicodeString result;
  1002    re_detail::unicode_string_out_iterator i(result);
  1003    re_detail::do_regex_replace(
  1004          re_detail::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)),
  1005          re_detail::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast<mpl::int_<2> const*>(0)),
  1006          e,
  1007          re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
  1008          flags);
  1009    return result;
  1010 }
  1011 
  1012 } // namespace boost.
  1013 
  1014 #include <boost/regex/v4/u32regex_iterator.hpp>
  1015 #include <boost/regex/v4/u32regex_token_iterator.hpp>
  1016 
  1017 #endif