1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/ossrv_pub/boost_apis/boost/token_functions.hpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,615 @@
1.4 +// Boost token_functions.hpp ------------------------------------------------//
1.5 +
1.6 +// Copyright John R. Bandela 2001.
1.7 +
1.8 +// Distributed under the Boost Software License, Version 1.0. (See
1.9 +// accompanying file LICENSE_1_0.txt or copy at
1.10 +// http://www.boost.org/LICENSE_1_0.txt)
1.11 +
1.12 +// See http://www.boost.org/libs/tokenizer/ for documentation.
1.13 +
1.14 +// Revision History:
1.15 +// 01 Oct 2004 Joaquín M López Muñoz
1.16 +// Workaround for a problem with string::assign in msvc-stlport
1.17 +// 06 Apr 2004 John Bandela
1.18 +// Fixed a bug involving using char_delimiter with a true input iterator
1.19 +// 28 Nov 2003 Robert Zeh and John Bandela
1.20 +// Converted into "fast" functions that avoid using += when
1.21 +// the supplied iterator isn't an input_iterator; based on
1.22 +// some work done at Archelon and a version that was checked into
1.23 +// the boost CVS for a short period of time.
1.24 +// 20 Feb 2002 John Maddock
1.25 +// Removed using namespace std declarations and added
1.26 +// workaround for BOOST_NO_STDC_NAMESPACE (the library
1.27 +// can be safely mixed with regex).
1.28 +// 06 Feb 2002 Jeremy Siek
1.29 +// Added char_separator.
1.30 +// 02 Feb 2002 Jeremy Siek
1.31 +// Removed tabs and a little cleanup.
1.32 +
1.33 +
1.34 +#ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
1.35 +#define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
1.36 +
#include <vector>
#include <stdexcept>
#include <string>
#include <cctype>
#include <algorithm> // for find_if
#include <iterator>  // for std::input_iterator_tag, std::random_access_iterator_tag
#include <boost/config.hpp>
#include <boost/assert.hpp>
#include <boost/detail/workaround.hpp>
#include <boost/mpl/if.hpp>
#include <boost/type_traits/is_pointer.hpp>
1.46 +
1.47 +//
1.48 +// the following must not be macros if we are to prefix them
1.49 +// with std:: (they shouldn't be macros anyway...)
1.50 +//
1.51 +#ifdef ispunct
1.52 +# undef ispunct
1.53 +#endif
1.54 +#ifdef isspace
1.55 +# undef isspace
1.56 +#endif
1.57 +//
1.58 +// fix namespace problems:
1.59 +//
1.60 +#ifdef BOOST_NO_STDC_NAMESPACE
1.61 +namespace std{
1.62 + using ::ispunct;
1.63 + using ::isspace;
1.64 +}
1.65 +#endif
1.66 +
1.67 +namespace boost{
1.68 +
1.69 + //===========================================================================
1.70 + // The escaped_list_separator class. Which is a model of TokenizerFunction
1.71 + // An escaped list is a super-set of what is commonly known as a comma
1.72 + // separated value (csv) list. It is separated into fields by a comma or
1.73 + // other character. If the delimiting character is inside quotes, then it is
1.74 + // counted as a regular character. To allow for embedded quotes in a field,
1.75 + // there can be escape sequences using the \ much like C.
1.76 + // The role of the comma, the quotation mark, and the escape
1.77 + // character (backslash \), can be assigned to other characters.
1.78 +
// Exception thrown by escaped_list_separator when the input contains an
// escape character that is either the last character of the sequence or
// is followed by a character that does not form a known escape sequence.
struct escaped_list_error : public std::runtime_error{
  escaped_list_error(const std::string& what_arg):std::runtime_error(what_arg) { }
};


// The out of the box GCC 2.95 on cygwin does not have a char_traits class.
// MSVC does not like the following typename
#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300
template <class Char,
  class Traits = typename std::basic_string<Char>::traits_type >
#else
template <class Char,
  class Traits = std::basic_string<Char>::traits_type >
#endif
class escaped_list_separator {

private:
  typedef std::basic_string<Char,Traits> string_type;

  // Unary predicate: true for characters that compare equal to e_
  // according to Traits::eq.
  struct char_eq {
    Char e_;
    char_eq(Char e):e_(e) { }
    bool operator()(Char c) const {
      return Traits::eq(e_,c);
    }
  };

  string_type escape_;  // characters that start an escape sequence
  string_type c_;       // characters that separate fields
  string_type quote_;   // characters that toggle quoting
  bool last_;           // true when the previous token ended on a separator,
                        // i.e. one more (possibly empty) field is pending

  // True if e occurs in set, compared with Traits::eq.
  static bool contains(const string_type& set, Char e) {
    return std::find_if(set.begin(),set.end(),char_eq(e))!=set.end();
  }
  bool is_escape(Char e) const { return contains(escape_,e); }
  bool is_c(Char e) const { return contains(c_,e); }
  bool is_quote(Char e) const { return contains(quote_,e); }

  // Handles the character following an escape character.  On entry next
  // refers to the escape character; on exit it refers to the escaped
  // character, whose translation has been appended to tok.
  // Throws escaped_list_error if the input ends right after the escape
  // character or if the escaped character is not 'n', a quote, a
  // separator or an escape character.
  template <typename iterator, typename Token>
  void do_escape(iterator& next,iterator end,Token& tok) const {
    if (++next == end)
      throw escaped_list_error(std::string("cannot end with escape"));
    if (Traits::eq(*next,'n')) {
      tok+='\n';
    }
    else if (is_quote(*next) || is_c(*next) || is_escape(*next)) {
      // An escaped quote, separator or escape character stands for itself.
      tok+=*next;
    }
    else
      throw escaped_list_error(std::string("unknown escape sequence"));
  }

public:

  // Uses a single escape character e, separator c and quote q.
  explicit escaped_list_separator(Char e = '\\',
                                  Char c = ',',Char q = '\"')
    : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }

  // Every character of e acts as an escape character, every character of
  // c as a separator and every character of q as a quote.
  escaped_list_separator(string_type e, string_type c, string_type q)
    : escape_(e), c_(c), quote_(q), last_(false) { }

  // Forgets a pending trailing empty field.
  void reset() {last_=false;}

  // Extracts the next field from [next, end) into tok and advances next
  // past it (and past the separator that ended it, if any).
  // Returns false when there is nothing left to return; otherwise true.
  // A separator at the very end of the input yields one extra empty field
  // on the following call.  Throws escaped_list_error on a bad escape.
  template <typename InputIterator, typename Token>
  bool operator()(InputIterator& next,InputIterator end,Token& tok) {
    bool bInQuote = false;
    tok = Token();

    if (next == end) {
      if (last_) {
        // The previous field ended on a separator: report the blank
        // field that follows it, exactly once.
        last_ = false;
        return true;
      }
      else
        return false;
    }
    last_ = false;
    for (;next != end;++next) {
      if (is_escape(*next)) {
        do_escape(next,end,tok);
      }
      else if (is_c(*next)) {
        if (!bInQuote) {
          // If we are not in quote, then we are done
          ++next;
          // The last character was a c, that means there is
          // 1 more blank field
          last_ = true;
          return true;
        }
        else tok+=*next;
      }
      else if (is_quote(*next)) {
        // Quote characters are not copied into the token.
        bInQuote=!bInQuote;
      }
      else {
        tok += *next;
      }
    }
    return true;
  }
};
1.195 +
1.196 + //===========================================================================
1.197 + // The classes here are used by offset_separator and char_separator to implement
1.198 + // faster assigning of tokens using assign instead of +=
1.199 +
1.200 + namespace tokenizer_detail {
1.201 +
1.202 + // The assign_or_plus_equal struct contains functions that implement
1.203 + // assign, +=, and clearing based on the iterator type. The
1.204 + // generic case does nothing for plus_equal and clearing, while
1.205 + // passing through the call for assign.
1.206 + //
1.207 + // When an input iterator is being used, the situation is reversed.
1.208 + // The assign method does nothing, plus_equal invokes operator +=,
1.209 + // and the clearing method sets the supplied token to the default
1.210 + // token constructor's result.
1.211 + //
1.212 +
1.213 + template<class IteratorTag>
1.214 + struct assign_or_plus_equal {
1.215 + template<class Iterator, class Token>
1.216 + static void assign(Iterator b, Iterator e, Token &t) {
1.217 +
1.218 +#if BOOST_WORKAROUND(BOOST_MSVC, < 1300) &&\
1.219 + BOOST_WORKAROUND(__SGI_STL_PORT, < 0x500) &&\
1.220 + defined(_STLP_DEBUG) &&\
1.221 + (defined(_STLP_USE_DYNAMIC_LIB) || defined(_DLL))
1.222 + // Problem with string::assign for msvc-stlport in debug mode: the
1.223 + // linker tries to import the templatized version of this memfun,
1.224 + // which is obviously not exported.
1.225 + // See http://www.stlport.com/dcforum/DCForumID6/1763.html for details.
1.226 +
1.227 + t = Token();
1.228 + while(b != e) t += *b++;
1.229 +#else
1.230 + t.assign(b, e);
1.231 +#endif
1.232 +
1.233 + }
1.234 +
1.235 + template<class Token, class Value>
1.236 + static void plus_equal(Token &, const Value &) {
1.237 +
1.238 + }
1.239 +
1.240 + // If we are doing an assign, there is no need for the
1.241 + // the clear.
1.242 + //
1.243 + template<class Token>
1.244 + static void clear(Token &) {
1.245 +
1.246 + }
1.247 + };
1.248 +
1.249 + template <>
1.250 + struct assign_or_plus_equal<std::input_iterator_tag> {
1.251 + template<class Iterator, class Token>
1.252 + static void assign(Iterator b, Iterator e, Token &t) {
1.253 +
1.254 + }
1.255 + template<class Token, class Value>
1.256 + static void plus_equal(Token &t, const Value &v) {
1.257 + t += v;
1.258 + }
1.259 + template<class Token>
1.260 + static void clear(Token &t) {
1.261 + t = Token();
1.262 + }
1.263 + };
1.264 +
1.265 +
1.266 + template<class Iterator>
1.267 + struct pointer_iterator_category{
1.268 + typedef std::random_access_iterator_tag type;
1.269 + };
1.270 +
1.271 +
1.272 + template<class Iterator>
1.273 + struct class_iterator_category{
1.274 + typedef typename Iterator::iterator_category type;
1.275 + };
1.276 +
1.277 +
1.278 +
1.279 + // This portably gets the iterator_tag without partial template specialization
1.280 + template<class Iterator>
1.281 + struct get_iterator_category{
1.282 + typedef typename mpl::if_<is_pointer<Iterator>,
1.283 + pointer_iterator_category<Iterator>,
1.284 + class_iterator_category<Iterator>
1.285 + >::type cat;
1.286 +
1.287 + typedef typename cat::type iterator_category;
1.288 + };
1.289 +
1.290 +
1.291 +}
1.292 +
1.293 +
1.294 + //===========================================================================
1.295 + // The offset_separator class, which is a model of TokenizerFunction.
1.296 + // Offset breaks a string into tokens based on a range of offsets
1.297 +
1.298 + class offset_separator {
1.299 + private:
1.300 +
1.301 + std::vector<int> offsets_;
1.302 + unsigned int current_offset_;
1.303 + bool wrap_offsets_;
1.304 + bool return_partial_last_;
1.305 +
1.306 + public:
1.307 + template <typename Iter>
1.308 + offset_separator(Iter begin, Iter end, bool wrap_offsets = true,
1.309 + bool return_partial_last = true)
1.310 + : offsets_(begin,end), current_offset_(0),
1.311 + wrap_offsets_(wrap_offsets),
1.312 + return_partial_last_(return_partial_last) { }
1.313 +
1.314 + offset_separator()
1.315 + : offsets_(1,1), current_offset_(),
1.316 + wrap_offsets_(true), return_partial_last_(true) { }
1.317 +
1.318 + void reset() {
1.319 + current_offset_ = 0;
1.320 + }
1.321 +
1.322 + template <typename InputIterator, typename Token>
1.323 + bool operator()(InputIterator& next, InputIterator end, Token& tok)
1.324 + {
1.325 + typedef tokenizer_detail::assign_or_plus_equal<
1.326 +#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300
1.327 + typename
1.328 +#endif
1.329 + tokenizer_detail::get_iterator_category<
1.330 + InputIterator>::iterator_category> assigner;
1.331 +
1.332 +
1.333 + BOOST_ASSERT(!offsets_.empty());
1.334 +
1.335 + assigner::clear(tok);
1.336 + InputIterator start(next);
1.337 +
1.338 + if (next == end)
1.339 + return false;
1.340 +
1.341 + if (current_offset_ == offsets_.size())
1.342 + if (wrap_offsets_)
1.343 + current_offset_=0;
1.344 + else
1.345 + return false;
1.346 +
1.347 + int c = offsets_[current_offset_];
1.348 + int i = 0;
1.349 + for (; i < c; ++i) {
1.350 + if (next == end)break;
1.351 + assigner::plus_equal(tok,*next++);
1.352 + }
1.353 + assigner::assign(start,next,tok);
1.354 +
1.355 + if (!return_partial_last_)
1.356 + if (i < (c-1) )
1.357 + return false;
1.358 +
1.359 + ++current_offset_;
1.360 + return true;
1.361 + }
1.362 + };
1.363 +
1.364 +
1.365 + //===========================================================================
1.366 + // The char_separator class breaks a sequence of characters into
1.367 + // tokens based on the character delimiters (very much like bad old
1.368 + // strtok). A delimiter character can either be kept or dropped. A
1.369 + // kept delimiter shows up as an output token, whereas a dropped
1.370 + // delimiter does not.
1.371 +
1.372 + // This class replaces the char_delimiters_separator class. The
1.373 + // constructor for the char_delimiters_separator class was too
1.374 + // confusing and needed to be deprecated. However, because of the
1.375 + // default arguments to the constructor, adding the new constructor
1.376 + // would cause ambiguity, so instead I deprecated the whole class.
1.377 + // The implementation of the class was also simplified considerably.
1.378 +
1.379 + enum empty_token_policy { drop_empty_tokens, keep_empty_tokens };
1.380 +
1.381 + // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
1.382 +#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300
1.383 + template <typename Char,
1.384 + typename Traits = typename std::basic_string<Char>::traits_type >
1.385 +#else
1.386 + template <typename Char,
1.387 + typename Traits = std::basic_string<Char>::traits_type >
1.388 +#endif
1.389 + class char_separator
1.390 + {
1.391 + typedef std::basic_string<Char,Traits> string_type;
1.392 + public:
1.393 + explicit
1.394 + char_separator(const Char* dropped_delims,
1.395 + const Char* kept_delims = 0,
1.396 + empty_token_policy empty_tokens = drop_empty_tokens)
1.397 + : m_dropped_delims(dropped_delims),
1.398 + m_use_ispunct(false),
1.399 + m_use_isspace(false),
1.400 + m_empty_tokens(empty_tokens),
1.401 + m_output_done(false)
1.402 + {
1.403 + // Borland workaround
1.404 + if (kept_delims)
1.405 + m_kept_delims = kept_delims;
1.406 + }
1.407 +
1.408 + // use ispunct() for kept delimiters and isspace for dropped.
1.409 + explicit
1.410 + char_separator()
1.411 + : m_use_ispunct(true),
1.412 + m_use_isspace(true),
1.413 + m_empty_tokens(drop_empty_tokens) { }
1.414 +
1.415 + void reset() { }
1.416 +
1.417 + template <typename InputIterator, typename Token>
1.418 + bool operator()(InputIterator& next, InputIterator end, Token& tok)
1.419 + {
1.420 + typedef tokenizer_detail::assign_or_plus_equal<
1.421 +#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300
1.422 + typename
1.423 +#endif
1.424 + tokenizer_detail::get_iterator_category<
1.425 + InputIterator>::iterator_category> assigner;
1.426 +
1.427 + assigner::clear(tok);
1.428 +
1.429 + // skip past all dropped_delims
1.430 + if (m_empty_tokens == drop_empty_tokens)
1.431 + for (; next != end && is_dropped(*next); ++next)
1.432 + { }
1.433 +
1.434 + InputIterator start(next);
1.435 +
1.436 + if (m_empty_tokens == drop_empty_tokens) {
1.437 +
1.438 + if (next == end)
1.439 + return false;
1.440 +
1.441 +
1.442 + // if we are on a kept_delims move past it and stop
1.443 + if (is_kept(*next)) {
1.444 + assigner::plus_equal(tok,*next);
1.445 + ++next;
1.446 + } else
1.447 + // append all the non delim characters
1.448 + for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
1.449 + assigner::plus_equal(tok,*next);
1.450 + }
1.451 + else { // m_empty_tokens == keep_empty_tokens
1.452 +
1.453 + // Handle empty token at the end
1.454 + if (next == end)
1.455 + if (m_output_done == false) {
1.456 + m_output_done = true;
1.457 + assigner::assign(start,next,tok);
1.458 + return true;
1.459 + } else
1.460 + return false;
1.461 +
1.462 + if (is_kept(*next)) {
1.463 + if (m_output_done == false)
1.464 + m_output_done = true;
1.465 + else {
1.466 + assigner::plus_equal(tok,*next);
1.467 + ++next;
1.468 + m_output_done = false;
1.469 + }
1.470 + }
1.471 + else if (m_output_done == false && is_dropped(*next)) {
1.472 + m_output_done = true;
1.473 + }
1.474 + else {
1.475 + if (is_dropped(*next))
1.476 + start=++next;
1.477 + for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
1.478 + assigner::plus_equal(tok,*next);
1.479 + m_output_done = true;
1.480 + }
1.481 + }
1.482 + assigner::assign(start,next,tok);
1.483 + return true;
1.484 + }
1.485 +
1.486 + private:
1.487 + string_type m_kept_delims;
1.488 + string_type m_dropped_delims;
1.489 + bool m_use_ispunct;
1.490 + bool m_use_isspace;
1.491 + empty_token_policy m_empty_tokens;
1.492 + bool m_output_done;
1.493 +
1.494 + bool is_kept(Char E) const
1.495 + {
1.496 + if (m_kept_delims.length())
1.497 + return m_kept_delims.find(E) != string_type::npos;
1.498 + else if (m_use_ispunct) {
1.499 + return std::ispunct(E) != 0;
1.500 + } else
1.501 + return false;
1.502 + }
1.503 + bool is_dropped(Char E) const
1.504 + {
1.505 + if (m_dropped_delims.length())
1.506 + return m_dropped_delims.find(E) != string_type::npos;
1.507 + else if (m_use_isspace) {
1.508 + return std::isspace(E) != 0;
1.509 + } else
1.510 + return false;
1.511 + }
1.512 + };
1.513 +
1.514 + //===========================================================================
1.515 + // The following class is DEPRECATED, use class char_separator instead.
1.516 + //
1.517 + // The char_delimiters_separator class, which is a model of
1.518 + // TokenizerFunction. char_delimiters_separator breaks a string
1.519 + // into tokens based on character delimiters. There are 2 types of
1.520 + // delimiters. returnable delimiters can be returned as
1.521 + // tokens. These are often punctuation. nonreturnable delimiters
1.522 + // cannot be returned as tokens. These are often whitespace
1.523 +
1.524 + // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300
template <class Char,
  class Traits = typename std::basic_string<Char>::traits_type >
#else
template <class Char,
  class Traits = std::basic_string<Char>::traits_type >
#endif
class char_delimiters_separator {
private:

  typedef std::basic_string<Char,Traits> string_type;
  string_type returnable_;      // delimiters that may be returned as tokens
  string_type nonreturnable_;   // delimiters that are never returned
  bool return_delims_;          // return the returnable delimiters as tokens
  bool no_ispunct_;             // an explicit returnable set was supplied
  bool no_isspace_;             // an explicit nonreturnable set was supplied

  // Converts c to a value that may be passed to the <cctype> functions.
  // One-byte characters go through unsigned char, because passing a
  // negative value other than EOF to those functions is undefined
  // behaviour; wider characters are passed on unchanged, as before.
  static int ctype_arg(Char c)
  {
    return sizeof(Char) == 1
      ? static_cast<int>(static_cast<unsigned char>(c))
      : static_cast<int>(c);
  }

  // True if E is a returnable delimiter: a member of returnable_ when
  // that set is non-empty, otherwise std::ispunct unless an (empty)
  // explicit set was supplied.
  bool is_ret(Char E)const
  {
    if (returnable_.length())
      return returnable_.find(E) != string_type::npos;
    if (no_ispunct_)
      return false;
    return std::ispunct(ctype_arg(E)) != 0;
  }
  // True if E is a nonreturnable delimiter: a member of nonreturnable_
  // when that set is non-empty, otherwise std::isspace unless an (empty)
  // explicit set was supplied.
  bool is_nonret(Char E)const
  {
    if (nonreturnable_.length())
      return nonreturnable_.find(E) != string_type::npos;
    if (no_isspace_)
      return false;
    return std::isspace(ctype_arg(E)) != 0;
  }

public:
  // return_delims: whether returnable delimiters are produced as
  //   one-character tokens (true) or skipped (false).
  // returnable / nonreturnable: the delimiter sets; a null pointer
  //   selects std::ispunct / std::isspace respectively.
  explicit char_delimiters_separator(bool return_delims = false,
                                     const Char* returnable = 0,
                                     const Char* nonreturnable = 0)
    : returnable_(returnable ? returnable : string_type().c_str()),
      nonreturnable_(nonreturnable ? nonreturnable:string_type().c_str()),
      return_delims_(return_delims), no_ispunct_(returnable!=0),
      no_isspace_(nonreturnable!=0) { }

  // Nothing to reset: the separator keeps no state between calls.
  void reset() { }

public:

  // Extracts the next token from [next, end) into tok and advances next
  // past it.  Returns false when only delimiters (or nothing) remain.
  template <typename InputIterator, typename Token>
  bool operator()(InputIterator& next, InputIterator end,Token& tok) {
    tok = Token();

    // skip past all nonreturnable delims
    // skip past the returnable only if we are not returning delims
    for (;next!=end && ( is_nonret(*next) || (is_ret(*next)
      && !return_delims_ ) );++next) { }

    if (next == end) {
      return false;
    }

    // if we are to return delims and we are on a returnable one
    // move past it and stop
    if (is_ret(*next) && return_delims_) {
      tok+=*next;
      ++next;
    }
    else
      // append all the non delim characters
      for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next)
        tok+=*next;


    return true;
  }
};
1.608 +
1.609 +
1.610 +} //namespace boost
1.611 +
1.612 +
1.613 +#endif
1.614 +
1.615 +
1.616 +
1.617 +
1.618 +