sl@0: //  (C) Copyright Jeremy Siek 2004 
sl@0: //  Distributed under the Boost Software License, Version 1.0. (See
sl@0: //  accompanying file LICENSE_1_0.txt or copy at
sl@0: //  http://www.boost.org/LICENSE_1_0.txt)
sl@0: 
sl@0: /*
sl@0:  * stringtok.hpp -- Breaks a string into tokens.  This is an example for lib3.
sl@0:  *
sl@0:  * Template function looks like this:
sl@0:  *
sl@0:  *    template <typename Container>
sl@0:  *    void stringtok (Container &l,
sl@0:  *                    string const &s,
sl@0:  *                    char const * const ws = " \t\n");
sl@0:  *
sl@0:  * A nondestructive version of strtok() that handles its own memory and can
sl@0:  * be broken up by any character(s).  Does all the work at once rather than
sl@0:  * in an invocation loop like strtok() requires.
sl@0:  *
sl@0:  * Container is any type that supports push_back(a_string), although using
sl@0:  * list<string> and deque<string> are indicated due to their O(1) push_back.
sl@0:  * (I prefer deque<> because op[]/at() is available as well.)  The first
sl@0:  * parameter references an existing Container.
sl@0:  *
sl@0:  * s is the string to be tokenized.  From the parameter declaration, it can
sl@0:  * be seen that s is not affected.  Since references-to-const may refer to
sl@0:  * temporaries, you could use stringtok(some_container, readline("")) when
sl@0:  * using the GNU readline library.
sl@0:  *
sl@0:  * The final parameter is an array of characters that serve as whitespace.
sl@0:  * Whitespace characters default to one or more of tab, space, and newline,
sl@0:  * in any combination.
sl@0:  *
sl@0:  * 'l' need not be empty on entry.  On return, 'l' will have the token
sl@0:  * strings appended.
sl@0:  *
sl@0:  *
sl@0:  * [Example:
sl@0:  *       list<string>       ls;
sl@0:  *       stringtok (ls, " this  \t is\t\n  a test  ");
sl@0:  *       for (list<string>::const_iterator i = ls.begin();
sl@0:  *            i != ls.end(); ++i)
sl@0:  *       {
sl@0:  *            cerr << ':' << (*i) << ":\n";
sl@0:  *       }
sl@0:  *
sl@0:  *  would print
sl@0:  *       :this:
sl@0:  *       :is:
sl@0:  *       :a:
sl@0:  *       :test:
sl@0:  * -end example]
sl@0:  *
sl@0:  * pedwards@jaj.com  May 1999
sl@0: */
sl@0: 
sl@0: 
sl@0: #include <string>
sl@0: #include <cstring>    // for strchr
sl@0: 
sl@0: 
sl@0: /*****************************************************************
sl@0:  * This is the only part of the implementation that I don't like.
sl@0:  * It can probably be improved upon by the reader...
sl@0: */
sl@0: 
/**
 * Reports whether `c` is one of the delimiter characters listed in `wstr`.
 *
 * @param c     character to classify
 * @param wstr  NUL-terminated string listing the delimiter characters
 * @return true when `c` occurs in `wstr` before its terminating NUL
 */
inline bool
isws (char c, char const * const wstr)
{
    // strchr() considers the terminating NUL part of the searched string, so
    // strchr(wstr, '\0') is never null.  Reject '\0' explicitly: it cannot be
    // listed in a C-string delimiter set, yet a std::string may contain it.
    return (c != '\0') && (std::strchr(wstr, c) != NULL);
}
sl@0: 
sl@0: 
namespace boost {

/*****************************************************************
 * Splits s into tokens and appends each token to l.
 *
 * A token is a maximal run of characters that do not appear in ws.
 * Runs of delimiters between, before, or after tokens produce no
 * empty tokens.  Existing elements of l are left in place.
 *
 *   l   any container offering push_back(std::string)
 *   s   string to tokenize; it is not modified
 *   ws  NUL-terminated list of delimiter characters
 *
 * Only the characters listed in ws act as delimiters.  In particular
 * a '\0' embedded in s is kept inside its token, because a C-string
 * delimiter list cannot name it.
 *
 * This works on std::string only; it could be templatized on
 * basic_string or on a more generic string type.  The delimiter set
 * is supplied by the caller rather than taken from a trait, which
 * makes it easy to break strings up on different things.
*/
template <typename Container>
void
stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
{
    typedef std::string::size_type size_type;
    const size_type  npos = std::string::npos;

    // start of the first token, or npos when s holds nothing but delimiters
    size_type  i = s.find_first_not_of(ws);

    while (i != npos) {
        // first delimiter after the token start, or npos if the token
        // runs to the end of s
        const size_type  j = s.find_first_of(ws, i);

        if (j == npos) {
            // last token: take everything that is left
            l.push_back(s.substr(i));
            return;
        }

        l.push_back(s.substr(i, j - i));

        // skip the delimiter run that ended this token
        i = s.find_first_not_of(ws, j + 1);
    }
}


} // namespace boost