//  (C) Copyright Jeremy Siek 2004
//  Distributed under the Boost Software License, Version 1.0. (See
//  accompanying file LICENSE_1_0.txt or copy at
//  http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_STRINGTOK_HPP
#define BOOST_STRINGTOK_HPP

/*
 * stringtok.hpp -- Breaks a string into tokens.  This is an example for lib3.
 *
 * Template function looks like this:
 *
 *    template <typename Container>
 *    void stringtok (Container &l,
 *                    string const &s,
 *                    char const * const ws = " \t\n");
 *
 * A nondestructive version of strtok() that handles its own memory and can
 * be broken up by any character(s).  Does all the work at once rather than
 * in an invocation loop like strtok() requires.
 *
 * Container is any type that supports push_back(a_string), although using
 * list<string> and deque<string> are indicated due to their O(1) push_back.
 * (I prefer deque<> because op[]/at() is available as well.)  The first
 * parameter references an existing Container.
 *
 * s is the string to be tokenized.  From the parameter declaration, it can
 * be seen that s is not affected.  Since references-to-const may refer to
 * temporaries, you could use stringtok(some_container, readline("")) when
 * using the GNU readline library.
 *
 * The final parameter is an array of characters that serve as whitespace.
 * Whitespace characters default to one or more of tab, space, and newline,
 * in any combination.
 *
 * 'l' need not be empty on entry.  On return, 'l' will have the token
 * strings appended.
 *
 *
 * [Example:
 *       list<string>       ls;
 *       stringtok (ls, " this  \t is\t\n  a test  ");
 *       for (list<string>::const_iterator i = ls.begin();
 *            i != ls.end(); ++i)
 *       {
 *            cerr << ':' << (*i) << ":\n";
 *       }
 *
 *  would print
 *       :this:
 *       :is:
 *       :a:
 *       :test:
 * -end example]
 *
 * pedwards@jaj.com  May 1999
 */


#include <string>
#include <cstring>    // for strchr


/*****************************************************************
 * This is the only part of the implementation that I don't like.
 * It can probably be improved upon by the reader...
 *
 * isws -- tells whether a character belongs to a whitespace set.
 *
 *   c     the character to classify.
 *   wstr  NUL-terminated array of the characters that count as
 *         whitespace; must not be a null pointer.
 *
 * Returns true when c is one of the characters of wstr.  The NUL
 * terminator of wstr is not a member of the set, so isws('\0', wstr)
 * is always false.
 */

inline bool
isws (char c, char const * const wstr)
{
    // strchr() also matches the terminating '\0' of wstr; without this
    // guard every embedded NUL in the input would count as whitespace.
    return (c != '\0') && (std::strchr(wstr, c) != NULL);
}


namespace boost {

/*****************************************************************
 * Simplistic and quite Standard.  This should be
 * templatized on basic_string instead, or on a more generic StringT
 * that just happens to support ::size_type, .substr(), and so on.
 * I had hoped that "whitespace" would be a trait, but it isn't, so
 * the user must supply it.  Enh, this lets them break up strings on
 * different things easier than traits would anyhow.
 *
 * stringtok -- appends the tokens of s to l.
 *
 *   l   container receiving the tokens through push_back(); existing
 *       elements are left untouched.
 *   s   string to split; it is not modified.
 *   ws  NUL-terminated set of separator characters (default: space,
 *       tab, newline); must not be a null pointer.
 *
 * A token is a maximal run of characters that are not in ws, so runs
 * of separators never produce empty tokens, and leading or trailing
 * separators are ignored.  Nothing is appended when s is empty or
 * consists only of separators.
 */
template <typename Container>
void
stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
{
    typedef std::string::size_type size_type;
    const size_type npos = std::string::npos;

    // skip leading whitespace; npos means nothing left but WS
    size_type first = s.find_first_not_of(ws);

    while (first != npos) {
        // find end of word (npos when the word runs to the end of s)
        const size_type last = s.find_first_of(ws, first);

        // add word
        if (last == npos) {
            l.push_back(s.substr(first));
            return;
        }
        l.push_back(s.substr(first, last - first));

        // set up for next loop: skip the separator run after the word
        first = s.find_first_not_of(ws, last);
    }
}


} // namespace boost

#endif // BOOST_STRINGTOK_HPP