sl@0
|
1 |
// (C) Copyright Jeremy Siek 2004
|
sl@0
|
2 |
// Distributed under the Boost Software License, Version 1.0. (See
|
sl@0
|
3 |
// accompanying file LICENSE_1_0.txt or copy at
|
sl@0
|
4 |
// http://www.boost.org/LICENSE_1_0.txt)
|
sl@0
|
5 |
|
sl@0
|
6 |
/*
|
sl@0
|
7 |
* stringtok.hpp -- Breaks a string into tokens. This is an example for lib3.
|
sl@0
|
8 |
*
|
sl@0
|
9 |
* Template function looks like this:
|
sl@0
|
10 |
*
|
sl@0
|
11 |
* template <typename Container>
|
sl@0
|
12 |
* void stringtok (Container &l,
|
sl@0
|
13 |
* string const &s,
|
sl@0
|
14 |
* char const * const ws = " \t\n");
|
sl@0
|
15 |
*
|
sl@0
|
16 |
* A nondestructive version of strtok() that handles its own memory and can
|
sl@0
|
17 |
* be broken up by any character(s). Does all the work at once rather than
|
sl@0
|
18 |
* in an invocation loop like strtok() requires.
|
sl@0
|
19 |
*
|
sl@0
|
20 |
 * Container is any type that supports push_back(a_string). list<string> and
 * deque<string> are recommended because their push_back is O(1).
 * (I prefer deque<> because op[]/at() is available as well.) The first
 * parameter references an existing Container.
|
sl@0
|
24 |
*
|
sl@0
|
25 |
* s is the string to be tokenized. From the parameter declaration, it can
|
sl@0
|
26 |
* be seen that s is not affected. Since references-to-const may refer to
|
sl@0
|
27 |
* temporaries, you could use stringtok(some_container, readline("")) when
|
sl@0
|
28 |
* using the GNU readline library.
|
sl@0
|
29 |
*
|
sl@0
|
30 |
* The final parameter is an array of characters that serve as whitespace.
|
sl@0
|
31 |
* Whitespace characters default to one or more of tab, space, and newline,
|
sl@0
|
32 |
* in any combination.
|
sl@0
|
33 |
*
|
sl@0
|
34 |
* 'l' need not be empty on entry. On return, 'l' will have the token
|
sl@0
|
35 |
* strings appended.
|
sl@0
|
36 |
*
|
sl@0
|
37 |
*
|
sl@0
|
38 |
* [Example:
|
sl@0
|
39 |
* list<string> ls;
|
sl@0
|
40 |
* stringtok (ls, " this \t is\t\n a test ");
|
sl@0
|
41 |
* for (list<string>::const_iterator i = ls.begin();
|
sl@0
|
42 |
* i != ls.end(); ++i)
|
sl@0
|
43 |
* {
|
sl@0
|
44 |
* cerr << ':' << (*i) << ":\n";
|
sl@0
|
45 |
* }
|
sl@0
|
46 |
*
|
sl@0
|
47 |
* would print
|
sl@0
|
48 |
* :this:
|
sl@0
|
49 |
* :is:
|
sl@0
|
50 |
* :a:
|
sl@0
|
51 |
* :test:
|
sl@0
|
52 |
* -end example]
|
sl@0
|
53 |
*
|
sl@0
|
54 |
* pedwards@jaj.com May 1999
|
sl@0
|
55 |
*/
|
sl@0
|
56 |
|
sl@0
|
57 |
|
sl@0
|
58 |
#include <string>
|
sl@0
|
59 |
#include <cstring> // for strchr
|
sl@0
|
60 |
|
sl@0
|
61 |
|
sl@0
|
62 |
/*****************************************************************
 * isws -- tests whether a character belongs to a delimiter set.
 *
 *   c     the character to classify
 *   wstr  NUL-terminated array of delimiter characters; must not be null
 *
 * Returns true if c is one of the characters listed in wstr.
 *
 * std::strchr treats the terminating NUL of wstr as part of the searched
 * string, so a plain strchr test would classify '\0' as a delimiter for
 * every wstr.  A NUL can never be listed in a NUL-terminated set, hence
 * it is rejected explicitly before searching.
 *
 * (Original author's note: this is the only part of the implementation
 * that I don't like.  It can probably be improved upon by the reader...)
 */
inline bool
isws (char c, char const * const wstr)
{
    if (c == '\0')
        return false;   // would otherwise match wstr's own terminator
    return std::strchr(wstr, c) != NULL;
}
|
sl@0
|
73 |
|
sl@0
|
74 |
|
sl@0
|
75 |
namespace boost {

/*****************************************************************
 * stringtok -- splits s into tokens and appends them to l.
 *
 *   l   destination container; only l.push_back(std::string) is used.
 *       Existing elements are kept, tokens are appended in order.
 *   s   the string to tokenize; it is not modified.
 *   ws  NUL-terminated set of delimiter characters (must not be null).
 *       Defaults to space, tab and newline.
 *
 * A token is a maximal run of characters none of which appears in ws.
 * Runs of delimiters, including leading and trailing ones, never
 * produce empty tokens.
 *
 * The scan uses std::string's own find_first_not_of / find_first_of
 * instead of a per-character helper.  Because ws is a NUL-terminated
 * set, a NUL character embedded in s is never a delimiter and stays
 * inside its token.
 *
 * This could still be templatized on basic_string, or on a more
 * generic StringT that supports ::size_type, .substr(), and so on.
 */
template <typename Container>
void
stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
{
    typedef std::string::size_type size_type;

    // Index of the first character of the next token (skips leading delimiters).
    size_type first = s.find_first_not_of(ws);

    while (first != std::string::npos) {
        // Index of the delimiter that ends the token, or npos if none follows.
        const size_type last = s.find_first_of(ws, first);

        if (last == std::string::npos) {
            // Token runs to the end of s.
            l.push_back(s.substr(first));
            return;
        }

        l.push_back(s.substr(first, last - first));

        // Skip the delimiter run that follows this token.
        first = s.find_first_not_of(ws, last);
    }
}

} // namespace boost
|