Eclipse SUMO - Simulation of Urban MObility
StringTokenizer.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3 // Copyright (C) 2001-2019 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials
5 // are made available under the terms of the Eclipse Public License v2.0
6 // which accompanies this distribution, and is available at
7 // http://www.eclipse.org/legal/epl-v20.html
8 // SPDX-License-Identifier: EPL-2.0
9 /****************************************************************************/
17 // A java-style StringTokenizer for c++ (stl)
18 /****************************************************************************/
19 
20 
21 // ===========================================================================
22 // included modules
23 // ===========================================================================
24 #include <config.h>
25 
26 #include <string>
27 #include <vector>
28 #include <iostream> // !!! debug only
29 
30 #include "UtilExceptions.h"
31 #include "StringTokenizer.h"
32 
33 
34 // ===========================================================================
35 // variable definitions
36 // ===========================================================================
37 const int StringTokenizer::NEWLINE = -256;
38 const int StringTokenizer::WHITECHARS = -257;
39 const int StringTokenizer::SPACE = 32;
40 const int StringTokenizer::TAB = 9;
41 
42 
43 // ===========================================================================
44 // method definitions
45 // ===========================================================================
46 
48  myPos(0) {
49 }
50 
51 
52 StringTokenizer::StringTokenizer(std::string tosplit) :
53  myTosplit(tosplit), myPos(0) {
54  prepareWhitechar(tosplit);
55 }
56 
57 
58 StringTokenizer::StringTokenizer(std::string tosplit, std::string token, bool splitAtAllChars) :
59  myTosplit(tosplit), myPos(0) {
60  prepare(tosplit, token, splitAtAllChars);
61 }
62 
63 
64 StringTokenizer::StringTokenizer(std::string tosplit, int special) :
65  myTosplit(tosplit), myPos(0) {
66  switch (special) {
67  case NEWLINE:
68  prepare(tosplit, "\r\n", true);
69  break;
70  case TAB:
71  prepare(tosplit, "\t", true);
72  break;
73  case WHITECHARS:
74  prepareWhitechar(tosplit);
75  break;
76  default:
77  char* buf = new char[2];
78  buf[0] = (char) special;
79  buf[1] = 0;
80  prepare(tosplit, buf, false);
81  delete[] buf;
82  break;
83  }
84 }
85 
86 
88 
89 
91  myPos = 0;
92 }
93 
94 
96  return myPos != (int)myStarts.size();
97 }
98 
99 
100 std::string StringTokenizer::next() {
101  if (myPos >= (int)myStarts.size()) {
102  throw OutOfBoundsException();
103  }
104  if (myLengths[myPos] == 0) {
105  myPos++;
106  return "";
107  }
108  int start = myStarts[myPos];
109  int length = myLengths[myPos++];
110  return myTosplit.substr(start, length);
111 }
112 
113 
114 std::string StringTokenizer::front() {
115  if (myStarts.size() == 0) {
116  throw OutOfBoundsException();
117  }
118  if (myLengths[0] == 0) {
119  return "";
120  }
121  return myTosplit.substr(myStarts[0], myLengths[0]);
122 }
123 
124 
125 std::string StringTokenizer::get(int pos) const {
126  if (pos >= (int)myStarts.size()) {
127  throw OutOfBoundsException();
128  }
129  if (myLengths[pos] == 0) {
130  return "";
131  }
132  int start = myStarts[pos];
133  int length = myLengths[pos];
134  return myTosplit.substr(start, length);
135 }
136 
137 
139  return (int)myStarts.size();
140 }
141 
142 
143 void StringTokenizer::prepare(const std::string& tosplit, const std::string& token, bool splitAtAllChars) {
144  int beg = 0;
145  int len = (int)token.length();
146  if (splitAtAllChars) {
147  len = 1;
148  }
149  while (beg < (int)tosplit.length()) {
150  std::string::size_type end;
151  if (splitAtAllChars) {
152  end = tosplit.find_first_of(token, beg);
153  } else {
154  end = tosplit.find(token, beg);
155  }
156  if (end == std::string::npos) {
157  end = tosplit.length();
158  }
159  myStarts.push_back(beg);
160  myLengths.push_back((int)end - beg);
161  beg = (int)end + len;
162  if (beg == (int)tosplit.length()) {
163  myStarts.push_back(beg - 1);
164  myLengths.push_back(0);
165  }
166  }
167 }
168 
169 
170 void StringTokenizer::prepareWhitechar(const std::string& tosplit) {
171  std::string::size_type len = tosplit.length();
172  std::string::size_type beg = 0;
173  while (beg < len && tosplit[beg] <= SPACE) {
174  beg++;
175  }
176  while (beg != std::string::npos && beg < len) {
177  std::string::size_type end = beg;
178  while (end < len && tosplit[end] > SPACE) {
179  end++;
180  }
181  myStarts.push_back((int)beg);
182  myLengths.push_back((int)end - (int)beg);
183  beg = end;
184  while (beg < len && tosplit[beg] <= SPACE) {
185  beg++;
186  }
187  }
188 }
189 
190 
191 std::vector<std::string>
193  std::vector<std::string> ret;
194  ret.reserve(size());
195  while (hasNext()) {
196  ret.push_back(next());
197  }
198  reinit();
199  return ret;
200 }
201 
202 /****************************************************************************/
void reinit()
reinitialises the internal iterator
std::string next()
returns the next substring when it exists. Otherwise an OutOfBoundsException is thrown
static const int WHITECHARS
identifier for splitting the given string at all whitespace characters
std::string myTosplit
the string to split
std::string get(int pos) const
returns the item at the given position
static const int NEWLINE
identifier for splitting the given string at all newline characters
bool hasNext()
returns the information whether further substrings exist
StringTokenizer()
default constructor
void prepare(const std::string &tosplit, const std::string &token, bool splitAtAllChars)
splits the first string at all occurrences of the second. If the third parameter is true split at all ...
~StringTokenizer()
destructor
int size() const
returns the number of existing substrings
SizeVector myLengths
the list of substring lengths
static const int SPACE
the ascii index of the highest whitespace character
SizeVector myStarts
the list of substring starts
std::string front()
returns the first substring without moving the iterator
static const int TAB
the ascii index of the tab character
std::vector< std::string > getVector()
return vector of strings
void prepareWhitechar(const std::string &tosplit)
splits the first string at all occurrences of whitechars
int myPos
the current position in the list of substrings