MinitScript  0.9.31 PRE-BETA
UTF8StringTokenizer.cpp
Go to the documentation of this file.
1 #include <string>
2 #include <vector>
3 
8 
9 using std::string;
10 
14 
15 void UTF8StringTokenizer::tokenize(const string& str, const string& delimiters, bool emptyTokens)
16 {
17  idx = 0;
18  tokens.clear();
19  //
20  string token;
21  //
22  UTF8CharacterIterator u8It(str);
23  UTF8CharacterIterator delimiterU8It(delimiters);
24  // iterate string value
25  for (; u8It.hasNext(); ) {
26  auto c = u8It.next();
27  // iterate delimiters
28  delimiterU8It.reset();
29  //
30  auto foundDelimiter = false;
31  for (; delimiterU8It.hasNext(); ) {
32  // check if delimiter character is our current string value char
33  auto dc = delimiterU8It.next();
34  // got a delimiter?
35  if (c == dc) {
36  foundDelimiter = true;
37  // yep, add token to elements if we have any
38  if (emptyTokens == true || token.empty() == false) {
39  tokens.push_back(token);
40  token.clear();
41  }
42  //
43  break;
44  }
45  }
46  //
47  if (foundDelimiter == false) {
48  // no delimiter, add char to token
49  token+= Character::toString(c);
50  }
51  }
52  // do we have a token still? add it to elements
53  if (emptyTokens == true || token.empty() == false) {
54  tokens.push_back(token);
55  }
56 }
static const string toString(uint32_t character)
Generates a string from the given character / code point.
Definition: Character.h:59