MinitScript
0.9.31 PRE-BETA
src
minitscript
utilities
UTF8StringTokenizer.cpp
Go to the documentation of this file.
1
#include <string>
2
#include <vector>
3
4
#include <
minitscript/minitscript.h
>
5
#include <
minitscript/utilities/Character.h
>
6
#include <
minitscript/utilities/UTF8StringTokenizer.h
>
7
#include <
minitscript/utilities/UTF8CharacterIterator.h
>
8
9
using
std::string;
10
11
using
minitscript::utilities::Character
;
12
using
minitscript::utilities::UTF8StringTokenizer
;
13
using
minitscript::utilities::UTF8CharacterIterator
;
14
15
void
UTF8StringTokenizer::tokenize(
const
string
& str,
const
string
& delimiters,
bool
emptyTokens)
16
{
17
idx
= 0;
18
tokens
.clear();
19
//
20
string
token;
21
//
22
UTF8CharacterIterator
u8It(str);
23
UTF8CharacterIterator
delimiterU8It(delimiters);
24
// iterate string value
25
for
(; u8It.
hasNext
(); ) {
26
auto
c = u8It.
next
();
27
// iterate delimiters
28
delimiterU8It.
reset
();
29
//
30
auto
foundDelimiter =
false
;
31
for
(; delimiterU8It.
hasNext
(); ) {
32
// check if delimiter character is our current string value char
33
auto
dc = delimiterU8It.
next
();
34
// got a delimiter?
35
if
(c == dc) {
36
foundDelimiter =
true
;
37
// yep, add token to elements if we have any
38
if
(emptyTokens ==
true
|| token.empty() ==
false
) {
39
tokens
.push_back(token);
40
token.clear();
41
}
42
//
43
break
;
44
}
45
}
46
//
47
if
(foundDelimiter ==
false
) {
48
// no delimiter, add char to token
49
token+=
Character::toString
(c);
50
}
51
}
52
// do we have a token still? add it to elements
53
if
(emptyTokens ==
true
|| token.empty() ==
false
) {
54
tokens
.push_back(token);
55
}
56
}
Character.h
UTF8CharacterIterator.h
UTF8StringTokenizer.h
minitscript::utilities::Character
Character class.
Definition:
Character.h:17
minitscript::utilities::Character::toString
static const string toString(uint32_t character)
Generates a string from given character / code point.
Definition:
Character.h:59
minitscript::utilities::UTF8CharacterIterator
UTF8 string character iterator.
Definition:
UTF8CharacterIterator.h:18
minitscript::utilities::UTF8CharacterIterator::reset
void reset() const
Reset.
Definition:
UTF8CharacterIterator.h:96
minitscript::utilities::UTF8CharacterIterator::next
int next() const
Definition:
UTF8CharacterIterator.h:165
minitscript::utilities::UTF8CharacterIterator::hasNext
bool hasNext() const
Definition:
UTF8CharacterIterator.h:159
minitscript::utilities::UTF8StringTokenizer
UTF8 string tokenizer class.
Definition:
UTF8StringTokenizer.h:17
minitscript::utilities::UTF8StringTokenizer::tokens
vector< string > tokens
Definition:
UTF8StringTokenizer.h:20
minitscript::utilities::UTF8StringTokenizer::idx
int idx
Definition:
UTF8StringTokenizer.h:21
minitscript.h
Generated by
1.9.1