MinitScript  0.9.31 PRE-BETA
UTF8StringTools.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <regex>
4 #include <string>
5 #include <string_view>
6 #include <vector>
7 
11 
12 using std::smatch;
13 using std::string;
14 using std::string_view;
15 using std::vector;
16 
17 /**
18  * UTF8 String tools class
19  * @author Andreas Drewke
20  */
22 {
23 public:
/**
 * Checks if string starts with prefix
 * @param str string
 * @param prefix prefix string
 * @return if string starts with prefix; an empty prefix always matches
 */
inline static const bool startsWith(const string& str, const string& prefix) {
	// only compare the leading prefix.size() characters, a find() based check
	// would keep scanning the rest of str if the prefix is not at position 0
	return
		str.size() >= prefix.size() &&
		str.compare(0, prefix.size(), prefix) == 0;
}
33 
/**
 * Checks if string view starts with prefix
 * @param str string view
 * @param prefix prefix string view
 * @return if string view starts with prefix; an empty prefix always matches
 */
inline static const bool viewStartsWith(const string_view& str, const string_view& prefix) {
	// only compare the leading prefix.size() characters, a find() based check
	// would keep scanning the rest of str if the prefix is not at position 0
	return
		str.size() >= prefix.size() &&
		str.compare(0, prefix.size(), prefix) == 0;
}
43 
/**
 * Tests whether a string is terminated by a given suffix
 * @param str string to inspect
 * @param suffix suffix string to look for at the end of str
 * @return true if str ends with suffix, false otherwise
 */
inline static const bool endsWith(const string& str, const string& suffix) {
	const auto suffixLength = suffix.size();
	// a suffix longer than the string can never match
	if (suffixLength > str.size()) return false;
	// compare the trailing suffixLength characters of str against suffix
	return str.compare(str.size() - suffixLength, suffixLength, suffix) == 0;
}
55 
/**
 * Tests whether a string view is terminated by a given suffix
 * @param str string view to inspect
 * @param suffix suffix string view to look for at the end of str
 * @return true if str ends with suffix, false otherwise
 */
inline static const bool viewEndsWith(const string_view& str, const string_view& suffix) {
	// a suffix longer than the string can never match
	if (suffix.size() > str.size()) return false;
	// the tail of str having the length of suffix must equal suffix
	return str.substr(str.size() - suffix.size()) == suffix;
}
67 
68  /**
69  * Replace string with another string
70  * @param str string
71  * @param what what to replace
72  * @param by to replace by
73  * @param beginIndex index to begin with
74  * @param cache str UTF8 position cache
75  * @return replace result
76  */
77  static const string replace(
78  const string& str,
79  const string& what,
80  const string& by,
81  int64_t beginIndex = 0,
83  );
84 
85  /**
86  * Finds first index of given string
87  * @param str string
88  * @param what what
89  * @param beginIndex begin index
90  * @param cache str UTF8 position cache
91  * @return index or string::npos if not found
92  */
93  inline static int64_t indexOf(
94  const string& str,
95  const string& what,
96  int64_t beginIndex = 0,
98  ) {
99  return firstIndexOf(str, what, beginIndex, cache);
100  }
101 
102  /**
103  * Finds first index of given string
104  * @param str string
105  * @param what what
106  * @param beginIndex begin index
107  * @param cache str UTF8 position cache
108  * @return index or string::npos if not found
109  */
110  static int64_t firstIndexOf(
111  const string& str,
112  const string& what,
113  int64_t beginIndex = 0,
115  );
116 
117  /**
118  * Finds last index of given string
119  * @param str string
120  * @param what what
121  * @param endIndex end index or string::npos
122  * @param cache str UTF8 position cache
123  * @return index or string::npos if not found
124  */
125  static int64_t lastIndexOf(
126  const string& str,
127  const string& what,
128  int64_t endIndex = string::npos,
130  );
131 
132  /**
133  * Finds first index of characters provided within given string
134  * @param str string
135  * @param what what
136  * @param beginIndex begin index
137  * @param srcCache str UTF8 position cache
138  * @param whatCache what UTF8 position cache
139  * @return index or string::npos if not found
140  */
141  static int64_t firstIndexOfChars(const string& str, const string& what, int64_t beginIndex = 0, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* whatCache = nullptr);
142 
143  /**
144  * Finds last index of characters provided within given string
145  * @param str string
146  * @param what what
147  * @param endIndex end index or string::npos
148  * @param srcCache str UTF8 position cache
149  * @param whatCache what UTF8 position cache
150  * @return index or string::npos if not found
151  */
152  static int64_t lastIndexOfChars(const string& str, const string& what, int64_t endIndex = string::npos, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* whatCache = nullptr);
153 
154  /**
155  * Returns substring of given string from begin index to end index
156  * @param str string
157  * @param beginIndex begin index
158  * @param endIndex end index or string::npos
159  * @param cache str UTF8 position cache
160  * @return substring result
161  */
162  inline static const string substring(const string& str, int64_t beginIndex, int64_t endIndex = string::npos, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr) {
163  auto result = viewSubstring(string_view(str), beginIndex, endIndex, cache);
164  return string(result.data(), result.size());
165  }
166 
167  /**
168  * Returns substring of given string from begin index to end index
169  * @param str string
170  * @param beginIndex begin index
171  * @param endIndex end index or string::npos
172  * @param cache str UTF8 position cache
173  * @return substring result
174  */
175  static const string_view viewSubstring(const string_view& str, int64_t beginIndex, int64_t endIndex, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
176 
177  /**
178  * Checks if strings equal ignoring case
179  * @param string1 string 1
180  * @param string2 string 2
181  * @param string1Cache string1 UTF8 position cache
182  * @param string2Cache string2 UTF8 position cache
183  * @return equality
184  */
185  static bool equalsIgnoreCase(
186  const string& string1,
187  const string& string2,
190  );
191 
192  /**
193  * Trim string
194  * @param str string
195  * @param cache UTF8 position cache
196  * @return trimmed string
197  */
198  static const string trim(const string& str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
199 
200  /**
201  * Trim string
202  * @param str string
203  * @param cache UTF8 position cache
204  * @return trimmed string
205  */
206  static const string_view viewTrim(const string_view& str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
207 
208  /**
209  * Transform string to lower case
210  * @param str string
211  * @param cache UTF8 position cache
212  * @return lowercase string
213  */
214  static const string toLowerCase(const string& str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
215 
216  /**
217  * Transform string to upper case
218  * @param str string
219  * @param cache UTF8 position cache
220  * @return uppercase string
221  */
222  static const string toUpperCase(const string& str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
223 
224  /**
225  * Check if pattern matches whole string
226  * @param str string
227  * @param pattern pattern
228  * @param matches matches
229  * @return if pattern matches whole string
230  */
231  static bool regexMatch(const string& str, const string& pattern, smatch* matches = nullptr);
232 
233  /**
234  * Do regex pattern search
235  * @param str string
236  * @param pattern pattern
237  * @param matches matches
238  * @return if search was successful
239  */
240  static bool regexSearch(const string& str, const string& pattern, smatch* matches = nullptr);
241 
242  /**
243  * Replace regex pattern with given string
244  * @param str string
245  * @param pattern pattern
246  * @param by replace string
247  * @return replace result
248  */
249  static const string regexReplace(const string& str, const string& pattern, const string& by);
250 
251  /**
252  * Tokenize
253  * @param str string
254  * @param delimiters delimiters
255  * @param emptyTokens include empty tokens
256  * @return tokens
257  */
258  static const vector<string> tokenize(const string& str, const string& delimiters, bool emptyTokens = false);
259 
260  /**
261  * Pad a string left
262  * @param str string
263  * @param by by
264  * @param toLength to length
265  * @param cache str UTF8 position cache
266  * @return padded string
267  */
268  static const string padLeft(const string& str, const string& by, int64_t toLength, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
269 
270  /**
271  * Pad a string right
272  * @param str string
273  * @param by by
274  * @param toLength to length
275  * @param cache str UTF8 position cache
276  * @return padded string
277  */
278  static const string padRight(const string& str, const string& by, int64_t toLength, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
279 
/**
 * Indent a string by prepending an indentation string a given number of times
 * @param str string
 * @param with indentation string which gets prepended
 * @param count how often with gets prepended; a count <= 0 yields a copy of str
 * @return resulting string
 */
inline static const string indent(const string& str, const string& with, int64_t count) {
	string result;
	if (count > 0) {
		// reserve the final length up front instead of growing while appending
		result.reserve(with.size() * static_cast<string::size_type>(count) + str.size());
	}
	// the counter needs to have the type of count, an int counter would overflow for counts > INT_MAX
	for (int64_t i = 0; i < count; i++) result+= with;
	result+= str;
	return result;
}
292 
/**
 * Generate a string by repeating a given string
 * @param what string to repeat
 * @param count how often what gets repeated, defaults to 1; a count <= 0 yields an empty string
 * @return resulting string
 */
inline static const string generate(const string& what, int64_t count = 1) {
	string result;
	if (count > 0) {
		// reserve the final length up front instead of growing while appending
		result.reserve(what.size() * static_cast<string::size_type>(count));
	}
	// the counter needs to have the type of count, an int counter would overflow for counts > INT_MAX
	for (int64_t i = 0; i < count; i++) result+= what;
	return result;
}
304 
305  /**
306  * Get UTF8 string length
307  * @param str string
308  * @param cache UTF8 position cache
309  * @return UTF8 string length
310  */
311  static int64_t getLength(const string& str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
312 
313  /**
314  * Get UTF8 character at given index
315  * @param str string
316  * @param index index
317  * @param cache UTF8 position cache
318  * @return character as string
319  */
320  static const string getCharAt(const string& str, int64_t index, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
321 
322  /**
323  * Get UTF8 code point at given index
324  * @param str string
325  * @param index index
326  * @param cache UTF8 position cache
327  * @return character as codepoint
328  */
329  static uint32_t getCodePointAt(const string& str, int64_t index, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
330 
331  /**
332  * Get UTF8 binary buffer index
333  * @param str string
334  * @param charIdx character index
335  * @param cache UTF8 position cache
336  * @return UTF binary buffer position from given character/code point index
337  */
338  static int64_t getUTF8BinaryIndex(const string& str, int64_t charIdx, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr);
339 
340 };
341 
static const bool startsWith(const string &str, const string &prefix)
Checks if string starts with prefix.
static int64_t firstIndexOf(const string &str, const string &what, int64_t beginIndex=0, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Finds first index of given string.
static const string toUpperCase(const string &str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Transform string to upper case.
static bool equalsIgnoreCase(const string &string1, const string &string2, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *string1Cache=nullptr, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *string2Cache=nullptr)
Checks if strings equal ignoring case.
static const string getCharAt(const string &str, int64_t index, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Get UTF8 character at given index.
static uint32_t getCodePointAt(const string &str, int64_t index, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Get UTF8 code point at given index.
static int64_t getUTF8BinaryIndex(const string &str, int64_t charIdx, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Get UTF8 binary buffer index.
static const bool viewEndsWith(const string_view &str, const string_view &suffix)
Checks if string ends with suffix.
static const string indent(const string &str, const string &with, int64_t count)
Indent a string.
static int64_t getLength(const string &str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Get UTF8 string length.
static const string padLeft(const string &str, const string &by, int64_t toLength, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Pad a string left.
static const string toLowerCase(const string &str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Transform string to lower case.
static const bool viewStartsWith(const string_view &str, const string_view &prefix)
Checks if string starts with prefix.
static const bool endsWith(const string &str, const string &suffix)
Checks if string ends with suffix.
static int64_t indexOf(const string &str, const string &what, int64_t beginIndex=0, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Finds first index of given string.
static const string trim(const string &str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Trim string.
static const string_view viewTrim(const string_view &str, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Trim string.
static int64_t lastIndexOfChars(const string &str, const string &what, int64_t endIndex=string::npos, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *srcCache=nullptr, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *whatCache=nullptr)
Finds last index of characters provided within given string.
static const string replace(const string &str, const string &what, const string &by, int64_t beginIndex=0, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Replace string with another string.
static const string regexReplace(const string &str, const string &pattern, const string &by)
Replace regex pattern with given string.
static bool regexSearch(const string &str, const string &pattern, smatch *matches=nullptr)
Do regex pattern search.
static const string substring(const string &str, int64_t beginIndex, int64_t endIndex=string::npos, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *srcCache=nullptr, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Returns substring of given string from begin index to end index.
static int64_t lastIndexOf(const string &str, const string &what, int64_t endIndex=string::npos, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Finds last index of given string.
static const string generate(const string &what, int64_t count=1)
Generate a string.
static bool regexMatch(const string &str, const string &pattern, smatch *matches=nullptr)
Check if pattern matches whole string.
static const string_view viewSubstring(const string_view &str, int64_t beginIndex, int64_t endIndex, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Returns substring of given string from begin index to end index.
static const string padRight(const string &str, const string &by, int64_t toLength, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *cache=nullptr)
Pad a string right.
static const vector< string > tokenize(const string &str, const string &delimiters, bool emptyTokens=false)
Tokenize.
static int64_t firstIndexOfChars(const string &str, const string &what, int64_t beginIndex=0, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *srcCache=nullptr, ::minitscript::utilities::UTF8CharacterIterator::UTF8PositionCache *whatCache=nullptr)
Finds first index of characters provided within given string.