Stroika Library 3.0d23x
 
Loading...
Searching...
No Matches
String.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Characters_String_h_
5#define _Stroika_Foundation_Characters_String_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <compare>
10#include <iosfwd>
11#include <locale>
12#include <string>
13#include <string_view>
14
16#include "Stroika/Foundation/Characters/SDKString.h"
19#include "Stroika/Foundation/Containers/Sequence.h"
20#include "Stroika/Foundation/Containers/Set.h"
24
25/**
26 * \file
27 *
28 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
29 *
30 * TODO:
31 *
32 * @todo Cleanup SubString (), and String::SubString_ use of SharedByValue<TRAITS>::ReadOnlyReference for
33 * performance. At some level - in String::SubString_ - we have a (hidden) sharedPtr and it would
34 * be safe and performant in that case to re-use that shared_ptr to make a new String envelope.
35 *
36 * However, I'm not sure its safe in general to have SharedByValue<TRAITS>::ReadOnlyReference expose
37 * its shared_ptr, which appears needed to make this happen.
38 *
39 * Not a biggie opportunity, so we can delay this -- LGP 2014-04-10
40 *
41 * @todo Add PadLeft/PadRight or FillLeft/FillRight() - not sure which name is better. But idea is to
42 * produce a string which is identical to the orig except that IF start len < n, then expand it with
43 * the given arg char repeated on the left or right.
44 *
45 * @todo RFind() API should be embellished to include startAt etc, like regular Find () - but not 100%
46 * sure - think through...
47 *
48 * @todo MAYBE also add ReplaceOne() function (we have ReplaceAll() now) ; see Replace() API in this function? - maybe overload?
49 *
50 * @todo Move DOCS in the top of this file down to the appropriate major classes - and then review the implementation and make sure
51 * it is all correct for each (especially SetStorage () stuff looks questionable)
52 *
53 */
54
56 template <typename T>
57 class Set;
58}
59
61
62 /*
63 * \brief On Windows, affects the behavior of String::As<filesystem::path> ()
64 *
65 * On windows, it's helpful when mapping String to std::filesystem::path to map certain common name
66 * prefixes to things that will be found on Windows.
67 *
68 * MSYS creates paths like /c/folder for c:/folder
69 * CYGWIN creates paths like /cygdrive/c/folder for c:/folder
70 *
71 * Automatically map these (since Stroika v3.0d6) in (was ToPath) As<filesystem::path> ();
72 *
73 * \see https://www.msys2.org/docs/filesystem-paths/
74 * this API is for getting strings from the commandline, or user input, or configuration files etc, where Cygwin
75 * or msys style paths may be present. APIs that talk directly to the OS are more likely to more directly produce
76 * filesystem::path than String. Anyhow - because of this, on windows, it's probably more helpful than not to map
77 * the MSYS/cygdrive crap to a path more likely to actually work right. --LGP 2024-03-06
78 */
79#ifndef qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
80#define qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin qStroika_Foundation_Common_Platform_Windows
81#endif
82
83}
84
85#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
86namespace std::filesystem {
87 class path; // forward declare for template specialization
88}
89#endif
90
92
93 class RegularExpression;
94 class RegularExpressionMatch;
95
96 /**
97 * Flag principally for LimitLength, but used elsewhere as well (e.g. ToString ()).
98 */
100 ePreferKeepLeft,
101 ePreferKeepRight,
102 ePreferKeepMid,
103
104 eDEFAULT = ePreferKeepLeft,
105
106 Stroika_Define_Enum_Bounds (ePreferKeepLeft, ePreferKeepMid)
107 };
108 using StringShorteningPreference::ePreferKeepLeft;
109 using StringShorteningPreference::ePreferKeepMid;
110 using StringShorteningPreference::ePreferKeepRight;
111
112 /**
113 * \brief returns true iff T == u8string, u16string, u32string, or wstring - which std::string types can be unambiguously converted to UNICODE
114 */
115 template <typename T>
116 concept IBasicUNICODEStdString = same_as<T, u8string> or same_as<T, u16string> or same_as<T, u32string> or same_as<T, wstring>;
117
118 /**
119 * \brief anything with a 'special .STRINGTYPE conversion' method to UNICODE string, such as filesystem::path
120 *
121 * Really, this is a thinly veiled attempt to avoid #include <filesystem> for modularity reasons.
122 */
123 template <typename T>
124 concept IStdPathLike2UNICODEString = requires (T t) {
125 { t.wstring () } -> same_as<wstring>;
126 } or requires (T t) {
127 { t.u8string () } -> same_as<u8string>;
128 } or requires (T t) {
129 { t.u16string () } -> same_as<u16string>;
130 } or requires (T t) {
131 { t.u32string () } -> same_as<u32string>;
132 };
133
134 class String;
135
136 /**
137 * The concept IConvertibleToString is satisfied iff the argument type can be used to construct a (Stroika) String.
138 * Note subtly, const char* is treated (as of Stroika v3) as convertible to String, but the characters must be ASCII, or
139 * an exception will be generated in the constructor.
140 */
141 template <typename T>
142 concept IConvertibleToString = convertible_to<T, String>;
143
144 /**
145 * \brief String is like std::u32string, except it is much easier to use, often much more space efficient, and more easily interoperates with other string types
146 *
147 * The Stroika String class is conceptually a sequence of (UNICODE) Characters, and so there is
148 * no obvious way to map the Stroika String to a std::string (in general). However, if you specify a codepage
149 * for conversion, or are converting to/from SDKString/SDKChar, or u8string, etc, there is builtin support for that.
150 *
151 * EOS Handling:
152 * The Stroika String class supports having embedded NUL-characters. It also supports
153 * easy construction from NUL-terminated character strings.
154 *
155 * Since Stroika v3, there is no longer c_str () support, since Stroika doesn't internally
156 * require NUL-terminated strings, and actively encourages different compact representations of
157 * strings (c_str() requires a choice of a particular encoding to make sense).
158 *
159 * About spans, and the \0 NUL-termination - generally do NOT include
160 * the NUL-character in your span! Stroika strings will allow this, and treat
161 * it as just another character, but its probably not what you meant.
162 *
163 * \note Narrow String handling
164 * Because the character set of strings of type 'char' is ambiguous, if you construct a String
165 * with char (char* etc) - it is somehow 'required' that the characters be ASCII. If using the FromConstantString () API
166 * , or operator""_k, it is checked with Require () - so assertion failure. If you construct
167 * with String::CTOR, it will generate a runtime exception (so more costly runtime checking).
168 *
169 * \note Satisfies Concepts:
170 * o static_assert (regular<String>);
171 *
172 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety">C++-Standard-Thread-Safety</a>
173 *
174 * \note Design note - mutability vs. immutability
175 * http://stroika-bugs.sophists.com/browse/STK-968 (see about deleting deprecated APIs and remnants of mutability) and c_str()
176 *
177 * String objects are IMMUTABLE (except for the OBVIOUS meaning case of operator= being allowed).
178 *
179 * String reps are IMMUTABLE.
180 *
181 * Use StringBuilder for a 'mutable' String (can be used mostly interchangeably with String).
182 *
183 * Current Mutating methods (as of v3.0d1x)
184 * o c_str () -- non-const deprecated in v3.0d13
185 * o SetCharAt - deprecated v3.0d12
186 * o c_str() (consider deprecating?)
187 * o operator= - deprecated v3.0d12
188 * o clear()- deprecated v3.0d12
189 * o Append - deprecated v3.0d12
190 * o operator+= - deprecated v3.0d12
191 * o erase() - deprecated v3.0d12
192 *
193 * SOMEWHAT ironically, the only of these methods hard to replace is the non-const c_str () - and maybe there
194 * not bad cuz I deprecated? COULD just deprecate ALL of these, and then the class is fully immutable. Probably
195 * easier to understand/reason about.
196 *
197 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
198 * o static_assert (totally_ordered<String>);
199 * o String::EqualsComparer, String::ThreeWayComparer and String::LessComparer provided with construction parameters to allow case insensitive compares
200 */
201 class String : public Traversal::Iterable<Character> {
202 private:
203 using inherited = Iterable<Character>;
204
205 protected:
206 class _IRep;
207
208 public:
209 /**
210 * All the constructors are obvious, except
211 * o NUL-characters ARE allowed in strings, except for the case of single
212 * charX* argument constructors - which find the length based on
213 * the terminating NUL-character.
214 *
215 * o CTOR (PATHLIKE_TOSTRINGABLE&& s) - IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE
216 * carefully excludes conflicting CTOR overloads, and purpose is to allow constructing a String
217 * from anything with a 'special conversion' method to UNICODE string, such as filesystem::path.
218 *
219 * \note about lifetime of argument data (basic_string_view<CHAR_T> constructors)
220 * All data is copied out / saved by the end of the constructor for all constructors EXCEPT
221 * the basic_string_view<CHAR_T> constructors - where it is REQUIRED the data last 'forever'.
222 *
223 * \pre for String (const basic_string_view<wchar_t>& str) - str[str.length()]=='\0';
224 * c-string nul-terminated (which happens automatically with L"xxx"sv)
225 *
226 * \note 'char' (using ASCII = char) constructors:
227 * Because the character-set of strings of type 'char' is ambiguous, if you construct a String
228 * with char (char* etc) - it is runtime checked that the characters are ASCII (except for the basic_string_view
229 * constructors where we check but with assertions).
230 *
231 * This mimics the behavior in Stroika v2.1 with String::FromASCII ()
232 *
233 * \note the basic_string move Constructors MAY move or copy the underlying std string, but they still maintain
234 * the same requirements on their arguments as the copy basic_string constructors (eg. char must be ascii)
235 *
236 * \see also - FromUTF8, FromSDKString, FromNarrowSDKString, FromStringConstant, FromLatin1, which are all like constructors
237 * but with special names to avoid confusion and make clear their arguments, and not participate in overloading. Note, chose
238 * this path instead of FLAG argument and explicit on CTOR, cuz more terse.
239 */
240 String ();
241 explicit String (Character c);
242 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
243 String (const CHAR_T* cString);
244 template <Memory::ISpan SPAN_OF_CHAR_T>
245 String (SPAN_OF_CHAR_T s)
247 template <IStdBasicStringCompatibleCharacter CHAR_T>
248 String (const basic_string<CHAR_T>& s);
249 template <IStdBasicStringCompatibleCharacter CHAR_T>
250 String (const basic_string_view<CHAR_T>& s);
251 template <IStdBasicStringCompatibleCharacter CHAR_T>
252 explicit String (basic_string<CHAR_T>&& s);
253 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
254 String (const Iterable<CHAR_T>& src)
255 requires (not Memory::ISpan<CHAR_T>);
256 template <IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE>
257 explicit String (PATHLIKE_TOSTRINGABLE&& s);
258 String (String&& from) noexcept = default;
259 String (const String& from) noexcept = default;
260
261 private:
262 template <IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE>
263 static String mkSTR_ (PATHLIKE_TOSTRINGABLE&& s);
264
265 private:
/**
 * Overloads (one per character type: ASCII, char8_t, char16_t, char32_t, wchar_t) that take a
 * basic_string_view and return a shared_ptr<_IRep>.
 *
 * NOTE(review): presumably these back the basic_string_view<CHAR_T> constructor (whose argument data
 * is REQUIRED to last 'forever') - confirm against the implementation.
 */
266 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<ASCII>& str);
267 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<char8_t>& str);
268 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<char16_t>& str);
269 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<char32_t>& str);
270 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<wchar_t>& str);
271
272 public:
273 ~String () = default;
274
275 protected:
276 /**
 * Shorthand for the inherited Iterable<Character>::_SafeReadRepAccessor template, instantiated with String's own _IRep type.
 *
 * NOTE(review): presumably used by String methods to get read-only access to the underlying rep - confirm against the implementation.
277 */
278 using _SafeReadRepAccessor = Iterable<Character>::_SafeReadRepAccessor<_IRep>;
279
280 protected:
281 /**
282 * \pre rep MUST be not-null
283 * However, with move constructor, it may be null on exit.
284 */
285 String (const shared_ptr<_IRep>& rep) noexcept;
286 String (shared_ptr<_IRep>&& rep) noexcept;
287
288 public:
289 nonvirtual String& operator= (String&& rhs) noexcept = default;
290 nonvirtual String& operator= (const String& rhs) noexcept = default;
291
292 public:
293 /**
294 * Create a String object from a 'char-based' utf-8 encoded string.
295 *
296 * \par Example Usage
297 * \code
298 * EXPECT_TRUE (string{u8"שלום"} == String::FromUTF8 (u8"שלום").AsUTF8 ());
299 * \endcode
300 *
301 * \note This is not generally needed, as you can just use the String::CTOR, but it is useful for cases like
302 * std::string -> String - where the conversion needs extra information (an assertion about character encoding of source characters).
303 *
304 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
305 */
306 template <typename CHAR_T>
307 static String FromUTF8 (span<CHAR_T> from)
308 requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>);
309 template <typename CHAR_T>
310 static String FromUTF8 (const basic_string<CHAR_T>& from)
311 requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>);
312 template <typename CHAR_T>
313 static String FromUTF8 (const CHAR_T* from)
314 requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>);
315
316 public:
317 /**
318 * Create a String object from a 'SDKChar' (os-setting - current code page) encoded string.
319 * See @SDKChar
320 * See @SDKString
321 *
322 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
323 *
324 * \note for span<> overload - consider using AdjustNulTerminatedStringSpan
325 */
326 static String FromSDKString (const SDKChar* from);
327 static String FromSDKString (span<const SDKChar> s);
328 static String FromSDKString (const SDKString& from);
329
330 public:
331 /**
332 * Create a String object from a 'char-based' (os-setting - current code page) encoded string.
333 *
334 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
335 *
336 * \note for span<> overload - consider using AdjustNulTerminatedStringSpan
337 */
338 static String FromNarrowSDKString (const char* from);
339 static String FromNarrowSDKString (span<const char> s);
340 static String FromNarrowSDKString (const string& from);
341
342 public:
343 /**
344 * Create a String object from a char based on the encoding from the argument locale.
345 * This throws an exception if there is an error performing the conversion.
346 *
347 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
348 *
349 * \note for span<> overload - consider using AdjustNulTerminatedStringSpan
350 */
351 static String FromNarrowString (const char* from, const locale& l);
352 static String FromNarrowString (span<const char> s, const locale& l);
353 static String FromNarrowString (const string& from, const locale& l);
354
355 public:
356 /**
357 * \brief Take the given argument data (constant span) - which must remain unchanged - constant - for the application lifetime - and treat it as a Stroika String object
358 *
359 * This allows creation of String objects with fewer memory allocations and less copying, and more efficient storage, in most situations
360 *
361 * The resulting String is a perfectly compliant Stroika String (somewhat akin to std::string_view vs std::string).
362 *
363 * \par Example:
364 * \code
365 * String tmp1 = "FRED";
366 * String tmp2 = String{"FRED"};
367 * String tmp3 = String::FromStringConstant ("FRED"); // same as 2 above, but faster
368 * String tmp4 = "FRED"sv; // equivalent to FromStringConstant
369 * String tmp5 = "FRED"_k; // equivalent to FromStringConstant
370 * \endcode
371 *
372 * \em WARNING - BE VERY CAREFUL - be sure arguments have application lifetime (intended use case is C string literals).
373 *
374 * \pre argument string MAY contain embedded nul characters (but for char* overloads wrong size inferred).
375 *
376 * \note In Stroika v2.1 this was called class String_ExternalMemoryOwnership_ApplicationLifetime.
377 * \note In Stroika v2.1 this was called class String_Constant.
378 * \note In Stroika v2.1 this required NUL-char termination, but no longer
379 *
380 * \note FromStringConstant with 'char' - REQUIRES that the char elements are ASCII (someday this may be lifted and interpreted as Latin1)
381 * For the case of char, we also do not check/require the nul-termination bit.
382 */
383 template <size_t SIZE, IUNICODECanUnambiguouslyConvertFrom CHAR_T>
384 static String FromStringConstant (const CHAR_T (&cString)[SIZE]);
385 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
386 static String FromStringConstant (const basic_string_view<CHAR_T>& str);
387 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
388 static String FromStringConstant (span<const CHAR_T> str);
389 static String FromStringConstant (span<const ASCII> s); // better impl in CPP file
390 static String FromStringConstant (span<const char16_t> s); // ""
391 static String FromStringConstant (span<const wchar_t> s); // "" (inl file)
392 static String FromStringConstant (span<const char32_t> s); // ""
393
394 public:
395 /**
396 * Create a String object from UNICODE Latin-1 Supplement (https://en.wikipedia.org/wiki/Latin-1_Supplement)
397 *
398 * This is roughly, but not exactly, the same as the ISO-Latin-1 single-byte character set (https://en.wikipedia.org/wiki/ISO/IEC_8859-1)
399 *
400 * \note if character code point >= 256, this will throw an exception - not defined for that range (only checked if sizeof (CHAR_T) > 1)
401 *
402 * @aliases From8bitASCII () or FromExtendedASCII ()
403 *
404 * \note for span<> overload - consider using AdjustNulTerminatedStringSpan
405 */
406 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
407 static String FromLatin1 (const CHAR_T* cString);
408 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
409 static String FromLatin1 (span<const CHAR_T> s);
410 template <IStdBasicStringCompatibleCharacter CHAR_T>
411 static String FromLatin1 (const basic_string<CHAR_T>& s);
412
413 public:
414 /**
415 * \brief appends 'rhs' string to this string (without modifying this string) and returns the combined string
416 *
417 * @see Append() for a similar function that modifies 'this'
418 */
419 template <typename T>
420 nonvirtual String Concatenate (T&& rhs) const
421 requires (is_convertible_v<T, String>);
422
423 private:
424 nonvirtual String Concatenate_ (const String& rhs) const;
425
426 public:
427 /**
428 * Returns the number of characters in the String. Note that this may not be the same as bytes,
429 * does not include NUL termination, and doesn't in any way respect NUL termination (meaning
430 * a nul-character is allowed in a Stroika string).
431 *
432 * @aliases GetLength ()
433 */
434 nonvirtual size_t size () const noexcept;
435
436 public:
437 /**
 * NOTE(review): presumably returns true iff size () == 0 (STL-style name) - confirm against the implementation.
438 */
439 nonvirtual bool empty () const noexcept;
440
441 public:
442 /**
 * \brief return the (read-only) Character at index 'i' of this String
 *
 * \note operator[] (size_t) is documented in this class as an alias for GetCharAt ().
 *
 * \note NOTE(review): presumably requires i < size () - the precondition is not stated in this header; confirm.
443 */
444 nonvirtual const Character GetCharAt (size_t i) const noexcept;
445
446 public:
447 /**
448 * \brief return (read-only) Character object
449 *
450 * @aliases GetCharAt (size_t i) const;
451 *
452 * \note returns const due to https://stroika.atlassian.net/browse/STK-376 - so cannot accidentally have illusion of assignment being legal
453 */
454 nonvirtual const Character operator[] (size_t i) const noexcept;
455
456 public:
457 /**
458 * InsertAt() constructs a new string by taking this string, and inserting the argument
459 * characters.
460 *
461 * \em Note that for repeated insertions, this is much less efficient than just
462 * using StringBuilder.
463 *
464 * \note that if at == this->size (), you are appending.
465 *
466 * \note for span<> overload - consider using AdjustNulTerminatedStringSpan
467 */
468 nonvirtual String InsertAt (Character c, size_t at) const;
469 nonvirtual String InsertAt (const String& s, size_t at) const;
470 nonvirtual String InsertAt (span<const Character> s, size_t at) const;
471 nonvirtual String InsertAt (span<Character> s, size_t at) const;
472
473 public:
474 /**
475 * Remove the characters at 'charAt' (RemoveAt/1) or between 'from' and 'to' (const method - doesn't modify this)
476 *
477 * It is an error if this implies removing characters off the end of the string.
478 *
479 * \par Example Usage
480 * \code
481 * String mungedData = "04 July 2014";
482 * if (optional<pair<size_t, size_t>> i = mungedData.Find (RegularExpression{"0[^\b]"})) {
483 * mungedData = mungedData.RemoveAt (*i);
484 * }
485 * \endcode
486 *
487 * \pre (charAt < size ())
488 * \pre (from <= to)
489 * \pre (to <= size ())
490 *
491 * \em Note that this is quite inefficient: consider using StringBuilder
492 */
493 nonvirtual String RemoveAt (size_t charAt) const;
494 nonvirtual String RemoveAt (size_t from, size_t to) const;
495 nonvirtual String RemoveAt (pair<size_t, size_t> fromTo) const;
496
497 public:
498 /**
499 * Remove the first occurrence of Character 'c' / 'subString' from the string. Not an error if none
500 * found. Doesn't modify this (const method) - returns resulting string.
501 *
502 * \em Note that this is quite inefficient: consider using StringBuilder
503 */
504 nonvirtual String RemoveFirstIf (Character c) const;
505 nonvirtual String RemoveFirstIf (const String& subString) const;
506
507 public:
508 /**
509 * Remove all occurrences of Character 'c' / 'subString' from this string
510 * (walking front to back - if removal creates one, it too is removed).
511 * Not an error if none found. Doesn't modify this (const method) - returns resulting string.
512 */
513 nonvirtual String RemoveAll (Character c) const;
514 nonvirtual String RemoveAll (const String& subString) const;
515
516 public:
517 /**
518 * OVERLOADS WITH size_t:
519 *
520 * Produce a substring of this string, starting at 'from', and up to 'to' (or end of string
521 * for one-arg overload).
522 *
523 * *NB* This function treats the second argument differently than String::substr () -
524 * which respects the STL basic_string API. This function treats the second argument
525 * as a 'to', STL substr() treats it as a count. This amounts to the same thing for the
526 * very common cases of substr(N) - because second argument is defaulted, and,
527 * substr (0, N) - because then the count and end are the same.
528 *
529 * \pre (from <= to);
530 * \pre (to <= size ()); // for 2-arg variant
531 *
532 * \par Example Usage
533 * \code
534 * String tmp { "This is good" };
535 * Assert (tmp.SubString (5) == "is good");
536 * \endcode
537 *
538 * \par Example Usage
539 * \code
540 * const String kTest_ { "a=b"sv };
541 * const String kLbl2LookFor_ { "a="_k };
542 * if (kTest_.Find (kLbl2LookFor_)) {
543 * String tmp { kTest_.SubString (kLbl2LookFor_.length ()) };
544 * Assert (tmp == "b");
545 * }
546 * \endcode
547 *
548 * OVERLOADS WITH ptrdiff_t:
549 *
550 * This is like SubString() except that if from/to are negative, they are treated as relative to the end
551 * of the String.
552 *
553 * So for example, SubString (0, -1) is equivalent to SubString (0, size () - 1) - and so is an
554 * error if the string is empty.
555 *
556 * Similarly, SubString (-5) is equivalent to SubString (size ()-5, size ()) - so can be used
557 * to grab the end of a string.
558 *
559 * \pre (adjustedFrom <= adjustedTo);
560 * \pre (adjustedTo <= size ()); // for 2-arg variant
561 *
562 * \note \em Design Note
563 * We chose not to overload SubString() with this functionality because it would have been too easy
564 * to mask bugs.
565 *
566 * \note \em Design Note
567 * This was originally inspired by Python arrays. From https://docs.python.org/2/tutorial/introduction.html:
568 * Indices may also be negative numbers, to start counting from the right
569 *
570 * @aliases
571 * This API is identical to the javascript String.slice () method/behavior
572 * @see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
573 * 15.5.4.13 String.prototype.slice (start, end)
574 *
575 * @aliases
576 * This API - when called with negative indexes - used to be called CircularSubString ().
577 *
578 * @see substr
579 * @see SafeSubString
580 */
581 template <typename SZ>
582 nonvirtual String SubString (SZ from) const;
583 template <typename SZ1, typename SZ2>
584 nonvirtual String SubString (SZ1 from, SZ2 to) const;
585
586 public:
587 /**
588 * Like SubString(), but no requirements on from/to. These are just adjusted to the edge of the string
589 * if they exceed those endpoints. And if arguments are <0, they are interpreted as end-relative.
590 *
591 * @aliases
592 * This API - when called with negative indexes - used to be called SafeCircularSubString ().
593 *
594 * @see substr
595 * @see SubString
596 */
597 template <typename SZ>
598 nonvirtual String SafeSubString (SZ from) const;
599 template <typename SZ1, typename SZ2>
600 nonvirtual String SafeSubString (SZ1 from, SZ2 to) const;
601
602 public:
603 /**
604 * \brief Return a substring of this string, starting at 'argument' n. If n > size(), return empty string.
605 *
606 * \note This hides inherited Skip, and returns a subtype of what it returns, and is just slightly more
607 * efficient, but otherwise completely compatible behavior.
608 */
609 nonvirtual String Skip (size_t n) const;
610
611 public:
612 /**
613 * Return 'count' copies of this String (concatenated after one another).
614 */
615 nonvirtual String Repeat (unsigned int count) const;
616
617 public:
618 /**
619 * Returns true if the argument character or string is found anywhere inside this string.
620 * This is equivalent to
621 * return Matches (".*" + X + ".*"); // If X had no characters which look like they are part of
622 * // a regular expression
623 *
624 * @see Match
625 */
626 nonvirtual bool Contains (Character c, CompareOptions co = eWithCase) const;
627 nonvirtual bool Contains (const String& subString, CompareOptions co = eWithCase) const;
628
629 public:
630 /**
 * NOTE(review): presumably returns true iff at least one Character from 'cs' is found anywhere in this string
 * (compared according to 'co'), by analogy with Contains () - confirm against the implementation.
631 *
632 */
633 nonvirtual bool ContainsAny (Iterable<Character> cs, CompareOptions co = eWithCase) const;
634
635 public:
636 /**
637 * Returns true iff this string begins with the given character or substring.
638 *
639 * Similar to:
640 * return Matches (X + ".*");
641 * except for the fact that StartsWith() doesn't interpret 'X' as a regular expression
642 *
643 * \pre not subString.empty () -- for the subString overload (because otherwise "".StartsWith("") would be ill-defined)
644 *
645 * @see Match
646 * @see EndsWith
647 */
648 nonvirtual bool StartsWith (const Character& c, CompareOptions co = eWithCase) const;
649 nonvirtual bool StartsWith (const String& subString, CompareOptions co = eWithCase) const;
650
651 public:
652 /**
653 * Returns true iff this string ends with the given character or substring.
654 *
655 * Similar to:
656 * return Matches (".*" + X);
657 * except for the fact that EndsWith() doesn't interpret 'X' as a regular expression
658 *
659 * \pre not subString.empty () -- for the subString overload (because otherwise "".EndsWith("") would be ill-defined)
660 *
661 * @see Match
662 * @see StartsWith
663 */
664 nonvirtual bool EndsWith (const Character& c, CompareOptions co = eWithCase) const;
665 nonvirtual bool EndsWith (const String& subString, CompareOptions co = eWithCase) const;
666
667 public:
668 /**
669 * \brief Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
670 *
671 * \note this is too specific-purpose to be a very sensible API, but I find it pretty often pretty useful. So what-the-heck.
672 */
673 nonvirtual String AssureEndsWith (const Character& c, CompareOptions co = eWithCase) const;
674
675 public:
676 /**
677 * Apply the given regular expression, and return true if it matches this string. This only
678 * returns true if the expression matches the ENTIRE string - all the way to the end.
679 * @see FindEach() or @see Find - to find a set of things which match.
680 *
681 * \par Example Usage
682 * \code
683 * Assert (String{"abc"}.Matches ("abc"));
684 * Assert (not (String{"abc"}.Matches ("bc")));
685 * Assert (String{"abc"}.Matches (".*bc"));
686 * Assert (not String{"abc"}.Matches ("b.*c"));
687 * \endcode
688 *
689 * \par Example Usage
690 * \code
691 * static const RegularExpression kSonosRE_{"([0-9.:]*)( - .*)"_RegEx};
692 * static const String kTestStr_{"192.168.244.104 - Sonos Play:5"};
693 * optional<String> match1;
694 * optional<String> match2;
695 * EXPECT_TRUE (kTestStr_.Matches (kSonosRE_, &match1, &match2) and match1 == "192.168.244.104" and match2 == " - Sonos Play:5");
696 * EXPECT_EQ (kTestStr_.Matches<1> (kSonosRE_), make_tuple ("192.168.244.104"_k));
697 * EXPECT_EQ (kTestStr_.Matches<2> (kSonosRE_), make_tuple ("192.168.244.104"_k, " - Sonos Play:5"_k));
698 * \endcode
699 *
700 * \par Example Usage
701 * \code
702 * // https://tools.ietf.org/html/rfc3986#appendix-B
703 * static const RegularExpression kParseURLRegExp_{"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"_RegEx};
704 * optional<String> scheme;
705 * optional<String> authority;
706 * optional<String> path;
707 * optional<String> query;
708 * optional<String> fragment;
709 * if (rawURL.Matches (kParseURLRegExp_, nullptr, &scheme, nullptr, &authority, &path, nullptr, &query, nullptr, &fragment)) {
710 * DbgTrace ("***good - scheme={}"_f, scheme);
711 * DbgTrace ("***good - authority={}"_f, authority);
712 * DbgTrace ("***good - path={}"_f, path);
713 * DbgTrace ("***good - query={}"_f, query);
714 * DbgTrace ("***good - fragment={}"_f, fragment);
715 * }
716 * \endcode
717 *
718 * \par Example Usage
719 * \code
720 * // Praat 6.4.23 (October 27 2024)
721 * String processRunnerOutput = "Praat 6.4.23 (October 27 2024)";
722 * String version;
723 * return processRunnerOutput.Matches ("(\\w+)\\s([\\w\\.]+).*"_RegEx, nullptr, &version)? version: "???"sv;
724 * return get<0> (processRunnerOutput.Matches<1> ("\\w+\\s([\\w\\.]+).*"_RegEx).value_or(make_tuple("???"_k))); // Or better
725 * \endcode
726 *
727 * Details on the regular expression language/format can be found at:
728 * http://en.wikipedia.org/wiki/C%2B%2B11#Regular_expressions
729 *
730 * \note If any 'sub-match' arguments are passed to Matches, they MUST be of type optional<String>*, String*, or nullptr.
731 * Passing nullptr allows matched parameters to not be returned, but still identified positionally (by index).
732 *
733 * @see Contains
734 * @see StartsWith
735 * @see EndsWith
736 * @see Find
737 * @see FindEach
738 */
739 nonvirtual bool Matches (const RegularExpression& regEx) const;
740 nonvirtual bool Matches (const RegularExpression& regEx, Containers::Sequence<String>* matches) const;
741 template <Common::IAnyOf<optional<String>*, String*, nullptr_t>... OPTIONAL_STRINGS>
742 nonvirtual bool Matches (const RegularExpression& regEx, OPTIONAL_STRINGS&&... subMatches) const;
743 template <size_t I>
744 nonvirtual optional<Common::RepeatedTuple_t<I, String>> Matches (const RegularExpression& regEx) const;
745
746 public:
747 /**
748 * Find returns the index of the first occurrence of the given Character/substring argument in
749 * this string. Find () always returns a valid string index, which is followed by the
750 * given substring, or nullopt otherwise.
751 *
752 * Find () can optionally be provided a 'startAt' offset to begin the search at.
753 *
754 * And the overload taking a RegularExpression - returns BOTH the location where the match
755 * is found, as well as the end of the match.
756 *
757 * Note - for the special case of Find(empty-string) - the return value is 0 if this string
758 * is non-empty, and nullopt if this string was empty.
759 *
760 * @aliases - could have been called IndexOf ()
761 *
762 * \pre (startAt <= size ());
763 *
764 * \par Example Usage
765 * \code
766 * const String kTest_{ "a=b" };
767 * const String kLbl2LookFor_ { "a=" };
768 * if (kTest_.Find (kLbl2LookFor_)) {
769 * String tmp { kTest_.SubString (kLbl2LookFor_.length ()) };
770 * Assert (tmp == "b");
771 * }
772 * \endcode
773 *
774 * @see FindEach ()
775 * @see FindEachString ()
776 * @see Tokenize
777 */
778 nonvirtual optional<size_t> Find (Character c, CompareOptions co = eWithCase) const;
779 nonvirtual optional<size_t> Find (Character c, size_t startAt, CompareOptions co = eWithCase) const;
780 nonvirtual optional<size_t> Find (const String& subString, CompareOptions co = eWithCase) const;
781 nonvirtual optional<size_t> Find (const String& subString, size_t startAt, CompareOptions co = eWithCase) const;
782 nonvirtual optional<pair<size_t, size_t>> Find (const RegularExpression& regEx, size_t startAt = 0) const;
783 nonvirtual Traversal::Iterator<Character> Find (const function<bool (Character item)>& that) const;
784
785 public:
786 /**
787 * This is just like Find, but captures all the matching results in an iterable result.
788 * The reason the overload for RegularExpression's returns a list of pair<size_t,size_t> is because
789 * the endpoint of the match is ambiguous. For fixed string Find, the end of match is computable
790 * from the arguments.
791 *
792 * FindEach () can be more handy to use than directly using Find () in scenarios where you want
793 * to iterate over each match:
794 * e.g.:
795 * for (auto i : s.FindEach ("xxx")) {....}
796 *
797 * Also, to count matches, you can use:
798 * size_t nMatches = FindEach (matchexp).size ();
799 *
800 * Note: FindEach handles the special case of an empty match as ignored, so FindEach(empty-str-or-regexp)
801 * always returns an empty list. Also - for the String case, it returns distinct matches, so if you
802 * search String{"AAAA"}.FindEach ("AA"), you will get 2 answers ({0, 2}).
803 *
804 * @see Find ()
805 * @see FindEachString ()
806 * @see Matches ()
807 */
808 nonvirtual Containers::Sequence<pair<size_t, size_t>> FindEach (const RegularExpression& regEx) const;
809 nonvirtual Containers::Sequence<size_t> FindEach (const String& string2SearchFor, CompareOptions co = eWithCase) const;
810
811 public:
812 /**
813 * \par Example Usage
814 * \code
815 * const String kTest_{ "a=b,"sv };
816 * const RegularExpression kRE_{ "a=(.*)[, ]" };
817 * Sequence<String> tmp1{ kTest_.FindEachString (kRE_) };
818 * Assert (tmp1.size () == 1 and tmp1[0] == "a=b,");
819 * Sequence<RegularExpressionMatch> tmp2 { kTest_.FindEachMatch (kRE_) };
820 * Assert (tmp2.size () == 1 and tmp2[0].GetFullMatch () == "a=b," and tmp2[0].GetSubMatches () == Sequence<String>{"b"});
821 * \endcode
822 *
823 * @see Find ()
824 * @see FindEachString ()
825 * @see Matches ()
826 */
827 nonvirtual Containers::Sequence<RegularExpressionMatch> FindEachMatch (const RegularExpression& regEx) const;
828
829 public:
830 /**
831 * \par Example Usage
832 * \code
833 * const String kTest_ { "a=b, c=d"_k };
834 * const RegularExpression kRE_ { "(.)=(.)" };
835 * Assert ((kTest_.FindEachString (kRE_) == vector<String>{"a=b", "c=d"}));
836 * \endcode
837 *
838 * @see Find ()
839 * @see FindEachMatch ()
840 * @see Matches ()
841 */
842 nonvirtual Containers::Sequence<String> FindEachString (const RegularExpression& regEx) const;
843
844 public:
845 /**
846 * RFind (substring) returns the index of the last occurrence of the given substring in
847 * this string. This function always returns a valid string index, which is followed by the
848 * given substring, or optional<size_t> {} otherwise.
849 *
850 * @aliases RIndexOf ()
851 */
852 nonvirtual optional<size_t> RFind (Character c) const noexcept;
853 nonvirtual optional<size_t> RFind (const String& subString) const;
854
855 public:
856 /**
857 * Replace the range of this string with the given replacement. Const method: just creates new string as described.
858 */
859 nonvirtual String Replace (size_t from, size_t to, const String& replacement) const;
860 nonvirtual String Replace (pair<size_t, size_t> fromTo, const String& replacement) const;
861
862 public:
863 /**
864 * Apply the given regular expression, with 'with' and replace each match. This doesn't
865 * modify this string, but returns the replacement string.
866 *
867 * CHECK - BUT I THINK WE DEFINE THIS TO REPLACE ALL? OR MAKE THAT A PARAM?
868 * See regex_replace () for definition of the regEx language
869 *
870 * Require (not string2SearchFor.empty ());
871 *
872 * \par Example Usage
873 * \code
874 * mungedData = mungedData.ReplaceAll (RegularExpression{ "\\b0+" }, ""); // strip all leading zeros
875 * \endcode
876 *
877 * \par Example Usage
878 * \code
879 * String a = "a b \n\t c";
880 * EXPECT_EQ (a.ReplaceAll (RegularExpression{"\\s+"sv}, " "sv), "a b c");
881 * EXPECT_EQ (a.ReplaceAll ("\\s+"_RegEx, " "sv), "a b c");
882 * \endcode
883 *
884 * Note - it IS legal for 'with' to contain the original searched-for string, or even
885 * for the searched-for string to be 'created' as a side effect of where 'with' gets
886 * inserted. The implementation will only replace those occurrences that pre-existed.
887 *
888 * \note To perform a regular expression replace-all, which is case insensitive, create the regular expression with CompareOptions::eCaseInsensitive
889 *
890 * \note ReplaceAll could have been called 'SafeString' or 'FilteredString' (was at one point - replaces that functionality)
891 */
892 nonvirtual String ReplaceAll (const RegularExpression& regEx, const String& with) const;
893 nonvirtual String ReplaceAll (const String& string2SearchFor, const String& with, CompareOptions co = eWithCase) const;
894 nonvirtual String ReplaceAll (const function<bool (Character)>& replaceCharP, const String& with) const;
895 nonvirtual String ReplaceAll (const Containers::Set<Character>& charSet, const String& with) const;
896
897 public:
898 /**
899 * Replace any CR or LF or CRLF sequences with plain NL-terminated text.
900 */
901 nonvirtual String NormalizeTextToNL () const;
902
903 public:
904 /**
905 * \brief Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument space character).
906 *
907 * \note see Qt 'QString::simplified ()'. Idea is Trim () (right and left) - plus replace contiguous substrings matching Character::IsSpace () with a
908 * single (given) space character.
909 */
910 nonvirtual String NormalizeSpace (Character useSpaceCharacter = ' ') const;
911
912 public:
913 /**
914 * Break this String into constituent parts. This is a simplistic API but at least handy as is.
915 *
916 * The caller can specify the token separators by set, by lambda, or by regular expression. This defaults to the lambda "isWhitespace".
917 *
918 * This is often called 'Split' in other APIs. This is NOT (as is now) a replacement for flex, but just for
919 * simple, but common string splitting needs (though if I had a regexp param, it may approach the power of flex).
920 *
921 * \note If this->length () == 0, this method returns a list of length 0;
922 * \note Its fine for the split character/characters to be missing, in which case this
923 * returns a list of length 1
924 *
925 * \par Example Usage
926 * \code
927 * String t { "ABC DEF G" };
928 * Assert (t.Tokenize ().length () == 3);
929 * Assert (t.Tokenize ()[1] == "DEF");
930 * \endcode
931 *
932 * \par Example Usage
933 * \code
934 * String t { "foo= 7" };
935 * auto tt = t.Tokenize ({ '=' });
936 * Assert (tt.length () == 2);
937 * Assert (tt[1] == " 7");
938 * Assert (tt[1].Trim () == "7");
939 * \endcode
940 *
941 * \par Example Usage
942 * \code
943 * String t { "foo= 7" };
944 * auto tt = t.Tokenize ({ '=', ' ' });
945 * Assert (tt.length () == 2);
946 * Assert (tt[1] == "7");
947 * \endcode
948 *
949 * @see Find
950 *
951 * TODO:
952 * @todo Review:
953 * http://qt-project.org/doc/qt-5.0/qtcore/qstring.html#split
954 * especially:
955 * QString line = "forename\tmiddlename surname \t \t phone";
956 * QRegularExpression sep("\\s+");
957 * str = line.section(sep, 2, 2); // str == "surname"
958 * str = line.section(sep, -3, -2); // str == "middlename surname"
959 * Make sure our Find/Tokenize is at least this simple, and maybe diff between find and split
960 * is FIND the regular expression names the things looked for and SPLIT() uses regexp to name the separators?
961 * Add something like the above to the String String demo app (when it exists)
962 */
963 nonvirtual Containers::Sequence<String> Tokenize () const;
964 nonvirtual Containers::Sequence<String> Tokenize (const function<bool (Character)>& isTokenSeperator) const;
965 nonvirtual Containers::Sequence<String> Tokenize (const RegularExpression& isSeparator) const;
966 nonvirtual Containers::Sequence<String> Tokenize (const Containers::Set<Character>& delimiters) const;
967
968 public:
969 /**
970 * \brief break the String into a series of lines;
971 *
972 * \note could almost be done with Tokenize(), except for the one-sided nl-specific trimming.
973 *
974 * \note removes line-endings (\r\n, or \n, or \r).
975 */
976 nonvirtual Containers::Sequence<String> AsLines () const;
977
978 public:
979 /**
980 * \brief Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with .Map<>) producing a subset of the lines which match
981 *
982 * note this is useful to replace 'shell script' logic where you might run some command and grep through its output for all
983 * matching lines.
984 *
985 * \par Example Usage
986 * \code
987 * String firstALineOrEmpty = String{"...e.g. from output of ProcessRunner..."}.Grep ("a:").NthValue (0);
988 * \endcode
989 */
990 nonvirtual Containers::Sequence<String> Grep (const String& fgrepArg) const;
991 nonvirtual Containers::Sequence<String> Grep (const RegularExpression& egrepArg) const;
992
993 public:
994 /**
995 * \brief Useful to replace 'awk print $3' - replace with Col(2) - zero based
996 *
997 * default separator = "\\s+"_RegEx;
998 *
999 * \par Example Usage
1000 * \code
1001 * Assert (String{"ffmpeg version 7.1"}.Col (2) == "7.1");
1002 * \endcode
1003 *
1004 */
1005 nonvirtual optional<String> Col (size_t i) const;
1006 nonvirtual optional<String> Col (size_t i, const RegularExpression& separator) const;
1007
1008 public:
1009 /**
1010 * \brief see Col(i) - but with default value of empty string
1011 */
1012 nonvirtual String ColValue (size_t i, const String& valueIfMissing = {}) const;
1013
1014 public:
1015 /**
1016 * String LTrim () scans the characters from left to right, and applies the given
1017 * 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed,
1018 * and the resulting string is returned. This does not modify the current string it's
1019 * applied to - just returns the trimmed string.
1020 */
1021 nonvirtual String LTrim (bool (*shouldBeTrimmed) (Character) = Character::IsWhitespace) const;
1022
1023 public:
1024 /**
1025 * String RTrim () scans the characters from right to left, and applies the given
1026 * 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed,
1027 * and the resulting string is returned. This does not modify the current string it's
1028 * applied to - just returns the trimmed string.
1029 *
1030 * \par Example Usage
1031 * \code
1032 * String name = origName.RTrim ([] (Character c) { return c == '\\';}); // Trim a trailing backslash(s), if present
1033 * \endcode
1034 */
1035 nonvirtual String RTrim (bool (*shouldBeTrimmed) (Character) = Character::IsWhitespace) const;
1036
1037 public:
1038 /**
1039 * String Trim () is logically equivalent to RTrim (shouldBeTrimmed).LTrim (shouldBeTrimmed).
1040 */
1041 nonvirtual String Trim (bool (*shouldBeTrimmed) (Character) = Character::IsWhitespace) const;
1042
1043 public:
1044 /**
1045 * Walk the entire string, and produce a new string consisting of all characters for which
1046 * the predicate 'removeCharIf' returned false.
1047 */
1048 nonvirtual String StripAll (bool (*removeCharIf) (Character)) const;
1049
1050 public:
1051 /**
1052 * Combine the given array into a single string (typically comma space) separated.
1053 * If given a list of length n, this adds n-1 separators.
1054 *
1055 * \note .Net version - https://docs.microsoft.com/en-us/dotnet/api/system.string.join?redirectedfrom=MSDN&view=net-6.0#System_String_Join_System_String_System_String___
1056 * \note Java version - https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#join-java.lang.CharSequence-java.lang.CharSequence...-
1057 * \note Javascript - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/join
1058 *
1059 * \note - CONSIDER LOSING this as 'Iterable<>::Join' just appears to work better -- LGP 2025-01-21
1060 */
1061 static String Join (const Iterable<String>& list, const String& separator = ", "sv);
1062
1063 public:
1064 /**
1065 * Return a new string based on this string where each upper case character is replaced by its
1066 * lower case equivalent. Note that non-upper-case characters (such as punctuation) are unchanged.
1067 */
1068 nonvirtual String ToLowerCase () const;
1069
1070 public:
1071 /**
1072 * Return a new string based on this string where each lower case character is replaced by its
1073 * upper case equivalent. Note that non-lower-case characters (such as punctuation) are unchanged.
1074 */
1075 nonvirtual String ToUpperCase () const;
1076
1077 public:
1078 /**
1079 * Return true if the string contains zero non-whitespace characters.
1080 */
1081 nonvirtual bool IsWhitespace () const;
1082
1083 public:
1084 /**
1085 * \brief return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if truncated)
1086 *
1087 * This function is for GUI/display purposes. It returns the given string, trimmed down
1088 * to at most maxLen characters, and removes whitespace (on 'to trim' side - given by keepLeft flag -
1089 * if needed to get under maxLen).
1090 *
1091 * Note in the 3-arg overload, the ellipsis string MAY be the empty string.
1092 */
1093 nonvirtual String LimitLength (size_t maxLen, StringShorteningPreference keepPref = StringShorteningPreference::ePreferKeepLeft) const;
1094 nonvirtual String LimitLength (size_t maxLen, StringShorteningPreference keepLeft, const String& ellipsis) const;
1095
1096 public:
1097 /**
1098 * CopyTo () copies the contents of this string to the target buffer.
1099 * CopyTo () does NOT nul-terminate the target buffer
1100 *
1101 * Returns span of CHAR_T objects written - a subspan of the argument span
1102 *
1103 * \pre s.size () >= UTFConvert::ComputeTargetBufferSize<CHAR_T> (...this-string-data...);
1104 *
1105 * \see See also GetData<CHAR_T> (buf) - similar functionality - except caller doesn't need to know size of buffer to allocate
1106 */
1107 template <IUNICODECanAlwaysConvertTo CHAR_T>
1108 nonvirtual span<CHAR_T> CopyTo (span<CHAR_T> s) const
1109 requires (not is_const_v<CHAR_T>);
1110
1111 public:
1112 /**
1113 * Convert String losslessly into a standard C++ type.
1114 *
1115 * Only specifically specialized variants supported: IBasicUNICODEStdString<T> or same_as<T,String>
1116 * o wstring
1117 * o u8string
1118 * o u16string
1119 * o u32string
1120 * o String (return *this; handy sometimes in templated usage; harmless)
1121 * as well as:
1122 * o filesystem::path (or anything with .wstring() -> wstring method) - note see qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
1123 *
1124 * DEPRECATED AS OF v3.0d1 because As is const method - could do non-const As<> overload for these, but that would be confusing
1125 * o const wchar_t*
1126 * o const Character*
1127 *
1128 * \note
1129 * o As<u8string> () equivalent to AsUTF8 () call
1130 * o As<u16string> () equivalent to AsUTF16 () call
1131 * o As<u32string> () equivalent to AsUTF32 () call
1132 *
1133 * \note We tried to also have template<typename T> explicit operator T () const; - conversion operator - but
1134 * We got too frequent confusion in complex combinations of templates, like with:
1135 * Set<String> x ( *optional<String> {String{}} ); // fails cuz calls operator Set<String> ()!
1136 * Set<String> x { *optional<String> {String{}} }; // works as expected
1137 */
1138 template <typename T>
1139 nonvirtual T As () const
1140 requires (IBasicUNICODEStdString<T> or same_as<T, String> or constructible_from<T, wstring>);
1141
1142 public:
1143 /**
1144 * Create a narrow string object from this, based on the encoding from the argument locale.
1145 * This throws an exception if there is an error performing the conversion, and the 'into' overload
1146 * leaves 'into' in an undefined (but safe) state.
1147 */
1148 nonvirtual string AsNarrowString (const locale& l) const;
1149 nonvirtual string AsNarrowString (const locale& l, AllowMissingCharacterErrorsFlag) const;
1150
1151 public:
1152 /**
1153 * Convert String losslessly into a standard C++ type.
1154 * Only specifically specialized variants are supported.
1155 *
1156 * SUPPORTED result type "T": values are:
1157 * string
1158 * u8string
1159 */
1160 template <typename T = u8string>
1161 nonvirtual T AsUTF8 () const
1162 requires (same_as<T, string> or same_as<T, u8string>);
1163
1164 public:
1165 /**
1166 * Convert String losslessly into a standard C++ type u16string.
1167 *
1168 * \par Example Usage:
1169 * \code
1170 * String s = u"hi mom";
1171 * u16string su = s.AsUTF16 ();
1172 * \endcode
1173 *
1174 * \note - the resulting string may have a different length than this->size() due to surrogates
1175 *
1176 * @todo allow wchar_t if sizeof(wchar_t) == 2
1177 */
1178 template <typename T = u16string>
1179 nonvirtual T AsUTF16 () const
1180 requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>));
1181
1182 public:
1183 /**
1184 * Convert String losslessly into a standard C++ type u32string.
1185 *
1186 * \par Example Usage:
1187 * \code
1188 * String s = u"hi mom";
1189 * u32string su = s.AsUTF32 ();
1190 * \endcode
1191 *
1192 * \note - As of Stroika 2.1d23 - the resulting string may have a different length than this->size() due to surrogates,
1193 * but eventually the intent is to fix Stroika's string class so this is not true, and it returns the length of the string
1194 * in size () with surrogates removed (in other words uses ucs32 representation). But not there yet.
1195 */
1196 template <typename T = u32string>
1197 nonvirtual T AsUTF32 () const
1198 requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>));
1199
1200 public:
1201 /**
1202 * See docs on SDKChar for meaning (character set).
1203 *
1204 * Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot).
1205 * But in that case, AsSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.
1206 */
1207 nonvirtual SDKString AsSDKString () const;
1209
1210 public:
1211 /**
1212 * See docs on SDKChar for meaning (character set). If SDKChar is a wide character, there is probably still a
1213 * default 'code page' to interpret narrow characters (Windows CP_ACP). This is a string in that character set.
1214 *
1215 * Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot).
1216 * But in that case, AsNarrowSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.
1217 */
1218 nonvirtual string AsNarrowSDKString () const;
1219 nonvirtual string AsNarrowSDKString (AllowMissingCharacterErrorsFlag) const;
1220
1221 public:
1222 /**
1223 * Convert String losslessly into a standard C++ type.
1224 * Only specifically specialized variants are supported (see Supported Types below).
1225 * The source string MUST contain only valid ASCII characters - otherwise this throws RuntimeErrorException<>
1226 *
1227 * \par Example Usage:
1228 * \code
1229 * string a1 = String{"hi mom"}.AsASCII (); // OK
1230 * string a2 = String{u"שלום"}.AsASCII (); // throws
1231 * \endcode
1232 *
1233 * \note - this is a (compatible) change of behavior: before Stroika v2.1d23, this would assert out on invalid ASCII.
1234 *
1235 * Supported Types:
1236 * o Memory::StackBuffer<char>
1237 * o string
1238 * o u8string (note any ASCII string is also legit utf-8)
1239 */
1240 template <typename T = string>
1241 nonvirtual T AsASCII () const
1242 requires requires (T* into) {
1243 { into->empty () } -> same_as<bool>;
1244 { into->push_back (ASCII{0}) };
1245 };
1246
1247 public:
1248 /**
1249 * Convert String losslessly into a standard C++ type.
1250 * Only specifically specialized variants are supported (see Supported Types below).
1251 * If this source contains any invalid ASCII characters, this returns nullopt, and else a valid engaged string.
1252 *
1253 * Supported Types(T):
1254 * o Memory::StackBuffer<char>
1255 * o string
1256 * o u8string (note any valid ASCII string is also valid utf-8)
1257 */
1258 template <typename T = string>
1259 nonvirtual optional<T> AsASCIIQuietly () const
1260 requires requires (T* into) {
1261 { into->empty () } -> same_as<bool>;
1262 { into->push_back (ASCII{0}) };
1263 };
1264
1265 public:
1266 /**
1267 * \brief Summary data for raw contents of rep - each rep will support at least one of these span forms
1268 *
1269 * Each rep will support a span of at least one code-point type (ascii, utf8, utf16, or utf32)
1270 *
1271 * This API is guaranteed to support a span of at least one of these types (maybe more). The caller may
1272 * specify the code-point type preferred.
1273 *
1274 * \note eAscii is a subset of eSingleByteLatin1, so when the type eAscii is returned, EITHER fSingleByteLatin1 or fAscii
1275 * may be used.
1276 *
1277 * This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since
1278 * this reasonably likely to change in future versions.
1279 */
1282 /**
1283 * ASCII is useful to track in storage (though same size as eSingleByteLatin1) - because requests
1284 * to convert to UTF-8 are free - ASCII is legit UTF8 (not true for eSingleByteLatin1)
1285 */
1287 /**
1288 * Latin1 - 8 bit representation of characters. But 256 of them - more than plain ascii.
1289 * And cheap/easy to convert to UNICODE (since code points of wider characters exactly the same values).
1290 */
1292 eChar16,
1293 eChar32
1294 };
1296 union {
1297 span<const ASCII> fAscii;
1298 span<const Latin1> fSingleByteLatin1;
1299 span<const char16_t> fChar16;
1300 span<const char32_t> fChar32;
1301 };
1302 };
1303
1304 public:
1305 /**
1306 * \brief return the constant character data inside the string in the form of a case variant union of different span types (at least one will be there)
1307 * templated type arg just used to pick a preferred type.
1308 *
1309 * \note CHAR_TYPE == char implies eAscii
1310 *
1311 * \note Reason for the two step API - getting the PeekSpanData, and then using - is because getting
1312 * the data is most expensive part (virtual function), and the packaged PeekSpanData gives enuf
1313 * info to do the next steps (quickly inline usually)
1314 *
1315 * This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since
1316 * this reasonably likely to change in future versions.
1317 */
1318 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE = ASCII>
1319 nonvirtual PeekSpanData GetPeekSpanData () const;
1320
1321 public:
1322 /**
1323 * \brief return the constant character data inside the string in the form of a span or nullopt if not available for that CHAR_TYPE
1324 *
1325 * This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since
1326 * this reasonably likely to change in future versions.
1327 *
1328 * \note It is generally true that the data IsASCII (span) IFF PeekData<ASCII> returns non-nullopt. But this is
1329 * not ACTUALLY always true. Generally, Stroika constructs strings like this. But callers may manually construct
1330 * a String with backend rep u32string, for example (e.g because of move construct) - and that might just happen
1331 * to be all ascii. You can count on that IF you get back value from PeekData<ASCII> - it must be all ASCII. But
1332 * the converse is not always true.
1333 */
1334 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
1335 static optional<span<const CHAR_TYPE>> PeekData (const PeekSpanData& pds);
1336 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
1337 nonvirtual optional<span<const CHAR_TYPE>> PeekData () const;
1338
1339 public:
1340 /**
1341 * \brief return the constant character data inside the string (rep) in the form of a span, possibly quickly and directly, and possibly copied into possiblyUsedBuffer
1342 *
1343 * This API will typically return a span of data which is internal pointers into the data of the rep (and so its invalidated on the
1344 * next change to the string).
1345 *
1346 * BUT - it maybe a span of data stored into the argument possiblyUsedBuffer (which is why it must be provided - cannot be nullptr).
1347 * If you want the freedom to not pass in this buffer, see the PeekData API.
1348 *
1349 * \note - CHAR_T must satisfy the concept IUNICODECanAlwaysConvertTo - SAFELY - because the string MIGHT contain characters not in any
1350 * unsafe char class (like ASCII or Latin1), and so there might not be a way to do the conversion. Use
1351 * PeekData () to do that - where it can return nullopt if no conversion possible.
1352 *
1353 * \par Example Usage
1354 * \code
1355 * Memory::StackBuffer<char8_t> maybeIgnoreBuf1;
1356 * span<const char8_t> thisData = GetData (&maybeIgnoreBuf1);
1357 * \endcode
1358 *
1359 * \note Prior to Stroika v3.0d1, GetData() took no arguments, and returned pair<const CHAR_TYPE*, const CHAR_TYPE*>
1360 * which is pretty similar, but not quite the same. To adapt any existing code calling that older version of the API
1361 * just add a Memory::StackBuffer<T> b; and pass &b to GetData(); And the return span is not the same as pair<> but
1362 * easily convertible.
1363 */
1364 template <IUNICODECanAlwaysConvertTo CHAR_TYPE, size_t STACK_BUFFER_SZ>
1365 static span<const CHAR_TYPE> GetData (const PeekSpanData& pds, Memory::StackBuffer<CHAR_TYPE, STACK_BUFFER_SZ>* possiblyUsedBuffer);
1366 template <IUNICODECanAlwaysConvertTo CHAR_TYPE, size_t STACK_BUFFER_SZ>
1367 nonvirtual span<const CHAR_TYPE> GetData (Memory::StackBuffer<CHAR_TYPE, STACK_BUFFER_SZ>* possiblyUsedBuffer) const;
1368
1369 public:
1370 struct EqualsComparer;
1371
1372 public:
1373 struct LessComparer;
1374
1375 public:
1376 struct ThreeWayComparer;
1377
1378 public:
1379 /**
1380 * Return true if case sensitive compare of the two IConvertibleToString objects have the same characters.
1381 * Indirects to EqualsComparer{eWithCase} (...)
1382 *
1383 * \note Both the plain (non-template) declaration of operator== and the 'requires (not ...)' clause on the template overload appear to be required
1384 * on all major compilers due to quirks of the operator== rewrite rules, but I don't fully understand why --LGP 2024-11-18.
1385 */
1386 nonvirtual bool operator== (const String& rhs) const;
1387 template <IConvertibleToString T>
1388 nonvirtual bool operator== (T&& rhs) const
1389 requires (not same_as<remove_cvref_t<T>, String>);
1390
1391 public:
1392 /**
1393 * Return strong_ordering of case sensitive (three-way) compare of the two IConvertibleToString objects.
1394 * Indirects to ThreeWayComparer{eWithCase} (...)
1395 *
1396 * \see operator== ()
1397 */
1398 nonvirtual strong_ordering operator<=> (const String& rhs) const;
1399 template <IConvertibleToString T>
1400 nonvirtual strong_ordering operator<=> (T&& rhs) const
1401 requires (not same_as<remove_cvref_t<T>, String>);
1402
1403 public:
1404 /**
1405 * @aliases basic_string<char>::npos
1406 *
1407 * This is only used for 'STL-compatibility' APIs, like substr (), find (), rfind (), etc.
1408 */
1409 static constexpr size_t npos = static_cast<size_t> (-1);
1410
1411 public:
1412 /**
1413 * @aliases size
1414 */
1415 nonvirtual size_t length () const noexcept;
1416
1417 public:
1418 /**
1419 * \note BREAKING change between Stroika 2.1 and v3 - const c_str/0 no longer guaranteed to return non-null
1420 *
1421 * Mitigating this, the non-const c_str() still will return non-null, and the const overload taking
1422 * StackBuffer<wchar_t> will also guarantee returning non-null.
1423 *
1424 * In the case of the overloads taking no arguments, the lifetime of the returned pointer is until the
1425 * next change to this string. In the case of the StackBuffer overload, the guarantee extends for the lifetime
1426 * of the argument buffer (typically just the next few lines of code).
1427 *
1428 * This will always return a value which is NUL-terminated.
1429 *
1430 * Note also - the c_str () function CAN now be somewhat EXPENSIVE, causing a mutation of the String object, so use
1431 * one of the const overloads where possible (or where performance matters).
1432 *
1433 * \note Why does c_str (StackBuffer) return a tuple?
1434 * Sometimes you just want a plain const wchar_t* you can use with an old C pointer based API. But that
1435 * fails/asserts out if you happen to have an empty string and try to get the pointer. Sometimes - you just need
1436 * the pointer!
1437 *
1438 * And why the string-view part? Because sometimes you want the LENGTH. Sure - you can just compute it again. But
1439 * that is costly. Sure you can just use the original string length. BUT THAT WOULD BE A BUG once I support
1440 * surrogates properly (at least on windows where wchar_t isn't char32_t).
1441 */
1442 nonvirtual tuple<const wchar_t*, wstring_view> c_str (Memory::StackBuffer<wchar_t>* possibleBackingStore) const;
1443
1444 public:
1445 /**
1446 * Follow the basic_string<>::find () API
1447 *
1448 * need more overloads.
1449 *
1450 * Returns String::npos if not found, else the zero based index.
1451 */
1452 nonvirtual size_t find (Character c, size_t startAt = 0) const;
1453 nonvirtual size_t find (const String& s, size_t startAt = 0) const;
1454
1455 public:
1456 /**
1457 * Follow the basic_string<>::rfind () API
1458 *
1459 * need more overloads.
1460 *
1461 * Returns String::npos if not found, else the zero based index.
1462 */
1463 nonvirtual size_t rfind (Character c) const;
1464
1465 public:
1466 /**
1467 * mimic https://en.cppreference.com/w/cpp/string/basic_string/front
1468 *
1469 * \pre not empty ()
1470 */
1471 nonvirtual Character front () const;
1472
1473 public:
1474 /**
1475 * mimic https://en.cppreference.com/w/cpp/string/basic_string/back
1476 *
1477 * \pre not empty ()
1478 */
1479 nonvirtual Character back () const;
1480
1481 public:
1482 /**
1483 * Compatible with STL::basic_string::substr() - which interprets second argument as count. Not the same
1484 * as Stroika::String::SubString (where the second argument is a 'to')
1485 *
1486 * @see SubString
1487 *
1488 * From http://en.cppreference.com/w/cpp/string/basic_string/substr
1489 * Returns a substring [pos, pos+count). If the requested substring extends
1490 * past the end of the string, or if count == npos, the returned substring is [pos, size()).
1491 * std::out_of_range if pos > size()
1492 */
1493 nonvirtual String substr (size_t from, size_t count = npos) const;
1494
1495 public:
1496 ///////////////// DEPRECATED FUNCTIONS /////////////////
1497 [[deprecated ("Since Stroika v3.0d13 - if you must use c_str() - use the overload taking StackBuffer arg), or use As<wstring> "
1498 "().c_str ()")]] const wchar_t*
1499 c_str ();
1500 [[deprecated ("Since Stroika v3.0d12 use StringBuilder::SetAt")]] void SetCharAt (Character c, size_t i);
1501 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void erase (size_t from = 0);
1502 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void erase (size_t from, size_t count);
1503 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void push_back (wchar_t c);
1504 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void push_back (Character c);
1505 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (Character c);
1506 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const String& s);
1507 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const wchar_t* s);
1508 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const wchar_t* from, const wchar_t* to);
1509 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const Character* from, const Character* to);
1510 template <typename CHAR_T>
1511 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (span<const CHAR_T> s)
1512 requires (same_as<CHAR_T, Character> or same_as<CHAR_T, char32_t>);
1513 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] String& operator+= (Character appendage);
1514 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] String& operator+= (const String& appendage);
1515 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] String& operator+= (const wchar_t* appendageCStr);
1516 [[deprecated ("Since Stroika v3.0d12 - just use a b String{}")]] void clear ()
1517 {
1518 *this = String{};
1519 }
1520 template <typename T>
1521 [[deprecated ("Since Stroika v3.0d2, just use 0 arg version)")]] void As (T* into) const
1522 requires (IBasicUNICODEStdString<T> or same_as<T, String>)
1523 {
1524 *into = this->As<T> ();
1525 }
1526 [[deprecated ("Since Stroika v3.0d2, just use 1 arg version)")]] void AsNarrowString (const locale& l, string* into) const
1527 {
1528 *into = this->AsNarrowString (l);
1529 }
1530 template <typename T = u8string>
1531 [[deprecated ("Since Stroika v3.0d2 - use AsUTF8/0")]] void AsUTF8 (T* into) const
1532 requires (same_as<T, string> or same_as<T, u8string>)
1533 {
1534 *into = this->AsUTF8 ();
1535 }
1536 template <typename T = u16string>
1537 [[deprecated ("Since Stroika v3.0d2 - use AsUTF16/0")]] void AsUTF16 (T* into) const
1538 requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>))
1539 {
1540 *into = AsUTF16 ();
1541 }
1542 template <typename T = u32string>
1543 [[deprecated ("Since Stroika v3.0d2 - use AsUTF32/0")]] void AsUTF32 (T* into) const
1544 requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>))
1545 {
1546 *into = AsUTF32 ();
1547 }
1548 [[deprecated ("Since Stroika v3.0d2 - just use /0")]] void AsSDKString (SDKString* into) const
1549 {
1550 *into = AsSDKString ();
1551 }
1552 [[deprecated ("Since Stroika v3.0d2 - just use /0")]] void AsNarrowSDKString (string* into) const
1553 {
1554 *into = SDK2Narrow (AsSDKString ());
1555 }
1556 template <typename T = string>
1557 [[deprecated ("Since v3.0d2 use /0")]] void AsASCII (T* into) const
1558 requires (same_as<T, string> or same_as<T, Memory::StackBuffer<char>>)
1559 {
1560 if (not AsASCIIQuietly (into)) {
1561 ThrowInvalidAsciiException_ ();
1562 }
1563 }
1564 template <typename T = string>
1565 [[deprecated ("Since v3.0d2 use /0 overload")]] bool AsASCIIQuietly (T* into) const
1566 requires (same_as<T, string> or same_as<T, Memory::StackBuffer<char>>)
1567 {
1568 auto r = this->AsASCIIQuietly ();
1569 if (r) {
1570 *into = *r;
1571 return true;
1572 }
1573 else {
1574 return false;
1575 }
1576 }
1577 [[deprecated ("Since Stroika v3.0d5 use StringShorteningPreference argument")]] String LimitLength (size_t maxLen, bool keepLeft) const
1578 {
1579 return LimitLength (maxLen, keepLeft ? StringShorteningPreference::ePreferKeepLeft : StringShorteningPreference::ePreferKeepRight);
1580 }
1581 [[deprecated ("Since Stroika v3.0d5 use StringShorteningPreference argument")]] String LimitLength (size_t maxLen, bool keepLeft,
1582 const String& ellipsis) const
1583 {
1584 return LimitLength (maxLen, keepLeft ? StringShorteningPreference::ePreferKeepLeft : StringShorteningPreference::ePreferKeepRight, ellipsis);
1585 }
1586 template <typename CHAR_T>
1587 [[deprecated ("Since Stroika v3.0d1, String{}")]] static String FromASCII (span<const CHAR_T> s)
1588 {
1589 return String{s};
1590 }
1591 template <typename CHAR_T>
1592 [[deprecated ("Since Stroika v3.0d1, String{}")]] static String FromASCII (const CHAR_T* cString)
1593 {
1594 return String{cString};
1595 }
1596 template <IStdBasicStringCompatibleCharacter CHAR_T>
1597 [[deprecated ("Since Stroika v3.0d1, String{}")]] static String FromASCII (const basic_string<CHAR_T>& str)
1598 {
1599 return String{str};
1600 }
1601 [[deprecated ("Since Stroika v3.0d1, use span{} overload for this")]] static String FromASCII (const char* from, const char* to)
1602 {
1603 return String{span{from, to}};
1604 }
1605 [[deprecated ("Since Stroika v3.0d1, use span{} overload for this")]] static String FromASCII (const wchar_t* from, const wchar_t* to)
1606 {
1607 return String{span{from, to}};
1608 }
1609 [[deprecated ("Since Stroika v3.0d1, use span overloads")]] String InsertAt (const wchar_t* from, const wchar_t* to, size_t at) const
1610 {
1611 Memory::StackBuffer<Character> buf{Memory::eUninitialized, UTFConvert::ComputeTargetBufferSize<Character> (span{from, to})};
1612 return InsertAt (UTFConvert::kThe.ConvertSpan (span{from, to}, span{buf}), at);
1613 }
1614 [[deprecated ("Since Stroika v3.0d1, use span overloads")]] String InsertAt (const Character* from, const Character* to, size_t at) const
1615 {
1616 return InsertAt (span{from, to}, at);
1617 }
1618 [[deprecated ("Since Stroika v3.0d1, use span{} overload for this")]] static String FromLatin1 (const char* start, const char* end)
1619 {
1620 return FromLatin1 (span{start, end});
1621 }
1622 [[deprecated ("Since Stroika v3.0d1, use span{} constructor for this")]] static String FromNarrowString (const char* from,
1623 const char* to, const locale& l)
1624 {
1625 return FromNarrowString (span{from, to}, l);
1626 }
1627 [[deprecated ("Since Stroika v3.0d1, use span{} constructor for this")]] static String FromNarrowSDKString (const char* from, const char* to)
1628 {
1629 return FromNarrowSDKString (span{from, to});
1630 }
1631 template <IUNICODECanAlwaysConvertTo CHAR_T>
1632 [[deprecated ("Since Stroika v3.0d1, use span{} constructor for this")]] String (const CHAR_T* from, const CHAR_T* to)
1633 : String{span<const CHAR_T>{from, to}}
1634 {
1635 }
1636 [[deprecated (
1637 "Since Stroika v3.0d1 - use As<wstring> ().c_str () or other c_str() overload (*UNSAFE TO USE*)")]] nonvirtual const wchar_t*
1638 c_str () const noexcept;
1639 [[deprecated ("Since Stroika v3.0 - use span{} overloads")]] inline static String FromSDKString (const SDKChar* from, const SDKChar* to)
1640 {
1641 return FromSDKString (span{from, to});
1642 }
1643 [[deprecated ("Since Stroika v3.0 - use span{} overloads")]] static String FromUTF8 (const char* from, const char* to)
1644 {
1645 return FromUTF8 (span{from, to});
1646 }
1647 [[deprecated ("Since Stroika v3.0 - use span{} overloads")]] static String FromUTF8 (const char8_t* from, const char8_t* to)
1648 {
1649 return FromUTF8 (span{from, to});
1650 }
1651 template <typename T = string>
1652 [[deprecated ("Since Stroika v3.0d1 - use Character::AsAsciiQuietly")]] static bool AsASCIIQuietly (const wchar_t* fromStart,
1653 const wchar_t* fromEnd, T* into)
1654 {
1655 return Character::AsASCIIQuietly (span<const wchar_t>{fromStart, fromEnd}, into);
1656 }
1657 [[deprecated (
1658 "Since Stroika v3.0d1 due to http://stroika-bugs.sophists.com/browse/STK-965 - NOT IMPLEMENTED")]] nonvirtual const wchar_t*
1659 data () const;
1660 [[deprecated ("Since Stroika v3.0d8 - use RemoveFirstIf")]] String Remove (Character c) const
1661 {
1662 return RemoveFirstIf (c);
1663 }
1664 [[deprecated ("Since Stroika v3.0d8 - use RemoveFirstIf")]] String Remove (const String& subString) const
1665 {
1666 return RemoveFirstIf (subString);
1667 }
1668
1669 private:
1670 static shared_ptr<_IRep> mkEmpty_ ();
1671
1672 private:
1673 /**
1674 * If the argument CHAR_T is restrictive (such as ASCII/char) - this CHECKS and THROWS (Character::CheckASCII).
1675 * This function also reads the data, and sees if it can downshift 'CHAR_T' to something more restrictive, and produces
1676 * a possibly smaller rep.
1677 *
1678 * For some overloads (e.g. && move) - the data is 'stolen/moved'.
1679 *
1680 * See mk_nocheck_ for a simpler - DO WHAT I SAID - operation.
1681 */
1682 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1683 static shared_ptr<_IRep> mk_ (span<const CHAR_T> s);
1684 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1685 static shared_ptr<_IRep> mk_ (Iterable<CHAR_T> it);
1686 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1687 static shared_ptr<_IRep> mk_ (span<CHAR_T> s);
1688 template <IStdBasicStringCompatibleCharacter CHAR_T>
1689 static shared_ptr<_IRep> mk_ (basic_string<CHAR_T>&& s);
1690
1691 private:
1692 /*
1693 * Note the mk_nocheck_ - just does the mk of the buffer, but assuming the arguments are legit and will fit (though it may
1694 * assert in DEBUG builds this is true).
1695 *
1696 * This just blindly allocates the buffer of the given size/type for the given arguments.
1697 */
1698 template <typename CHAR_T>
1699 static shared_ptr<_IRep> mk_nocheck_ (span<const CHAR_T> s)
1700 requires (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1> or same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>);
1701
1702 private:
1703 template <unsigned_integral T>
1704 nonvirtual size_t SubString_adjust_ (T fromOrTo, size_t myLength) const;
1705 template <signed_integral T>
1706 nonvirtual size_t SubString_adjust_ (T fromOrTo, size_t myLength) const;
1707
1708 private:
1709 nonvirtual String SubString_ (const _SafeReadRepAccessor& thisAccessor, size_t from, size_t to) const;
1710
1711 protected:
1712 nonvirtual void _AssertRepValidType () const;
1713
1714 private:
1715 [[noreturn]] static void ThrowInvalidAsciiException_ (); // avoid include
1716 };
1717 static_assert (totally_ordered<String>);
1718
1719#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
1720 template <>
1721 std::filesystem::path String::As<std::filesystem::path> () const;
1722#endif
1723
1724 /**
1725 * operator<< ostream adapters work as you would expect and allow writing Stroika strings easily to ostreams such as cout.
1726 *
1727 * \note uses AsNarrowSDKString (eIgnoreErrors)
1728 *
1729 * The only catch - is that Stroika strings are UNICODE based, and so may not fit perfectly with 'char' based basic_ostream<>.
1730 * To address this, Stroika strings are mapped to 'narrow sdk strings' - ignoring any errors. As this is generally not a very
1731 * good practice to do (lossy) - and generally just done for debugging/diagnostic output, this was deemed acceptable (as of Stroika v3.0d6).
1732 */
1733 wostream& operator<< (wostream& out, const String& s);
1734 ostream& operator<< (ostream& out, const String& s);
1735
1736#if qStroika_HasComponent_googletest
1737 // For googletest compatibility
1738 void PrintTo (const String& s, std::ostream* os);
1739#endif
1740
1741 /**
1742 * Protected helper Rep class.
1743 *
1744 * \note Important design note - String reps are IMMUTABLE. Changes to string like +=, create new string reps (so costly).
1745 * Use StringBuilder for that purpose in performance sensitive code.
1746 */
1747 class String::_IRep : public Iterable<Character>::_IRep {
1748 public:
1749 /**
1750 * Return the ith character in the string.
1751 */
1752 virtual Character GetAt (size_t index) const noexcept = 0;
1753
1754 public:
1755 /**
1756 * Each rep will support a span of at least one code-point type (ascii, utf8, utf16, or utf32)
1757 *
1758 * This API is guaranteed to support a span of at least one of these types (maybe more). The caller may
1759 * specify the code-point type preferred.
1760 */
1761 virtual PeekSpanData PeekData ([[maybe_unused]] optional<PeekSpanData::StorageCodePointType> preferred) const noexcept = 0;
1762
1763 public:
1764 /*
1765 * Return a pointer to mostly standard (wide, nul-terminated) C string,
1766 * whose lifetime extends to the next non-const call on this rep, or nullptr.
1767 *
1768 * It is only 'mostly' standard because it is allowed to have nul-chars embedded in it. But it will
1769 * always have str[len] == 0;
1770 *
1771 * \note Since Stroika v3.0d1, this can return nullptr (in which case the String library will allocate a new backend)
1772 *
1773 * \post returnResult == nullptr or returnResult[len] == '\0';
1774 */
1775 virtual const wchar_t* c_str_peek () const noexcept = 0;
1776
1777 private:
1778 friend class String;
1779 };
1780
1781 // Some docs/testing...
1782 static_assert (not IConvertibleToString<int>);
1783 static_assert (not IConvertibleToString<char>); // would have been sensible to allow, but easily generates confusing results: cuz that means String x = 3 would work; confusing with ovarloads)
1784 static_assert (IConvertibleToString<string>);
1785 static_assert (IConvertibleToString<wstring>);
1786 static_assert (IConvertibleToString<u8string>);
1787 static_assert (IConvertibleToString<u16string>);
1788 static_assert (IConvertibleToString<u32string>);
1789 static_assert (not IConvertibleToString<optional<String>>);
1790
1791 namespace Private_ {
1792 // This is just anything that can be treated as a 'span<const Character>'
1793 // clang-format off
1794 template <typename T>
1795 concept ICanBeTreatedAsSpanOfCharacter_ =
1796 derived_from<remove_cvref_t<T>, String>
1797 or same_as<remove_cvref_t<T>, u8string>
1798 or same_as<remove_cvref_t<T>, u8string_view>
1799 or same_as<remove_cvref_t<T>, u16string>
1800 or same_as<remove_cvref_t<T>, u16string_view>
1801 or same_as<remove_cvref_t<T>, u32string>
1802 or same_as<remove_cvref_t<T>, u32string_view>
1803 or same_as<remove_cvref_t<T>, wstring>
1804 or same_as<remove_cvref_t<T>, wstring_view>
1805 or same_as<remove_cvref_t<T>, const Character*>
1806 or same_as<remove_cvref_t<T>, const char8_t*>
1807 or same_as<remove_cvref_t<T>, const char16_t*>
1808 or same_as<remove_cvref_t<T>, const char32_t*>
1809 or same_as<remove_cvref_t<T>, const wchar_t*>
1810 ;
1811 // clang-format on
1812
1813 template <ICanBeTreatedAsSpanOfCharacter_ USTRING, size_t STACK_BUFFER_SZ>
1814 span<const Character> AsSpanOfCharacters_ (USTRING&& s, Memory::StackBuffer<Character, STACK_BUFFER_SZ>* mostlyIgnoredBuf);
1815 }
1816
1817 /**
1818 *
1819 * \par Example Usage
1820 * \code
1821 * constexpr String::EqualsComparer kStringCIComparer_ {Characters::CompareOptions::eCaseInsensitive};
1822 * if (kStringCIComparer_ (filename.extension (), ".HFCC"sv)) {
1823 * compiledName = filename;
1824 * }
1825 * \endcode
1826 *
1827 * \note There is no String::Equals() method, because it would look queer if it took one string argument, and if it was static
1828 * it would essentially look like the above comparer, so little point.
1829 */
1830 struct String::EqualsComparer : Common::ComparisonRelationDeclarationBase<Common::ComparisonRelationType::eEquals> {
1831 /**
1832 * optional CompareOptions to CTOR allows for case insensitive compares
1833 */
1834 constexpr EqualsComparer (CompareOptions co = eWithCase);
1835
1836 /**
1837 * Extra overloads provide a slight performance improvement
1838 */
1839 template <IConvertibleToString LT, IConvertibleToString RT>
1840 nonvirtual bool operator() (LT&& lhs, RT&& rhs) const;
1841
1842 CompareOptions fCompareOptions;
1843
1844 private:
1845 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1846 bool Cmp_ (LT&& lhs, RT&& rhs) const;
1847 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1848 bool Cmp_Generic_ (LT&& lhs, RT&& rhs) const;
1849 };
1850
1851 /**
1852 */
1853 struct String::ThreeWayComparer : Common::ComparisonRelationDeclarationBase<Common::ComparisonRelationType::eThreeWayCompare> {
1854 /**
1855 * optional CompareOptions to CTOR allows for case insensitive compares
1856 */
1857 constexpr ThreeWayComparer (CompareOptions co = eWithCase);
1858
1859 /**
1860 * Extra overloads provide a slight performance improvement
1861 */
1862 template <IConvertibleToString LT, IConvertibleToString RT>
1863 nonvirtual strong_ordering operator() (LT&& lhs, RT&& rhs) const;
1864
1865 CompareOptions fCompareOptions;
1866
1867 private:
1868 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1869 strong_ordering Cmp_ (LT&& lhs, RT&& rhs) const;
1870 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1871 strong_ordering Cmp_Generic_ (LT&& lhs, RT&& rhs) const;
1872 };
1873
1874 /**
1875 * \brief very similar to ThreeWayComparer but returns true if less
1876 */
1877 struct String::LessComparer : Common::ComparisonRelationDeclarationBase<Common::ComparisonRelationType::eStrictInOrder> {
1878 constexpr LessComparer (CompareOptions co = eWithCase);
1879
1880 template <typename T1, typename T2>
1881 nonvirtual bool operator() (T1 lhs, T2 rhs) const;
1882
1883 private:
1884 ThreeWayComparer fComparer_;
1885 };
1887
1888 inline namespace Literals {
1889 /**
1890 * \brief shorthand for String::FromStringConstant { ARGUMENT }
1891 *
1892 * \par Example:
1893 * \code
1894 * String s1 = "some-string"_k;
1895 * String s2 = String::FromStringConstant ("some-string");
1896 * String s3 = "some-string"sv; // in most cases this will also work fine, and is preferable (since sv is part of C++ standard)
1897 * \endcode
1898 *
1899 * \note _k is STILL sometimes useful and better than sv, since the TYPE returned by _k is a String_Constant which IS a String
1900 * so it will work in some overload contexts where sv would fail.
1901 *
1902 * \note operator""_k with char*, requires that the argument string MUST BE ASCII (someday maybe lifted to allow Latin1)
1903 */
1904 String operator""_k (const ASCII* s, size_t len);
1905 String operator""_k (const wchar_t* s, size_t len);
1906 String operator""_k (const char8_t* s, size_t len);
1907 String operator""_k (const char16_t* s, size_t len);
1908 String operator""_k (const char32_t* s, size_t len);
1909 }
1910
1911 /**
1912 * Basic operator overload with the obvious meaning, and simply indirect to @String::Concatenate (const String& rhs)
1913 *
1914 * \note Design Note
1915 * Don't use member function so "x" + String{u"x"} works.
1916 * Insist that EITHER LHS or RHS is a string (else operator applies too widely).
1917 *
1918 * Both arguments must be convertible to a String, and at least one must be String or derived from String
1919 */
1920 template <IConvertibleToString LHS_T, IConvertibleToString RHS_T>
1921 String operator+ (LHS_T&& lhs, RHS_T&& rhs)
1922 requires (derived_from<remove_cvref_t<LHS_T>, String> or derived_from<remove_cvref_t<RHS_T>, String>);
1923
1924 /**
1925 * \brief StringCombiner is a simple function object used to combine two strings visually - used in Iterable<>::Join ()
1926 *
1927 * This can combine strings in the obvious way (concatenation) - but defaults to separating them with a comma (', ').
1928 *
1929 * \note the functional api - is to be given two strings, and a flag saying if the combination is the last one in the list,
1930 * since in English, this is frequently rendered somewhat differently than the rest.
1931 */
1932 template <typename STRING = String>
1934 STRING fSeparator{", "sv};
1935 optional<STRING> fSpecialSeparatorForLastPair;
1936 STRING operator() (const STRING& lhs, const STRING& rhs, bool isLast) const;
1937 };
1938
1939 /**
1940 * kDefaultStringCombiner is just StringCombiner{}, rendered as a function object, so that it can be externed/imported
1941 * in the Iterable code without imposing a dependency on the String code.
1942 */
1943 extern const function<String (String, String, bool)> kDefaultStringCombiner;
1944
1945}
1946
1947namespace Stroika::Foundation::Traversal {
1948 // specialized as performance optimization
1949 template <>
1950 Characters::String Iterable<Characters::String>::Join (const Characters::String& separator, const optional<Characters::String>& finalSeparator) const;
1951}
1952
1953namespace std {
1954 template <>
1955 struct hash<Stroika::Foundation::Characters::String> {
1956 size_t operator() (const Stroika::Foundation::Characters::String& arg) const;
1957 };
1958}
1959
1960namespace Stroika::Foundation::Memory {
1961 class BLOB; // Forward declare to avoid mutual include issues
1962}
1963
1965 template <typename T>
1966 struct DefaultSerializer; // Forward declare to avoid mutual include issues
1967 template <>
1969 Memory::BLOB operator() (const Stroika::Foundation::Characters::String& arg) const;
1970 };
1971}
1972
1973/**
1974 * Allow std::format to work with String class
1975 *
1976 * \note SUPER PRIMITIVE ROUGH FIRST DRAFT
1977 */
template <>
struct qStroika_Foundation_Characters_FMT_PREFIX_::formatter<Stroika::Foundation::Characters::String, wchar_t> {
    // Format-spec parsing and output are both handed off to the wstring formatter.
    qStroika_Foundation_Characters_FMT_PREFIX_::formatter<std::wstring, wchar_t> fDelegate2_;

    /**
     *  Parse the format-spec by forwarding to the delegate wstring formatter.
     */
    template <typename ParseContext>
    constexpr auto parse (ParseContext& ctx) -> typename ParseContext::iterator
    {
        return fDelegate2_.parse (ctx);
    }

    /**
     *  Convert the String to a std::wstring and let the delegate format it into ctx.
     */
    template <typename FmtContext>
    auto format (Stroika::Foundation::Characters::String s, FmtContext& ctx) const -> typename FmtContext::iterator
    {
        const std::wstring asWide = s.As<std::wstring> ();
        return fDelegate2_.format (asWide, ctx);
    }
};
template <>
struct qStroika_Foundation_Characters_FMT_PREFIX_::formatter<Stroika::Foundation::Characters::String, char> {
    bool ignoreerrors{true}; // maybe set from thread-local variable, or parse() settings, or both

    /**
     *  No format-spec options are interpreted. Scan forward to the '}' that closes the replacement
     *  field and return an iterator to it, or return end () if there is none (a missing '}' is not
     *  diagnosed here).
     *
     *  The end () test comes before every dereference, and the first character is examined too, so
     *  an empty spec ("{}") returns an iterator to its own closing brace rather than stepping past it.
     */
    template <typename ParseContext>
    constexpr typename ParseContext::iterator parse (ParseContext& ctx)
    {
        auto it = ctx.begin ();
        while (it != ctx.end () and *it != '}') {
            ++it;
        }
        return it;
    }

    /**
     *  Write the String to ctx as a narrow SDK string. When ignoreerrors is set, the eIgnoreErrors
     *  overload of AsNarrowSDKString is used; otherwise the plain overload is used.
     */
    template <typename FmtContext>
    typename FmtContext::iterator format (Stroika::Foundation::Characters::String s, FmtContext& ctx) const
    {
        using namespace Stroika::Foundation::Characters;
        // really want to delegate to wchar_t version (with vformat) but no documented easy way to extract format_args from ctx (though its in there)
        if (ignoreerrors) {
#if __cpp_lib_ranges >= 202207L
            return std::ranges::copy (s.AsNarrowSDKString (eIgnoreErrors), ctx.out ()).out;
#else
            return format_to (ctx.out (), "{}", s.AsNarrowSDKString (eIgnoreErrors));
#endif
        }
        else {
#if __cpp_lib_ranges >= 202207L
            return std::ranges::copy (s.AsNarrowSDKString (), ctx.out ()).out;
#else
            return format_to (ctx.out (), "{}", s.AsNarrowSDKString ());
#endif
        }
    }
};
2038
2039/*
2040 ********************************************************************************
2041 ***************************** Implementation Details ***************************
2042 ********************************************************************************
2043 */
2044#include "String.inl"
2045
2046#endif /*_Stroika_Foundation_Characters_String_h_*/
#define Stroika_Define_Enum_Bounds(FIRST_ITEM, LAST_ITEM)
constexpr bool IsWhitespace() const noexcept
RegularExpression is a compiled regular expression which can be used to match on a String class.
virtual Character GetAt(size_t index) const noexcept=0
virtual PeekSpanData PeekData(optional< PeekSpanData::StorageCodePointType > preferred) const noexcept=0
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual bool Contains(Character c, CompareOptions co=eWithCase) const
Definition String.inl:697
nonvirtual size_t length() const noexcept
Definition String.inl:1051
nonvirtual String ToUpperCase() const
Definition String.cpp:1745
static String FromNarrowString(const char *from, const locale &l)
Definition String.inl:340
nonvirtual bool Matches(const RegularExpression &regEx) const
Definition String.cpp:1134
nonvirtual bool IsWhitespace() const
Definition String.cpp:1783
nonvirtual String NormalizeTextToNL() const
Definition String.cpp:1202
static String Join(const Iterable< String > &list, const String &separator=", "sv)
Definition String.cpp:1693
static String FromStringConstant(const CHAR_T(&cString)[SIZE])
Take the given argument data (constant span) - which must remain unchanged - constant - for the appli...
Definition String.inl:386
nonvirtual String ColValue(size_t i, const String &valueIfMissing={}) const
see Col(i) - but with default value of empty string
Definition String.inl:715
nonvirtual String NormalizeSpace(Character useSpaceCharacter=' ') const
Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument...
Definition String.cpp:1230
nonvirtual Containers::Sequence< pair< size_t, size_t > > FindEach(const RegularExpression &regEx) const
Definition String.cpp:967
nonvirtual tuple< const wchar_t *, wstring_view > c_str(Memory::StackBuffer< wchar_t > *possibleBackingStore) const
Definition String.inl:1055
nonvirtual bool operator==(const String &rhs) const
Definition String.inl:1115
nonvirtual String Repeat(unsigned int count) const
Definition String.cpp:1425
static String FromSDKString(const SDKChar *from)
Definition String.inl:447
nonvirtual String LimitLength(size_t maxLen, StringShorteningPreference keepPref=StringShorteningPreference::ePreferKeepLeft) const
return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if tr...
Definition String.inl:745
nonvirtual String RemoveAll(Character c) const
Definition String.cpp:824
nonvirtual Containers::Sequence< RegularExpressionMatch > FindEachMatch(const RegularExpression &regEx) const
Definition String.cpp:985
nonvirtual String RemoveFirstIf(Character c) const
Definition String.cpp:808
nonvirtual string AsNarrowSDKString() const
Definition String.inl:834
nonvirtual optional< String > Col(size_t i) const
Useful to replace 'awk print $3' - replace with Col(2) - zero based.
Definition String.cpp:1363
nonvirtual String InsertAt(Character c, size_t at) const
Definition String.inl:719
nonvirtual size_t rfind(Character c) const
Definition String.inl:1075
static String FromNarrowSDKString(const char *from)
Definition String.inl:470
nonvirtual string AsNarrowString(const locale &l) const
Definition String.cpp:1839
nonvirtual String Concatenate(T &&rhs) const
appends 'rhs' string to this string (without modifying this string) and returns the combined string
nonvirtual SDKString AsSDKString() const
Definition String.inl:806
nonvirtual size_t size() const noexcept
Definition String.inl:534
nonvirtual bool EndsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1089
nonvirtual String ToLowerCase() const
Definition String.cpp:1707
nonvirtual String ReplaceAll(const RegularExpression &regEx, const String &with) const
Definition String.cpp:1156
static constexpr size_t npos
Definition String.h:1409
nonvirtual String Replace(size_t from, size_t to, const String &replacement) const
Definition String.cpp:1046
nonvirtual String SubString(SZ from) const
nonvirtual String Trim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1593
nonvirtual strong_ordering operator<=>(const String &rhs) const
Definition String.inl:1105
nonvirtual Character back() const
Definition String.inl:1079
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1060
nonvirtual String StripAll(bool(*removeCharIf)(Character)) const
Definition String.cpp:1665
nonvirtual String AssureEndsWith(const Character &c, CompareOptions co=eWithCase) const
Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
Definition String.cpp:1124
nonvirtual span< CHAR_T > CopyTo(span< CHAR_T > s) const
nonvirtual PeekSpanData GetPeekSpanData() const
return the constant character data inside the string in the form of a case variant union of different...
nonvirtual String SafeSubString(SZ from) const
nonvirtual Containers::Sequence< String > AsLines() const
break the String into a series of lines;
Definition String.cpp:1307
nonvirtual String LTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1444
nonvirtual Character front() const
Definition String.inl:1086
nonvirtual Containers::Sequence< String > Grep(const String &fgrepArg) const
Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with ....
Definition String.cpp:1342
nonvirtual Containers::Sequence< String > FindEachString(const RegularExpression &regEx) const
Definition String.cpp:1002
nonvirtual String Skip(size_t n) const
Return a substring of this string, starting at 'argument' n. If n > size(), return empty string.
Definition String.inl:604
nonvirtual optional< size_t > RFind(Character c) const noexcept
Definition String.cpp:1012
static span< const CHAR_TYPE > GetData(const PeekSpanData &pds, Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *possiblyUsedBuffer)
return the constant character data inside the string (rep) in the form of a span, possibly quickly an...
Definition String.inl:967
nonvirtual Containers::Sequence< String > Tokenize() const
Definition String.cpp:1235
nonvirtual String RemoveAt(size_t charAt) const
Definition String.inl:608
nonvirtual optional< T > AsASCIIQuietly() const
static String FromLatin1(const CHAR_T *cString)
Definition String.inl:355
static optional< span< const CHAR_TYPE > > PeekData(const PeekSpanData &pds)
return the constant character data inside the string in the form of a span or nullopt if not availabl...
Definition String.inl:910
static String FromUTF8(span< CHAR_T > from)
Definition String.inl:420
nonvirtual String RTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1509
nonvirtual optional< size_t > Find(Character c, CompareOptions co=eWithCase) const
Definition String.inl:685
nonvirtual String substr(size_t from, size_t count=npos) const
Definition String.inl:1092
nonvirtual size_t find(Character c, size_t startAt=0) const
Definition String.inl:1067
Set<T> is a container of T, where once an item is added, additionally adds () do nothing.
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
Iterable(const Iterable &) noexcept=default
Iterable are safely copyable (by value). Since Iterable uses COW, this just copies the underlying poi...
returns true iff T == u8string, u16string, u32string, or wstring - which std::string types can be una...
Definition String.h:116
anything with a 'special .STRINGTYPE conversion' method to UNICODE string, such as filesystem::path
Definition String.h:124
IUNICODECanUnambiguouslyConvertFrom is any 'character representation type' where array of them unambi...
Definition Character.h:179
char ASCII
Stroika's string/character classes treat 'char' as being an ASCII character.
Definition Character.h:59
conditional_t< qTargetPlatformSDKUseswchar_t, wchar_t, char > SDKChar
Definition SDKChar.h:71
basic_string< SDKChar > SDKString
Definition SDKString.h:38
String operator+(LHS_T &&lhs, RHS_T &&rhs)
Definition String.inl:1294
const function< String(String, String, bool)> kDefaultStringCombiner
Definition String.inl:1319
wostream & operator<<(wostream &out, const String &s)
Definition String.cpp:2036
STL namespace.
very similar to ThreeWayComparer but returns true if less
Definition String.h:1877
Summary data for raw contents of rep - each rep will support at least one of these span forms.
Definition String.h:1280
StringCombiner is a simple function object used to combine two strings visually - used in Iterable<>:...
Definition String.h:1933
function object which serializes type T to a BLOB (or BLOB like) object