Stroika Library 3.0d20
 
Loading...
Searching...
No Matches
String.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Characters_String_h_
5#define _Stroika_Foundation_Characters_String_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <compare>
10#include <iosfwd>
11#include <locale>
12#include <string>
13#include <string_view>
14
16#include "Stroika/Foundation/Characters/SDKString.h"
19#include "Stroika/Foundation/Containers/Sequence.h"
20#include "Stroika/Foundation/Containers/Set.h"
24
25/**
26 * \file
27 *
28 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
29 *
30 * TODO:
31 *
32 * @todo Cleanup SubString (), and String::SubString_ use of SharedByValue<TRAITS>::ReadOnlyReference for
33 * performance. At some level - in String::SubString_ - we have a (hidden) sharedPtr and it would
34 * be safe and performant in that case to re-use that shared_ptr to make a new String envelope.
35 *
36 * However, I'm not sure its safe in general to have SharedByValue<TRAITS>::ReadOnlyReference expose
37 * its shared_ptr, which appears needed to make this happen.
38 *
39 * Not a biggie opportunity, so we can delay this -- LGP 2014-04-10
40 *
41 * @todo Add PadLeft/PadRight or FillLeft/FillRight() - not sure which name is better. But idea is to
42 * produce a string which is identical to the orig except that IF start len < n, then expand it with
43 * the given arg char repeated on the left or right.
44 *
45 * @todo RFind() API should be embellished to include startAt etc, like regular Find () - but not 100%
46 * sure - think through...
47 *
48 * @todo MAYBE also add ReplaceOne() function (we have ReplaceAll() now) ; see Replace() API in this function? - maybe overload?
49 *
50 * @todo Move DOCS in the top of this file down to the appropriate major classes - and then review the implementation and make sure
51 * it is all correct for each (especially SetStorage () stuff looks questionable)
52 *
53 */
54
56 template <typename T>
57 class Set;
58}
59
61
62 /*
63 * \brief On Windows, affects the behavior of String::As<filesystem::path> ()
64 *
65 * On Windows, it's helpful when mapping String to std::filesystem::path to map certain common name
66 * prefixes to things that will be found on Windows.
67 *
68 * MSYS creates paths like /c/folder for c:/folder
69 * CYGWIN creates paths like /cygdrive/c/folder for c:/folder
70 *
71 * Automatically map these (since Stroika v3.0d6) in (was ToPath) As<filesystem::path> ();
72 *
73 * \see https://www.msys2.org/docs/filesystem-paths/
74 * this API is for getting strings from the commandline, or user input, or configuration files etc, where Cygwin
75 * or msys style paths maybe present. APIs that talk directly to the OS are more likely to more directly produce
76 * filesystem::path than String. Anyhow - because of this, on windows, its probably more helpful than not to map
77 * the MSYS/cygdrive crap to a path more likely to actually work right. --LGP 2024-03-06
78 */
79#ifndef qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
80#define qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin qStroika_Foundation_Common_Platform_Windows
81#endif
82
83}
84
85#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
86namespace std::filesystem {
87 class path; // forward declare for template specialization
88}
89#endif
90
92
93 class RegularExpression;
94 class RegularExpressionMatch;
95
96 /**
97 * Flag principally for LimitLength, but used elsewhere as well (e.g. ToString ()).
98 */
100 ePreferKeepLeft,
101 ePreferKeepRight,
102 ePreferKeepMid,
103
104 eDEFAULT = ePreferKeepLeft,
105
106 Stroika_Define_Enum_Bounds (ePreferKeepLeft, ePreferKeepMid)
107 };
108 using StringShorteningPreference::ePreferKeepLeft;
109 using StringShorteningPreference::ePreferKeepMid;
110 using StringShorteningPreference::ePreferKeepRight;
111
112 /**
113 * \brief returns true iff T == u8string, u16string, u32string, or wstring - which std::string types can be unambiguously converted to UNICODE
114 */
115 template <typename T>
116 concept IBasicUNICODEStdString = same_as<T, u8string> or same_as<T, u16string> or same_as<T, u32string> or same_as<T, wstring>;
117
118 /**
119 * \brief anything with a 'special .STRINGTYPE conversion' method to UNICODE string, such as filesystem::path
120 *
121 * Really, this is a thinly veiled attempt to avoid #include <filesystem> for modularity reasons.
122 */
123 template <typename T>
124 concept IStdPathLike2UNICODEString = requires (T t) {
125 { t.wstring () } -> same_as<wstring>;
126 } or requires (T t) {
127 { t.u8string () } -> same_as<u8string>;
128 } or requires (T t) {
129 { t.u16string () } -> same_as<u16string>;
130 } or requires (T t) {
131 { t.u32string () } -> same_as<u32string>;
132 };
133
134 class String;
135
136 /**
137 * The concept IConvertibleToString is satisfied iff the argument type can be used to construct a (Stroika) String.
138 * Note subtly, const char* is treated (as of Stroika v3) as convertible to String, but the characters must be ASCII, or
139 * an exception will be generated in the constructor.
140 */
141 template <typename T>
142 concept IConvertibleToString = convertible_to<T, String>;
143
144 /**
145 * \brief String is like std::u32string, except it is much easier to use, often much more space efficient, and more easily interoperates with other string types
146 *
147 * The Stroika String class is conceptually a sequence of (UNICODE) Characters, and so there is
148 * no obvious way to map the Stroika String to a std::string (in general). However, if you specify a codepage
149 * for conversion, or are converting to/from SDKString/SDKChar, or u8string, etc, there is builtin support for that.
150 *
151 * EOS Handling:
152 * The Stroika String class supports having embedded NUL-characters. It also supports
153 * easy construction from NUL-terminated character strings.
154 *
155 * Since Stroika v3, there is no longer c_str () support, since Stroika doesn't internally
156 * require NUL-terminated strings, and actively encourages different compact representations of
157 * strings (c_str() requires a choice of a particular encoding to make sense).
158 *
159 * About spans, and the \0 NUL-termination - generally do NOT include
160 * the NUL-character in your span! Stroika strings will allow this, and treat
161 * it as just another character, but its probably not what you meant.
162 *
163 * \note Narrow String handling
164 * Because the character set of strings of type 'char' is ambiguous, if you construct a String
165 * with char (char* etc) - it is somehow 'required' that the characters be ASCII. If using the FromStringConstant () API
166 * , or operator""_k, it is checked with Require () - so assertion failure. If you construct
167 * with String::CTOR, it will generate a runtime exception (so more costly runtime checking).
168 *
169 * \note Satisfies Concepts:
170 * o static_assert (regular<String>);
171 *
172 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety">C++-Standard-Thread-Safety</a>
173 *
174 * \note Design note - mutability vs. immutability
175 * http://stroika-bugs.sophists.com/browse/STK-968 (see about deleting deprecated APIs and remnants of mutability) and c_str()
176 *
177 * String objects are IMMUTABLE (except for the OBVIOUS meaning case of operator= being allowed).
178 *
179 * String reps are IMMUTABLE.
180 *
181 * Use StringBuilder for a 'mutable' String (can be used mostly interchangeably with String).
182 *
183 * Current Mutating methods (as of v3.0d1x)
184 * o c_str () -- non-const deprecated in v3.0d13
185 * o SetCharAt - deprecated v3.0d12
186 * o c_str() (consider deprecating?)
187 * o operator= - deprecated v3.0d12
188 * o clear()- deprecated v3.0d12
189 * o Append - deprecated v3.0d12
190 * o operator+= - deprecated v3.0d12
191 * o erase() - deprecated v3.0d12
192 *
193 * SOMEWHAT ironically, the only of these methods hard to replace is the non-const c_str () - and maybe there
194 * not bad cuz I deprecated? COULD just deprecate ALL of these, and then the class is fully immutable. Probably
195 * easier to understand/reason about.
196 *
197 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
198 * o static_assert (totally_ordered<String>);
199 * o String::EqualsComparer, String::ThreeWayComparer and String::LessComparer provided with construction parameters to allow case insensitive compares
200 */
201 class String : public Traversal::Iterable<Character> {
202 private:
203 using inherited = Iterable<Character>;
204
205 protected:
206 class _IRep;
207
208 public:
209 /**
210 * All the constructors are obvious, except
211 * o NUL-characters ARE allowed in strings, except for the case of single
212 * charX* argument constructors - which find the length based on
213 * the terminating NUL-character.
214 *
215 * o CTOR (PATHLIKE_TOSTRINGABLE&& s) - IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE
216 * carefully excludes conflicting CTOR overloads, and purpose is to allow constructing a String
217 * from anything with a 'special conversion' method to UNICODE string, such as filesystem::path.
218 *
219 * \note about lifetime of argument data (basic_string_view<CHAR_T> constructors)
220 * All data is copied out / saved by the end of the constructor for all constructors EXCEPT
221 * the basic_string_view<CHAR_T> constructors - where it is REQUIRED the data last 'forever'.
222 *
223 * \pre for String (const basic_string_view<wchar_t>& str) - str[str.length()]=='\0';
224 * c-string nul-terminated (which happens automatically with L"xxx"sv)
225 *
226 * \note 'char' (using ASCII = char) constructors:
227 * Because the character-set of strings of type 'char' is ambiguous, if you construct a String
228 * with char (char* etc) - it is runtime checked that the characters are ASCII (except for the basic_string_view
229 * constructors where we check but with assertions).
230 *
231 * This mimics the behavior in Stroika v2.1 with String::FromASCII ()
232 *
233 * \note the basic_string move Constructors MAY move or copy the underlying std string, but they still maintain
234 * the same requirements on their arguments as the copy basic_string constructors (eg. char must be ascii)
235 *
236 * \see also - FromUTF8, FromSDKString, FromNarrowSDKString, FromStringConstant, FromLatin1, which are all like constructors
237 * but with special names to avoid confusion and make clear their arguments, and not participate in overloading. Note, chose
238 * this path instead of FLAG argument and explicit on CTOR, cuz more terse.
239 */
240 String ();
241 explicit String (Character c);
242 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
243 String (const CHAR_T* cString);
244 template <Memory::ISpan SPAN_OF_CHAR_T>
245 String (SPAN_OF_CHAR_T s)
247 template <IStdBasicStringCompatibleCharacter CHAR_T>
248 String (const basic_string<CHAR_T>& s);
249 template <IStdBasicStringCompatibleCharacter CHAR_T>
250 String (const basic_string_view<CHAR_T>& s);
251 template <IStdBasicStringCompatibleCharacter CHAR_T>
252 explicit String (basic_string<CHAR_T>&& s);
253 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
254 String (const Iterable<CHAR_T>& src)
255 requires (not Memory::ISpan<CHAR_T>);
256 template <IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE>
257 explicit String (PATHLIKE_TOSTRINGABLE&& s);
258 String (String&& from) noexcept = default;
259 String (const String& from) noexcept = default;
260
261 private:
262 template <IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE>
263 static String mkSTR_ (PATHLIKE_TOSTRINGABLE&& s);
264
265 private:
266 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<ASCII>& str);
267 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<char8_t>& str);
268 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<char16_t>& str);
269 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<char32_t>& str);
270 static shared_ptr<_IRep> CTORFromBasicStringView_ (const basic_string_view<wchar_t>& str);
271
272 public:
273 ~String () = default;
274
275 protected:
276 /**
277 */
278 using _SafeReadRepAccessor = Iterable<Character>::_SafeReadRepAccessor<_IRep>;
279
280 protected:
281 /**
282 * \pre rep MUST be not-null
283 * However, with move constructor, it maybe null on exit.
284 */
285 String (const shared_ptr<_IRep>& rep) noexcept;
286 String (shared_ptr<_IRep>&& rep) noexcept;
287
288 public:
289 nonvirtual String& operator= (String&& rhs) noexcept = default;
290 nonvirtual String& operator= (const String& rhs) noexcept = default;
291
292 public:
293 /**
294 * Create a String object from a 'char-based' utf-8 encoded string.
295 *
296 * \par Example Usage
297 * \code
298 * EXPECT_TRUE (string{u8"שלום"} == String::FromUTF8 (u8"שלום").AsUTF8 ());
299 * \endcode
300 *
301 * \note This is not generally needed, as you can just use the String::CTOR, but for cases like
302 * std::string-> String - where the conversion needs extra information (an assertion about character encoding of source characters).
303 *
304 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
305 */
306 template <typename CHAR_T>
307 static String FromUTF8 (span<CHAR_T> from)
308 requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>);
309 template <typename CHAR_T>
310 static String FromUTF8 (const basic_string<CHAR_T>& from)
311 requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>);
312 template <typename CHAR_T>
313 static String FromUTF8 (const CHAR_T* from)
314 requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>);
315
316 public:
317 /**
318 * Create a String object from a 'SDKChar' (os-setting - current code page) encoded string.
319 * See @SDKChar
320 * See @SDKString
321 *
322 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
323 */
324 static String FromSDKString (const SDKChar* from);
325 static String FromSDKString (span<const SDKChar> s);
326 static String FromSDKString (const SDKString& from);
327
328 public:
329 /**
330 * Create a String object from a 'char-based' (os-setting - current code page) encoded string.
331 *
332 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
333 */
334 static String FromNarrowSDKString (const char* from);
335 static String FromNarrowSDKString (span<const char> s);
336 static String FromNarrowSDKString (const string& from);
337
338 public:
339 /**
340 * Create a String object from a char based on the encoding from the argument locale.
341 * This throws an exception if there is an error performing the conversion.
342 *
343 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
344 */
345 static String FromNarrowString (const char* from, const locale& l);
346 static String FromNarrowString (span<const char> s, const locale& l);
347 static String FromNarrowString (const string& from, const locale& l);
348
349 public:
350 /**
351 * \brief Take the given argument data (constant span) - which must remain unchanged - constant - for the application lifetime - and treat it as a Stroika String object
352 *
353 * This allows creation of String objects with fewer memory allocations and less copying, and more efficient storage, in most situations
354 *
355 * The resulting String is a perfectly compliant Stroika String (somewhat akin to std::string_view vs std::string).
356 *
357 * \par Example:
358 * \code
359 * String tmp1 = "FRED";
360 * String tmp2 = String{"FRED"};
361 * String tmp3 = String::FromStringConstant ("FRED"); // same as 2 above, but faster
362 * String tmp4 = "FRED"sv; // equivalent to FromStringConstant
363 * String tmp5 = "FRED"_k; // equivalent to FromStringConstant
364 * \endcode
365 *
366 * \em WARNING - BE VERY CAREFUL - be sure arguments have application lifetime (intended use case is C string literals).
367 *
368 * \pre argument string MAY contain embedded nul characters (but for char* overloads wrong size inferred).
369 *
370 * \note In Stroika v2.1 this was called class String_ExternalMemoryOwnership_ApplicationLifetime.
371 * \note In Stroika v2.1 this was called class String_Constant.
372 * \note In Stroika v2.1 this required NUL-char termination, but no longer
373 *
374 * \note FromStringConstant with 'char' - REQUIRES that the char elements are ASCII (someday this maybe lifted and interpret as Latin1)
375 * For the case of char, we also do not check/require the nul-termination bit.
376 */
377 template <size_t SIZE, IUNICODECanUnambiguouslyConvertFrom CHAR_T>
378 static String FromStringConstant (const CHAR_T (&cString)[SIZE]);
379 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
380 static String FromStringConstant (const basic_string_view<CHAR_T>& str);
381 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
382 static String FromStringConstant (span<const CHAR_T> str);
383 static String FromStringConstant (span<const ASCII> s); // better impl in CPP file
384 static String FromStringConstant (span<const char16_t> s); // ""
385 static String FromStringConstant (span<const wchar_t> s); // "" (inl file)
386 static String FromStringConstant (span<const char32_t> s); // ""
387
388 public:
389 /**
390 * Create a String object from UNICODE Latin-1 Supplement (https://en.wikipedia.org/wiki/Latin-1_Supplement)
391 *
392 * This is roughly, but not exactly, the same as the ISO-Latin-1 single-byte character set (https://en.wikipedia.org/wiki/ISO/IEC_8859-1)
393 *
394 * \note if character code point >= 256, this will throw an exception - not defined for that range (only checked if sizeof (CHAR_T) > 1)
395 *
396 * @aliases From8bitASCII () or FromExtendedASCII ()
397 */
398 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
399 static String FromLatin1 (const CHAR_T* cString);
400 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
401 static String FromLatin1 (span<const CHAR_T> s);
402 template <IStdBasicStringCompatibleCharacter CHAR_T>
403 static String FromLatin1 (const basic_string<CHAR_T>& s);
404
405 public:
406 /**
407 * \brief appends 'rhs' string to this string (without modifying this string) and returns the combined string
408 *
409 * @see Append() for a similar function that modifies 'this'
410 */
411 template <typename T>
412 nonvirtual String Concatenate (T&& rhs) const
413 requires (is_convertible_v<T, String>);
414
415 private:
416 nonvirtual String Concatenate_ (const String& rhs) const;
417
418 public:
419 /**
420 * Returns the number of characters in the String. Note that this may not be the same as bytes,
421 * does not include NUL termination, and doesn't in any way respect NUL termination (meaning
422 * a nul-character is allowed in a Stroika string).
423 *
424 * @aliases GetLength ()
425 */
426 nonvirtual size_t size () const noexcept;
427
428 public:
429 /**
430 */
431 nonvirtual bool empty () const noexcept;
432
433 public:
434 /**
435 */
436 nonvirtual const Character GetCharAt (size_t i) const noexcept;
437
438 public:
439 /**
440 * \brief return (read-only) Character object
441 *
442 * @aliases GetCharAt (size_t i) const;
443 *
444 * \note returns const due to https://stroika.atlassian.net/browse/STK-376 - so cannot accidentally have illusion of assignment being legal
445 */
446 nonvirtual const Character operator[] (size_t i) const noexcept;
447
448 public:
449 /**
450 * InsertAt() constructs a new string by taking this string, and inserting the argument
451 * characters.
452 *
453 * \em Note that for repeated insertions, this is much less efficient than just
454 * using StringBuilder.
455 *
456 * \note that if at == this->size (), you are appending.
457 */
458 nonvirtual String InsertAt (Character c, size_t at) const;
459 nonvirtual String InsertAt (const String& s, size_t at) const;
460 nonvirtual String InsertAt (span<const Character> s, size_t at) const;
461 nonvirtual String InsertAt (span<Character> s, size_t at) const;
462
463 public:
464 /**
465 * Remove the characters at 'charAt' (RemoveAt/1) or between 'from' and 'to' (const method - doesn't modify this)
466 *
467 * It is an error if this implies removing characters off the end of the string.
468 *
469 * \par Example Usage
470 * \code
471 * String mungedData = "04 July 2014";
472 * if (optional<pair<size_t, size_t>> i = mungedData.Find (RegularExpression{"0[^\b]"})) {
473 * mungedData = mungedData.RemoveAt (*i);
474 * }
475 * \endcode
476 *
477 * \pre (charAt < size ())
478 * \pre (from <= to)
479 * \pre (to <= size ())
480 *
481 * \em Note that this is quite inefficient: consider using StringBuilder
482 */
483 nonvirtual String RemoveAt (size_t charAt) const;
484 nonvirtual String RemoveAt (size_t from, size_t to) const;
485 nonvirtual String RemoveAt (pair<size_t, size_t> fromTo) const;
486
487 public:
488 /**
489 * Remove the first occurrence of Character 'c' / 'subString' from the string. Not an error if none
490 * found. Doesn't modify this (const method) - returns resulting string.
491 *
492 * \em Note that this is quite inefficient: consider using StringBuilder
493 */
494 nonvirtual String RemoveFirstIf (Character c) const;
495 nonvirtual String RemoveFirstIf (const String& subString) const;
496
497 public:
498 /**
499 * Remove all occurrences of Character 'c' / 'subString' from this string
500 * (walking front to back - if removal creates one, it too is removed).
501 * Not an error if none found. Doesn't modify this (const method) - returns resulting string.
502 */
503 nonvirtual String RemoveAll (Character c) const;
504 nonvirtual String RemoveAll (const String& subString) const;
505
506 public:
507 /**
508 * OVERLOADS WITH size_t:
509 *
510 * Produce a substring of this string, starting at 'from', and up to 'to' (or end of string
511 * for one-arg overload).
512 *
513 * *NB* This function treats the second argument differently than String::substr () -
514 * which respects the STL basic_string API. This function treats the second argument
515 * as a 'to', STL substr() treats it as a count. This amounts to the same thing for the
516 * very common cases of substr(N) - because second argument is defaulted, and,
517 * substr (0, N) - because then the count and end are the same.
518 *
519 * \pre (from <= to);
520 * \pre (to <= size ()); // for 2-arg variant
521 *
522 * \par Example Usage
523 * \code
524 * String tmp { "This is good" };
525 * Assert (tmp.SubString (5) == "is good");
526 * \endcode
527 *
528 * \par Example Usage
529 * \code
530 * const String kTest_ { "a=b"sv };
531 * const String kLbl2LookFor_ { "a="_k };
532 * if (resultLine.Find (kLbl2LookFor_)) {
533 * String tmp { resultLine.SubString (kLbl2LookFor_.length ()) };
534 * }
535 * Assert (tmp == "b");
536 * \endcode
537 *
538 * OVERLOADS WITH ptrdiff_t:
539 *
540 * This is like SubString() except that if from/to are negative, they are treated as relative to the end
541 * of the String.
542 *
543 * So for example, SubString (0, -1) is equivalent to SubString (0, size () - 1) - and so is an
544 * error if the string is empty.
545 *
546 * Similarly, SubString (-5) is equivalent to SubString (size ()-5, size ()) - so can be used
547 * to grab the end of a string.
548 *
549 * \pre (adjustedFrom <= adjustedTo);
550 * \pre (adjustedTo <= size ()); // for 2-arg variant
551 *
552 * \note \em Design Note
553 * We chose not to overload SubString() with this functionality because it would have been too easy
554 * to mask bugs.
555 *
556 * \note \em Design Note
557 * This was originally inspired by Python arrays. From https://docs.python.org/2/tutorial/introduction.html:
558 * Indices may also be negative numbers, to start counting from the right
559 *
560 * @aliases
561 * This API is identical to the javascript String.slice () method/behavior
562 * @see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
563 * 15.5.4.13 String.prototype.slice (start, end)
564 *
565 * @aliases
566 * This API - when called with negative indexes - used to be called CircularSubString ().
567 *
568 * @see substr
569 * @see SafeSubString
570 */
571 template <typename SZ>
572 nonvirtual String SubString (SZ from) const;
573 template <typename SZ1, typename SZ2>
574 nonvirtual String SubString (SZ1 from, SZ2 to) const;
575
576 public:
577 /**
578 * Like SubString(), but no requirements on from/to. These are just adjusted to the edge of the string
579 * if they exceed those endpoints. And if arguments are <0, they are interpreted as end-relative.
580 *
581 * @aliases
582 * This API - when called with negative indexes - used to be called SafeCircularSubString ().
583 *
584 * @see substr
585 * @see SubString
586 */
587 template <typename SZ>
588 nonvirtual String SafeSubString (SZ from) const;
589 template <typename SZ1, typename SZ2>
590 nonvirtual String SafeSubString (SZ1 from, SZ2 to) const;
591
592 public:
593 /**
594 * \brief Return a substring of this string, starting at 'argument' n. If n > size(), return empty string.
595 *
596 * \note This hides inherited Skip, and returns a subtype of what it returns, and is just slightly more
597 * efficient, but otherwise completely compatible behavior.
598 */
599 nonvirtual String Skip (size_t n) const;
600
601 public:
602 /**
603 * Return 'count' copies of this String (concatenated after one another).
604 */
605 nonvirtual String Repeat (unsigned int count) const;
606
607 public:
608 /**
609 * Returns true if the argument character or string is found anywhere inside this string.
610 * This is equivalent to
611 * return Matches (".*" + X + ".*"); // If X had no characters which look like they are part of
612 * // a regular expression
613 *
614 * @see Match
615 */
616 nonvirtual bool Contains (Character c, CompareOptions co = eWithCase) const;
617 nonvirtual bool Contains (const String& subString, CompareOptions co = eWithCase) const;
618
619 public:
620 /**
621 *
622 */
623 nonvirtual bool ContainsAny (Iterable<Character> cs, CompareOptions co = eWithCase) const;
624
625 public:
626 /**
627 * Returns true iff this string begins with the given character or substring.
628 *
629 * Similar to:
630 * return Matches (X + ".*");
631 * except for the fact that StartsWith() doesn't interpret 'X' as a regular expression
632 *
633 * \pre not subString.empty () -- for the subString overload (because otherwise "".StartsWith("") would be ill-defined)
634 *
635 * @see Match
636 * @see EndsWith
637 */
638 nonvirtual bool StartsWith (const Character& c, CompareOptions co = eWithCase) const;
639 nonvirtual bool StartsWith (const String& subString, CompareOptions co = eWithCase) const;
640
641 public:
642 /**
643 * Returns true iff this string ends with the given character or substring.
644 *
645 * Similar to:
646 * return Matches (".*" + X);
647 * except for the fact that EndsWith() doesn't interpret 'X' as a regular expression
648 *
649 * \pre not subString.empty () -- for the subString overload (because otherwise "".EndsWith("") would be ill-defined)
650 *
651 * @see Match
652 * @see StartsWith
653 */
654 nonvirtual bool EndsWith (const Character& c, CompareOptions co = eWithCase) const;
655 nonvirtual bool EndsWith (const String& subString, CompareOptions co = eWithCase) const;
656
657 public:
658 /**
659 * \brief Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
660 *
661 * \note this is too specific-purpose to be a very sensible API, but I find it pretty often pretty useful. So what-the-heck.
662 */
663 nonvirtual String AssureEndsWith (const Character& c, CompareOptions co = eWithCase) const;
664
665 public:
666 /**
667 * Apply the given regular expression return true if it matches this string. This only
668 * returns true if the expression matches the ENTIRE string - all the way to the end.
669 * @see FindEach() or @see Find - to find a set of things which match.
670 *
671 * \par Example Usage
672 * \code
673 * Assert (String{"abc"}.Matches ("abc"));
674 * Assert (not (String{"abc"}.Matches ("bc")));
675 * Assert (String{"abc"}.Matches (".*bc"));
676 * Assert (not String{"abc"}.Matches ("b.*c"));
677 * \endcode
678 *
679 * \par Example Usage
680 * \code
681 * static const RegularExpression kSonosRE_{"([0-9.:]*)( - .*)"_RegEx};
682 * static const String kTestStr_{"192.168.244.104 - Sonos Play:5"};
683 * optional<String> match1;
684 * optional<String> match2;
685 * EXPECT_TRUE (kTestStr_.Matches (kSonosRE_, &match1, &match2) and match1 == "192.168.244.104" and match2 == " - Sonos Play:5");
686 * EXPECT_EQ (kTestStr_.Matches<1> (kSonosRE_), make_tuple ("192.168.244.104"_k));
687 * EXPECT_EQ (kTestStr_.Matches<2> (kSonosRE_), make_tuple ("192.168.244.104"_k, " - Sonos Play:5"_k));
688 * \endcode
689 *
690 * \par Example Usage
691 * \code
692 * // https://tools.ietf.org/html/rfc3986#appendix-B
693 * static const RegularExpression kParseURLRegExp_{"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"_RegEx};
694 * optional<String> scheme;
695 * optional<String> authority;
696 * optional<String> path;
697 * optional<String> query;
698 * optional<String> fragment;
699 * if (rawURL.Matches (kParseURLRegExp_, nullptr, &scheme, nullptr, &authority, &path, nullptr, &query, nullptr, &fragment)) {
700 * DbgTrace ("***good - scheme={}"_f, scheme);
701 * DbgTrace ("***good - authority={}"_f, authority);
702 * DbgTrace ("***good - path={}"_f, path);
703 * DbgTrace ("***good - query={}"_f, query);
704 * DbgTrace ("***good - fragment={}"_f, fragment);
705 * }
706 * \endcode
707 *
708 * \par Example Usage
709 * \code
710 * // Praat 6.4.23 (October 27 2024)
711 * String processRunnerOutput = "Praat 6.4.23 (October 27 2024)";
712 * String version;
713 * return processRunnerOutput.Matches ("(\\w+)\\s([\\w\\.]+).*"_RegEx, nullptr, &version)? version: "???"sv;
714 * return get<0> (processRunnerOutput.Matches<1> ("\\w+\\s([\\w\\.]+).*"_RegEx).value_or(make_tuple("???"_k))); // Or better
715 * \endcode
716 *
717 * Details on the regular expression language/format can be found at:
718 * http://en.wikipedia.org/wiki/C%2B%2B11#Regular_expressions
719 *
720 * \note If any 'sub-match' arguments are passed to Matches, they MUST be of type optional<String>*, String*, or nullptr.
721 * Passing nullptr allows matched parameters to not be returned, but still identified positionally (by index).
722 *
723 * @see Contains
724 * @see StartsWith
725 * @see EndsWith
726 * @see Find
727 * @see FindEach
728 */
729 nonvirtual bool Matches (const RegularExpression& regEx) const;
730 nonvirtual bool Matches (const RegularExpression& regEx, Containers::Sequence<String>* matches) const;
731 template <Common::IAnyOf<optional<String>*, String*, nullptr_t>... OPTIONAL_STRINGS>
732 nonvirtual bool Matches (const RegularExpression& regEx, OPTIONAL_STRINGS&&... subMatches) const;
733 template <size_t I>
734 nonvirtual optional<Common::RepeatedTuple_t<I, String>> Matches (const RegularExpression& regEx) const;
735
736 public:
737 /**
738 * Find returns the index of the first occurrence of the given Character/substring argument in
739 * this string. Find () always returns a valid string index, which is followed by the
740 * given substring, or nullopt otherwise.
741 *
742 * Find () can optionally be provided a 'startAt' offset to begin the search at.
743 *
744 * And the overload taking a RegularExpression - returns BOTH the location where the match
745 * is found, as well as the end of the match.
746 *
747 * Note - for the special case of Find(empty-string) - the return value is 0 if this string
748 * is non-empty, and nullopt if this string was empty.
749 *
750 * @aliases - could have been called IndexOf ()
751 *
752 * \pre (startAt <= size ());
753 *
754 * \par Example Usage
755 * \code
756 * const String kTest_{ "a=b" };
757 * const String kLbl2LookFor_ { "a=" };
758 * if (kTest_.Find (kLbl2LookFor_)) {
759 * String tmp { kTest_.SubString (kLbl2LookFor_.length ()) };
760 * Assert (tmp == "b");
761 * }
762 * \endcode
763 *
764 * @see FindEach ()
765 * @see FindEachString ()
766 * @see Tokenize
767 */
768 nonvirtual optional<size_t> Find (Character c, CompareOptions co = eWithCase) const;
769 nonvirtual optional<size_t> Find (Character c, size_t startAt, CompareOptions co = eWithCase) const;
770 nonvirtual optional<size_t> Find (const String& subString, CompareOptions co = eWithCase) const;
771 nonvirtual optional<size_t> Find (const String& subString, size_t startAt, CompareOptions co = eWithCase) const;
772 nonvirtual optional<pair<size_t, size_t>> Find (const RegularExpression& regEx, size_t startAt = 0) const;
773 nonvirtual Traversal::Iterator<Character> Find (const function<bool (Character item)>& that) const;
774
775 public:
776 /**
777 * This is just like Find, but captures all the matching results in an iterable result.
778 * The reason the overload for RegularExpression's returns a list of pair<size_t,size_t> is because
779 * the endpoint of the match is ambiguous. For fixed string Find, the end of match is computable
780 * from the arguments.
781 *
782 * FindEach () can be more handy to use than directly using Find () in scenarios where you want
783 * to iterate over each match:
784 * e.g.:
785 * for (auto i : s.FindEach ("xxx")) {....}
786 *
787 * Also, to count matches, you can use:
788 * size_t nMatches = FindEach (matchexp).size ();
789 *
790 * Note: FindEach handles the special case of an empty match as ignored, so FindEach(empty-str-or-regexp)
791 * always returns an empty list. Also - for the String case, it returns distinct matches, so if you
792 * search String{"AAAA"}.FindEach ("AA"), you will get 2 answers ({0, 2}).
793 *
794 * @see Find ()
795 * @see FindEachString ()
796 * @see Matches ()
797 */
798 nonvirtual Containers::Sequence<pair<size_t, size_t>> FindEach (const RegularExpression& regEx) const;
799 nonvirtual Containers::Sequence<size_t> FindEach (const String& string2SearchFor, CompareOptions co = eWithCase) const;
800
801 public:
802 /**
803 * \par Example Usage
804 * \code
805 * const String kTest_{ "a=b,"sv };
806 * const RegularExpression kRE_{ "a=(.*)[, ]" };
807 * Sequence<String> tmp1{ kTest_.FindEachString (kRE_) };
808 * Assert (tmp1.size () == 1 and tmp1[0] == "a=b,");
809 * Sequence<RegularExpressionMatch> tmp2 { kTest_.FindEachMatch (kRE_) };
810 * Assert (tmp2.size () == 1 and tmp2[0].GetFullMatch () == "a=b," and tmp2[0].GetSubMatches () == Sequence<String>{"b"});
811 * \endcode
812 *
813 * @see Find ()
814 * @see FindEachString ()
815 * @see Matches ()
816 */
817 nonvirtual Containers::Sequence<RegularExpressionMatch> FindEachMatch (const RegularExpression& regEx) const;
818
819 public:
820 /**
821 * \par Example Usage
822 * \code
823 * const String kTest_ { "a=b, c=d"_k };
824 * const RegularExpression kRE_ { "(.)=(.)" };
825 * Assert ((kTest_.FindEachString (kRE_) == vector<String>{"a=b", "c=d"}));
826 * \endcode
827 *
828 * @see Find ()
829 * @see FindEachMatch ()
830 * @see Matches ()
831 */
832 nonvirtual Containers::Sequence<String> FindEachString (const RegularExpression& regEx) const;
833
834 public:
835 /**
836 * RFind (substring) returns the index of the last occurrence of the given substring in
837 * this string. This function always returns a valid string index, which is followed by the
838 * given substring, or optional<size_t> {} otherwise.
839 *
840 * @aliases RIndexOf ()
841 */
842 nonvirtual optional<size_t> RFind (Character c) const noexcept;
843 nonvirtual optional<size_t> RFind (const String& subString) const;
844
845 public:
846 /**
847 * Replace the range of this string with the given replacement. Const method: just creates new string as described.
848 */
849 nonvirtual String Replace (size_t from, size_t to, const String& replacement) const;
850 nonvirtual String Replace (pair<size_t, size_t> fromTo, const String& replacement) const;
851
852 public:
853 /**
854 * Apply the given regular expression, with 'with' and replace each match. This doesn't
855 * modify this string, but returns the replacement string.
856 *
857 * @todo CHECK - but I think we define this to replace ALL matches? Or make that a parameter?
858 * See regex_replace () for definition of the regEx language
859 *
860 * Require (not string2SearchFor.empty ());
861 *
862 * \par Example Usage
863 * \code
864 * mungedData = mungedData.ReplaceAll (RegularExpression{ "\\b0+" }, ""); // strip all leading zeros
865 * \endcode
866 *
867 * \par Example Usage
868 * \code
869 * String a = "a b \n\t c";
870 * EXPECT_EQ (a.ReplaceAll (RegularExpression{"\\s+"sv}, " "sv), "a b c");
871 * EXPECT_EQ (a.ReplaceAll ("\\s+"_RegEx, " "sv), "a b c");
872 * \endcode
873 *
874 * Note - it IS legal to have with contain the original search for string, or even
875 * to have it 'created' as part of where it gets
876 * inserted. The implementation will only replace those that pre-existed.
877 *
878 * \note To perform a regular expression replace-all, which is case insensitive, create the regular expression with CompareOptions::eCaseInsensitive
879 *
880 * \note ReplaceAll could have been called 'SafeString' or 'FilteredString' (was at one point - replaces that functionality)
881 */
882 nonvirtual String ReplaceAll (const RegularExpression& regEx, const String& with) const;
883 nonvirtual String ReplaceAll (const String& string2SearchFor, const String& with, CompareOptions co = eWithCase) const;
884 nonvirtual String ReplaceAll (const function<bool (Character)>& replaceCharP, const String& with) const;
885 nonvirtual String ReplaceAll (const Containers::Set<Character>& charSet, const String& with) const;
886
887 public:
888 /**
889 * Replace any CR or LF or CRLF sequences with plain NL-terminated text.
890 */
891 nonvirtual String NormalizeTextToNL () const;
892
893 public:
894 /**
895 * \brief Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument space character).
896 *
897 * \note see Qt 'QString::simplified ()'. Idea is Trim () (right and left) - plus replace contiguous substrings of Character::IsSpace () characters with a
898 * single (given) space character.
899 */
900 nonvirtual String NormalizeSpace (Character useSpaceCharacter = ' ') const;
901
902 public:
903 /**
904 * Break this String into constituent parts. This is a simplistic API but at least handy as is.
905 *
906 * The caller can specify the token separators by set, by lambda. This defaults to the lambda "isWhitespace".
907 *
908 * This is often called 'Split' in other APIs. This is NOT (as is now) a replacement for flex, but just for
909 * simple, but common string splitting needs (though if I had a regexp param, it may approach the power of flex).
910 *
911 * \note If this->length () == 0, this method returns a list of length 0;
912 * \note Its fine for the split character/characters to be missing, in which case this
913 * returns a list of length 1
914 *
915 * \par Example Usage
916 * \code
917 * String t { "ABC DEF G" };
918 * Assert (t.Tokenize ().length () == 3);
919 * Assert (t.Tokenize ()[1] == "DEF");
920 * \endcode
921 *
922 * \par Example Usage
923 * \code
924 * String t { "foo= 7" };
925 * auto tt = t.Tokenize ({ '=' });
926 * Assert (tt.length () == 2);
927 * Assert (tt[1] == " 7");
928 * Assert (tt[1].Trim () == "7");
929 * \endcode
930 *
931 * \par Example Usage
932 * \code
933 * String t { "foo= 7" };
934 * auto tt = t.Tokenize ({ '=', ' ' });
935 * Assert (tt.length () == 2);
936 * Assert (tt[1] == "7");
937 * \endcode
938 *
939 * @see Find
940 *
941 * TODO:
942 * @todo Review:
943 * http://qt-project.org/doc/qt-5.0/qtcore/qstring.html#split
944 * especially:
945 * QString line = "forename\tmiddlename surname \t \t phone";
946 * QRegularExpression sep("\\s+");
947 * str = line.section(sep, 2, 2); // str == "surname"
948 * str = line.section(sep, -3, -2); // str == "middlename surname"
949 * Make sure our Find/Tokenize is at least this simple, and maybe diff between find and split
950 * is FIND the regular expression names the things looked for and SPLIT() uses regexp to name the separators?
951 * Add something like the above to the String String demo app (when it exists)
952 */
953 nonvirtual Containers::Sequence<String> Tokenize () const;
954 nonvirtual Containers::Sequence<String> Tokenize (const function<bool (Character)>& isTokenSeperator) const;
955 nonvirtual Containers::Sequence<String> Tokenize (const RegularExpression& isSeparator) const;
956 nonvirtual Containers::Sequence<String> Tokenize (const Containers::Set<Character>& delimiters) const;
957
958 public:
959 /**
960 * \brief break the String into a series of lines;
961 *
962 * \note could almost be done with Tokenize(), except for the one-sided nl-specific trimming.
963 *
964 * \note removes line-endings (\r\n, or \n, or \r).
965 */
966 nonvirtual Containers::Sequence<String> AsLines () const;
967
968 public:
969 /**
970 * \brief Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with .Map<>) producing a subset of the lines which match
971 *
972 * note this is useful to replace 'shell script' logic where you might run some command and grep through its output for all
973 * matching lines.
974 *
975 * \par Example Usage
976 * \code
977 * String firstALineOrEmpty = String{"...e.g. from output of ProcessRunner..."}.Grep ("a:").NthValue (0);
978 * \endcode
979 */
980 nonvirtual Containers::Sequence<String> Grep (const String& fgrepArg) const;
981 nonvirtual Containers::Sequence<String> Grep (const RegularExpression& egrepArg) const;
982
983 public:
984 /**
985 * \brief Useful to replace 'awk print $3' - replace with Col(2) - zero based
986 *
987 * default separator = "\\s+"_RegEx;
988 *
989 * \par Example Usage
990 * \code
991 * Assert (String{"ffmpeg version 7.1"}.Col (2) == "7.1");
992 * \endcode
993 *
994 */
995 nonvirtual optional<String> Col (size_t i) const;
996 nonvirtual optional<String> Col (size_t i, const RegularExpression& separator) const;
997
998 public:
999 /**
1000 * \brief see Col(i) - but with default value of empty string
1001 */
1002 nonvirtual String ColValue (size_t i, const String& valueIfMissing = {}) const;
1003
1004 public:
1005 /**
1006 * String LTrim () scans the characters from the left to right, and applies the given
1007 * 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed,
1008 * and the resulting string is returned. This does not modify the current string it's
1009 * applied to - just returns the trimmed string.
1010 */
1011 nonvirtual String LTrim (bool (*shouldBeTrimmed) (Character) = Character::IsWhitespace) const;
1012
1013 public:
1014 /**
1015 * String RTrim () scans the characters from the right to left, and applies the given
1016 * 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed,
1017 * and the resulting string is returned. This does not modify the current string it's
1018 * applied to - just returns the trimmed string.
1019 *
1020 * \par Example Usage
1021 * \code
1022 * String name = origName.RTrim ([] (Character c) { return c == '\\';}); // Trim a trailing backslash(s), if present
1023 * \endcode
1024 */
1025 nonvirtual String RTrim (bool (*shouldBeTrimmed) (Character) = Character::IsWhitespace) const;
1026
1027 public:
1028 /**
1029 * String Trim () is logically equivalent to RTrim (shouldBeTrimmed).LTrim (shouldBeTrimmed).
1030 */
1031 nonvirtual String Trim (bool (*shouldBeTrimmed) (Character) = Character::IsWhitespace) const;
1032
1033 public:
1034 /**
1035 * Walk the entire string, and produce a new string consisting of all characters for which
1036 * the predicate 'removeCharIf' returned false.
1037 */
1038 nonvirtual String StripAll (bool (*removeCharIf) (Character)) const;
1039
1040 public:
1041 /**
1042 * Combine the given array into a single string (typically comma space) separated.
1043 * If given a list of length n, this adds n-1 separators.
1044 *
1045 * \note .Net version - https://docs.microsoft.com/en-us/dotnet/api/system.string.join?redirectedfrom=MSDN&view=net-6.0#System_String_Join_System_String_System_String___
1046 * \note Java version - https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#join-java.lang.CharSequence-java.lang.CharSequence...-
1047 * \note Javascript - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/join
1048 *
1049 * \note - CONSIDER LOSING this as 'Iterable<>::Join' just appears to work better -- LGP 2025-01-21
1050 */
1051 static String Join (const Iterable<String>& list, const String& separator = ", "sv);
1052
1053 public:
1054 /**
1055 * Return a new string based on this string where each upper case character is replaced by its
1056 * lower case equivalent. Note that non-upper-case characters (such as punctuation) are unchanged.
1057 */
1058 nonvirtual String ToLowerCase () const;
1059
1060 public:
1061 /**
1062 * Return a new string based on this string where each lower case character is replaced by its
1063 * upper case equivalent. Note that non-lower-case characters (such as punctuation) are unchanged.
1064 */
1065 nonvirtual String ToUpperCase () const;
1066
1067 public:
1068 /**
1069 * Return true if the string contains zero non-whitespace characters.
1070 */
1071 nonvirtual bool IsWhitespace () const;
1072
1073 public:
1074 /**
1075 * \brief return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if truncated)
1076 *
1077 * This function is for GUI/display purposes. It returns the given string, trimmed down
1078 * to at most maxLen characters, and removes whitespace (on 'to trim' side - given by keepLeft flag -
1079 * if needed to get under maxLen).
1080 *
1081 * Note in the 3-arg overload, the ellipsis string MAY be the empty string.
1082 */
1083 nonvirtual String LimitLength (size_t maxLen, StringShorteningPreference keepPref = StringShorteningPreference::ePreferKeepLeft) const;
1084 nonvirtual String LimitLength (size_t maxLen, StringShorteningPreference keepLeft, const String& ellipsis) const;
1085
1086 public:
1087 /**
1088 * CopyTo () copies the contents of this string to the target buffer.
1089 * CopyTo () does NOT nul-terminate the target buffer
1090 *
1091 * Returns span of CHAR_T objects written - a subspan of the argument span
1092 *
1093 * \pre s.size () >= UTFConvert::ComputeTargetBufferSize<CHAR_T> (...this-string-data...);
1094 *
1095 * \see See also GetData<CHAR_T> (buf) - similar functionality - except caller doesn't need to know size of buffer to allocate
1096 */
1097 template <IUNICODECanAlwaysConvertTo CHAR_T>
1098 nonvirtual span<CHAR_T> CopyTo (span<CHAR_T> s) const
1099 requires (not is_const_v<CHAR_T>);
1100
1101 public:
1102 /**
1103 * Convert String losslessly into a standard C++ type.
1104 *
1105 * Only specifically specialized variants supported: IBasicUNICODEStdString<T> or same_as<T,String>
1106 * o wstring
1107 * o u8string
1108 * o u16string
1109 * o u32string
1110 * o String (return *this; handy sometimes in templated usage; harmless)
1111 * as well as:
1112 * o filesystem::path (or anything with .wstring() -> wstring method) - note see qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
1113 *
1114 * DEPRECATED AS OF v3.0d1 because As is const method - could do non-const As<> overload for these, but that would be confusing
1115 * o const wchar_t*
1116 * o const Character*
1117 *
1118 * \note
1119 * o As<u8string> () equivalent to AsUTF8 () call
1120 * o As<u16string> () equivalent to AsUTF16 () call
1121 * o As<u32string> () equivalent to AsUTF32 () call
1122 *
1123 * \note We tried to also have template<typename T> explicit operator T () const; - conversion operator - but
1124 * We got too frequent confusion in complex combinations of templates, like with:
1125 * Set<String> x ( *optional<String> {String{}) ); // fails cuz calls operator Set<String> ()!
1126 * Set<String> x { *optional<String> {String{}) }; // works as expected
1127 */
1128 template <typename T>
1129 nonvirtual T As () const
1130 requires (IBasicUNICODEStdString<T> or same_as<T, String> or constructible_from<T, wstring>);
1131
1132 public:
1133 /**
1134 * Create a narrow string object from this, based on the encoding from the argument locale.
1135 * This throws an exception if there is an error performing the conversion, and the 'into' overload
1136 * leaves 'into' in an undefined (but safe) state.
1137 */
1138 nonvirtual string AsNarrowString (const locale& l) const;
1139 nonvirtual string AsNarrowString (const locale& l, AllowMissingCharacterErrorsFlag) const;
1140
1141 public:
1142 /**
1143 * Convert String losslessly into a standard C++ type.
1144 * Only specifically specialized variants are supported.
1145 *
1146 * SUPPORTED result type "T": values are:
1147 * string
1148 * u8string
1149 */
1150 template <typename T = u8string>
1151 nonvirtual T AsUTF8 () const
1152 requires (same_as<T, string> or same_as<T, u8string>);
1153
1154 public:
1155 /**
1156 * Convert String losslessly into a standard C++ type u16string.
1157 *
1158 * \par Example Usage:
1159 * \code
1160 * String s = u"hi mom";
1161 * u16string su = s.AsUTF16 ();
1162 * \endcode
1163 *
1164 * \note - the resulting string may have a different length than this->size() due to surrogates
1165 *
1166 * @todo allow wchar_t if sizeof(wchar_t) == 2
1167 */
1168 template <typename T = u16string>
1169 nonvirtual T AsUTF16 () const
1170 requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>));
1171
1172 public:
1173 /**
1174 * Convert String losslessly into a standard C++ type u32string.
1175 *
1176 * \par Example Usage:
1177 * \code
1178 * String s = u"hi mom";
1179 * u32string su = s.AsUTF32 ();
1180 * \endcode
1181 *
1182 * \note - As of Stroika 2.1d23 - the resulting string may have a different length than this->size() due to surrogates,
1183 * but eventually the intent is to fix Stroika's string class so this is not true, and it returns the length of the string
1184 * in size () with surrogates removed (in other words uses ucs32 representation). But not there yet.
1185 */
1186 template <typename T = u32string>
1187 nonvirtual T AsUTF32 () const
1188 requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>));
1189
1190 public:
1191 /**
1192 * See docs on SDKChar for meaning (character set).
1193 *
1194 * Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot).
1195 * But in that case, AsSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.
1196 */
1197 nonvirtual SDKString AsSDKString () const;
1199
1200 public:
1201 /**
1202 * See docs on SDKChar for meaning (character set). If SDKChar is a wide character, there is probably still a
1203 * default 'code page' to interpret narrow characters (Windows CP_ACP). This is a string in that character set.
1204 *
1205 * Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot).
1206 * But in that case, AsNarrowSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.
1207 */
1208 nonvirtual string AsNarrowSDKString () const;
1209 nonvirtual string AsNarrowSDKString (AllowMissingCharacterErrorsFlag) const;
1210
1211 public:
1212 /**
1213 * Convert String losslessly into a standard C++ type.
1214 * Only specifically specialized variants are supported (right now just <string> supported).
1215 * The source string MUST be valid ascii characters - throw RuntimeErrorException<>
1216 *
1217 * \par Example Usage:
1218 * \code
1219 * string a1 = String{"hi mom"}.AsASCII (); // OK
1220 * string a2 = String{u"שלום"}.AsASCII (); // throws
1221 * \endcode
1222 *
1223 * \note - this is a (compatible) change of behavior: before Stroika v2.1d23, this would assert out on invalid ASCII.
1224 *
1225 * Supported Types:
1226 * o Memory::StackBuffer<char>
1227 * o string
1228 * o u8string (note any ASCII string is also legit utf-8)
1229 */
1230 template <typename T = string>
1231 nonvirtual T AsASCII () const
1232 requires requires (T* into) {
1233 { into->empty () } -> same_as<bool>;
1234 { into->push_back (ASCII{0}) };
1235 };
1236
1237 public:
1238 /**
1239 * Convert String losslessly into a standard C++ type.
1240 * Only specifically specialized variants are supported (right now just <string> supported).
1241 * If this source contains any invalid ASCII characters, this returns nullopt, and else a valid engaged string.
1242 *
1243 * Supported Types(T):
1244 * o Memory::StackBuffer<char>
1245 * o string
1246 * o u8string (note any valid ASCII string is also valid utf-8)
1247 */
1248 template <typename T = string>
1249 nonvirtual optional<T> AsASCIIQuietly () const
1250 requires requires (T* into) {
1251 { into->empty () } -> same_as<bool>;
1252 { into->push_back (ASCII{0}) };
1253 };
1254
1255 public:
1256 /**
1257 * \brief Summary data for raw contents of rep - each rep will support at least one of these span forms
1258 *
1259 * Each rep will support a span of at least one code-point type (ascii, utf8, utf16, or utf32)
1260 *
1261 * This API is guaranteed to support a span of at least one of these types (maybe more). The caller may
1262 * specify the code-point type preferred.
1263 *
1264 * \note eAscii is a subset of eSingleByteLatin1, so when the type eAscii is returned, EITHER fSingleByteLatin1 or fAscii
1265 * may be used.
1266 *
1267 * This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since
1268 * this reasonably likely to change in future versions.
1269 */
1272 /**
1273 * ASCII is useful to track in storage (though same size as eSingleByteLatin1) - because requests
1274 * to convert to UTF-8 are free - ASCII is legit UTF8 (not true for eSingleByteLatin1)
1275 */
1277 /**
1278 * Latin1 - 8 bit representation of characters. But 256 of them - more than plain ascii.
1279 * And cheap/easy to convert to UNICODE (since code points of wider characters exactly the same values).
1280 */
1282 eChar16,
1283 eChar32
1284 };
1286 union {
1287 span<const ASCII> fAscii;
1288 span<const Latin1> fSingleByteLatin1;
1289 span<const char16_t> fChar16;
1290 span<const char32_t> fChar32;
1291 };
1292 };
1293
1294 public:
1295 /**
1296 * \brief return the constant character data inside the string in the form of a case variant union of different span types (at least one will be there)
1297 * templated type arg just used to pick a preferred type.
1298 *
1299 * \note CHAR_TYPE == char implies eAscii
1300 *
1301 * \note Reason for the two step API - getting the PeekSpanData, and then using - is because getting
1302 * the data is most expensive part (virtual function), and the packaged PeekSpanData gives enuf
1303 * info to do the next steps (quickly inline usually)
1304 *
1305 * This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since
1306 * this reasonably likely to change in future versions.
1307 */
1308 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE = ASCII>
1309 nonvirtual PeekSpanData GetPeekSpanData () const;
1310
1311 public:
1312 /**
1313 * \brief return the constant character data inside the string in the form of a span or nullopt if not available for that CHAR_TYPE
1314 *
1315 * This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since
1316 * this reasonably likely to change in future versions.
1317 *
1318 * \note It is generally true that the data IsASCII (span) IFF Peek<ASCII> returns non-nullopt. But this is
1319 * not ACTUALLY always true. Generally, Stroika constructs strings like this. But callers may manually construct
1320 * a String with backend rep u32string, for example (e.g because of move construct) - and that might just happen
1321 * to be all ascii. You can count on that IF you get back value from PeekData<ASCII> - it must be all ASCII. But
1322 * the converse is not always true.
1323 */
1324 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
1325 static optional<span<const CHAR_TYPE>> PeekData (const PeekSpanData& pds);
1326 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
1327 nonvirtual optional<span<const CHAR_TYPE>> PeekData () const;
1328
1329 public:
1330 /**
1331 * \brief return the constant character data inside the string (rep) in the form of a span, possibly quickly and directly, and possibly copied into possiblyUsedBuffer
1332 *
1333 * This API will typically return a span of data which is internal pointers into the data of the rep (and so its invalidated on the
1334 * next change to the string).
1335 *
1336 * BUT - it maybe a span of data stored into the argument possiblyUsedBuffer (which is why it must be provided - cannot be nullptr).
1337 * If you want the freedom to not pass in this buffer, see the PeekData API.
1338 *
1339 * \note - CHAR_T must satisfy the concept IUNICODECanAlwaysConvertTo - SAFELY - because the string MIGHT contain characters not in any
1340 * unsafe char class (like ASCII or Latin1), and so there might not be a way to do the conversion. Use
1341 * PeekData () to do that - where it can return nullopt if no conversion possible.
1342 *
1343 * \par Example Usage
1344 * \code
1345 * Memory::StackBuffer<char8_t> maybeIgnoreBuf1;
1346 * span<const char8_t> thisData = GetData (&maybeIgnoreBuf1);
1347 * \endcode
1348 *
1349 * \note Prior to Stroika v3.0d1, GetData() took no arguments, and returned pair<const CHAR_TYPE*, const CHAR_TYPE*>
1350 * which is pretty similar, but not quite the same. To adapt any existing code calling that older version of the API
1351 * just add a Memory::StackBuffer<T> b; and pass &b to GetData(); And the return span is not the same as pair<> but
1352 * easily convertible.
1353 */
1354 template <IUNICODECanAlwaysConvertTo CHAR_TYPE, size_t STACK_BUFFER_SZ>
1355 static span<const CHAR_TYPE> GetData (const PeekSpanData& pds, Memory::StackBuffer<CHAR_TYPE, STACK_BUFFER_SZ>* possiblyUsedBuffer);
1356 template <IUNICODECanAlwaysConvertTo CHAR_TYPE, size_t STACK_BUFFER_SZ>
1357 nonvirtual span<const CHAR_TYPE> GetData (Memory::StackBuffer<CHAR_TYPE, STACK_BUFFER_SZ>* possiblyUsedBuffer) const;
1358
1359 public:
1360 struct EqualsComparer;
1361
1362 public:
1363 struct LessComparer;
1364
1365 public:
1366 struct ThreeWayComparer;
1367
1368 public:
1369 /**
1370 * Return true if case sensitive compare of the two IConvertibleToString objects have the same characters.
1371 * Indirects to EqualsComparer{eWithCase} (...)
1372 *
1373 * \note Both the plain (non-template) declaration of operator== and the 'requires (not same_as...)' clause on the template overload appear to be required
1374 * on all major compilers due to quirks of the operator== rewrite rules, but I don't fully understand why --LGP 2024-11-18.
1375 */
1376 nonvirtual bool operator== (const String& rhs) const;
1377 template <IConvertibleToString T>
1378 nonvirtual bool operator== (T&& rhs) const
1379 requires (not same_as<remove_cvref_t<T>, String>);
1380
1381 public:
1382 /**
1383 * Return strong_ordering of case sensitive (three-way) compare of the two IConvertibleToString objects.
1384 * Indirects to ThreeWayComparer{eWithCase} (...)
1385 *
1386 * \see operator== ()
1387 */
1388 nonvirtual strong_ordering operator<=> (const String& rhs) const;
1389 template <IConvertibleToString T>
1390 nonvirtual strong_ordering operator<=> (T&& rhs) const
1391 requires (not same_as<remove_cvref_t<T>, String>);
1392
1393 public:
1394 /**
1395 * @aliases basic_string<char>::npos
1396 *
1397 * This is only used for STL-compatibility APIs, like substr (), find (), rfind (), etc.
1398 */
1399 static constexpr size_t npos = static_cast<size_t> (-1);
1400
1401 public:
1402 /**
1403 * @aliases size
1404 */
1405 nonvirtual size_t length () const noexcept;
1406
1407 public:
1408 /**
1409 * \note BREAKING change between Stroika 2.1 and v3 - const c_str/0 no longer guaranteed to return non-null
1410 *
1411 * Mitigating this, the non-const c_str() still will return non-null, and the const overload taking
1412 * StackBuffer<wchar_t> will also guarantee returning non-null.
1413 *
1414 * In the case of the overloads taking no arguments, the lifetime of the returned pointer is until the
1415 * next change to this string. In the case of the StackBuffer overload, the guarantee extends for the lifetime
1416 * of the argument buffer (typically just the next few lines of code).
1417 *
1418 * This will always return a value which is NUL-terminated.
1419 *
1420 * Note also - the c_str () function CAN now be somewhat EXPENSIVE, causing a mutation of the String object, so use
1421 * one of the const overloads where possible (or where performance matters).
1422 *
1423 * \note Why does c_str (StackBuffer) return a tuple?
1424 * Sometimes you just want a plain const wchar_t* you can use with an old C pointer based API. But that
1425 * fails/asserts out if you happen to have an empty string and try to get the pointer. Sometimes - you just need
1426 * the pointer!
1427 *
1428 * And why the string-view part? Because sometimes you want the LENGTH. Sure - you can just compute it again. But
1429 * that is costly. Sure you can just use the original string length. BUT THAT WOULD BE A BUG once I support
1430 * surrogates properly (at least on windows where wchar_t isn't char32_t).
1431 */
1432 nonvirtual tuple<const wchar_t*, wstring_view> c_str (Memory::StackBuffer<wchar_t>* possibleBackingStore) const;
1433
1434 public:
1435 /**
1436 * Follow the basic_string<>::find () API
1437 *
1438 * need more overloads.
1439 *
1440 * Returns String::npos if not found, else the zero based index.
1441 */
1442 nonvirtual size_t find (Character c, size_t startAt = 0) const;
1443 nonvirtual size_t find (const String& s, size_t startAt = 0) const;
1444
1445 public:
1446 /**
1447 * Follow the basic_string<>::rfind () API
1448 *
1449 * need more overloads.
1450 *
1451 * Returns String::npos if not found, else the zero based index.
1452 */
1453 nonvirtual size_t rfind (Character c) const;
1454
1455 public:
1456 /**
1457 * mimic https://en.cppreference.com/w/cpp/string/basic_string/front
1458 *
1459 * \pre not empty ()
1460 */
1461 nonvirtual Character front () const;
1462
1463 public:
1464 /**
1465 * mimic https://en.cppreference.com/w/cpp/string/basic_string/back
1466 *
1467 * \pre not empty ()
1468 */
1469 nonvirtual Character back () const;
1470
1471 public:
1472 /**
1473 * Compatible with STL::basic_string::substr () - which interprets second argument as count. Not the same
1474 * as Stroika::String::SubString (where the second argument is a 'to')
1475 *
1476 * @see SubString
1477 *
1478 * From http://en.cppreference.com/w/cpp/string/basic_string/substr
1479 * Returns a substring [pos, pos+count). If the requested substring extends
1480 * past the end of the string, or if count == npos, the returned substring is [pos, size()).
1481 * std::out_of_range if pos > size()
1482 */
1483 nonvirtual String substr (size_t from, size_t count = npos) const;
1484
1485 public:
1486 ///////////////// DEPRECATED FUNCTIONS /////////////////
1487 [[deprecated ("Since Stroika v3.0d13 - if you must use c_str() - use the overload taking StackBuffer arg), or use As<wstring> "
1488 "().c_str ()")]] const wchar_t*
1489 c_str ();
1490 [[deprecated ("Since Stroika v3.0d12 use StringBuilder::SetAt")]] void SetCharAt (Character c, size_t i);
1491 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void erase (size_t from = 0);
1492 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void erase (size_t from, size_t count);
1493 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void push_back (wchar_t c);
1494 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void push_back (Character c);
1495 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (Character c);
1496 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const String& s);
1497 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const wchar_t* s);
1498 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const wchar_t* from, const wchar_t* to);
1499 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (const Character* from, const Character* to);
1500 template <typename CHAR_T>
1501 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] void Append (span<const CHAR_T> s)
1502 requires (same_as<CHAR_T, Character> or same_as<CHAR_T, char32_t>);
1503 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] String& operator+= (Character appendage);
1504 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] String& operator+= (const String& appendage);
1505 [[deprecated ("Since Stroika v3.0d12 use StringBuilder")]] String& operator+= (const wchar_t* appendageCStr);
1506 [[deprecated ("Since Stroika v3.0d12 - just use a b String{}")]] void clear ()
1507 {
1508 *this = String{};
1509 }
1510 template <typename T>
1511 [[deprecated ("Since Stroika v3.0d2, just use 0 arg version)")]] void As (T* into) const
1512 requires (IBasicUNICODEStdString<T> or same_as<T, String>)
1513 {
1514 *into = this->As<T> ();
1515 }
1516 [[deprecated ("Since Stroika v3.0d2, just use 1 arg version)")]] void AsNarrowString (const locale& l, string* into) const
1517 {
1518 *into = this->AsNarrowString (l);
1519 }
1520 template <typename T = u8string>
1521 [[deprecated ("Since Stroika v3.0d2 - use AsUTF8/0")]] void AsUTF8 (T* into) const
1522 requires (same_as<T, string> or same_as<T, u8string>)
1523 {
1524 *into = this->AsUTF8 ();
1525 }
1526 template <typename T = u16string>
1527 [[deprecated ("Since Stroika v3.0d2 - use AsUTF16/0")]] void AsUTF16 (T* into) const
1528 requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>))
1529 {
1530 *into = AsUTF16 ();
1531 }
1532 template <typename T = u32string>
1533 [[deprecated ("Since Stroika v3.0d2 - use AsUTF32/0")]] void AsUTF32 (T* into) const
1534 requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>))
1535 {
1536 *into = AsUTF32 ();
1537 }
1538 [[deprecated ("Since Stroika v3.0d2 - just use /0")]] void AsSDKString (SDKString* into) const
1539 {
1540 *into = AsSDKString ();
1541 }
1542 [[deprecated ("Since Stroika v3.0d2 - just use /0")]] void AsNarrowSDKString (string* into) const
1543 {
1544 *into = SDK2Narrow (AsSDKString ());
1545 }
1546 template <typename T = string>
1547 [[deprecated ("Since v3.0d2 use /0")]] void AsASCII (T* into) const
1548 requires (same_as<T, string> or same_as<T, Memory::StackBuffer<char>>)
1549 {
1550 if (not AsASCIIQuietly (into)) {
1551 ThrowInvalidAsciiException_ ();
1552 }
1553 }
1554 template <typename T = string>
1555 [[deprecated ("Since v3.0d2 use /0 overload")]] bool AsASCIIQuietly (T* into) const
1556 requires (same_as<T, string> or same_as<T, Memory::StackBuffer<char>>)
1557 {
1558 auto r = this->AsASCIIQuietly ();
1559 if (r) {
1560 *into = *r;
1561 return true;
1562 }
1563 else {
1564 return false;
1565 }
1566 }
1567 [[deprecated ("Since Stroika v3.0d5 use StringShorteningPreference argument")]] String LimitLength (size_t maxLen, bool keepLeft) const
1568 {
1569 return LimitLength (maxLen, keepLeft ? StringShorteningPreference::ePreferKeepLeft : StringShorteningPreference::ePreferKeepRight);
1570 }
1571 [[deprecated ("Since Stroika v3.0d5 use StringShorteningPreference argument")]] String LimitLength (size_t maxLen, bool keepLeft,
1572 const String& ellipsis) const
1573 {
1574 return LimitLength (maxLen, keepLeft ? StringShorteningPreference::ePreferKeepLeft : StringShorteningPreference::ePreferKeepRight, ellipsis);
1575 }
1576 template <typename CHAR_T>
1577 [[deprecated ("Since Stroika v3.0d1, String{}")]] static String FromASCII (span<const CHAR_T> s)
1578 {
1579 return String{s};
1580 }
1581 template <typename CHAR_T>
1582 [[deprecated ("Since Stroika v3.0d1, String{}")]] static String FromASCII (const CHAR_T* cString)
1583 {
1584 return String{cString};
1585 }
1586 template <IStdBasicStringCompatibleCharacter CHAR_T>
1587 [[deprecated ("Since Stroika v3.0d1, String{}")]] static String FromASCII (const basic_string<CHAR_T>& str)
1588 {
1589 return String{str};
1590 }
1591 [[deprecated ("Since Stroika v3.0d1, use span{} overload for this")]] static String FromASCII (const char* from, const char* to)
1592 {
1593 return String{span{from, to}};
1594 }
1595 [[deprecated ("Since Stroika v3.0d1, use span{} overload for this")]] static String FromASCII (const wchar_t* from, const wchar_t* to)
1596 {
1597 return String{span{from, to}};
1598 }
1599 [[deprecated ("Since Stroika v3.0d1, use span overloads")]] String InsertAt (const wchar_t* from, const wchar_t* to, size_t at) const
1600 {
1601 Memory::StackBuffer<Character> buf{Memory::eUninitialized, UTFConvert::ComputeTargetBufferSize<Character> (span{from, to})};
1602 return InsertAt (UTFConvert::kThe.ConvertSpan (span{from, to}, span{buf}), at);
1603 }
1604 [[deprecated ("Since Stroika v3.0d1, use span overloads")]] String InsertAt (const Character* from, const Character* to, size_t at) const
1605 {
1606 return InsertAt (span{from, to}, at);
1607 }
1608 [[deprecated ("Since Stroika v3.0d1, use span{} overload for this")]] static String FromLatin1 (const char* start, const char* end)
1609 {
1610 return FromLatin1 (span{start, end});
1611 }
1612 [[deprecated ("Since Stroika v3.0d1, use span{} constructor for this")]] static String FromNarrowString (const char* from,
1613 const char* to, const locale& l)
1614 {
1615 return FromNarrowString (span{from, to}, l);
1616 }
1617 [[deprecated ("Since Stroika v3.0d1, use span{} constructor for this")]] static String FromNarrowSDKString (const char* from, const char* to)
1618 {
1619 return FromNarrowSDKString (span{from, to});
1620 }
1621 template <IUNICODECanAlwaysConvertTo CHAR_T>
1622 [[deprecated ("Since Stroika v3.0d1, use span{} constructor for this")]] String (const CHAR_T* from, const CHAR_T* to)
1623 : String{span<const CHAR_T>{from, to}}
1624 {
1625 }
1626 [[deprecated (
1627 "Since Stroika v3.0d1 - use As<wstring> ().c_str () or other c_str() overload (*UNSAFE TO USE*)")]] nonvirtual const wchar_t*
1628 c_str () const noexcept;
1629 [[deprecated ("Since Stroika v3.0 - use span{} overloads")]] inline static String FromSDKString (const SDKChar* from, const SDKChar* to)
1630 {
1631 return FromSDKString (span{from, to});
1632 }
1633 [[deprecated ("Since Stroika v3.0 - use span{} overloads")]] static String FromUTF8 (const char* from, const char* to)
1634 {
1635 return FromUTF8 (span{from, to});
1636 }
1637 [[deprecated ("Since Stroika v3.0 - use span{} overloads")]] static String FromUTF8 (const char8_t* from, const char8_t* to)
1638 {
1639 return FromUTF8 (span{from, to});
1640 }
1641 template <typename T = string>
1642 [[deprecated ("Since Stroika v3.0d1 - use Character::AsAsciiQuietly")]] static bool AsASCIIQuietly (const wchar_t* fromStart,
1643 const wchar_t* fromEnd, T* into)
1644 {
1645 return Character::AsASCIIQuietly (span<const wchar_t>{fromStart, fromEnd}, into);
1646 }
1647 [[deprecated (
1648 "Since Stroika v3.0d1 due to http://stroika-bugs.sophists.com/browse/STK-965 - NOT IMPLEMENTED")]] nonvirtual const wchar_t*
1649 data () const;
1650 [[deprecated ("Since Stroika v3.0d8 - use RemoveFirstIf")]] String Remove (Character c) const
1651 {
1652 return RemoveFirstIf (c);
1653 }
1654 [[deprecated ("Since Stroika v3.0d8 - use RemoveFirstIf")]] String Remove (const String& subString) const
1655 {
1656 return RemoveFirstIf (subString);
1657 }
1658
1659 private:
1660 static shared_ptr<_IRep> mkEmpty_ ();
1661
1662 private:
1663 /**
1664 * If the argument CHAR_T is restrictive (such as ASCII/char) - this CHECKS and THROWS (Character::CheckASCII).
1665 * This function also reads the data, and sees if it can downshift 'CHAR_T' to something more restrictive, and produces
1666 * a possibly smaller rep.
1667 *
1668 * For some overloads (e.g. && move) - the data is 'stolen/moved'.
1669 *
1670 * See mk_nocheck_ for a simpler - DO WHAT I SAID - operation.
1671 */
1672 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1673 static shared_ptr<_IRep> mk_ (span<const CHAR_T> s);
1674 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1675 static shared_ptr<_IRep> mk_ (Iterable<CHAR_T> it);
1676 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1677 static shared_ptr<_IRep> mk_ (span<CHAR_T> s);
1678 template <IStdBasicStringCompatibleCharacter CHAR_T>
1679 static shared_ptr<_IRep> mk_ (basic_string<CHAR_T>&& s);
1680
1681 private:
1682 /*
1683 * Note the mk_nocheck_ - just does the mk of the buffer, but assuming the arguments are legit and will fit (though it may
1684 * assert in DEBUG builds this is true).
1685 *
1686 * This just blindly allocates the buffer of the given size/type for the given arguments.
1687 */
1688 template <typename CHAR_T>
1689 static shared_ptr<_IRep> mk_nocheck_ (span<const CHAR_T> s)
1690 requires (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1> or same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>);
1691
1692 private:
1693 template <unsigned_integral T>
1694 nonvirtual size_t SubString_adjust_ (T fromOrTo, size_t myLength) const;
1695 template <signed_integral T>
1696 nonvirtual size_t SubString_adjust_ (T fromOrTo, size_t myLength) const;
1697
1698 private:
1699 nonvirtual String SubString_ (const _SafeReadRepAccessor& thisAccessor, size_t from, size_t to) const;
1700
1701 protected:
1702 nonvirtual void _AssertRepValidType () const;
1703
1704 private:
1705 [[noreturn]] static void ThrowInvalidAsciiException_ (); // avoid include
1706 };
1707 static_assert (totally_ordered<String>);
1708
1709#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
1710 template <>
1711 std::filesystem::path String::As<std::filesystem::path> () const;
1712#endif
1713
1714 /**
1715 * operator<< ostream adapters work as you would expect and allow writing Stroika strings easily to ostreams such as cout.
1716 *
1717 * \note uses AsNarrowSDKString (eIgnoreErrors)
1718 *
1719 * The only catch - is that Stroika strings are UNICODE based, and so may not fit perfectly with 'char' based basic_ostream<>.
1720 * To address this, Stroika strings are mapped to 'narrow sdk strings' - ignoring any errors. As this is generally not a very
1721 * good practice to do (lossy) - and generally just done for debugging/diagnostic output, this was deemed acceptable (as of Stroika v3.0d6).
1722 */
1723 wostream& operator<< (wostream& out, const String& s);
1724 ostream& operator<< (ostream& out, const String& s);
1725
1726#if qStroika_HasComponent_googletest
1727 // For googletest compatibility
1728 void PrintTo (const String& s, std::ostream* os);
1729#endif
1730
1731 /**
1732 * Protected helper Rep class.
1733 *
1734 * \note Important design note - String reps are IMMUTABLE. Changes to string like +=, create new string reps (so costly).
1735 * Use StringBuilder for that purpose in performance sensitive code.
1736 */
1737 class String::_IRep : public Iterable<Character>::_IRep {
1738 public:
1739 /**
1740 * Return the ith character in the string.
1741 */
1742 virtual Character GetAt (size_t index) const noexcept = 0;
1743
1744 public:
1745 /**
1746 * Each rep will support a span of at least one code-point type (ascii, utf8, utf16, or utf32)
1747 *
1748 * This API is guaranteed to support a span of at least one of these types (maybe more). The caller may
1749 * specify the code-point type preferred.
1750 */
1751 virtual PeekSpanData PeekData ([[maybe_unused]] optional<PeekSpanData::StorageCodePointType> preferred) const noexcept = 0;
1752
1753 public:
1754 /*
1755 * Return a pointer to mostly standard (wide, nul-terminated) C string,
1756 * whose lifetime extends to the next non-const call on this rep, or nullptr.
1757 *
1758 * It is only 'mostly' standard because it is allowed to have nul-chars embedded in it. But it will
1759 * always have str[len] == 0;
1760 *
1761 * \note Since Stroika v3.0d1, this can return nullptr (in which case the String library will allocate a new backend)
1762 *
1763 * \post returnResult == nullptr or returnResult[len] == '\0';
1764 */
1765 virtual const wchar_t* c_str_peek () const noexcept = 0;
1766
1767 private:
1768 friend class String;
1769 };
1770
1771 // Some docs/testing...
1772 static_assert (not IConvertibleToString<int>);
1773 static_assert (not IConvertibleToString<char>); // would have been sensible to allow, but easily generates confusing results, because that means String x = 3 would work (confusing with overloads)
1774 static_assert (IConvertibleToString<string>);
1775 static_assert (IConvertibleToString<wstring>);
1776 static_assert (IConvertibleToString<u8string>);
1777 static_assert (IConvertibleToString<u16string>);
1778 static_assert (IConvertibleToString<u32string>);
1779 static_assert (not IConvertibleToString<optional<String>>);
1780
1781 namespace Private_ {
1782 // This is just anything that can be treated as a 'span<const Character>'
1783 // clang-format off
1784 template <typename T>
1785 concept ICanBeTreatedAsSpanOfCharacter_ =
1786 derived_from<remove_cvref_t<T>, String>
1787 or same_as<remove_cvref_t<T>, u8string>
1788 or same_as<remove_cvref_t<T>, u8string_view>
1789 or same_as<remove_cvref_t<T>, u16string>
1790 or same_as<remove_cvref_t<T>, u16string_view>
1791 or same_as<remove_cvref_t<T>, u32string>
1792 or same_as<remove_cvref_t<T>, u32string_view>
1793 or same_as<remove_cvref_t<T>, wstring>
1794 or same_as<remove_cvref_t<T>, wstring_view>
1795 or same_as<remove_cvref_t<T>, const Character*>
1796 or same_as<remove_cvref_t<T>, const char8_t*>
1797 or same_as<remove_cvref_t<T>, const char16_t*>
1798 or same_as<remove_cvref_t<T>, const char32_t*>
1799 or same_as<remove_cvref_t<T>, const wchar_t*>
1800 ;
1801 // clang-format on
1802
1803 template <ICanBeTreatedAsSpanOfCharacter_ USTRING, size_t STACK_BUFFER_SZ>
1804 span<const Character> AsSpanOfCharacters_ (USTRING&& s, Memory::StackBuffer<Character, STACK_BUFFER_SZ>* mostlyIgnoredBuf);
1805 }
1806
1807 /**
1808 *
1809 * \par Example Usage
1810 * \code
1811 * constexpr String::EqualsComparer kStringCIComparer_ {Characters::CompareOptions::eCaseInsensitive};
1812 * if (kStringCIComparer_ (filename.extension (), ".HFCC"sv)) {
1813 * compiledName = filename;
1814 * }
1815 * \endcode
1816 *
1817 * \note There is no String::Equals() method, because it would look queer if it took one string argument, and if it was static
1818 * it would essentially look like the above comparer, so little point.
1819 */
1820 struct String::EqualsComparer : Common::ComparisonRelationDeclarationBase<Common::ComparisonRelationType::eEquals> {
1821 /**
1822 * optional CompareOptions to CTOR allows for case insensitive compares
1823 */
1824 constexpr EqualsComparer (CompareOptions co = eWithCase);
1825
1826 /**
1827 * Extra overloads are a slight performance improvement
1828 */
1829 template <IConvertibleToString LT, IConvertibleToString RT>
1830 nonvirtual bool operator() (LT&& lhs, RT&& rhs) const;
1831
1832 CompareOptions fCompareOptions;
1833
1834 private:
1835 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1836 bool Cmp_ (LT&& lhs, RT&& rhs) const;
1837 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1838 bool Cmp_Generic_ (LT&& lhs, RT&& rhs) const;
1839 };
1840
1841 /**
1842 */
1843 struct String::ThreeWayComparer : Common::ComparisonRelationDeclarationBase<Common::ComparisonRelationType::eThreeWayCompare> {
1844 /**
1845 * optional CompareOptions to CTOR allows for case insensitive compares
1846 */
1847 constexpr ThreeWayComparer (CompareOptions co = eWithCase);
1848
1849 /**
1850 * Extra overloads are a slight performance improvement
1851 */
1852 template <IConvertibleToString LT, IConvertibleToString RT>
1853 nonvirtual strong_ordering operator() (LT&& lhs, RT&& rhs) const;
1854
1855 CompareOptions fCompareOptions;
1856
1857 private:
1858 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1859 strong_ordering Cmp_ (LT&& lhs, RT&& rhs) const;
1860 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1861 strong_ordering Cmp_Generic_ (LT&& lhs, RT&& rhs) const;
1862 };
1863
1864 /**
1865 * \brief very similar to ThreeWayComparer but returns true if less
1866 */
1867 struct String::LessComparer : Common::ComparisonRelationDeclarationBase<Common::ComparisonRelationType::eStrictInOrder> {
1868 constexpr LessComparer (CompareOptions co = eWithCase);
1869
1870 template <typename T1, typename T2>
1871 nonvirtual bool operator() (T1 lhs, T2 rhs) const;
1872
1873 private:
1874 ThreeWayComparer fComparer_;
1875 };
1877
1878 inline namespace Literals {
1879 /**
1880 * \brief shorthand for String::FromStringConstant { ARGUMENT }
1881 *
1882 * \par Example:
1883 * \code
1884 * String s1 = "some-string"_k;
1885 * String s2 = String::FromStringConstant ("some-string");
1886 * String s3 = "some-string"sv; // in most cases this will also work fine, and is preferable (since sv is part of C++ standard)
1887 * \endcode
1888 *
1889 * \note _k is STILL sometimes useful and better than sv, since the TYPE returned by _k is a String_Constant which IS a String
1890 * so it will work in some overload contexts where sv would fail.
1891 *
1892 * \note operator""_k with char*, requires that the argument string MUST BE ASCII (someday maybe lifted to allow Latin1)
1893 */
1894 String operator""_k (const ASCII* s, size_t len);
1895 String operator""_k (const wchar_t* s, size_t len);
1896 String operator""_k (const char8_t* s, size_t len);
1897 String operator""_k (const char16_t* s, size_t len);
1898 String operator""_k (const char32_t* s, size_t len);
1899 }
1900
1901 /**
1902 * Basic operator overload with the obvious meaning, and simply indirect to @String::Concatenate (const String& rhs)
1903 *
1904 * \note Design Note
1905 * Don't use member function so "x" + String{u"x"} works.
1906 * Insist that EITHER LHS or RHS is a string (else operator applies too widely).
1907 *
1908 * Both arguments must be convertible to a String, and at least one must be String or derived from String
1909 */
1910 template <IConvertibleToString LHS_T, IConvertibleToString RHS_T>
1911 String operator+ (LHS_T&& lhs, RHS_T&& rhs)
1912 requires (derived_from<remove_cvref_t<LHS_T>, String> or derived_from<remove_cvref_t<RHS_T>, String>);
1913
1914 /**
1915 * \brief StringCombiner is a simple function object used to combine two strings visually - used in Iterable<>::Join ()
1916 *
1917 * This can combine strings in the obvious way (concatenation) - but defaults to separating them with a comma (', ').
1918 *
1919 * \note the functional api - is to be given two strings, and a flag saying if the combination is the last one in the list,
1920 * since in English, this is frequently rendered somewhat differently than the rest.
1921 */
1922 template <typename STRING = String>
1924 STRING fSeparator{", "sv};
1925 optional<STRING> fSpecialSeparatorForLastPair;
1926 STRING operator() (const STRING& lhs, const STRING& rhs, bool isLast) const;
1927 };
1928
1929 /**
1930 * kDefaultStringCombiner is just StringCombiner{}, rendered as a function object, so that it can be externed/imported
1931 * in the Iterable code without imposing a dependency on the String code.
1932 */
1933 extern const function<String (String, String, bool)> kDefaultStringCombiner;
1934
1935}
1936
1937namespace Stroika::Foundation::Traversal {
1938 // specialized as performance optimization
1939 template <>
1940 Characters::String Iterable<Characters::String>::Join (const Characters::String& separator, const optional<Characters::String>& finalSeparator) const;
1941}
1942
1943namespace std {
1944 template <>
1945 struct hash<Stroika::Foundation::Characters::String> {
1946 size_t operator() (const Stroika::Foundation::Characters::String& arg) const;
1947 };
1948}
1949
1950namespace Stroika::Foundation::Memory {
1951 class BLOB; // Forward declare to avoid mutual include issues
1952}
1953
1955 template <typename T>
1956 struct DefaultSerializer; // Forward declare to avoid mutual include issues
1957 template <>
1959 Memory::BLOB operator() (const Stroika::Foundation::Characters::String& arg) const;
1960 };
1961}
1962
1963/**
1964 * Allow std::format to work with String class
1965 *
1966 * \note SUPER PRIMITIVE ROUGH FIRST DRAFT
1967 */
template <>
struct qStroika_Foundation_Characters_FMT_PREFIX_::formatter<Stroika::Foundation::Characters::String, wchar_t> {
    // All parsing and formatting is delegated to the std::wstring formatter.
    qStroika_Foundation_Characters_FMT_PREFIX_::formatter<std::wstring, wchar_t> fDelegate2_;

    // Parse the format-spec by handing the context to the wstring delegate.
    template <typename ParseContext>
    constexpr auto parse (ParseContext& ctx) -> typename ParseContext::iterator
    {
        auto endOfSpec = fDelegate2_.parse (ctx);
        return endOfSpec;
    }

    // Convert the String to a std::wstring and let the delegate format it.
    template <typename FmtContext>
    auto format (Stroika::Foundation::Characters::String s, FmtContext& ctx) const -> typename FmtContext::iterator
    {
        const std::wstring asWide = s.As<std::wstring> ();
        return fDelegate2_.format (asWide, ctx);
    }
};
template <>
struct qStroika_Foundation_Characters_FMT_PREFIX_::formatter<Stroika::Foundation::Characters::String, char> {
    bool ignoreerrors{true}; // maybe set from thread-local variable, or parse() settings, or both

    /**
     *  Skip over the (currently ignored) format-spec and return an iterator to the closing '}',
     *  or ctx.end () if there is none.
     *
     *  The current character is tested BEFORE advancing, so an empty spec ("{}", where
     *  ctx.begin () already refers to '}') is handled, and the iterator is never dereferenced
     *  once it equals ctx.end ().
     */
    template <typename ParseContext>
    constexpr typename ParseContext::iterator parse (ParseContext& ctx)
    {
        auto it = ctx.begin ();
        while (it != ctx.end () and *it != '}') {
            ++it;
        }
        return it;
    }

    /**
     *  Write the String to the output as a narrow SDK string; when ignoreerrors is set, the
     *  conversion is requested with eIgnoreErrors.
     */
    template <typename FmtContext>
    typename FmtContext::iterator format (Stroika::Foundation::Characters::String s, FmtContext& ctx) const
    {
        using namespace Stroika::Foundation::Characters;
        // wformat_context delegateCTX;
        String dr{s}; // really want to delegate to wchar_t version (with vformat) but no documented easy way to extract format_args from ctx (though its in there)
        if (ignoreerrors) {
#if __cpp_lib_ranges >= 202207L
            return std::ranges::copy (dr.AsNarrowSDKString (eIgnoreErrors), ctx.out ()).out;
#else
            return format_to (ctx.out (), "{}", dr.AsNarrowSDKString (eIgnoreErrors));
#endif
        }
        else {
#if __cpp_lib_ranges >= 202207L
            return std::ranges::copy (dr.AsNarrowSDKString (), ctx.out ()).out;
#else
            return format_to (ctx.out (), "{}", dr.AsNarrowSDKString ());
#endif
        }
    }
};
2028
2029/*
2030 ********************************************************************************
2031 ***************************** Implementation Details ***************************
2032 ********************************************************************************
2033 */
2034#include "String.inl"
2035
2036#endif /*_Stroika_Foundation_Characters_String_h_*/
#define Stroika_Define_Enum_Bounds(FIRST_ITEM, LAST_ITEM)
constexpr bool IsWhitespace() const noexcept
RegularExpression is a compiled regular expression which can be used to match on a String class.
virtual Character GetAt(size_t index) const noexcept=0
virtual PeekSpanData PeekData(optional< PeekSpanData::StorageCodePointType > preferred) const noexcept=0
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual bool Contains(Character c, CompareOptions co=eWithCase) const
Definition String.inl:697
nonvirtual size_t length() const noexcept
Definition String.inl:1051
nonvirtual String ToUpperCase() const
Definition String.cpp:1744
static String FromNarrowString(const char *from, const locale &l)
Definition String.inl:340
nonvirtual bool Matches(const RegularExpression &regEx) const
Definition String.cpp:1133
nonvirtual bool IsWhitespace() const
Definition String.cpp:1782
nonvirtual String NormalizeTextToNL() const
Definition String.cpp:1201
static String Join(const Iterable< String > &list, const String &separator=", "sv)
Definition String.cpp:1692
static String FromStringConstant(const CHAR_T(&cString)[SIZE])
Take the given argument data (constant span) - which must remain unchanged - constant - for the appli...
Definition String.inl:386
nonvirtual String ColValue(size_t i, const String &valueIfMissing={}) const
see Col(i) - but with default value of empty string
Definition String.inl:715
nonvirtual String NormalizeSpace(Character useSpaceCharacter=' ') const
Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument...
Definition String.cpp:1229
nonvirtual Containers::Sequence< pair< size_t, size_t > > FindEach(const RegularExpression &regEx) const
Definition String.cpp:966
nonvirtual tuple< const wchar_t *, wstring_view > c_str(Memory::StackBuffer< wchar_t > *possibleBackingStore) const
Definition String.inl:1055
nonvirtual bool operator==(const String &rhs) const
Definition String.inl:1115
nonvirtual String Repeat(unsigned int count) const
Definition String.cpp:1424
static String FromSDKString(const SDKChar *from)
Definition String.inl:447
nonvirtual String LimitLength(size_t maxLen, StringShorteningPreference keepPref=StringShorteningPreference::ePreferKeepLeft) const
return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if tr...
Definition String.inl:745
nonvirtual String RemoveAll(Character c) const
Definition String.cpp:823
nonvirtual Containers::Sequence< RegularExpressionMatch > FindEachMatch(const RegularExpression &regEx) const
Definition String.cpp:984
nonvirtual String RemoveFirstIf(Character c) const
Definition String.cpp:807
nonvirtual string AsNarrowSDKString() const
Definition String.inl:834
nonvirtual optional< String > Col(size_t i) const
Useful to replace 'awk print $3' - replace with Col(2) - zero based.
Definition String.cpp:1362
nonvirtual String InsertAt(Character c, size_t at) const
Definition String.inl:719
nonvirtual size_t rfind(Character c) const
Definition String.inl:1075
static String FromNarrowSDKString(const char *from)
Definition String.inl:470
nonvirtual string AsNarrowString(const locale &l) const
Definition String.cpp:1838
nonvirtual String Concatenate(T &&rhs) const
appends 'rhs' string to this string (without modifying this string) and returns the combined string
nonvirtual SDKString AsSDKString() const
Definition String.inl:806
nonvirtual size_t size() const noexcept
Definition String.inl:534
nonvirtual bool EndsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1088
nonvirtual String ToLowerCase() const
Definition String.cpp:1706
nonvirtual String ReplaceAll(const RegularExpression &regEx, const String &with) const
Definition String.cpp:1155
static constexpr size_t npos
Definition String.h:1399
nonvirtual String Replace(size_t from, size_t to, const String &replacement) const
Definition String.cpp:1045
nonvirtual String SubString(SZ from) const
nonvirtual String Trim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1592
nonvirtual strong_ordering operator<=>(const String &rhs) const
Definition String.inl:1105
nonvirtual Character back() const
Definition String.inl:1079
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1059
nonvirtual String StripAll(bool(*removeCharIf)(Character)) const
Definition String.cpp:1664
nonvirtual String AssureEndsWith(const Character &c, CompareOptions co=eWithCase) const
Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
Definition String.cpp:1123
nonvirtual span< CHAR_T > CopyTo(span< CHAR_T > s) const
nonvirtual PeekSpanData GetPeekSpanData() const
return the constant character data inside the string in the form of a case variant union of different...
nonvirtual String SafeSubString(SZ from) const
nonvirtual Containers::Sequence< String > AsLines() const
break the String into a series of lines;
Definition String.cpp:1306
nonvirtual String LTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1443
nonvirtual Character front() const
Definition String.inl:1086
nonvirtual Containers::Sequence< String > Grep(const String &fgrepArg) const
Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with ....
Definition String.cpp:1341
nonvirtual Containers::Sequence< String > FindEachString(const RegularExpression &regEx) const
Definition String.cpp:1001
nonvirtual String Skip(size_t n) const
Return a substring of this string, starting at 'argument' n. If n > size(), return empty string.
Definition String.inl:604
nonvirtual optional< size_t > RFind(Character c) const noexcept
Definition String.cpp:1011
static span< const CHAR_TYPE > GetData(const PeekSpanData &pds, Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *possiblyUsedBuffer)
return the constant character data inside the string (rep) in the form of a span, possibly quickly an...
Definition String.inl:967
nonvirtual Containers::Sequence< String > Tokenize() const
Definition String.cpp:1234
nonvirtual String RemoveAt(size_t charAt) const
Definition String.inl:608
nonvirtual optional< T > AsASCIIQuietly() const
static String FromLatin1(const CHAR_T *cString)
Definition String.inl:355
static optional< span< const CHAR_TYPE > > PeekData(const PeekSpanData &pds)
return the constant character data inside the string in the form of a span or nullopt if not availabl...
Definition String.inl:910
static String FromUTF8(span< CHAR_T > from)
Definition String.inl:420
nonvirtual String RTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1508
nonvirtual optional< size_t > Find(Character c, CompareOptions co=eWithCase) const
Definition String.inl:685
nonvirtual String substr(size_t from, size_t count=npos) const
Definition String.inl:1092
nonvirtual size_t find(Character c, size_t startAt=0) const
Definition String.inl:1067
Set<T> is a container of T, where once an item is added, additionally adds () do nothing.
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
Iterable(const Iterable &) noexcept=default
Iterable are safely copyable (by value). Since Iterable uses COW, this just copies the underlying poi...
returns true iff T == u8string, u16string, u32string, or wstring - which std::string types can be una...
Definition String.h:116
anything with a 'special .STRINGTYPE conversion' method to UNICODE string, such as filesystem::path
Definition String.h:124
IUNICODECanUnambiguouslyConvertFrom is any 'character representation type' where array of them unambi...
Definition Character.h:179
char ASCII
Stroika's string/character classes treat 'char' as being an ASCII character.
Definition Character.h:59
conditional_t< qTargetPlatformSDKUseswchar_t, wchar_t, char > SDKChar
Definition SDKChar.h:71
basic_string< SDKChar > SDKString
Definition SDKString.h:38
String operator+(LHS_T &&lhs, RHS_T &&rhs)
Definition String.inl:1294
const function< String(String, String, bool)> kDefaultStringCombiner
Definition String.inl:1319
wostream & operator<<(wostream &out, const String &s)
Definition String.cpp:2035
STL namespace.
very similar to ThreeWayComparer but returns true if less
Definition String.h:1867
Summary data for raw contents of rep - each rep will support at least one of these span forms.
Definition String.h:1270
StringCombiner is a simple function object used to combine two strings visually - used in Iterable<>:...
Definition String.h:1923
function object which serializes type T to a BLOB (or BLOB like) object