String is like std::u32string, except it is much easier to use, often much more space efficient, and more easily interoperates with other string types. More...
#include <String.h>
Classes | |
class | _IRep |
struct | EqualsComparer |
struct | LessComparer |
very similar to ThreeWayComparer but returns true if less More... | |
struct | PeekSpanData |
Summary data for raw contents of rep - each rep will support at least one of these span forms. More... | |
Public Member Functions | |
String () | |
template<typename T > requires (is_convertible_v<T, String>) | |
nonvirtual String | Concatenate (T &&rhs) const |
appends 'rhs' string to this string (without modifying this string) and returns the combined string | |
nonvirtual size_t | size () const noexcept |
nonvirtual const Character | operator[] (size_t i) const noexcept |
return (read-only) Character object | |
nonvirtual String | InsertAt (Character c, size_t at) const |
nonvirtual String | RemoveAt (size_t charAt) const |
nonvirtual String | RemoveFirstIf (Character c) const |
nonvirtual String | RemoveAll (Character c) const |
template<typename SZ > | |
nonvirtual String | SubString (SZ from) const |
template<typename SZ > | |
nonvirtual String | SafeSubString (SZ from) const |
nonvirtual String | Repeat (unsigned int count) const |
nonvirtual bool | Contains (Character c, CompareOptions co=eWithCase) const |
nonvirtual bool | StartsWith (const Character &c, CompareOptions co=eWithCase) const |
nonvirtual bool | EndsWith (const Character &c, CompareOptions co=eWithCase) const |
nonvirtual String | AssureEndsWith (const Character &c, CompareOptions co=eWithCase) const |
Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'. | |
nonvirtual bool | Matches (const RegularExpression &regEx) const |
nonvirtual optional< size_t > | Find (Character c, CompareOptions co=eWithCase) const |
nonvirtual Containers::Sequence< pair< size_t, size_t > > | FindEach (const RegularExpression &regEx) const |
nonvirtual Containers::Sequence< RegularExpressionMatch > | FindEachMatch (const RegularExpression &regEx) const |
nonvirtual Containers::Sequence< String > | FindEachString (const RegularExpression &regEx) const |
nonvirtual optional< size_t > | RFind (Character c) const noexcept |
nonvirtual String | Replace (size_t from, size_t to, const String &replacement) const |
nonvirtual String | ReplaceAll (const RegularExpression &regEx, const String &with) const |
nonvirtual String | NormalizeTextToNL () const |
nonvirtual String | NormalizeSpace (Character useSpaceCharacter=' ') const |
Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument space character). | |
nonvirtual Containers::Sequence< String > | Tokenize () const |
nonvirtual Containers::Sequence< String > | AsLines () const |
break the String into a series of lines; | |
nonvirtual Containers::Sequence< String > | Grep (const String &fgrepArg) const |
Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with .Map<>) producing a subset of the lines which match. | |
nonvirtual optional< String > | Col (size_t i) const |
Useful to replace 'awk print $3' - replace with Col(2) - zero based. | |
nonvirtual String | ColValue (size_t i, const String &valueIfMissing={}) const |
see Col(i) - but with default value of empty string | |
nonvirtual String | LTrim (bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const |
nonvirtual String | RTrim (bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const |
nonvirtual String | Trim (bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const |
nonvirtual String | StripAll (bool(*removeCharIf)(Character)) const |
nonvirtual String | ToLowerCase () const |
nonvirtual String | ToUpperCase () const |
nonvirtual bool | IsWhitespace () const |
nonvirtual String | LimitLength (size_t maxLen, StringShorteningPreference keepPref=StringShorteningPreference::ePreferKeepLeft) const |
return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if truncated) | |
template<IUNICODECanAlwaysConvertTo CHAR_T> requires (not is_const_v<CHAR_T>) | |
nonvirtual span< CHAR_T > | CopyTo (span< CHAR_T > s) const |
template<typename T > requires (IBasicUNICODEStdString<T> or same_as<T, String> or constructible_from<T, wstring>) | |
nonvirtual T | As () const |
nonvirtual string | AsNarrowString (const locale &l) const |
template<typename T = u8string> requires (same_as<T, string> or same_as<T, u8string>) | |
nonvirtual T | AsUTF8 () const |
template<typename T = u16string> requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>)) | |
nonvirtual T | AsUTF16 () const |
template<typename T = u32string> requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>)) | |
nonvirtual T | AsUTF32 () const |
nonvirtual SDKString | AsSDKString () const |
nonvirtual string | AsNarrowSDKString () const |
template<typename T = string> requires requires (T* into) { { into->empty () } -> same_as<bool>; { into->push_back (ASCII{0}) }; } | |
nonvirtual T | AsASCII () const |
template<typename T = string> requires requires (T* into) { { into->empty () } -> same_as<bool>; { into->push_back (ASCII{0}) }; } | |
nonvirtual optional< T > | AsASCIIQuietly () const |
template<IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE = ASCII> | |
nonvirtual PeekSpanData | GetPeekSpanData () const |
return the constant character data inside the string in the form of a case variant union of different span types (at least one will be there) templated type arg just used to pick a preferred type. | |
nonvirtual bool | operator== (const String &rhs) const |
nonvirtual strong_ordering | operator<=> (const String &rhs) const |
nonvirtual size_t | length () const noexcept |
nonvirtual tuple< const wchar_t *, wstring_view > | c_str (Memory::StackBuffer< wchar_t > *possibleBackingStore) const |
nonvirtual size_t | find (Character c, size_t startAt=0) const |
nonvirtual size_t | rfind (Character c) const |
nonvirtual Character | front () const |
nonvirtual Character | back () const |
nonvirtual String | substr (size_t from, size_t count=npos) const |
template<typename CHAR_T > requires (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1> or same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>) | |
auto | mk_nocheck_ (span< const CHAR_T > s) -> shared_ptr< _IRep > |
![]() | |
Iterable (const Iterable &) noexcept=default | |
Iterable are safely copyable (by value). Since Iterable uses COW, this just copies the underlying pointer and increments the reference count. | |
Iterable (Iterable &&) noexcept=default | |
Iterable are safely moveable. | |
Iterable (CONTAINER_OF_T &&from) | |
Iterable (const initializer_list< Character > &from) | |
nonvirtual | operator bool () const |
nonvirtual Iterator< Character > | MakeIterator () const |
Create an iterator object which can be used to traverse the 'Iterable'. | |
nonvirtual size_t | size () const |
Returns the number of items contained. | |
nonvirtual bool | empty () const |
Returns true iff size() == 0. | |
nonvirtual bool | Contains (ArgByValueType< Character > element, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}) const |
nonvirtual Iterator< Character > | begin () const |
Support for ranged for, and STL syntax in general. | |
nonvirtual void | Apply (const function< void(ArgByValueType< Character > item)> &doToElement, Execution::SequencePolicy seq=Execution::SequencePolicy::eDEFAULT) const |
Run the argument function (or lambda) on each element of the container. | |
nonvirtual Iterator< Character > | Find (THAT_FUNCTION &&that, Execution::SequencePolicy seq=Execution::SequencePolicy::eDEFAULT) const |
Run the argument bool-returning function (or lambda) on each element of the container, and return an iterator pointing at the first element (depending on seq) found true. (or use First() to do same thing but return optional<>) | |
nonvirtual CONTAINER_OF_T | As (CONTAINER_OF_T_CONSTRUCTOR_ARGS... args) const |
nonvirtual Character | Nth (ptrdiff_t n) const |
Find the Nth element of the Iterable<> | |
nonvirtual Character | NthValue (ptrdiff_t n, ArgByValueType< Character > defaultValue={}) const |
Find the Nth element of the Iterable<>, but allow for n to be out of range, and just return argument default-value. | |
nonvirtual RESULT_CONTAINER | Where (INCLUDE_PREDICATE &&includeIfTrue) const |
produce a subset of this iterable where argument function returns true | |
nonvirtual Iterable< Character > | Distinct (EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}) const |
nonvirtual RESULT_CONTAINER | Map (ELEMENT_MAPPER &&elementMapper) const |
functional API which iterates over all members of an Iterable, applies a map function to each element, and collects the results in a new Iterable | |
nonvirtual optional< REDUCED_TYPE > | Reduce (const function< REDUCED_TYPE(ArgByValueType< Character >, ArgByValueType< Character >)> &op) const |
Walk the entire list of items, and use the argument 'op' to combine (reduce) items to a resulting single item. | |
nonvirtual REDUCED_TYPE | ReduceValue (const function< REDUCED_TYPE(ArgByValueType< Character >, ArgByValueType< Character >)> &op, ArgByValueType< REDUCED_TYPE > defaultValue={}) const |
nonvirtual RESULT_T | Join (const CONVERT_TO_RESULT &convertToResult=kDefaultToStringConverter<>, const COMBINER &combiner=Characters::kDefaultStringCombiner) const |
ape the JavaScript/python 'join' function - take the parts of 'this' iterable and combine them into a new object (typically a string) | |
nonvirtual Iterable< Character > | Skip (size_t nItems) const |
nonvirtual Iterable< Character > | Take (size_t nItems) const |
nonvirtual Iterable< Character > | Slice (size_t from, size_t to) const |
nonvirtual Iterable< Character > | Top () const |
return the top/largest (possibly just top N) values from this Iterable<T> | |
nonvirtual Iterable< Character > | OrderBy (INORDER_COMPARER_TYPE &&inorderComparer=INORDER_COMPARER_TYPE{}, Execution::SequencePolicy seq=Execution::SequencePolicy::ePar) const |
nonvirtual bool | IsOrderedBy (INORDER_COMPARER_TYPE &&inorderComparer=INORDER_COMPARER_TYPE{}) const |
nonvirtual optional< Character > | First () const |
return first element in iterable, or if 'that' specified, first where 'that' is true, (or return nullopt if none) | |
nonvirtual Character | FirstValue (ArgByValueType< Character > defaultValue={}) const |
return first element in iterable provided default | |
nonvirtual optional< Character > | Last () const |
return last element in iterable, or if 'that' specified, last where 'that' is true, (or return missing) | |
nonvirtual Character | LastValue (ArgByValueType< Character > defaultValue={}) const |
nonvirtual bool | All (const function< bool(ArgByValueType< Character >)> &testEachElt) const |
return true iff argument predicate returns true for each element of the iterable | |
nonvirtual optional< Character > | Min () const |
nonvirtual RESULT_TYPE | MinValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const |
nonvirtual optional< Character > | Max () const |
nonvirtual RESULT_TYPE | MaxValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const |
nonvirtual optional< RESULT_TYPE > | Mean () const |
nonvirtual RESULT_TYPE | MeanValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const |
nonvirtual optional< RESULT_TYPE > | Sum () const |
nonvirtual RESULT_TYPE | SumValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const |
nonvirtual optional< RESULT_TYPE > | Median (const INORDER_COMPARE_FUNCTION &compare={}) const |
nonvirtual RESULT_TYPE | MedianValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const |
nonvirtual Iterable< Character > | Repeat (size_t count) const |
nonvirtual bool | Any () const |
Any() same as not empty (); Any (includeIfTrue) returns true iff includeIfTrue returns true on any values in iterable. | |
nonvirtual size_t | Count () const |
with no args, same as size, with function filter arg, returns number of items that pass. | |
nonvirtual size_t | length () const |
STL-ish alias for size() - really in STL only used in string, I think, but still makes sense as an alias. | |
Static Public Member Functions | |
template<typename CHAR_T > requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>) | |
static String | FromUTF8 (span< CHAR_T > from) |
static String | FromSDKString (const SDKChar *from) |
static String | FromNarrowSDKString (const char *from) |
static String | FromNarrowString (const char *from, const locale &l) |
template<size_t SIZE, IUNICODECanUnambiguouslyConvertFrom CHAR_T> | |
static String | FromStringConstant (const CHAR_T(&cString)[SIZE]) |
Take the given argument data (constant span) - which must remain unchanged - constant - for the application lifetime - and treat it as a Stroika String object. | |
template<IUNICODECanUnambiguouslyConvertFrom CHAR_T> | |
static String | FromLatin1 (const CHAR_T *cString) |
static String | Join (const Iterable< String > &list, const String &separator=", "sv) |
template<IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE> | |
static optional< span< const CHAR_TYPE > > | PeekData (const PeekSpanData &pds) |
return the constant character data inside the string in the form of a span or nullopt if not available for that CHAR_TYPE | |
template<IUNICODECanAlwaysConvertTo CHAR_TYPE, size_t STACK_BUFFER_SZ> | |
static span< const CHAR_TYPE > | GetData (const PeekSpanData &pds, Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *possiblyUsedBuffer) |
return the constant character data inside the string (rep) in the form of a span, possibly quickly and directly, and possibly copied into possiblyUsedBuffer | |
![]() | |
static bool | SetEquals (const LHS_CONTAINER_TYPE &lhs, const RHS_CONTAINER_TYPE &rhs, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}) |
static bool | MultiSetEquals (const LHS_CONTAINER_TYPE &lhs, const RHS_CONTAINER_TYPE &rhs, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}) |
static bool | SequentialEquals (const LHS_CONTAINER_TYPE &lhs, const RHS_CONTAINER_TYPE &rhs, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}, bool useIterableSize=false) |
static constexpr default_sentinel_t | end () noexcept |
Support for ranged for, and STL syntax in general. | |
Static Public Attributes | |
static constexpr size_t | npos = static_cast<size_t> (-1) |
![]() | |
static const function< RESULT_T(Character)> | kDefaultToStringConverter |
Protected Member Functions | |
String (const shared_ptr< _IRep > &rep) noexcept | |
![]() | |
Iterable (const shared_ptr< _IRep > &rep) noexcept | |
Iterable's are typically constructed as concrete subtype objects, whose CTOR passed in a shared copyable rep. | |
nonvirtual Memory::SharedByValue_State | _GetSharingState () const |
Additional Inherited Members | |
![]() | |
using | value_type = Character |
value_type is an alias for the type iterated over - like vector<T>::value_type | |
using | iterator = Iterator< Character > |
using | const_iterator = Iterator< Character > |
![]() | |
using | _SharedByValueRepType = Memory::SharedByValue< _IRep, Memory::SharedByValue_Traits< _IRep, shared_ptr< _IRep >, Rep_Cloner_ > > |
Lazy-copying smart pointer mostly used by implementors (can generally be ignored by users). However, protected because manipulation needed in some subclasses (rarely) - like UpdatableIteratable. | |
![]() | |
_SharedByValueRepType | _fRep |
String is like std::u32string, except it is much easier to use, often much more space efficient, and more easily interoperates with other string types.
The Stroika String class is conceptually a sequence of (UNICODE) Characters, and so there is no obvious way to map the Stroika String to a std::string (in general). However, if you specify a codepage for conversion, or are converting to/from SDKString/SDKChar, or u8string, etc, there is builtin support for that.
EOS Handling: The Stroika String class supports having embedded NUL-characters. It also supports easy construction from NUL-terminated character strings.
Since Stroika v3, there is no longer c_str () support, since Stroika doesn't internally require NUL-terminated strings, and actively encourages different compact representations of strings (c_str() requires a choice of a particular encoding to make sense).
About spans, and the \0 NUL-termination - generally do NOT include the NUL-character in your span! Stroika strings will allow this, and treat it as just another character, but it's probably not what you meant.
String objects are IMMUTABLE (except for the OBVIOUS meaning case of operator= being allowed).
String reps are IMMUTABLE.
Use StringBuilder for a 'mutable' String (can be used mostly interchangeably with String).
Current Mutating methods (as of v3.0d1x) o c_str () – non-const deprecated in v3.0d13 o SetCharAt - deprecated v3.0d12 o c_str() (consider deprecating?) o operator= - deprecated v3.0d12 o clear()- deprecated v3.0d12 o Append - deprecated v3.0d12 o operator+= - deprecated v3.0d12 o erase() - deprecated v3.0d12
SOMEWHAT ironically, the only one of these methods that is hard to replace is the non-const c_str () - and maybe that's not so bad, since it is deprecated? COULD just deprecate ALL of these, and then the class is fully immutable. Probably easier to understand/reason about.
Stroika::Foundation::Characters::String::String | ( | ) |
All the constructors are obvious, except o NUL-characters ARE allowed in strings, except for the case of single charX* argument constructors - which find the length based on the terminating NUL-character.
o CTOR (PATHLIKE_TOSTRINGABLE&& s) - IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE carefully excludes conflicting CTOR overloads, and purpose is to allow constructing a String from anything with a 'special conversion' method to UNICODE string, such as filesystem::path.
This mimics the behavior in Stroika v2.1 with String::FromASCII ()
Definition at line 266 of file String.inl.
|
protectednoexcept |
Definition at line 256 of file String.inl.
|
static |
Create a String object from a 'char-based' utf-8 encoded string.
Definition at line 420 of file String.inl.
Create a String object from a 'SDKChar' (os-setting - current code page) encoded string. See @SDKChar See @SDKString
Definition at line 447 of file String.inl.
|
static |
Create a String object from a 'char-based' (os-setting - current code page) encoded string.
Definition at line 470 of file String.inl.
|
static |
Create a String object from a char based on the encoding from the argument locale. This throws an exception if there is an error performing the conversion.
Definition at line 340 of file String.inl.
|
static |
Take the given argument data (constant span) - which must remain unchanged - constant - for the application lifetime - and treat it as a Stroika String object.
This allows creation of String objects with fewer memory allocations and less copying, and more efficient storage, in most situations
The resulting String is a perfectly compliant Stroika String (somewhat akin to std::string_view vs std::string).
WARNING - BE VERY CAREFUL - be sure arguments have application lifetime (intended use case is C string literals).
Definition at line 386 of file String.inl.
|
static |
Create a String object from UNICODE Latin-1 Supplement (https://en.wikipedia.org/wiki/Latin-1_Supplement)
This is roughly, but not exactly, the same as the ISO-Latin-1 single-byte character set (https://en.wikipedia.org/wiki/ISO/IEC_8859-1)
Definition at line 355 of file String.inl.
nonvirtual String Stroika::Foundation::Characters::String::Concatenate | ( | T && | rhs | ) | const |
appends 'rhs' string to this string (without modifying this string) and returns the combined string
|
noexcept |
Returns the number of characters in the String. Note that this may not be the same as bytes, does not include NUL termination, and doesn't in any way respect NUL termination (meaning a NUL-character is allowed in a Stroika string).
Definition at line 534 of file String.inl.
|
noexcept |
return (read-only) Character object
Definition at line 735 of file String.inl.
InsertAt() constructs a new string by taking this string, and inserting the argument characters.
Note that for repeated insertions, this is much less efficient than just using StringBuilder.
Definition at line 715 of file String.inl.
String Stroika::Foundation::Characters::String::RemoveAt | ( | size_t | charAt | ) | const |
Remove the characters at 'charAt' (RemoveAt/1) or between 'from' and 'to' (const method - doesn't modify this)
It is an error if this implies removing characters off the end of the string.
Note that this is quite inefficient: consider using StringBuilder
Definition at line 604 of file String.inl.
Remove the first occurrence of Character 'c' (or of 'subString') from the string. Not an error if none found. Doesn't modify this (const method) - returns resulting string.
Note that this is quite inefficient: consider using StringBuilder
Definition at line 807 of file String.cpp.
Remove all occurrences of Character 'c' (or of 'subString') from this string (walking front to back - if removal creates one, it too is removed). Not an error if none found. Doesn't modify this (const method) - returns resulting string.
Definition at line 823 of file String.cpp.
nonvirtual String Stroika::Foundation::Characters::String::SubString | ( | SZ | from | ) | const |
OVERLOADS WITH size_t:
Produce a substring of this string, starting at 'from', and up to 'to' (or end of string for one-arg overload).
NB This function treats the second argument differently than String::substr () - which respects the STL basic_string API. This function treats the second argument as a 'to', STL substr() treats it as a count. This amounts to the same thing for the very common cases of substr(N) - because second argument is defaulted, and, substr (0, N) - because then the count and end are the same.
OVERLOADS WITH ptrdiff_t:
This is like SubString() except that if from/to are negative, they are treated as relative to the end of the String.
So for example, SubString (0, -1) is equivalent to SubString (0, size () - 1) - and so is an error if the string is empty.
Similarly, SubString (-5) is equivalent to SubString (size ()-5, size ()) - so can be used to grab the end of a string.
This API is identical to the javascript String.slice () method/behavior @see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf 15.5.4.13 String.prototype.slice (start, end)
This API - when called with negative indexes - used to be called CircularSubString ().
nonvirtual String Stroika::Foundation::Characters::String::SafeSubString | ( | SZ | from | ) | const |
Like SubString(), but no requirements on from/to. These are just adjusted to the edge of the string if they exceed those endpoints. And if arguments are <0, they are interpreted as end-relative.
This API - when called with negative indexes - used to be called SafeCircularSubString ().
String String::Repeat | ( | unsigned int | count | ) | const |
Return 'count' copies of this String (concatenated after one another).
Definition at line 1424 of file String.cpp.
bool Stroika::Foundation::Characters::String::Contains | ( | Character | c, |
CompareOptions | co = eWithCase |
||
) | const |
Returns true if the argument character or string is found anywhere inside this string. This is equivalent to return Matches (".*" + X + ".*"); // If X had no characters which look like they are part of // a regular expression
Definition at line 693 of file String.inl.
bool String::StartsWith | ( | const Character & | c, |
CompareOptions | co = eWithCase |
||
) | const |
Returns true iff this string starts with the given character/substring.
Similar to: return Matches (X + ".*"); except that StartsWith() doesn't interpret 'X' as a regular expression
Definition at line 1059 of file String.cpp.
bool String::EndsWith | ( | const Character & | c, |
CompareOptions | co = eWithCase |
||
) | const |
Returns true iff this string ends with the given character/substring.
Similar to: return Matches (".*" + X); except that EndsWith() doesn't interpret 'X' as a regular expression
Definition at line 1088 of file String.cpp.
Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
Definition at line 1123 of file String.cpp.
bool String::Matches | ( | const RegularExpression & | regEx | ) | const |
Apply the given regular expression, and return true if it matches this string. This only returns true if the expression matches the ENTIRE string - all the way to the end.
Details on the regular expression language/format can be found at: http://en.wikipedia.org/wiki/C%2B%2B11#Regular_expressions
Definition at line 1133 of file String.cpp.
optional< size_t > Stroika::Foundation::Characters::String::Find | ( | Character | c, |
CompareOptions | co = eWithCase |
||
) | const |
Find returns the index of the first occurrence of the given Character/substring argument in this string. Find () always returns a valid string index, which is followed by the given substring, or nullopt otherwise.
Find () can optionally be provided a 'startAt' offset to begin the search at.
And the overload taking a RegularExpression - returns BOTH the location where the match is found, as well as the end of the match.
Note - for the special case of Find(empty-string) - the return value is 0 if this string is non-empty, and nullopt if this string was empty.
Definition at line 681 of file String.inl.
Containers::Sequence< pair< size_t, size_t > > String::FindEach | ( | const RegularExpression & | regEx | ) | const |
This is just like Find, but captures all the matching results in an iterable result. The reason the overload for RegularExpression's returns a list of pair<size_t,size_t> is because the endpoint of the match is ambiguous. For fixed string Find, the end of match is computable from the arguments.
FindEach () can be more handy to use than directly using Find () in scenarios where you want to iterate over each match: e.g.: for (auto i : s.FindEach ("xxx")) {....}
Also, to count matches, you can use: size_t nMatches = FindEach (matchexp).size ();
Note: FindEach handles the special case of an empty match as ignored, so FindEach(empty-str-or-regexp) always returns an empty list. Also - for the String case, it returns distinct matches, so if you search String{"AAAA"}.FindEach ("AA"), you will get 2 answers ({0, 2}).
Definition at line 966 of file String.cpp.
Containers::Sequence< RegularExpressionMatch > String::FindEachMatch | ( | const RegularExpression & | regEx | ) | const |
Definition at line 984 of file String.cpp.
Containers::Sequence< String > String::FindEachString | ( | const RegularExpression & | regEx | ) | const |
Definition at line 1001 of file String.cpp.
|
noexcept |
RFind (substring) returns the index of the last occurrence of the given substring in this string. This function always returns a valid string index, which is followed by the given substring, or optional<size_t> {} otherwise.
Definition at line 1011 of file String.cpp.
Replace the range of this string with the given replacement. Const method: just creates new string as described.
Definition at line 1045 of file String.cpp.
String String::ReplaceAll | ( | const RegularExpression & | regEx, |
const String & | with | ||
) | const |
Apply the given regular expression, with 'with' and replace each match. This doesn't modify this string, but returns the replacement string.
CHECK - BUT I THINK WE DEFINE TO REPLACE ALL? OR MAKE PARAM? See regex_replace () for definition of the regEx language
Require (not string2SearchFor.empty ());
Note - it IS legal to have with contain the original search for string, or even to have it 'created' as part of where it gets inserted. The implementation will only replace those that pre-existed.
Definition at line 1155 of file String.cpp.
String String::NormalizeTextToNL | ( | ) | const |
Replace any CR or LF or CRLF sequences with plain NL-terminated text.
Definition at line 1201 of file String.cpp.
Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument space character).
Definition at line 1229 of file String.cpp.
Containers::Sequence< String > String::Tokenize | ( | ) | const |
Break this String into constituent parts. This is a simplistic API but at least handy as is.
The caller can specify the token separators by set, by lambda. This defaults to the lambda "isWhitespace".
This is often called 'Split' in other APIs. This is NOT (as is now) a replacement for flex, but just for simple, but common string splitting needs (though if I had a regexp param, it may approach the power of flex).
TODO:
Definition at line 1234 of file String.cpp.
break the String into a series of lines;
Definition at line 1306 of file String.cpp.
Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with .Map<>) producing a subset of the lines which match.
note this is useful to replace 'shell script' logic where you might run some command and grep through its output for all matching lines.
Definition at line 1341 of file String.cpp.
optional< String > String::Col | ( | size_t | i | ) | const |
Useful to replace 'awk print $3' - replace with Col(2) - zero based.
default separator = "\\s+"_RegEx;
Definition at line 1362 of file String.cpp.
String String::LTrim | ( | bool(*)(Character) | shouldBeTrimmed = Character::IsWhitespace | ) | const |
String LTrim () scans the characters from left to right, and applies the given 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed, and the resulting string is returned. This does not modify the current string it's applied to - just returns the trimmed string.
Definition at line 1443 of file String.cpp.
String String::RTrim | ( | bool(*)(Character) | shouldBeTrimmed = Character::IsWhitespace | ) | const |
String RTrim () scans the characters from right to left, and applies the given 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed, and the resulting string is returned. This does not modify the current string it's applied to - just returns the trimmed string.
Definition at line 1508 of file String.cpp.
String String::Trim | ( | bool(*)(Character) | shouldBeTrimmed = Character::IsWhitespace | ) | const |
String Trim () is logically equivalent to RTrim (shouldBeTrimmed).LTrim (shouldBeTrimmed).
Definition at line 1592 of file String.cpp.
Walk the entire string, and produce a new string consisting of all characters for which the predicate 'removeCharIf' returned false.
Definition at line 1664 of file String.cpp.
Combine the given array into a single string (typically comma space) separated. If given a list of length n, this adds n-1 separators.
Definition at line 1692 of file String.cpp.
String String::ToLowerCase | ( | ) | const |
Return a new string based on this string where each upper case character is replaced by its lower case equivalent. Note that non-upper-case characters (such as punctuation) are unchanged.
Definition at line 1706 of file String.cpp.
String String::ToUpperCase | ( | ) | const |
Return a new string based on this string where each lower case character is replaced by its upper case equivalent. Note that non-lower-case characters (such as punctuation) are unchanged.
Definition at line 1744 of file String.cpp.
bool String::IsWhitespace | ( | ) | const |
Return true if the string contains zero non-whitespace characters.
Definition at line 1782 of file String.cpp.
String Stroika::Foundation::Characters::String::LimitLength | ( | size_t | maxLen, |
StringShorteningPreference | keepPref = StringShorteningPreference::ePreferKeepLeft |
||
) | const |
return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if truncated)
This function is for GUI/display purposes. It returns the given string, trimmed down to at most maxLen characters, and removes whitespace (on the 'to trim' side - given by the keepPref argument - if needed to get under maxLen).
Note in the 3-arg overload, the ellipsis string MAY be the empty string.
Definition at line 741 of file String.inl.
nonvirtual span< CHAR_T > Stroika::Foundation::Characters::String::CopyTo | ( | span< CHAR_T > | s | ) | const |
CopyTo () copies the contents of this string to the target buffer. CopyTo () does NOT nul-terminate the target buffer
Returns span of CHAR_T objects written - a subspan of the argument span
nonvirtual T Stroika::Foundation::Characters::String::As | ( | ) | const |
Convert String losslessly into a standard C++ type.
Only specifically specialized variants supported: IBasicUNICODEStdString<T> or same_as<T,String> o wstring o u8string o u16string o u32string o String (return *this; handy sometimes in templated usage; harmless) as well as: o filesystem::path (or anything with .wstring() -> wstring method) - note see qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
DEPRECATED AS OF v3.0d1 because As is const method - could do non-const As<> overload for these, but that would be confusing o const wchar_t* o const Character*
string String::AsNarrowString | ( | const locale & | l | ) | const |
Create a narrow string object from this, based on the encoding from the argument locale. This throws an exception if there is an error performing the conversion, and the 'into' overload leaves 'into' in an undefined (but safe) state.
Definition at line 1838 of file String.cpp.
nonvirtual T Stroika::Foundation::Characters::String::AsUTF8 | ( | ) | const |
Convert String losslessly into a standard C++ type. Only specifically specialized variants are supported.
SUPPORTED result type "T": values are: string u8string
nonvirtual T Stroika::Foundation::Characters::String::AsUTF16 | ( | ) | const |
nonvirtual T Stroika::Foundation::Characters::String::AsUTF32 | ( | ) | const |
Convert String losslessly into a standard C++ type u32string.
SDKString Stroika::Foundation::Characters::String::AsSDKString | ( | ) | const |
See docs on SDKChar for meaning (character set).
Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot). But in that case, AsSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.
Definition at line 802 of file String.inl.
string Stroika::Foundation::Characters::String::AsNarrowSDKString | ( | ) | const |
See docs on SDKChar for meaning (character set). If SDKChar is a wide character, there is probably still a default 'code page' to interpret narrow characters (Windows CP_ACP). This is a string in that character set.
Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot). But in that case, AsNarrowSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.
Definition at line 830 of file String.inl.
nonvirtual T Stroika::Foundation::Characters::String::AsASCII | ( | ) | const |
Convert String losslessly into a standard C++ type. Only specifically specialized variants are supported (right now just <string> supported). The source string MUST contain only valid ASCII characters - otherwise this throws RuntimeErrorException<>.
Supported Types: o Memory::StackBuffer<char> o string o u8string (note any ASCII string is also legit utf-8)
nonvirtual optional< T > Stroika::Foundation::Characters::String::AsASCIIQuietly | ( | ) | const |
Convert String losslessly into a standard C++ type. Only specifically specialized variants are supported (right now just <string> supported). If this source contains any invalid ASCII characters, this returns nullopt, and else a valid engaged string.
Supported Types(T): o Memory::StackBuffer<char> o string o u8string (note any valid ASCII string is also valid utf-8)
nonvirtual PeekSpanData Stroika::Foundation::Characters::String::GetPeekSpanData | ( | ) | const |
return the constant character data inside the string in the form of a case variant union of different span types (at least one will be there). The templated type argument is just used to pick a preferred type.
This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since this is reasonably likely to change in future versions.
|
static |
return the constant character data inside the string in the form of a span or nullopt if not available for that CHAR_TYPE
This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since this is reasonably likely to change in future versions.
Definition at line 906 of file String.inl.
|
static |
return the constant character data inside the string (rep) in the form of a span, possibly quickly and directly, and possibly copied into possiblyUsedBuffer
This API will typically return a span of data which is internal pointers into the data of the rep (and so it's invalidated on the next change to the string).
BUT - it may be a span of data stored into the argument possiblyUsedBuffer (which is why it must be provided - cannot be nullptr). If you want the freedom to not pass in this buffer, see the PeekData API.
Definition at line 961 of file String.inl.
bool Stroika::Foundation::Characters::String::operator== | ( | const String & | rhs | ) | const |
Return true if case sensitive compare of the two IConvertibleToString objects have the same characters. Indirects to EqualsComparer{eWithCase} (...)
Definition at line 1109 of file String.inl.
strong_ordering Stroika::Foundation::Characters::String::operator<=> | ( | const String & | rhs | ) | const |
Return strong_ordering of case sensitive (three-way) compare of the two IConvertibleToString objects. Indirects to ThreeWayComparer{eWithCase} (...)
Definition at line 1099 of file String.inl.
|
noexcept |
Definition at line 1045 of file String.inl.
tuple< const wchar_t *, wstring_view > Stroika::Foundation::Characters::String::c_str | ( | Memory::StackBuffer< wchar_t > * | possibleBackingStore | ) | const |
Mitigating this, the non-const c_str() still will return non-null, and the const overload taking StackBuffer<wchar_t> will also guarantee returning non-null. In the case of the overloads taking no arguments, the lifetime of the returned pointer is until the next change to this string. In the case of the StackBuffer overload, the guarantee extends for the lifetime of the argument buffer (typically just the next few lines of code). This will always return a value which is NUL-terminated.
Note also - the c_str () function CAN now be somewhat EXPENSIVE, causing a mutation of the String object, so use one of the const overloads where possible (or where performance matters).
And why the string-view part? Because sometimes you want the LENGTH. Sure - you can just compute it again. But that is costly. Sure you can just use the original string length. BUT THAT WOULD BE A BUG once I support surrogates properly (at least on windows where wchar_t isn't char32_t).
Definition at line 1049 of file String.inl.
size_t Stroika::Foundation::Characters::String::find | ( | Character | c, |
size_t | startAt = 0 |
||
) | const |
Follow the basic_string<>::find () API
need more overloads.
Returns String::npos if not found, else the zero based index.
Definition at line 1061 of file String.inl.
size_t Stroika::Foundation::Characters::String::rfind | ( | Character | c | ) | const |
Follow the basic_string<>::rfind () API
need more overloads.
Returns String::npos if not found, else the zero based index.
Definition at line 1069 of file String.inl.
Character Stroika::Foundation::Characters::String::front | ( | ) | const |
mimic https://en.cppreference.com/w/cpp/string/basic_string/front
Definition at line 1080 of file String.inl.
Character Stroika::Foundation::Characters::String::back | ( | ) | const |
mimic https://en.cppreference.com/w/cpp/string/basic_string/back
Definition at line 1073 of file String.inl.
Compatible with STL::basic_string::substr() - which interprets second argument as count. Not the same as Stroika::String::SubString (where the second argument is a 'to')
From http://en.cppreference.com/w/cpp/string/basic_string/substr Returns a substring [pos, pos+count). If the requested substring extends past the end of the string, or if count == npos, the returned substring is [pos, size()). Throws std::out_of_range if pos > size()
Definition at line 1086 of file String.inl.
auto Stroika::Foundation::Characters::String::mk_nocheck_ | ( | span< const CHAR_T > | s | ) | -> shared_ptr<_IRep> |
We want to TARGET using block-allocator of 64 bytes. This works well for typical (x86) machine caches, and divides up nicely, and leaves enough room for a decent number of characters typically.
So compute/guesstimate a few sizes, and add static_asserts to check where we can. Often if these fail you can just get rid of or fix them. Not truly counted on, just trying to generate a vaguely reasonable number of characters to use.
Definition at line 617 of file String.cpp.
|
staticconstexpr |