Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
Stroika::Foundation::Characters::String Class Reference

String is like std::u32string, except it is much easier to use, often much more space efficient, and more easily interoperates with other string types. More...

#include <String.h>

Inheritance diagram for Stroika::Foundation::Characters::String:
Stroika::Foundation::Traversal::Iterable< Character > Stroika::Foundation::IO::Network::UniformResourceIdentification::SchemeType

Classes

class  _IRep
 
struct  EqualsComparer
 
struct  LessComparer
 very similar to ThreeWayComparer but returns true if less More...
 
struct  PeekSpanData
 Summary data for raw contents of rep - each rep will support at least one of these span forms. More...
 

Public Member Functions

 String ()
 
template<typename T >
requires (is_convertible_v<T, String>)
nonvirtual String Concatenate (T &&rhs) const
 appends 'rhs' string to this string (without modifying this string) and returns the combined string
 
nonvirtual size_t size () const noexcept
 
nonvirtual const Character operator[] (size_t i) const noexcept
 return (read-only) Character object
 
nonvirtual String InsertAt (Character c, size_t at) const
 
nonvirtual String RemoveAt (size_t charAt) const
 
nonvirtual String RemoveFirstIf (Character c) const
 
nonvirtual String RemoveAll (Character c) const
 
template<typename SZ >
nonvirtual String SubString (SZ from) const
 
template<typename SZ >
nonvirtual String SafeSubString (SZ from) const
 
nonvirtual String Repeat (unsigned int count) const
 
nonvirtual bool Contains (Character c, CompareOptions co=eWithCase) const
 
nonvirtual bool StartsWith (const Character &c, CompareOptions co=eWithCase) const
 
nonvirtual bool EndsWith (const Character &c, CompareOptions co=eWithCase) const
 
nonvirtual String AssureEndsWith (const Character &c, CompareOptions co=eWithCase) const
 Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
 
nonvirtual bool Matches (const RegularExpression &regEx) const
 
nonvirtual optional< size_t > Find (Character c, CompareOptions co=eWithCase) const
 
nonvirtual Containers::Sequence< pair< size_t, size_t > > FindEach (const RegularExpression &regEx) const
 
nonvirtual Containers::Sequence< RegularExpressionMatch > FindEachMatch (const RegularExpression &regEx) const
 
nonvirtual Containers::Sequence< String > FindEachString (const RegularExpression &regEx) const
 
nonvirtual optional< size_t > RFind (Character c) const noexcept
 
nonvirtual String Replace (size_t from, size_t to, const String &replacement) const
 
nonvirtual String ReplaceAll (const RegularExpression &regEx, const String &with) const
 
nonvirtual String NormalizeTextToNL () const
 
nonvirtual String NormalizeSpace (Character useSpaceCharacter=' ') const
 Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument space character).
 
nonvirtual Containers::Sequence< String > Tokenize () const
 
nonvirtual Containers::Sequence< String > AsLines () const
 break the String into a series of lines;
 
nonvirtual Containers::Sequence< String > Grep (const String &fgrepArg) const
 Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with .Map<>) producing a subset of the lines which match.
 
nonvirtual optional< String > Col (size_t i) const
 Useful to replace 'awk print $3' - replace with Col(2) - zero based.
 
nonvirtual String ColValue (size_t i, const String &valueIfMissing={}) const
 see Col(i) - but with default value of empty string
 
nonvirtual String LTrim (bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
 
nonvirtual String RTrim (bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
 
nonvirtual String Trim (bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
 
nonvirtual String StripAll (bool(*removeCharIf)(Character)) const
 
nonvirtual String ToLowerCase () const
 
nonvirtual String ToUpperCase () const
 
nonvirtual bool IsWhitespace () const
 
nonvirtual String LimitLength (size_t maxLen, StringShorteningPreference keepPref=StringShorteningPreference::ePreferKeepLeft) const
 return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if truncated)
 
template<IUNICODECanAlwaysConvertTo CHAR_T>
requires (not is_const_v<CHAR_T>)
nonvirtual span< CHAR_T > CopyTo (span< CHAR_T > s) const
 
template<typename T >
requires (IBasicUNICODEStdString<T> or same_as<T, String> or constructible_from<T, wstring>)
nonvirtual T As () const
 
nonvirtual string AsNarrowString (const locale &l) const
 
template<typename T = u8string>
requires (same_as<T, string> or same_as<T, u8string>)
nonvirtual T AsUTF8 () const
 
template<typename T = u16string>
requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>))
nonvirtual T AsUTF16 () const
 
template<typename T = u32string>
requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>))
nonvirtual T AsUTF32 () const
 
nonvirtual SDKString AsSDKString () const
 
nonvirtual string AsNarrowSDKString () const
 
template<typename T = string>
requires requires (T* into) { { into->empty () } -> same_as<bool>; { into->push_back (ASCII{0}) }; }
nonvirtual T AsASCII () const
 
template<typename T = string>
requires requires (T* into) { { into->empty () } -> same_as<bool>; { into->push_back (ASCII{0}) }; }
nonvirtual optional< T > AsASCIIQuietly () const
 
template<IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE = ASCII>
nonvirtual PeekSpanData GetPeekSpanData () const
 return the constant character data inside the string in the form of a case variant union of different span types (at least one will be there) templated type arg just used to pick a preferred type.
 
nonvirtual bool operator== (const String &rhs) const
 
nonvirtual strong_ordering operator<=> (const String &rhs) const
 
nonvirtual size_t length () const noexcept
 
nonvirtual tuple< const wchar_t *, wstring_view > c_str (Memory::StackBuffer< wchar_t > *possibleBackingStore) const
 
nonvirtual size_t find (Character c, size_t startAt=0) const
 
nonvirtual size_t rfind (Character c) const
 
nonvirtual Character front () const
 
nonvirtual Character back () const
 
nonvirtual String substr (size_t from, size_t count=npos) const
 
template<typename CHAR_T >
requires (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1> or same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>)
auto mk_nocheck_ (span< const CHAR_T > s) -> shared_ptr< _IRep >
 
- Public Member Functions inherited from Stroika::Foundation::Traversal::Iterable< Character >
 Iterable (const Iterable &) noexcept=default
 Iterable are safely copyable (by value). Since Iterable uses COW, this just copies the underlying pointer and increments the reference count.
 
 Iterable (Iterable &&) noexcept=default
 Iterable are safely moveable.
 
 Iterable (CONTAINER_OF_T &&from)
 
 Iterable (const initializer_list< Character > &from)
 
nonvirtual operator bool () const
 
nonvirtual Iterator< Character > MakeIterator () const
 Create an iterator object which can be used to traverse the 'Iterable'.
 
nonvirtual size_t size () const
 Returns the number of items contained.
 
nonvirtual bool empty () const
 Returns true iff size() == 0.
 
nonvirtual bool Contains (ArgByValueType< Character > element, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}) const
 
nonvirtual Iterator< Character > begin () const
 Support for ranged for, and STL syntax in general.
 
nonvirtual void Apply (const function< void(ArgByValueType< Character > item)> &doToElement, Execution::SequencePolicy seq=Execution::SequencePolicy::eDEFAULT) const
 Run the argument function (or lambda) on each element of the container.
 
nonvirtual Iterator< Character > Find (THAT_FUNCTION &&that, Execution::SequencePolicy seq=Execution::SequencePolicy::eDEFAULT) const
 Run the argument bool-returning function (or lambda) on each element of the container, and return an iterator pointing at the first element (depending on seq) found true. (or use First() to do same thing but return optional<>)
 
nonvirtual CONTAINER_OF_T As (CONTAINER_OF_T_CONSTRUCTOR_ARGS... args) const
 
nonvirtual Character Nth (ptrdiff_t n) const
 Find the Nth element of the Iterable<>
 
nonvirtual Character NthValue (ptrdiff_t n, ArgByValueType< Character > defaultValue={}) const
 Find the Nth element of the Iterable<>, but allow for n to be out of range, and just return argument default-value.
 
nonvirtual RESULT_CONTAINER Where (INCLUDE_PREDICATE &&includeIfTrue) const
 produce a subset of this iterable where argument function returns true
 
nonvirtual Iterable< Character > Distinct (EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}) const
 
nonvirtual RESULT_CONTAINER Map (ELEMENT_MAPPER &&elementMapper) const
 functional API which iterates over all members of an Iterable, applies a map function to each element, and collects the results in a new Iterable
 
nonvirtual optional< REDUCED_TYPE > Reduce (const function< REDUCED_TYPE(ArgByValueType< Character >, ArgByValueType< Character >)> &op) const
 Walk the entire list of items, and use the argument 'op' to combine (reduce) items to a resulting single item.
 
nonvirtual REDUCED_TYPE ReduceValue (const function< REDUCED_TYPE(ArgByValueType< Character >, ArgByValueType< Character >)> &op, ArgByValueType< REDUCED_TYPE > defaultValue={}) const
 
nonvirtual RESULT_T Join (const CONVERT_TO_RESULT &convertToResult=kDefaultToStringConverter<>, const COMBINER &combiner=Characters::kDefaultStringCombiner) const
 ape the JavaScript/python 'join' function - take the parts of 'this' iterable and combine them into a new object (typically a string)
 
nonvirtual Iterable< Character > Skip (size_t nItems) const
 
nonvirtual Iterable< Character > Take (size_t nItems) const
 
nonvirtual Iterable< Character > Slice (size_t from, size_t to) const
 
nonvirtual Iterable< Character > Top () const
 return the top/largest (possibly just top N) values from this Iterable<T>
 
nonvirtual Iterable< Character > OrderBy (INORDER_COMPARER_TYPE &&inorderComparer=INORDER_COMPARER_TYPE{}, Execution::SequencePolicy seq=Execution::SequencePolicy::ePar) const
 
nonvirtual bool IsOrderedBy (INORDER_COMPARER_TYPE &&inorderComparer=INORDER_COMPARER_TYPE{}) const
 
nonvirtual optional< Character > First () const
 return first element in iterable, or if 'that' specified, first where 'that' is true, (or return nullopt if none)
 
nonvirtual Character FirstValue (ArgByValueType< Character > defaultValue={}) const
 return first element in iterable provided default
 
nonvirtual optional< Character > Last () const
 return last element in iterable, or if 'that' specified, last where 'that' is true, (or return missing)
 
nonvirtual Character LastValue (ArgByValueType< Character > defaultValue={}) const
 
nonvirtual bool All (const function< bool(ArgByValueType< Character >)> &testEachElt) const
 return true iff argument predicate returns true for each element of the iterable
 
nonvirtual optional< Character > Min () const
 
nonvirtual RESULT_TYPE MinValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const
 
nonvirtual optional< Character > Max () const
 
nonvirtual RESULT_TYPE MaxValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const
 
nonvirtual optional< RESULT_TYPE > Mean () const
 
nonvirtual RESULT_TYPE MeanValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const
 
nonvirtual optional< RESULT_TYPE > Sum () const
 
nonvirtual RESULT_TYPE SumValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const
 
nonvirtual optional< RESULT_TYPE > Median (const INORDER_COMPARE_FUNCTION &compare={}) const
 
nonvirtual RESULT_TYPE MedianValue (ArgByValueType< RESULT_TYPE > defaultValue={}) const
 
nonvirtual Iterable< Character > Repeat (size_t count) const
 
nonvirtual bool Any () const
 Any() same as not empty (); Any (includeIfTrue) returns true iff includeIfTrue returns true on any values in iterable.
 
nonvirtual size_t Count () const
 with no args, same as size, with function filter arg, returns number of items that pass.
 
nonvirtual size_t length () const
 STL-ish alias for size() - really in STL only used in string, I think, but still makes sense as an alias.
 

Static Public Member Functions

template<typename CHAR_T >
requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>)
static String FromUTF8 (span< CHAR_T > from)
 
static String FromSDKString (const SDKChar *from)
 
static String FromNarrowSDKString (const char *from)
 
static String FromNarrowString (const char *from, const locale &l)
 
template<size_t SIZE, IUNICODECanUnambiguouslyConvertFrom CHAR_T>
static String FromStringConstant (const CHAR_T(&cString)[SIZE])
 Take the given argument data (constant span) - which must remain unchanged - constant - for the application lifetime - and treat it as a Stroika String object.
 
template<IUNICODECanUnambiguouslyConvertFrom CHAR_T>
static String FromLatin1 (const CHAR_T *cString)
 
static String Join (const Iterable< String > &list, const String &separator=", "sv)
 
template<IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
static optional< span< const CHAR_TYPE > > PeekData (const PeekSpanData &pds)
 return the constant character data inside the string in the form of a span or nullopt if not available for that CHAR_TYPE
 
template<IUNICODECanAlwaysConvertTo CHAR_TYPE, size_t STACK_BUFFER_SZ>
static span< const CHAR_TYPE > GetData (const PeekSpanData &pds, Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *possiblyUsedBuffer)
 return the constant character data inside the string (rep) in the form of a span, possibly quickly and directly, and possibly copied into possiblyUsedBuffer
 
- Static Public Member Functions inherited from Stroika::Foundation::Traversal::Iterable< Character >
static bool SetEquals (const LHS_CONTAINER_TYPE &lhs, const RHS_CONTAINER_TYPE &rhs, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{})
 
static bool MultiSetEquals (const LHS_CONTAINER_TYPE &lhs, const RHS_CONTAINER_TYPE &rhs, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{})
 
static bool SequentialEquals (const LHS_CONTAINER_TYPE &lhs, const RHS_CONTAINER_TYPE &rhs, EQUALS_COMPARER &&equalsComparer=EQUALS_COMPARER{}, bool useIterableSize=false)
 
static constexpr default_sentinel_t end () noexcept
 Support for ranged for, and STL syntax in general.
 

Static Public Attributes

static constexpr size_t npos = static_cast<size_t> (-1)
 
- Static Public Attributes inherited from Stroika::Foundation::Traversal::Iterable< Character >
static const function< RESULT_T(Character)> kDefaultToStringConverter
 

Protected Member Functions

 String (const shared_ptr< _IRep > &rep) noexcept
 
- Protected Member Functions inherited from Stroika::Foundation::Traversal::Iterable< Character >
 Iterable (const shared_ptr< _IRep > &rep) noexcept
 Iterable's are typically constructed as concrete subtype objects, whose CTOR passed in a shared copyable rep.
 
nonvirtual Memory::SharedByValue_State _GetSharingState () const
 

Additional Inherited Members

- Public Types inherited from Stroika::Foundation::Traversal::Iterable< Character >
using value_type = Character
 value_type is an alias for the type iterated over - like vector<T>::value_type
 
using iterator = Iterator< Character >
 
using const_iterator = Iterator< Character >
 
- Protected Types inherited from Stroika::Foundation::Traversal::Iterable< Character >
using _SharedByValueRepType = Memory::SharedByValue< _IRep, Memory::SharedByValue_Traits< _IRep, shared_ptr< _IRep >, Rep_Cloner_ > >
 Lazy-copying smart pointer mostly used by implementors (can generally be ignored by users). However, protected because manipulation needed in some subclasses (rarely) - like UpdatableIteratable.
 
- Protected Attributes inherited from Stroika::Foundation::Traversal::Iterable< Character >
_SharedByValueRepType _fRep
 

Detailed Description

String is like std::u32string, except it is much easier to use, often much more space efficient, and more easily interoperates with other string types.

The Stroika String class is conceptually a sequence of (UNICODE) Characters, and so there is no obvious way to map the Stroika String to a std::string (in general). However, if you specify a codepage for conversion, or are converting to/from SDKString/SDKChar, or u8string, etc, there is builtin support for that.

EOS Handling: The Stroika String class supports having embedded NUL-characters. It also supports easy construction from NUL-terminated character strings.

Since Stroika v3, there is no longer c_str () support, since Stroika doesn't internally require NUL-terminated strings, and actively encourages different compact representations of strings (c_str() requires a choice of a particular encoding to make sense).

About spans, and the \0 NUL-termination - generally do NOT include the NUL-character in your span! Stroika strings will allow this, and treat it as just another character, but it's probably not what you meant.

Note
Narrow String handling Because the character set of strings of type 'char' is ambiguous, if you construct a String with char (char* etc) - it is somehow 'required' that the characters be ASCII. If using the FromStringConstant () API, or operator"" _k, it is checked with Require () - so assertion failure. If you construct with String::CTOR, it will generate a runtime exception (so more costly runtime checking).
Satisfies Concepts: o static_assert (regular<String>);
Thread-Safety C++-Standard-Thread-Safety
Design note - mutability vs. immutability http://stroika-bugs.sophists.com/browse/STK-968 (see about deleting deprecated APIs and remnants of mutability) and c_str()

String objects are IMMUTABLE (except for the OBVIOUS meaning case of operator= being allowed).

String reps are IMMUTABLE.

Use StringBuilder for a 'mutable' String (can be used mostly interchangeably with String).

Current Mutating methods (as of v3.0d1x) o c_str () – non-const deprecated in v3.0d13 o SetCharAt - deprecated v3.0d12 o c_str() (consider deprecating?) o operator= - deprecated v3.0d12 o clear()- deprecated v3.0d12 o Append - deprecated v3.0d12 o operator+= - deprecated v3.0d12 o erase() - deprecated v3.0d12

SOMEWHAT ironically, the only one of these methods that is hard to replace is the non-const c_str () - and maybe that's not so bad, since it is deprecated? COULD just deprecate ALL of these, and then the class is fully immutable. Probably easier to understand/reason about.

Note
Comparisons: o static_assert (totally_ordered<String>); o String::EqualsComparer, String::ThreeWayComparer and String::LessComparer provided with construction parameters to allow case insensitive compares

Definition at line 201 of file String.h.

Constructor & Destructor Documentation

◆ String() [1/2]

Stroika::Foundation::Characters::String::String ( )

All the constructors are obvious, except o NUL-characters ARE allowed in strings, except for the case of single charX* argument constructors - which find the length based on the terminating NUL-character.

o CTOR (PATHLIKE_TOSTRINGABLE&& s) - IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE carefully excludes conflicting CTOR overloads, and purpose is to allow constructing a String from anything with a 'special conversion' method to UNICODE string, such as filesystem::path.

Note
about lifetime of argument data (basic_string_view<CHAR_T> constructors) All data is copied out / saved by the end of the constructor for all constructors EXCEPT the basic_string_view<CHAR_T> constructors - where it is REQUIRED the data last 'forever'.
Precondition
for String (const basic_string_view<wchar_t>& str) - str[str.length()]=='\0';
c-string nul-terminated (which happens automatically with L"xxx"sv)
Note
'char' (using ASCII = char) constructors: Because the character-set of strings of type 'char' is ambiguous, if you construct a String with char (char* etc) - it is runtime checked that the characters are ASCII (except for the basic_string_view constructors where we check but with assertions).

This mimics the behavior in Stroika v2.1 with String::FromASCII ()

Note
the basic_string move Constructors MAY move or copy the underlying std string, but they still maintain the same requirements on their arguments as the copy basic_string constructors (eg. char must be ascii)
See also
also - FromUTF8, FromSDKString, FromNarrowSDKString, FromStringConstant, FromLatin1, which are all like constructors but with special names to avoid confusion and make clear their arguments, and not participate in overloading. Note, chose this path instead of FLAG argument and explicit on CTOR, cuz more terse.

Definition at line 266 of file String.inl.

◆ String() [2/2]

Stroika::Foundation::Characters::String::String ( const shared_ptr< _IRep > &  rep)
protectednoexcept
Precondition
rep MUST be not-null. However, with the move constructor, it may be null on exit.

Definition at line 256 of file String.inl.

Member Function Documentation

◆ FromUTF8()

template<typename CHAR_T >
requires (same_as<remove_cv_t<CHAR_T>, char8_t> or same_as<remove_cv_t<CHAR_T>, char>)
String Stroika::Foundation::Characters::String::FromUTF8 ( span< CHAR_T >  from)
static

Create a String object from a 'char-based' utf-8 encoded string.

Example Usage
EXPECT_TRUE (string{u8"שלום"} == String::FromUTF8 (u8"שלום").AsUTF8 ());
static String FromUTF8(span< CHAR_T > from)
Definition String.inl:420
Note
This is not generally needed, as you can just use the String::CTOR, but it is useful for cases like std::string -> String - where the conversion needs extra information (an assertion about character encoding of source characters).
Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.

Definition at line 420 of file String.inl.

◆ FromSDKString()

String Stroika::Foundation::Characters::String::FromSDKString ( const SDKChar *  from)
static

Create a String object from a 'SDKChar' (os-setting - current code page) encoded string. See @SDKChar See @SDKString

Note
Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.

Definition at line 447 of file String.inl.

◆ FromNarrowSDKString()

String Stroika::Foundation::Characters::String::FromNarrowSDKString ( const char *  from)
static

Create a String object from a 'char-based' (os-setting - current code page) encoded string.

Note
Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.

Definition at line 470 of file String.inl.

◆ FromNarrowString()

String Stroika::Foundation::Characters::String::FromNarrowString ( const char *  from,
const locale &  l 
)
static

Create a String object from a 'char-based' string, using the encoding from the argument locale. This throws an exception if there is an error performing the conversion.

Note
Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.

Definition at line 340 of file String.inl.

◆ FromStringConstant()

template<size_t SIZE, IUNICODECanUnambiguouslyConvertFrom CHAR_T>
String Stroika::Foundation::Characters::String::FromStringConstant ( const CHAR_T(&)  cString[SIZE])
static

Take the given argument data (constant span) - which must remain unchanged - constant - for the application lifetime - and treat it as a Stroika String object.

This allows creation of String objects with fewer memory allocations and less copying, and more efficient storage, in most situations.

The resulting String is a perfectly compliant Stroika String (somewhat akin to std::string_view vs std::string).

Example:
String tmp1 = "FRED";
String tmp2 = String{"FRED"};
String tmp3 = String::FromStringConstant ("FRED"); // same as 2 above, but faster
String tmp4 = "FRED"sv; // equivalent to FromStringConstant
String tmp5 = "FRED"_k; // equivalent to FromStringConstant
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
static String FromStringConstant(const CHAR_T(&cString)[SIZE])
Take the given argument data (constant span) - which must remain unchanged - constant - for the appli...
Definition String.inl:386

WARNING - BE VERY CAREFUL - be sure arguments have application lifetime (intended use case is C string literals).

Precondition
argument string MAY contain embedded nul characters (but for char* overloads wrong size inferred).
Note
In Stroika v2.1 this was called class String_ExternalMemoryOwnership_ApplicationLifetime.
In Stroika v2.1 this was called class String_Constant.
In Stroika v2.1 this required NUL-char termination, but no longer
FromStringConstant with 'char' - REQUIRES that the char elements are ASCII (someday this may be lifted and interpreted as Latin1). For the case of char, we also do not check/require the nul-termination bit.

Definition at line 386 of file String.inl.

◆ FromLatin1()

template<IUNICODECanUnambiguouslyConvertFrom CHAR_T>
String Stroika::Foundation::Characters::String::FromLatin1 ( const CHAR_T *  cString)
static

Create a String object from UNICODE Latin-1 Supplement (https://en.wikipedia.org/wiki/Latin-1_Supplement)

This is roughly, but not exactly, the same as the ISO-Latin-1 single-byte character set (https://en.wikipedia.org/wiki/ISO/IEC_8859-1)

Note
if character code point >= 256, this will throw an exception - not defined for that range (only checked if sizeof (CHAR_T) > 1)
Aliases
From8bitASCII () or FromExtendedASCII ()

Definition at line 355 of file String.inl.

◆ Concatenate()

template<typename T >
requires (is_convertible_v<T, String>)
nonvirtual String Stroika::Foundation::Characters::String::Concatenate ( T &&  rhs) const

appends 'rhs' string to this string (without modifying this string) and returns the combined string

See also
Append() for a similar function that modifies 'this'

◆ size()

size_t Stroika::Foundation::Characters::String::size ( ) const
noexcept

Returns the number of characters in the String. Note that this may not be the same as bytes, does not include NUL termination, and doesn't in any way respect NUL termination (meaning a nul-character is allowed in a Stroika string).

Aliases
GetLength ()

Definition at line 534 of file String.inl.

◆ operator[]()

const Character Stroika::Foundation::Characters::String::operator[] ( size_t  i) const
noexcept

return (read-only) Character object

Aliases
GetCharAt (size_t i) const;
Note
returns const due to https://stroika.atlassian.net/browse/STK-376 - so cannot accidentally have illusion of assignment being legal

Definition at line 735 of file String.inl.

◆ InsertAt()

String Stroika::Foundation::Characters::String::InsertAt ( Character  c,
size_t  at 
) const

InsertAt() constructs a new string by taking this string, and inserting the argument characters.

Note that for repeated insertions, this is much less efficient than just using StringBuilder.

Note
that if at == this->size (), you are appending.

Definition at line 715 of file String.inl.

◆ RemoveAt()

String Stroika::Foundation::Characters::String::RemoveAt ( size_t  charAt) const

Remove the characters at 'charAt' (RemoveAt/1) or between 'from' and 'to' (const method - doesn't modify this)

It is an error if this implies removing characters off the end of the string.

Example Usage
String mungedData = "04 July 2014";
if (optional<pair<size_t, size_t>> i = mungedData.Find (RegularExpression{"0[^\b]"})) {
mungedData = mungedData.RemoveAt (*i);
}
RegularExpression is a compiled regular expression which can be used to match on a String class.
nonvirtual String RemoveAt(size_t charAt) const
Definition String.inl:604
nonvirtual optional< size_t > Find(Character c, CompareOptions co=eWithCase) const
Definition String.inl:681
Precondition
(charAt < size ())
(from <= to)
(to <= size ())

Note that this is quite inefficient: consider using StringBuilder

Definition at line 604 of file String.inl.

◆ RemoveFirstIf()

String String::RemoveFirstIf ( Character  c) const

Remove the first occurrence of Character 'c' (or subString) from the string. Not an error if none found. Doesn't modify this (const method) - returns resulting string.

Note that this is quite inefficient: consider using StringBuilder

Definition at line 807 of file String.cpp.

◆ RemoveAll()

String String::RemoveAll ( Character  c) const

Remove all occurrences of Character 'c' (or subString) from this string (walking front to back - if removal creates one, it too is removed). Not an error if none found. Doesn't modify this (const method) - returns resulting string.

Definition at line 823 of file String.cpp.

◆ SubString()

template<typename SZ >
nonvirtual String Stroika::Foundation::Characters::String::SubString ( SZ  from) const

OVERLOADS WITH size_t:

Produce a substring of this string, starting at 'from', and up to 'to' (or end of string for one-arg overload).

NB This function treats the second argument differently than String::substr () - which respects the STL basic_string API. This function treats the second argument as a 'to', STL substr() treats it as a count. This amounts to the same thing for the very common cases of substr(N) - because second argument is defaulted, and, substr (0, N) - because then the count and end are the same.

Precondition
(from <= to);
(to <= size ()); // for 2-arg variant
Example Usage
String tmp { "This is good" };
Assert (tmp.SubString (5) == "is good");
Example Usage
const String kTest_ { "a=b"sv };
const String kLbl2LookFor_ { "a="_k };
if (resultLine.Find (kLbl2LookFor_)) {
String tmp { resultLine.SubString (kLbl2LookFor_.length ()) };
}
Assert (tmp == "b");
nonvirtual String SubString(SZ from) const

OVERLOADS WITH ptrdiff_t:

This is like SubString() except that if from/to are negative, they are treated as relative to the end of the String.

So for example, SubString (0, -1) is equivalent to SubString (0, size () - 1) - and so is an error if the string is empty.

Similarly, SubString (-5) is equivalent to SubString (size ()-5, size ()) - so can be used to grab the end of a string.

Precondition
(adjustedFrom <= adjustedTo);
(adjustedTo <= size ()); // for 2-arg variant
Note
Design Note We chose not to overload SubString() with this functionality because it would have been too easy to mask bugs.
Design Note This was originally inspired by Python arrays. From https://docs.python.org/2/tutorial/introduction.html: Indices may also be negative numbers, to start counting from the right
Aliases
This API is identical to the javascript String.slice () method/behavior
@see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
     15.5.4.13 String.prototype.slice (start, end)
Aliases
This API - when called with negative indexes - used to be called CircularSubString ().
See also
substr
SafeSubString

◆ SafeSubString()

template<typename SZ >
nonvirtual String Stroika::Foundation::Characters::String::SafeSubString ( SZ  from) const

Like SubString(), but no requirements on from/to. These are just adjusted to the edge of the string if they exceed those endpoints. And if arguments are <0, they are interpreted as end-relative.

Aliases
This API - when called with negative indexes - used to be called SafeCircularSubString ().
See also
substr
SubString

◆ Repeat()

String String::Repeat ( unsigned int  count) const

Return 'count' copies of this String (concatenated after one another).

Definition at line 1424 of file String.cpp.

◆ Contains()

bool Stroika::Foundation::Characters::String::Contains ( Character  c,
CompareOptions  co = eWithCase 
) const

Returns true if the argument character or string is found anywhere inside this string. This is equivalent to return Matches (".*" + X + ".*"); // If X had no characters which look like they are part of // a regular expression

See also
Match

Definition at line 693 of file String.inl.

◆ StartsWith()

bool String::StartsWith ( const Character c,
CompareOptions  co = eWithCase 
) const

Returns true iff this string starts with the given character or substring.

Similar to: return Matches (X + ".*"); except that StartsWith() doesn't interpret 'X' as a regular expression

Precondition
not subString.empty () – for the subString overload (because otherwise "".StartsWith("") would be ill-defined)
See also
Match
EndsWith

Definition at line 1059 of file String.cpp.

◆ EndsWith()

bool String::EndsWith ( const Character c,
CompareOptions  co = eWithCase 
) const

Returns true iff this string ends with the given character or substring.

Similar to: return Matches (".*" + X); except that EndsWith() doesn't interpret 'X' as a regular expression

Precondition
not subString.empty () – for the subString overload (because otherwise "".EndsWith("") would be ill-defined)
See also
Match
StartsWith

Definition at line 1088 of file String.cpp.

◆ AssureEndsWith()

String String::AssureEndsWith ( const Character c,
CompareOptions  co = eWithCase 
) const

Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.

Note
this is too specific-purpose to be a very sensible API, but I find it pretty often pretty useful. So what-the-heck.

Definition at line 1123 of file String.cpp.

◆ Matches()

bool String::Matches ( const RegularExpression regEx) const

Apply the given regular expression, and return true if it matches this string. This only returns true if the expression matches the ENTIRE string - all the way to the end.

See also
FindEach() or
Find - to find a set of things which match.
Example Usage
Assert (String{"abc"}.Matches ("abc"));
Assert (not (String{"abc"}.Matches ("bc")));
Assert (String{"abc"}.Matches (".*bc"));
Assert (not String{"abc"}.Matches ("b.*c"));
nonvirtual bool Matches(const RegularExpression &regEx) const
Definition String.cpp:1133
Example Usage
static const RegularExpression kSonosRE_{"([0-9.:]*)( - .*)"_RegEx};
static const String kTestStr_{"192.168.244.104 - Sonos Play:5"};
optional<String> match1;
optional<String> match2;
EXPECT_TRUE (kTestStr_.Matches (kSonosRE_, &match1, &match2) and match1 == "192.168.244.104" and match2 == " - Sonos Play:5");
EXPECT_EQ (kTestStr_.Matches<1> (kSonosRE_), make_tuple ("192.168.244.104"_k));
EXPECT_EQ (kTestStr_.Matches<2> (kSonosRE_), make_tuple ("192.168.244.104"_k, "Sonos Play:5"_k));
Example Usage
// https://tools.ietf.org/html/rfc3986#appendix-B
static const RegularExpression kParseURLRegExp_{"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"_RegEx};
optional<String> scheme;
optional<String> authority;
optional<String> path;
optional<String> query;
optional<String> fragment;
if (rawURL.Matches (kParseURLRegExp_, nullptr, &scheme, nullptr, &authority, &path, nullptr, &query, nullptr, &fragment)) {
DbgTrace ("***good - scheme={}"_f, scheme);
DbgTrace ("***good - authority={}"_f, authority);
DbgTrace ("***good - path={}"_f, path);
DbgTrace ("***good - query={}"_f, query);
DbgTrace ("***good - fragment={}"_f, fragment);
}
#define DbgTrace
Definition Trace.h:309
Example Usage
// Praat 6.4.23 (October 27 2024)
String processRunnerOutput = "Praat 6.4.23 (October 27 2024)";
String version;
return processRunnerOutput.Matches ("(\\w+)\\s([\\w\\.]+).*"_RegEx, nullptr, &version)? version: "???"sv;
return get<1> (processRunnerOutput.Matches<1> ("\\w+\\s([\\w\\.]+).*"_RegEx).value_or(make_tuple("???"_k))); // Or better

Details on the regular expression language/format can be found at: http://en.wikipedia.org/wiki/C%2B%2B11#Regular_expressions

Note
If any 'sub-match' arguments are passed to Matches, they MUST be of type optional<String>* or nullptr. Passing nullptr allows matched parameters to not be returned, but still identified positionally (by index).
See also
Contains
StartsWith
EndsWith
Find
FindEach

Definition at line 1133 of file String.cpp.

◆ Find()

optional< size_t > Stroika::Foundation::Characters::String::Find ( Character  c,
CompareOptions  co = eWithCase 
) const

Find returns the index of the first occurrence of the given Character/substring argument in this string. Find () always returns a valid string index, which is followed by the given substring, or nullopt otherwise.

Find () can optionally be provided a 'startAt' offset to begin the search at.

And the overload taking a RegularExpression - returns BOTH the location where the match is found, as well as the end of the match.

Note - for the special case of Find(empty-string) - the return value is 0 if this string is non-empty, and nullopt if this string was empty.

Aliases
  • could have been called IndexOf ()
Precondition
(startAt <= size ());
Example Usage
const String kTest_{ "a=b" };
const String kLbl2LookFor_ { "a=" };
if (kTest_.Find (kLbl2LookFor_)) {
String tmp { kTest_.SubString (kLbl2LookFor_.length ()) };
}
Assert (tmp == "b");
See also
FindEach ()
FindEachString ()
Tokenize

Definition at line 681 of file String.inl.

◆ FindEach()

Containers::Sequence< pair< size_t, size_t > > String::FindEach ( const RegularExpression regEx) const

This is just like Find, but captures all the matching results in an iterable result. The reason the overload for RegularExpression's returns a list of pair<size_t,size_t> is because the endpoint of the match is ambiguous. For fixed string Find, the end of match is computable from the arguments.

FindEach () can be more handy to use than directly using Find () in scenarios where you want to iterate over each match: e.g.: for (auto i : s.FindEach ("xxx")) {....}

Also, to count matches, you can use: size_t nMatches = FindEach (matchexp).size ();

Note: FindEach handles the special case of an empty match as ignored, so FindEach(empty-str-or-regexp) always returns an empty list. Also - for the String case, it returns distinct matches, so if you search String{"AAAA"}.FindEach ("AA"), you will get 2 answers ({0, 2}).

See also
Find ()
FindEachString ()
Matches ()

Definition at line 966 of file String.cpp.

◆ FindEachMatch()

Containers::Sequence< RegularExpressionMatch > String::FindEachMatch ( const RegularExpression regEx) const
Example Usage
const String kTest_{ "a=b,"sv };
const RegularExpression kRE_{ "a=(.*)[, ]" };
Sequence<String> tmp1{ kTest_.FindEachString (kRE_) };
Assert (tmp1.size () == 1 and tmp1[0] == "a=b,");
Sequence<RegularExpressionMatch> tmp2 { kTest_.FindEachMatch (kRE_) };
Assert (tmp2.size () == 1 and tmp2[0].GetFullMatch () == "a=b," and tmp2[0].GetSubMatches () == Sequence<String>{"b"});
See also
Find ()
FindEachString ()
Matches ()

Definition at line 984 of file String.cpp.

◆ FindEachString()

Containers::Sequence< String > String::FindEachString ( const RegularExpression regEx) const
Example Usage
const String kTest_ { "a=b, c=d"_k };
const RegularExpression kRE_ { "(.)=(.)" };
Assert ((kTest_.FindEachString (kRE_) == vector<String>{"a=b", "c=d"}));
See also
Find ()
FindEachMatch ()
Matches ()

Definition at line 1001 of file String.cpp.

◆ RFind()

optional< size_t > String::RFind ( Character  c) const
noexcept

RFind (substring) returns the index of the last occurrence of the given substring in this string. This function always returns a valid string index, which is followed by the given substring, or optional<size_t> {} otherwise.

Aliases
RIndexOf ()

Definition at line 1011 of file String.cpp.

◆ Replace()

String String::Replace ( size_t  from,
size_t  to,
const String replacement 
) const

Replace the range of this string with the given replacement. Const method: just creates new string as described.

Definition at line 1045 of file String.cpp.

◆ ReplaceAll()

String String::ReplaceAll ( const RegularExpression regEx,
const String with 
) const

Apply the given regular expression, with 'with' and replace each match. This doesn't modify this string, but returns the replacement string.

CHECK - BUT I THINK WE DEFINE TO REPLACE ALL? OR MAKE PARAM? See regex_replace () for definition of the regEx language

Require (not string2SearchFor.empty ());

Example Usage
mungedData = mungedData.ReplaceAll (RegularExpression{ "\\b0+" }, ""); // strip all leading zeros
Example Usage
String a = "a b \n\t c";
EXPECT_EQ (a.ReplaceAll (RegularExpression{"\\s+"sv}, " "sv), "a b c");
EXPECT_EQ (a.ReplaceAll ("\\s+"_RegEx, " "sv), "a b c");

Note - it IS legal to have with contain the original search for string, or even to have it 'created' as part of where it gets inserted. The implementation will only replace those that pre-existed.

Note
To perform a regular expression replace-all, which is case insensitive, create the regular expression with CompareOptions::eCaseInsensitive
ReplaceAll could have been called 'SafeString' or 'FilteredString' (was at one point - replaces that functionality)

Definition at line 1155 of file String.cpp.

◆ NormalizeTextToNL()

String String::NormalizeTextToNL ( ) const

Replace any CR or LF or CRLF sequences with plain NL-terminated text.

Definition at line 1201 of file String.cpp.

◆ NormalizeSpace()

String String::NormalizeSpace ( Character  useSpaceCharacter = ' ') const

Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument space character).

Note
see Qt 'QString::simplified()'. Idea is Trim () (right and left) - plus replace contiguous substrings of Character::IsSpace () characters with a single (given) space character.

Definition at line 1229 of file String.cpp.

◆ Tokenize()

Containers::Sequence< String > String::Tokenize ( ) const

Break this String into constituent parts. This is a simplistic API but at least handy as is.

The caller can specify the token separators by set, by lambda. This defaults to the lambda "isWhitespace".

This is often called 'Split' in other APIs. This is NOT (as is now) a replacement for flex, but just for simple, but common string splitting needs (though if I had a regexp param, it may approach the power of flex).

Note
If this->length () == 0, this method returns a list of length 0;
It's fine for the split character/characters to be missing, in which case this returns a list of length 1
Example Usage
String t { "ABC DEF G" };
Assert (t.Tokenize ().length () == 3);
Assert (t.Tokenize ()[1] == "DEF");
Example Usage
String t { "foo= 7" };
auto tt = t.Tokenize ({ '=' });
Assert (tt.length () == 2);
Assert (tt[1] == " 7");
Assert (tt[1].Trim () == "7");
nonvirtual String Trim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1592
nonvirtual Containers::Sequence< String > Tokenize() const
Definition String.cpp:1234
Example Usage
String t { "foo= 7" };
auto tt = t.Tokenize ({ '=', ' ' });
Assert (tt.length () == 2);
Assert (tt[1] == "7");
See also
Find

TODO:

Definition at line 1234 of file String.cpp.

◆ AsLines()

Sequence< String > String::AsLines ( ) const

break the String into a series of lines;

Note
could almost be done with Tokenize(), except for the one-sided nl-specific trimming.
removes line-endings (\r\n, or \n, or \r).

Definition at line 1306 of file String.cpp.

◆ Grep()

Sequence< String > String::Grep ( const String fgrepArg) const

Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with .Map<>) producing a subset of the lines which match.

note this is useful to replace 'shell script' logic where you might run some command and grep through its output for all matching lines.

Example Usage
String firstALineOrEmpty = String{"...e.g. from output of ProcessRunner..."}.Grep ("a:").NthValue (0);
nonvirtual Containers::Sequence< String > Grep(const String &fgrepArg) const
Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with ....
Definition String.cpp:1341

Definition at line 1341 of file String.cpp.

◆ Col()

optional< String > String::Col ( size_t  i) const

Useful to replace 'awk print $3' - replace with Col(2) - zero based.

default separator = "\\s+"_RegEx;

Example Usage
Assert (String{"ffmpeg version 7.1"}.Col (2) == "7.1");
nonvirtual optional< String > Col(size_t i) const
Useful to replace 'awk print $3' - replace with Col(2) - zero based.
Definition String.cpp:1362

Definition at line 1362 of file String.cpp.

◆ LTrim()

String String::LTrim ( bool(*)(Character shouldBeTrimmed = Character::IsWhitespace) const

String LTrim () scans the characters from left to right, and applies the given 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed, and the resulting string is returned. This does not modify the current string it's applied to - just returns the trimmed string.

Definition at line 1443 of file String.cpp.

◆ RTrim()

String String::RTrim ( bool(*)(Character shouldBeTrimmed = Character::IsWhitespace) const

String RTrim () scans the characters from right to left, and applies the given 'shouldBeTrimmed' function (defaults to IsWhitespace). All such characters are removed, and the resulting string is returned. This does not modify the current string it's applied to - just returns the trimmed string.

Example Usage
String name = origName.RTrim ([] (Character c) { return c == '\\';}); // Trim a trailing backslash(s), if present
nonvirtual String RTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1508

Definition at line 1508 of file String.cpp.

◆ Trim()

String String::Trim ( bool(*)(Character shouldBeTrimmed = Character::IsWhitespace) const

String Trim () is logically equivalent to RTrim (shouldBeTrimmed).LTrim (shouldBeTrimmed).

Definition at line 1592 of file String.cpp.

◆ StripAll()

String String::StripAll ( bool(*)(Character removeCharIf) const

Walk the entire string, and produce a new string consisting of all characters for which the predicate 'removeCharIf' returned false.

Definition at line 1664 of file String.cpp.

◆ Join()

String String::Join ( const Iterable< String > &  list,
const String separator = ", "sv 
)
static

Combine the given array into a single string, with elements separated by the given separator (typically comma-space). If given a list of length n, this adds n-1 separators.

Note
.Net version - https://docs.microsoft.com/en-us/dotnet/api/system.string.join?redirectedfrom=MSDN&view=net-6.0#System_String_Join_System_String_System_String___
Java version - https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#join-java.lang.CharSequence-java.lang.CharSequence...-
Javascript - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/join
- CONSIDER LOSING this as 'Iterable<>::Join' just appears to work better – LGP 2025-01-21

Definition at line 1692 of file String.cpp.

◆ ToLowerCase()

String String::ToLowerCase ( ) const

Return a new string based on this string where each upper case character is replaced by its lower case equivalent. Note that non-upper-case characters (such as punctuation) are unchanged.

Definition at line 1706 of file String.cpp.

◆ ToUpperCase()

String String::ToUpperCase ( ) const

Return a new string based on this string where each lower case character is replaced by its upper case equivalent. Note that non-lower-case characters (such as punctuation) are unchanged.

Definition at line 1744 of file String.cpp.

◆ IsWhitespace()

bool String::IsWhitespace ( ) const

Return true if the string contains zero non-whitespace characters.

Definition at line 1782 of file String.cpp.

◆ LimitLength()

String Stroika::Foundation::Characters::String::LimitLength ( size_t  maxLen,
StringShorteningPreference  keepPref = StringShorteningPreference::ePreferKeepLeft 
) const

return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if truncated)

This function is for GUI/display purposes. It returns the given string, trimmed down to at most maxLen characters, and removes whitespace (on 'to trim' side - given by keepLeft flag - if needed to get under maxLen).

Note in the 3-arg overload, the ellipsis string MAY be the empty string.

Definition at line 741 of file String.inl.

◆ CopyTo()

template<IUNICODECanAlwaysConvertTo CHAR_T>
requires (not is_const_v<CHAR_T>)
nonvirtual span< CHAR_T > Stroika::Foundation::Characters::String::CopyTo ( span< CHAR_T >  s) const

CopyTo () copies the contents of this string to the target buffer. CopyTo () does NOT nul-terminate the target buffer

Returns span of CHAR_T objects written - a subspan of the argument span

Precondition
s.size () >= UTFConvert::ComputeTargetBufferSize<CHAR_T> (...this-string-data...);
See also
See also GetData<CHAR_T> (buf) - similar functionality - except caller doesn't need to know size of buffer to allocate

◆ As()

template<typename T >
requires (IBasicUNICODEStdString<T> or same_as<T, String> or constructible_from<T, wstring>)
nonvirtual T Stroika::Foundation::Characters::String::As ( ) const

Convert String losslessly into a standard C++ type.

Only specifically specialized variants supported: IBasicUNICODEStdString<T> or same_as<T,String> o wstring o u8string o u16string o u32string o String (return *this; handy sometimes in templated usage; harmless) as well as: o filesystem::path (or anything with .wstring() -> wstring method) - note see qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin

DEPRECATED AS OF v3.0d1 because As is const method - could do non-const As<> overload for these, but that would be confusing o const wchar_t* o const Character*

Note
o As<u8string> () equivalent to AsUTF8 () call o As<u16string> () equivalent to AsUTF16 () call o As<u32string> () equivalent to AsUTF32 () call
We tried to also have template<typename T> explicit operator T () const; - conversion operator - but We got too frequent confusion in complex combinations of templates, like with: Set<String> x ( *optional<String> {String{}) ); // fails cuz calls operator Set<String> ()! Set<String> x { *optional<String> {String{}) }; // works as expected

◆ AsNarrowString()

string String::AsNarrowString ( const locale &  l) const

Create a narrow string object from this, based on the encoding from the argument locale. This throws an exception if there is an error performing the conversion, and the 'into' overload leaves 'into' in an undefined (but safe) state.

Definition at line 1838 of file String.cpp.

◆ AsUTF8()

template<typename T = u8string>
requires (same_as<T, string> or same_as<T, u8string>)
nonvirtual T Stroika::Foundation::Characters::String::AsUTF8 ( ) const

Convert String losslessly into a standard C++ type. Only specifically specialized variants are supported.

SUPPORTED result type "T": values are: string u8string

◆ AsUTF16()

template<typename T = u16string>
requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>))
nonvirtual T Stroika::Foundation::Characters::String::AsUTF16 ( ) const

Convert String losslessly into a standard C++ type u16string.

Example Usage:
String s = u"hi mom";
u16string su = s.AsUTF16 ();
Note
- the resulting string may have a different length than this->size() due to surrogates

◆ AsUTF32()

template<typename T = u32string>
requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>))
nonvirtual T Stroika::Foundation::Characters::String::AsUTF32 ( ) const

Convert String losslessly into a standard C++ type u32string.

Example Usage:
String s = u"hi mom";
u32string su = s.AsUTF32 ();
Note
- As of Stroika 2.1d23 - the resulting string may have a different length than this->size() due to surrogates, but eventually the intent is to fix Stroika's string class so this is not true, and it returns the length of the string in size () with surrogates removed (in other words uses ucs32 representation). But not there yet.

◆ AsSDKString()

SDKString Stroika::Foundation::Characters::String::AsSDKString ( ) const

See docs on SDKChar for meaning (character set).

Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot). But in that case, AsNarrowSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.

Definition at line 802 of file String.inl.

◆ AsNarrowSDKString()

string Stroika::Foundation::Characters::String::AsNarrowSDKString ( ) const

See docs on SDKChar for meaning (character set). If SDKChar is a wide character, there is probably still a default 'code page' to interpret narrow characters (Windows CP_ACP). This is a string in that character set.

Note - many UNICODE Strings cannot be represented in the SDKString character set (especially if narrow - depends a lot). But in that case, AsNarrowSDKString () will throw, unless AllowMissingCharacterErrorsFlag is specified.

Definition at line 830 of file String.inl.

◆ AsASCII()

template<typename T = string>
requires requires (T* into) { { into->empty () } -> same_as<bool>; { into->push_back (ASCII{0}) }; }
nonvirtual T Stroika::Foundation::Characters::String::AsASCII ( ) const

Convert String losslessly into a standard C++ type. Only specifically specialized variants are supported (right now just <string> supported). The source string MUST contain only valid ASCII characters; otherwise this throws RuntimeErrorException<>

Example Usage:
string a1 = String{"hi mom"}.AsASCII (); // OK
string a2 = String{u"שלום"}.AsASCII (); // throws
Note
- this is a (compatible) change of behavior: before Stroika v2.1d23, this would assert out on invalid ASCII.

Supported Types: o Memory::StackBuffer<char> o string o u8string (note any ASCII string is also legit utf-8)

◆ AsASCIIQuietly()

template<typename T = string>
requires requires (T* into) { { into->empty () } -> same_as<bool>; { into->push_back (ASCII{0}) }; }
nonvirtual optional< T > Stroika::Foundation::Characters::String::AsASCIIQuietly ( ) const

Convert String losslessly into a standard C++ type. Only specifically specialized variants are supported (right now just <string> supported). If this source contains any invalid ASCII characters, this returns nullopt, and else a valid engaged string.

Supported Types(T): o Memory::StackBuffer<char> o string o u8string (note any valid ASCII string is also valid utf-8)

◆ GetPeekSpanData()

template<IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE = ASCII>
nonvirtual PeekSpanData Stroika::Foundation::Characters::String::GetPeekSpanData ( ) const

return the constant character data inside the string in the form of a case variant union of different span types (at least one will be there) templated type arg just used to pick a preferred type.

Note
CHAR_TYPE == char implies eAscii
Reason for the two step API - getting the PeekSpanData, and then using - is because getting the data is most expensive part (virtual function), and the packaged PeekSpanData gives enuf info to do the next steps (quickly inline usually)

This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since this reasonably likely to change in future versions.

◆ PeekData()

template<IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
optional< span< const CHAR_TYPE > > Stroika::Foundation::Characters::String::PeekData ( const PeekSpanData pds)
static

return the constant character data inside the string in the form of a span or nullopt if not available for that CHAR_TYPE

This API is public, but best to avoid depending on internals of String API - like PeekSpanData - since this reasonably likely to change in future versions.

Note
It is generally true that the data IsASCII (span) IFF Peek<ASCII> returns non-nullopt. But this is not ACTUALLY always true. Generally, Stroika constructs strings like this. But callers may manually construct a String with backend rep u32string, for example (e.g. because of move construct) - and that might just happen to be all ascii. You can count on that IF you get back value from PeekData<ASCII> - it must be all ASCII. But the converse is not always true.

Definition at line 906 of file String.inl.

◆ GetData()

template<IUNICODECanAlwaysConvertTo CHAR_TYPE, size_t STACK_BUFFER_SZ>
span< const CHAR_TYPE > Stroika::Foundation::Characters::String::GetData ( const PeekSpanData pds,
Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *  possiblyUsedBuffer 
)
static

return the constant character data inside the string (rep) in the form of a span, possibly quickly and directly, and possibly copied into possiblyUsedBuffer

This API will typically return a span of data which is internal pointers into the data of the rep (and so it's invalidated on the next change to the string).

BUT - it may be a span of data stored into the argument possiblyUsedBuffer (which is why it must be provided - cannot be nullptr). If you want the freedom to not pass in this buffer, see the PeekData API.

Note
- CHAR_T must satisfy the concept IUNICODECanAlwaysConvertTo - SAFELY - because the string MIGHT contain characters not in any unsafe char class (like ASCII or Latin1), and so there might not be a way to do the conversion. Use PeekData () to do that - where it can return nullopt if no conversion possible.
Example Usage
span<const char8_t> thisData = GetData (&maybeIgnoreBuf1);
static span< const CHAR_TYPE > GetData(const PeekSpanData &pds, Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *possiblyUsedBuffer)
return the constant character data inside the string (rep) in the form of a span, possibly quickly an...
Definition String.inl:961
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
Note
Prior to Stroika v3.0d1, GetData() took no arguments, and returned pair<const CHAR_TYPE*, const CHAR_TYPE*> which is pretty similar, but not quite the same. To adapt any existing code calling that older version of the API just add a Memory::StackBuffer<T> b; and pass &b to GetData(); And the return span is not the same as pair<> but easily convertible.

Definition at line 961 of file String.inl.

◆ operator==()

bool Stroika::Foundation::Characters::String::operator== ( const String rhs) const

Return true if case sensitive compare of the two IConvertibleToString objects have the same characters. Indirects to EqualsComparer{eWithCase} (...)

Note
For reasons I don't understand, the plain function declaration of operator== and requires not do appear to be required on all major compilers due to quirks of the operator= rewrite rules, but I don't fully understand why –LGP 2024-11-18.

Definition at line 1109 of file String.inl.

◆ operator<=>()

strong_ordering Stroika::Foundation::Characters::String::operator<=> ( const String rhs) const

Return strong_ordering of case sensitive (three-way) compare of the two IConvertibleToString objects. Indirects to ThreeWayComparer{eWithCase} (...)

See also
operator== ()

Definition at line 1099 of file String.inl.

◆ length()

size_t Stroika::Foundation::Characters::String::length ( ) const
noexcept
Aliases
size

Definition at line 1045 of file String.inl.

◆ c_str()

tuple< const wchar_t *, wstring_view > Stroika::Foundation::Characters::String::c_str ( Memory::StackBuffer< wchar_t > *  possibleBackingStore) const
Note
BREAKING change between Stroika 2.1 and v3 - const c_str/0 no longer guaranteed to return non-null
  Mitigating this, the non-const c_str() still will return non-null, and the const overload taking
  StackBuffer<wchar_t> will also guarantee returning non-null.

  In the case of the overloads taking no arguments, the lifetime of the returned pointer is until the
  next change to this string.  In the case of the StackBuffer overload, the guarantee extends for the lifetime
  of the argument buffer (typically just the next few lines of code).
This will always return a value which is NUL-terminated.

Note also - the c_str () function CAN now be somewhat EXPENSIVE, causing a mutation of the String object, so use one of the const overloads where possible (or where performance matters).

Note
Why does c_str (StackBuffer) return a tuple? Sometimes you just want a plain const wchar_t* you can use with an old C pointer based API. But that fails/asserts out if you happen to have an empty string and try to get the pointer. Sometimes - you just need the pointer!

And why the string-view part? Because sometimes you want the LENGTH. Sure - you can just compute it again. But that is costly. Sure you can just use the original string length. BUT THAT WOULD BE A BUG once I support surrogates properly (at least on windows where wchar_t isn't char32_t).

Definition at line 1049 of file String.inl.

◆ find()

size_t Stroika::Foundation::Characters::String::find ( Character  c,
size_t  startAt = 0 
) const

Follow the basic_string<>::find () API

need more overloads.

Returns String::npos if not found, else the zero based index.

Definition at line 1061 of file String.inl.

◆ rfind()

size_t Stroika::Foundation::Characters::String::rfind ( Character  c) const

Follow the basic_string<>::rfind () API

need more overloads.

Returns String::npos if not found, else the zero based index.

Definition at line 1069 of file String.inl.

◆ front()

Character Stroika::Foundation::Characters::String::front ( ) const

mimic https://en.cppreference.com/w/cpp/string/basic_string/front

Precondition
not empty ()

Definition at line 1080 of file String.inl.

◆ back()

Character Stroika::Foundation::Characters::String::back ( ) const

mimic https://en.cppreference.com/w/cpp/string/basic_string/back

Precondition
not empty ()

Definition at line 1073 of file String.inl.

◆ substr()

String Stroika::Foundation::Characters::String::substr ( size_t  from,
size_t  count = npos 
) const

Compatible with std::basic_string::substr() - which interprets second argument as count. Not the same as Stroika::String::SubString (where the second argument is a 'to')

See also
SubString

From http://en.cppreference.com/w/cpp/string/basic_string/substr Returns a substring [pos, pos+count). If the requested substring extends past the end of the string, or if count == npos, the returned substring is [pos, size()). std::out_of_range if pos > size()

Definition at line 1086 of file String.inl.

◆ mk_nocheck_()

template<typename CHAR_T >
requires (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1> or same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>)
auto Stroika::Foundation::Characters::String::mk_nocheck_ ( span< const CHAR_T >  s) -> shared_ptr<_IRep>

We want to TARGET using block-allocator of 64 bytes. This works well for typical (x86) machine caches, and divides up nicely, and leaves enuf room for a decent number of characters typically.

So compute/guesstimate a few sizes, and add static_asserts to check where we can. Often if these fail you can just get rid of/or fix them. Not truly counted on, just trying to generate a vaguely reasonable number of characters to use.

Definition at line 617 of file String.cpp.

Member Data Documentation

◆ npos

constexpr size_t Stroika::Foundation::Characters::String::npos = static_cast<size_t> (-1)
staticconstexpr
Aliases
basic_string<char>::npos

This is only used for 'STL-compatibility' APIs, like substr (), find (), rfind (), etc.

Definition at line 1390 of file String.h.


The documentation for this class was generated from the following files: