4#ifndef _Stroika_Foundation_Characters_String_h_
5#define _Stroika_Foundation_Characters_String_h_ 1
7#include "Stroika/Foundation/StroikaPreComp.h"
16#include "Stroika/Foundation/Characters/SDKString.h"
19#include "Stroika/Foundation/Containers/Sequence.h"
20#include "Stroika/Foundation/Containers/Set.h"
79#ifndef qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
80#define qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin qStroika_Foundation_Common_Platform_Windows
85#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
86namespace std::filesystem {
93 class RegularExpression;
94 class RegularExpressionMatch;
104 eDEFAULT = ePreferKeepLeft,
108 using StringShorteningPreference::ePreferKeepLeft;
109 using StringShorteningPreference::ePreferKeepMid;
110 using StringShorteningPreference::ePreferKeepRight;
/**
 *  Satisfied only when T is exactly one of the four standard UNICODE string types:
 *  u8string, u16string, u32string, or wstring.
 *
 *  The check uses same_as, so it is an exact type match: reference or cv-qualified
 *  variants of those types do not satisfy it, and neither does plain std::string
 *  (which is not in the list).
 */
115 template <
typename T>
116 concept IBasicUNICODEStdString = same_as<T, u8string> or same_as<T, u16string> or same_as<T, u32string> or same_as<T, wstring>;
123 template <
typename T>
125 { t.wstring () } -> same_as<wstring>;
126 } or
requires (T t) {
127 { t.u8string () } -> same_as<u8string>;
128 } or
requires (T t) {
129 { t.u16string () } -> same_as<u16string>;
130 } or
requires (T t) {
131 { t.u32string () } -> same_as<u32string>;
141 template <
typename T>
203 using inherited = Iterable<Character>;
242 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
244 template <Memory::ISpan SPAN_OF_CHAR_T>
247 template <IStdBasicStringCompatibleCharacter CHAR_T>
249 template <IStdBasicStringCompatibleCharacter CHAR_T>
251 template <IStdBasicStringCompatibleCharacter CHAR_T>
253 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
256 template <IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE>
262 template <IStdPathLike2UNICODEString PATHLIKE_TOSTRINGABLE>
/**
 *  Static overload set keyed on the code-unit type of the view argument:
 *  char8_t, char16_t, char32_t and wchar_t. Each overload takes the view by
 *  const reference and returns a shared_ptr<_IRep>.
 *
 *  NOTE(review): presumably these build the String rep for the basic_string_view
 *  constructors; the definitions are not visible in this excerpt - confirm.
 */
267 static shared_ptr<_IRep> CTORFromBasicStringView_ (
const basic_string_view<char8_t>& str);
268 static shared_ptr<_IRep> CTORFromBasicStringView_ (
const basic_string_view<char16_t>& str);
269 static shared_ptr<_IRep> CTORFromBasicStringView_ (
const basic_string_view<char32_t>& str);
270 static shared_ptr<_IRep> CTORFromBasicStringView_ (
const basic_string_view<wchar_t>& str);
278 using _SafeReadRepAccessor = Iterable<Character>::_SafeReadRepAccessor<
_IRep>;
285 String (
const shared_ptr<_IRep>& rep)
noexcept;
286 String (shared_ptr<_IRep>&& rep)
noexcept;
306 template <
typename CHAR_T>
309 template <
typename CHAR_T>
312 template <
typename CHAR_T>
383 template <
size_t SIZE, IUNICODECanUnambiguouslyConvertFrom CHAR_T>
385 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
387 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
406 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
408 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
410 template <IStdBasicStringCompatibleCharacter CHAR_T>
419 template <
typename T>
434 nonvirtual
size_t size ()
const noexcept;
439 nonvirtual
bool empty ()
const noexcept;
1107 template <IUNICODECanAlwaysConvertTo CHAR_T>
1138 template <
typename T>
1258 template <
typename T =
string>
1297 span<const ASCII> fAscii;
1299 span<const char16_t> fChar16;
1300 span<const char32_t> fChar32;
1318 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE = ASCII>
1334 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
1336 template <IUNICODECanUnambiguouslyConvertFrom CHAR_TYPE>
1364 template <IUNICODECanAlwaysConvertTo CHAR_TYPE,
size_t STACK_BUFFER_SZ>
1366 template <IUNICODECanAlwaysConvertTo CHAR_TYPE,
size_t STACK_BUFFER_SZ>
1376 struct ThreeWayComparer;
1387 template <IConvertibleToString T>
1399 template <IConvertibleToString T>
/**
 *  Compile-time constant equal to size_t (-1), i.e. the largest value a size_t can hold.
 *
 *  NOTE(review): presumably plays the same role as std::basic_string::npos
 *  ('no position' / 'through end of string') - confirm against the callers.
 */
1409 static constexpr size_t npos =
static_cast<size_t> (-1);
1498 "().c_str ()")]]
const wchar_t*
1520 template <typename T>
1522 requires (IBasicUNICODEStdString<T> or same_as<T, String>)
1524 *into = this->As<T> ();
1526 [[deprecated ("Since Stroika
v3.0
d2,
just use 1
arg version)
")]] void AsNarrowString (const locale& l, string* into) const
1528 *into = this->AsNarrowString (l);
1530 template <typename T = u8string>
1531 [[deprecated ("Since Stroika
v3.0
d2 -
use AsUTF8/0
")]] void AsUTF8 (T* into) const
1532 requires (same_as<T, string> or same_as<T, u8string>)
1534 *into = this->AsUTF8 ();
1536 template <typename T = u16string>
1537 [[deprecated ("Since Stroika
v3.0
d2 -
use AsUTF16/0
")]] void AsUTF16 (T* into) const
1538 requires (same_as<T, u16string> or (sizeof (wchar_t) == sizeof (char16_t) and same_as<T, wstring>))
1542 template <typename T = u32string>
1543 [[deprecated ("Since Stroika
v3.0
d2 -
use AsUTF32/0
")]] void AsUTF32 (T* into) const
1544 requires (same_as<T, u32string> or (sizeof (wchar_t) == sizeof (char32_t) and same_as<T, wstring>))
1548 [[deprecated ("Since Stroika
v3.0
d2 -
just use /0
")]] void AsSDKString (SDKString* into) const
1550 *into = AsSDKString ();
1552 [[deprecated ("Since Stroika
v3.0
d2 -
just use /0
")]] void AsNarrowSDKString (string* into) const
1554 *into = SDK2Narrow (AsSDKString ());
1556 template <typename T = string>
1557 [[deprecated ("Since
v3.0
d2 use /0
")]] void AsASCII (T* into) const
1558 requires (same_as<T, string> or same_as<T, Memory::StackBuffer<char>>)
1560 if (not AsASCIIQuietly (into)) {
1561 ThrowInvalidAsciiException_ ();
1564 template <typename T = string>
1565 [[deprecated ("Since
v3.0
d2 use /0
overload")]] bool AsASCIIQuietly (T* into) const
1566 requires (same_as<T, string> or same_as<T, Memory::StackBuffer<char>>)
1568 auto r = this->AsASCIIQuietly ();
1579 return LimitLength (maxLen, keepLeft ? StringShorteningPreference::ePreferKeepLeft : StringShorteningPreference::ePreferKeepRight);
1582 const String& ellipsis) const
1584 return LimitLength (maxLen, keepLeft ? StringShorteningPreference::ePreferKeepLeft : StringShorteningPreference::ePreferKeepRight, ellipsis);
1586 template <typename CHAR_T>
1587 [[deprecated ("Since Stroika
v3.0
d1,
String{}
")]] static String FromASCII (span<const CHAR_T> s)
1591 template <typename CHAR_T>
1592 [[deprecated ("Since Stroika
v3.0
d1,
String{}
")]] static String FromASCII (const CHAR_T* cString)
1594 return String{cString};
1596 template <IStdBasicStringCompatibleCharacter CHAR_T>
1597 [[deprecated ("Since Stroika
v3.0
d1,
String{}
")]] static String FromASCII (const basic_string<CHAR_T>& str)
1601 [[deprecated ("Since Stroika
v3.0
d1,
use span{}
overload for this")]] static String FromASCII (const char* from, const char* to)
1603 return String{span{from, to}};
1605 [[deprecated ("Since Stroika
v3.0
d1,
use span{}
overload for this")]] static String FromASCII (const wchar_t* from, const wchar_t* to)
1607 return String{span{from, to}};
1609 [[deprecated ("Since Stroika
v3.0
d1,
use span
overloads")]] String InsertAt (const wchar_t* from, const wchar_t* to, size_t at) const
1611 Memory::StackBuffer<Character> buf{Memory::eUninitialized, UTFConvert::ComputeTargetBufferSize<Character> (span{from, to})};
1612 return InsertAt (UTFConvert::kThe.ConvertSpan (span{from, to}, span{buf}), at);
1614 [[deprecated ("Since Stroika
v3.0
d1,
use span
overloads")]] String InsertAt (const Character* from, const Character* to, size_t at) const
1616 return InsertAt (span{from, to}, at);
1618 [[deprecated ("Since Stroika
v3.0
d1,
use span{}
overload for this")]] static String FromLatin1 (const char* start, const char* end)
1620 return FromLatin1 (span{start, end});
1622 [[deprecated ("Since Stroika
v3.0
d1,
use span{}
constructor for this")]] static String FromNarrowString (const char* from,
1623 const char* to, const locale& l)
1625 return FromNarrowString (span{from, to}, l);
1627 [[deprecated ("Since Stroika
v3.0
d1,
use span{}
constructor for this")]] static String FromNarrowSDKString (const char* from, const char* to)
1629 return FromNarrowSDKString (span{from, to});
1631 template <IUNICODECanAlwaysConvertTo CHAR_T>
1632 [[deprecated ("Since Stroika
v3.0
d1,
use span{}
constructor for this")]] String (const CHAR_T* from, const CHAR_T* to)
1633 : String{span<const CHAR_T>{from, to}}
1638 c_str ()
const noexcept;
1639 [[
deprecated ("Since Stroika
v3.0 -
use span{}
overloads")]] inline static String FromSDKString (const SDKChar* from, const SDKChar* to)
1641 return FromSDKString (span{from, to});
1643 [[deprecated ("Since Stroika
v3.0 -
use span{}
overloads")]] static String FromUTF8 (const char* from, const char* to)
1645 return FromUTF8 (span{from, to});
1647 [[deprecated ("Since Stroika
v3.0 -
use span{}
overloads")]] static String FromUTF8 (const char8_t* from, const char8_t* to)
1649 return FromUTF8 (span{from, to});
1651 template <typename T = string>
1652 [[deprecated ("Since Stroika
v3.0
d1 -
use Character::AsAsciiQuietly
")]] static bool AsASCIIQuietly (const wchar_t* fromStart,
1653 const wchar_t* fromEnd, T* into)
1655 return Character::AsASCIIQuietly (span<const wchar_t>{fromStart, fromEnd}, into);
1660 [[
deprecated (
"Since Stroika v3.0d8 - use RemoveFirstIf")]]
String Remove (Character c)
const
1670 static shared_ptr<_IRep> mkEmpty_ ();
1682 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1684 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1686 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
1688 template <IStdBasicStringCompatibleCharacter CHAR_T>
1698 template <
typename CHAR_T>
1703 template <
unsigned_
integral T>
1705 template <
signed_
integral T>
1712 nonvirtual
void _AssertRepValidType ()
const;
1715 [[
noreturn]]
static void ThrowInvalidAsciiException_ ();
1717 static_assert (totally_ordered<String>);
1719#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin
/**
 *  Stream insertion for String: one overload for wide streams (wostream) and one
 *  for narrow streams (ostream). Each takes the stream by reference plus the String
 *  by const reference, and returns a reference to the same kind of stream.
 *
 *  NOTE(review): the narrow overload has to choose some narrow encoding for the
 *  String's characters; which one is not visible in this excerpt - confirm in the definition.
 */
1733 wostream&
operator<< (wostream& out,
const String& s);
1734 ostream&
operator<< (ostream& out,
const String& s);
1736#if qStroika_HasComponent_googletest
1738 void PrintTo (
const String& s, std::ostream* os);
1775 virtual const wchar_t* c_str_peek ()
const noexcept = 0;
1794 template <
typename T>
1795 concept ICanBeTreatedAsSpanOfCharacter_ =
1813 template <ICanBeTreatedAsSpanOfCharacter_ USTRING,
size_t STACK_BUFFER_SZ>
1839 template <IConvertibleToString LT, IConvertibleToString RT>
1840 nonvirtual
bool operator() (LT&& lhs, RT&& rhs)
const;
1842 CompareOptions fCompareOptions;
1845 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1846 bool Cmp_ (LT&& lhs, RT&& rhs)
const;
1847 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1848 bool Cmp_Generic_ (LT&& lhs, RT&& rhs)
const;
1857 constexpr ThreeWayComparer (CompareOptions co = eWithCase);
1862 template <IConvertibleToString LT, IConvertibleToString RT>
1863 nonvirtual strong_ordering operator() (LT&& lhs, RT&& rhs)
const;
1865 CompareOptions fCompareOptions;
1868 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1869 strong_ordering Cmp_ (LT&& lhs, RT&& rhs)
const;
1870 template <Private_::ICanBeTreatedAsSpanOfCharacter_ LT, Private_::ICanBeTreatedAsSpanOfCharacter_ RT>
1871 strong_ordering Cmp_Generic_ (LT&& lhs, RT&& rhs)
const;
1878 constexpr LessComparer (CompareOptions co = eWithCase);
1880 template <
typename T1,
typename T2>
1881 nonvirtual
bool operator() (T1 lhs, T2 rhs)
const;
1884 ThreeWayComparer fComparer_;
1888 inline namespace Literals {
/**
 *  User-defined string-literal suffix _k, declared here for wchar_t, char8_t,
 *  char16_t and char32_t literals. Each overload receives the literal's character
 *  pointer and length, and returns a String.
 *
 *  NOTE(review): further overloads (e.g. for plain char literals) may exist on lines
 *  not visible in this excerpt, and what distinguishes a _k String from other
 *  construction paths is not shown here - confirm.
 */
1905 String operator""_k (
const wchar_t* s,
size_t len);
1906 String operator""_k (
const char8_t* s,
size_t len);
1907 String operator""_k (
const char16_t* s,
size_t len);
1908 String operator""_k (
const char32_t* s,
size_t len);
1920 template <IConvertibleToString LHS_T, IConvertibleToString RHS_T>
1932 template <
typename STRING = String>
1934 STRING fSeparator{
", "sv};
1935 optional<STRING> fSpecialSeparatorForLastPair;
1936 STRING operator() (
const STRING& lhs,
const STRING& rhs,
bool isLast)
const;
1947namespace Stroika::Foundation::Traversal {
1960namespace Stroika::Foundation::Memory {
1965 template <
typename T>
1979struct qStroika_Foundation_Characters_FMT_PREFIX_::formatter<Stroika::Foundation::Characters::String, wchar_t> {
1980 qStroika_Foundation_Characters_FMT_PREFIX_::formatter<std::wstring, wchar_t> fDelegate2_;
1982 template <
typename ParseContext>
1983 constexpr typename ParseContext::iterator parse (ParseContext& ctx)
1985 return fDelegate2_.parse (ctx);
1988 template <
typename FmtContext>
1991 return fDelegate2_.format (s.
As<std::wstring> (), ctx);
1995struct qStroika_Foundation_Characters_FMT_PREFIX_::formatter<Stroika::Foundation::Characters::String, char> {
1996 bool ignoreerrors{
true};
1998 template <
typename ParseContext>
1999 constexpr typename ParseContext::iterator parse (ParseContext& ctx)
2001 auto it = ctx.begin ();
2002 while (it != ctx.end ()) {
2005 if (it == ctx.end()) {
2006 throw Common::StdCompat::format_error{
"Invalid format args (missing }) for formatter<String,char>."};
2016 template <
typename FmtContext>
2023#if __cpp_lib_ranges >= 202207L
2024 return std::ranges::copy (dr.AsNarrowSDKString (eIgnoreErrors), ctx.out ()).out;
2026 return format_to (ctx.out (),
"{}", dr.AsNarrowSDKString (eIgnoreErrors));
2030#if __cpp_lib_ranges >= 202207L
2031 return std::ranges::copy (dr.AsNarrowSDKString (), ctx.out ()).out;
2033 return format_to (ctx.out (),
"{}", dr.AsNarrowSDKString ());
2044#include "String.inl"
#define Stroika_Define_Enum_Bounds(FIRST_ITEM, LAST_ITEM)
constexpr bool IsWhitespace() const noexcept
RegularExpression is a compiled regular expression which can be used to match on a String class.
virtual Character GetAt(size_t index) const noexcept=0
virtual PeekSpanData PeekData(optional< PeekSpanData::StorageCodePointType > preferred) const noexcept=0
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
nonvirtual bool Contains(Character c, CompareOptions co=eWithCase) const
nonvirtual T AsUTF8() const
nonvirtual size_t length() const noexcept
nonvirtual String ToUpperCase() const
static String FromNarrowString(const char *from, const locale &l)
nonvirtual bool Matches(const RegularExpression &regEx) const
nonvirtual bool IsWhitespace() const
nonvirtual String NormalizeTextToNL() const
static String Join(const Iterable< String > &list, const String &separator=", "sv)
static String FromStringConstant(const CHAR_T(&cString)[SIZE])
Take the given argument data (constant span) - which must remain unchanged - constant - for the appli...
nonvirtual String ColValue(size_t i, const String &valueIfMissing={}) const
see Col(i) - but with default value of empty string
nonvirtual String NormalizeSpace(Character useSpaceCharacter=' ') const
Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument...
nonvirtual Containers::Sequence< pair< size_t, size_t > > FindEach(const RegularExpression &regEx) const
nonvirtual tuple< const wchar_t *, wstring_view > c_str(Memory::StackBuffer< wchar_t > *possibleBackingStore) const
nonvirtual bool operator==(const String &rhs) const
nonvirtual String Repeat(unsigned int count) const
static String FromSDKString(const SDKChar *from)
nonvirtual String LimitLength(size_t maxLen, StringShorteningPreference keepPref=StringShorteningPreference::ePreferKeepLeft) const
return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if tr...
nonvirtual String RemoveAll(Character c) const
nonvirtual Containers::Sequence< RegularExpressionMatch > FindEachMatch(const RegularExpression &regEx) const
nonvirtual String RemoveFirstIf(Character c) const
nonvirtual T AsUTF32() const
nonvirtual string AsNarrowSDKString() const
nonvirtual optional< String > Col(size_t i) const
Useful to replace 'awk print $3' - replace with Col(2) - zero based.
nonvirtual String InsertAt(Character c, size_t at) const
nonvirtual size_t rfind(Character c) const
static String FromNarrowSDKString(const char *from)
nonvirtual string AsNarrowString(const locale &l) const
nonvirtual String Concatenate(T &&rhs) const
appends 'rhs' string to this string (without modifying this string) and returns the combined string
nonvirtual SDKString AsSDKString() const
nonvirtual size_t size() const noexcept
nonvirtual bool EndsWith(const Character &c, CompareOptions co=eWithCase) const
nonvirtual String ToLowerCase() const
nonvirtual String ReplaceAll(const RegularExpression &regEx, const String &with) const
static constexpr size_t npos
nonvirtual String Replace(size_t from, size_t to, const String &replacement) const
nonvirtual String SubString(SZ from) const
nonvirtual String Trim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
nonvirtual strong_ordering operator<=>(const String &rhs) const
nonvirtual Character back() const
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
nonvirtual String StripAll(bool(*removeCharIf)(Character)) const
nonvirtual T AsASCII() const
nonvirtual String AssureEndsWith(const Character &c, CompareOptions co=eWithCase) const
Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
nonvirtual span< CHAR_T > CopyTo(span< CHAR_T > s) const
nonvirtual T AsUTF16() const
nonvirtual PeekSpanData GetPeekSpanData() const
return the constant character data inside the string in the form of a case variant union of different...
nonvirtual String SafeSubString(SZ from) const
nonvirtual Containers::Sequence< String > AsLines() const
break the String into a series of lines;
nonvirtual String LTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
nonvirtual Character front() const
nonvirtual Containers::Sequence< String > Grep(const String &fgrepArg) const
Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with ....
nonvirtual Containers::Sequence< String > FindEachString(const RegularExpression &regEx) const
nonvirtual String Skip(size_t n) const
Return a substring of this string, starting at 'argument' n. If n > size(), return empty string.
nonvirtual optional< size_t > RFind(Character c) const noexcept
static span< const CHAR_TYPE > GetData(const PeekSpanData &pds, Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *possiblyUsedBuffer)
return the constant character data inside the string (rep) in the form of a span, possibly quickly an...
nonvirtual Containers::Sequence< String > Tokenize() const
nonvirtual String RemoveAt(size_t charAt) const
nonvirtual optional< T > AsASCIIQuietly() const
static String FromLatin1(const CHAR_T *cString)
static optional< span< const CHAR_TYPE > > PeekData(const PeekSpanData &pds)
return the constant character data inside the string in the form of a span or nullopt if not availabl...
static String FromUTF8(span< CHAR_T > from)
nonvirtual String RTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
nonvirtual optional< size_t > Find(Character c, CompareOptions co=eWithCase) const
nonvirtual String substr(size_t from, size_t count=npos) const
nonvirtual size_t find(Character c, size_t startAt=0) const
Set<T> is a container of T, where once an item is added, additionally adds () do nothing.
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Iterable(const Iterable &) noexcept=default
Iterable are safely copyable (by value). Since Iterable uses COW, this just copies the underlying poi...
returns true iff T == u8string, u16string, u32string, or wstring - which std::string types can be una...
anything with a 'special .STRINGTYPE conversion' method to UNICODE string, such as filesystem::path
IUNICODECanUnambiguouslyConvertFrom is any 'character representation type' where array of them unambi...
char ASCII
Stroika's string/character classes treat 'char' as being an ASCII character.
conditional_t< qTargetPlatformSDKUseswchar_t, wchar_t, char > SDKChar
StringShorteningPreference
basic_string< SDKChar > SDKString
String operator+(LHS_T &&lhs, RHS_T &&rhs)
const function< String(String, String, bool)> kDefaultStringCombiner
AllowMissingCharacterErrorsFlag
wostream & operator<<(wostream &out, const String &s)
very similar to ThreeWayComparer but returns true if less
Summary data for raw contents of rep - each rep will support at least one of these span forms.
StringCombiner is a simple function object used to combine two strings visually - used in Iterable<>:...
function object which serializes type T to a BLOB (or BLOB like) object