Stroika/_string_8cpp_source.html

/*

 * Copyright(c) Sophist Solutions, Inc. 1990-2025.  All rights reserved

 */

#include "Stroika/Foundation/StroikaPreComp.h"


#include <algorithm>

#include <climits>

#include <cstdarg>

#include <istream>

#include <regex>

#include <string>


#include "Stroika/Foundation/Characters/CString/Utilities.h"

#include "Stroika/Foundation/Characters/RegularExpression.h"

#include "Stroika/Foundation/Characters/SDKString.h"

#include "Stroika/Foundation/Characters/StringBuilder.h"

#include "Stroika/Foundation/Common/Empty.h"

#include "Stroika/Foundation/Containers/Concrete/Sequence_stdvector.h"

#include "Stroika/Foundation/Containers/Set.h"

#include "Stroika/Foundation/Containers/Support/ReserveTweaks.h"

#include "Stroika/Foundation/Cryptography/Digest/Algorithm/SuperFastHash.h"

#include "Stroika/Foundation/Debug/Cast.h"

#include "Stroika/Foundation/Execution/Exceptions.h"

#include "Stroika/Foundation/Execution/Throw.h"

#include "Stroika/Foundation/Math/Common.h"

#include "Stroika/Foundation/Memory/BlockAllocated.h"

#include "Stroika/Foundation/Memory/Common.h"

#include "Stroika/Foundation/Memory/StackBuffer.h"


#include "String.h"


using namespace Stroika::Foundation;

using namespace Stroika::Foundation::Characters;

using namespace Stroika::Foundation::Containers;

using namespace Stroika::Foundation::Common;


using Memory::StackBuffer;

using Traversal::Iterator;


// see Satisfies Concepts:

static_assert (regular<String>);


#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin

#include <filesystem>

#endif


namespace {


    /**
     *  Helper for sharing implementation code among the string reps in this file.
     *
     *  Rep<CHAR_T> is templated on CHAR_T. The key is that ALL characters of the string fit inside a single
     *  CHAR_T, so the implementation can store them as an array, and index it directly.
     *  So mixed 1,2,3 byte characters all get stored in a char32_t array, and a string with all ascii
     *  characters gets stored in a char (1-byte stride) array.
     *
     *  \note - the KEY design choice in StringRepHelperAllFitInSize_::Rep<CHAR_T> is that it contains no
     *        multi-code-point characters. This is what allows the simple calculation of array index
     *        to character offset. So use
     *              StringRepHelperAllFitInSize_::Rep<ASCII> for ascii text
     *              StringRepHelperAllFitInSize_::Rep<LATIN1> for ISOLatin1 text
     *              StringRepHelperAllFitInSize_::Rep<char16_t> for ISOLatin1/anything which is a 2-byte unicode char (not surrogates)
     *              StringRepHelperAllFitInSize_::Rep<char32_t> for anything else - this always works
     *
     *  \note - Rep only VIEWS its characters through the span _fData; it never allocates or frees them.
     *        Subclasses decide where the characters live and point _fData at them.
     */
    struct StringRepHelperAllFitInSize_ : String {
        template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
        struct Rep : public _IRep {
        private:
            using inherited = _IRep;

        protected:
            // View of the characters of this string (one CHAR_T per character); storage is owned by the subclass
            span<const CHAR_T> _fData;

#if qStroika_Foundation_Debug_AssertionsChecked
        private:
            // Debug-only count of live iterators created by MakeIterator (), used to detect re-pointing _fData underneath them
            mutable unsigned int fOutstandingIterators_{};
#endif

        protected:
            Rep () = default;
            Rep (span<const CHAR_T> s)
                requires (not same_as<CHAR_T, char8_t>) // char8 ironically involves 2-byte characters, cuz only ascii encoded as 1 byte
                : _fData{s}
            {
                if constexpr (same_as<CHAR_T, char> or same_as<CHAR_T, char8_t>) {
                    Require (Character::IsASCII (s));
                }
                // Any 8-bit sequence valid for Latin1
                if constexpr (same_as<CHAR_T, char16_t>) {
                    Require (UTFConvert::AllFitsInTwoByteEncoding (s));
                }
            }
            /**
             *  Re-point this rep at a different span of characters (same preconditions as the span constructor).
             *  Used by subclasses which can only compute the span after the base class has been constructed.
             */
            Rep& operator= (span<const CHAR_T> s)
            {
#if qStroika_Foundation_Debug_AssertionsChecked
                Require (fOutstandingIterators_ == 0);
#endif
                if constexpr (same_as<CHAR_T, char> or same_as<CHAR_T, char8_t>) {
                    Require (Character::IsASCII (s));
                }
                if constexpr (same_as<CHAR_T, char16_t>) {
                    Require (UTFConvert::AllFitsInTwoByteEncoding (s));
                }
                _fData = s;
                return *this;
            }

        public:
            // String::_IRep OVERRIDES
            virtual Character GetAt (size_t index) const noexcept override
            {
                Require (index < _fData.size ());
                // NOTE - this is safe because we never construct this type with surrogates
                return Character{static_cast<char32_t> (_fData[index])};
            }
            virtual PeekSpanData PeekData (optional<PeekSpanData::StorageCodePointType> /*preferred*/) const noexcept override
            {
                // IGNORE preferred, cuz we return what is in our REP - since returning a direct pointer to that data - no conversion possible
                // (single if/else-if chain, so exactly one branch is instantiated for any given CHAR_T)
                if constexpr (same_as<CHAR_T, ASCII>) {
                    return PeekSpanData{PeekSpanData::StorageCodePointType::eAscii, {.fAscii = _fData}};
                }
                else if constexpr (same_as<CHAR_T, Latin1>) {
                    return PeekSpanData{PeekSpanData::StorageCodePointType::eSingleByteLatin1, {.fSingleByteLatin1 = _fData}};
                }
                else if constexpr (sizeof (CHAR_T) == 2) {
                    // reinterpret_cast needed cuz of wchar_t case
                    return PeekSpanData{PeekSpanData::StorageCodePointType::eChar16,
                                        {.fChar16 = span<const char16_t>{reinterpret_cast<const char16_t*> (_fData.data ()), _fData.size ()}}};
                }
                else if constexpr (sizeof (CHAR_T) == 4) {
                    // reinterpret_cast needed cuz of wchar_t case
                    return PeekSpanData{PeekSpanData::StorageCodePointType::eChar32,
                                        {.fChar32 = span<const char32_t>{reinterpret_cast<const char32_t*> (_fData.data ()), _fData.size ()}}};
                }
            }

            // Overrides for Iterable<Character>
            // @todo - MAYBE override Apply/Find and a few others to not use default 'iterator object' implementation that has lots of indirect virtual calls
        public:
            virtual shared_ptr<Iterable<Character>::_IRep> Clone () const override
            {
                AssertNotReached (); // Since String reps now immutable, this should never be called
                return nullptr;
            }
            virtual Traversal::Iterator<value_type> MakeIterator () const override
            {
                // NOTE - UNDETECTED CALLER ERROR - if iterator constructed and used after string rep destroyed (never changed) -- LGP 2023-07-07
                struct MyIterRep_ final : Iterator<Character>::IRep, public Memory::UseBlockAllocationIfAppropriate<MyIterRep_> {
                    span<const CHAR_T> fData_; // clone span (not underlying data)
                    size_t             fIdx_{0};
#if qStroika_Foundation_Debug_AssertionsChecked
                    const Rep* fOwningRep_;
#endif
                    MyIterRep_ (span<const CHAR_T> data
#if qStroika_Foundation_Debug_AssertionsChecked
                                ,
                                const Rep* dbgRep
#endif
                                )
                        : fData_{data}
#if qStroika_Foundation_Debug_AssertionsChecked
                        , fOwningRep_{dbgRep}
#endif
                    {
#if qStroika_Foundation_Debug_AssertionsChecked
                        ++fOwningRep_->fOutstandingIterators_;
#endif
                    }
#if qStroika_Foundation_Debug_AssertionsChecked
                    virtual ~MyIterRep_ () override
                    {
                        Require (fOwningRep_->fOutstandingIterators_ > 0); // if this fails, probably cuz fOwningRep_ destroyed
                        --fOwningRep_->fOutstandingIterators_;
                    }
#endif

                    virtual unique_ptr<Iterator<Character>::IRep> Clone () const override
                    {
                        // Copy BOTH the span and the current index. Re-basing the span at the current position
                        // (subspan + index 0) would make the clone compare unequal - via Equals () below - to the
                        // iterator it was cloned from, since Equals compares span start and index.
                        auto clonedRep = make_unique<MyIterRep_> (fData_
#if qStroika_Foundation_Debug_AssertionsChecked
                                                                  ,
                                                                  fOwningRep_
#endif
                        );
                        clonedRep->fIdx_ = fIdx_;
                        return clonedRep;
                    }
                    virtual void More (optional<Character>* result, bool advance) override
                    {
                        RequireNotNull (result);
                        if (advance) [[likely]] {
                            Require (fIdx_ < fData_.size ());
                            ++fIdx_;
                        }
                        if (fIdx_ < fData_.size ()) [[likely]] {
                            // NOTE - this is safe because we never construct this type with surrogates
                            *result = Character{static_cast<char32_t> (fData_[fIdx_])};
                        }
                        else {
                            *result = nullopt;
                        }
                    }
                    virtual bool Equals (const IRep* rhs) const override
                    {
                        RequireNotNull (rhs);
                        RequireMember (rhs, MyIterRep_);
                        const MyIterRep_* rrhs = Debug::UncheckedDynamicCast<const MyIterRep_*> (rhs);
                        // same underlying characters, and same position within them
                        return fData_.data () == rrhs->fData_.data () and fIdx_ == rrhs->fIdx_;
                    }
                };
                return Iterator<Character>{make_unique<MyIterRep_> (this->_fData
#if qStroika_Foundation_Debug_AssertionsChecked
                                                                    ,
                                                                    this
#endif
                                                                    )};
            }
            virtual size_t size () const override
            {
                return _fData.size ();
            }
            virtual bool empty () const override
            {
                return _fData.empty ();
            }
            virtual Traversal::Iterator<value_type> Find (const function<bool (ArgByValueType<value_type> item)>& that,
                                                          Execution::SequencePolicy                               seq) const override
            {
                return inherited::Find (that, seq); // @todo rewrite FOR PERFORMANCE to operate on fData_
            }
        };
    };


    /**
     *  Simple string rep whose characters live in a separately allocated array (new[] in mkBuf_, delete[] in the destructor),
     *  referenced through the inherited _fData span.
     *
     *  \note   When kAddNullTerminator_ is true (CHAR_T is the same size as wchar_t) the array is allocated one element
     *          larger than the string, and a NUL is stored just past the end of the _fData span.
     */
    struct DynamicallyAllocatedString : StringRepHelperAllFitInSize_ {
        template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
        struct Rep final : public StringRepHelperAllFitInSize_::Rep<CHAR_T>, public Memory::UseBlockAllocationIfAppropriate<Rep<CHAR_T>> {
        private:
            using inherited = StringRepHelperAllFitInSize_::Rep<CHAR_T>;

        private:
            // Stick nul-terminator just past the end of the span (costs nothing to nul-terminate in this case)
            static constexpr bool kAddNullTerminator_ = sizeof (CHAR_T) == sizeof (wchar_t);

        public:
            Rep ()           = delete;
            Rep (const Rep&) = delete;
            /**
             *  Copy the characters of t1 into a freshly allocated array owned by this rep.
             */
            Rep (span<const CHAR_T> t1)
                : inherited{mkBuf_ (t1)}
            {
            }

        public:
            virtual ~Rep () override
            {
                // _fData.data () is the start of the array allocated in mkBuf_
                delete[] this->_fData.data ();
            }

        public:
            nonvirtual Rep& operator= (const Rep&) = delete;

        public:
            // String::_IRep OVERRIDES
            virtual const wchar_t* c_str_peek () const noexcept override
            {
                // @todo NOTE DEPRECATED SINCE STROIKA v3.0d13, and same for kAddNullTerminator_
                if constexpr (not kAddNullTerminator_) {
                    return nullptr;
                }
                else {
                    // dont index into buf cuz we cheat and go one past end on purpose
                    Assert (*(this->_fData.data () + this->_fData.size ()) == '\0');
                    return reinterpret_cast<const wchar_t*> (this->_fData.data ());
                }
            }

        private:
            // Number of array elements to allocate for a string of initialCapacity characters (adds room for the NUL when used)
            static size_t AdjustCapacity_ (size_t initialCapacity)
            {
                if constexpr (kAddNullTerminator_) {
                    return initialCapacity + 1;
                }
                else {
                    return initialCapacity;
                }
            }

            // Allocate (uninitialized) storage for 'length' characters; the returned span covers the whole CAPACITY
            static span<CHAR_T> mkBuf_ (size_t length)
            {
                const size_t nEltsToAllocate = AdjustCapacity_ (length);
                Assert (length <= nEltsToAllocate);
                if constexpr (kAddNullTerminator_) {
                    Assert (length + 1 <= nEltsToAllocate);
                }
                CHAR_T* storage = new CHAR_T[nEltsToAllocate];
                return span{storage, nEltsToAllocate};
            }

            // Allocate storage and copy t1 into it; the returned span covers just the copied characters
            static span<CHAR_T> mkBuf_ (span<const CHAR_T> t1)
            {
                const size_t nChars  = t1.size ();
                span<CHAR_T> storage = mkBuf_ (nChars); // note storage span is over capacity, not size
                Assert (storage.size () >= nChars);
                auto copiedChars = Memory::CopyBytes (t1, storage);
                if constexpr (kAddNullTerminator_) {
                    Assert (nChars + 1 <= storage.size ());
                    *(storage.data () + nChars) = '\0';
                }
                return copiedChars; // return span of just characters, even if we have extra NUL (outside span)
            }
        };
    };


    /**
     *  Most Stroika strings use this 'rep': FixedCapacityInlineStorageString_
     *
     *  Unlike a rep that keeps its characters in a separately allocated buffer, the storage here (fBuf_) is inline in the
     *  rep object itself - one struct/allocation - for better memory allocation performance, and more importantly,
     *  better locality of data (more cpu cache friendly).
     *
     *  CAPACITY is the number of CHAR_T elements in the inline buffer; the string stored must not be longer.
     */
    struct FixedCapacityInlineStorageString_ : StringRepHelperAllFitInSize_ {
        template <IUNICODECanUnambiguouslyConvertFrom CHAR_T, size_t CAPACITY>
        struct Rep final : public StringRepHelperAllFitInSize_::Rep<CHAR_T>,
                           public Memory::UseBlockAllocationIfAppropriate<Rep<CHAR_T, CAPACITY>> {
        private:
            using inherited = StringRepHelperAllFitInSize_::Rep<CHAR_T>;

        private:
            CHAR_T fBuf_[CAPACITY];

        private:
            // True iff a NUL follows the characters in fBuf_: only for wchar_t-sized CHAR_T, and only if there is a spare slot
            bool IncludesNullTerminator_ () const
            {
                if constexpr (sizeof (CHAR_T) != sizeof (wchar_t)) {
                    return false;
                }
                else {
                    return this->_fData.size () < CAPACITY; // else no room
                }
            }

        public:
            Rep ()           = delete;
            Rep (const Rep&) = delete;
            /**
             *  Copy the characters of t1 into the inline buffer.
             *  \req t1.size () <= CAPACITY
             */
            Rep (span<const CHAR_T> t1)
                : inherited{}
            {
                // The span is assigned in the body (not passed to the base constructor) because it refers to the
                // data member fBuf_, which doesn't exist til after base class construction. SHOULDNT really matter
                // (since uninitialized data), but on g++-11, and other compilers, detected as vptr UB violation
                // if we access first
                Require (t1.size () <= CAPACITY);
                inherited::operator= (Memory::CopyBytes (t1, span<CHAR_T>{fBuf_}));
                if (IncludesNullTerminator_ ()) {
                    Assert (t1.size () + 1 <= CAPACITY);
                    fBuf_[t1.size ()] = CHAR_T{'\0'};
                }
            }

        public:
            nonvirtual Rep& operator= (const Rep&) = delete;

        public:
            // String::_IRep OVERRIDES
            virtual const wchar_t* c_str_peek () const noexcept override
            {
                if (not IncludesNullTerminator_ ()) {
                    return nullptr;
                }
                // dont index into buf cuz we cheat and go one past end on purpose
                Assert (*(this->_fData.data () + this->_fData.size ()) == '\0');
                return reinterpret_cast<const wchar_t*> (this->_fData.data ());
            }
        };
    };


    /**
     *  For static full app lifetime string constants...
     *
     *  DirectIndexRep does NOT copy the characters: it stores the span (raw pointer + length) it is given, so the
     *  caller must guarantee that memory outlives the rep and is never modified.
     */
    struct StringConstant_ : public StringRepHelperAllFitInSize_ {
        using inherited = String;

        // NOTE: the CRTP argument to UseBlockAllocationIfAppropriate is this class itself (DirectIndexRep<CHAR_T>),
        // matching how every other rep in this file uses it (and not the base StringRepHelperAllFitInSize_::Rep<CHAR_T>).
        template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
        class DirectIndexRep final : public StringRepHelperAllFitInSize_::Rep<CHAR_T>,
                                     public Memory::UseBlockAllocationIfAppropriate<DirectIndexRep<CHAR_T>> {
        private:
            using inherited = StringRepHelperAllFitInSize_::Rep<CHAR_T>;

        public:
            DirectIndexRep (span<const CHAR_T> s)
                : inherited{s} // don't copy memory - but copy raw pointers! So they MUST BE (externally promised) 'externally owned for the application lifetime and constant' - like c++ string constants
            {
            }

        public:
            // String::_IRep OVERRIDES
            virtual const wchar_t* c_str_peek () const noexcept override
            {
                // no promise that the externally owned data is NUL-terminated, so never offer a c_str
                return nullptr;
            }
        };
    };


    /*
     *  Used for String{move(some_string)}
     *
     *  The rep takes ownership of the std::basic_string (by move) and views its characters in place.
     */
    struct StdStringDelegator_ : public StringRepHelperAllFitInSize_ {
        using inherited = String;

        template <IStdBasicStringCompatibleCharacter CHAR_T>
        class Rep final : public StringRepHelperAllFitInSize_::Rep<CHAR_T>, public Memory::UseBlockAllocationIfAppropriate<Rep<CHAR_T>> {
        private:
            using inherited = StringRepHelperAllFitInSize_::Rep<CHAR_T>;

        private:
            // The string whose buffer the inherited _fData span points into
            basic_string<CHAR_T> fMovedData_;

        public:
            Rep (basic_string<CHAR_T>&& s)
                : inherited{span<const CHAR_T>{}} // start with an empty view; real span is set in the body
                , fMovedData_{move (s)}
            {
                // must grab after move (the data pointer of fMovedData_ is what we need, not that of 's')
                const span<const CHAR_T> ownedChars{fMovedData_.data (), fMovedData_.size ()};
                inherited::operator= (ownedChars);
            }

        public:
            // String::_IRep OVERRIDES
            virtual const wchar_t* c_str_peek () const noexcept override
            {
                if constexpr (not same_as<CHAR_T, wchar_t>) {
                    return nullptr;
                }
                else {
                    return fMovedData_.c_str ();
                }
            }
        };
    };


    /**
     *  Delegate to original String::Rep, and add in support for c_str ()
     *
     *  Every _IRep operation is forwarded to the wrapped rep, except c_str_peek (), which is answered from a
     *  wstring copy of the characters made at construction time.
     */
    struct StringWithCStr_ : public String {
    public:
        class Rep final : public _IRep, public Memory::UseBlockAllocationIfAppropriate<Rep> {
        private:
            shared_ptr<_IRep> fUnderlyingRep_; // rep all other calls are forwarded to
            wstring           fCString_;       // wchar_t copy of the characters, backing c_str_peek ()

        public:
            /**
             *  Wrap underlyingRep, and eagerly build the wide-character copy of its data.
             */
            Rep (const shared_ptr<_IRep>& underlyingRep)
                : fUnderlyingRep_{underlyingRep}
            {
                Memory::StackBuffer<wchar_t> scratchBuf; // only used if the data must be converted to wchar_t
                auto                         wideChars = String::GetData<wchar_t> (underlyingRep->PeekData (nullopt), &scratchBuf);
                fCString_.assign (wideChars.begin (), wideChars.end ());
            }

            // Overrides for Iterable<Character> - all delegate
        public:
            virtual shared_ptr<Iterable<Character>::_IRep> Clone () const override
            {
                return fUnderlyingRep_->Clone ();
            }
            virtual Traversal::Iterator<value_type> MakeIterator () const override
            {
                return fUnderlyingRep_->MakeIterator ();
            }
            virtual size_t size () const override
            {
                return fUnderlyingRep_->size ();
            }
            virtual bool empty () const override
            {
                return fUnderlyingRep_->empty ();
            }
            virtual Traversal::Iterator<value_type> Find (const function<bool (ArgByValueType<value_type> item)>& that,
                                                          Execution::SequencePolicy                               seq) const override
            {
                return fUnderlyingRep_->Find (that, seq);
            }

            // String::_IRep overrides - delegate (except c_str_peek)
        public:
            virtual Character GetAt (size_t index) const noexcept override
            {
                return fUnderlyingRep_->GetAt (index);
            }
            virtual PeekSpanData PeekData (optional<PeekSpanData::StorageCodePointType> preferred) const noexcept override
            {
                return fUnderlyingRep_->PeekData (preferred);
            }
            virtual const wchar_t* c_str_peek () const noexcept override
            {
                return fCString_.c_str ();
            }
        };
    };

}


namespace {

    /**
     *  Wrapper that derives from FACET, forwards any constructor arguments to it, and has a public
     *  destructor, so the facet can be created as an ordinary object (e.g. a local variable).
     */
    template <typename FACET>
    struct deletable_facet_ final : FACET {
        template <typename... CTOR_ARGS>
        deletable_facet_ (CTOR_ARGS&&... ctorArgs)
            : FACET{forward<CTOR_ARGS> (ctorArgs)...}
        {
        }
        ~deletable_facet_ () = default;
    };

}


/*

 ********************************************************************************

 ******* Characters::Private_::RegularExpression_GetCompiled ********************

 ********************************************************************************

 */

/**
 *  Thin forwarding function: returns whatever regExp.GetCompiled () returns (the std::wregex for regExp).
 */
const wregex& Characters::Private_::RegularExpression_GetCompiled (const RegularExpression& regExp)
{
    const wregex& compiled = regExp.GetCompiled ();
    return compiled;
}


/*

 ********************************************************************************

 ************************************* String ***********************************

 ********************************************************************************

 */

/**
 *  Make a rep that refers directly to the characters of str (no copy - the rep stores the pointer and length).
 *  \req all characters of str are ASCII
 */
shared_ptr<String::_IRep> String::CTORFromBasicStringView_ (const basic_string_view<ASCII>& str)
{
    span<const ASCII> asciiChars{str.data (), str.size ()};
    RequireExpression (Character::IsASCII (asciiChars));
    return Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<ASCII>> (asciiChars);
}


/**
 *  If str is entirely ASCII, make a rep that refers directly to its bytes (no copy);
 *  otherwise fall back to mk_ (), which copies the data.
 */
shared_ptr<String::_IRep> String::CTORFromBasicStringView_ (const basic_string_view<char8_t>& str)
{
    span<const char8_t> utf8Chars{str.data (), str.size ()};
    if (not Character::IsASCII (utf8Chars)) {
        return mk_ (utf8Chars); // copies data
    }
    return Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<ASCII>> (Memory::SpanBytesCast<span<const ASCII>> (utf8Chars));
}


/**
 *  If every character of str fits in a single 16-bit code unit, make a rep that refers directly to str's
 *  data (no copy); otherwise fall back to mk_ (), which copies the data.
 */
shared_ptr<String::_IRep> String::CTORFromBasicStringView_ (const basic_string_view<char16_t>& str)
{
    if (not UTFConvert::AllFitsInTwoByteEncoding (span{str})) {
        return mk_ (span<const char16_t>{str.data (), str.size ()}); // copies data
    }
    return Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<char16_t>> (span{str.data (), str.size ()});
}


/**
 *  Make a rep that refers directly to the char32_t characters of str (no copy, no check needed).
 */
shared_ptr<String::_IRep> String::CTORFromBasicStringView_ (const basic_string_view<char32_t>& str)
{
    span<const char32_t> utf32Chars{str.data (), str.size ()};
    return Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<char32_t>> (utf32Chars);
}


/**
 *  Make a rep for the wchar_t characters of str.
 *
 *  Normally the rep refers directly to str's data (no copy). But where wchar_t is 2 bytes, str may contain
 *  surrogate pairs, which the direct-index reps must never hold (they map one array element to one character).
 *  So in that case - as with the char16_t overload, and with mk_ (basic_string<wchar_t>&&) - fall back to
 *  mk_ (), which copies the data.
 */
shared_ptr<String::_IRep> String::CTORFromBasicStringView_ (const basic_string_view<wchar_t>& str)
{
    span<const wchar_t> wideChars{str.data (), str.size ()};
    if constexpr (sizeof (wchar_t) == 2) {
        if (not UTFConvert::AllFitsInTwoByteEncoding (wideChars)) {
            return mk_ (wideChars); // copies data
        }
    }
    return Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<wchar_t>> (wideChars);
}


/**
 *  Make a String that refers directly to the characters of s (the rep stores the span, it does not copy the data).
 *  \req all characters of s are ASCII
 */
String String::FromStringConstant (span<const ASCII> s)
{
    Require (Character::IsASCII (s));
    auto constantRep = Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<ASCII>> (s);
    return String{constantRep};
}


/**
 *  If every character of s fits in a single 16-bit code unit, make a String that refers directly to s
 *  (no copy); otherwise construct an ordinary String from s.
 */
String String::FromStringConstant (span<const char16_t> s)
{
    if (not UTFConvert::AllFitsInTwoByteEncoding (s)) {
        return String{s};
    }
    return String{Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<char16_t>> (s)};
}


/**
 *  Make a String that refers directly to the char32_t characters of s (the rep stores the span, it does not copy the data).
 */
String String::FromStringConstant (span<const char32_t> s)
{
    auto constantRep = Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<char32_t>> (s);
    return String{constantRep};
}


/**
 *  Convert the narrow (multibyte) characters in s to a String, using the codecvt facet named by locale l.
 *
 *  Throws a RuntimeErrorException if the conversion does not report codecvt_base::ok.
 */
String String::FromNarrowString (span<const char> s, const locale& l)
{
    // Note: this could use CodeCvt, but directly using std::codecvt in this case pretty simple, and
    // more efficient this way --LGP 2023-02-14

    // codecvt_byname has a protected destructor, so wrap it to allow a local instance.
    // See http://en.cppreference.com/w/cpp/locale/codecvt/~codecvt
    using Destructible_codecvt_byname = deletable_facet_<codecvt_byname<wchar_t, char, mbstate_t>>;
    Destructible_codecvt_byname converter{l.name ()};

    // Output buffer with room for s.size () wide characters
    Memory::StackBuffer<wchar_t> wideBuf{s.size ()};
    const char* const            srcBegin = s.data ();
    const char* const            srcEnd   = s.data () + s.size ();
    wchar_t* const               dstBegin = wideBuf.data ();
    wchar_t* const               dstEnd   = wideBuf.data () + wideBuf.size ();

    // http://en.cppreference.com/w/cpp/locale/codecvt/in
    mbstate_t            shiftState{};
    const char*          srcNext;
    wchar_t*             dstNext;
    codecvt_base::result status = converter.in (shiftState, srcBegin, srcEnd, srcNext, dstBegin, dstEnd, dstNext);
    if (status != codecvt_base::ok) [[unlikely]] {
        static const auto kException_ = Execution::RuntimeErrorException{"Error converting locale multibyte string to UNICODE"sv};
        Execution::Throw (kException_);
    }
    // dstNext marks one past the last wide character written
    return String{span<const wchar_t>{dstBegin, static_cast<size_t> (dstNext - dstBegin)}};
}


/**
 *  Return the rep used for empty strings: one function-local static rep, created on first call and
 *  returned (shared) on every call.
 */
shared_ptr<String::_IRep> String::mkEmpty_ ()
{
    static constexpr wchar_t       kEmptyCStr_[] = L"";
    static const shared_ptr<_IRep> sSharedEmptyRep_ =
        Memory::MakeSharedPtr<StringConstant_::DirectIndexRep<wchar_t>> (span{std::begin (kEmptyCStr_), 0});
    return sSharedEmptyRep_;
}


/**
 *  Create a new rep holding a copy of the characters in 's', choosing the rep type by length:
 *  the smallest of three FixedCapacityInlineStorageString_ sizes that can hold s, or else
 *  DynamicallyAllocatedString for anything longer.
 *
 *  'nocheck' because the caller must already have verified that every character of 's' fits in
 *  one CHAR_T (the Require calls below only assert this).
 */
template <typename CHAR_T>


inline auto String::mk_nocheck_ (span<const CHAR_T> s) -> shared_ptr<_IRep>

    requires (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1> or same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>)

{

    // No check means needed checking done before, so these assertions just help enforce that

    if constexpr (same_as<CHAR_T, ASCII>) {

        Require (Character::IsASCII (s)); // avoid later assertion error

    }

    else if constexpr (same_as<CHAR_T, Latin1>) {

        // nothing to check

    }

    else if constexpr (sizeof (CHAR_T) == 2) {

        Require (UTFConvert::AllFitsInTwoByteEncoding (s)); // avoid later assertion error

    }

    else {

        // again - if larger, nothing to check

    }


    /**

     *  We want to TARGET using block-allocator of 64 bytes. This works well for typical (x86) machine

     *  caches, and divides up nicely, and leaves enough room for a decent number of characters typically.

     *

     *  So compute/guesstimate a few sizes, and add static_asserts to check where we can. Often if these fail

     *  you can just get rid of/or fix them. Not truly counted on, just trying to generate a vaguely reasonable

     *  number of characters to use.

     */

    // size of the part of the rep that precedes the inline character buffer

    constexpr size_t kBaseOfFixedBufSize_ = sizeof (StringRepHelperAllFitInSize_::Rep<CHAR_T>);

    static_assert (kBaseOfFixedBufSize_ < 64); // this code below assumes, so must re-tune if this ever fails

    if constexpr (qStroika_Foundation_Common_Platform_Windows and not qStroika_Foundation_Debug_AssertionsChecked) {

        static_assert (kBaseOfFixedBufSize_ == 3 * sizeof (void*));

        if constexpr (sizeof (void*) == 4) {

            static_assert (kBaseOfFixedBufSize_ == 12);

        }

        else if constexpr (sizeof (void*) == 8) {

            static_assert (kBaseOfFixedBufSize_ == 24);

        }

    }

    // estimate of the per-object bookkeeping added when the rep is allocated via make_shared

    constexpr size_t kOverheadSizeForMakeShared_ =

        qStroika_Foundation_Common_Platform_Windows ? (sizeof (void*) == 4 ? 12 : 16) : sizeof (unsigned long) * 2;

#if qStroika_Foundation_Common_Platform_Windows

    static_assert (kOverheadSizeForMakeShared_ == sizeof (_Ref_count_base)); // not critically counted on, just to debug/fix sizes

#endif

    // number of CHAR_T elements that fit in a 64, 96 and 128 byte allocation respectively, after the overheads above

    static constexpr size_t kNElts1_ = (64 - kBaseOfFixedBufSize_ - kOverheadSizeForMakeShared_) / sizeof (CHAR_T);

    static constexpr size_t kNElts2_ = (96 - kBaseOfFixedBufSize_ - kOverheadSizeForMakeShared_) / sizeof (CHAR_T);

    static constexpr size_t kNElts3_ = (128 - kBaseOfFixedBufSize_ - kOverheadSizeForMakeShared_) / sizeof (CHAR_T);


    // These checks are NOT important, just for documentation/reference

    if constexpr (qStroika_Foundation_Common_Platform_Windows and sizeof (CHAR_T) == 1 and not qStroika_Foundation_Debug_AssertionsChecked) {

        if constexpr (sizeof (void*) == 4) {

            static_assert (kNElts1_ == 40);

            static_assert (kNElts2_ == 72);

            static_assert (kNElts3_ == 104);

        }

        if constexpr (sizeof (void*) == 8) {

            static_assert (kNElts1_ == 24);

            static_assert (kNElts2_ == 56);

            static_assert (kNElts3_ == 88);

        }

    }


    static_assert (qStroika_Foundation_Debug_AssertionsChecked or kNElts1_ >= 6); // crazy otherwise

    static_assert (kNElts2_ > kNElts1_);                                          // ""

    static_assert (kNElts3_ > kNElts2_);                                          // ""


    static_assert (sizeof (FixedCapacityInlineStorageString_::Rep<CHAR_T, kNElts1_>) == 64 - kOverheadSizeForMakeShared_); // not quite guaranteed but close

    static_assert (sizeof (FixedCapacityInlineStorageString_::Rep<CHAR_T, kNElts2_>) == 96 - kOverheadSizeForMakeShared_);  // ""

    static_assert (sizeof (FixedCapacityInlineStorageString_::Rep<CHAR_T, kNElts3_>) == 128 - kOverheadSizeForMakeShared_); // ""


    // pick the smallest inline-storage rep that can hold all the characters

    size_t sz = s.size ();

    if (sz <= kNElts1_) {

        return Memory::MakeSharedPtr<FixedCapacityInlineStorageString_::Rep<CHAR_T, kNElts1_>> (s);

    }

    else if (sz <= kNElts2_) {

        return Memory::MakeSharedPtr<FixedCapacityInlineStorageString_::Rep<CHAR_T, kNElts2_>> (s);

    }

    else if (sz <= kNElts3_) {

        return Memory::MakeSharedPtr<FixedCapacityInlineStorageString_::Rep<CHAR_T, kNElts3_>> (s);

    }

    // too long for any of the inline-storage reps

    return Memory::MakeSharedPtr<DynamicallyAllocatedString::Rep<CHAR_T>> (s);

}


/**
 *  Make a rep that takes ownership of the std::string s (moved, not copied).
 *  Character::CheckASCII is applied to the characters first.
 */
template <>
auto String::mk_ (basic_string<char>&& s) -> shared_ptr<_IRep>
{
    span narrowChars{s.data (), s.size ()};
    Character::CheckASCII (narrowChars);
    return Memory::MakeSharedPtr<StdStringDelegator_::Rep<ASCII>> (move (s));
}


/**
 *  Make a rep from the std::u16string s.
 *
 *  If every character fits in a single 16-bit code unit, the rep takes ownership of s (moved, not copied).
 *  Otherwise (surrogates present) the text is converted to char32_t and a rep is built from that copy.
 */
template <>
auto String::mk_ (basic_string<char16_t>&& s) -> shared_ptr<_IRep>
{
    span utf16Chars{s.data (), s.size ()};
    if (UTFConvert::AllFitsInTwoByteEncoding (Memory::ConstSpan (utf16Chars))) {
        return Memory::MakeSharedPtr<StdStringDelegator_::Rep<char16_t>> (move (s));
    }
    // copy the data if any surrogates
    Memory::StackBuffer<char32_t> utf32Buf{Memory::eUninitialized, UTFConvert::ComputeTargetBufferSize<char32_t> (utf16Chars)};
    auto                          utf32Chars = UTFConvert::kThe.ConvertSpan (utf16Chars, span{utf32Buf});
    return mk_nocheck_ (Memory::ConstSpan (utf32Chars));
}


/**
 *  Make a rep that takes ownership of the std::u32string s (moved, not copied).
 */
template <>
auto String::mk_ (basic_string<char32_t>&& s) -> shared_ptr<_IRep>
{
    using OwningRep_ = StdStringDelegator_::Rep<char32_t>;
    return Memory::MakeSharedPtr<OwningRep_> (move (s));
}


/**
 *  Make a rep from the std::wstring s.
 *
 *  Where wchar_t is 2 bytes and s contains surrogates, the text is converted to char32_t and a rep is built
 *  from that copy. In every other case the rep takes ownership of s (moved, not copied).
 */
template <>
auto String::mk_ (basic_string<wchar_t>&& s) -> shared_ptr<_IRep>
{
    if constexpr (sizeof (wchar_t) != 2) {
        return Memory::MakeSharedPtr<StdStringDelegator_::Rep<wchar_t>> (move (s));
    }
    else {
        span wideChars{s.data (), s.size ()};
        if (UTFConvert::AllFitsInTwoByteEncoding (Memory::ConstSpan (wideChars))) {
            return Memory::MakeSharedPtr<StdStringDelegator_::Rep<wchar_t>> (move (s));
        }
        // copy the data if any surrogates
        Memory::StackBuffer<char32_t> utf32Buf{Memory::eUninitialized, UTFConvert::ComputeTargetBufferSize<char32_t> (wideChars)};
        auto                          utf32Chars = UTFConvert::kThe.ConvertSpan (wideChars, span{utf32Buf});
        return mk_nocheck_ (Memory::ConstSpan (utf32Chars));
    }
}


/*
 *  Generic (unoptimized) concatenation: fetch both operands as char32_t spans, copy them
 *  back-to-back into one temporary buffer, and build the result from that buffer.
 */
String String::Concatenate_ (const String& rhs) const
{
    // scratch buffers are only used by GetData if the underlying rep is not already char32_t
    Memory::StackBuffer<char32_t> lhsScratch;
    Memory::StackBuffer<char32_t> rhsScratch;
    const span                    lhsChars = GetData (&lhsScratch);
    const span                    rhsChars = rhs.GetData (&rhsScratch);

    Memory::StackBuffer<char32_t> combined{Memory::eUninitialized, lhsChars.size () + rhsChars.size ()};
    char32_t*                     out = combined.data ();
    out                               = copy (lhsChars.begin (), lhsChars.end (), out); // copy returns one-past the last element written
    copy (rhsChars.begin (), rhsChars.end (), out);
    return mk_ (span{combined});
}


/*
 *  Replace the character at index i with c. i must be a valid index (i < size ()).
 *  Implemented by round-tripping through a StringBuilder and assigning the result to *this.
 */
void String::SetCharAt (Character c, size_t i)
{
    // @Todo - redo with check if char is actually changing and if so use
    // mk/4 4 arg string maker instead.??? Or some such...
    Require (i < size ()); // (i >= 0 always holds for an unsigned index)

    // Expensive, but you can use StringBuilder directly to avoid the performance costs
    StringBuilder builder{*this};
    builder.SetAt (c, i);
    *this = builder;
}


/*
 *  Return a copy of this string with the characters of s inserted before index 'at'
 *  (at == size () appends). An empty s returns *this unchanged.
 */
String String::InsertAt (span<const Character> s, size_t at) const
{
    Require (at <= size ()); // (at >= 0 always holds for an unsigned index)
    if (s.empty ()) {
        return *this;
    }
    Memory::StackBuffer<Character> scratch;
    const span<const Character>    original = GetData (&scratch);
    const span<const Character>    prefix   = original.subspan (0, at);
    const span<const Character>    suffix   = original.subspan (at);

    // prefix + inserted text + suffix
    StringBuilder sb{prefix};
    sb.Append (s);
    sb.Append (suffix);
    return sb;
}


/*
 *  Return a copy of this string with the half-open character range [from, to) removed.
 *  Requires from <= to <= size ().
 */
String String::RemoveAt (size_t from, size_t to) const
{
    Require (from <= to);
    Require (to <= size ());

    // cheap special cases first: nothing removed, a prefix removed, or a suffix removed
    if (from == to) {
        return *this;
    }
    if (from == 0) {
        return SubString (to);
    }
    _SafeReadRepAccessor accessor{this};
    if (to == accessor._ConstGetRep ().size ()) {
        return SubString (0, from);
    }

    // general case: splice the part before 'from' and the part from 'to' on into one buffer
    Memory::StackBuffer<char32_t> scratch;
    span                          all  = GetData (&scratch);
    span                          head = all.subspan (0, from);
    span                          tail = all.subspan (to);

    Memory::StackBuffer<char32_t> joined{Memory::eUninitialized, all.size () - (to - from)};
    span<char32_t>                joinedSpan{joined.data (), joined.size ()};
    Memory::CopyBytes (head, joinedSpan);
    Memory::CopyBytes (tail, joinedSpan.subspan (head.size ()));
    return String{mk_ (joinedSpan)};
}


/*
 *  Return a copy of this string with the first occurrence of c (case-sensitive match)
 *  removed; if c does not occur, the string is returned unchanged.
 */
String String::RemoveFirstIf (Character c) const
{
    String     result{*this};
    const auto where = result.Find (c, eWithCase);
    if (not where) {
        return result;
    }
    return result.RemoveAt (*where);
}


/*
 *  Return a copy of this string with the first occurrence of subString (case-sensitive match)
 *  removed; if subString does not occur, *this is returned unchanged.
 */
String String::RemoveFirstIf (const String& subString) const
{
    const auto where = this->Find (subString, eWithCase);
    if (not where) {
        return *this;
    }
    // keep everything before the match, and everything after it
    const size_t afterMatch = *where + subString.length ();
    return this->SubString (0, *where) + this->SubString (afterMatch);
}


/*
 *  Return a copy of this string with every occurrence of c (exact, case-sensitive match) removed.
 *
 *  Single pass using StringBuilder: the previous implementation repeatedly searched from the
 *  start and rebuilt the whole string once per removed character (quadratic in the worst case).
 *  If c never occurs, *this is returned as-is.
 */
String String::RemoveAll (Character c) const
{
    StringBuilder sb;
    bool          anyRemoved{false};
    for (Character i : *this) {
        if (i == c) {
            anyRemoved = true; // drop this character
        }
        else {
            sb << i;
        }
    }
    if (not anyRemoved) {
        return *this; // nothing changed - avoid constructing a new string
    }
    return sb;
}


/*
 *  Return a copy of this string with occurrences of subString (case-sensitive match) removed.
 *  The search restarts from the beginning after each removal, so occurrences formed by joining
 *  the text on either side of a removed match are removed as well.
 *
 *  An empty subString removes nothing and returns *this. Without that guard the loop below would
 *  never terminate on a non-empty string, since Find () reports a match at index 0 for an empty
 *  search string and removing zero characters never changes the string.
 */
String String::RemoveAll (const String& subString) const
{
    if (subString.empty ()) {
        return *this;
    }
    // @todo REIMPL WITH STRINGBUILDER
    // quick and dirty inefficient implementation
    String tmp = {*this};
    while (auto o = tmp.Find (subString, eWithCase)) {
        tmp = tmp.SubString (0, *o) + tmp.SubString (*o + subString.length ());
    }
    return tmp;
}


/*
 *  Find the first index >= startAt at which character c occurs, comparing according to co.
 *  Returns nullopt if there is no such index. Requires startAt <= size ().
 *
 *  When the string is stored as ASCII and c is ASCII, the search runs directly over the
 *  underlying char data; otherwise the data is fetched as a span of Character and searched.
 */
optional<size_t> String::Find (Character c, size_t startAt, CompareOptions co) const
{
    PeekSpanData pds = GetPeekSpanData<ASCII> ();
    // OPTIMIZED PATHS: Common case(s) and should be fast
    if (pds.fInCP == PeekSpanData::StorageCodePointType::eAscii) {
        if (c.IsASCII ()) {
            // same precondition as the generic path below; span::subspan is undefined for an offset past the end
            Require (startAt <= pds.fAscii.size ());
            span<const char> examineSpan = pds.fAscii.subspan (startAt);
            if (co == eWithCase) {
                if (auto i = std::find (examineSpan.begin (), examineSpan.end (), c.GetAsciiCode ()); i != examineSpan.end ()) {
                    return i - examineSpan.begin () + startAt;
                }
            }
            else {
                char   lc        = c.ToLowerCase ().GetAsciiCode ();
                size_t reportIdx = startAt;
                for (auto ci : examineSpan) {
                    if (tolower (ci) == lc) {
                        return reportIdx;
                    }
                    ++reportIdx;
                }
            }
            return nullopt; // not found, possibly cuz not ascii
        }
    }
    // fallback on more generic algorithm - and copy to full character objects
    //
    // performance notes
    //      Could iterate using CharAt() and that would perform better in the case where you find c early
    //      in a string, and the string is short. The problem with the current code is that it converts the
    //      entire string (could be long) and then might not look at much of the converted data.
    //      on the other hand, if our reps are either 'ascii or char32_t wide' - which we may end up with - then
    //      this isn't too bad - cuz no copying for char32_ case either...
    Memory::StackBuffer<Character> maybeIgnoreBuf;
    span<const Character>          charSpan = GetData (pds, &maybeIgnoreBuf);
    Require (startAt <= charSpan.size ());
    span<const Character> examineSpan = charSpan.subspan (startAt);
    switch (co) {
        case eCaseInsensitive: {
            Character lcc = c.ToLowerCase ();
            for (auto i = examineSpan.begin (); i != examineSpan.end (); ++i) {
                if (i->ToLowerCase () == lcc) {
                    return startAt + (i - examineSpan.begin ());
                }
            }
        } break;
        case eWithCase: {
            if (auto i = std::find (examineSpan.begin (), examineSpan.end (), c); i != examineSpan.end ()) {
                // compute the offset first: 'startAt + i' would advance the iterator (possibly past the end of the span)
                return startAt + (i - examineSpan.begin ());
            }
        } break;
    }
    return nullopt; // not found any which way
}


/*
 *  Find the first index >= startAt at which subString occurs in this string, comparing
 *  characters according to co. Returns nullopt when there is no match. Requires startAt <= size ().
 *
 *  An empty subString yields 0 for a non-empty string and nullopt for an empty string
 *  (note: startAt is not consulted in that case).
 */
optional<size_t> String::Find (const String& subString, size_t startAt, CompareOptions co) const
{
    //@todo: FIX HORRIBLE PERFORMANCE!!!
    _SafeReadRepAccessor accessor{this};
    const _IRep&         rep     = accessor._ConstGetRep ();
    const size_t         thisLen = rep.size ();
    Require (startAt <= thisLen);

    const size_t subStrLen = subString.size ();
    if (subStrLen == 0) {
        return (thisLen == 0) ? optional<size_t>{} : 0;
    }
    if (thisLen < subStrLen) {
        return {}; // important test cuz size_t is unsigned
    }
    const size_t lastCandidate = thisLen - subStrLen;

    // true iff subString matches this string at position 'pos', where 'differs' decides if two characters mismatch
    auto matchesAt = [&] (size_t pos, auto&& differs) -> bool {
        for (size_t j = 0; j < subStrLen; ++j) {
            if (differs (rep.GetAt (pos + j), subString[j])) {
                return false;
            }
        }
        return true;
    };

    switch (co) {
        case eCaseInsensitive: {
            auto differsIgnoringCase = [] (Character a, Character b) { return a.ToLowerCase () != b.ToLowerCase (); };
            for (size_t pos = startAt; pos <= lastCandidate; ++pos) {
                if (matchesAt (pos, differsIgnoringCase)) {
                    return pos;
                }
            }
        } break;
        case eWithCase: {
            auto differsExactly = [] (Character a, Character b) { return a != b; };
            for (size_t pos = startAt; pos <= lastCandidate; ++pos) {
                if (matchesAt (pos, differsExactly)) {
                    return pos;
                }
            }
        } break;
    }
    return {};
}


/*
 *  Search for the first match of regEx at or after startAt.
 *  On success returns {start, end} offsets of the match (end is one past the last matched
 *  element); otherwise returns nullopt. Requires startAt <= size ().
 *
 *  The search runs on a wstring copy of the text from startAt on, so the regular expression
 *  sees that suffix as its whole input.
 *  NOTE(review): offsets are computed in wstring elements; if wchar_t is 2 bytes and the text
 *  contains surrogate pairs these may not equal character offsets - confirm.
 */
optional<pair<size_t, size_t>> String::Find (const RegularExpression& regEx, size_t startAt) const
{
    Require (startAt <= size ());
    wstring tmp = As<wstring> ();
    // startAt == tmp.size () is legal (documented precondition above, and wstring::substr accepts pos == size ()):
    // it just searches the empty suffix.
    Require (startAt <= tmp.size ());
    tmp = tmp.substr (startAt);
    wsmatch res;
    if (regex_search (tmp, res, regEx.GetCompiled ())) {
        // res.position () is relative to the searched suffix, so shift it back by startAt
        size_t startOfMatch = startAt + res.position ();
        return pair<size_t, size_t>{startOfMatch, startOfMatch + res.length ()};
    }
    return {};
}


/*
 *  Return the start index of each non-overlapping occurrence of string2SearchFor, in
 *  increasing order, comparing according to co.
 *
 *  An empty search string yields an empty result. Without that guard the loop would never
 *  advance (Find () reports index 0 for an empty search string in a non-empty string, and the
 *  resume position would grow by zero), appending to the result forever.
 */
Containers::Sequence<size_t> String::FindEach (const String& string2SearchFor, CompareOptions co) const
{
    vector<size_t> result;
    if (string2SearchFor.empty ()) {
        return Containers::Concrete::Sequence_stdvector{move (result)};
    }
    size_t searchFrom = 0;
    while (optional<size_t> hit = Find (string2SearchFor, searchFrom, co)) {
        result.push_back (*hit);
        // resume after this match; cannot point past end of this string because we FOUND string2SearchFor there
        searchFrom = *hit + string2SearchFor.length ();
    }
    return Containers::Concrete::Sequence_stdvector{move (result)};
}


/*
 *  Run a single regex_search of regEx over this string and report, for each entry of the
 *  resulting match_results (entry 0 = whole match, others = capture groups) that has non-zero
 *  length, the pair {position, length}.
 *
 *  NOTE(review): this reports sub-matches of the FIRST match only (not successive matches), and
 *  the second pair member is a length, whereas Find (regEx, startAt) returns {start, end}.
 *  Confirm which contract callers expect before changing.
 */
Containers::Sequence<pair<size_t, size_t>> String::FindEach (const RegularExpression& regEx) const
{
    //@TODO - FIX - IF we get back zero length match
    wstring tmp{As<wstring> ()};
    wsmatch res;
    regex_search (tmp, res, regEx.GetCompiled ());

    const size_t                 nEntries = res.size ();
    vector<pair<size_t, size_t>> result;
    result.reserve (nEntries);
    size_t mi = 0;
    while (mi < nEntries) {
        size_t matchLen = res.length (mi);
        if (matchLen != 0) { // avoid populating with lots of empty matches - special case of empty search
            result.push_back (pair<size_t, size_t>{res.position (mi), matchLen});
        }
        ++mi;
    }
    return Containers::Concrete::Sequence_stdvector{move (result)};
}


/*
 *  Iterate over all successive matches of regEx in this string and return, for each one,
 *  a RegularExpressionMatch built from the full matched text plus the text of each capture group.
 */
Containers::Sequence<RegularExpressionMatch> String::FindEachMatch (const RegularExpression& regEx) const
{
    wstring                        tmp{As<wstring> ()};
    vector<RegularExpressionMatch> result;
    const wsregex_iterator         noMoreMatches; // default-constructed iterator is the end-of-sequence marker
    for (wsregex_iterator it{tmp.begin (), tmp.end (), regEx.GetCompiled ()}; it != noMoreMatches; ++it) {
        const wsmatch& match = *it;
        Assert (match.size () != 0);
        const size_t     nEntries = match.size ();
        Sequence<String> captures;
        // entry 0 is the whole match; entries 1.. are the capture groups
        for (size_t groupIdx = 1; groupIdx < nEntries; ++groupIdx) {
            captures.Append (match.str (groupIdx));
        }
        result.push_back (RegularExpressionMatch{match.str (0), captures});
    }
    return Containers::Concrete::Sequence_stdvector{move (result)};
}


/*
 *  Return the matched text of each successive match of regEx in this string, in order.
 */
Containers::Sequence<String> String::FindEachString (const RegularExpression& regEx) const
{
    wstring                tmp{As<wstring> ()};
    vector<String>         result;
    const wsregex_iterator noMoreMatches; // default-constructed iterator is the end-of-sequence marker
    wsregex_iterator       it{tmp.begin (), tmp.end (), regEx.GetCompiled ()};
    while (it != noMoreMatches) {
        result.push_back (String{it->str ()});
        ++it;
    }
    return Containers::Concrete::Sequence_stdvector{move (result)};
}


/*
 *  Return the index of the last occurrence of c (exact comparison), or nullopt if c does not occur.
 */
optional<size_t> String::RFind (Character c) const noexcept
{
    //@todo: FIX HORRIBLE PERFORMANCE!!!
    _SafeReadRepAccessor accessor{this};
    const _IRep&         rep = accessor._ConstGetRep ();
    // walk backwards; idx is always one past the next character to examine
    size_t idx = rep.size ();
    while (idx != 0) {
        --idx;
        if (rep.GetAt (idx) == c) {
            return idx;
        }
    }
    return nullopt;
}


/*
 *  Return the start index of the last occurrence of subString in this string, or nullopt if
 *  there is none.
 *
 *  An empty subString yields size () - 1 for a non-empty string and nullopt for an empty string.
 *  A subString longer than this string can never match and returns nullopt.
 */
optional<size_t> String::RFind (const String& subString) const
{
    //@todo: FIX HORRIBLE PERFORMANCE!!!
    /*
     * Do quickie implementation, and don't worry about efficiency...
     */
    const size_t thisLen   = size ();
    const size_t subStrLen = subString.size ();
    if (subStrLen == 0) {
        return ((thisLen == 0) ? optional<size_t>{} : thisLen - 1);
    }
    if (subStrLen > thisLen) {
        // important test cuz size_t is unsigned: thisLen - subStrLen + 1 would wrap around
        // and the loop below would then ask SubString for out-of-range indexes
        return nullopt;
    }

    // limit is one past the last start index at which subString could still fit
    size_t limit = thisLen - subStrLen + 1;
    for (size_t i = limit; i > 0; --i) {
        if (SubString (i - 1, i - 1 + subStrLen) == subString) {
            return i - 1;
        }
    }
    return nullopt;
}


/*
 *  Return a copy of this string in which the half-open character range [from, to) is replaced
 *  by 'replacement'. Requires from <= to <= size (); to == size () replaces through the end.
 *
 *  The text is fetched as a span of Character (one element per character) so that from/to,
 *  which are character indexes, can be applied to the span directly.
 */
String String::Replace (size_t from, size_t to, const String& replacement) const
{
    Require (from <= to);
    Require (to <= this->size ());
    Memory::StackBuffer<Character> ignored;
    span<const Character>          thisSpan = GetData (&ignored);
    Assert (to <= thisSpan.size ()); // to == size () is permitted by the precondition above
    StringBuilder sb{thisSpan.subspan (0, from)};
    sb.Append (replacement);
    sb.Append (thisSpan.subspan (to));
    Ensure (sb == SubString (0, from) + replacement + SubString (to));
    return sb;
}


bool String::StartsWith (const Character& c, CompareOptions co) const

{

    _SafeReadRepAccessor accessor{this};

    if (accessor._ConstGetRep ().size () == 0) {

        return false;

    }

    return Character::EqualsComparer{co}(accessor._ConstGetRep ().GetAt (0), c);

}


bool String::StartsWith (const String& subString, CompareOptions co) const

{

    Require (not subString.empty ());

    if (subString.size () > size ()) {

        return false;

    }

#if qStroika_Foundation_Debug_AssertionsChecked

    bool referenceResult = ThreeWayComparer{co}(SubString (0, subString.size ()), subString) == 0;

#endif

    Memory::StackBuffer<Character> maybeIgnoreBuf1;

    Memory::StackBuffer<Character> maybeIgnoreBuf2;

    span<const Character>          subStrData = subString.GetData (&maybeIgnoreBuf1);

    span<const Character>          thisData   = GetData (&maybeIgnoreBuf2);

    bool                           result     = Character::Compare (thisData.subspan (0, subStrData.size ()), subStrData, co) == 0;

#if qStroika_Foundation_Debug_AssertionsChecked

    Ensure (result == referenceResult);

#endif

    return result;

}


bool String::EndsWith (const Character& c, CompareOptions co) const

{

    _SafeReadRepAccessor accessor{this};

    const _IRep&         useRep     = accessor._ConstGetRep ();

    size_t               thisStrLen = useRep.size ();

    if (thisStrLen == 0) {

        return false;

    }

    return Character::EqualsComparer{co}(useRep.GetAt (thisStrLen - 1), c);

}


bool String::EndsWith (const String& subString, CompareOptions co) const

{

    Require (not subString.empty ());

    _SafeReadRepAccessor subStrAccessor{&subString};

    _SafeReadRepAccessor accessor{this};

    size_t               thisStrLen = accessor._ConstGetRep ().size ();

    size_t               subStrLen  = subString.size ();

    if (subStrLen > thisStrLen) {

        return false;

    }

#if qStroika_Foundation_Debug_AssertionsChecked

    bool referenceResult = String::EqualsComparer{co}(SubString (thisStrLen - subStrLen, thisStrLen), subString);

#endif

    Memory::StackBuffer<Character> maybeIgnoreBuf1;

    Memory::StackBuffer<Character> maybeIgnoreBuf2;

    span<const Character>          subStrData = subString.GetData (&maybeIgnoreBuf1);

    span<const Character>          thisData   = GetData (&maybeIgnoreBuf2);

    bool                           result     = Character::Compare (thisData.subspan (thisStrLen - subStrLen), subStrData, co) == 0;

#if qStroika_Foundation_Debug_AssertionsChecked

    Ensure (result == referenceResult);

#endif

    return result;

}


/*
 *  Return this string if it already ends with c (per co); otherwise return this string with c appended.
 */
String String::AssureEndsWith (const Character& c, CompareOptions co) const
{
    if (not EndsWith (c, co)) {
        StringBuilder sb = *this;
        sb.Append (c);
        return sb;
    }
    return *this;
}


bool String::Matches (const RegularExpression& regEx) const

{

    wstring tmp{As<wstring> ()};

    return regex_match (tmp.begin (), tmp.end (), regEx.GetCompiled ());

}


/*
 *  True iff the ENTIRE string matches regEx. On success, *matches is cleared and filled with
 *  the text of each capture group (the whole-match entry is not included). On failure,
 *  *matches is left untouched. 'matches' must not be null.
 */
bool String::Matches (const RegularExpression& regEx, Sequence<String>* matches) const
{
    RequireNotNull (matches);
    //tmphack
    wstring tmp{As<wstring> ()};
    wsmatch base_match;
    if (not regex_match (tmp, base_match, regEx.GetCompiled ())) {
        return false;
    }
    matches->clear ();
    // entry 0 is the whole match, so capture groups start at 1
    for (size_t groupIdx = 1; groupIdx < base_match.size (); ++groupIdx) {
        matches->Append (base_match[groupIdx].str ());
    }
    return true;
}


/*
 *  Return a copy of this string with every match of regEx replaced by 'with'
 *  ('with' is handed to std::regex_replace as the replacement format string).
 */
String String::ReplaceAll (const RegularExpression& regEx, const String& with) const
{
    const wstring source      = As<wstring> ();
    const wstring replacement = with.As<wstring> ();
    return String{regex_replace (source, regEx.GetCompiled (), replacement)};
}


/*
 *  Return a copy of this string with each occurrence of string2SearchFor (matched per co)
 *  replaced by 'with'. Searching resumes just after each inserted replacement, so text produced
 *  by a replacement is never itself re-scanned. Requires string2SearchFor to be non-empty.
 */
String String::ReplaceAll (const String& string2SearchFor, const String& with, CompareOptions co) const
{
    Require (not string2SearchFor.empty ());
    // simplistic quickie impl...
    String result{*this};
    size_t resumeAt = 0;
    while (optional<size_t> hit = result.Find (string2SearchFor, resumeAt, co)) {
        result   = result.SubString (0, *hit) + with + result.SubString (*hit + string2SearchFor.length ());
        resumeAt = *hit + with.length ();
    }
    return result;
}


/*
 *  Return a copy of this string in which every character for which replaceCharP returns true
 *  is replaced by the string 'with'; all other characters are copied through unchanged.
 */
String String::ReplaceAll (const function<bool (Character)>& replaceCharP, const String& with) const
{
    StringBuilder sb;
    for (Character ch : *this) {
        if (not replaceCharP (ch)) {
            sb << ch;
            continue;
        }
        sb << with;
    }
    return sb;
}


/*
 *  Return a copy of this string in which every character contained in charSet is replaced
 *  by the string 'with'; all other characters are copied through unchanged.
 */
String String::ReplaceAll (const Set<Character>& charSet, const String& with) const
{
    StringBuilder sb;
    for (Character ch : *this) {
        if (not charSet.Contains (ch)) {
            sb << ch;
            continue;
        }
        sb << with;
    }
    return sb;
}


/*
 *  Return a copy of this string with each "\r\n" pair and each lone '\r' converted to '\n'.
 *  If the string contains no '\r' at all, *this is returned.
 */
String String::NormalizeTextToNL () const
{
    PeekSpanData                   pds = GetPeekSpanData<ASCII> ();
    Memory::StackBuffer<Character> scratch;
    span<const Character>          chars = GetData (pds, &scratch);
    const size_t                   nChars = chars.size ();

    StringBuilder sb;
    bool          sawCR{false};
    for (size_t idx = 0; idx < nChars; ++idx) {
        Character c = chars[idx];
        if (c == '\r') {
            // CRLF: swallow the LF too, so the pair produces just one NL
            if (idx + 1 < nChars and chars[idx + 1] == '\n') {
                ++idx;
            }
            sawCR = true;
            c     = '\n';
        }
        sb << c;
    }
    if (not sawCR) {
        return *this; // nothing was rewritten
    }
    return sb;
}


/*
 *  Return a copy of this string in which every run matching the regular expression "\s+"
 *  is replaced by a single useSpaceCharacter.
 */
String String::NormalizeSpace (Character useSpaceCharacter) const
{
    const String replacement{useSpaceCharacter};
    return ReplaceAll ("\\s+"_RegEx, replacement);
}


/*
 *  Tokenize using Character::IsWhitespace as the token-separator predicate
 *  (delegates to the Tokenize overload taking a function).
 */
Containers::Sequence<String> String::Tokenize () const

{

    // the cast names the function-pointer type explicitly for the argument
    // (presumably needed because Character::IsWhitespace is overloaded - confirm)
    return Tokenize ((bool (*) (Character))Character::IsWhitespace);

}


/*
 *  Split this string into tokens: maximal runs of characters for which isTokenSeparator
 *  returns false. Separator characters are dropped, and empty tokens are never produced.
 */
Sequence<String> String::Tokenize (const function<bool (Character)>& isTokenSeparator) const
{
    Sequence<String> tokens;
    StringBuilder    curToken;
    bool             inToken = false;
    for (Character c : *this) {
        if (isTokenSeparator (c)) {
            // a separator ends the token in progress (if any)
            if (inToken) {
                tokens += curToken.str ();
                curToken.clear ();
                inToken = false;
            }
        }
        else {
            inToken = true;
            curToken << c;
        }
    }
    if (inToken) {
        tokens += curToken.str (); // final token not followed by a separator
    }
    return tokens;
}


/*
 *  Split this string into tokens separated by matches of the regular expression isSeparator.
 *  Text between consecutive separator matches becomes a token; empty tokens (separator at the
 *  start of the string, or two separators directly adjacent) are not produced.
 *
 *  Throws Execution::RuntimeErrorException if isSeparator produces an empty match, since that
 *  would never advance through the string.
 */
Sequence<String> String::Tokenize (const RegularExpression& isSeparator) const
{
    Sequence<String> r;
    size_t           len = this->length ();
    for (size_t startAt = 0; startAt < len;) {
        if (optional<pair<size_t, size_t>> ofi = Find (isSeparator, startAt)) {
            Assert (ofi->first >= startAt);
            Assert (ofi->first <= ofi->second);
            if (ofi->first == ofi->second) [[unlikely]] {
                static const auto kException_ =
                    Execution::RuntimeErrorException{"separator regular expression argument to Tokenize must be non-empty or not match"sv};
                Execution::Throw (kException_);
            }
            if (ofi->first > startAt) {
                r += SubString (startAt, ofi->first);
            }
            // else: the separator begins exactly at startAt - either at the very start of the string, or
            // immediately after the previous separator (e.g. separator "," on "a,,b"). Either way there is
            // no token text to record, so just skip over the separator.
            startAt = ofi->second;
            Assert (startAt <= len);
        }
        else {
            r += SubString (startAt); // if no match, the rest of the string is a non-separator
            break;
        }
    }
    return r;
}

/*
 *  Tokenize treating every character contained in 'delimiters' as a token separator
 *  (delegates to the Tokenize overload taking a function).
 */
Sequence<String> String::Tokenize (const Set<Character>& delimiters) const
{
    /*
     *  @todo Inefficient impl, to encourage code saving. Do more efficiently.
     */
    // the predicate holds its own copy of the delimiter set
    const function<bool (Character)> isDelimiter = [delimiters] (Character c) -> bool { return delimiters.Contains (c); };
    return Tokenize (isDelimiter);
}


/*
 *  Split this string into lines. A line ends at "\r\n", a lone '\r', or a lone '\n'; the
 *  terminators are not included in the returned lines. Text after the final terminator is
 *  returned as a last line only if it is non-empty.
 */
Sequence<String> String::AsLines () const
{
    Sequence<String> lines;
    StringBuilder    curLineSB;
    for (auto i = this->MakeIterator (); i; ++i) {
        const Character c    = *i;
        const auto      code = c.GetCharacterCode ();
        if (code != '\r' and code != '\n') {
            curLineSB.push_back (c); // ordinary character: extend the current line
            continue;
        }
        if (code == '\r') {
            // treat "\r\n" as a single terminator: peek at the next character and consume it if it is '\n'
            auto peek = i;
            ++peek;
            if (peek and *peek == '\n') {
                i = peek;
            }
        }
        lines += curLineSB.str ();
        curLineSB.clear ();
    }
    if (not curLineSB.empty ()) { // non-terminated lines included
        lines += curLineSB.str ();
    }
    return lines;
}


/*
 *  Return, in order, the lines of this string (as produced by AsLines ()) that contain fgrepArg.
 */
Sequence<String> String::Grep (const String& fgrepArg) const
{
    Sequence<String> matchingLines;
    for (const String& line : AsLines ()) {
        if (not line.Contains (fgrepArg)) {
            continue;
        }
        matchingLines += line;
    }
    return matchingLines;
}


/*
 *  Return, in order, the lines of this string (as produced by AsLines ()) for which
 *  Matches (egrepArg) is true - i.e. the regular expression must match the whole line.
 */
Sequence<String> String::Grep (const RegularExpression& egrepArg) const
{
    Sequence<String> matchingLines;
    for (const String& line : AsLines ()) {
        if (not line.Matches (egrepArg)) {
            continue;
        }
        matchingLines += line;
    }
    return matchingLines;
}


/*
 *  Return the i'th (zero-based) column of this string, where columns are separated by runs
 *  matching the regular expression "\s+". The result is whatever Nth (i) yields for the token list.
 */
optional<String> String::Col (size_t i) const
{
    // built once and shared by all calls
    static const RegularExpression kWS_ = "\\s+"_RegEx;
    return Tokenize (kWS_).Nth (i);
}


/*
 *  Return the i'th (zero-based) column of this string, where columns are the tokens produced
 *  by Tokenize (separator). The result is whatever Nth (i) yields for that token list.
 */
optional<String> String::Col (size_t i, const RegularExpression& separator) const
{
    const Sequence<String> columns = Tokenize (separator);
    return columns.Nth (i);
}


/*
 *  Build the substring covering the half-open character range [from, to) of this string.
 *
 *  \param thisAccessor     accessor whose rep is peeked to obtain the underlying character data
 *  \param from, to         character indexes; requires from <= to <= size ()
 *
 *  An empty range returns mkEmpty_ (). Otherwise the rep's stored data is examined in place
 *  (PeekData) and the result is constructed from the matching sub-span of that data, with one
 *  case per storage representation.
 */
String String::SubString_ (const _SafeReadRepAccessor& thisAccessor, size_t from, size_t to) const

{

    // compile-time switch: when false, the 'whole string requested => return *this' shortcuts below are compiled out
    constexpr bool kWholeStringOptionization_ =

        false; // empirically, this costs about 1%. My WAG is that 1% cost not a good tradeoff cuz I dont think this gets triggered that often - LGP 2023-09-26

    Require (from <= to);

    Require (to <= this->size ());


    // Could do this more simply, but since this function is a bottleneck, handle each representation case separately

    if (from == to) [[unlikely]] {

        return mkEmpty_ ();

    }

    PeekSpanData psd = thisAccessor._ConstGetRep ().PeekData (nullopt);

    // psd.fInCP says which of the span members of psd is the valid one
    switch (psd.fInCP) {

        case PeekSpanData::eAscii: {

            if constexpr (kWholeStringOptionization_) {

                if (from == 0 and to == psd.fAscii.size ()) [[unlikely]] {

                    return *this; // unclear if this optimization is worthwhile

                }

            }

            return mk_nocheck_ (psd.fAscii.subspan (from, to - from)); // no check cuz we already know its all ASCII and nothing smaller

        }

        case PeekSpanData::eSingleByteLatin1: {

            if constexpr (kWholeStringOptionization_) {

                if (from == 0 and to == psd.fSingleByteLatin1.size ()) [[unlikely]] {

                    return *this; // unclear if this optimization is worthwhile

                }

            }

            return mk_ (psd.fSingleByteLatin1.subspan (from, to - from)); // note still needs to re-examine text, cuz subset maybe pure ascii (etc)

        }

        case PeekSpanData::eChar16: {

            if constexpr (kWholeStringOptionization_) {

                if (from == 0 and to == psd.fChar16.size ()) [[unlikely]] {

                    return *this; // unclear if this optimization is worthwhile

                }

            }

            return mk_ (psd.fChar16.subspan (from, to - from)); // note still needs to re-examine text, cuz subset maybe pure ascii (etc)

        }

        case PeekSpanData::eChar32: {

            if constexpr (kWholeStringOptionization_) {

                if (from == 0 and to == psd.fChar32.size ()) [[unlikely]] {

                    return *this; // unclear if this optimization is worthwhile

                }

            }

            return mk_ (psd.fChar32.subspan (from, to - from)); // note still needs to re-examine text, cuz subset maybe pure ascii (etc)

        }

        default:

            // all known storage kinds are handled above
            AssertNotReached ();

            return String{};

    }

}


/*
 *  Return this string concatenated with itself 'count' times (count == 0 yields an empty string).
 */
String String::Repeat (unsigned int count) const
{
    // small counts are handled without a StringBuilder
    if (count == 0) {
        return String{};
    }
    if (count == 1) {
        return *this;
    }
    if (count == 2) {
        return *this + *this;
    }
    StringBuilder result;
    for (unsigned int remaining = count; remaining != 0; --remaining) {
        result << *this;
    }
    return result;
}


/*
 *  Return this string with all leading characters for which shouldBeTrimmed returns true removed.
 *  shouldBeTrimmed must not be null.
 *
 *  Two implementations are defined below:
 *      -   referenceImpl: walks the rep one character at a time via GetAt ()
 *      -   commonAlgorithm: walks the raw stored data directly (ASCII, Latin1 and char32_t storage only)
 *  The stored representation selects which one runs; when assertions are enabled, results of
 *  commonAlgorithm are checked against referenceImpl.
 */
String String::LTrim (bool (*shouldBeTrimmed) (Character)) const

{

    RequireNotNull (shouldBeTrimmed);

    // straightforward version, usable for any storage representation
    auto referenceImpl = [&] () {

        _SafeReadRepAccessor accessor{this};

        size_t               length = accessor._ConstGetRep ().size ();

        for (size_t i = 0; i < length; ++i) {

            // stop at the first character that is to be kept
            if (not(*shouldBeTrimmed) (accessor._ConstGetRep ().GetAt (i))) {

                if (i == 0) {

                    return *this; // no change in string

                }

                else {

                    return SubString (i, length);

                }

            }

        }

        return String{}; // all trimmed

    };

    // same algorithm, but applied directly to a span of the stored elements (each element is one character)
    auto commonAlgorithm = [&]<typename T> (span<const T> lowLevelCharSpan) -> String {

        size_t length = lowLevelCharSpan.size ();

        for (size_t i = 0; i < length; ++i) {

            static_assert (Common::IAnyOf<T, ASCII, Latin1, char32_t>); // this works for ASCII, Latin1, char32_t, but for char16_t - not so much - trickier

            Character c{lowLevelCharSpan[i]};

            // drop not-so-subtle hint to optimizer this is likely the function, and can be called, and hopefully hoisted outside the loop, and inlined

            bool thisCharacterTrimmed = [&] () {

                if (shouldBeTrimmed == (bool (*) (Character))Character::IsWhitespace) [[likely]] {

                    return Character::IsWhitespace (c);

                }

                else {

                    return shouldBeTrimmed (c);

                }

            }();

            if (not thisCharacterTrimmed) {

                if (i == 0) {

#if qStroika_Foundation_Debug_AssertionsChecked

                    Assert (*this == referenceImpl ());

#endif

                    return *this; // no change in string

                }

                else {

#if qStroika_Foundation_Debug_AssertionsChecked

                    Assert (mk_ (lowLevelCharSpan.subspan (i)) == referenceImpl ());

#endif

                    // build the result from the kept tail of the stored data
                    return mk_ (lowLevelCharSpan.subspan (i));

                }

            }

        }

        return String{}; // all trimmed

    };

    _SafeReadRepAccessor accessor{this};

    PeekSpanData         psd = accessor._ConstGetRep ().PeekData (nullopt);

    // dispatch on the storage representation; any case not listed here falls through to referenceImpl below
    switch (psd.fInCP) {

        case PeekSpanData::eAscii: {

            return commonAlgorithm (psd.fAscii);

        }

        case PeekSpanData::eSingleByteLatin1: {

            return commonAlgorithm (psd.fSingleByteLatin1);

        }

        case PeekSpanData::eChar32: {

            return commonAlgorithm (psd.fChar32);

        }

    }

    return referenceImpl (); // due to tricks with surrogates, and rarity, not worth worrying about char16_t case

}


/*
 *  Return this string with all trailing characters for which shouldBeTrimmed returns true removed.
 *  shouldBeTrimmed must not be null.
 *
 *  Two implementations are defined below:
 *      -   referenceImpl: walks the rep backwards one character at a time via GetAt ()
 *      -   commonAlgorithm: walks the raw stored data directly (ASCII, Latin1 and char32_t storage only)
 *  The stored representation selects which one runs; when assertions are enabled, results of
 *  commonAlgorithm are checked against referenceImpl.
 */
String String::RTrim (bool (*shouldBeTrimmed) (Character)) const

{

    RequireNotNull (shouldBeTrimmed);

    // straightforward version, usable for any storage representation
    auto referenceImpl = [&] () {

        _SafeReadRepAccessor accessor{this};

        ptrdiff_t            length         = accessor._ConstGetRep ().size ();

        // endOfFirstTrim is one past the last character to keep; it moves backwards over trimmed characters
        ptrdiff_t            endOfFirstTrim = length;

        for (; endOfFirstTrim != 0; --endOfFirstTrim) {

            if ((*shouldBeTrimmed) (accessor._ConstGetRep ().GetAt (endOfFirstTrim - 1))) {

                // keep going backwards

            }

            else {

                break;

            }

        }

        if (endOfFirstTrim == 0) {

            return String{}; // all trimmed

        }

        else if (endOfFirstTrim == length) {

            return *this; // nothing trimmed

        }

        else {

            return SubString (0, endOfFirstTrim);

        }

    };


    // same algorithm, but applied directly to a span of the stored elements (each element is one character)
    auto commonAlgorithm = [&]<typename T> (span<const T> lowLevelCharSpan) -> String {

        size_t    length         = lowLevelCharSpan.size ();

        // one past the last character to keep; moves backwards over trimmed characters
        ptrdiff_t endOfFirstTrim = length;

        for (; endOfFirstTrim != 0; --endOfFirstTrim) {

            static_assert (Common::IAnyOf<T, ASCII, Latin1, char32_t>); // this works for ASCII, Latin1, char32_t, but for char16_t - not so much - trickier

            Character c{lowLevelCharSpan[endOfFirstTrim - 1]};

            // drop not-so-subtle hint to optimizer this is likely the function, and can be called, and hopefully hoisted outside the loop, and inlined

            bool thisCharacterTrimmed = [&] () {

                if (shouldBeTrimmed == (bool (*) (Character))Character::IsWhitespace) [[likely]] {

                    return Character::IsWhitespace (c);

                }

                else {

                    return shouldBeTrimmed (c);

                }

            }();

            if (thisCharacterTrimmed) {

                // keep going backwards

            }

            else {

                break;

            }

        }

        if (endOfFirstTrim == 0) {

#if qStroika_Foundation_Debug_AssertionsChecked

            Assert (String{} == referenceImpl ());

#endif

            return String{}; // all trimmed

        }

        else if (static_cast<size_t> (endOfFirstTrim) == length) {

#if qStroika_Foundation_Debug_AssertionsChecked

            Assert (*this == referenceImpl ());

#endif

            return *this; // nothing trimmed

        }

        else {

#if qStroika_Foundation_Debug_AssertionsChecked

            Assert (mk_ (lowLevelCharSpan.subspan (0, endOfFirstTrim)) == referenceImpl ());

#endif

            // build the result from the kept head of the stored data
            return mk_ (lowLevelCharSpan.subspan (0, endOfFirstTrim)); //return SubString (0, endOfFirstTrim);

        }

    };


    _SafeReadRepAccessor accessor{this};

    PeekSpanData         psd = accessor._ConstGetRep ().PeekData (nullopt);

    // dispatch on the storage representation; any case not listed here falls through to referenceImpl below
    switch (psd.fInCP) {

        case PeekSpanData::eAscii: {

            return commonAlgorithm (psd.fAscii);

        }

        case PeekSpanData::eSingleByteLatin1: {

            return commonAlgorithm (psd.fSingleByteLatin1);

        }

        case PeekSpanData::eChar32: {

            return commonAlgorithm (psd.fChar32);

        }

    }

    return referenceImpl (); // due to tricks with surrogates, and rarity, not worth worrying about char16_t case

}


/*
 *  Return a copy of this string with every leading and trailing character for which
 *  shouldBeTrimmed () returns true removed. Interior characters are never examined once the
 *  first/last kept character has been located.
 *
 *  \param shouldBeTrimmed  predicate deciding whether a character is trimmable; must not be null
 *  \return                 *this (shared) if nothing was trimmed, String{} if everything was trimmed,
 *                          otherwise a new String holding the kept middle section
 */
String String::Trim (bool (*shouldBeTrimmed) (Character)) const
{
    RequireNotNull (shouldBeTrimmed);

    // Straightforward formulation: used as the fallback, and (in debug builds) to cross-check the fast path
    auto viaLTrimThenRTrim = [&] () { return LTrim (shouldBeTrimmed).RTrim (shouldBeTrimmed); };

    // Defined locally so the very common Character::IsWhitespace predicate can be called directly (and inlined)
    auto isTrimmable = [&] (Character ch) {
        if (shouldBeTrimmed == (bool (*) (Character))Character::IsWhitespace) [[likely]] {
            return Character::IsWhitespace (ch);
        }
        return shouldBeTrimmed (ch);
    };

    // Works directly on the rep's character array; valid only when each array element is one whole character
    auto trimSpan = [&]<typename T> (span<const T> chars) -> String {
        static_assert (Common::IAnyOf<T, ASCII, Latin1, char32_t>); // this works for ASCII, Latin1, char32_t, but for char16_t - not so much - trickier
        const size_t count = chars.size ();

        // scan forward to the first character we keep
        size_t keepFrom = 0;
        while (keepFrom < count and isTrimmable (Character{chars[keepFrom]})) {
            ++keepFrom;
        }
        // scan backward to one-past the last character we keep (never crossing keepFrom)
        size_t keepTo = count;
        while (keepTo != keepFrom and isTrimmable (Character{chars[keepTo - 1]})) {
            --keepTo;
        }

        if (keepFrom == 0 and keepTo == count) {
#if qStroika_Foundation_Debug_AssertionsChecked
            Assert (*this == viaLTrimThenRTrim ());
#endif
            return *this; // untouched - share the existing rep
        }
        if (keepFrom == count) {
#if qStroika_Foundation_Debug_AssertionsChecked
            Assert (String{} == viaLTrimThenRTrim ());
#endif
            return String{}; // every character was trimmable
        }
        Assert (keepFrom < keepTo);
        span<const T> kept = chars.subspan (keepFrom, keepTo - keepFrom);
#if qStroika_Foundation_Debug_AssertionsChecked
        Assert (mk_ (kept) == viaLTrimThenRTrim ());
#endif
        return mk_ (kept);
    };

    _SafeReadRepAccessor accessor{this};
    PeekSpanData         psd = accessor._ConstGetRep ().PeekData (nullopt);
    switch (psd.fInCP) {
        case PeekSpanData::eAscii: {
            return trimSpan (psd.fAscii);
        }
        case PeekSpanData::eSingleByteLatin1: {
            return trimSpan (psd.fSingleByteLatin1);
        }
        case PeekSpanData::eChar32: {
            return trimSpan (psd.fChar32);
        }
    }
    // any other storage format (char16_t - possible surrogates - and rare): take the simple route
    return viaLTrimThenRTrim ();
}


/*
 *  Return a copy of this string with every character for which removeCharIf () returns true removed.
 *
 *  \param removeCharIf  predicate selecting the characters to drop; must not be null
 *  \return              *this (shared) when no character is selected for removal, otherwise a new String
 */
String String::StripAll (bool (*removeCharIf) (Character)) const
{
    RequireNotNull (removeCharIf);

    // StringBuilder_Options<char32_t> so operator[] is fast
    StringBuilder<StringBuilder_Options<char32_t>> source{*this};
    const size_t                                   n = source.size ();

    // Phase 1: locate the first character that must go. If there is none we never allocate a result.
    size_t firstRemoved = 0;
    while (firstRemoved < n and not removeCharIf (source[firstRemoved])) {
        ++firstRemoved;
    }
    if (firstRemoved == n) {
        return *this; // removeCharIf never returned true - just share this string
    }

    // Phase 2: start from the untouched prefix, then append each remaining character that survives
    StringBuilder kept = source.As<String> ().SubString (0, firstRemoved);
    for (size_t i = firstRemoved + 1; i < n; ++i) {
        Character c = source[i];
        if (not removeCharIf (c)) {
            kept += c;
        }
    }
    return kept;
}


/*
 *  Concatenate the elements of 'list', placing 'separator' between each adjacent pair.
 *
 *  \param list       strings to concatenate, in iteration order
 *  \param separator  text inserted between elements (may be empty)
 *  \return           the joined string; empty when 'list' is empty
 *
 *  \note   The separator is emitted only BETWEEN elements, rather than appended after every element
 *          and then chopped off the end. The chop was done with SubString (0, -separator.size ()), which for
 *          an empty separator is SubString (0, 0) - discarding the entire result.
 */
String String::Join (const Iterable<String>& list, const String& separator)
{
    StringBuilder result;
    bool          first = true;
    for (const String& i : list) {
        if (first) {
            first = false;
        }
        else {
            result << separator;
        }
        result << i;
    }
    return result.str ();
}


/*
 *  Return a copy of this string with each upper case character replaced by its lower case equivalent.
 *
 *  \return  *this (shared) when no character needed changing, otherwise a newly built String
 */
String String::ToLowerCase () const
{
    StringBuilder        lowered;
    bool                 changed{false}; // tracks whether any character was actually altered
    _SafeReadRepAccessor accessor{this};
    PeekSpanData         psd = accessor._ConstGetRep ().PeekData (nullopt);
    if (psd.fInCP == PeekSpanData::eAscii) [[likely]] {
        // pure-ASCII fast path (the general path below would also give the right answer)
        for (auto ch : psd.fAscii) {
            const bool isUpper = isupper (ch) != 0;
            if (isUpper) {
                changed = true;
            }
            lowered.push_back (isUpper ? static_cast<ASCII> (tolower (ch)) : ch);
        }
    }
    else {
        Memory::StackBuffer<Character> scratch; // only used if the rep's data must be converted to Character
        for (Character ch : GetData (psd, &scratch)) {
            const bool isUpper = ch.IsUpperCase ();
            if (isUpper) {
                changed = true;
            }
            lowered.push_back (isUpper ? ch.ToLowerCase () : ch);
        }
    }
    // if nothing changed, hand back the existing string rather than the freshly built copy
    return changed ? lowered.str () : *this;
}


/*
 *  Return a copy of this string with each lower case character replaced by its upper case equivalent.
 *
 *  \return  *this (shared) when no character needed changing, otherwise a newly built String
 */
String String::ToUpperCase () const
{
    StringBuilder        raised;
    bool                 changed{false}; // tracks whether any character was actually altered
    _SafeReadRepAccessor accessor{this};
    PeekSpanData         psd = accessor._ConstGetRep ().PeekData (nullopt);
    if (psd.fInCP == PeekSpanData::eAscii) [[likely]] {
        // pure-ASCII fast path (the general path below would also give the right answer)
        for (auto ch : psd.fAscii) {
            const bool isLower = islower (ch) != 0;
            if (isLower) {
                changed = true;
            }
            raised.push_back (isLower ? static_cast<ASCII> (toupper (ch)) : ch);
        }
    }
    else {
        Memory::StackBuffer<Character> scratch; // only used if the rep's data must be converted to Character
        for (Character ch : GetData (psd, &scratch)) {
            const bool isLower = ch.IsLowerCase ();
            if (isLower) {
                changed = true;
            }
            raised.push_back (isLower ? ch.ToUpperCase () : ch);
        }
    }
    // if nothing changed, hand back the existing string rather than the freshly built copy
    return changed ? raised.str () : *this;
}


/*
 *  True iff no character in the string is a non-whitespace character (so true for the empty string).
 */
bool String::IsWhitespace () const
{
    // Search for the first character that is NOT whitespace; failing to find one means all-whitespace
    auto firstNonWhitespace = Find ([] (Character c) -> bool { return not c.IsWhitespace (); });
    return not firstNonWhitespace;
}


/*
 *  Shorten this string so it occupies at most maxLen characters, marking removed text with 'ellipsis'.
 *
 *  \param maxLen    maximum number of characters wanted in the result
 *  \param keepPref  which part of the string to preserve (left, right, or middle)
 *  \param ellipsis  marker placed where text was removed (on both sides for ePreferKeepMid)
 *  \return          *this unchanged if it already fits; else the trimmed string if trimming alone makes
 *                   it fit; else the kept section combined with the ellipsis marker(s)
 *
 *  \note   The 'already fits' test is length () <= maxLen, matching the test applied after trimming.
 *          It used to be '<', so a string of exactly maxLen characters fell through and was trimmed,
 *          while any shorter string was returned untouched.
 *  \note   When the ellipsis marker(s) alone are at least maxLen long, no original characters are kept and the
 *          result is just the marker(s).
 */
String String::LimitLength (size_t maxLen, StringShorteningPreference keepPref, const String& ellipsis) const
{
    // @todo Consider making this the 'REFERENCE' impl, and doing a specific one with a specific StringBuilder, and doing
    // the trim/split directly, if I see this show up in a profile, for performance sake --LGP 2023-12-11
    if (length () <= maxLen) [[likely]] {
        return *this; // frequent optimization: already fits, nothing to do
    }
    // Too long: first trim whitespace, as selected by keepPref, since that may be enough on its own
    String operateOn = [&] () {
        switch (keepPref) {
            case StringShorteningPreference::ePreferKeepLeft:
                return LTrim ();
            case StringShorteningPreference::ePreferKeepRight:
                return RTrim ();
            case StringShorteningPreference::ePreferKeepMid:
                return Trim (); // not sure we need to trim - but probably best
            default:
                RequireNotReached ();
                return *this;
        }
    }();
    if (operateOn.length () <= maxLen) {
        return operateOn;
    }
    // Number of original characters to keep once room for the ellipsis marker(s) is reserved (never negative)
    size_t useLen = [&] () {
        size_t ellipsisTotalLen = ellipsis.length ();
        if (keepPref == StringShorteningPreference::ePreferKeepMid) {
            ellipsisTotalLen *= 2; // marker appears on both sides
        }
        return maxLen > ellipsisTotalLen ? maxLen - ellipsisTotalLen : size_t{0};
    }();
    switch (keepPref) {
        case StringShorteningPreference::ePreferKeepLeft:
            return operateOn.substr (0, useLen) + ellipsis;
        case StringShorteningPreference::ePreferKeepRight:
            return ellipsis + operateOn.substr (operateOn.length () - useLen);
        case StringShorteningPreference::ePreferKeepMid:
            // keep useLen characters centered on the midpoint of the (trimmed) string
            return ellipsis + operateOn.substr (operateOn.length () / 2 - useLen / 2, useLen) + ellipsis;
        default:
            RequireNotReached ();
            return *this;
    }
}


/*
 *  Convert this string to a narrow (multibyte) std::string using the code conversion named by locale 'l'.
 *
 *  \param l   locale whose name selects the codecvt_byname<wchar_t, char, mbstate_t> facet used
 *  \return    the converted multibyte text
 *  \throws    Execution::RuntimeErrorException if the conversion reports anything other than codecvt_base::ok
 *             (for example a character not representable in the target encoding)
 */
string String::AsNarrowString (const locale& l) const
{
    // Note: this could use CodeCvt, but directly using std::codecvt in this case pretty simple, and
    // more efficient this way --LGP 2023-02-14

    // See http://en.cppreference.com/w/cpp/locale/codecvt/~codecvt
    using Destructible_codecvt_byname = deletable_facet_<codecvt_byname<wchar_t, char, mbstate_t>>;
    Destructible_codecvt_byname cvt{l.name ()};

    Memory::StackBuffer<wchar_t> maybeIgnoreBuf1;
    span<const wchar_t>          thisData = GetData (&maybeIgnoreBuf1);
    // http://en.cppreference.com/w/cpp/locale/codecvt/out
    mbstate_t                 mbstate{};
    const wchar_t*            from_next;
    char*                     to_next;
    Memory::StackBuffer<char> into{Memory::eUninitialized, thisData.size () * 5}; // not sure what size is always big enuf
    codecvt_base::result      result =
        cvt.out (mbstate, thisData.data (), thisData.data () + thisData.size (), from_next, into.data (), into.end (), to_next);
    if (result != codecvt_base::ok) [[unlikely]] {
        // This is the UNICODE => multibyte direction (the message used to describe the reverse conversion)
        static const auto kException_ = Execution::RuntimeErrorException{"Error converting UNICODE string to locale multibyte string"sv};
        Execution::Throw (kException_);
    }
    return string{into.data (), to_next};
}


/*
 *  Convert this string to a narrow (multibyte) std::string using the code conversion named by locale 'l',
 *  substituting '?' for each input character the conversion stops on, instead of failing.
 *
 *  \param l   locale whose name selects the codecvt_byname<wchar_t, char, mbstate_t> facet used
 *  \return    the converted multibyte text, with '?' written in place of skipped input characters
 *
 *  \note   If the output buffer fills before all input is consumed, conversion stops and the text produced
 *          so far is returned. Previously the '?' marker was stored through to_next with no bounds check, which
 *          in that situation wrote one byte past the end of the buffer.
 */
string String::AsNarrowString (const locale& l, AllowMissingCharacterErrorsFlag) const
{
    // Note: this could use CodeCvt, but directly using std::codecvt in this case pretty simple, and
    // more efficient this way --LGP 2023-02-14

    // See http://en.cppreference.com/w/cpp/locale/codecvt/~codecvt
    using Destructible_codecvt_byname = deletable_facet_<codecvt_byname<wchar_t, char, mbstate_t>>;
    Destructible_codecvt_byname cvt{l.name ()};

    Memory::StackBuffer<wchar_t> maybeIgnoreBuf1;
    span<const wchar_t>          thisData = GetData (&maybeIgnoreBuf1);
    // http://en.cppreference.com/w/cpp/locale/codecvt/out
    mbstate_t                 mbstate{};
    Memory::StackBuffer<char> into{Memory::eUninitialized, thisData.size () * 5}; // not sure what size is always big enuf
    const wchar_t* const      sourceEnd = thisData.data () + thisData.size ();
    char* const               intoEnd   = into.end ();
    const wchar_t*            readFrom  = thisData.data ();
    char*                     intoIndex = into.data ();
    while (true) {
        const wchar_t*       from_next{nullptr};
        char*                to_next{nullptr};
        codecvt_base::result result = cvt.out (mbstate, readFrom, sourceEnd, from_next, intoIndex, intoEnd, to_next);
        intoIndex                   = to_next; // everything before to_next is valid output
        if (result == codecvt_base::ok or from_next == sourceEnd) {
            break; // converted it all (or nothing left to skip past)
        }
        if (to_next == intoEnd) [[unlikely]] {
            break; // no room left for the replacement character - return what fit rather than overrun the buffer
        }
        *to_next  = '?';           // write 'bad' character
        intoIndex = to_next + 1;
        readFrom  = from_next + 1; // unclear how much to skip (due to surrogates) - but likely this is a good guess
    }
    return string{into.data (), intoIndex};
}


/*
 *  Remove every character from index 'from' through the end of the string (std::basic_string-style mutator).
 *  Implemented by replacing *this with the result of RemoveAt ().
 */
void String::erase (size_t from)
{
    const size_t endIndex = size ();
    *this                 = RemoveAt (from, endIndex);
}


/*
 *  Remove up to 'count' characters starting at index 'from' (std::basic_string-style mutator).
 *  A 'count' reaching past the end of the string is silently clipped to the end.
 *  Implemented by replacing *this with the result of RemoveAt ().
 */
void String::erase (size_t from, size_t count)
{
    // http://stroika-bugs.sophists.com/browse/STK-445
    // @todo - NOT ENVELOPE THREADSAFE
    // An accessor must be acquired here: RemoveAt being threadsafe is not enough, since the read of *this and
    // the assignment back to *this need to be synchronized together (AssureExternallySynchronized).
    //
    // TODO: Double check STL definition - but I think they allow for count to be 'too much' - and silently trim to end...
    const ptrdiff_t available = static_cast<ptrdiff_t> (size ()) - static_cast<ptrdiff_t> (from);
    const size_t    max2Erase = static_cast<size_t> (available > 0 ? available : static_cast<ptrdiff_t> (0));
    const size_t    eraseLen  = min (count, max2Erase);
    *this                     = RemoveAt (from, from + eraseLen);
}


/*
 *  Const overload: casts away constness and forwards to the non-const c_str ().
 *  UNSAFE - DEPRECATED  - lose before v3 actually released -- LGP 2023-06-28
 */
const wchar_t* String::c_str () const noexcept
{
    // The call below targets a deprecated API on purpose, so deprecation diagnostics are suppressed around it
    DISABLE_COMPILER_MSC_WARNING_START (4996);
    DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
    DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
    const wchar_t* const nulTerminated = const_cast<String*> (this)->c_str ();
    DISABLE_COMPILER_MSC_WARNING_END (4996);
    DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
    DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
    return nulTerminated;
}

/*
 *  Return a pointer to a NUL-terminated wchar_t array for this string.
 *
 *  If the current rep's c_str_peek () yields nullptr, the rep (_fRep) is REPLACED by a
 *  StringWithCStr_::Rep constructed from the current rep's shared pointer, and the peek is retried.
 *  That replacement is why this overload is non-const.
 */
const wchar_t* String::c_str ()

{

    // DEPRECATED SINCE STROIKA v3.0d13

    // Rarely used mechanism, of replacing the underlying rep, for the iterable, as needed

    _SafeReadRepAccessor accessor{this};

    const wchar_t*       result = accessor._ConstGetRep ().c_str_peek ();

    if (result == nullptr) {

        // current rep has no c-string to peek at: swap in a rep built from it that does

        _fRep  = Memory::MakeSharedPtr<StringWithCStr_::Rep> (accessor._ConstGetRepSharedPtr ());

        // 'accessor' was created before the swap, so peek through a fresh accessor on the new rep

        result = _SafeReadRepAccessor{this}._ConstGetRep ().c_str_peek ();

        AssertNotNull (result);

    }

    EnsureNotNull (result);

    // postcondition: NUL sits at index size (), unless wchar_t is 2 bytes and the array is longer than size ()

    Ensure (result[size ()] == '\0' or (::wcslen (result) > size () and sizeof (wchar_t) == 2)); // if there are surrogates, wcslen () might be larger than size

    return result;

}


[[noreturn]] void String::ThrowInvalidAsciiException_ ()

{

    static const auto kException_ = Execution::RuntimeErrorException{"Error converting non-ascii text to string"sv};

    Execution::Throw (kException_);

}


#if qStroika_Foundation_Characters_AsPathAutoMapMSYSAndCygwin

/*
 *  Convert this String to a std::filesystem::path, first mapping CYGWIN/MSYS drive-style paths to
 *  drive-letter form:
 *      CYGWIN creates paths like /cygdrive/c/folder for c:/folder
 *      MSYS creates paths like /c/folder for c:/folder
 *  Any string not matching one of those shapes is converted as-is.
 */
template <>
std::filesystem::path String::As<std::filesystem::path> () const
{
    static const String kMSYSDrivePrefix_     = "/"sv;
    static const String kCygdriveDrivePrefix_ = "/cygdrive/"sv;

    // If this string is <prefix><ascii-letter>/<rest>, produce <letter>:/<rest>; otherwise nullopt
    auto mapDriveStylePath = [this] (const String& prefix) -> optional<filesystem::path> {
        if (not StartsWith (prefix)) {
            return nullopt;
        }
        String afterPrefix = SubString (prefix.length ());
        if (afterPrefix.length () > 1 and afterPrefix[0].IsASCII () and afterPrefix[0].IsAlphabetic () and afterPrefix[1] == '/') {
            wstring w = afterPrefix.As<wstring> (); // now map c/folder to c:/folder
            w.insert (w.begin () + 1, ':');
            return filesystem::path{w};
        }
        return nullopt;
    };

    // the longer /cygdrive/ prefix is tried before the bare "/" MSYS prefix
    if (optional<filesystem::path> mapped = mapDriveStylePath (kCygdriveDrivePrefix_)) {
        return std::move (*mapped);
    }
    if (optional<filesystem::path> mapped = mapDriveStylePath (kMSYSDrivePrefix_)) {
        return std::move (*mapped);
    }
    return filesystem::path{As<wstring> ()};
}

#endif


/*

 ********************************************************************************

 ****************************** StringCombiner **********************************

 ********************************************************************************

 */

/*
 *  Combine two strings into lhs + separator + rhs.
 *  The separator is fSpecialSeparatorForLastPair when 'isLast' is true and that optional is set,
 *  otherwise fSeparator.
 */
template <>
String StringCombiner<String>::operator() (const String& lhs, const String& rhs, bool isLast) const
{
    const bool    useLastPairSeparator = isLast and fSpecialSeparatorForLastPair;
    StringBuilder combined{lhs};
    combined << (useLastPairSeparator ? *fSpecialSeparatorForLastPair : fSeparator);
    combined << rhs;
    return combined;
}


/*

 ********************************************************************************

 ******************* Iterable<Characters::String>::Join *************************

 ********************************************************************************

 */

namespace Stroika::Foundation::Traversal {
    /*
     *  Specialization (performance optimization) of Join for Iterable<String>: appends straight into one
     *  StringBuilder. Elements are joined with 'separator', except that 'finalSeparator' (when provided)
     *  is used between the last two elements.
     *  In assertion-checked builds the result is verified against the generic converter/combiner based Join.
     */
    template <>
    Characters::String Iterable<Characters::String>::Join (const Characters::String& separator, const optional<Characters::String>& finalSeparator) const
    {
        using namespace Characters;
#if qStroika_Foundation_Debug_AssertionsChecked
        String referenceResult =
            this->Join (Iterable<String>::kDefaultToStringConverter<String>,
                        Characters::StringCombiner<String>{.fSeparator = separator, .fSpecialSeparatorForLastPair = finalSeparator});
#endif
        StringBuilder joined;
        const size_t  total     = this->size ();
        unsigned int  position  = 0; // index of the element currently being visited
        this->Apply ([&] (const String& element) {
            if (position != 0) {
                // a separator precedes every element but the first; the last one may get the special separator
                const bool beforeLastElement = finalSeparator and position + 1 == total;
                joined << (beforeLastElement ? *finalSeparator : separator);
            }
            joined << element;
            ++position;
        });
#if qStroika_Foundation_Debug_AssertionsChecked
        Ensure (joined == referenceResult);
#endif
        return joined;
    }
}


/*

 ********************************************************************************

 ********************************** operator<< **********************************

 ********************************************************************************

 */


/*
 *  Write the characters of 's', as wchar_t, to the wide stream 'out' (unformatted write of the whole span).
 */
wostream& Characters::operator<< (wostream& out, const String& s)
{
    Memory::StackBuffer<wchar_t> scratch; // used only if the string's data must be converted to wchar_t
    span<const wchar_t>          wideChars = s.GetData (&scratch);
    return out.write (wideChars.data (), wideChars.size ());
}


/*
 *  Write 's' to the narrow stream 'out', converted via AsNarrowSDKString (eIgnoreErrors).
 */
ostream& Characters::operator<< (ostream& out, const String& s)
{
    const string narrow = s.AsNarrowSDKString (eIgnoreErrors);
    out << narrow;
    return out;
}


/*

 ********************************************************************************

 *********** hash<Stroika::Foundation::Characters::String> **********************

 ********************************************************************************

 */

/*
 *  Hash a String by digesting its char8_t data with the SuperFastHash digester.
 *  The empty string maps to a digest computed once (from an empty range) and cached.
 */
size_t std::hash<String>::operator() (const String& arg) const
{
    using namespace Cryptography::Digest;
    using DIGESTER = Digester<Algorithm::SuperFastHash>; // pick arbitrarily which algorithm to use for now -- err on the side of quick and dirty
    static constexpr DIGESTER kDigester_{};
    // Any of char8_t, wchar_t, char32_t (etc) would do here. char8_t is chosen on the theory that it most
    // often avoids a copy (so the common case is fastest), and that the data has less 'empty space', so
    // is more compact and digests faster.
    Memory::StackBuffer<char8_t> scratch;
    span<const char8_t>          utf8 = arg.GetData (&scratch);
    if (not utf8.empty ()) {
        return kDigester_ (as_bytes (utf8));
    }
    static const size_t kZeroDigest_ = kDigester_ (nullptr, nullptr);
    return kZeroDigest_;
}


/*

 ********************************************************************************

 ******************** DataExchange::DefaultSerializer<String> *******************

 ********************************************************************************

 */


/*
 *  Serialize a String as a BLOB holding the bytes of its char8_t data.
 */
Memory::BLOB DataExchange::DefaultSerializer<String>::operator() (const String& arg) const
{
    //
    // char8_t, char16_t, or char32_t would each have been a plausible choice. char8_t was picked because:
    //      >   It is nearly always the smallest representation (assuming most data is ascii)
    //      >   It is cross-platform/portable - not byte order dependent (NOT a promise going forward, so maybe
    //          not a good thing - but a thing)
    //      >   Since most data reps are expected to be ascii, it will most likely involve the least copying
    //          in the GetData call
    //
    Memory::StackBuffer<char8_t> scratch;
    span<const char8_t>          utf8 = arg.GetData (&scratch);
    return Memory::BLOB{as_bytes (utf8)};
}


AssertNotNull
#define AssertNotNull(p)
Definition Assertions.h:333

EnsureNotNull
#define EnsureNotNull(p)
Definition Assertions.h:340

RequireMember
#define RequireMember(p, c)
Definition Assertions.h:326

RequireNotReached
#define RequireNotReached()
Definition Assertions.h:385

qStroika_Foundation_Debug_AssertionsChecked
#define qStroika_Foundation_Debug_AssertionsChecked
The qStroika_Foundation_Debug_AssertionsChecked flag determines if assertions are checked and validat...
Definition Assertions.h:48

RequireNotNull
#define RequireNotNull(p)
Definition Assertions.h:347

RequireExpression
#define RequireExpression(c)
Definition Assertions.h:267

AssertNotReached
#define AssertNotReached()
Definition Assertions.h:355

BlockAllocated.h

Stroika::Foundation::Memory::UseBlockAllocationIfAppropriate
conditional_t< qStroika_Foundation_Memory_PreferBlockAllocation and andTrueCheck, BlockAllocationUseHelper< T >, Common::Empty > UseBlockAllocationIfAppropriate
Use this to enable block allocation for a particular class. Beware of subclassing.
Definition BlockAllocated.h:131

Cast.h

Utilities.h

Stroika::Foundation::Characters::CString::Equals
bool Equals(const T *lhs, const T *rhs)
strcmp or wsccmp() as appropriate == 0

Empty.h

RegularExpression.h

Sequence_stdvector.h

StackBuffer.h

String.h

StringBuilder.h

SuperFastHash.h

Stroika::Foundation::Characters::Character
Definition Character.h:218

Stroika::Foundation::Characters::Character::IsASCII
constexpr bool IsASCII() const noexcept
Return true iff the given character (or all in span) is (are) in the ascii range [0....
Definition Character.inl:227

Stroika::Foundation::Characters::Character::CheckASCII
static constexpr void CheckASCII(span< const CHAR_T > s)
if not IsASCII (arg) throw RuntimeException...
Definition Character.inl:252

Stroika::Foundation::Characters::Character::ToLowerCase
nonvirtual Character ToLowerCase() const noexcept
Definition Character.inl:492

Stroika::Foundation::Characters::Character::GetAsciiCode
nonvirtual ASCII GetAsciiCode() const noexcept
Definition Character.inl:183

Stroika::Foundation::Characters::Character::Compare
static constexpr strong_ordering Compare(span< const CHAR_T, E1 > lhs, span< const CHAR_T, E2 > rhs, CompareOptions co) noexcept
Definition Character.inl:539

Stroika::Foundation::Characters::Character::IsLowerCase
nonvirtual bool IsLowerCase() const noexcept
Definition Character.inl:454

Stroika::Foundation::Characters::Character::GetCharacterCode
constexpr char32_t GetCharacterCode() const noexcept
Return the char32_t UNICODE code-point associated with this character.
Definition Character.inl:188

Stroika::Foundation::Characters::Character::ToUpperCase
nonvirtual Character ToUpperCase() const noexcept
Definition Character.inl:501

Stroika::Foundation::Characters::Character::IsWhitespace
constexpr bool IsWhitespace() const noexcept
Definition Character.inl:394

Stroika::Foundation::Characters::Character::IsUpperCase
nonvirtual bool IsUpperCase() const noexcept
Definition Character.inl:449

Stroika::Foundation::Characters::RegularExpression
RegularExpression is a compiled regular expression which can be used to match on a String class.
Definition RegularExpression.h:25

Stroika::Foundation::Characters::RegularExpressionMatch
Definition RegularExpression.h:129

Stroika::Foundation::Characters::String::_IRep
Definition String.h:1737

Stroika::Foundation::Characters::String::_IRep::GetAt
virtual Character GetAt(size_t index) const noexcept=0

Stroika::Foundation::Characters::StringBuilder
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
Definition StringBuilder.h:73

Stroika::Foundation::Characters::StringBuilder::size
nonvirtual size_t size() const noexcept
Definition StringBuilder.inl:206

Stroika::Foundation::Characters::StringBuilder::As
nonvirtual RESULT_T As() const

Stroika::Foundation::Characters::StringBuilder::Append
nonvirtual void Append(span< const CHAR_T > s)

Stroika::Foundation::Characters::StringBuilder::empty
nonvirtual bool empty() const noexcept
Definition StringBuilder.inl:217

Stroika::Foundation::Characters::StringBuilder::str
nonvirtual String str() const
Definition StringBuilder.inl:284

Stroika::Foundation::Characters::String
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201

Stroika::Foundation::Characters::String::length
nonvirtual size_t length() const noexcept
Definition String.inl:1051

Stroika::Foundation::Characters::String::ToUpperCase
nonvirtual String ToUpperCase() const
Definition String.cpp:1744

Stroika::Foundation::Characters::String::FromNarrowString
static String FromNarrowString(const char *from, const locale &l)
Definition String.inl:340

Stroika::Foundation::Characters::String::Matches
nonvirtual bool Matches(const RegularExpression &regEx) const
Definition String.cpp:1133

Stroika::Foundation::Characters::String::IsWhitespace
nonvirtual bool IsWhitespace() const
Definition String.cpp:1782

Stroika::Foundation::Characters::String::NormalizeTextToNL
nonvirtual String NormalizeTextToNL() const
Definition String.cpp:1201

Stroika::Foundation::Characters::String::Join
static String Join(const Iterable< String > &list, const String &separator=", "sv)
Definition String.cpp:1692

Stroika::Foundation::Characters::String::FromStringConstant
static String FromStringConstant(const CHAR_T(&cString)[SIZE])
Take the given argument data (constant span) - which must remain unchanged - constant - for the appli...
Definition String.inl:386

Stroika::Foundation::Characters::String::NormalizeSpace
nonvirtual String NormalizeSpace(Character useSpaceCharacter=' ') const
Replace sequences of whitespace characters (space, tab, newline etc) with a single space (or argument...
Definition String.cpp:1229

Stroika::Foundation::Characters::String::FindEach
nonvirtual Containers::Sequence< pair< size_t, size_t > > FindEach(const RegularExpression &regEx) const
Definition String.cpp:966

Stroika::Foundation::Characters::String::Repeat
nonvirtual String Repeat(unsigned int count) const
Definition String.cpp:1424

Stroika::Foundation::Characters::String::LimitLength
nonvirtual String LimitLength(size_t maxLen, StringShorteningPreference keepPref=StringShorteningPreference::ePreferKeepLeft) const
return the first maxLen (or fewer if string shorter) characters of this string (adding ellipsis if tr...
Definition String.inl:745

Stroika::Foundation::Characters::String::RemoveAll
nonvirtual String RemoveAll(Character c) const
Definition String.cpp:823

Stroika::Foundation::Characters::String::FindEachMatch
nonvirtual Containers::Sequence< RegularExpressionMatch > FindEachMatch(const RegularExpression &regEx) const
Definition String.cpp:984

Stroika::Foundation::Characters::String::RemoveFirstIf
nonvirtual String RemoveFirstIf(Character c) const
Definition String.cpp:807

Stroika::Foundation::Characters::String::AsNarrowSDKString
nonvirtual string AsNarrowSDKString() const
Definition String.inl:834

Stroika::Foundation::Characters::String::Col
nonvirtual optional< String > Col(size_t i) const
Useful to replace 'awk print $3' - replace with Col(2) - zero based.
Definition String.cpp:1362

Stroika::Foundation::Characters::String::InsertAt
nonvirtual String InsertAt(Character c, size_t at) const
Definition String.inl:719

Stroika::Foundation::Characters::String::AsNarrowString
nonvirtual string AsNarrowString(const locale &l) const
Definition String.cpp:1838

Stroika::Foundation::Characters::String::size
nonvirtual size_t size() const noexcept
Definition String.inl:534

Stroika::Foundation::Characters::String::EndsWith
nonvirtual bool EndsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1088

Stroika::Foundation::Characters::String::ToLowerCase
nonvirtual String ToLowerCase() const
Definition String.cpp:1706

Stroika::Foundation::Characters::String::ReplaceAll
nonvirtual String ReplaceAll(const RegularExpression &regEx, const String &with) const
Definition String.cpp:1155

Stroika::Foundation::Characters::String::Replace
nonvirtual String Replace(size_t from, size_t to, const String &replacement) const
Definition String.cpp:1045

Stroika::Foundation::Characters::String::SubString
nonvirtual String SubString(SZ from) const

Stroika::Foundation::Characters::String::Trim
nonvirtual String Trim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1592

Stroika::Foundation::Characters::String::StartsWith
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1059

Stroika::Foundation::Characters::String::StripAll
nonvirtual String StripAll(bool(*removeCharIf)(Character)) const
Definition String.cpp:1664

Stroika::Foundation::Characters::String::AssureEndsWith
nonvirtual String AssureEndsWith(const Character &c, CompareOptions co=eWithCase) const
Return *this if it ends with argument character, or append 'c' so that it ends with a 'c'.
Definition String.cpp:1123

Stroika::Foundation::Characters::String::AsLines
nonvirtual Containers::Sequence< String > AsLines() const
break the String into a series of lines;
Definition String.cpp:1306

Stroika::Foundation::Characters::String::LTrim
nonvirtual String LTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1443

Stroika::Foundation::Characters::String::Grep
nonvirtual Containers::Sequence< String > Grep(const String &fgrepArg) const
Breaks this string into Lines, with AsLines (), and applies the argument filter (as if with ....
Definition String.cpp:1341

Stroika::Foundation::Characters::String::FindEachString
nonvirtual Containers::Sequence< String > FindEachString(const RegularExpression &regEx) const
Definition String.cpp:1001

Stroika::Foundation::Characters::String::RFind
nonvirtual optional< size_t > RFind(Character c) const noexcept
Definition String.cpp:1011

Stroika::Foundation::Characters::String::GetData
static span< const CHAR_TYPE > GetData(const PeekSpanData &pds, Memory::StackBuffer< CHAR_TYPE, STACK_BUFFER_SZ > *possiblyUsedBuffer)
return the constant character data inside the string (rep) in the form of a span, possibly quickly an...
Definition String.inl:967

Stroika::Foundation::Characters::String::Tokenize
nonvirtual Containers::Sequence< String > Tokenize() const
Definition String.cpp:1234

Stroika::Foundation::Characters::String::RemoveAt
nonvirtual String RemoveAt(size_t charAt) const
Definition String.inl:608

Stroika::Foundation::Characters::String::RTrim
nonvirtual String RTrim(bool(*shouldBeTrimmed)(Character)=Character::IsWhitespace) const
Definition String.cpp:1508

Stroika::Foundation::Characters::String::Find
nonvirtual optional< size_t > Find(Character c, CompareOptions co=eWithCase) const
Definition String.inl:685

Stroika::Foundation::Characters::String::substr
nonvirtual String substr(size_t from, size_t count=npos) const
Definition String.inl:1092

Stroika::Foundation::Characters::String::As
nonvirtual T As() const

Stroika::Foundation::Characters::UTFConvert::kThe
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Definition UTFConvert.h:369

Stroika::Foundation::Characters::UTFConvert::AllFitsInTwoByteEncoding
static constexpr bool AllFitsInTwoByteEncoding(span< const CHAR_T > s) noexcept
Definition UTFConvert.inl:239

Stroika::Foundation::Containers::Concrete::Sequence_stdvector
Sequence_stdvector<T> is an std::vector-based concrete implementation of the Sequence<T> container pa...
Definition Sequence_stdvector.h:39

Stroika::Foundation::Containers::Sequence
A generalization of a vector: a container whose elements are keyed by the natural numbers.
Definition Library/Sources/Stroika/Foundation/Containers/Sequence.h:187

Stroika::Foundation::Containers::Sequence::push_back
nonvirtual void push_back(ArgByValueType< value_type > item)
Definition Sequence.inl:436

Stroika::Foundation::Containers::Sequence::Append
nonvirtual void Append(ArgByValueType< value_type > item)
Definition Sequence.inl:330

Stroika::Foundation::Containers::Sequence::clear
nonvirtual void clear()
Definition Sequence.inl:451

Stroika::Foundation::Containers::Set
Set<T> is a container of T, where once an item is added, additional adds () do nothing.
Definition Library/Sources/Stroika/Foundation/Containers/Set.h:105

Stroika::Foundation::Execution::RuntimeErrorException
Definition Exceptions.h:198

Stroika::Foundation::Memory::BLOB
Definition BLOB.h:68

Stroika::Foundation::Memory::InlineBuffer
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
Definition InlineBuffer.h:93

Stroika::Foundation::Memory::InlineBuffer::size
nonvirtual size_t size() const noexcept
Definition InlineBuffer.inl:416

Stroika::Foundation::Traversal::Iterable
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237

Stroika::Foundation::Traversal::Iterable::Join
nonvirtual RESULT_T Join(const CONVERT_TO_RESULT &convertToResult=kDefaultToStringConverter<>, const COMBINER &combiner=Characters::kDefaultStringCombiner) const
Ape the JavaScript/Python 'join' function - take the parts of 'this' iterable and combine them into a...

Stroika::Foundation::Traversal::Iterable< Character >::_fRep
_SharedByValueRepType _fRep
Definition Iterable.h:1442

Stroika::Foundation::Traversal::Iterable::size
nonvirtual size_t size() const
Returns the number of items contained.
Definition Iterable.inl:302

Stroika::Foundation::Traversal::Iterable< Character >::MakeIterator
nonvirtual Iterator< Character > MakeIterator() const
Create an iterator object which can be used to traverse the 'Iterable'.
Definition Iterable.inl:296

Stroika::Foundation::Traversal::Iterator
An Iterator<T> is a copyable object which allows traversing the contents of some container....
Definition Iterator.h:225

Stroika::Foundation::Common::IAnyOf
concept - trivial shorthand for variadic same_as A or same_as B, or ...
Definition Concepts.h:189

Stroika::Foundation::Characters
Definition Character.h:31

Stroika::Foundation::Characters::ASCII
char ASCII
Stroika's string/character classes treat 'char' as being an ASCII character.
Definition Character.h:59

Stroika::Foundation::Characters::StringShorteningPreference
StringShorteningPreference
Definition String.h:99

Stroika::Foundation::Characters::DISABLE_COMPILER_MSC_WARNING_START
DISABLE_COMPILER_MSC_WARNING_START(4996)

Stroika::Foundation::Characters::AllowMissingCharacterErrorsFlag
AllowMissingCharacterErrorsFlag
Definition SDKString.h:54

Stroika::Foundation::Characters::operator<<
wostream & operator<<(wostream &out, const String &s)
Definition String.cpp:2035

Stroika::Foundation::Common
Definition Character.inl:624

Stroika::Foundation::Common::ArgByValueType
conditional_t<(sizeof(CHECK_T)<=2 *sizeof(void *)) and is_trivially_copyable_v< CHECK_T >, CHECK_T, const CHECK_T & > ArgByValueType
This is an alias for 'T' - but how we want to pass it on stack as formal parameter.
Definition TypeHints.h:32

Stroika::Foundation::Containers
Definition String.h:55

Stroika::Foundation::Execution::SequencePolicy
SequencePolicy
Equivalent to selecting which of 4 types is being used: std::execution::sequenced_policy, parallel_policy,...
Definition Foundation/Execution/Common.h:21

Stroika::Foundation::Execution::Throw
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43

Stroika::Foundation
Definition BloomFilter.h:22

Stroika::Foundation::Characters::Character::EqualsComparer
Definition Character.h:535

Stroika::Foundation::Characters::String::EqualsComparer
Definition String.h:1820

Stroika::Foundation::Characters::String::PeekSpanData
Summary data for raw contents of rep - each rep will support at least one of these span forms.
Definition String.h:1270

Stroika::Foundation::Characters::String::PeekSpanData::eSingleByteLatin1
@ eSingleByteLatin1
Definition String.h:1281

Stroika::Foundation::Characters::String::PeekSpanData::eAscii
@ eAscii
Definition String.h:1276

Stroika::Foundation::Characters::StringCombiner
StringCombiner is a simple function object used to combine two strings visually - used in Iterable<>:...
Definition String.h:1923

Stroika::Foundation::DataExchange::DefaultSerializer::operator()
Memory::BLOB operator()(const T &t) const
Definition String.cpp:2076