6#if !qCompilerAndStdLib_stdlib_ranges_pretty_broken_Buggy
12#include "Stroika/Foundation/Memory/Common.h"
17 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T,
size_t E1,
size_t E2>
18 constexpr strong_ordering Compare_CS_ (span<const CHAR_T, E1> lhs, span<const CHAR_T, E2> rhs)
20 size_t lLen = lhs.size ();
21 size_t rLen = rhs.size ();
22 size_t length = min (lLen, rLen);
23 const CHAR_T* li = lhs.data ();
24 const CHAR_T* ri = rhs.data ();
31 constexpr bool kCanUseMemCmpOptimization_ =
sizeof (CHAR_T) == 1 or (std::endian::native == std::endian::big);
34 constexpr bool kUseStdTraitsCompare_ =
true;
36 if constexpr (kUseStdTraitsCompare_) {
37 using TRAITS_CHAR_T = conditional_t<
sizeof (CHAR_T) == 4,
char32_t, CHAR_T>;
38 int r = std::char_traits<TRAITS_CHAR_T>::compare (
reinterpret_cast<const TRAITS_CHAR_T*
> (li),
39 reinterpret_cast<const TRAITS_CHAR_T*
> (ri), length);
40 if (r != 0) [[likely]] {
44 else if constexpr (kCanUseMemCmpOptimization_) {
45 int r = std::memcmp (li, ri, length);
46 if (r != 0) [[likely]] {
51 const CHAR_T* lend = li + length;
52 for (; li != lend; ++li, ++ri) {
53 if (*li != *ri) [[likely]] {
60 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T,
size_t E1,
size_t E2>
61 constexpr strong_ordering Compare_CI_ (span<const CHAR_T, E1> lhs, span<const CHAR_T, E2> rhs)
63 size_t lLen = lhs.size ();
64 size_t rLen = rhs.size ();
65 size_t length = min (lLen, rLen);
66 const CHAR_T* li = lhs.data ();
67 const CHAR_T* ri = rhs.data ();
68 const CHAR_T* lend = li + length;
69 for (; li != lend; ++li, ++ri) {
72 if constexpr (same_as<CHAR_T, Character>) {
73 lc = li->ToLowerCase ();
74 rc = ri->ToLowerCase ();
76 else if constexpr (same_as<CHAR_T, ASCII>) {
79 lc =
static_cast<CHAR_T
> (std::tolower (
static_cast<unsigned char> (*li)));
80 rc =
static_cast<CHAR_T
> (std::tolower (
static_cast<unsigned char> (*ri)));
84 lc =
static_cast<CHAR_T
> (std::towlower (
static_cast<wchar_t> (*li)));
85 rc =
static_cast<CHAR_T
> (std::towlower (
static_cast<wchar_t> (*ri)));
87 if (lc != rc) [[likely]] {
93 void ThrowNotIsASCII_ ();
94 void ThrowNotIsLatin1_ ();
95 void ThrowSurrogatesOutOfRange_ ();
103 constexpr inline Latin1::operator uint8_t ()
const
113 template <IPossibleCharacterRepresentation CHAR_T>
117 if constexpr (same_as<remove_cv_t<CHAR_T>, Character>) {
120 else if constexpr (same_as<remove_cv_t<CHAR_T>, Latin1>) {
121 return static_cast<uint8_t
> (c) <= 0x7f;
124 return static_cast<make_unsigned_t<CHAR_T>
> (c) <= 0x7f;
128 : fCharacterCode_{
'\0'}
132 : fCharacterCode_{static_cast<char32_t> (c)}
134 if (is_constant_evaluated ()) {
137 if (not is_constant_evaluated () and not
IsASCII (c)) [[unlikely]] {
138 Private_::ThrowNotIsASCII_ ();
142 : fCharacterCode_{
static_cast<char32_t> (c)}
146 : fCharacterCode_{static_cast<char32_t> (c)}
148 if (IsSurrogatePair_Hi (c)) [[unlikely]] {
149 Private_::ThrowSurrogatesOutOfRange_ ();
160 constexpr int halfShift = 10;
161 constexpr char32_t halfBase = 0x0010000UL;
162 if (not IsSurrogatePair_Hi (hiSurrogate)) [[unlikely]] {
163 Private_::ThrowSurrogatesOutOfRange_ ();
165 if (not IsSurrogatePair_Lo (lowSurrogate)) [[unlikely]] {
166 Private_::ThrowSurrogatesOutOfRange_ ();
168 fCharacterCode_ = ((hiSurrogate -
kUNICODESurrogate_High_Start) << halfShift) + (lowSurrogate - kUNICODESurrogate_Low_Start) + halfBase;
175 : fCharacterCode_{
static_cast<char32_t> (c)}
177 if constexpr (
sizeof (wchar_t) != 4) {
178 if (IsSurrogatePair_Hi (c)) [[unlikely]] {
179 Private_::ThrowSurrogatesOutOfRange_ ();
186 return static_cast<char> (fCharacterCode_);
190 return fCharacterCode_;
192 constexpr Character::operator char32_t () const noexcept
194 return fCharacterCode_;
196 template <
typename T>
197 constexpr T Character::As () const noexcept
198 requires (same_as<T,
char32_t> or (sizeof (
wchar_t) == sizeof (
char32_t) and same_as<T,
wchar_t>))
206 template <IUNICODECodePo
int T>
210 if constexpr (
sizeof (T) ==
sizeof (
char32_t)) {
215 else if constexpr (same_as<T, wchar_t>) {
216 Assert (
sizeof (
wchar_t) ==
sizeof (
char16_t));
217 this->AsHelper_ (
reinterpret_cast<Memory::StackBuffer<char16_t>*
> (buf));
218 Ensure (1 <= buf->
size () and buf->
size () <= 3);
221 else if constexpr (same_as<T, char8_t> or same_as<T, char16_t>) {
222 this->AsHelper_ (buf);
223 Ensure (1 <= buf->
size () and buf->
size () <= 3);
229 return 0x0 <= fCharacterCode_ and fCharacterCode_ <= 0x7f;
231 template <IPossibleCharacterRepresentation CHAR_T>
234 constexpr auto charComparer = [] ()
noexcept {
235 if constexpr (same_as<remove_cv_t<CHAR_T>,
Character>) {
236 return [] (
Character c)
noexcept {
return c.IsASCII (); };
238 else if constexpr (same_as<remove_cv_t<CHAR_T>, Latin1>) {
239 return [] (Latin1 c)
noexcept {
return static_cast<uint8_t
> (c) <= 0x7f; };
242 return [] (CHAR_T c)
noexcept {
return static_cast<make_unsigned_t<CHAR_T>
> (c) <= 0x7f; };
245#if qCompilerAndStdLib_stdlib_ranges_pretty_broken_Buggy
246 return std::all_of (fromS.begin (), fromS.end (), charComparer);
248 return ranges::all_of (fromS, charComparer);
251 template <IPossibleCharacterRepresentation CHAR_T>
254 if (not
IsASCII (s)) [[unlikely]] {
255 if (is_constant_evaluated ()) {
256 throw "Argument not valid ASCII";
259 Private_::ThrowNotIsASCII_ ();
263 template <IPossibleCharacterRepresentation CHAR_T>
270 return 0x0 <= fCharacterCode_ and fCharacterCode_ <= 0xff;
272 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
275 if constexpr (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1>) {
279 else if constexpr (same_as<CHAR_T, char8_t>) {
282 if (fromS.size () % 2 == 0) {
283 for (
auto i = fromS.begin (); i < fromS.end (); ++i) {
290 if ((c1 & 0b11100000) == 0b11000000 and ((c2 & 0b11000000) == 0b10000000 and (c1 & 0b00111111) <= 0b011)) [[likely]] {
302 static_assert (2 <=
sizeof (CHAR_T) and
sizeof (CHAR_T) <= 4);
303 constexpr auto charComparer = [] ()
noexcept {
304 if constexpr (same_as<remove_cv_t<CHAR_T>, Character>) {
305 return [] (Character c)
noexcept {
return c.IsLatin1 (); };
308 return [] (CHAR_T c)
noexcept {
return static_cast<make_unsigned_t<CHAR_T>
> (c) <= 0xff; };
311#if qCompilerAndStdLib_stdlib_ranges_pretty_broken_Buggy
312 return std::all_of (fromS.begin (), fromS.end (), charComparer);
314 return ranges::all_of (fromS, charComparer);
318 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
321 if (not
IsLatin1 (s)) [[unlikely]] {
322 Private_::ThrowNotIsLatin1_ ();
325 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
330 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
333 constexpr auto eNone = ASCIIOrLatin1Result::eNone;
334 constexpr auto eLatin1 = ASCIIOrLatin1Result::eLatin1;
335 constexpr auto eASCII = ASCIIOrLatin1Result::eASCII;
336 if constexpr (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1>) {
338 return IsASCII (s) ? eASCII : eLatin1;
341 constexpr auto isASCII = [] ()
noexcept {
342 if constexpr (same_as<remove_cv_t<CHAR_T>, Character>) {
343 return [] (Character c)
noexcept {
return c.IsASCII (); };
346 return [] (CHAR_T c)
noexcept {
return static_cast<make_unsigned_t<CHAR_T>
> (c) <= 0x7f; };
349#if qCompilerAndStdLib_stdlib_ranges_pretty_broken_Buggy
351 for (; i != s.end () and isASCII (*i); ++i)
353 size_t leadingAsciiCharCnt =
static_cast<size_t> (i - s.begin ());
354 if (leadingAsciiCharCnt == s.size ()) [[likely]] {
358 auto leadingASCIISpan = ranges::take_while_view (s, isASCII);
359 size_t leadingAsciiCharCnt =
static_cast<size_t> (ranges::distance (leadingASCIISpan));
360 if (leadingAsciiCharCnt == s.size ()) [[likely]] {
364 span remainingInputSpan = s.subspan (leadingAsciiCharCnt);
365 if constexpr (same_as<CHAR_T, char8_t>) {
367 return IsLatin1 (remainingInputSpan) ? eLatin1 : eNone;
369 constexpr auto isLatin1 = [] ()
noexcept {
370 if constexpr (same_as<remove_cv_t<CHAR_T>, Character>) {
371 return [] (Character c)
noexcept {
return c.IsLatin1 (); };
374 return [] (CHAR_T c)
noexcept {
return static_cast<make_unsigned_t<CHAR_T>
> (c) <= 0xff; };
377#if qCompilerAndStdLib_stdlib_ranges_pretty_broken_Buggy
378 auto ii = remainingInputSpan.begin ();
379 for (; ii != remainingInputSpan.end () and isLatin1 (*ii); ++ii)
381 size_t remainingLatin1 =
static_cast<size_t> (ii - remainingInputSpan.begin ());
382 if (remainingLatin1 == remainingInputSpan.size ()) [[likely]] {
386 auto remainingLatin1 = ranges::take_while_view (remainingInputSpan, isLatin1);
387 if (
static_cast<size_t> (ranges::distance (remainingLatin1)) == remainingInputSpan.size ()) [[likely]] {
397 if (0x09 <= fCharacterCode_ and fCharacterCode_ <= 0x0D) [[unlikely]] {
400 else if (fCharacterCode_ == 0x20) [[unlikely]] {
403 else if (fCharacterCode_ >= 0x1680) [[unlikely]] {
405 if (fCharacterCode_ == 0x1680 or fCharacterCode_ == 0x180E) [[unlikely]] {
408 else if (0x2000 <= fCharacterCode_ and fCharacterCode_ <= 0x2006) [[unlikely]] {
411 else if (0x2008 <= fCharacterCode_ and fCharacterCode_ <= 0x200A) [[unlikely]] {
414 else if (fCharacterCode_ == 0x2028 or fCharacterCode_ == 0x2029 or fCharacterCode_ == 0x205F or fCharacterCode_ == 0x3000) [[unlikely]] {
419 DISABLE_COMPILER_CLANG_WARNING_START (
"clang diagnostic ignored \"-Wconstant-evaluated\"");
420 DISABLE_COMPILER_GCC_WARNING_START (
"GCC diagnostic ignored \"-Wtautological-compare\"");
421 if constexpr (not std::is_constant_evaluated ()) {
422 Ensure (result == !!iswspace (
static_cast<wchar_t> (fCharacterCode_)));
424 DISABLE_COMPILER_GCC_WARNING_END (
"GCC diagnostic ignored \"-Wtautological-compare\"");
425 DISABLE_COMPILER_CLANG_WARNING_END (
"clang diagnostic ignored \"-Wconstant-evaluated\"");
426 DISABLE_COMPILER_MSC_WARNING_END (5063)
431 return c.IsWhitespace ();
433 inline bool Character::IsDigit () const noexcept
437 return !!iswdigit (
static_cast<wchar_t> (fCharacterCode_));
439 inline bool Character::IsHexDigit () const noexcept
442 return !!iswxdigit (
static_cast<wchar_t> (fCharacterCode_));
444 inline bool Character::IsAlphabetic () const noexcept
447 return !!iswalpha (
static_cast<wchar_t> (fCharacterCode_));
452 return !!iswupper (
static_cast<wchar_t> (fCharacterCode_));
457 return !!iswlower (
static_cast<wchar_t> (fCharacterCode_));
459 inline bool Character::IsAlphaNumeric () const noexcept
462 return !!iswalnum (
static_cast<wchar_t> (fCharacterCode_));
464 inline bool Character::IsPunctuation () const noexcept
467 return !!iswpunct (
static_cast<wchar_t> (fCharacterCode_));
481 if (0 <= fCharacterCode_ and fCharacterCode_ <= 0x1f) [[unlikely]] {
484 if (0x7f <= fCharacterCode_ and fCharacterCode_ <= 0x9f) [[unlikely]] {
487 if (0x2028 == fCharacterCode_ or 0x2029 == fCharacterCode_) [[unlikely]] {
499 return static_cast<wchar_t> (::towlower (
static_cast<wchar_t> (fCharacterCode_)));
504 return static_cast<wchar_t> (::towupper (
static_cast<wchar_t> (fCharacterCode_)));
506 template <
typename RESULT_T, IPossibleCharacterRepresentation CHAR_T>
508 requires requires (RESULT_T* into) {
509 { into->empty () } -> same_as<bool>;
510 { into->push_back (
ASCII{0}) };
514 Require (into->empty ());
516 if constexpr (same_as<remove_cv_t<CHAR_T>,
Character>) {
518 if (c.IsASCII ()) [[likely]] {
519 into->push_back (c.GetAsciiCode ());
527 for (CHAR_T c : fromS) {
528 if (
static_cast<make_unsigned_t<CHAR_T>
> (c) <= 0x7f) [[likely]] {
529 into->push_back (
static_cast<char> (c));
538 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T,
size_t E1,
size_t E2>
539 constexpr strong_ordering
Character::Compare (span<const CHAR_T, E1> lhs, span<const CHAR_T, E2> rhs, CompareOptions co)
noexcept
541 Require (co == eWithCase or co == eCaseInsensitive);
544 return Private_::Compare_CS_ (lhs, rhs);
545 case eCaseInsensitive:
546 return Private_::Compare_CI_ (lhs, rhs);
549 return strong_ordering::equal;
560 constexpr char32_t kMinCode_ = 0x10000;
562 (kUNICODESurrogate_Low_End - kUNICODESurrogate_Low_Start);
563 return kMinCode_ <= fCharacterCode_ and fCharacterCode_ <= kMaxCode_;
567 return IsSurrogatePair_Hi (hiSurrogate) and IsSurrogatePair_Lo (lowSurrogate);
569 constexpr bool Character::IsSurrogatePair_Hi (
char16_t hiSurrogate)
573 constexpr bool Character::IsSurrogatePair_Lo (
char16_t lowSurrogate)
575 return kUNICODESurrogate_Low_Start <= lowSurrogate and lowSurrogate <= kUNICODESurrogate_Low_End;
583 constexpr int halfShift = 10;
584 constexpr char32_t halfBase = 0x0010000UL;
585 constexpr char32_t halfMask = 0x3FFUL;
586 char32_t ch = fCharacterCode_ - halfBase;
588 static_cast<char16_t> ((ch & halfMask) + kUNICODESurrogate_Low_Start)};
597 : fCompareOptions{co}
600 constexpr bool Character::EqualsComparer::operator() (
Character lhs,
Character rhs)
const noexcept
603 return Character::Compare (Memory::ConstSpan (span{&lhs, 1}), Memory::ConstSpan (span{&rhs, 1}), fCompareOptions) == 0;
612 : fCompareOptions{co}
619 return Character::Compare (Memory::ConstSpan (span{&lhs, 1}), Memory::ConstSpan (span{&rhs, 1}), fCompareOptions);
625 template <
typename ENUM_TYPE>
629 {Characters::CompareOptions::eCaseInsensitive, L
"Case-Insensitive"},
630 {Characters::CompareOptions::eWithCase, L
"With-Case"},
#define RequireNotNull(p)
#define AssertNotReached()
constexpr bool IsSurrogatePair() const
constexpr bool IsASCII() const noexcept
Return true iff the given character (or all characters in the span) is (are) in the ASCII range [0..0x7f].
static void CheckLatin1(span< const CHAR_T > s)
if not IsLatin1 (arg) throw RuntimeException...
static constexpr void CheckASCII(span< const CHAR_T > s)
if not IsASCII (arg) throw RuntimeException...
static constexpr char16_t kUNICODESurrogate_High_Start
static constexpr ASCIIOrLatin1Result IsASCIIOrLatin1(span< const CHAR_T > s) noexcept
nonvirtual Character ToLowerCase() const noexcept
nonvirtual ASCII GetAsciiCode() const noexcept
static constexpr strong_ordering Compare(span< const CHAR_T, E1 > lhs, span< const CHAR_T, E2 > rhs, CompareOptions co) noexcept
constexpr bool IsControl() const noexcept
nonvirtual bool IsLowerCase() const noexcept
constexpr char32_t GetCharacterCode() const noexcept
Return the char32_t UNICODE code-point associated with this character.
constexpr pair< char16_t, char16_t > GetSurrogatePair() const
nonvirtual Character ToUpperCase() const noexcept
static bool AsASCIIQuietly(span< const CHAR_T > fromS, RESULT_T *into)
constexpr Character() noexcept
constexpr bool IsLatin1() const noexcept
Return true iff the given character (or all characters in the span) is (are) in the ASCII/ISO-Latin-1 range [0..0xff].
constexpr bool IsWhitespace() const noexcept
nonvirtual bool IsUpperCase() const noexcept
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
nonvirtual void push_back(Common::ArgByValueType< T > e)
nonvirtual size_t size() const noexcept
char ASCII
Stroika's string/character classes treat 'char' as being an ASCII character.
DISABLE_COMPILER_MSC_WARNING_START(4996)
constexpr strong_ordering CompareResultNormalizer(FROM_INT_TYPE f)
constexpr EqualsComparer(Stroika::Foundation::Characters::CompareOptions co=Stroika::Foundation::Characters::CompareOptions::eWithCase) noexcept
constexpr ThreeWayComparer(Stroika::Foundation::Characters::CompareOptions co=Stroika::Foundation::Characters::CompareOptions::eWithCase) noexcept