301 span<const INTERMEDIATE_CHAR_T> intermediateSpan = fBytesVSIntermediateCvt_.Bytes2Characters (from, intermediateBuf);
304 ConversionResultWithStatus cr = fIntermediateVSFinalCHARCvt_.ConvertQuietly (intermediateSpan, to);
305 switch (cr.fStatus) {
306 case ConversionStatusFlag::sourceIllegal:
307 UTFConvert::Throw (cr.fStatus, cr.fSourceConsumed);
308 case ConversionStatusFlag::sourceExhausted:
310 if (not from->empty ()) {
311 *from = from->subspan (0, from->size () - 1);
315 return span<CHAR_T>{};
317 case ConversionStatusFlag::ok:
318 return to.subspan (0, cr.fTargetProduced);
321 return span<CHAR_T>{};
// Encodes the characters of 'from' into 'to' and returns whatever the underlying
// fBytesVSIntermediateCvt_.Characters2Bytes call returns.
// NOTE(review): this excerpt omits some original lines (the embedded numbers jump, e.g. 330 to 337),
// so the braces/else separating the two paths below are not visible here.
326 virtual span<byte> Characters2Bytes (span<const CHAR_T> from, span<byte> to)
const override
// Precondition: 'to' is at least as large as one of the two size computations.
328 Require (to.size () >= ComputeTargetByteBufferSize (from) or to.size () >= this->_Characters2Bytes (from));
// When CHAR_T and INTERMEDIATE_CHAR_T have the same size, 'from' is reinterpreted as
// INTERMEDIATE_CHAR_T and handed straight to the byte converter.
329 if constexpr (
sizeof (CHAR_T) ==
sizeof (INTERMEDIATE_CHAR_T)) {
330 return fBytesVSIntermediateCvt_.Characters2Bytes (Memory::SpanBytesCast<span<const INTERMEDIATE_CHAR_T>> (from), to);
// Different sizes: convert CHAR_T to INTERMEDIATE_CHAR_T into a temporary buffer sized by
// ComputeTargetBufferSize, then encode the intermediate characters to bytes.
337 Memory::StackBuffer<INTERMEDIATE_CHAR_T> intermediateBuf{
338 fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<INTERMEDIATE_CHAR_T> (from)};
343 span<INTERMEDIATE_CHAR_T> intermediateSpan =
344 fIntermediateVSFinalCHARCvt_.ConvertSpan (from, span<INTERMEDIATE_CHAR_T>{intermediateBuf.data (), intermediateBuf.size ()});
347 return fBytesVSIntermediateCvt_.Characters2Bytes (intermediateSpan, to);
// Returns an upper bound on the number of CHAR_T elements needed to decode 'src', which is either
// the byte span itself or just a byte count.
// Stage 1: ask fBytesVSIntermediateCvt_ how many INTERMEDIATE_CHAR_T the bytes may produce.
// Stage 2: if CHAR_T and INTERMEDIATE_CHAR_T differ in size, convert that count into a CHAR_T count.
// NOTE(review): this excerpt omits some original lines (braces/else), so nesting is not fully visible here.
350 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override
352 size_t intermediateCharCntMax = [&] () {
// 'src' holds a plain byte count
353 if (
const size_t* i = get_if<size_t> (&src)) {
354 return fBytesVSIntermediateCvt_.ComputeTargetCharacterBufferSize (*i);
// 'src' holds the actual byte span
357 return fBytesVSIntermediateCvt_.ComputeTargetCharacterBufferSize (get<span<const byte>> (src));
360 if constexpr (
sizeof (CHAR_T) ==
sizeof (INTERMEDIATE_CHAR_T)) {
361 return intermediateCharCntMax;
// Decode direction: the source is INTERMEDIATE_CHAR_T and the target is CHAR_T. The first template
// argument of ComputeTargetBufferSize is the target type, so it must be CHAR_T here; the reversed
// order would size the buffer for the opposite conversion and could under-allocate.
364 return fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<CHAR_T, INTERMEDIATE_CHAR_T> (intermediateCharCntMax);
// Returns the byte-buffer size fBytesVSIntermediateCvt_ reports for the number of intermediate
// characters that 'src' (a CHAR_T span, or just a CHAR_T count) may expand to.
// NOTE(review): this excerpt omits some original lines (e.g. 372-374), so the statement executed when
// 'src' holds a size_t in the same-size branch is not visible here.
367 virtual size_t ComputeTargetByteBufferSize (variant<span<const CHAR_T>,
size_t> src)
const override
369 size_t intermediateCharCntMax = [&] () {
// Same-size character types: the visible path uses the span's element count unchanged.
370 if constexpr (
sizeof (CHAR_T) ==
sizeof (INTERMEDIATE_CHAR_T)) {
371 if (
const size_t* i = get_if<size_t> (&src)) {
375 return get<span<const CHAR_T>> (src).size ();
// Different sizes: ask the UTF converter how many INTERMEDIATE_CHAR_T are needed, either from a
// count or from the actual characters.
379 if (
const size_t* i = get_if<size_t> (&src)) {
380 return fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<INTERMEDIATE_CHAR_T, CHAR_T> (*i);
383 return fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<INTERMEDIATE_CHAR_T> (get<span<const CHAR_T>> (src));
// Finally translate the intermediate character count into a byte count.
387 return fBytesVSIntermediateCvt_.ComputeTargetByteBufferSize (intermediateCharCntMax);
// Converter between bytes and INTERMEDIATE_CHAR_T.
389 CodeCvt<INTERMEDIATE_CHAR_T> fBytesVSIntermediateCvt_;
// Converter between INTERMEDIATE_CHAR_T and CHAR_T: a UTFConvert when the two types differ in size,
// otherwise just a placeholder 'byte' (the same-size paths above do not call through it).
390 conditional_t<
sizeof (CHAR_T) !=
sizeof (INTERMEDIATE_CHAR_T), UTFConvert,
byte> fIntermediateVSFinalCHARCvt_;
// IRep implementation that wraps a std::codecvt-style facet (STD_CODE_CVT_T) whose intern_type is CHAR_T.
399 template <IUNICODECanAlwaysConvertTo CHAR_T>
400 template <
typename STD_CODE_CVT_T>
401 struct CodeCvt<CHAR_T>::CodeCvt_WrapStdCodeCvt_ : CodeCvt<CHAR_T>::IRep {
// The wrapped facet.
402 unique_ptr<STD_CODE_CVT_T> fCodeCvt_;
// Optional character substituted for input that cannot be converted.
403 optional<Character> fInvalidCharacterReplacement_;
// Encoded form of the replacement character; when set by the constructor it is a span over
// fInvalidCharacterReplacementBytesBuf (this object's own storage).
404 optional<span<byte>> fInvalidCharacterReplacementBytes_;
405 using extern_type =
typename STD_CODE_CVT_T::extern_type;
// Fixed 8-element backing store for the encoded replacement character.
406 extern_type fInvalidCharacterReplacementBytesBuf[8];
407 static_assert (same_as<CHAR_T, typename STD_CODE_CVT_T::intern_type>);
// Constructor: takes ownership of 'codeCvt' and records options.fInvalidCharacterReplacement.
// If a replacement character is configured, it is encoded once up front with fCodeCvt_->out into
// fInvalidCharacterReplacementBytesBuf, and the produced bytes are remembered in
// fInvalidCharacterReplacementBytes_.
// NOTE(review): this excerpt omits some original lines (e.g. 410, 414, 430-431); presumably the
// Throw... call at the end is the branch taken when 'out' does not return ok - confirm against the full file.
408#if qCompilerAndStdLib_arm_asan_FaultStackUseAfterScope_Buggy
409 Stroika_Foundation_Debug_ATTRIBUTE_NO_SANITIZE_ADDRESS
411 CodeCvt_WrapStdCodeCvt_ (
const Options& options, unique_ptr<STD_CODE_CVT_T>&& codeCvt)
412 : fCodeCvt_{move (codeCvt)}
413 , fInvalidCharacterReplacement_{options.fInvalidCharacterReplacement}
415 if (fInvalidCharacterReplacement_) {
// Fresh conversion state for this one-off encode.
416 mbstate_t ignoredMBState{};
417 Memory::StackBuffer<CHAR_T> tmpBuf;
// Replacement character expressed as CHAR_T code units (tmpBuf provides scratch storage).
418 span<const CHAR_T> invalCharPartlyEncode = fInvalidCharacterReplacement_->As<CHAR_T> (&tmpBuf);
419 const CHAR_T* ignoreCharsConsumed =
nullptr;
// Output cursor: 'out' advances this past the last byte it wrote.
420 extern_type* bytesInvalChar = fInvalidCharacterReplacementBytesBuf;
421 DISABLE_COMPILER_MSC_WARNING_START (4996)
422 auto r = fCodeCvt_->out (ignoredMBState, invalCharPartlyEncode.data (),
423 invalCharPartlyEncode.data () + invalCharPartlyEncode.size (), ignoreCharsConsumed,
424 fInvalidCharacterReplacementBytesBuf,
425 fInvalidCharacterReplacementBytesBuf + size (fInvalidCharacterReplacementBytesBuf), bytesInvalChar);
426 DISABLE_COMPILER_MSC_WARNING_END (4996)
427 if (r == STD_CODE_CVT_T::ok) {
// Keep only the bytes actually produced (cursor minus buffer start).
428 fInvalidCharacterReplacementBytes_ = as_writable_bytes (
429 span{fInvalidCharacterReplacementBytesBuf}.subspan (0, bytesInvalChar - fInvalidCharacterReplacementBytesBuf));
432 Private_::ThrowInvalidCharacterProvidedDoesntFitWithProvidedCodeCvt_ ();
// Returns an Options value carrying the replacement character this rep was constructed with.
436 virtual Options GetOptions ()
const override
438 return Options{.fInvalidCharacterReplacement = fInvalidCharacterReplacement_};
// Decodes the bytes in '*from' into 'to' using fCodeCvt_->in and returns the filled prefix of 'to'.
// '*from' is updated on return: it is emptied when all input was consumed, or, on a 'partial'
// result, narrowed to the bytes that were not consumed.
// When 'in' reports an error and fInvalidCharacterReplacement_ is set, one input byte is skipped and the
// replacement character is written to the output in its place.
// NOTE(review): this excerpt omits some original lines (e.g. 453, 470-472, 474-476), so the retry
// jump after a replacement and the else-branches are not visible here; presumably the Throw... call is
// the no-replacement branch and the final Require/clear is the 'ok' branch - confirm against the full file.
440 virtual span<CHAR_T> Bytes2Characters (span<const byte>* from, span<CHAR_T> to)
const override
443 Require (to.size () >= ComputeTargetCharacterBufferSize (*from));
// _First/_Last delimit each buffer; _Mid is the cursor that 'in' advances (1 = input bytes, 2 = output chars).
444 const extern_type* _First1 =
reinterpret_cast<const extern_type*
> (from->data ());
445 const extern_type* _Last1 = _First1 + from->size ();
446 const extern_type* _Mid1 = _First1;
447 CHAR_T* _First2 = to.data ();
448 CHAR_T* _Last2 = _First2 + to.size ();
449 CHAR_T* _Mid2 = _First2;
450 mbstate_t ignoredMBState{};
// Offsets at which the conversion (re)starts; non-zero only after a bad byte has been replaced.
451 size_t bytesDone = 0;
452 size_t charsDone = 0;
454 auto r = fCodeCvt_->in (ignoredMBState, _First1 + bytesDone, _Last1, _Mid1, _First2 + charsDone, _Last2, _Mid2);
455 if (r == STD_CODE_CVT_T::partial) {
// '*from' is a BYTE span, so it must be advanced by the number of input bytes consumed
// (_Mid1 - _First1), not by the number of characters produced.
456 *from = from->subspan (
static_cast<size_t> (_Mid1 - _First1));
457 Assert (from->size () != 0);
459 else if (r != STD_CODE_CVT_T::ok) {
460 if (fInvalidCharacterReplacement_) {
// Skip exactly one offending input byte; keep all characters produced so far.
461 bytesDone = _Mid1 - _First1 + 1;
462 charsDone = _Mid2 - _First2;
// Append the replacement character (as CHAR_T code units) to the output.
464 Memory::StackBuffer<CHAR_T> badCharTmpBuf;
465 span<const CHAR_T> badCharReplaceSpan = fInvalidCharacterReplacement_->As<CHAR_T> (&badCharTmpBuf);
466 span<CHAR_T> copied = Memory::CopyBytes (badCharReplaceSpan, span{&_First2[charsDone], _Last2});
467 Assert (copied.size () >= 0);
468 charsDone += copied.size ();
469 Assert (charsDone <= to.size ());
// Argument is the byte offset at which conversion failed.
473 Private_::ThrowErrorConvertingBytes2Characters_ (_Mid1 - _First1);
// All input consumed: report no remaining bytes to the caller.
477 Require (_Mid1 == _Last1);
478 *from = span<const byte>{};
480 return to.subspan (0, _Mid2 - _First2);
// Encodes the characters in 'from' into 'to' using fCodeCvt_->out and returns the filled prefix of 'to'.
// When 'out' does not return ok and fInvalidCharacterReplacement_ is set, one input character is
// skipped and the pre-encoded replacement bytes are copied to the output in its place.
// NOTE(review): this excerpt omits some original lines (e.g. 494, 502-504, 506-507), so the retry
// jump after a replacement and the else-branch are not visible here; presumably the Throw... call is
// the no-replacement branch - confirm against the full file.
482 virtual span<byte> Characters2Bytes (span<const CHAR_T> from, span<byte> to)
const override
484 Require (to.size () >= ComputeTargetByteBufferSize (from));
// _First/_Last delimit each buffer; _Mid is the cursor that 'out' advances (1 = input chars, 2 = output bytes).
485 const CHAR_T* _First1 = from.data ();
486 const CHAR_T* _Last1 = _First1 + from.size ();
487 const CHAR_T* _Mid1 = _First1;
488 extern_type* _First2 =
reinterpret_cast<extern_type*
> (to.data ());
489 extern_type* _Last2 = _First2 + to.size ();
490 extern_type* _Mid2 = _First2;
491 mbstate_t ignoredMBState{};
// Offsets at which the conversion (re)starts; non-zero only after a bad character has been replaced.
492 size_t charsDone = 0;
493 size_t bytesDone = 0;
495 auto r = fCodeCvt_->out (ignoredMBState, _First1 + charsDone, _Last1, _Mid1, _First2 + bytesDone, _Last2, _Mid2);
496 if (r != STD_CODE_CVT_T::ok) {
497 if (fInvalidCharacterReplacement_) {
// Skip exactly one offending input character; keep all bytes produced so far.
498 charsDone = _Mid1 - _First1 + 1;
499 bytesDone = _Mid2 - _First2;
// Append the replacement bytes computed by the constructor.
500 memcpy (_First2 + bytesDone, fInvalidCharacterReplacementBytes_->data (), fInvalidCharacterReplacementBytes_->size ());
501 bytesDone += fInvalidCharacterReplacementBytes_->size ();
// Argument is the character offset at which conversion failed.
505 Private_::ThrowErrorConvertingCharacters2Bytes_ (_Mid1 - _First1);
508 Require (_Mid1 == _Last1);
509 return to.subspan (0, _Mid2 - _First2);
// Returns the character-buffer size to use for decoding 'src' (a byte span, or just a byte count).
// For a byte span the visible code returns the span's size, i.e. one character slot per input byte.
// NOTE(review): original lines 515-517 (the statement for the size_t case) are not in this excerpt.
511 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override
514 if (
const size_t* i = get_if<size_t> (&src)) {
518 return get<span<const byte>> (src).size ();
// Returns the byte-buffer size to use for encoding 'src' (a CHAR_T span, or just a character count):
// the character count multiplied by fCodeCvt_->max_length ().
521 virtual size_t ComputeTargetByteBufferSize (variant<span<const CHAR_T>,
size_t> src)
const override
// 'src' holds a plain character count
523 if (
const size_t* i = get_if<size_t> (&src)) {
524 return (*i) * fCodeCvt_->max_length ();
// 'src' holds the actual characters
529 return get<span<const CHAR_T>> (src).size () * fCodeCvt_->max_length ();
// CodeCvt<char16_t>::IRep implementation constructed from a CodePage and an optional replacement character.
// The conversion and size-computation overrides are only declared here; their definitions are elsewhere.
536 struct BuiltinSingleByteTableCodePageRep_ final : CodeCvt<char16_t>::IRep {
537 BuiltinSingleByteTableCodePageRep_ (CodePage cp, optional<Character> invalidCharacterReplacement);
538 virtual ~BuiltinSingleByteTableCodePageRep_ () =
default;
// Builds an Options value from 'invalRepChar'. When a replacement byte is stored, that single byte
// is run through Bytes2Characters.
// NOTE(review): original lines 543 and 546-547 (declaration of 'x' and, presumably, the assignment
// to invalRepChar) are not in this excerpt - confirm against the full file.
539 virtual CodeCvt<char16_t>::Options GetOptions ()
const override
541 optional<char16_t> invalRepChar;
542 if (fInvalidCharacterReplacementByte_ != nullopt) {
544 auto byteSpan = span{&*fInvalidCharacterReplacementByte_, 1};
545 (void)this->Bytes2Characters (&byteSpan, span{&x, 1});
548 return CodeCvt<char16_t>::Options{.fInvalidCharacterReplacement = invalRepChar};
550 virtual span<char16_t> Bytes2Characters (span<const byte>* from, span<char16_t> to)
const override;
551 virtual span<byte> Characters2Bytes (span<const char16_t> from, span<byte> to)
const override;
552 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override;
553 virtual size_t ComputeTargetByteBufferSize (variant<span<const char16_t>,
size_t> src)
const override;
// Pointer to char16_t table data; presumably the byte-to-character map for the code page - TODO confirm.
554 const char16_t* fMap_;
// Replacement byte, if one is configured.
555 optional<byte> fInvalidCharacterReplacementByte_;
557#if qStroika_Foundation_Common_Platform_Windows
// CodeCvt<char16_t>::IRep implementation constructed from a CodePage; only compiled when
// qStroika_Foundation_Common_Platform_Windows is set. The constructor takes no replacement character.
// NOTE(review): the constructor body, the GetOptions body and the data members (original lines
// 560-562, 565-567, 572+) are not in this excerpt.
558 struct WindowsNative_ final : CodeCvt<char16_t>::IRep {
559 constexpr WindowsNative_ (CodePage cp)
563 virtual ~WindowsNative_ () =
default;
564 virtual CodeCvt<char16_t>::Options GetOptions ()
const override
// Conversion and size-computation overrides are declared here and defined elsewhere.
568 virtual span<char16_t> Bytes2Characters (span<const byte>* from, span<char16_t> to)
const override;
569 virtual span<byte> Characters2Bytes (span<const char16_t> from, span<byte> to)
const override;
570 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override;
571 virtual size_t ComputeTargetByteBufferSize (variant<span<const char16_t>,
size_t> src)
const override;
// Builds Options for CodeCvt<CHAR_T> from the Options of a CodeCvt over a different character type
// (FROM_CHAR_T_OPTIONS), copying fInvalidCharacterReplacement across.
582 template <IUNICODECanAlwaysConvertTo CHAR_T>
583 template <qCompilerAndStdLib_Constra
intDiffersInTemplateRedeclaration_BWA (IUNICODECanAlwaysConvertTo) FROM_CHAR_T_OPTIONS>
584 constexpr inline auto CodeCvt<CHAR_T>::Options::New (
typename CodeCvt<FROM_CHAR_T_OPTIONS>::Options o) -> Options
586 return Options{.fInvalidCharacterReplacement = o.fInvalidCharacterReplacement};
// Member-initializer of a CodeCvt<CHAR_T> constructor: fRep_ becomes a UTFConvertRep_<char8_t> built from 'options'.
// NOTE(review): the constructor's declaration line (original 595) is not in this excerpt.
594 template <IUNICODECanAlwaysConvertTo CHAR_T>
596 : fRep_{make_shared<UTFConvertRep_<char8_t>> (options)}
// Body of a CodeCvt<CHAR_T> member that builds the converter from a locale 'l' (by l.name ()) and 'options',
// choosing the std::codecvt_byname flavour by CHAR_T.
// NOTE(review): the declaration (original lines 600-602) and several interior lines (e.g. 610, 612-616)
// are not in this excerpt.
599 template <IUNICODECanAlwaysConvertTo CHAR_T>
// wchar_t: wrap codecvt_byname<wchar_t, char, mbstate_t>
603 if constexpr (same_as<CHAR_T,
wchar_t>) {
604 *
this = mkFromStdCodeCvt<codecvt_byname<wchar_t, char, mbstate_t>> (options, l.name ());
// char16_t / char32_t: wrap codecvt_byname<CHAR_T, char8_t, mbstate_t>
606 else if constexpr (same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>) {
607 *
this = mkFromStdCodeCvt<codecvt_byname<CHAR_T, char8_t, mbstate_t>> (options, l.name ());
// Character: the visible arguments are char32_t Options derived from 'options', plus the locale name
// (the call they belong to starts on a line not in this excerpt).
609 else if constexpr (same_as<CHAR_T, Character>) {
611 CodeCvt<char32_t>::Options::New<CHAR_T> (options), l.name ()));
617 DISABLE_COMPILER_MSC_WARNING_END (4996)
// Body of a CodeCvt<CHAR_T> member that selects a rep from a 'charset' value and 'options'.
// NOTE(review): the declaration (original lines 620-621) and some interior lines (e.g. 629-630, 634-635)
// are not in this excerpt; presumably the final Throw... call is the fall-through for unsupported
// charsets - confirm against the full file.
619 template <IUNICODECanAlwaysConvertTo CHAR_T>
// ISO-8859-1 uses the dedicated Latin1 rep.
622 if (charset == WellKnownCharsets::kISO_8859_1) {
623 fRep_ = make_shared<Latin1ConvertRep_> (options);
// UTF-8 reuses the UnicodeExternalEncodings-based construction.
625 else if (charset == WellKnownCharsets::kUTF8) {
626 *
this = CodeCvt<CHAR_T>{UnicodeExternalEncodings::eUTF8};
// For Character: wrap a char32_t codecvt_byname converter named by the charset in a UTF2UTFRep_.
628 else if (same_as<CHAR_T, Character>) {
631 fRep_ = make_shared<UTF2UTFRep_<
char32_t>> (CodeCvt<
char32_t>::mkFromStdCodeCvt<codecvt_byname<
char32_t,
char8_t, mbstate_t>> (
632 CodeCvt<
char32_t>::Options::New<CHAR_T> (options), charset.AsNarrowSDKString ()));
633 DISABLE_COMPILER_MSC_WARNING_END (4996)
636 Private_::ThrowCharsetNotSupportedException_ (charset);
// Body of a CodeCvt<CHAR_T> member that selects a rep from a UnicodeExternalEncodings value 'e'.
// NOTE(review): the declaration, the switch header and the break/else lines (e.g. original 640-643,
// 646, 651-652) are not in this excerpt.
639 template <IUNICODECanAlwaysConvertTo CHAR_T>
644 case UnicodeExternalEncodings::eUTF8:
645 fRep_ = make_shared<UTFConvertRep_<char8_t>> (options);
// Both UTF-16 byte orders land here: when 'e' equals eUTF16 the plain rep is used, otherwise the
// byte-swapping rep (presumably eUTF16 names the native byte order - TODO confirm).
647 case UnicodeExternalEncodings::eUTF16_BE:
648 case UnicodeExternalEncodings::eUTF16_LE:
649 if (e == UnicodeExternalEncodings::eUTF16) {
650 fRep_ = make_shared<UTFConvertRep_<char16_t>> (options);
653 fRep_ = make_shared<UTFConvertSwappedRep_<char16_t>> (options);
// Same scheme for UTF-32.
656 case UnicodeExternalEncodings::eUTF32_BE:
657 case UnicodeExternalEncodings::eUTF32_LE:
658 if (e == UnicodeExternalEncodings::eUTF32) {
659 fRep_ = make_shared<UTFConvertRep_<char32_t>> (options);
662 fRep_ = make_shared<UTFConvertSwappedRep_<char32_t>> (options);
// Constructs a converter by inspecting the start of '*guessFormatFrom' for a byte order mark.
//   guessFormatFrom - in/out: when a BOM is recognized, the span is advanced past it.
//   useElse         - converter whose rep is used when no BOM is recognized (if provided).
//   options         - options for the constructed converter; must agree with useElse's replacement character.
// NOTE(review): original lines 671-673 and 678-679 (initializer/braces/else) are not in this excerpt.
669 template <IUNICODECanAlwaysConvertTo CHAR_T>
670 CodeCvt<CHAR_T>::CodeCvt (span<const byte>* guessFormatFrom,
const optional<CodeCvt>& useElse,
const Options& options)
674 Require (useElse == nullopt or useElse->GetOptions ().fInvalidCharacterReplacement == options.fInvalidCharacterReplacement);
// BOM found: r holds (encoding, size_t); the size_t is used as the number of bytes to skip.
675 if (optional<tuple<UnicodeExternalEncodings, size_t>> r =
ReadByteOrderMark (*guessFormatFrom)) {
676 *guessFormatFrom = guessFormatFrom->subspan (get<size_t> (*r));
677 fRep_ =
CodeCvt{get<UnicodeExternalEncodings> (*r), options}.fRep_;
// Fallback: the caller-supplied converter's rep, or the rep of a CodeCvt built from 'options' alone.
680 fRep_ = useElse ? useElse->fRep_ :
CodeCvt{options}.fRep_;
// Body of a CodeCvt<CHAR_T> member that selects a rep from a code page 'cp' and 'options'.
// NOTE(review): the declaration, the switch header and the break/default/#else/#endif lines (e.g. original
// 684-691, 702, 711-712, 718-719) are not in this excerpt; presumably the last Throw... call is the
// non-Windows fallback - confirm against the full file.
683 template <IUNICODECanAlwaysConvertTo CHAR_T>
// These code pages use BuiltinSingleByteTableCodePageRep_ (a char16_t converter), wrapped in a UTF2UTFRep_.
692 case WellKnownCodePages::kANSI:
693 case WellKnownCodePages::kMAC:
694 case WellKnownCodePages::kPC:
695 case WellKnownCodePages::kPCA:
696 case WellKnownCodePages::kGreek:
697 case WellKnownCodePages::kTurkish:
698 case WellKnownCodePages::kHebrew:
699 case WellKnownCodePages::kArabic:
700 fRep_ = make_shared<UTF2UTFRep_<char16_t>> (
701 CodeCvt<char16_t> (make_shared<Private_::BuiltinSingleByteTableCodePageRep_> (cp, options.fInvalidCharacterReplacement)));
703 case WellKnownCodePages::kUTF8:
704 fRep_ = make_shared<UTFConvertRep_<char8_t>> (options);
705 case WellKnownCodePages::kUNICODE_WIDE:
707 fRep_ = make_shared<UTFConvertRep_<char16_t>> (options);
709 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN:
710 fRep_ = make_shared<UTFConvertSwappedRep_<char16_t>> (options);
// On Windows: WindowsNative_ is constructed from the code page only, so a request that specifies a
// replacement character is rejected as not supported.
713#if qStroika_Foundation_Common_Platform_Windows
714 if (options.fInvalidCharacterReplacement) {
715 Private_::ThrowCodePageNotSupportedException_ (cp);
717 fRep_ = make_shared<UTF2UTFRep_<char16_t>> (CodeCvt<char16_t> (make_shared<Private_::WindowsNative_> (cp)));
720 Private_::ThrowCodePageNotSupportedException_ (cp);
// Member-initializer of a CodeCvt<CHAR_T> constructor templated on INTERMEDIATE_CHAR_T:
// fRep_ becomes a UTF2UTFRep_<INTERMEDIATE_CHAR_T> built from 'basedOn'.
// NOTE(review): the constructor's declaration line (original 726) is not in this excerpt.
724 template <IUNICODECanAlwaysConvertTo CHAR_T>
725 template <IUNICODECanAlwaysConvertTo INTERMEDIATE_CHAR_T>
727 : fRep_{make_shared<UTF2UTFRep_<INTERMEDIATE_CHAR_T>> (basedOn)}
// Template header of a CodeCvt<CHAR_T> member.
// NOTE(review): its declaration and body (original lines 731-734) are not in this excerpt.
730 template <IUNICODECanAlwaysConvertTo CHAR_T>
// Factory body: constructs a Private_::deletable_facet_<STD_CODECVT> from the forwarded 'args', wraps it
// in a CodeCvt_WrapStdCodeCvt_ rep together with 'options', and returns the resulting CodeCvt<CHAR_T>.
// Only usable when CHAR_T is STD_CODECVT::intern_type (see the requires clause).
// NOTE(review): the declaration lines (original 737, 739) are not in this excerpt; presumably this is
// the mkFromStdCodeCvt member called elsewhere in this file - confirm against the full file.
735 template <IUNICODECanAlwaysConvertTo CHAR_T>
736 template <IStdCodeCVT STD_CODECVT,
typename... ARGS>
738 requires (same_as<CHAR_T, typename STD_CODECVT::intern_type>)
740 auto u = make_unique<Private_::deletable_facet_<STD_CODECVT>> (forward<ARGS> (args)...);
741 return CodeCvt<CHAR_T>{make_shared<CodeCvt_WrapStdCodeCvt_<Private_::deletable_facet_<STD_CODECVT>>> (options, move (u))};
// Returns the Options reported by the underlying rep (forwards to fRep_->GetOptions ()).
743 template <IUNICODECanAlwaysConvertTo CHAR_T>
744 inline auto CodeCvt<CHAR_T>::GetOptions () const -> Options
746 return fRep_->GetOptions ();
748 template <IUNICODECanAlwaysConvertTo CHAR_T>
752 return fRep_->Bytes2Characters (&from, span{to}).size ();