302 span<const INTERMEDIATE_CHAR_T> intermediateSpan = fBytesVSIntermediateCvt_.Bytes2Characters (from, intermediateBuf);
305 ConversionResultWithStatus cr = fIntermediateVSFinalCHARCvt_.ConvertQuietly (intermediateSpan, to);
306 switch (cr.fStatus) {
307 case ConversionStatusFlag::sourceIllegal:
308 UTFConvert::Throw (cr.fStatus, cr.fSourceConsumed);
309 case ConversionStatusFlag::sourceExhausted:
311 if (not from->empty ()) {
312 *from = from->subspan (0, from->size () - 1);
316 return span<CHAR_T>{};
318 case ConversionStatusFlag::ok:
319 return to.subspan (0, cr.fTargetProduced);
322 return span<CHAR_T>{};
// Encodes `from` into `to` in two stages: CHAR_T -> INTERMEDIATE_CHAR_T, then INTERMEDIATE_CHAR_T -> bytes.
// Returns the span produced by the wrapped byte-level converter's Characters2Bytes call.
327 virtual span<byte> Characters2Bytes (span<const CHAR_T> from, span<byte> to)
const override
// Precondition: `to` is at least as large as one of the two size estimates.
329 Require (to.size () >= ComputeTargetByteBufferSize (from) or to.size () >= this->_Characters2Bytes (from));
// Same-width character types: the UTF converter is not used; cast the span and delegate directly.
330 if constexpr (
sizeof (CHAR_T) ==
sizeof (INTERMEDIATE_CHAR_T)) {
331 return fBytesVSIntermediateCvt_.Characters2Bytes (Memory::SpanBytesCast<span<const INTERMEDIATE_CHAR_T>> (from), to);
// Different widths: size a temporary buffer from the UTF converter's estimate for `from`.
338 Memory::StackBuffer<INTERMEDIATE_CHAR_T> intermediateBuf{
339 fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<INTERMEDIATE_CHAR_T> (from)};
// Stage 1: convert CHAR_T -> INTERMEDIATE_CHAR_T into that temporary buffer.
344 span<INTERMEDIATE_CHAR_T> intermediateSpan =
345 fIntermediateVSFinalCHARCvt_.ConvertSpan (from, span<INTERMEDIATE_CHAR_T>{intermediateBuf.data (), intermediateBuf.size ()});
// Stage 2: encode the intermediate characters into the caller's byte buffer.
348 return fBytesVSIntermediateCvt_.Characters2Bytes (intermediateSpan, to);
// Computes a CHAR_T buffer size for decoding `src`, which is either the actual bytes or just a byte count.
351 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override
// First ask the byte-level converter how many INTERMEDIATE_CHAR_T it may produce,
// forwarding whichever alternative of the variant is held.
353 size_t intermediateCharCntMax = [&] () {
354 if (
const size_t* i = get_if<size_t> (&src)) {
355 return fBytesVSIntermediateCvt_.ComputeTargetCharacterBufferSize (*i);
358 return fBytesVSIntermediateCvt_.ComputeTargetCharacterBufferSize (get<span<const byte>> (src));
// Same-width character types: the intermediate count is returned unchanged.
361 if constexpr (
sizeof (CHAR_T) ==
sizeof (INTERMEDIATE_CHAR_T)) {
362 return intermediateCharCntMax;
// Different widths: let the UTF converter translate the intermediate count into a target buffer size.
365 return fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<INTERMEDIATE_CHAR_T, CHAR_T> (intermediateCharCntMax);
// Computes a byte buffer size for encoding `src`, which is either the actual characters or just a character count.
368 virtual size_t ComputeTargetByteBufferSize (variant<span<const CHAR_T>,
size_t> src)
const override
// Step 1: work out how many INTERMEDIATE_CHAR_T the input may expand to.
370 size_t intermediateCharCntMax = [&] () {
// Same-width character types: the visible span branch uses the input's own length.
371 if constexpr (
sizeof (CHAR_T) ==
sizeof (INTERMEDIATE_CHAR_T)) {
372 if (
const size_t* i = get_if<size_t> (&src)) {
376 return get<span<const CHAR_T>> (src).size ();
// Different widths: ask the UTF converter, passing the count or the span as given.
380 if (
const size_t* i = get_if<size_t> (&src)) {
381 return fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<INTERMEDIATE_CHAR_T, CHAR_T> (*i);
384 return fIntermediateVSFinalCHARCvt_.template ComputeTargetBufferSize<INTERMEDIATE_CHAR_T> (get<span<const CHAR_T>> (src));
// Step 2: convert that intermediate character count into a byte count via the byte-level converter.
388 return fBytesVSIntermediateCvt_.ComputeTargetByteBufferSize (intermediateCharCntMax);
// Converter between raw bytes and INTERMEDIATE_CHAR_T.
390 CodeCvt<INTERMEDIATE_CHAR_T> fBytesVSIntermediateCvt_;
// Converter between INTERMEDIATE_CHAR_T and CHAR_T: a UTFConvert when the two character
// widths differ, otherwise just a `byte` (the same-width code paths above never call it).
391 conditional_t<
sizeof (CHAR_T) !=
sizeof (INTERMEDIATE_CHAR_T), UTFConvert,
byte> fIntermediateVSFinalCHARCvt_;
// IRep implementation that forwards conversions to a std::codecvt-style facet (STD_CODE_CVT_T).
400 template <IUNICODECanAlwaysConvertTo CHAR_T>
401 template <
typename STD_CODE_CVT_T>
402 struct CodeCvt<CHAR_T>::CodeCvt_WrapStdCodeCvt_ : CodeCvt<CHAR_T>::IRep {
// The wrapped facet; this object owns it.
403 unique_ptr<STD_CODE_CVT_T> fCodeCvt_;
// Replacement character taken from Options; nullopt means invalid input is reported by throwing.
404 optional<Character> fInvalidCharacterReplacement_;
// Byte-encoded form of the replacement character (a view into fInvalidCharacterReplacementBytesBuf),
// filled in by the constructor when the facet encodes it successfully.
405 optional<span<byte>> fInvalidCharacterReplacementBytes_;
406 using extern_type =
typename STD_CODE_CVT_T::extern_type;
// Backing storage for fInvalidCharacterReplacementBytes_ (at most 8 external units).
407 extern_type fInvalidCharacterReplacementBytesBuf[8];
// The facet's internal character type must be exactly CHAR_T.
408 static_assert (same_as<CHAR_T, typename STD_CODE_CVT_T::intern_type>);
// Constructor: takes ownership of the facet and records the replacement character from `options`.
// When a replacement character is configured, it is encoded once here with the facet's out().
// On compilers with the named ASAN bug, address sanitizing is disabled for this constructor.
409#if qCompilerAndStdLib_arm_asan_FaultStackUseAfterScope_Buggy
410 Stroika_Foundation_Debug_ATTRIBUTE_NO_SANITIZE_ADDRESS
412 CodeCvt_WrapStdCodeCvt_ (
const Options& options, unique_ptr<STD_CODE_CVT_T>&& codeCvt)
413 : fCodeCvt_{move (codeCvt)}
414 , fInvalidCharacterReplacement_{options.fInvalidCharacterReplacement}
416 if (fInvalidCharacterReplacement_) {
// Fresh conversion state, used only for this one-off encode.
417 mbstate_t ignoredMBState{};
418 Memory::StackBuffer<CHAR_T> tmpBuf;
// Express the replacement character as CHAR_T units (tmpBuf is scratch space for As<>()).
419 span<const CHAR_T> invalCharPartlyEncode = fInvalidCharacterReplacement_->As<CHAR_T> (&tmpBuf);
420 const CHAR_T* ignoreCharsConsumed =
nullptr;
// out() advances bytesInvalChar to one past the last external unit it wrote.
421 extern_type* bytesInvalChar = fInvalidCharacterReplacementBytesBuf;
422 DISABLE_COMPILER_MSC_WARNING_START (4996)
424 fCodeCvt_->out (ignoredMBState, invalCharPartlyEncode.data (), invalCharPartlyEncode.data () + invalCharPartlyEncode.size (),
425 ignoreCharsConsumed, fInvalidCharacterReplacementBytesBuf,
426 fInvalidCharacterReplacementBytesBuf + Memory::NEltsOf (fInvalidCharacterReplacementBytesBuf), bytesInvalChar);
427 DISABLE_COMPILER_MSC_WARNING_END (4996)
// Success: remember exactly the bytes that were written.
428 if (r == STD_CODE_CVT_T::ok) {
429 fInvalidCharacterReplacementBytes_ = as_writable_bytes (
430 span{fInvalidCharacterReplacementBytesBuf}.subspan (0, bytesInvalChar - fInvalidCharacterReplacementBytesBuf));
// NOTE(review): presumably the failure branch - the replacement character cannot be encoded by this facet.
433 Private_::ThrowInvalidCharacterProvidedDoesntFitWithProvidedCodeCvt_ ();
// Returns an Options carrying the replacement character this rep was constructed with.
437 virtual Options GetOptions ()
const override
439 return Options{.fInvalidCharacterReplacement = fInvalidCharacterReplacement_};
// Decodes the bytes in *from into `to` using the wrapped facet's in().
//  - from: in/out. On return it references the input bytes that were NOT consumed: empty after a
//          complete conversion, or the trailing bytes when in() reports `partial`.
//  - to:   output buffer; must be at least ComputeTargetCharacterBufferSize (*from) characters.
// Returns the leading part of `to` that was written.
// Undecodable input is replaced by fInvalidCharacterReplacement_ when one is configured; otherwise
// Private_::ThrowErrorConvertingBytes2Characters_ is called with the offset reached.
441 virtual span<CHAR_T> Bytes2Characters (span<const byte>* from, span<CHAR_T> to)
const override
444 Require (to.size () >= ComputeTargetCharacterBufferSize (*from));
// [_First1, _Last1) is the input; in() sets _Mid1 to one past the last byte it consumed.
445 const extern_type* _First1 =
reinterpret_cast<const extern_type*
> (from->data ());
446 const extern_type* _Last1 = _First1 + from->size ();
447 const extern_type* _Mid1 = _First1;
// [_First2, _Last2) is the output; in() sets _Mid2 to one past the last character it wrote.
448 CHAR_T* _First2 = to.data ();
449 CHAR_T* _Last2 = _First2 + to.size ();
450 CHAR_T* _Mid2 = _First2;
451 mbstate_t ignoredMBState{};
// Offsets at which conversion (re)starts after a bad input unit has been replaced.
452 size_t bytesDone = 0;
453 size_t charsDone = 0;
455 auto r = fCodeCvt_->in (ignoredMBState, _First1 + bytesDone, _Last1, _Mid1, _First2 + charsDone, _Last2, _Mid2);
456 if (r == STD_CODE_CVT_T::partial) {
// The unconsumed tail of the input starts at _Mid1, i.e. _Mid1 - _First1 bytes into *from.
// The offset must be counted in input bytes; the number of characters produced is unrelated
// to it whenever a character is not exactly one byte long.
457 *from = from->subspan (static_cast<size_t> (_Mid1 - _First1));
458 Assert (from->size () != 0);
460 else if (r != STD_CODE_CVT_T::ok) {
461 if (fInvalidCharacterReplacement_) {
// Skip the one offending input unit and keep whatever output was produced before it.
462 bytesDone = _Mid1 - _First1 + 1;
463 charsDone = _Mid2 - _First2;
// Append the replacement character (as CHAR_T units) to the output.
465 Memory::StackBuffer<CHAR_T> badCharTmpBuf;
466 span<const CHAR_T> badCharReplaceSpan = fInvalidCharacterReplacement_->As<CHAR_T> (&badCharTmpBuf);
467 span<CHAR_T> copied = Memory::CopyBytes (badCharReplaceSpan, span{&_First2[charsDone], _Last2});
468 Assert (copied.size () >= 0);
469 charsDone += copied.size ();
470 Assert (charsDone <= to.size ());
// No replacement configured: report the failure with the byte offset reached.
474 Private_::ThrowErrorConvertingBytes2Characters_ (_Mid1 - _First1);
// Complete conversion: every input byte was consumed, so nothing remains in *from.
478 Require (_Mid1 == _Last1);
479 *from = span<const byte>{};
481 return to.subspan (0, _Mid2 - _First2);
// Encodes the characters in `from` into `to` using the wrapped facet's out().
// Returns the leading part of `to` that was written.
// Characters the facet cannot encode are replaced by the pre-encoded replacement bytes when a
// replacement is configured; otherwise Private_::ThrowErrorConvertingCharacters2Bytes_ is called.
483 virtual span<byte> Characters2Bytes (span<const CHAR_T> from, span<byte> to)
const override
485 Require (to.size () >= ComputeTargetByteBufferSize (from));
// [_First1, _Last1) is the input; out() sets _Mid1 to one past the last character it consumed.
486 const CHAR_T* _First1 = from.data ();
487 const CHAR_T* _Last1 = _First1 + from.size ();
488 const CHAR_T* _Mid1 = _First1;
// [_First2, _Last2) is the output; out() sets _Mid2 to one past the last external unit it wrote.
489 extern_type* _First2 =
reinterpret_cast<extern_type*
> (to.data ());
490 extern_type* _Last2 = _First2 + to.size ();
491 extern_type* _Mid2 = _First2;
492 mbstate_t ignoredMBState{};
// Offsets at which conversion (re)starts after an unencodable character has been replaced.
493 size_t charsDone = 0;
494 size_t bytesDone = 0;
496 auto r = fCodeCvt_->out (ignoredMBState, _First1 + charsDone, _Last1, _Mid1, _First2 + bytesDone, _Last2, _Mid2);
497 if (r != STD_CODE_CVT_T::ok) {
498 if (fInvalidCharacterReplacement_) {
// Skip the one offending character and keep whatever output was produced before it.
499 charsDone = _Mid1 - _First1 + 1;
500 bytesDone = _Mid2 - _First2;
// Append the replacement bytes that the constructor pre-encoded.
// NOTE(review): no visible check that they fit before _Last2 - confirm the size estimate covers this.
501 memcpy (_First2 + bytesDone, fInvalidCharacterReplacementBytes_->data (), fInvalidCharacterReplacementBytes_->size ());
502 bytesDone += fInvalidCharacterReplacementBytes_->size ();
// No replacement configured: report the failure with the character offset reached.
506 Private_::ThrowErrorConvertingCharacters2Bytes_ (_Mid1 - _First1);
// All input characters must have been consumed.
509 Require (_Mid1 == _Last1);
510 return to.subspan (0, _Mid2 - _First2);
// Computes a character buffer size for decoding `src` (either the bytes themselves or a byte count).
512 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override
515 if (
const size_t* i = get_if<size_t> (&src)) {
// Span alternative: one character slot per input byte.
519 return get<span<const byte>> (src).size ();
// Computes a byte buffer size for encoding `src` (either the characters themselves or a character count):
// the character count multiplied by the facet's max_length().
522 virtual size_t ComputeTargetByteBufferSize (variant<span<const CHAR_T>,
size_t> src)
const override
524 if (
const size_t* i = get_if<size_t> (&src)) {
525 return (*i) * fCodeCvt_->max_length ();
530 return get<span<const CHAR_T>> (src).size () * fCodeCvt_->max_length ();
// IRep over char16_t for a code page, holding a char16_t table (fMap_) and an optional replacement byte.
// The conversion and size-estimate methods are only declared here; their definitions are not in this section.
537 struct BuiltinSingleByteTableCodePageRep_ final : CodeCvt<char16_t>::IRep {
538 BuiltinSingleByteTableCodePageRep_ (CodePage cp, optional<Character> invalidCharacterReplacement);
539 virtual ~BuiltinSingleByteTableCodePageRep_ () =
default;
// Reports the options for this rep: when a replacement byte is stored, it is run through
// Bytes2Characters to obtain the char16_t form used for the returned option.
540 virtual CodeCvt<char16_t>::Options GetOptions ()
const override
542 optional<char16_t> invalRepChar;
543 if (fInvalidCharacterReplacementByte_ != nullopt) {
// One-byte span over the stored replacement byte, decoded into the single-element span at `x`.
545 auto byteSpan = span{&*fInvalidCharacterReplacementByte_, 1};
546 (void)this->Bytes2Characters (&byteSpan, span{&x, 1});
549 return CodeCvt<char16_t>::Options{.fInvalidCharacterReplacement = invalRepChar};
551 virtual span<char16_t> Bytes2Characters (span<const byte>* from, span<char16_t> to)
const override;
552 virtual span<byte> Characters2Bytes (span<const char16_t> from, span<byte> to)
const override;
553 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override;
554 virtual size_t ComputeTargetByteBufferSize (variant<span<const char16_t>,
size_t> src)
const override;
// Table of char16_t values for the code page.
// NOTE(review): presumably indexed by byte value - confirm against the out-of-line definitions.
555 const char16_t* fMap_;
// Replacement byte, if one is configured; nullopt otherwise.
556 optional<byte> fInvalidCharacterReplacementByte_;
558#if qStroika_Foundation_Common_Platform_Windows
// IRep over char16_t constructed from a CodePage; compiled only when
// qStroika_Foundation_Common_Platform_Windows is set.
// The conversion and size-estimate methods are only declared here; their definitions are not in this section.
559 struct WindowsNative_ final : CodeCvt<char16_t>::IRep {
560 constexpr WindowsNative_ (CodePage cp)
564 virtual ~WindowsNative_ () =
default;
565 virtual CodeCvt<char16_t>::Options GetOptions ()
const override
569 virtual span<char16_t> Bytes2Characters (span<const byte>* from, span<char16_t> to)
const override;
570 virtual span<byte> Characters2Bytes (span<const char16_t> from, span<byte> to)
const override;
571 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>,
size_t> src)
const override;
572 virtual size_t ComputeTargetByteBufferSize (variant<span<const char16_t>,
size_t> src)
const override;
// Builds an Options for CodeCvt<CHAR_T> from the Options of a CodeCvt over a different character
// type, copying the invalid-character replacement setting across.
583 template <IUNICODECanAlwaysConvertTo CHAR_T>
584 template <qCompilerAndStdLib_Constra
intDiffersInTemplateRedeclaration_BWA (IUNICODECanAlwaysConvertTo) FROM_CHAR_T_OPTIONS>
585 constexpr inline auto CodeCvt<CHAR_T>::Options::New (
typename CodeCvt<FROM_CHAR_T_OPTIONS>::Options o) -> Options
587 return Options{.fInvalidCharacterReplacement = o.fInvalidCharacterReplacement};
// Constructor taking only options: the rep is a UTFConvertRep_ over char8_t,
// i.e. the same rep the eUTF8 / kUTF8 cases elsewhere in this file select.
595 template <IUNICODECanAlwaysConvertTo CHAR_T>
597 : fRep_{make_shared<UTFConvertRep_<char8_t>> (options)}
// Constructor from a locale `l`: wraps a std::codecvt_byname facet looked up by l.name (),
// choosing the facet's character types according to CHAR_T.
600 template <IUNICODECanAlwaysConvertTo CHAR_T>
// wchar_t: facet converts wchar_t <-> char.
604 if constexpr (same_as<CHAR_T,
wchar_t>) {
605 *
this = mkFromStdCodeCvt<codecvt_byname<wchar_t, char, mbstate_t>> (options, l.name ());
// char16_t / char32_t: facet converts CHAR_T <-> char8_t.
607 else if constexpr (same_as<CHAR_T, char16_t> or same_as<CHAR_T, char32_t>) {
608 *
this = mkFromStdCodeCvt<codecvt_byname<CHAR_T, char8_t, mbstate_t>> (options, l.name ());
// Character: built on a CodeCvt<char32_t>, with options translated through Options::New.
610 else if constexpr (same_as<CHAR_T, Character>) {
612 CodeCvt<char32_t>::Options::New<CHAR_T> (options), l.name ()));
618 DISABLE_COMPILER_MSC_WARNING_END (4996)
// Constructor from a character-set name (`charset`) plus `options`.
//  - ISO 8859-1 uses the dedicated Latin1ConvertRep_.
//  - UTF-8 delegates to the UnicodeExternalEncodings constructor.
//  - For CHAR_T == Character, any other name is handed to std::codecvt_byname as a locale-style name.
//  - Otherwise Private_::ThrowCharsetNotSupportedException_ is called.
620 template <IUNICODECanAlwaysConvertTo CHAR_T>
623 if (charset == WellKnownCharsets::kISO_8859_1) {
624 fRep_ = make_shared<Latin1ConvertRep_> (options);
626 else if (charset == WellKnownCharsets::kUTF8) {
// Forward `options` so the caller's invalid-character replacement is honoured for UTF-8 too,
// as it is in every other branch of this constructor.
627 *
this = CodeCvt<CHAR_T>{UnicodeExternalEncodings::eUTF8, options};
629 else if (same_as<CHAR_T, Character>) {
// Wrap a char32_t <-> char8_t facet looked up by the charset's narrow-string name.
632 fRep_ = make_shared<UTF2UTFRep_<
char32_t>> (CodeCvt<
char32_t>::mkFromStdCodeCvt<codecvt_byname<
char32_t,
char8_t, mbstate_t>> (
633 CodeCvt<
char32_t>::Options::New<CHAR_T> (options), charset.AsNarrowSDKString ()));
634 DISABLE_COMPILER_MSC_WARNING_END (4996)
// No converter available for this charset.
637 Private_::ThrowCharsetNotSupportedException_ (charset);
// Constructor from a Unicode external encoding `e`: picks the rep matching the encoding's code-unit
// width, using the byte-swapping variant when `e` is not the eUTF16 / eUTF32 value.
640 template <IUNICODECanAlwaysConvertTo CHAR_T>
645 case UnicodeExternalEncodings::eUTF8:
646 fRep_ = make_shared<UTFConvertRep_<char8_t>> (options);
648 case UnicodeExternalEncodings::eUTF16_BE:
649 case UnicodeExternalEncodings::eUTF16_LE:
// NOTE(review): eUTF16 is presumably an alias for the platform's native byte order - confirm.
650 if (e == UnicodeExternalEncodings::eUTF16) {
651 fRep_ = make_shared<UTFConvertRep_<char16_t>> (options);
654 fRep_ = make_shared<UTFConvertSwappedRep_<char16_t>> (options);
657 case UnicodeExternalEncodings::eUTF32_BE:
658 case UnicodeExternalEncodings::eUTF32_LE:
// Same scheme as the UTF-16 cases above, with 32-bit code units.
659 if (e == UnicodeExternalEncodings::eUTF32) {
660 fRep_ = make_shared<UTFConvertRep_<char32_t>> (options);
663 fRep_ = make_shared<UTFConvertSwappedRep_<char32_t>> (options);
// Constructor that chooses the converter by looking for a byte order mark at the start of *guessFormatFrom.
//  - guessFormatFrom: in/out; when a byte order mark is recognized it is advanced past the mark.
//  - useElse: converter to use when no byte order mark is recognized (optional).
//  - options: must agree with useElse's invalid-character replacement when useElse is supplied.
670 template <IUNICODECanAlwaysConvertTo CHAR_T>
671 CodeCvt<CHAR_T>::CodeCvt (span<const byte>* guessFormatFrom,
const optional<CodeCvt>& useElse,
const Options& options)
675 Require (useElse == nullopt or useElse->GetOptions ().fInvalidCharacterReplacement == options.fInvalidCharacterReplacement);
// ReadByteOrderMark yields the detected encoding and a size_t that is used below as the number of bytes to skip.
676 if (optional<tuple<UnicodeExternalEncodings, size_t>> r =
ReadByteOrderMark (*guessFormatFrom)) {
677 *guessFormatFrom = guessFormatFrom->subspan (get<size_t> (*r));
678 fRep_ =
CodeCvt{get<UnicodeExternalEncodings> (*r), options}.fRep_;
// No-mark fallback (presumably the else branch): use useElse's rep if given, otherwise the options-only constructor's rep.
681 fRep_ = useElse ? useElse->fRep_ :
CodeCvt{options}.fRep_;
// Constructor from a code page `cp`: picks a rep according to the code page.
684 template <IUNICODECanAlwaysConvertTo CHAR_T>
// These code pages are handled by the table-driven rep, adapted from char16_t to CHAR_T via UTF2UTFRep_.
693 case WellKnownCodePages::kANSI:
694 case WellKnownCodePages::kMAC:
695 case WellKnownCodePages::kPC:
696 case WellKnownCodePages::kPCA:
697 case WellKnownCodePages::kGreek:
698 case WellKnownCodePages::kTurkish:
699 case WellKnownCodePages::kHebrew:
700 case WellKnownCodePages::kArabic:
701 fRep_ = make_shared<UTF2UTFRep_<char16_t>> (
702 CodeCvt<char16_t> (make_shared<Private_::BuiltinSingleByteTableCodePageRep_> (cp, options.fInvalidCharacterReplacement)));
// Unicode code pages map directly onto the UTF converter reps.
704 case WellKnownCodePages::kUTF8:
705 fRep_ = make_shared<UTFConvertRep_<char8_t>> (options);
707 case WellKnownCodePages::kUNICODE_WIDE:
708 fRep_ = make_shared<UTFConvertRep_<char16_t>> (options);
710 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN:
711 fRep_ = make_shared<UTFConvertSwappedRep_<char16_t>> (options);
// On Windows the WindowsNative_ rep is used; it is constructed without options, so a request
// for an invalid-character replacement is rejected as unsupported.
714#if qStroika_Foundation_Common_Platform_Windows
715 if (options.fInvalidCharacterReplacement) {
716 Private_::ThrowCodePageNotSupportedException_ (cp);
718 fRep_ = make_shared<UTF2UTFRep_<char16_t>> (CodeCvt<char16_t> (make_shared<Private_::WindowsNative_> (cp)));
// NOTE(review): presumably the non-Windows branch - the code page is reported as unsupported.
721 Private_::ThrowCodePageNotSupportedException_ (cp);
// Constructor that adapts an existing converter (`basedOn`, over INTERMEDIATE_CHAR_T) to CHAR_T
// by wrapping it in a UTF2UTFRep_.
725 template <IUNICODECanAlwaysConvertTo CHAR_T>
726 template <IUNICODECanAlwaysConvertTo INTERMEDIATE_CHAR_T>
728 : fRep_{make_shared<UTF2UTFRep_<INTERMEDIATE_CHAR_T>> (basedOn)}
731 template <IUNICODECanAlwaysConvertTo CHAR_T>
// Factory: constructs a STD_CODECVT facet from `args` and returns a CodeCvt<CHAR_T> that wraps it.
// Requires the facet's intern_type to be CHAR_T.
736 template <IUNICODECanAlwaysConvertTo CHAR_T>
737 template <IStdCodeCVT STD_CODECVT,
typename... ARGS>
739 requires (same_as<CHAR_T, typename STD_CODECVT::intern_type>)
// The facet is created through Private_::deletable_facet_ so that it can be owned by a unique_ptr.
741 auto u = make_unique<Private_::deletable_facet_<STD_CODECVT>> (forward<ARGS> (args)...);
// Ownership of the facet moves into the wrapping rep.
742 return CodeCvt<CHAR_T>{make_shared<CodeCvt_WrapStdCodeCvt_<Private_::deletable_facet_<STD_CODECVT>>> (options, move (u))};
// Returns the options reported by the underlying rep.
744 template <IUNICODECanAlwaysConvertTo CHAR_T>
745 inline auto CodeCvt<CHAR_T>::GetOptions () const -> Options
747 return fRep_->GetOptions ();
749 template <IUNICODECanAlwaysConvertTo CHAR_T>
753 return fRep_->Bytes2Characters (&from, span{to}).size ();