Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
TextToBinary.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
8#include "Stroika/Foundation/Containers/Support/ReserveTweaks.h"
10#include "Stroika/Foundation/Execution/Common.h"
11#include "Stroika/Foundation/Execution/OperationNotSupportedException.h"
14
16
17#include "TextToBinary.h"
18
19using std::byte;
20
21using namespace Stroika::Foundation;
22using namespace Stroika::Foundation::Streams;
23using namespace Stroika::Foundation::Streams::TextToBinary;
24
29
30namespace {
31 class Rep_ final : public InputStream::IRep<byte>, public Memory::UseBlockAllocationIfAppropriate<Rep_> {
32 public:
33 Rep_ (const InputStream::Ptr<Character>& src)
34 : fSrc_{src}
35 {
36 }
37
38 protected:
39 virtual bool IsSeekable () const override
40 {
41 return false; // avoid seekability at first cuz makes impl much more costly/complex
42 }
43 virtual void CloseRead () override
44 {
45 if (fSrc_ != nullptr) {
46 fSrc_.Close ();
47 }
48 Ensure (not IsOpenRead ());
49 Ensure (fSrc_ == nullptr);
50 }
51 virtual bool IsOpenRead () const override
52 {
53 return fSrc_ != nullptr;
54 }
55 virtual optional<size_t> AvailableToRead () override
56 {
57 // this is easy, because an upstream character always translates into at least one byte, so just check if any upstream characters
58 if (fSrcBufferedSpan_.size () > 0) {
59 return fSrcBufferedSpan_.size ();
60 }
61 return fSrc_.AvailableToRead ();
62 }
63 virtual optional<SeekOffsetType> RemainingLength () override
64 {
65 Require (IsOpenRead ());
66 return nullopt; // possible, but not easy...
67 }
68 virtual optional<span<byte>> Read (span<byte> intoBuffer, NoDataAvailableHandling blockFlag) override
69 {
70 // NB: CURRENTLY HARDWIRE CONVERT TO UTF8- but later allow params to specify convert-to
71 Require (IsOpenRead ());
72 Require (not intoBuffer.empty ());
73 // first see if any partially translated bytes to return
74 Again:
75 if (not fSrcBufferedSpan_.empty ()) [[unlikely]] {
76 auto copiedIntoSpan = Memory::CopyBytes (fSrcBufferedSpan_.subspan (0, min (fSrcBufferedSpan_.size (), intoBuffer.size ())), intoBuffer);
77 Assert (copiedIntoSpan.size () >= 1);
78 fSrcBufferedSpan_ = fSrcBufferedSpan_.subspan (copiedIntoSpan.size ()); // skip copied bytes
79 _fOffset += copiedIntoSpan.size ();
80 return intoBuffer.subspan (0, copiedIntoSpan.size ());
81 }
82 // more likely - KISS for now - read one character from upstream, patch fSrcBufferedSpan_, and try again
83 Assert (fSrcBufferedSpan_.empty ());
84 Character readBuf[1];
85 if (auto o = fSrc_.GetRepRWRef ().Read (span{readBuf}, blockFlag)) {
86 if (size_t nChars = o->size ()) {
87 Assert (nChars == 1); // for now cuz we have small buffer - could enlarge
88 fSrcBufferedSpan_ = Memory::SpanBytesCast<span<byte>> (Characters::UTFConvert::kThe.ConvertSpan (
89 span{readBuf, nChars}, Memory::SpanBytesCast<span<char8_t>> (span{fSrcBufferedRawBytes_})));
90 Assert (not fSrcBufferedSpan_.empty ()); // cuz at least one character
91 goto Again; // cuz it has data it can pull
92 }
93 else {
94 // if we got here (Read worked, but returned zero characters), nothing in our buf, and nothing upstream - EOF
95 return span<byte>{};
96 }
97 }
98 else {
99 Assert (blockFlag == eDontBlock);
100 return nullopt; // nothing pre-read, and nothing available upstream, so nothing yet
101 }
102 }
103 virtual SeekOffsetType GetReadOffset () const override
104 {
105 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
106 Require (IsOpenRead ());
107 return _fOffset;
108 }
109
110 protected:
112 byte fSrcBufferedRawBytes_[4]; // not used directly, but always through fSrcBufferedSpan_
113 span<byte> fSrcBufferedSpan_;
114 SeekOffsetType _fOffset{0};
115 [[no_unique_address]] Debug::AssertExternallySynchronizedMutex fThisAssertExternallySynchronized_;
116 };
117}
118
119/*
120 ********************************************************************************
121 *********************** Streams::TextToBinary::Reader::New *********************
122 ********************************************************************************
123 */
124auto TextToBinary::Reader::New (const InputStream::Ptr<Character>& srcStream) -> InputStream::Ptr<byte>
125{
126 return InputStream::Ptr<byte>{make_shared<Rep_> (srcStream)};
127}
128
129auto TextToBinary::Reader::New (const Traversal::Iterable<Character>& srcText) -> InputStream::Ptr<byte>
130{
131 // @todo - Could make this more efficient (by combining into one object) - but for now KISS
132 return New (IterableToInputStream::New<Character> (srcText));
133}
conditional_t< qStroika_Foundation_Memory_PreferBlockAllocation and andTrueCheck, BlockAllocationUseHelper< T >, Common::Empty > UseBlockAllocationIfAppropriate
Use this to enable block allocation for a particular class. Beware of subclassing.
NoDataAvailableHandling
If eDontBlock passed to most Stream APIs, then when the code would do a blocking read,...
Definition Stream.h:90
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Definition UTFConvert.h:369
nonvirtual span< TRG_T > ConvertSpan(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded (including the special case of ASCII, and Latin1) character spans (e....
NOT a real mutex - just a debugging infrastructure support tool so in debug builds can be assured thr...
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
virtual bool IsSeekable() const =0
virtual SeekOffsetType GetReadOffset() const =0
virtual optional< span< ElementType > > Read(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)=0
virtual optional< SeekOffsetType > RemainingLength()
returns nullopt if not known (typical, and the default) - but sometimes it is known,...
virtual optional< size_t > AvailableToRead()
returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typi...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237