Stroika Library 3.0d23
 
Loading...
Searching...
No Matches
TextToBinary.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
8#include "Stroika/Foundation/Containers/Support/ReserveTweaks.h"
10#include "Stroika/Foundation/Execution/Common.h"
11#include "Stroika/Foundation/Execution/OperationNotSupportedException.h"
17
18#include "TextToBinary.h"
19
20using std::byte;
21
22using namespace Stroika::Foundation;
23using namespace Stroika::Foundation::Streams;
24using namespace Stroika::Foundation::Streams::TextToBinary;
25
30
31namespace {
32 class Rep_ final : public InputStream::IRep<byte>, public Memory::UseBlockAllocationIfAppropriate<Rep_> {
33 public:
34 Rep_ (const InputStream::Ptr<Character>& src)
35 : fSrc_{src}
36 {
37 }
38
39 protected:
40 virtual bool IsSeekable () const override
41 {
42 // @todo https://stroika.atlassian.net/browse/STK-1028 - make optionally seekable
43 return false; // avoid seekability at first cuz makes impl much more costly/complex
44 }
45 virtual void CloseRead () override
46 {
47 if (fSrc_ != nullptr) {
48 fSrc_.Close ();
49 }
50 Ensure (not IsOpenRead ());
51 Ensure (fSrc_ == nullptr);
52 }
53 virtual bool IsOpenRead () const override
54 {
55 return fSrc_ != nullptr;
56 }
57 virtual optional<size_t> AvailableToRead () override
58 {
59 // this is easy, because an upstream character always translates into at least one byte, so just check if any upstream characters
60 if (fSrcBufferedSpan_.size () > 0) {
61 return fSrcBufferedSpan_.size ();
62 }
63 return fSrc_.AvailableToRead ();
64 }
65 virtual optional<SeekOffsetType> RemainingLength () override
66 {
67 Require (IsOpenRead ());
68 return nullopt; // possible, but not easy...
69 }
70 virtual optional<span<byte>> Read (span<byte> intoBuffer, NoDataAvailableHandling blockFlag) override
71 {
72 // NB: CURRENTLY HARDWIRE CONVERT TO UTF8- but later allow params to specify convert-to
73 Require (IsOpenRead ());
74 Require (not intoBuffer.empty ());
75 // first see if any partially translated bytes to return
76 Again:
77 if (not fSrcBufferedSpan_.empty ()) [[unlikely]] {
78 auto copiedIntoSpan = Memory::CopyBytes (fSrcBufferedSpan_.subspan (0, min (fSrcBufferedSpan_.size (), intoBuffer.size ())), intoBuffer);
79 Assert (copiedIntoSpan.size () >= 1);
80 fSrcBufferedSpan_ = fSrcBufferedSpan_.subspan (copiedIntoSpan.size ()); // skip copied bytes
81 _fOffset += copiedIntoSpan.size ();
82 return intoBuffer.subspan (0, copiedIntoSpan.size ());
83 }
84 // more likely - KISS for now - read one character from upstream, patch fSrcBufferedSpan_, and try again
85 Assert (fSrcBufferedSpan_.empty ());
86 Character readBuf[1];
87 if (auto o = fSrc_.GetRepRWRef ().Read (span{readBuf}, blockFlag)) {
88 if (size_t nChars = o->size ()) {
89 Assert (nChars == 1); // for now cuz we have small buffer - could enlarge
90 fSrcBufferedSpan_ = Memory::SpanBytesCast<span<byte>> (Characters::UTFConvert::kThe.ConvertSpan (
91 span{readBuf, nChars}, Memory::SpanBytesCast<span<char8_t>> (span{fSrcBufferedRawBytes_})));
92 Assert (not fSrcBufferedSpan_.empty ()); // cuz at least one character
93 goto Again; // cuz it has data it can pull
94 }
95 else {
96 // if we got here (Read worked, but returned zero characters), nothing in our buf, and nothing upstream - EOF
97 return span<byte>{};
98 }
99 }
100 else {
101 Assert (blockFlag == eDontBlock);
102 return nullopt; // nothing pre-read, and nothing available upstream, so nothing yet
103 }
104 }
105 virtual SeekOffsetType GetReadOffset () const override
106 {
107 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
108 Require (IsOpenRead ());
109 return _fOffset;
110 }
111
112 protected:
// Raw storage that Read () passes to UTFConvert as the output buffer when converting one upstream Character.
114 byte fSrcBufferedRawBytes_[4]; // not used directly, but always through fSrcBufferedSpan_
// Converted bytes that Read () has not yet handed back to a caller; empty when nothing is pending.
// Presumably a sub-span of fSrcBufferedRawBytes_ (it is whatever ConvertSpan returns) - TODO confirm.
115 span<byte> fSrcBufferedSpan_;
// Running total of bytes returned by Read (); reported by GetReadOffset ().
116 SeekOffsetType _fOffset{0};
118 };
119}
120
121/*
122 ********************************************************************************
123 *********************** Streams::TextToBinary::Reader::New *********************
124 ********************************************************************************
125 */
126auto TextToBinary::Reader::New (const InputStream::Ptr<Character>& srcStream, optional<SeekableFlag> seekable) -> InputStream::Ptr<byte>
127{
128 auto result = InputStream::Ptr<byte>{Memory::MakeSharedPtr<Rep_> (srcStream)};
129 // @todo - this could be more efficient by working embedding some of this logic into Rep_
130 if (seekable == SeekableFlag::eSeekable) {
131 result = BufferedInputStream::New (result, SeekableFlag::eSeekable);
132 Ensure (result.IsSeekable ());
133 }
134 return result;
135}
136
137auto TextToBinary::Reader::New (const Traversal::Iterable<Character>& srcText) -> InputStream::Ptr<byte>
138{
139 // @todo - Could make this more efficient (by combining into one object) - but for now KISS
140 auto result = New (IterableToInputStream::New<Character> (srcText));
141 Ensure (result.IsSeekable ());
142 return result;
143}
conditional_t< qStroika_Foundation_Memory_PreferBlockAllocation and andTrueCheck, BlockAllocationUseHelper< T >, Common::Empty > UseBlockAllocationIfAppropriate
Use this to enable block allocation for a particular class. Beware of subclassing.
#define qStroika_ATTRIBUTE_NO_UNIQUE_ADDRESS_VCFORCE
[[msvc::no_unique_address]] isn't always broken in MSVC. Annotate with this on things where its not b...
Definition StdCompat.h:445
NoDataAvailableHandling
If eDontBlock passed to most Stream APIs, then when the code would do a blocking read,...
Definition Stream.h:90
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Definition UTFConvert.h:369
nonvirtual span< TRG_T > ConvertSpan(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded (including the special case of ASCII, and Latin1) character spans (e....
NOT a real mutex - just a debugging infrastructure support tool so in debug builds can be assured thr...
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
virtual bool IsSeekable() const =0
virtual SeekOffsetType GetReadOffset() const =0
virtual optional< span< ElementType > > Read(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)=0
virtual optional< SeekOffsetType > RemainingLength()
returns nullopt if not known (typical, and the default) - but sometimes it is known,...
virtual optional< size_t > AvailableToRead()
returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typi...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237