Stroika Library 3.0d20
 
Loading...
Searching...
No Matches
TextToBinary.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
8#include "Stroika/Foundation/Containers/Support/ReserveTweaks.h"
10#include "Stroika/Foundation/Execution/Common.h"
11#include "Stroika/Foundation/Execution/OperationNotSupportedException.h"
16
17#include "TextToBinary.h"
18
19using std::byte;
20
21using namespace Stroika::Foundation;
22using namespace Stroika::Foundation::Streams;
23using namespace Stroika::Foundation::Streams::TextToBinary;
24
29
30namespace {
31 class Rep_ final : public InputStream::IRep<byte>, public Memory::UseBlockAllocationIfAppropriate<Rep_> {
32 public:
33 Rep_ (const InputStream::Ptr<Character>& src)
34 : fSrc_{src}
35 {
36 }
37
38 protected:
39 virtual bool IsSeekable () const override
40 {
41 // @todo https://stroika.atlassian.net/browse/STK-1028 - make optionally seekable
42 return false; // avoid seekability at first cuz makes impl much more costly/complex
43 }
44 virtual void CloseRead () override
45 {
46 if (fSrc_ != nullptr) {
47 fSrc_.Close ();
48 }
49 Ensure (not IsOpenRead ());
50 Ensure (fSrc_ == nullptr);
51 }
52 virtual bool IsOpenRead () const override
53 {
54 return fSrc_ != nullptr;
55 }
56 virtual optional<size_t> AvailableToRead () override
57 {
58 // this is easy, because an upstream character always translates into at least one byte, so just check if any upstream characters
59 if (fSrcBufferedSpan_.size () > 0) {
60 return fSrcBufferedSpan_.size ();
61 }
62 return fSrc_.AvailableToRead ();
63 }
64 virtual optional<SeekOffsetType> RemainingLength () override
65 {
66 Require (IsOpenRead ());
67 return nullopt; // possible, but not easy...
68 }
69 virtual optional<span<byte>> Read (span<byte> intoBuffer, NoDataAvailableHandling blockFlag) override
70 {
71 // NB: CURRENTLY HARDWIRE CONVERT TO UTF8- but later allow params to specify convert-to
72 Require (IsOpenRead ());
73 Require (not intoBuffer.empty ());
74 // first see if any partially translated bytes to return
75 Again:
76 if (not fSrcBufferedSpan_.empty ()) [[unlikely]] {
77 auto copiedIntoSpan = Memory::CopyBytes (fSrcBufferedSpan_.subspan (0, min (fSrcBufferedSpan_.size (), intoBuffer.size ())), intoBuffer);
78 Assert (copiedIntoSpan.size () >= 1);
79 fSrcBufferedSpan_ = fSrcBufferedSpan_.subspan (copiedIntoSpan.size ()); // skip copied bytes
80 _fOffset += copiedIntoSpan.size ();
81 return intoBuffer.subspan (0, copiedIntoSpan.size ());
82 }
83 // more likely - KISS for now - read one character from upstream, patch fSrcBufferedSpan_, and try again
84 Assert (fSrcBufferedSpan_.empty ());
85 Character readBuf[1];
86 if (auto o = fSrc_.GetRepRWRef ().Read (span{readBuf}, blockFlag)) {
87 if (size_t nChars = o->size ()) {
88 Assert (nChars == 1); // for now cuz we have small buffer - could enlarge
89 fSrcBufferedSpan_ = Memory::SpanBytesCast<span<byte>> (Characters::UTFConvert::kThe.ConvertSpan (
90 span{readBuf, nChars}, Memory::SpanBytesCast<span<char8_t>> (span{fSrcBufferedRawBytes_})));
91 Assert (not fSrcBufferedSpan_.empty ()); // cuz at least one character
92 goto Again; // cuz it has data it can pull
93 }
94 else {
95 // if we got here (Read worked, but returned zero characters), nothing in our buf, and nothing upstream - EOF
96 return span<byte>{};
97 }
98 }
99 else {
100 Assert (blockFlag == eDontBlock);
101 return nullopt; // nothing pre-read, and nothing available upstream, so nothing yet
102 }
103 }
104 virtual SeekOffsetType GetReadOffset () const override
105 {
106 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
107 Require (IsOpenRead ());
108 return _fOffset;
109 }
110
111 protected:
113 byte fSrcBufferedRawBytes_[4]; // not used directly, but always through fSrcBufferedSpan_
114 span<byte> fSrcBufferedSpan_;
115 SeekOffsetType _fOffset{0};
116 [[no_unique_address]] Debug::AssertExternallySynchronizedMutex fThisAssertExternallySynchronized_;
117 };
118}
119
120/*
121 ********************************************************************************
122 *********************** Streams::TextToBinary::Reader::New *********************
123 ********************************************************************************
124 */
125auto TextToBinary::Reader::New (const InputStream::Ptr<Character>& srcStream, optional<SeekableFlag> seekable) -> InputStream::Ptr<byte>
126{
127 auto result = InputStream::Ptr<byte>{make_shared<Rep_> (srcStream)};
128 // @todo - this could be more efficient by working embedding some of this logic into Rep_
129 if (seekable == SeekableFlag::eSeekable) {
130 result = BufferedInputStream::New (result, SeekableFlag::eSeekable);
131 Ensure (result.IsSeekable ());
132 }
133 return result;
134}
135
136auto TextToBinary::Reader::New (const Traversal::Iterable<Character>& srcText) -> InputStream::Ptr<byte>
137{
138 // @todo - Could make this more efficient (by combining into one object) - but for now KISS
139 auto result = New (IterableToInputStream::New<Character> (srcText));
140 Ensure (result.IsSeekable ());
141 return result;
142}
conditional_t< qStroika_Foundation_Memory_PreferBlockAllocation and andTrueCheck, BlockAllocationUseHelper< T >, Common::Empty > UseBlockAllocationIfAppropriate
Use this to enable block allocation for a particular class. Beware of subclassing.
NoDataAvailableHandling
If eDontBlock passed to most Stream APIs, then when the code would do a blocking read,...
Definition Stream.h:90
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Definition UTFConvert.h:369
nonvirtual span< TRG_T > ConvertSpan(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded (including the special case of ASCII, and Latin1) character spans (e....
NOT a real mutex - just a debugging infrastructure support tool so in debug builds can be assured thr...
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
virtual bool IsSeekable() const =0
virtual SeekOffsetType GetReadOffset() const =0
virtual optional< span< ElementType > > Read(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)=0
virtual optional< SeekOffsetType > RemainingLength()
returns nullopt if not known (typical, and the default) - but sometimes it is known,...
virtual optional< size_t > AvailableToRead()
returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typi...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237