Stroika Library 3.0d23
 
Loading...
Searching...
No Matches
StreamReader.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Streams_StreamReader_h_
5#define _Stroika_Foundation_Streams_StreamReader_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <memory>
10
11#include "Stroika/Foundation/Common/Common.h"
13
14/**
15 * \file
16 *
17 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
18 */
19
20namespace Stroika::Foundation::Streams {
21
22 /**
23 * \brief StreamReader is a non-essential Stream utility, adding simplicity of use for a common use case, and a significant performance boost.
24 *
25 * \see also https://learn.microsoft.com/en-us/dotnet/api/system.io.streamreader?view=net-8.0 - similar idea, except there you specify conversion from
26 * binary stream.
27 *
28 * StreamReader is an unnecessary class for using the Streams library, but it is easy to use, has a similar API to InputStream<T>::Ptr, and is significantly more performant.
29 *
30 * \note Stream reader is always fully seekable, and requires its argument (construction time) to be fully seekable as well.
31 *
32 * \note Similar to BufferedInputStream - but that provides an actual Stream object, and is slightly less performant for direct use.
33 *
34 * TODO:
35 * \todo consider if should take templated parameter indicating buffer size (really template parameter StreamReaderDefaultOptions - with various settings)
36 */
37 template <typename ELEMENT_TYPE>
38 struct StreamReader {
39 public:
40 /** \brief The type of element read by this StreamReader (alias for the ELEMENT_TYPE template argument).
41 */
42 using ElementType = ELEMENT_TYPE;
43
44 public:
45 /**
46 * \note Do NOT use the InputStream::Ptr<ElementType> passed in at the same time as it's being used by the
47 * StreamReader, or grave disorder may result. StreamReader assumes it's the only one seeking and reading
48 * through the input stream. See SynchronizeToUnderlyingStream ().
49 *
50 * \note At destruction, StreamReader automatically calls SynchronizeToUnderlyingStream
51 *
52 * \pre underlyingReadFromStreamAdopted.Seekable (); // so it can read ahead and synchronize back in dtor
53 *
54 * \note a future version of StreamReader might allow a non-seekable underlyingReadFromStreamAdopted and just pre-read to allow seeking, but then
55 * 'synchronize' feature won't work
56 */
57 StreamReader (const typename InputStream::Ptr<ElementType>& underlyingReadFromStreamAdopted);
58 StreamReader () = delete;
59 StreamReader (const StreamReader&) = delete;
60
61 public:
62 /**
63 * \note this does IgnoreExceptionsForCall (this->SynchronizeToUnderlyingStream ())
64 */
66
67 public:
68 /**
69 * \brief Read into data referenced by span argument - using the blocking strategy given by the blockFlag argument
70 *
71 * returns nullopt ONLY if blockFlag = eDontBlock AND there is NO data available without blocking.
72 *
73 * a return of NOT missing, but an empty span implies EOF.
74 *
75 * BLOCKING until data is available, but can return with fewer elements than argument span-size
76 * without prejudice about how much more is available.
77 *
78 * \note It is legal to call Read () if it's already returned EOF, but then it MUST return EOF again.
79 *
80 * \pre not intoBuffer.empty ()
81 *
82 * @see ReadAll () to read all the data from the stream at once.
83 *
84 * @see ReadBlocking () - often simplest to use API
85 * @see ReadOrThrow ()
86 * @see ReadNonBlocking ()
87 *
88 * \see also InputStream::Ptr::Read () - identical API (except non-const)
89 */
90 nonvirtual optional<span<ElementType>> Read (span<ElementType> intoBuffer, NoDataAvailableHandling blockFlag);
91
92 public:
93 /**
94 * \brief ReadBlocking () reads either a single element, or fills in argument intoBuffer - blocking as needed (so it never reports 'would block', nor throws EWouldBlock)
95 *
96 * ReadBlocking ():
97 * Reads a single element (blocking as long as needed) - or nullopt when at EOF.
98 *
99 * ReadBlocking(span<ElementType> intoBuffer)
100 * fills in subspan with at least one element (or zero if at EOF)
101 *
102 * \see also InputStream::Ptr::ReadBlocking () - identical API (except non-const)
103 */
104 nonvirtual optional<ElementType> ReadBlocking ();
105 nonvirtual span<ElementType> ReadBlocking (span<ElementType> intoBuffer);
106 nonvirtual span<ElementType> ReadBlocking (Memory::InlineBuffer<ElementType>* intoBuffer, ElementType upToSentinel);
107
108 public:
109 /**
110 * \brief read into intoBuffer - returning nullopt if would block, and else returning subspan of input with read data present
111 *
112 * same as Read (intoBuffer, NoDataAvailableHandling::eDontBlock)
113 *
114 * \see also InputStream::Ptr::ReadNonBlocking () - identical API (except non-const)
115 */
116 nonvirtual optional<span<ElementType>> ReadNonBlocking (span<ElementType> intoBuffer);
117
118 public:
119 /**
120 * \brief Read (either one or into argument span), taking a NoDataAvailableHandling blockFlag, and throw if would block
121 *
122 * same as Read() APIs, but instead of returning optional, for cases where nullopt would be returned, throw EWouldBlock
123 *
124 * \note if blockFlag == eBlockIfNoDataAvailable, this amounts to *Read(...args), since it will never generate an
125 * EWouldBlock exception;
126 *
127 * when to use this variant? If implementing API where you are handed a blockFlag, but want to KISS, and
128 * just throw if EWouldBlock ..
129 *
130 * \see also InputStream::Ptr::ReadOrThrow () - identical API (except non-const)
131 */
132 nonvirtual span<ElementType> ReadOrThrow (span<ElementType> intoBuffer, NoDataAvailableHandling blockFlag);
133
134 public:
135 /**
136 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Peek () but reading cached data
137 */
138 nonvirtual optional<ElementType> Peek ();
139 nonvirtual span<ElementType> Peek (span<ElementType> intoBuffer);
140
141 public:
142 /**
143 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::GetOffset () - but without being 'synchronized' it may be a different value than the underlying stream
144 */
145 nonvirtual SeekOffsetType GetOffset () const;
146
147 public:
148 /**
149 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it may be a different value than the underlying stream
150 */
151 nonvirtual SeekOffsetType Seek (SeekOffsetType offset);
152 nonvirtual SeekOffsetType Seek (Whence whence, SignedSeekOffsetType offset);
153
154 public:
155 /**
156 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::ReadAll ()
157 *
158 * @todo add other overloads from InputStream::Ptr::ReadAll() - ...
159 */
160 nonvirtual size_t ReadAll (ElementType* intoStart, ElementType* intoEnd);
161
162 public:
163 /**
164 * \brief returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typically 1) if that number is known to be available to read
165 *
166 * \see also (different) RemainingLength ()
167 */
168 nonvirtual optional<size_t> AvailableToRead () const;
169
170 public:
171 /**
172 * \brief returns nullopt if not known (typical, and the default) - but sometimes it is known, and quite helpful
173 *
174 * \note - Similar to AvailableToRead, but different. For example, on a socket-stream, you can tell how many bytes
175 * are available to read (buffered by kernel). But no guess about the remaining length of the stream (how many bytes
176 * will appear before end).
177 *
178 * But for a disk file, you MIGHT (not always - like unix special files) know the length of the file. This is for that case.
179 */
180 nonvirtual optional<SeekOffsetType> RemainingLength () const;
181
182 public:
183 /**
184 * If you must use the underlying stream along-side StreamReader, you can use
185 * SynchronizeToUnderlyingStream and SynchronizeFromUnderlyingStream to allow each class to update
186 * each other.
187 */
188 nonvirtual void SynchronizeToUnderlyingStream ();
189
190 public:
191 /**
192 * If you must use the underlying stream along-side StreamReader, you can use
193 * SynchronizeToUnderlyingStream and SynchronizeFromUnderlyingStream to allow each class to update
194 * each other.
195 */
196 nonvirtual void SynchronizeFromUnderlyingStream ();
197
198 public:
199 /**
200 * \brief check if the stream is currently at EOF
201 *
202 * \note - IsAtEOF/0 may do a blocking Read () call.
203 * \note - IsAtEOF (eDontBlock) returns optional<bool> - nullopt if would block, and false if known not at EOF, and true if known EOF;
204 * this differs from most Stroika streams APIs - in that nullopt here means 'EWouldBlock';
205 */
206 nonvirtual bool IsAtEOF ();
207 nonvirtual optional<bool> IsAtEOF (NoDataAvailableHandling blockFlag);
208
209 public:
210 [[deprecated ("Since Stroika v3.0d5 use ReadBlocking (span)")]] size_t Read (ElementType* intoStart, ElementType* intoEnd)
211 {
212 return ReadBlocking (span{intoStart, intoEnd}).size (); // blocking read, 0 => EOF; no one-argument Read (span) overload is declared, so forward to ReadBlocking
213 }
214
215 private:
216 // may want to tune these; but I did a little tuning on Windows --LGP 2022-12-17
217 static constexpr size_t kDefaultReadBufferSize_ = 8 * 1024;
218 static constexpr size_t kMaxBufferedChunkSize_ = 64 * 1024;
219 // Note the reason for kCountPingPingBufs_ == 2: generally we get a hit with one which is best. You
220 // typically read through the buffer one element after another. But it's not rare to do put-backs of an
221 // element or two. And if you do that - at the edges - you can get degenerate behavior - keep filling cache
222 // going back one and forward one. The PingPong/2 value here prevents that degenerate case.
223 static constexpr size_t kCountPingPingBufs_ = 2;
224
225 private:
226 // Hack to allow use of inline buffer and uninitialized array even though Character class not 'trivial', it probably should be (or I'm not checking the right trait)
227 using InlineBufferElementType_ = conditional_t<same_as<Characters::Character, ElementType>, char32_t, ElementType>;
228 static_assert (sizeof (ElementType) == sizeof (InlineBufferElementType_));
229
230 private:
231 struct CacheBlock_ {
232 CacheBlock_ () = default;
233 size_t GetSize () const;
234 SeekOffsetType GetStart () const;
235 SeekOffsetType GetEnd () const;
236 optional<ElementType> Peek1FromCache (SeekOffsetType actualOffset) const;
237 optional<ElementType> Read1FromCache (SeekOffsetType* actualOffset);
238 optional<size_t> ReadFromCache (SeekOffsetType* actualOffset, span<ElementType> into);
239 void FillCacheWith (SeekOffsetType s, span<InlineBufferElementType_> into);
240
241 private:
242 // Cache uses char32_t (InlineBufferElementType_) instead of Character so can use resize_uninitialized () - requires is_trivially_constructible
243 SeekOffsetType fCacheWindowBufStart_{0}; // buffer a 'window' around the current data. Not the whole file, but can be pretty big
245 };
246
247 private:
248 typename InputStream::Ptr<ElementType> fStrm_;
249 SeekOffsetType fOffset_{0};
250 SeekOffsetType fFarthestReadInUnderlyingStream_{0};
251 CacheBlock_ fCacheBlocks_[kCountPingPingBufs_];
252 size_t fCacheBlockLastFilled_{0};
253
254 private:
255 nonvirtual optional<ElementType> Peek1FromCache_ () const;
256 nonvirtual optional<ElementType> Read1FromCache_ ();
257 nonvirtual optional<size_t> ReadFromCache_ (span<ElementType> into);
258 nonvirtual void FillCacheWith_ (SeekOffsetType s, span<InlineBufferElementType_> into);
259 nonvirtual optional<size_t> Read_Slow_Case_ (span<ElementType> into, NoDataAvailableHandling blockFlag);
260 };
261
262}
263
264/*
265 ********************************************************************************
266 ***************************** Implementation Details ***************************
267 ********************************************************************************
268 */
269#include "StreamReader.inl"
270
271#endif /*_Stroika_Foundation_Streams_StreamReader_h_*/
NoDataAvailableHandling
If eDontBlock passed to most Stream APIs, then when the code would do a blocking read,...
Definition Stream.h:90
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
StreamReader is a non-essential Stream utility, adding simplicity of use for a common use case,...
nonvirtual optional< size_t > AvailableToRead() const
returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typi...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it m...
nonvirtual bool IsAtEOF()
check if the stream is currently at EOF
nonvirtual optional< ElementType > Peek()
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Peek () but reading cached data.
nonvirtual optional< span< ElementType > > Read(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)
Read into data referenced by span argument - and using argument blocking strategy (default blocking)
nonvirtual SeekOffsetType GetOffset() const
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::GetOffset () - but without being 'synchronized'...
nonvirtual size_t ReadAll(ElementType *intoStart, ElementType *intoEnd)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::ReadAll ()
nonvirtual optional< SeekOffsetType > RemainingLength() const
returns nullopt if not known (typical, and the default) - but sometimes it is known,...
nonvirtual optional< ElementType > ReadBlocking()
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...
nonvirtual optional< span< ElementType > > ReadNonBlocking(span< ElementType > intoBuffer)
read into intoBuffer - returning nullopt if would block, and else returning subspan of input with rea...
nonvirtual span< ElementType > ReadOrThrow(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)
Read (either one or into argument span) and taking NoDataAvailableHandling blockFlag),...