Stroika Library 3.0d20
 
Loading...
Searching...
No Matches
StreamReader.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Streams_StreamReader_h_
5#define _Stroika_Foundation_Streams_StreamReader_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <memory>
10
11#include "Stroika/Foundation/Common/Common.h"
13
14/**
15 * \file
16 *
17 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
18 */
19
20namespace Stroika::Foundation::Streams {
21
22 /**
23 * \brief StreamReader is a non-essential Stream utility, adding simplicity of use for a common use case, and a significant performance boost.
24 *
25 * \see also https://learn.microsoft.com/en-us/dotnet/api/system.io.streamreader?view=net-8.0 - similar idea, except there you specify conversion from
26 * binary stream.
27 *
28 * StreamReader is an unnecessary class for using the Streams library, but it is easy to use, has a similar API to InputStream::Ptr<T>, and is significantly more performant.
29 *
30 * \note Similar to BufferedInputStream - but that provides an actual Stream object, and is slightly less performant for direct use.
31 *
32 * TODO:
33 * \todo consider if should take templated parameter indicating buffer size (really template parameter StreamReaderDefaultOptions - with various settings)
34 */
35 template <typename ELEMENT_TYPE>
36 struct StreamReader {
37 public:
38 /** \brief The element type read by this StreamReader (same as the ELEMENT_TYPE template argument).
39 */
40 using ElementType = ELEMENT_TYPE;
41
42 public:
43 /**
44 * \note Do NOT use the InputStream::Ptr<ElementType> passed in at the same time as it's being used by the
45 * StreamReader, or grave disorder may result. StreamReader assumes it's the only one seeking and reading
46 * through the input stream. See SynchronizeToUnderlyingStream ().
47 *
48 * \note At destruction, StreamReader automatically calls SynchronizeToUnderlyingStream ()
49 *
50 * \pre underlyingReadFromStreamAdopted.Seekable (); // so it can read ahead and synchronize back in dtor
51 *
52 * \note a future version of StreamReader might allow not underlyingReadFromStreamAdopted.Seekable () and just pre-read to allow seeking, but then
53 * the 'synchronize' feature won't work
54 */
55 StreamReader (const typename InputStream::Ptr<ElementType>& underlyingReadFromStreamAdopted);
56 StreamReader () = delete;
57 StreamReader (const StreamReader&) = delete;
58
59 public:
60 /**
61 * \note at destruction, this does IgnoreExceptionsForCall (this->SynchronizeToUnderlyingStream ())
62 */
64
65 public:
66 /**
67 * \brief Read into data referenced by span argument - using the blocking strategy given by the blockFlag argument
68 *
69 * returns nullopt ONLY if blockFlag == eDontBlock AND there is NO data available without blocking.
70 *
71 * a returned value that is present (not nullopt), but an empty span, implies EOF.
72 *
73 * When blocking, waits until data is available, but can return with fewer elements than argument span-size
74 * without prejudice about how much more is available.
75 *
76 * \note It is legal to call Read () if it's already returned EOF, but then it MUST return EOF again.
77 *
78 * \pre not intoBuffer.empty ()
79 *
80 * @see ReadAll () to read all the data from the stream at once.
81 *
82 * @see ReadBlocking () - often simplest to use API
83 * @see ReadOrThrow ()
84 * @see ReadNonBlocking ()
85 *
86 * \see also InputStream::Ptr::Read () - identical API (except non-const)
87 */
88 nonvirtual optional<span<ElementType>> Read (span<ElementType> intoBuffer, NoDataAvailableHandling blockFlag);
89
90 public:
91 /**
92 * \brief ReadBlocking () reads either a single element, or fills in argument intoBuffer - blocking as needed, so it never reports 'would block' (nor throws EWouldBlock)
93 *
94 * ReadBlocking ():
95 * Reads a single element (blocking as long as needed) - or nullopt when at EOF.
96 *
97 * ReadBlocking(span<ElementType> intoBuffer)
98 * returns the subspan of intoBuffer filled in, with at least one element (or empty if at EOF)
99 * NOTE(review): the (InlineBuffer*, upToSentinel) overload presumably reads up to the sentinel element - confirm in StreamReader.inl
100 * \see also InputStream::Ptr::ReadBlocking () - identical API (except non-const)
101 */
102 nonvirtual optional<ElementType> ReadBlocking ();
103 nonvirtual span<ElementType> ReadBlocking (span<ElementType> intoBuffer);
104 nonvirtual span<ElementType> ReadBlocking (Memory::InlineBuffer<ElementType>* intoBuffer, ElementType upToSentinel);
105
106 public:
107 /**
108 * \brief read into intoBuffer - returning nullopt if would block, and else returning subspan of input with read data present
109 *
110 * same as Read (intoBuffer, NoDataAvailableHandling::eDontBlock)
111 *
112 * \see also InputStream::Ptr::ReadNonBlocking () - identical API (except non-const)
113 */
114 nonvirtual optional<span<ElementType>> ReadNonBlocking (span<ElementType> intoBuffer);
115
116 public:
117 /**
118 * \brief Read into argument span (taking NoDataAvailableHandling blockFlag), and throw if would block
119 *
120 * same as Read() APIs, but instead of returning optional, for cases where nullopt would be returned, throw EWouldBlock
121 *
122 * \note if blockFlag == eBlockIfNoDataAvailable, this amounts to *Read(...args), since it will never generate an
123 * EWouldBlock exception;
124 *
125 * when to use this variant? If implementing API where you are handed a blockFlag, but want to KISS, and
126 * just throw if EWouldBlock ..
127 *
128 * \see also InputStream::Ptr::ReadOrThrow () - identical API (except non-const)
129 */
130 nonvirtual span<ElementType> ReadOrThrow (span<ElementType> intoBuffer, NoDataAvailableHandling blockFlag);
131
132 public:
133 /**
134 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Peek () but reading cached data
135 */
136 nonvirtual optional<ElementType> Peek ();
137 nonvirtual span<ElementType> Peek (span<ElementType> intoBuffer);
138
139 public:
140 /**
141 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::GetOffset () - but without being 'synchronized' it may be a different value than the underlying stream
142 */
143 nonvirtual SeekOffsetType GetOffset () const;
144
145 public:
146 /**
147 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it may be a different value than the underlying stream
148 */
149 nonvirtual SeekOffsetType Seek (SeekOffsetType offset);
150 nonvirtual SeekOffsetType Seek (Whence whence, SignedSeekOffsetType offset);
151
152 public:
153 /**
154 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::ReadAll ()
155 *
156 * @todo add other overloads from InputStream::Ptr::ReadAll() - ...
157 */
158 nonvirtual size_t ReadAll (ElementType* intoStart, ElementType* intoEnd);
159
160 public:
161 /**
162 * \brief returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typically 1) if that number is known to be available to read
163 *
164 * \see also (different) RemainingLength ()
165 */
166 nonvirtual optional<size_t> AvailableToRead () const;
167
168 public:
169 /**
170 * \brief returns nullopt if not known (typical, and the default) - but sometimes it is known (and quite helpful)
171 *
172 * \note - Similar to AvailableToRead, but different. For example, on a socket-stream, you can tell how many bytes
173 * are available to read (buffered by kernel). But no guess about the remaining length of the stream (how many bytes
174 * will appear before end).
175 *
176 * But for a disk file, you MIGHT (not always - like unix special files) know the length of the file. This is for that case.
177 */
178 nonvirtual optional<SeekOffsetType> RemainingLength () const;
179
180 public:
181 /**
182 * If you must use the underlying stream along-side StreamReader, you can use
183 * SynchronizeToUnderlyingStream and SynchronizeFromUnderlyingStream to allow each class to update
184 * each other. SynchronizeToUnderlyingStream () is also called automatically at destruction.
185 */
186 nonvirtual void SynchronizeToUnderlyingStream ();
187
188 public:
189 /**
190 * If you must use the underlying stream along-side StreamReader, you can use
191 * SynchronizeToUnderlyingStream and SynchronizeFromUnderlyingStream to allow each class to update
192 * each other.
193 */
194 nonvirtual void SynchronizeFromUnderlyingStream ();
195
196 public:
197 nonvirtual bool IsAtEOF (); // NOTE(review): presumably true when no further elements can be read; non-const, so it may read ahead - confirm in StreamReader.inl
198
199 public:
200 [[deprecated ("Since Stroika v3.0d5 use Read/1-span")]] size_t Read (ElementType* intoStart, ElementType* intoEnd)
201 {
202 return ReadBlocking (span{intoStart, intoEnd}).size (); // blocking read (0 => EOF); Read () has no one-argument overload and returns optional<>, which has no size ()
203 }
204
205 private:
206 // may want to tune these; but I did a little tuning on Windows --LGP 2022-12-17
207 static constexpr size_t kDefaultReadBufferSize_ = 8 * 1024;
208 static constexpr size_t kMaxBufferedChunkSize_ = 64 * 1024;
209 // Note the reason for kCountPingPingBufs_ == 2: generally we get a hit with one, which is best. You
210 // typically read through the buffer one element after another. But it's not rare to do put-backs of an
211 // element or two. And if you do that - at the edges - you can get degenerate behavior - keep filling cache
212 // going back one and forward one. The PingPong/2 value here prevents that degenerate case.
213 static constexpr size_t kCountPingPingBufs_ = 2;
214
215 private:
216 // Hack to allow use of inline buffer and uninitialized array even though Character class is not 'trivial'; it probably should be (or I'm not checking the right trait)
217 using InlineBufferElementType_ = conditional_t<same_as<Characters::Character, ElementType>, char32_t, ElementType>;
218 static_assert (sizeof (ElementType) == sizeof (InlineBufferElementType_));
219
220 private:
221 struct CacheBlock_ {
222 CacheBlock_ () = default;
223 size_t GetSize () const;
224 SeekOffsetType GetStart () const;
225 SeekOffsetType GetEnd () const;
226 optional<ElementType> Peek1FromCache (SeekOffsetType actualOffset) const;
227 optional<ElementType> Read1FromCache (SeekOffsetType* actualOffset);
228 optional<size_t> ReadFromCache (SeekOffsetType* actualOffset, span<ElementType> into);
229 void FillCacheWith (SeekOffsetType s, span<InlineBufferElementType_> into);
230
231 private:
232 // Cache uses char32_t instead of Character so can use resize_uninitialized () - requires is_trivially_constructible
233 SeekOffsetType fCacheWindowBufStart_{0}; // buffer a 'window' around the current data. Not necessarily the whole file, but can be pretty big
235 };
236
237 private:
238 typename InputStream::Ptr<ElementType> fStrm_;
239 SeekOffsetType fOffset_{0};
240 SeekOffsetType fFarthestReadInUnderlyingStream_{0};
241 CacheBlock_ fCacheBlocks_[kCountPingPingBufs_];
242 size_t fCacheBlockLastFilled_{0};
243
244 private:
245 nonvirtual optional<ElementType> Peek1FromCache_ () const;
246 nonvirtual optional<ElementType> Read1FromCache_ ();
247 nonvirtual optional<size_t> ReadFromCache_ (span<ElementType> into);
248 nonvirtual void FillCacheWith_ (SeekOffsetType s, span<InlineBufferElementType_> into);
249 nonvirtual optional<size_t> Read_Slow_Case_ (span<ElementType> into, NoDataAvailableHandling blockFlag);
250 };
251
252}
253
254/*
255 ********************************************************************************
256 ***************************** Implementation Details ***************************
257 ********************************************************************************
258 */
259#include "StreamReader.inl"
260
261#endif /*_Stroika_Foundation_Streams_StreamReader_h_*/
NoDataAvailableHandling
If eDontBlock passed to most Stream APIs, then when the code would do a blocking read,...
Definition Stream.h:90
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
StreamReader is a non-essential Stream utility, adding simplicity of use for a common use case,...
nonvirtual optional< size_t > AvailableToRead() const
returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typi...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it m...
nonvirtual optional< ElementType > Peek()
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Peek () but reading cached data.
nonvirtual optional< span< ElementType > > Read(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)
Read into data referenced by span argument - and using argument blocking strategy (default blocking)
nonvirtual SeekOffsetType GetOffset() const
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::GetOffset () - but without being 'synchronized'...
nonvirtual size_t ReadAll(ElementType *intoStart, ElementType *intoEnd)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::ReadAll ()
nonvirtual optional< SeekOffsetType > RemainingLength() const
returns nullopt if not known (typical, and the default) - but sometimes it is known,...
nonvirtual optional< ElementType > ReadBlocking()
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...
nonvirtual optional< span< ElementType > > ReadNonBlocking(span< ElementType > intoBuffer)
read into intoBuffer - returning nullopt if would block, and else returning subspan of input with rea...
nonvirtual span< ElementType > ReadOrThrow(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)
Read (either one or into argument span) and taking NoDataAvailableHandling blockFlag),...