Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
StreamReader.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Streams_StreamReader_h_
5#define _Stroika_Foundation_Streams_StreamReader_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <memory>
10
11#include "Stroika/Foundation/Common/Common.h"
13
14/**
15 * \file
16 *
17 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
18 */
19
20namespace Stroika::Foundation::Streams {
21
22 /**
23 * \brief StreamReader is a non-essential Stream utility, adding simplicity of use for a common use case, and a significant performance boost.
24 *
25 * \see also https://learn.microsoft.com/en-us/dotnet/api/system.io.streamreader?view=net-8.0 - similar idea, except there you specify conversion from
26 * binary stream.
27 *
28 * StreamReader is an unnecessary class for using the Streams library, but it is easy to use, similar to InputStream<T>::Ptr, and significantly more performant
29 *
30 * \note Similar to BufferedInputStream - but that provides an actual Stream object, and is slightly less performant for direct use.
31 *
32 * TODO:
33 * \todo consider whether this should take a template parameter indicating buffer size (really a template parameter StreamReaderDefaultOptions - with various settings)
34 */
35 template <typename ELEMENT_TYPE>
36 struct StreamReader {
37 public:
38 /**
39 */
40 using ElementType = ELEMENT_TYPE;
41
42 public:
43 /**
44 * \note Do NOT use the InputStream::Ptr<ElementType> passed in at the same time as it's being used by the
45 * StreamReader, or grave disorder may result. StreamReader assumes it's the only one seeking and reading
46 * through the input stream. See SynchronizeToUnderlyingStream ().
47 *
48 * \note At destruction, StreamReader automatically calls SynchronizeToUnderlyingStream
49 *
50 * \pre underlyingReadFromStreamAdopted.Seekable ();
51 */
52 StreamReader (const typename InputStream::Ptr<ElementType>& underlyingReadFromStreamAdopted);
53 StreamReader () = delete;
54 StreamReader (const StreamReader&) = delete;
55
56 public:
57 /**
58 */
60
61 public:
62 /**
63 * \brief Read into data referenced by span argument - and using argument blocking strategy (default blocking)
64 *
65 * returns nullopt ONLY if blockFlag = eDontBlock AND there is NO data available without blocking.
66 *
67 * A return value that is present (not nullopt) but holds an empty span implies EOF.
68 *
69 * BLOCKING until data is available, but can return with fewer elements than argument span-size
70 * without prejudice about how much more is available.
71 *
72 * \note It is legal to call Read () if it has already returned EOF, but then it MUST return EOF again.
73 *
74 * \pre not intoBuffer.empty ()
75 *
76 * @see ReadAll () to read all the data from the stream at once.
77 *
78 * @see ReadBlocking () - often simplest to use API
79 * @see ReadOrThrow ()
80 * @see ReadNonBlocking ()
81 *
82 * \see also InputStream::Ptr::Read () - identical API (except non-const)
83 */
84 nonvirtual optional<span<ElementType>> Read (span<ElementType> intoBuffer, NoDataAvailableHandling blockFlag);
85
86 public:
87 /**
88 * \brief ReadBlocking () reads either a single element, or fills in argument intoBuffer - blocking as needed, so it never reports 'would block' (nor throws EWouldBlock)
89 *
90 * ReadBlocking ():
91 * Reads a single element (blocking as long as needed) - or nullopt when at EOF.
92 *
93 * ReadBlocking(span<ElementType> intoBuffer)
94 * fills in subspan with at least one element (or zero if at EOF)
95 *
96 * \see also InputStream::Ptr::ReadBlocking () - identical API (except non-const)
97 */
98 nonvirtual optional<ElementType> ReadBlocking ();
99 nonvirtual span<ElementType> ReadBlocking (span<ElementType> intoBuffer);
100 nonvirtual span<ElementType> ReadBlocking (Memory::InlineBuffer<ElementType>* intoBuffer, ElementType upToSentinel);
101
102 public:
103 /**
104 * \brief read into intoBuffer - returning nullopt if would block, and else returning subspan of input with read data present
105 *
106 * same as Read (intoBuffer, NoDataAvailableHandling::eDontBlock)
107 *
108 * \see also InputStream::Ptr::ReadNonBlocking () - identical API (except non-const)
109 */
110 nonvirtual optional<span<ElementType>> ReadNonBlocking (span<ElementType> intoBuffer);
111
112 public:
113 /**
114 * \brief Read (either one or into argument span), taking NoDataAvailableHandling blockFlag, and throw if would block
115 *
116 * same as Read() APIs, but instead of returning optional, for cases where nullopt would be returned, throw EWouldBlock
117 *
118 * \note if blockFlag == eBlockIfNoDataAvailable, this amounts to *Read(...args), since it will never generate an
119 * EWouldBlock exception;
120 *
121 * when to use this variant? If implementing API where you are handed a blockFlag, but want to KISS, and
122 * just throw if EWouldBlock ..
123 *
124 * \see also InputStream::Ptr::ReadOrThrow () - identical API(except non-const)
125 */
126 nonvirtual span<ElementType> ReadOrThrow (span<ElementType> intoBuffer, NoDataAvailableHandling blockFlag);
127
128 public:
129 /**
130 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Peek () but reading cached data
131 */
132 nonvirtual optional<ElementType> Peek ();
133 nonvirtual span<ElementType> Peek (span<ElementType> intoBuffer);
134
135 public:
136 /**
137 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::GetOffset () - but without being 'synchronized' it may be a different value than the underlying stream
138 */
139 nonvirtual SeekOffsetType GetOffset () const;
140
141 public:
142 /**
143 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it may be a different value than the underlying stream
144 */
145 nonvirtual SeekOffsetType Seek (SeekOffsetType offset);
146 nonvirtual SeekOffsetType Seek (Whence whence, SignedSeekOffsetType offset);
147
148 public:
149 /**
150 * \brief Logically the same as InputStream::Ptr<ELEMENT_TYPE>::ReadAll ()
151 *
152 * @todo add other overloads from InputStream::Ptr::ReadAll() - ...
153 */
154 nonvirtual size_t ReadAll (ElementType* intoStart, ElementType* intoEnd);
155
156 public:
157 /**
158 * \brief returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typically 1) if that number is known to be available to read
159 *
160 * \see also (different) RemainingLength ()
161 */
162 nonvirtual optional<size_t> AvailableToRead () const;
163
164 public:
165 /**
166 * \brief returns nullopt if not known (typical, and the default) - but sometimes it is known, and quite helpful
167 *
168 * \note - Similar to AvailableToRead, but different. For example, on a socket-stream, you can tell how many bytes
169 * are available to read (buffered by kernel). But no guess about the remaining length of the stream (how many bytes
170 * will appear before end).
171 *
172 * But for a disk file, you MIGHT (not always - like unix special files) know the length of the file. This is for that case.
173 */
174 nonvirtual optional<SeekOffsetType> RemainingLength () const;
175
176 public:
177 /**
178 * If you must use the underlying stream along-side StreamReader, you can use
179 * SynchronizeToUnderlyingStream and SynchronizeFromUnderlyingStream to allow each class to update
180 * each other.
181 */
182 nonvirtual void SynchronizeToUnderlyingStream ();
183
184 public:
185 /**
186 * If you must use the underlying stream along-side StreamReader, you can use
187 * SynchronizeToUnderlyingStream and SynchronizeFromUnderlyingStream to allow each class to update
188 * each other.
189 */
190 nonvirtual void SynchronizeFromUnderlyingStream ();
191
192 public:
193 nonvirtual bool IsAtEOF ();
194
195 public:
196 [[deprecated ("Since Stroika v3.0d5 use Read/1-span")]] size_t Read (ElementType* intoStart, ElementType* intoEnd)
197 { // Legacy pointer-pair overload: reads into [intoStart, intoEnd) and returns the number of elements read (zero at EOF)
198 return ReadBlocking (span{intoStart, intoEnd}).size (); // Read (span, blockFlag) yields optional<span<ElementType>> (no size ()); the blocking span overload returns the filled subspan directly
199 }
200
201 private:
202 // may want to tune these; but I did a little tuning on Windows --LGP 2022-12-17
203 static constexpr size_t kDefaultReadBufferSize_ = 8 * 1024;
204 static constexpr size_t kMaxBufferedChunkSize_ = 64 * 1024;
205 // Note the reason for kCountPingPingBufs_ == 2: generally we get a hit with one which is best. You
206 // typically read through the buffer one element after another. But it's not rare to do put-backs of an
207 // element or two. And if you do that - at the edges - you can get degenerate behavior - keep filling cache
208 // going back one and forward one. The PingPong/2 value here prevents that degenerate case.
209 static constexpr size_t kCountPingPingBufs_ = 2;
210
211 private:
212 // Hack to allow use of inline buffer and uninitialized array even though Character class not 'trivial', it probably should be (or I'm not checking the right trait)
213 using InlineBufferElementType_ = conditional_t<same_as<Characters::Character, ElementType>, char32_t, ElementType>;
214 static_assert (sizeof (ElementType) == sizeof (InlineBufferElementType_));
215
216 private:
217 struct CacheBlock_ {
218 CacheBlock_ () = default;
219 size_t GetSize () const;
220 SeekOffsetType GetStart () const;
221 SeekOffsetType GetEnd () const;
222 optional<ElementType> Peek1FromCache (SeekOffsetType actualOffset) const;
223 optional<ElementType> Read1FromCache (SeekOffsetType* actualOffset);
224 optional<size_t> ReadFromCache (SeekOffsetType* actualOffset, span<ElementType> into);
225 void FillCacheWith (SeekOffsetType s, span<InlineBufferElementType_> into);
226
227 private:
228 // Cache uses InlineBufferElementType_ (char32_t when ElementType is Character) instead of Character so can use resize_uninitialized () - requires is_trivially_constructible
229 SeekOffsetType fCacheWindowBufStart_{0}; // buffer a 'window' around the current data. Not infinite the whole file, but can be pretty big
231 };
232
233 private:
234 typename InputStream::Ptr<ElementType> fStrm_;
235 SeekOffsetType fOffset_{0};
236 SeekOffsetType fFarthestReadInUnderlyingStream_{0};
237 CacheBlock_ fCacheBlocks_[kCountPingPingBufs_];
238 size_t fCacheBlockLastFilled_{0};
239
240 private:
241 nonvirtual optional<ElementType> Peek1FromCache_ () const;
242 nonvirtual optional<ElementType> Read1FromCache_ ();
243 nonvirtual optional<size_t> ReadFromCache_ (span<ElementType> into);
244 nonvirtual void FillCacheWith_ (SeekOffsetType s, span<InlineBufferElementType_> into);
245 nonvirtual optional<size_t> Read_Slow_Case_ (span<ElementType> into, NoDataAvailableHandling blockFlag);
246 };
247
248}
249
250/*
251 ********************************************************************************
252 ***************************** Implementation Details ***************************
253 ********************************************************************************
254 */
255#include "StreamReader.inl"
256
257#endif /*_Stroika_Foundation_Streams_StreamReader_h_*/
NoDataAvailableHandling
If eDontBlock passed to most Stream APIs, then when the code would do a blocking read,...
Definition Stream.h:90
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
StreamReader is an non-essential Stream utility, adding simplicity of use for a common use case,...
nonvirtual optional< size_t > AvailableToRead() const
returns nullopt if nothing known available, zero if known EOF, and any other number of elements (typi...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it m...
nonvirtual optional< ElementType > Peek()
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Peek () but reading cached data.
nonvirtual optional< span< ElementType > > Read(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)
Read into data referenced by span argument - and using argument blocking strategy (default blocking)
nonvirtual SeekOffsetType GetOffset() const
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::GetOffset () - but without being 'synchronized'...
nonvirtual size_t ReadAll(ElementType *intoStart, ElementType *intoEnd)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::ReadAll ()
nonvirtual optional< SeekOffsetType > RemainingLength() const
returns nullopt if not known (typical, and the default) - but sometimes it is known,...
nonvirtual optional< ElementType > ReadBlocking()
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...
nonvirtual optional< span< ElementType > > ReadNonBlocking(span< ElementType > intoBuffer)
read into intoBuffer - returning nullopt if would block, and else returning subspan of input with rea...
nonvirtual span< ElementType > ReadOrThrow(span< ElementType > intoBuffer, NoDataAvailableHandling blockFlag)
Read (either one or into argument span) and taking NoDataAvailableHandling blockFlag),...