Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
BLOB.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Memory_BLOB_h_
5#define _Stroika_Foundation_Memory_BLOB_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <array>
10#include <compare>
11#include <memory>
12#include <vector>
13
14#include "Stroika/Foundation/Common/Common.h"
15#include "Stroika/Foundation/Common/Concepts.h"
17#include "Stroika/Foundation/Memory/Common.h"
18#include "Stroika/Foundation/Streams/InputStream.h" // maybe bad - leads to circularity problems but hard to pre-declare InputStream
19
20/**
21 * \file
22 *
23 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
24 *
25 */
26
27namespace Stroika::Foundation::Characters {
28 class String;
29}
30
31namespace Stroika::Foundation::Cryptography::Encoding::Algorithm::Base64 {
32 struct Options;
33}
34
35namespace Stroika::Foundation::Memory {
36
37 /**
38 * A BLOB is a read-only binary region of memory. Once a BLOB is constructed, the data inside cannot
39 * change (except by assignment - being assigned over).
40 *
41 * A BLOB can be thought of as a 'forever' span<const byte>.
42 *
43 * \note Satisfies Concepts:
44 * o static_assert (totally_ordered<BLOB>);
45 * o static_assert (regular<BLOB>);
46 *
47 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety">C++-Standard-Thread-Safety</a>
48 *
49 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
50 * o static_assert (totally_ordered<BLOB>);
51 *
52 * This is like memcmp() - bytewise unsigned comparison
53 *
54 * \note Interactions with Memory::MemoryMappedFileReader
55 * We provide no AUTOMATIC way to combine these, because it's not safe in general (though it can be, given specific application
56 * knowledge). And it can be quite efficient. So use BLOB::Attach () on some existing MemoryMappedFileReader - but it's up to the user/
57 * application to ensure the lifetime of the file reader exceeds that of any copies of the shared rep derived from the attached BLOB.
58 *
59 * \note Performance
60 * o Copying a BLOB is just copying a shared_ptr
61 * o Allocation should be extremely cheap, due to the use of make_shared<> or allocate_shared<> or
62 * Memory::UseBlockAllocationIfAppropriate<>
63 * which should use the block allocation storage mechanism, which is generally a lock free very fast allocator.
64 * And the use of InlineBuffer<64> means that allocation of BLOBs of size <= 64 should require no calls to the
65 * global ::operator new/malloc/free/delete
66 */
67 class BLOB {
68 protected:
69 struct _IRep;
70
71 public:
72 /**
73 * \par Example Usage
74 * \code
75 * BLOB b1 = BLOB{ 0x29, 0x14, 0x4a };
76 * \endcode
77 *
78 * \see also FromHex, FromRaw (constructor like static functions with special names to make treatment more clear)
79 * \see also Attach, and AttachAndDelete, more rarely useful (and dangerous).
80 */
81 BLOB ();
82 // clang-format off
83 BLOB (BLOB&& src) noexcept = default;
84 BLOB (const BLOB& src) noexcept = default;
85 // clang-format on
86 template <ranges::range CONTAINER_OF_BYTE>
87 BLOB (const CONTAINER_OF_BYTE& data)
88 requires (is_convertible_v<typename CONTAINER_OF_BYTE::value_type, byte> or
89 is_convertible_v<typename CONTAINER_OF_BYTE::value_type, uint8_t>);
90 BLOB (span<const byte> s);
91 BLOB (const byte* start, const byte* end);
92 BLOB (const uint8_t* start, const uint8_t* end);
93 BLOB (const initializer_list<pair<const byte*, const byte*>>& startEndPairs);
94 BLOB (const initializer_list<BLOB>& list2Concatenate);
95 BLOB (const initializer_list<byte>& bytes);
96 BLOB (const initializer_list<uint8_t>& bytes);
97
98 protected:
99 /**
100 * Subclass BLOB, and provide your own 'rep' type, to create more efficient storage.
101 */
102 explicit BLOB (const shared_ptr<_IRep>& rep);
103 explicit BLOB (shared_ptr<_IRep>&& rep);
104
105 public:
106 /**
107 */
108 nonvirtual BLOB& operator= (BLOB&& rhs) noexcept = default;
109 nonvirtual BLOB& operator= (const BLOB& rhs) = default;
110
111 public:
112 /**
113 * \brief Convert string of hex bytes to BLOB.
114 *
115 * Like a constructor, but where you clearly name the intention of how to interpret the
116 * bytes.
117 *
118 * Spaces allowed, but treat as array of (possibly space delimited) hex bytes to BLOB.
119 *
120 * Upper/LowerCase OK, but invalid characters generate throw.
121 *
122 * \par Example Usage
123 * \code
124 * Assert ((BLOB::FromHex ("29144adb4ece20450956e813652fe8d6") == BLOB { 0x29, 0x14, 0x4a, 0xdb, 0x4e, 0xce, 0x20, 0x45, 0x09, 0x56, 0xe8, 0x13, 0x65, 0x2f, 0xe8, 0xd6 }));
125 * \endcode
126 *
127 * \todo probably allow optionally leading 0x....
128 */
129 static BLOB FromHex (const char* b);
130 static BLOB FromHex (span<const char> s);
131 static BLOB FromHex (string_view s);
132 static BLOB FromHex (const Characters::String& s);
133
134 public:
135 /**
136 * \brief Convert string of base64 bytes to BLOB.
137 *
138 * Like a constructor, but where you clearly name the intention of how to interpret the bytes.
139 *
140 * Spaces allowed, but treat as array of (possibly space delimited) base64 bytes to BLOB.
141 *
142 * Upper/LowerCase OK, but invalid characters generate throw.
143 *
144 * \par Example Usage
145 * \code
146 * Assert ((BLOB::FromBase64 ("aGVsbG8=") == BLOB { 'h', 'e', 'l', 'l', 'o' }));
147 * \endcode
148 *
149 * \todo probably allow optionally leading 0x....
150 */
151 static BLOB FromBase64 (const char* b);
152 static BLOB FromBase64 (span<const char> s);
153 static BLOB FromBase64 (string_view s);
154 static BLOB FromBase64 (const Characters::String& s);
155
156 public:
157 /**
158 * \brief Convert pointed to/referenced data to BLOB (treating the argument as raw bytes).
159 *
160 * Like a constructor, but where you clearly name the intention of how to interpret the
161 * bytes.
162 *
163 * This does little more than a cast (taking into account sizeof T).
164 *
165 * Overloads taking const char*, or const wchar_t* only (char_traits<T>), are assumed to be C-Strings (NUL-terminated).
166 *
167 * \note ALL overloads require T to be 'trivially_copyable' - just like memcpy()
168 */
169 template <Common::trivially_copyable T>
170 static BLOB FromRaw (const T* s, const T* e);
171 template <Common::trivially_copyable T>
172 static BLOB FromRaw (const T* s, size_t sz);
173 template <Common::trivially_copyable T>
174 static BLOB FromRaw (const T* s)
175 requires (same_as<typename char_traits<T>::char_type, T>);
176 template <Common::trivially_copyable T>
177 static BLOB FromRaw (const basic_string<T>& s)
178 requires (same_as<typename char_traits<T>::char_type, T>);
179 template <Common::trivially_copyable T>
180 static BLOB FromRaw (const T& s);
181
182 public:
183 /**
184 * \brief Create a BLOB from the given data - without copying the data (dangerous if not used carefully, but can be used to efficiently reference constant data).
185 *
186 * \note it's ILLEGAL, and may cause grave disorder, if the caller changes the data passed to Attach () while the derived BLOB (or a copy) exists.
187 *
188 * Typically this is intended to be used to wrap permanent constant data, such as static (text space) read-only data, ROM stuff, etc. It can be used to wrap
189 * data in memory mapped files, but IFF that file mapping will remain permanent (and data immutable). USE WITH CAUTION!
190 *
191 * \see also AttachAndDelete
192 */
193 template <typename BYTEISH, size_t EXTENT = dynamic_extent>
194 static BLOB Attach (span<BYTEISH, EXTENT> s)
195 requires (convertible_to<BYTEISH, const byte> or convertible_to<BYTEISH, const uint8_t>);
196 template <typename BYTEISH, size_t EXTENT>
197 static BLOB Attach (BYTEISH (&data)[EXTENT])
198 requires (convertible_to<BYTEISH, const byte> or convertible_to<BYTEISH, const uint8_t>);
199
200 public:
201 /**
202 * \brief like Attach () - but when the last reference to the BLOB goes away, delete[] is called on the data
203 *
204 * RARELY useful, but could be needed if you must manually fill in the data after allocation, before wrapping it in a BLOB.
205 * Note - because of how the data is deleted, you must allocate with new byte[nnn].
206 */
207 static BLOB AttachAndDelete (const byte* s, size_t arrayLen);
208
209 public:
210 /**
211 * \pre i < size ();
212 */
213 nonvirtual byte operator[] (const size_t i) const;
214
215 public:
216 /**
217 * Returns true iff the size of the BLOB is zero.
218 */
219 nonvirtual bool empty () const;
220
221 public:
222 /**
223 * Convert BLOB losslessly into a standard C++ type.
224 * Supported Types for 'T' include:
225 * o span<const byte>
226 * o span<const uint8_t>
227 * o pair<const byte*, const byte*>
228 * o pair<const uint8_t*, const uint8_t*>
229 * o vector<byte>
230 * o vector<uint8_t>
231 * o Streams::InputStream::Ptr<byte>
232 * o string (bytes as characters - note this MAY include NUL-bytes - https://stackoverflow.com/questions/2845769/can-a-stdstring-contain-embedded-nulls)
233 * o any T where is_trivially_copyable
234 *
235 * \note If T is span<> or pair<> (byte or uint8_t) - this returns INTERNAL pointers into the BLOB storage,
236 * so use with care.
237 */
238 template <typename T>
239 nonvirtual T As () const
240#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
241 // clang-format off
242 requires (
243 Common::IAnyOf<T,span<const byte>,span<const uint8_t>, pair<const byte*, const byte*>, pair<const uint8_t*, const uint8_t*>, vector<byte> ,vector<uint8_t>, Streams::InputStream::Ptr<byte>,string>
244 or is_trivially_copyable_v<T>
245 )
246#endif
247 ;
248 // clang-format on
249
250 public:
251 /**
252 * Return a string of hex bytes - two characters per byte, lower case HEX characters.
253 *
254 * \par Example Usage
255 * \code
256 * Assert (BLOB::FromHex ("29144adb4ece20450956e813652fe8d6").AsHex () == "29144adb4ece20450956e813652fe8d6");
257 * \endcode
258 *
259 * \see also FromHex ()
260 */
261 template <typename STRING_TYPE = Characters::String>
262 nonvirtual STRING_TYPE AsHex (size_t maxBytesToShow = numeric_limits<size_t>::max ()) const
263#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
264 requires (same_as<Characters::String, STRING_TYPE>)
265#endif
266 ;
267
268 public:
269 /**
270 * Return a string of base64 encoded bytes.
271 *
272 * \par Example Usage
273 * \code
274 * Assert (BLOB{'h', 'e', 'l', 'l', 'o'}.AsBase64 () == "aGVsbG8=");
275 * \endcode
276 *
277 * \see also AsHex (), FromBase64
278 */
279 template <typename STRING_TYPE = Characters::String>
280 nonvirtual STRING_TYPE AsBase64 () const
281#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
282 requires (same_as<Characters::String, STRING_TYPE>)
283#endif
284 ;
285 template <typename STRING_TYPE = Characters::String>
286 nonvirtual STRING_TYPE AsBase64 (const Cryptography::Encoding::Algorithm::Base64::Options& o) const
287#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
288 requires (same_as<Characters::String, STRING_TYPE>)
289#endif
290 ;
291
292 public:
293 /**
294 * EXPERIMENT: MAKING THIS A NON-EXPLICIT conversion operator. Makes a bunch of other code simpler, and it makes sense.
295 * Trouble is - will it cause ambiguity later, and need to be removed? No obvious way to tell except by testing a bit.
296 * --LGP 2023-12-21
297 */
298 /*explicit*/ operator Streams::InputStream::Ptr<byte> () const;
299
300 public:
301 /**
302 * Return a BLOB made by concatenating this BLOB count times.
303 */
304 nonvirtual BLOB Repeat (unsigned int count) const;
305
306 public:
307 /**
308 * Return a BLOB made from the given sub-range of bytes.
309 * \pre startAt <= endAt
310 * \pre endAt < GetSize ()
311 */
312 nonvirtual BLOB Slice (size_t startAt, size_t endAt) const;
313
314 public:
315 /**
316 * Pointers returned by begin(), remain valid for the lifetime of the containing BLOB.
317 * \note - this is legal to call even on an empty BLOB (but may return nullptr)
318 */
319 nonvirtual const byte* begin () const;
320
321 public:
322 /**
323 * Pointers returned by end(), remain valid for the lifetime of the containing BLOB.
324 * \note - this is legal to call even on an empty BLOB (but may return nullptr)
325 */
326 nonvirtual const byte* end () const;
327
328 public:
329 /**
330 * Returns the number of bytes in the BLOB.
331 */
332 nonvirtual size_t GetSize () const;
333
334 public:
335 /**
336 */
337 nonvirtual strong_ordering operator<=> (const BLOB& rhs) const;
338
339 public:
340 /**
341 */
342 nonvirtual bool operator== (const BLOB& rhs) const;
343
344 private:
345 static strong_ordering TWC_ (const BLOB& lhs, const BLOB& rhs); // utility code share between c++17 and c++20 versions
346
347 public:
348 /**
349 * @todo cleanup and switch to spans!!
350 */
351 nonvirtual const byte* data () const;
352
353 public:
354 /**
355 * @aliases GetSize()
356 */
357 nonvirtual size_t size () const;
358
359 public:
360 /**
361 * @aliases GetSize()
362 */
363 nonvirtual size_t length () const;
364
365 public:
366 /**
367 * @see Characters::ToString()
368 * Return a debug-friendly, display version of the current BLOB. This is not guaranteed parsable or usable except for debugging.
369 *
370 * \note this displays the BLOB as hex bytes. To interpret as a string, try
371 * \code
372 * Assert (BinaryToText::Reader::New (blob).ReadAll () == "hello world");
373 * \endcode
374 */
375 nonvirtual Characters::String ToString (size_t maxBytesToShow = 80) const;
376
377 public:
378 /**
379 * Trivial alias BLOB ({*this, rhs});
380 */
381 nonvirtual BLOB operator+ (const BLOB& rhs) const;
382
383 public:
384 [[deprecated ("Since Stroika v3.0d5 use span overload")]] static BLOB FromHex (const char* s, const char* e);
385 template <typename T>
386 [[deprecated ("Since Stroika v3.0d5 - use As/0")]] void As (T* into) const
387 {
388 *into = this->As<T> ();
389 }
390 [[deprecated ("Since Stroika v3.0d4 use span")]] static BLOB Attach (const byte* start, const byte* end)
391 {
392 return Attach (span{start, end});
393 }
394 [[deprecated ("Since Stroika v3.0d4 use Attach")]] static BLOB AttachApplicationLifetime (const byte* start, const byte* end)
395 {
396 return Attach (span{start, end});
397 }
398 template <size_t SIZE>
399 [[deprecated ("Since Stroika v3.0d4 use Attach")]] static BLOB AttachApplicationLifetime (const byte (&data)[SIZE])
400 {
401 return Attach (span{data, SIZE});
402 }
403 template <typename... ARGS>
404 [[deprecated ("Since Stroika v3.0d5 use FromHex")]] static BLOB Hex (ARGS... args)
405 {
406 return FromHex (args...);
407 }
408 template <typename... ARGS>
409 [[deprecated ("Since Stroika v3.0d5 use FromRaw")]] static BLOB Raw (ARGS... args)
410 {
411 return FromRaw (args...);
412 }
413
414 private:
415 struct BasicRep_;
416 struct ZeroRep_;
417 struct AdoptRep_; // e.g. for static constexpr arrays
418 struct AdoptAndDeleteRep_; // for user allocated new byte[]....
419
420 private:
421 [[no_unique_address]] Debug::AssertExternallySynchronizedMutex fThisAssertExternallySynchronized_;
422 shared_ptr<_IRep> fRep_;
423 };
424 static_assert (totally_ordered<BLOB>);
425 static_assert (regular<BLOB>);
426
427 // @todo make this work
428 //static_assert (convertible_to<BLOB, span<byte>>);
429
430 /**
431 * This abstract interface defines the behavior of a BLOB: GetBounds () is its only pure-virtual member (returning a span<const byte>), and reps are not copyable (copy construction and copy assignment are both deleted).
432 */
433 struct BLOB::_IRep {
434 _IRep () = default;
435 _IRep (const _IRep&) = delete; // reps cannot be copy-constructed
436 virtual ~_IRep () = default; // virtual, since this is a polymorphic base
437 virtual span<const byte> GetBounds () const = 0; // presumably the bytes the owning BLOB exposes - TODO confirm against BLOB.inl
438
439 nonvirtual const _IRep& operator= (const _IRep&) = delete; // nor copy-assigned
440 };
441
442 inline namespace Literals {
443
444 /**
445 * @brief alias for BLOB::Attach - so treats the characters of the string literal (str, len) as a BLOB of bytes
446 *
447 * \par Example Usage
448 * \code
449 * BLOB b1 = "hello"_blob;
450 * Assert (b1.size () == 5);
451 * Assert (b1[0] == static_cast<byte> ('h'));
452 * \endcode
453 */
454 inline BLOB operator"" _blob (const char* str, size_t len);
455
456 }
457
458}
459
460/*
461 ********************************************************************************
462 ***************************** Implementation Details ***************************
463 ********************************************************************************
464 */
465#include "BLOB.inl"
466
467#endif /*_Stroika_Foundation_Memory_BLOB_h_*/
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual size_t length() const
Definition BLOB.inl:271
nonvirtual const byte * end() const
Definition BLOB.inl:258
nonvirtual STRING_TYPE AsBase64() const
nonvirtual Characters::String ToString(size_t maxBytesToShow=80) const
Definition BLOB.cpp:382
nonvirtual BLOB Repeat(unsigned int count) const
Definition BLOB.cpp:349
static BLOB FromHex(const char *b)
Convert string of hex bytes to BLOB.
Definition BLOB.inl:122
nonvirtual byte operator[](const size_t i) const
Definition BLOB.inl:239
nonvirtual BLOB Slice(size_t startAt, size_t endAt) const
Definition BLOB.cpp:374
nonvirtual BLOB operator+(const BLOB &rhs) const
Definition BLOB.inl:306
nonvirtual bool empty() const
Definition BLOB.inl:246
nonvirtual const byte * begin() const
Definition BLOB.inl:253
nonvirtual STRING_TYPE AsHex(size_t maxBytesToShow=numeric_limits< size_t >::max()) const
nonvirtual size_t GetSize() const
Definition BLOB.inl:264
static BLOB FromBase64(const char *b)
Convert string of base64 bytes to BLOB.
Definition BLOB.inl:135
static BLOB FromRaw(const T *s, const T *e)
Convert pointed to/referenced data to BLOB (treating the argument as raw bytes).
Definition BLOB.inl:145
nonvirtual size_t size() const
Definition BLOB.inl:281
nonvirtual const byte * data() const
Definition BLOB.inl:276
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
concept - trivial shorthand for variadic same_as A or same_as B, or ...
Definition Concepts.h:175