Stroika Library 3.0d20
 
Loading...
Searching...
No Matches
BLOB.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Memory_BLOB_h_
5#define _Stroika_Foundation_Memory_BLOB_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <array>
10#include <compare>
11#include <memory>
12#include <vector>
13
14#include "Stroika/Foundation/Common/Common.h"
15#include "Stroika/Foundation/Common/Concepts.h"
17#include "Stroika/Foundation/Memory/Common.h"
18#include "Stroika/Foundation/Streams/InputStream.h" // maybe bad - leads to circularity problems but hard to pre-declare InputStream
19
20/**
21 * \file
22 *
23 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
24 *
25 */
26
28 class String;
29}
30
31namespace Stroika::Foundation::Cryptography::Encoding::Algorithm::Base64 {
32 struct Options;
33}
34
35namespace Stroika::Foundation::Memory {
36
37 /**
38 * A BLOB is a read-only binary region of memory. Once a BLOB is constructed, the data inside cannot
39 * change (except by assignment - being assigned over).
40 *
41 * A BLOB can be thought of as a 'forever' span<const byte>.
42 *
43 * \note Satisfies Concepts:
44 * o static_assert (totally_ordered<BLOB>);
45 * o static_assert (regular<BLOB>);
46 * o static_assert (Common::explicitly_convertible_to<BLOB, span<const byte>>);
47 *
48 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety">C++-Standard-Thread-Safety</a>
49 *
50 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
51 * o static_assert (totally_ordered<BLOB>);
52 *
53 * This is like memcmp() - bytewise unsigned comparison
54 *
55 * \note Interactions with Memory::MemoryMappedFileReader
56 * We provide no AUTOMATIC way to combine these, because it's not safe in general (though it can be, given specific application
57 * knowledge), and it can be quite efficient. So use BLOB::Attach() on some existing MemoryMappedFileReader - but it's up to the user/
58 * application to assure the lifetime of the file reader is > any copies of the shared_rep derived from the attached BLOB.
59 *
60 * \note Performance
61 * o Copying a BLOB is just copying a shared_ptr
62 * o Allocation should be extremely cheap, due to the use of make_shared<> or allocate_shared<> or
63 * Memory::UseBlockAllocationIfAppropriate<>
64 * which should use the block allocation storage mechanism, which is generally a lock free very fast allocator.
65 * And the use of InlineBuffer<64> means that allocation of BLOBs of size <= 64 should require no calls to the
66 * global ::operator new/malloc/free/delete
67 */
68 class BLOB {
69 protected:
70 struct _IRep;
71
72 public:
73 /**
74 * \par Example Usage
75 * \code
76 * BLOB b1 = BLOB{ 0x29, 0x14, 0x4a };
77 * \endcode
78 *
79 * \see also FromHex, FromRaw (constructor like static functions with special names to make treatment more clear)
80 * \see also Attach, and AttachAndDelete, more rarely useful (and dangerous).
81 */
82 BLOB ();
83 // clang-format off
84 BLOB (BLOB&& src) noexcept = default;
85 BLOB (const BLOB& src) noexcept = default;
86 // clang-format on
87 template <ranges::range CONTAINER_OF_BYTE>
88 BLOB (const CONTAINER_OF_BYTE& data)
89 requires (is_convertible_v<typename CONTAINER_OF_BYTE::value_type, byte> or
90 is_convertible_v<typename CONTAINER_OF_BYTE::value_type, uint8_t>);
91 BLOB (span<const byte> s);
92 BLOB (span<const uint8_t> s);
93 BLOB (const byte* start, const byte* end);
94 BLOB (const uint8_t* start, const uint8_t* end);
95 BLOB (const initializer_list<pair<const byte*, const byte*>>& startEndPairs);
96 BLOB (const initializer_list<BLOB>& list2Concatenate);
97 BLOB (const initializer_list<byte>& bytes);
98 BLOB (const initializer_list<uint8_t>& bytes);
99
100 protected:
101 /**
102 * Subclass BLOB, and provide your own 'rep' type, to create more efficient storage.
103 */
104 explicit BLOB (const shared_ptr<_IRep>& rep);
105 explicit BLOB (shared_ptr<_IRep>&& rep);
106
107 public:
108 /**
109 */
110 nonvirtual BLOB& operator= (BLOB&& rhs) noexcept = default;
111 nonvirtual BLOB& operator= (const BLOB& rhs) = default;
112
113 public:
114 /**
115 * \brief Convert string of hex bytes to BLOB.
116 *
117 * Like a constructor, but where you clearly name the intention of how to interpret the
118 * bytes.
119 *
120 * Spaces allowed, but treat as array of (possibly space delimited) hex bytes to BLOB.
121 *
122 * Upper/LowerCase OK, but invalid characters generate throw.
123 *
124 * \par Example Usage
125 * \code
126 * Assert ((BLOB::FromHex ("29144adb4ece20450956e813652fe8d6") == BLOB { 0x29, 0x14, 0x4a, 0xdb, 0x4e, 0xce, 0x20, 0x45, 0x09, 0x56, 0xe8, 0x13, 0x65, 0x2f, 0xe8, 0xd6 }));
127 * \endcode
128 *
129 * \todo probably allow optionally leading 0x....
130 */
131 static BLOB FromHex (const char* b);
132 static BLOB FromHex (span<const char> s);
133 static BLOB FromHex (string_view s);
134 static BLOB FromHex (const Characters::String& s);
135
136 public:
137 /**
138 * \brief Convert string of base64 bytes to BLOB.
139 *
140 * Like a constructor, but where you clearly name the intention of how to interpret the bytes.
141 *
142 * Spaces allowed, but treat as array of (possibly space delimited) base64 bytes to BLOB.
143 *
144 * Note base64 is case-sensitive ('a' and 'A' encode different values); invalid characters generate throw.
145 *
146 * \par Example Usage
147 * \code
148 * Assert ((BLOB::FromBase64 ("aGVsbG8=") == BLOB { 'h', 'e', 'l', 'l', 'o' }));
149 * \endcode
150 *
151 * \todo (review) the 'optionally leading 0x' idea appears copied from FromHex and does not seem to apply to base64 - confirm and remove.
152 */
153 static BLOB FromBase64 (const char* b);
154 static BLOB FromBase64 (span<const char> s);
155 static BLOB FromBase64 (string_view s);
156 static BLOB FromBase64 (const Characters::String& s);
157
158 public:
159 /**
160 * \brief Convert pointed to/referenced data to BLOB (treating the argument as raw bytes).
161 *
162 * Like a constructor, but where you clearly name the intention of how to interpret the
163 * bytes.
164 *
165 * This does little more than a cast (taking into account sizeof T).
166 *
167 * Overloads taking const char*, or const wchar_t* only (char_traits<T>), are assumed to be C-Strings (NUL-terminated).
168 *
169 * \note ALL overloads require T to be 'trivially_copyable' - just like memcpy()
170 */
171 template <Common::trivially_copyable T>
172 static BLOB FromRaw (const T* s, const T* e);
173 template <Common::trivially_copyable T>
174 static BLOB FromRaw (const T* s, size_t sz);
175 template <Common::trivially_copyable T>
176 static BLOB FromRaw (const T* s)
177 requires (same_as<typename char_traits<T>::char_type, T>);
178 template <Common::trivially_copyable T>
179 static BLOB FromRaw (const basic_string<T>& s)
180 requires (same_as<typename char_traits<T>::char_type, T>);
181 template <Common::trivially_copyable T>
182 static BLOB FromRaw (const T& s);
183
184 public:
185 /**
186 * \brief Create a BLOB from the given data - without copying the data (dangerous if not used carefully, but can be used to efficiently reference constant data).
187 *
188 * \note it's ILLEGAL and may cause grave disorder, if the caller changes the data passed to Attach() while the derived BLOB (or a copy) exists.
189 *
190 * Typically this is intended to be used to wrap permanent constant data, such as static (text space) read-only data, ROM stuff, etc. It can be used to wrap
191 * data in memory mapped files, but IFF that file mapping will remain permanent (and data immutable). USE WITH CAUTION!
192 *
193 * \see also AttachAndDelete
194 */
195 template <typename BYTEISH, size_t EXTENT = dynamic_extent>
196 static BLOB Attach (span<BYTEISH, EXTENT> s)
197 requires (convertible_to<BYTEISH, const byte> or convertible_to<BYTEISH, const uint8_t>);
198 template <typename BYTEISH, size_t EXTENT>
199 static BLOB Attach (BYTEISH (&data)[EXTENT])
200 requires (convertible_to<BYTEISH, const byte> or convertible_to<BYTEISH, const uint8_t>);
201
202 public:
203 /**
204 * \brief like Attach () - but when the last reference to the BLOB goes away, will call delete[] on the data
205 *
206 * RARELY useful, but could be needed if you must manually fill in the data after allocation, before wrapping it in a BLOB.
207 * Note - because of how the data is deleted, you must allocate with new byte[nnn].
208 */
209 static BLOB AttachAndDelete (const byte* s, size_t arrayLen);
210
211 public:
212 /**
213 * \pre i < size ();
214 */
215 nonvirtual byte operator[] (const size_t i) const;
216
217 public:
218 /**
219 * Returns true iff the size of the BLOB is zero.
220 */
221 nonvirtual bool empty () const;
222
223 public:
224 /**
225 * Convert BLOB losslessly into a standard C++ type.
226 * Supported Types for 'T' include:
227 * o span<const byte>
228 * o span<const uint8_t>
229 * o pair<const byte*, const byte*>
230 * o pair<const uint8_t*, const uint8_t*>
231 * o vector<byte>
232 * o vector<uint8_t>
233 * o Streams::InputStream::Ptr<byte>
234 * o string (bytes as characters - note this MAY include NUL-bytes - https://stackoverflow.com/questions/2845769/can-a-stdstring-contain-embedded-nulls)
235 * o any T where is_trivially_copyable
236 *
237 * \note If T is span<> or pair<> (byte or uint8_t) - this returns INTERNAL pointers into the BLOB storage,
238 * so use with care.
239 */
240 template <typename T>
241 nonvirtual T As () const
242#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
243 // clang-format off
244 requires (
245 Common::IAnyOf<T,span<const byte>,span<const uint8_t>, pair<const byte*, const byte*>, pair<const uint8_t*, const uint8_t*>, vector<byte> ,vector<uint8_t>, Streams::InputStream::Ptr<byte>,string>
246 or is_trivially_copyable_v<T>
247 )
248#endif
249 ;
250 // clang-format on
251
252 public:
253 /**
254 * Return a string of hex bytes - two characters per byte, lower case HEX characters.
255 *
256 * \par Example Usage
257 * \code
258 * Assert (BLOB::FromHex ("29144adb4ece20450956e813652fe8d6").AsHex () == "29144adb4ece20450956e813652fe8d6");
259 * \endcode
260 *
261 * \see also FromHex ()
262 */
263 template <typename STRING_TYPE = Characters::String>
264 nonvirtual STRING_TYPE AsHex (size_t maxBytesToShow = numeric_limits<size_t>::max ()) const
265#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
266 requires (same_as<Characters::String, STRING_TYPE>)
267#endif
268 ;
269
270 public:
271 /**
272 * Return a string of base64 encoded bytes.
273 *
274 * \par Example Usage
275 * \code
276 * Assert (BLOB{'h', 'e', 'l', 'l', 'o'}.AsBase64 () == "aGVsbG8=");
277 * \endcode
278 *
279 * \see also AsHex (), FromBase64
280 */
281 template <typename STRING_TYPE = Characters::String>
282 nonvirtual STRING_TYPE AsBase64 () const
283#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
284 requires (same_as<Characters::String, STRING_TYPE>)
285#endif
286 ;
287 template <typename STRING_TYPE = Characters::String>
288 nonvirtual STRING_TYPE AsBase64 (const Cryptography::Encoding::Algorithm::Base64::Options& o) const
289#if !qCompilerAndStdLib_template_requires_doesnt_work_with_specialization_Buggy
290 requires (same_as<Characters::String, STRING_TYPE>)
291#endif
292 ;
293
294 public:
295 /**
296 * EXPERIMENT: making this a NON-EXPLICIT conversion operator. Makes a bunch of other code simpler, and it makes sense.
297 * Trouble is - will it cause ambiguity later, and need to be removed? No obvious way to tell except by testing a bit.
298 * --LGP 2023-12-21
299 */
300 /*explicit*/ operator Streams::InputStream::Ptr<byte> () const;
301
302 public:
303 /**
304 * Return a BLOB made by concatenating this BLOB count times.
305 */
306 nonvirtual BLOB Repeat (unsigned int count) const;
307
308 public:
309 /**
310 * Return a BLOB made from the given sub-range of bytes.
311 * \pre startAt <= endAt
312 * \pre endAt < GetSize ()
313 */
314 nonvirtual BLOB Slice (size_t startAt, size_t endAt) const;
315
316 public:
317 /**
318 * Pointers returned by begin(), remain valid for the lifetime of the containing BLOB.
319 * \note - this is legal to call even on an empty BLOB (but may return nullptr)
320 */
321 nonvirtual const byte* begin () const;
322
323 public:
324 /**
325 * Pointers returned by end(), remain valid for the lifetime of the containing BLOB.
326 * \note - this is legal to call even on an empty BLOB (but may return nullptr)
327 */
328 nonvirtual const byte* end () const;
329
330 public:
331 /**
332 * Returns the number of bytes in the BLOB.
333 */
334 nonvirtual size_t GetSize () const;
335
336 public:
337 /**
338 */
339 nonvirtual strong_ordering operator<=> (const BLOB& rhs) const;
340
341 public:
342 /**
343 */
344 nonvirtual bool operator== (const BLOB& rhs) const;
345
346 private:
347 static strong_ordering TWC_ (const BLOB& lhs, const BLOB& rhs); // utility code share between c++17 and c++20 versions
348
349 public:
350 /**
351 * @todo cleanup and switch to spans!!
352 */
353 nonvirtual const byte* data () const;
354
355 public:
356 /**
357 * @aliases GetSize()
358 */
359 nonvirtual size_t size () const;
360
361 public:
362 /**
363 * @aliases GetSize()
364 */
365 nonvirtual size_t length () const;
366
367 public:
368 /**
369 * @see Characters::ToString()
370 * Return a debug-friendly, display version of the current BLOB. This is not guaranteed parsable or usable except for debugging.
371 *
372 * \note this displays the BLOB as hex bytes. To interpret as a string, try
373 * \code
374 * Assert (BinaryToText::Reader::New (blob).ReadAll () == "hello world");
375 * \endcode
376 */
377 nonvirtual Characters::String ToString (size_t maxBytesToShow = 80) const;
378
379 public:
380 /**
381 * Trivial alias for BLOB ({*this, rhs});
382 */
383 nonvirtual BLOB operator+ (const BLOB& rhs) const;
384
385 public:
386 [[deprecated ("Since Stroika v3.0d5 use span overload")]] static BLOB FromHex (const char* s, const char* e);
387 template <typename T>
388 [[deprecated ("Since Stroika v3.0d5 - use As/0")]] void As (T* into) const
389 {
390 *into = this->As<T> ();
391 }
392 [[deprecated ("Since Stroika v3.0d4 use span")]] static BLOB Attach (const byte* start, const byte* end)
393 {
394 return Attach (span{start, end});
395 }
396 [[deprecated ("Since Stroika v3.0d4 use Attach")]] static BLOB AttachApplicationLifetime (const byte* start, const byte* end)
397 {
398 return Attach (span{start, end});
399 }
400 template <size_t SIZE>
401 [[deprecated ("Since Stroika v3.0d4 use Attach")]] static BLOB AttachApplicationLifetime (const byte (&data)[SIZE])
402 {
403 return Attach (span{data, SIZE});
404 }
405 template <typename... ARGS>
406 [[deprecated ("Since Stroika v3.0d5 use FromHex")]] static BLOB Hex (ARGS... args)
407 {
408 return FromHex (args...);
409 }
410 template <typename... ARGS>
411 [[deprecated ("Since Stroika v3.0d5 use FromRaw")]] static BLOB Raw (ARGS... args)
412 {
413 return FromRaw (args...);
414 }
415
416 private:
417 struct BasicRep_;
418 struct ZeroRep_;
419 struct AdoptRep_; // e.g. for static constexpr arrays
420 struct AdoptAndDeleteRep_; // for user allocated new byte[]....
421
422 private:
423 [[no_unique_address]] Debug::AssertExternallySynchronizedMutex fThisAssertExternallySynchronized_;
424 shared_ptr<_IRep> fRep_;
425 };
426 static_assert (totally_ordered<BLOB>);
427 static_assert (regular<BLOB>);
428 static_assert (Common::explicitly_convertible_to<BLOB, span<const byte>>);
429
430 // @todo make this work
431 //static_assert (convertible_to<BLOB, span<byte>>);
432
433 /**
434 * This abstract interface defines the behavior of a BLOB: a concrete rep only has to report the read-only byte range it holds (GetBounds).
435 */
436 struct BLOB::_IRep {
437 _IRep () = default;
438 _IRep (const _IRep&) = delete; // reps are not copy-constructible
439 virtual ~_IRep () = default; // virtual, since BLOB holds its rep through a shared_ptr<_IRep>
440 virtual span<const byte> GetBounds () const = 0; // pure virtual: each concrete rep supplies its span of const bytes
441
442 nonvirtual const _IRep& operator= (const _IRep&) = delete; // reps are not copy-assignable
443 };
444
445 inline namespace Literals {
446
447 /**
448 * @brief alias for BLOB::Attach - so treats argument as BLOB of bytes
449 *
450 * \par Example Usage
451 * \code
452 * BLOB b1 = "hello"_blob;
453 * Assert (b1.size () == 5);
454 * Assert (b1[0] == static_cast<byte> ('h'));
455 * \endcode
456 */
457 inline BLOB operator""_blob (const char* str, size_t len);
458
459 }
460
461}
462
463/*
464 ********************************************************************************
465 ***************************** Implementation Details ***************************
466 ********************************************************************************
467 */
468#include "BLOB.inl"
469
470#endif /*_Stroika_Foundation_Memory_BLOB_h_*/
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual size_t length() const
Definition BLOB.inl:276
nonvirtual const byte * end() const
Definition BLOB.inl:263
nonvirtual STRING_TYPE AsBase64() const
nonvirtual Characters::String ToString(size_t maxBytesToShow=80) const
Definition BLOB.cpp:382
nonvirtual BLOB Repeat(unsigned int count) const
Definition BLOB.cpp:349
static BLOB FromHex(const char *b)
Convert string of hex bytes to BLOB.
Definition BLOB.inl:127
nonvirtual byte operator[](const size_t i) const
Definition BLOB.inl:244
nonvirtual BLOB Slice(size_t startAt, size_t endAt) const
Definition BLOB.cpp:374
nonvirtual BLOB operator+(const BLOB &rhs) const
Definition BLOB.inl:311
nonvirtual bool empty() const
Definition BLOB.inl:251
nonvirtual const byte * begin() const
Definition BLOB.inl:258
nonvirtual STRING_TYPE AsHex(size_t maxBytesToShow=numeric_limits< size_t >::max()) const
nonvirtual size_t GetSize() const
Definition BLOB.inl:269
static BLOB FromBase64(const char *b)
Convert string of base64 bytes to BLOB.
Definition BLOB.inl:140
static BLOB FromRaw(const T *s, const T *e)
Convert pointed to/referenced data to BLOB (treating the argument as raw bytes).
Definition BLOB.inl:150
nonvirtual size_t size() const
Definition BLOB.inl:286
nonvirtual const byte * data() const
Definition BLOB.inl:281
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
concept - trivial shorthand for variadic same_as A or same_as B, or ...
Definition Concepts.h:189