Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
BinaryToText.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Streams_BinaryToText_h_
5#define _Stroika_Foundation_Streams_BinaryToText_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <optional>
10
15
16/**
17 * \file
18 *
19 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
20 */
21
22namespace Stroika::Foundation::Memory {
23 class BLOB; // forward declaration only: the declarations in this header take BLOB solely by const reference (see Convert)
24}
25namespace Stroika::Foundation::Streams {
26 using Characters::Character; // lets declarations under Streams name Character unqualified (e.g. InputStream::Ptr<Character>)
27}
28
29namespace Stroika::Foundation::Streams::BinaryToText {
30
31 /** \brief Flags accepted (as an optional argument) by Reader::New and Convert in place of an explicit Characters::CodeCvt<>, to select the source encoding automatically.
32 */
33 enum class AutomaticCodeCvtFlags {
34 eReadBOMAndIfNotPresentUseUTF8, // NOTE(review): presumably honors a leading BOM, otherwise decodes as UTF-8 - confirm in BinaryToText.inl
35 eReadBOMAndIfNotPresentUseCurrentLocale, // NOTE(review): presumably honors a leading BOM, otherwise uses the current locale's encoding - confirm in BinaryToText.inl
36
37 eDEFAULT = eReadBOMAndIfNotPresentUseCurrentLocale // alias for the value above, not a distinct mode
38 };
39 using AutomaticCodeCvtFlags::eReadBOMAndIfNotPresentUseCurrentLocale; // make the enumerators nameable directly in namespace BinaryToText, without the enum qualifier
40 using AutomaticCodeCvtFlags::eReadBOMAndIfNotPresentUseUTF8;
41
42 /**
43 * \brief BinaryToText::Reader::Ptr is an InputStream::Ptr<Character>, usually constructed wrapping some binary object or binary stream
44 *
45 * \note This was called TextInputStreamBinaryAdapter
46 * \note This was called TextReader
47 *
48 * \note This is similar to the .net TextReader (https://msdn.microsoft.com/en-us/library/system.io.textreader(v=vs.110).aspx) except that
49 * much of the 'reading' API is baked into InputStream::Ptr<Character>.
50 *
51 * \note BinaryToText::Reader's are smart about not reading more than they need to from the source Stream (unless you make that stream buffered, in
52 * which case the buffering can cause it to read ahead)
53 *
54 * But BinaryToText::Reader itself doesn't read ahead more than it needs to in order to complete requested methods.
55 *
56 * \par Example Usage
57 * \code
58 * for (String line : BinaryToText::Reader::New (FileInputStream::New ("/tmp/foo")).ReadLines ()) {
59 * }
60 * \endcode
61 *
62 * \par Example Usage
63 * \code
64 * Assert (BinaryToText::Reader::New (String{"hello world"}).ReadAll () == "hello world");
65 * \endcode
66 *
67 * \note Reading improperly encoded text may result in a RuntimeException indicating improperly encoded characters.
68 *
69 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety-For-Envelope-Plus-Must-Externally-Synchronize-Letter">C++-Standard-Thread-Safety-For-Envelope-Plus-Must-Externally-Synchronize-Letter</a>
70 *
71 * Seekable defaults to the same value as that of the underlying stream wrapped.
72 * For the constructor taking const InputStream::Ptr<Character>& src, the seekability mimics that of the original source.
73 * For constructors taking a BLOB, the resulting stream will be seekable.
74 *
75 * But when you specify it explicitly, the given value will be used.
76 *
77 * \note Depending on the underlying source (e.g. binary stream) - maintaining seekability may be expensive in terms
78 * of memory usage.
79 */
80 namespace Reader {
81
82 /**
83 * \brief BinaryToText::Readers produce text in the form of an InputStream of 'Character' objects (so you might get the text with ReadAll()).
84 */
86
87 /**
88 * This flag controls whether the TextReader instance will try to read-ahead (typically in order to cache). This is generally
89 * a good thing, but for some stream uses, it's important to not read ahead (e.g. if the underlying binary stream contains multiple objects and we
90 * are just reading one).
91 *
92 * \note eReadAheadAllowed doesn't mean the underlying class actually WILL read ahead, only that it is permitted to.
93 */
94 enum class ReadAhead {
95 eReadAheadNever, // the reader must not read ahead in the underlying source
96 eReadAheadAllowed, // read-ahead is permitted, but not guaranteed to actually happen
97 };
98 using ReadAhead::eReadAheadAllowed; // make both enumerators nameable directly in namespace Reader (eReadAheadAllowed is the default argument of New)
99 using ReadAhead::eReadAheadNever;
100
101 /**
102 * \brief Create an InputStream::Ptr<Character> from the arguments (usually binary source) - which can be used to Read out the text as a string
103 *
104 * \par Example Usage
105 * \code
106 * for (String line : BinaryToText::Reader::New (FileInputStream::New (kProcCPUInfoFileName_, FileInputStream::eNotSeekable)).ReadLines ()) {
107 * DbgTrace ("***in Common::GetSystemConfiguration_CPU capture_ line={}"_f, line);
108 * }
109 * \endcode
110 *
111 * \note New (const InputStream::Ptr<byte>& src,... overloads)
112 * o Seekability
113 * if not specified, it is copied from the src binary stream.
114 * o CodeCvt flags
115 * either as specified, or, if src.IsSeekable () - defaults to AutomaticCodeCvtFlags::eDEFAULT (which looks at the BOM)
116 * and if not seekable and not specified, use CodeCvt<>{locale{}}.
117 *
118 * o These defaults changed in Stroika v3.0d5 (mostly before 3.0d5 - defaults for seekability changed and code page sometimes defaulted to UTF8).
119 */
120 Ptr New (const InputStream::Ptr<byte>& src, optional<AutomaticCodeCvtFlags> codeCvtFlags = {}, optional<SeekableFlag> seekable = {},
121 ReadAhead readAhead = eReadAheadAllowed); // binary source, encoding selected via flags; unset codeCvtFlags / seekable take the defaults described in the preceding comment
122 Ptr New (const InputStream::Ptr<byte>& src, const Characters::CodeCvt<>& codeConverter, optional<SeekableFlag> seekable = {},
123 ReadAhead readAhead = eReadAheadAllowed); // binary source, encoding fixed by the caller-supplied codeConverter
124 Ptr New (const InputStream::Ptr<Character>& src); // source is already a stream of Character
126 template <typename... ARGS>
127 Ptr New (Execution::InternallySynchronized internallySynchronized, ARGS... args); // NOTE(review): presumably forwards args to one of the overloads above and applies internallySynchronized to the result - defined in BinaryToText.inl, confirm there
128
129 }
130
131 // note - @todo COULD provide this as an OutStream::Ptr/Writer - namespace intentionally left empty, since no need/use has been found so far
132 namespace Writer {
133 }
134
135 /**
136 * \brief convert BLOB (using optional encoding parameter) to a String
137 *
138 * \note shorthand for BinaryToText::Reader::New (src, codeCvtFlags).ReadAll ()
139 *
140 * \note could have done with stream, but the API names suggest side-effect free, and that wouldn't be,
141 * since it seeks the src input stream.
142 */
143 Characters::String Convert (const Memory::BLOB& src, optional<AutomaticCodeCvtFlags> codeCvtFlags = {}); // encoding selected via the optional flags (argument defaults to unset)
144 Characters::String Convert (const Memory::BLOB& src, const Characters::CodeCvt<>& codeConverter); // encoding fixed by the caller-supplied codeConverter
145
146}
147
148/*
149 ********************************************************************************
150 ***************************** Implementation Details ***************************
151 ********************************************************************************
152 */
153#include "BinaryToText.inl"
154
155#endif /*_Stroika_Foundation_Streams_BinaryToText_h_*/
Profile Convert(const VariantValue &v)
Definition Profile.cpp:50
CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt,...
Definition CodeCvt.h:118
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
A Streams::Ptr<ELEMENT_TYPE> is a smart-pointer to a stream of elements of type T.
Definition Stream.h:170
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
Ptr New(const InputStream::Ptr< byte > &src, optional< AutomaticCodeCvtFlags > codeCvtFlags={}, optional< SeekableFlag > seekable={}, ReadAhead readAhead=eReadAheadAllowed)
Create an InputStream::Ptr<Character> from the arguments (usually binary source) - which can be used ...
InputStream::Ptr< Character > Ptr
BinaryToText::Readers produce text in the form of an InputStream of 'Character' objects (so you might...