Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
TextConvert.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Characters_TextConvert_h_
5#define _Stroika_Foundation_Characters_TextConvert_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <optional>
10#include <span>
11
13
14/**
15 * \file
16 * Wrappers and extensions to the CodeCvt/std::codecvt<> logic. Often when reading/writing files (or a series of bytes)
17 * you have no, or only limited, knowledge of the code page. This module contains logic to help with that case.
18 *
19 * @todo - this will replace the (probably to be deprecated) CodePage module - at least much/most of it.
20 */
21
23
24 using namespace std; // lets the declarations below name span, byte, optional, tuple and size_t unqualified
25
26 /**
27 * Flag indicating whether a byte order mark (BOM) should be included (used by other Stroika modules).
28 */
29 enum class ByteOrderMark {
30 eInclude,
31 eDontInclude
32 };
33
34 /**
35 * Returns the byte order mark (BOM) for the given Unicode encoding. The size of the returned span is always <= kMaxBOMSize.
36 */
37 constexpr span<const byte> GetByteOrderMark (UnicodeExternalEncodings e) noexcept;
38
/**
 *  Max size (in bytes) of the span returned by GetByteOrderMark ().
 *
 *  This must be at least as large as the longest BOM: the UTF-32 BOMs (00 00 FE FF and FF FE 00 00)
 *  are four bytes long, whereas 3 only covers the UTF-8 BOM (EF BB BF). With a smaller value, callers
 *  that size their buffers with kMaxBOMSize could never detect (or hold) a UTF-32 BOM.
 *
 *  NOTE(review): assumes UnicodeExternalEncodings (declared in UTFConvert.h) includes the UTF-32
 *  variants - confirm against that header.
 */
constexpr size_t kMaxBOMSize = 4;
43
44 /**
45 * Returns the detected encoding and the number of bytes consumed. If 'd' does not contain a
46 * BOM (possibly because it is not large enough), returns nullopt.
47 *
48 * Any size span may be passed in, but size kMaxBOMSize is recommended (with less, some BOMs may be missed; with more, the extra data is not used).
49 *
50 * \par Example Usage:
51 * \code
52 *     span<const byte> from = argument;
53 *     if (optional<tuple<UnicodeExternalEncodings, size_t>> o = ReadByteOrderMark (from)) {
54 *         return make_tuple (Characters::CodeCvt<Character> (get<0> (*o)), get<1> (*o));
55 *     }
56 *     else {
57 *         return make_tuple (Characters::CodeCvt<Character> (UnicodeExternalEncodings::eDEFAULT), 0);
58 *     }
59 * \endcode
60 */
61 constexpr optional<tuple<UnicodeExternalEncodings, size_t>> ReadByteOrderMark (span<const byte> d) noexcept;
62
63 /**
64 * \pre into.size () >= GetByteOrderMark (e).size ()
65 *
66 * Returns the remaining span to write into (basically just into.subspan (GetByteOrderMark (e).size ())),
67 * so the caller can continue writing.
68 */
69 span<byte> WriteByteOrderMark (UnicodeExternalEncodings e, span<byte> into);
70
71}
72
73/*
74 ********************************************************************************
75 ***************************** Implementation Details ***************************
76 ********************************************************************************
77 */
78#include "TextConvert.inl"
79
80#endif /*_Stroika_Foundation_Characters_TextConvert_h_*/
span< byte > WriteByteOrderMark(UnicodeExternalEncodings e, span< byte > into)
constexpr optional< tuple< UnicodeExternalEncodings, size_t > > ReadByteOrderMark(span< const byte > d) noexcept
constexpr span< const byte > GetByteOrderMark(UnicodeExternalEncodings e) noexcept
UnicodeExternalEncodings
list of external UNICODE character encodings, for file IO (eDEFAULT = eUTF8)
Definition UTFConvert.h:31
STL namespace.