Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
TextConvert.inl
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/Memory/Common.h"
5
7
8 /*
9 ********************************************************************************
10 ************************** Characters::GetByteOrderMark ************************
11 ********************************************************************************
12 */
13 namespace Private_ {
14 using namespace Memory;
15 constexpr byte kUTF8_bom[] = {0xEF_b, 0xBB_b, 0xBF_b};
16 constexpr byte kUTF16_BE_bom[] = {0xFE_b, 0xFF_b};
17 constexpr byte kUTF16_LE_bom[] = {0xFF_b, 0xFE_b};
18 constexpr byte kUTF32_BE_bom[] = {0xFE_b, 0xFF_b, 0x00_b};
19 constexpr byte kUTF32_LE_bom[] = {0xFF_b, 0xFE_b, 0x00_b};
20 }
21 constexpr span<const byte> GetByteOrderMark (UnicodeExternalEncodings e) noexcept
22 {
23 // Values from https://en.wikipedia.org/wiki/Byte_order_mark
24 switch (e) {
25 case UnicodeExternalEncodings::eUTF8: {
26#if qCompilerAndStdLib_span_requires_explicit_type_for_BLOBCVT_Buggy
27 return span<const byte>{Private_ ::kUTF8_bom, sizeof (Private_ ::kUTF8_bom)};
28#else
29 return span{Private_ ::kUTF8_bom, sizeof (Private_ ::kUTF8_bom)};
30#endif
31 }
32 case UnicodeExternalEncodings::eUTF16_BE: {
33#if qCompilerAndStdLib_span_requires_explicit_type_for_BLOBCVT_Buggy
34 return span<const byte>{Private_ ::kUTF16_BE_bom, sizeof (Private_ ::kUTF16_BE_bom)};
35#else
36 return span{Private_ ::kUTF16_BE_bom, sizeof (Private_ ::kUTF16_BE_bom)};
37#endif
38 }
39 case UnicodeExternalEncodings::eUTF16_LE: {
40#if qCompilerAndStdLib_span_requires_explicit_type_for_BLOBCVT_Buggy
41 return span<const byte>{Private_ ::kUTF16_LE_bom, sizeof (Private_ ::kUTF16_LE_bom)};
42#else
43 return span{Private_ ::kUTF16_LE_bom, sizeof (Private_ ::kUTF16_LE_bom)};
44#endif
45 }
46 case UnicodeExternalEncodings::eUTF32_BE: {
47#if qCompilerAndStdLib_span_requires_explicit_type_for_BLOBCVT_Buggy
48 return span<const byte>{Private_ ::kUTF32_BE_bom, sizeof (Private_ ::kUTF32_BE_bom)};
49#else
50 return span<const byte>{Private_ ::kUTF32_BE_bom, sizeof (Private_ ::kUTF32_BE_bom)};
51#endif
52 }
53 case UnicodeExternalEncodings::eUTF32_LE: {
54#if qCompilerAndStdLib_span_requires_explicit_type_for_BLOBCVT_Buggy
55 return span<const byte>{Private_ ::kUTF32_LE_bom, sizeof (Private_ ::kUTF32_LE_bom)};
56#else
57 return span{Private_ ::kUTF32_LE_bom, sizeof (Private_ ::kUTF32_LE_bom)};
58#endif
59 }
60 default:
62 return span<const byte>{};
63 }
64 }
65
66 /*
67 ********************************************************************************
68 *********************** Characters::ReadByteOrderMark **************************
69 ********************************************************************************
70 */
71 constexpr optional<tuple<UnicodeExternalEncodings, size_t>> ReadByteOrderMark (span<const byte> d) noexcept
72 {
73 auto check1 = [&d] (auto bom) -> optional<tuple<UnicodeExternalEncodings, size_t>> {
74 if (d.size () >= bom.size () and std::memcmp (bom.data (), d.data (), bom.size ()) == 0) {
75 return make_tuple (UnicodeExternalEncodings::eUTF8, bom.size ());
76 }
77 return nullopt;
78 };
79 // UTF-8 unambiguous and most likely, so check it first
80 if (auto r = check1 (GetByteOrderMark (UnicodeExternalEncodings::eUTF8))) {
81 return r;
82 }
83 // next check UTF32 BOM, only because needed to do before char16 BOM, cuz they overlap
84 if (auto r = check1 (GetByteOrderMark (UnicodeExternalEncodings::eUTF32_BE))) {
85 return r;
86 }
87 if (auto r = check1 (GetByteOrderMark (UnicodeExternalEncodings::eUTF32_LE))) {
88 return r;
89 }
90 // next check UTF16
91 if (auto r = check1 (GetByteOrderMark (UnicodeExternalEncodings::eUTF16_BE))) {
92 return r;
93 }
94 if (auto r = check1 (GetByteOrderMark (UnicodeExternalEncodings::eUTF16_LE))) {
95 return r;
96 }
97 return nullopt;
98 }
99
100 /*
101 ********************************************************************************
102 *********************** Characters::WriteByteOrderMark *************************
103 ********************************************************************************
104 */
105 inline span<byte> WriteByteOrderMark (UnicodeExternalEncodings e, span<byte> into)
106 {
107 Require (into.size () >= GetByteOrderMark (e).size ());
108 auto bom = GetByteOrderMark (e);
109 std::memcpy (into.data (), bom.data (), bom.size ());
110 return into.subspan (bom.size ());
111 }
112
113}
#define AssertNotReached()
Definition Assertions.h:355
span< byte > WriteByteOrderMark(UnicodeExternalEncodings e, span< byte > into)
constexpr optional< tuple< UnicodeExternalEncodings, size_t > > ReadByteOrderMark(span< const byte > d) noexcept
constexpr span< const byte > GetByteOrderMark(UnicodeExternalEncodings e) noexcept
UnicodeExternalEncodings
list of external UNICODE character encodings, for file IO (eDEFAULT = eUTF8)
Definition UTFConvert.h:31