Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
CodeCvt.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include "Stroika/Foundation/Common/Common.h"
7#include "Stroika/Foundation/Execution/Exceptions.h"
8
10
11#include "CodeCvt.h"
12
13using std::byte;
14
15using namespace Stroika::Foundation;
17using namespace Stroika::Foundation::Memory;
18
19// #define qGenTableDumper_ 1
20
21#if qGenTableDumper_
22#include <fstream>
23#endif
24
25/*
26 ********************************************************************************
27 ************ Private_::ThrowErrorConvertingBytes2Characters_ *******************
28 ********************************************************************************
29 */
30void Characters::Private_::ThrowErrorConvertingBytes2Characters_ (size_t nSrcCharsWhereError)
31{
33}
34
35/*
36 ********************************************************************************
37 ************ Private_::ThrowErrorConvertingCharacters2Bytes_ *******************
38 ********************************************************************************
39 */
40void Characters::Private_::ThrowErrorConvertingCharacters2Bytes_ (size_t nSrcCharsWhereError)
41{
43}
44
45/*
46 ********************************************************************************
47 ************** Private_::ThrowCodePageNotSupportedException_ *******************
48 ********************************************************************************
49 */
50void Characters::Private_::ThrowCodePageNotSupportedException_ (CodePage cp)
51{
53}
54
55/*
56 ********************************************************************************
57 *************** Private_::ThrowCharsetNotSupportedException_ *******************
58 ********************************************************************************
59 */
60void Characters::Private_::ThrowCharsetNotSupportedException_ (const Charset& charset)
61{
62 Execution::Throw (Execution::RuntimeErrorException{"Cannot construct CodeCvt with provided charset': "sv + static_cast<String> (charset)});
63}
64
65/*
66 ********************************************************************************
67 **** Private_::ThrowInvalidCharacterProvidedDoesntFitWithProvidedCodeCvt_ ******
68 ********************************************************************************
69 */
70void Characters::Private_::ThrowInvalidCharacterProvidedDoesntFitWithProvidedCodeCvt_ ()
71{
72 static const auto kException_ =
73 Execution::RuntimeErrorException{"Cannot construct CodeCvt with provided std::code_cvt and provided 'invalid character'"sv};
74 Execution::Throw (kException_);
75}
76
77/*
78 ********************************************************************************
79 ********************** Private_::AsNarrowSDKString_ ****************************
80 ********************************************************************************
81 */
82string Characters::Private_::AsNarrowSDKString_ (const String& s)
83{
84 return s.AsNarrowSDKString ();
85}
86
87/*
88 ********************************************************************************
89 ************** Private_::BuiltinSingleByteTableCodePageRep_ ********************
90 ********************************************************************************
91 */
92#if qGenTableDumper_
93void dumpTable (CodePage cp, std::filesystem::path p)
94{
95 ofstream o{p, ios::out};
96 o << " static constexpr char16_t kMap_[256] = {" << endl;
97 for (int i = 0; i < 256; ++i) {
98 wchar_t wc{};
99 Verify (MultiByteToWideChar (cp, 0, (char*)&i, 1, &wc, 1));
100 o << hex << "0x" << static_cast<int> (wc) << ", ";
101 if (i % 16 == 15) {
102 o << endl;
103 }
104 }
105 o << endl << " };" << endl;
106};
107#endif
108
109#if qGenTableDumper_
110//const int ignored1 = (dumpTable (WellKnownCodePages::kANSI, "kCodePage_ANSI.txt"), 1);
111//const int ignored2 = (dumpTable (WellKnownCodePages::kMAC, "kCodePage_MAC.txt"), 1);
112//const int ignored3 = (dumpTable (WellKnownCodePages::kPC, "kCodePage_PC.txt"), 1);
113//const int ignored4 = (dumpTable (kCodePage_PCA, "kCodePage_PCA.txt"), 1);
114//const int ignored5 = (dumpTable (WellKnownCodePages::kGreek, "kCodePage_GREEK.txt"), 1);
115//const int ignored6 = (dumpTable (WellKnownCodePages::kTurkish, "kCodePage_Turkish.txt"), 1);
116//const int ignored7 = (dumpTable (WellKnownCodePages::kHebrew, "kCodePage_HEBREW.txt"), 1);
117//const int ignored8 = (dumpTable (kCodePage_ARABIC, "kCodePage_ARABIC.txt"), 1);
118#endif
119
120Characters::Private_::BuiltinSingleByteTableCodePageRep_::BuiltinSingleByteTableCodePageRep_ (CodePage cp, optional<Character> invalidCharacterReplacement)
121{
122 switch (cp) {
123 // Tables generated with qGenTableDumper_ on Windows - 2023-07-23
124 case WellKnownCodePages::kANSI: {
125 static constexpr char16_t kMap_[256] = {
126 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
127 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
128 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
129 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
130 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
131 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
132 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
133 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
134 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x17d, 0x8f,
135 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x17e, 0x178,
136 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
137 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
138 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
139 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
140 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
141 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
142 };
143 fMap_ = &kMap_[0];
144 } break;
145 case WellKnownCodePages::kMAC: {
146 static constexpr char16_t kMap_[256] = {
147 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
148 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
149 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
150 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
151 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,
152 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
153 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
154 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
155 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0xc4, 0xc5, 0xc7, 0xc9, 0xd1, 0xd6, 0xdc,
156 0xe1, 0xe0, 0xe2, 0xe4, 0xe3, 0xe5, 0xe7, 0xe9, 0xe8, 0xea, 0xeb, 0xed, 0xec, 0xee, 0xef,
157 0xf1, 0xf3, 0xf2, 0xf4, 0xf6, 0xf5, 0xfa, 0xf9, 0xfb, 0xfc, 0x2020, 0xb0, 0xa2, 0xa3, 0xa7,
158 0x2022, 0xb6, 0xdf, 0xae, 0xa9, 0x2122, 0xb4, 0xa8, 0x2260, 0xc6, 0xd8, 0x221e, 0xb1, 0x2264, 0x2265,
159 0xa5, 0xb5, 0x2202, 0x2211, 0x220f, 0x3c0, 0x222b, 0xaa, 0xba, 0x2126, 0xe6, 0xf8, 0xbf, 0xa1, 0xac,
160 0x221a, 0x192, 0x2248, 0x2206, 0xab, 0xbb, 0x2026, 0xa0, 0xc0, 0xc3, 0xd5, 0x152, 0x153, 0x2013, 0x2014,
161 0x201c, 0x201d, 0x2018, 0x2019, 0xf7, 0x25ca, 0xff, 0x178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02, 0x2021,
162 0xb7, 0x201a, 0x201e, 0x2030, 0xc2, 0xca, 0xc1, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 0xcc, 0xd3, 0xd4,
163 0xf8ff, 0xd2, 0xda, 0xdb, 0xd9, 0x131, 0x2c6, 0x2dc, 0xaf, 0x2d8, 0x2d9, 0x2da, 0xb8, 0x2dd, 0x2db,
164 0x2c7,
165 };
166 fMap_ = &kMap_[0];
167 } break;
168 case WellKnownCodePages::kPC: {
169 static constexpr char16_t kMap_[256] = {
170 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
171 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
172 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
173 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
174 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,
175 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
176 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
177 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
178 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5,
179 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2,
180 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xa2, 0xa3, 0xa5, 0x20a7, 0x192, 0xe1, 0xed, 0xf3, 0xfa, 0xf1,
181 0xd1, 0xaa, 0xba, 0xbf, 0x2310, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, 0x2591, 0x2592, 0x2593, 0x2502,
182 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534, 0x252c,
183 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 0x2568, 0x2564,
184 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 0x3b1,
185 0xdf, 0x393, 0x3c0, 0x3a3, 0x3c3, 0xb5, 0x3c4, 0x3a6, 0x398, 0x3a9, 0x3b4, 0x221e, 0x3c6, 0x3b5, 0x2229,
186 0x2261, 0xb1, 0x2265, 0x2264, 0x2320, 0x2321, 0xf7, 0x2248, 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0,
187 0xa0,
188 };
189 fMap_ = &kMap_[0];
190 } break;
191 case WellKnownCodePages::kPCA: {
192 static constexpr char16_t kMap_[256] = {
193 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
194 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
195 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
196 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
197 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
198 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
199 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
200 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
201 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
202 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x192,
203 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,
204 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0xc0, 0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510,
205 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4,
206 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x131, 0xcd, 0xce, 0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580,
207 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0xb4,
208 0xad, 0xb1, 0x2017, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0,
209 };
210 fMap_ = &kMap_[0];
211 } break;
212 case WellKnownCodePages::kGreek: {
213 static constexpr char16_t kMap_[256] = {
214 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
215 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
216 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
217 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
218 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
219 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
220 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
221 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
222 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x88, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
223 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
224 0xa0, 0x385, 0x386, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xf8f9, 0xab, 0xac, 0xad, 0xae, 0x2015,
225 0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0xb5, 0xb6, 0xb7, 0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f,
226 0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, 0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f,
227 0x3a0, 0x3a1, 0xf8fa, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af,
228 0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf,
229 0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xf8fb,
230 };
231 fMap_ = &kMap_[0];
232 } break;
233 case WellKnownCodePages::kTurkish: {
234 static constexpr char16_t kMap_[256] = {
235 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
236 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
237 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
238 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
239 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
240 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
241 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
242 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
243 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
244 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
245 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
246 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
247 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
248 0x11e, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x130, 0x15e, 0xdf,
249 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
250 0x11f, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff,
251 };
252 fMap_ = &kMap_[0];
253 } break;
254 case WellKnownCodePages::kHebrew: {
255 static constexpr char16_t kMap_[256] = {
256 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
257 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
258 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
259 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
260 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,
261 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
262 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
263 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
264 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020,
265 0x2021, 0x2c6, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022,
266 0x2013, 0x2014, 0x2dc, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0x20aa,
267 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3,
268 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0x5b0, 0x5b1, 0x5b2,
269 0x5b3, 0x5b4, 0x5b5, 0x5b6, 0x5b7, 0x5b8, 0x5b9, 0x5ba, 0x5bb, 0x5bc, 0x5bd, 0x5be, 0x5bf, 0x5c0, 0x5c1,
270 0x5c2, 0x5c3, 0x5f0, 0x5f1, 0x5f2, 0x5f3, 0x5f4, 0xf88d, 0xf88e, 0xf88f, 0xf890, 0xf891, 0xf892, 0xf893, 0x5d0,
271 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7, 0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df,
272 0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7, 0x5e8, 0x5e9, 0x5ea, 0xf894, 0xf895, 0x200e, 0x200f,
273 0xf896,
274 };
275 fMap_ = &kMap_[0];
276 } break;
277 case WellKnownCodePages::kArabic: {
278 static constexpr char16_t kMap_[256] = {
279 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
280 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
281 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
282 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
283 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
284 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
285 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
286 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
287 0x20ac, 0x67e, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x679, 0x2039, 0x152, 0x686, 0x698, 0x688,
288 0x6af, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x6a9, 0x2122, 0x691, 0x203a, 0x153, 0x200c, 0x200d, 0x6ba,
289 0xa0, 0x60c, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x6be, 0xab, 0xac, 0xad, 0xae, 0xaf,
290 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0x61b, 0xbb, 0xbc, 0xbd, 0xbe, 0x61f,
291 0x6c1, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627, 0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f,
292 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0xd7, 0x637, 0x638, 0x639, 0x63a, 0x640, 0x641, 0x642, 0x643,
293 0xe0, 0x644, 0xe2, 0x645, 0x646, 0x647, 0x648, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0x649, 0x64a, 0xee, 0xef,
294 0x64b, 0x64c, 0x64d, 0x64e, 0xf4, 0x64f, 0x650, 0xf7, 0x651, 0xf9, 0x652, 0xfb, 0xfc, 0x200e, 0x200f, 0x6d2,
295 };
296 fMap_ = &kMap_[0];
297 } break;
298 default: {
300 }
301 }
302 AssertNotNull (fMap_);
303 if (invalidCharacterReplacement) {
304 if (auto pi = std::find (fMap_, fMap_ + 256, invalidCharacterReplacement->As<char32_t> ()); pi != fMap_ + 256) {
305 fInvalidCharacterReplacementByte_ = static_cast<byte> (pi - fMap_);
306 }
307 }
308}
309
310span<char16_t> Characters::Private_::BuiltinSingleByteTableCodePageRep_::Bytes2Characters (span<const byte>* from, span<char16_t> to) const
311{
312 RequireNotNull (from);
313 Require (from->size () <= to.size ());
314 // all bytes DEFINED to map to some unicode character
315 char16_t* oi = to.data ();
316 for (byte i : *from) {
317 *oi++ = fMap_[(int)i];
318 }
319 *from = span<const byte>{}; // always consume all bytes - no partial characters
320 return to.subspan (oi - to.data ());
321}
322
323span<byte> Characters::Private_::BuiltinSingleByteTableCodePageRep_::Characters2Bytes (span<const char16_t> from, span<byte> to) const
324{
325 // very simple, but stageringly inefficient algorithm for this case... LGP 2023-07-22
326 // Note easy to fix for most characters with reverse direction lookup table (all bit unicode characters back to their binary rep and rest in smaller lookaside table)
327 byte* oi = to.data ();
328 for (char16_t i : from) {
329 if (auto pi = std::find (fMap_, fMap_ + 256, i); pi != fMap_ + 256) {
330 *oi++ = static_cast<byte> (pi - fMap_);
331 }
332 else {
333 if (fInvalidCharacterReplacementByte_) {
334 *oi++ = *fInvalidCharacterReplacementByte_;
335 }
336 else {
337 size_t nCharsConsumed = oi - to.data (); // one char at a time on both so same and avoids counting or using explicit iterator
338 Execution::Throw (CharacterEncodingException{CharacterEncodingException::eEncoding, nCharsConsumed}); // @todo COULD safe/capture the encoding name as well here easy enuf...
339 }
340 }
341 }
342 return to.subspan (oi - to.data ());
343}
344
345size_t Characters::Private_::BuiltinSingleByteTableCodePageRep_::ComputeTargetCharacterBufferSize (variant<span<const byte>, size_t> src) const
346{
347 if (const size_t* i = get_if<size_t> (&src)) {
348 return *i;
349 }
350 else {
351 return get<span<const byte>> (src).size ();
352 }
353}
354
355size_t Characters::Private_::BuiltinSingleByteTableCodePageRep_::ComputeTargetByteBufferSize (variant<span<const char16_t>, size_t> src) const
356{
357 if (const size_t* i = get_if<size_t> (&src)) {
358 return *i;
359 }
360 else {
361 return get<span<const char16_t>> (src).size ();
362 }
363}
364
365#if qStroika_Foundation_Common_Platform_Windows
366/*
367 ********************************************************************************
368 ***************** Characters::Private_::WindowsNative_ *************************
369 ********************************************************************************
370 */
371span<char16_t> Characters::Private_::WindowsNative_::Bytes2Characters (span<const byte>* from, span<char16_t> to) const
372{
373 RequireNotNull (from);
374 Require (ComputeTargetCharacterBufferSize (*from) <= to.size ());
375 static constexpr DWORD kFLAGS_ = MB_ERR_INVALID_CHARS;
376 int r = ::MultiByteToWideChar (fCodePage_, kFLAGS_, reinterpret_cast<LPCCH> (from->data ()), static_cast<int> (from->size ()),
377 reinterpret_cast<LPWSTR> (to.data ()), static_cast<int> (to.size ()));
378 if (r == 0) {
379 if (from->empty ()) {
380 return span<char16_t>{}; // OK - empty from produces empty to
381 }
382 else {
383 Execution::ThrowSystemErrNo ();
384 }
385 }
386 else {
387 return to.subspan (0, static_cast<size_t> (r));
388 }
389}
390
391span<byte> Characters::Private_::WindowsNative_::Characters2Bytes (span<const char16_t> from, span<byte> to) const
392{
393 Require (ComputeTargetByteBufferSize (from) <= to.size ());
394 static constexpr DWORD kFLAGS_ = 0; // WC_ERR_INVALID_CHARS doesn't work (https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte), so must use lpUsedDefaultChar
395 BOOL usedDefaultChar{false};
396 int r = ::WideCharToMultiByte (fCodePage_, kFLAGS_, reinterpret_cast<LPCWCH> (from.data ()), static_cast<int> (from.size ()),
397 reinterpret_cast<LPSTR> (to.data ()), static_cast<int> (to.size ()), nullptr, &usedDefaultChar);
398 if (usedDefaultChar) {
399 Execution::ThrowSystemErrNo (ERROR_NO_UNICODE_TRANSLATION);
400 }
401 if (r == 0) {
402 if (from.empty ()) {
403 return span<byte>{}; // OK - empty from produces empty to
404 }
405 else {
406 Execution::ThrowSystemErrNo ();
407 }
408 }
409 else {
410 Assert (r > 0);
411 return to.subspan (0, static_cast<size_t> (r));
412 }
413}
414
415size_t Characters::Private_::WindowsNative_::ComputeTargetCharacterBufferSize (variant<span<const byte>, size_t> src) const
416{
417 if (const size_t* i = get_if<size_t> (&src)) {
418 return *i;
419 }
420 else {
421 auto s = get<span<const byte>> (src);
422 static constexpr DWORD kFLAGS_ = MB_ERR_INVALID_CHARS;
423 int r = ::MultiByteToWideChar (fCodePage_, kFLAGS_, reinterpret_cast<LPCCH> (s.data ()), static_cast<int> (s.size ()), nullptr, 0);
424 Assert (r >= 0);
425 if (r == 0) {
426 if (s.size () == 0) {
427 return 0;
428 }
429 else {
430 Execution::ThrowSystemErrNo ();
431 }
432 }
433 else {
434 return static_cast<size_t> (r);
435 }
436 }
437}
438
439size_t Characters::Private_::WindowsNative_::ComputeTargetByteBufferSize (variant<span<const char16_t>, size_t> src) const
440{
441 if (const size_t* i = get_if<size_t> (&src)) {
442 constexpr size_t kMaxBytesPerCharWAG_ = 6; // @todo improve this WAG, or find some reference/basis, but I think for UNICODE (excluding UTF-7) - I think this number is 4
443 return *i * kMaxBytesPerCharWAG_;
444 }
445 else {
446 auto s = get<span<const char16_t>> (src);
447 static constexpr DWORD kFLAGS_ = 0; // WC_ERR_INVALID_CHARS doesn't work (https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte)
448 int r = ::WideCharToMultiByte (fCodePage_, kFLAGS_, reinterpret_cast<LPCWCH> (s.data ()), static_cast<int> (s.size ()), nullptr, 0,
449 nullptr, nullptr);
450 Assert (r >= 0);
451 if (r == 0) {
452 if (s.size () == 0) {
453 return 0;
454 }
455 else {
456 Execution::ThrowSystemErrNo ();
457 }
458 }
459 else {
460 Assert (r > 0);
461 return static_cast<size_t> (r);
462 }
463 }
464}
465#endif
#define AssertNotNull(p)
Definition Assertions.h:333
#define RequireNotNull(p)
Definition Assertions.h:347
#define Verify(c)
Definition Assertions.h:419
An error occurred encoding or decoding a character
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual string AsNarrowSDKString() const
Definition String.inl:830
nonvirtual size_t size() const noexcept
Definition String.inl:534
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43