Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
CodePage.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <algorithm>
7#include <mutex>
8#include <set>
9
14#include "Stroika/Foundation/Common/Common.h"
15#include "Stroika/Foundation/Containers/Common.h"
16#include "Stroika/Foundation/Execution/Common.h"
17#include "Stroika/Foundation/Execution/Exceptions.h"
19
20#include "CodePage.h"
21
22using std::byte;
23
24using namespace Stroika::Foundation;
26using namespace Stroika::Foundation::Memory;
27
28// Comment this in to turn on aggressive noisy DbgTrace in this module
29//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
30
31#ifndef qBuildInTableDrivenCodePageBuilderProc
32#define qBuildInTableDrivenCodePageBuilderProc 0
33#endif
34
35/*
36 * Hack to build 'myiswalpha' and 'myiswpunct' for SPR#1220 (revised and moved here for SPR#1306 and class
37 * 'CharacterProperties').
38 */
39#ifndef qBuildMemoizedISXXXBuilderProc
40#define qBuildMemoizedISXXXBuilderProc 0
41#endif
42
43/*
44 * Use this to test my IsWXXX functions produce the right results. Only test under WinXP,
45 * since that is the reference they are copying (SPR#1229).
46 */
47#ifndef qTestMyISWXXXFunctions
48#define qTestMyISWXXXFunctions 0
49#endif
50
51#if qBuildInTableDrivenCodePageBuilderProc || qBuildMemoizedISXXXBuilderProc
52#include <fstream>
53#endif
54
55using namespace Characters;
56
57#if qStroika_Foundation_Common_Platform_Windows
namespace {
    /*
     *  Pointer re-typing helpers between char16_t and wchar_t buffers (one overload per direction and
     *  constness). Only the pointer type changes; no characters are converted or copied.
     *
     *  NOTE(review): presumably relies on wchar_t being a 16-bit type on the platform this section is
     *  compiled for (it is guarded by qStroika_Foundation_Common_Platform_Windows) - confirm.
     */
    inline const wchar_t* SAFE_WIN_WCHART_CAST_ (const char16_t* t)
    {
        return static_cast<const wchar_t*> (static_cast<const void*> (t));
    }
    inline wchar_t* SAFE_WIN_WCHART_CAST_ (char16_t* t)
    {
        return static_cast<wchar_t*> (static_cast<void*> (t));
    }
    inline const char16_t* SAFE_WIN_WCHART_CAST_ (const wchar_t* t)
    {
        return static_cast<const char16_t*> (static_cast<const void*> (t));
    }
    inline char16_t* SAFE_WIN_WCHART_CAST_ (wchar_t* t)
    {
        return static_cast<char16_t*> (static_cast<void*> (t));
    }
}
76#endif
77
78/*
79 ********************************************************************************
80 **************************** Characters::GetCharsetString **********************
81 ********************************************************************************
82 */
83wstring Characters::GetCharsetString (CodePage cp)
84{
85 switch (cp) {
86 case WellKnownCodePages::kUTF8:
87 return L"utf-8";
88 default:
89 return Characters::CString::Format (L"CodePage %d", cp);
90 }
91}
92
93namespace {
/*
 *  Primary template for the table-driven single-byte code page converters.
 *
 *  Only the two static entry points are declared here; no definitions are given for the general
 *  case. The explicit specializations that follow in this file (one per supported code page) supply
 *  them, each backed by its own 256-entry table.
 *
 *  MapToUNICODE:   inMBChars/inMBCharCnt are the input bytes; outChars receives char16_t values.
 *  MapFromUNICODE: inChars/inCharCnt are the input char16_t values; outChars receives bytes.
 *  In both, *outCharCnt is an in/out parameter (capacity in, count produced out) - see the
 *  MapToUNICODEFromTable_ / MapFromUNICODEFromTable_ helpers the specializations forward to.
 */
94 template <CodePage CODEPAGE>
95 class TableDrivenCodePageConverter_ {
96 public:
97 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt);
98 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt);
99 };
/*
 *  Translate single-byte characters to char16_t values through a 256-entry lookup table.
 *
 *  kMap_        table indexed by the (unsigned) input byte
 *  inMBChars    input bytes, inMBCharCnt of them
 *  outChars     destination buffer
 *  outCharCnt   in: capacity of outChars; out: number of characters actually written
 *
 *  At most min (inMBCharCnt, *outCharCnt) characters are converted; any excess input is ignored.
 */
inline void MapToUNICODEFromTable_ (const char16_t kMap_[256], const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
{
    const size_t convertCount = std::min (inMBCharCnt, *outCharCnt);
    const char*  srcEnd       = inMBChars + convertCount;
    char16_t*    dst          = outChars;
    for (const char* src = inMBChars; src != srcEnd; ++src, ++dst) {
        // index with the unsigned value so bytes >= 0x80 don't become negative subscripts
        *dst = kMap_[static_cast<unsigned char> (*src)];
    }
    *outCharCnt = convertCount;
}
/*
 *  Translate char16_t values back to single bytes by reverse lookup in a 256-entry table.
 *
 *  kMap_        the same byte -> char16_t table used for the forward mapping
 *  inChars      input characters, inCharCnt of them
 *  outChars     destination buffer
 *  outCharCnt   in: capacity of outChars; out: number of bytes actually written
 *
 *  Each input character is replaced by the index of the FIRST table entry equal to it; characters
 *  that do not occur in the table are written as '?'. At most min (inCharCnt, *outCharCnt)
 *  characters are converted.
 */
inline void MapFromUNICODEFromTable_ (const char16_t kMap_[256], const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
{
    const char16_t* const tableEnd     = kMap_ + 256;
    const size_t          convertCount = std::min (inCharCnt, *outCharCnt);
    for (size_t idx = 0; idx != convertCount; ++idx) {
        const char16_t* const hit = std::find (kMap_, tableEnd, inChars[idx]);
        if (hit == tableEnd) {
            /*Perhaps should add extra args to this function - as in Win32 API - for handling this missing-char case*/
            outChars[idx] = '?';
        }
        else {
            outChars[idx] = static_cast<char> (hit - kMap_);
        }
    }
    *outCharCnt = convertCount;
}
126 template <>
127 class TableDrivenCodePageConverter_<WellKnownCodePages::kANSI> {
128 public:
129 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
130 {
131 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
132 }
133 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
134 {
135 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
136 }
137
138 private:
139 static constexpr char16_t kMap_[256] = {
140 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
141 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
142 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
143 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
144 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
145 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
146 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
147 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
148 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x17d, 0x8f,
149 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x17e, 0x178,
150 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
151 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
152 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
153 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
154 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
155 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
156 };
157 };
158 template <>
159 class TableDrivenCodePageConverter_<WellKnownCodePages::kMAC> {
160 public:
161 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
162 {
163 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
164 }
165 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
166 {
167 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
168 }
169
170 private:
171 static constexpr char16_t kMap_[256] = {
172 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
173 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
174 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
175 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
176 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
177 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
178 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
179 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
180 0xc4, 0xc5, 0xc7, 0xc9, 0xd1, 0xd6, 0xdc, 0xe1, 0xe0, 0xe2, 0xe4, 0xe3, 0xe5, 0xe7, 0xe9, 0xe8,
181 0xea, 0xeb, 0xed, 0xec, 0xee, 0xef, 0xf1, 0xf3, 0xf2, 0xf4, 0xf6, 0xf5, 0xfa, 0xf9, 0xfb, 0xfc,
182 0x2020, 0xb0, 0xa2, 0xa3, 0xa7, 0x2022, 0xb6, 0xdf, 0xae, 0xa9, 0x2122, 0xb4, 0xa8, 0x2260, 0xc6, 0xd8,
183 0x221e, 0xb1, 0x2264, 0x2265, 0xa5, 0xb5, 0x2202, 0x2211, 0x220f, 0x3c0, 0x222b, 0xaa, 0xba, 0x2126, 0xe6, 0xf8,
184 0xbf, 0xa1, 0xac, 0x221a, 0x192, 0x2248, 0x2206, 0xab, 0xbb, 0x2026, 0xa0, 0xc0, 0xc3, 0xd5, 0x152, 0x153,
185 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0xf7, 0x25ca, 0xff, 0x178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
186 0x2021, 0xb7, 0x201a, 0x201e, 0x2030, 0xc2, 0xca, 0xc1, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 0xcc, 0xd3, 0xd4,
187 0xf8ff, 0xd2, 0xda, 0xdb, 0xd9, 0x131, 0x2c6, 0x2dc, 0xaf, 0x2d8, 0x2d9, 0x2da, 0xb8, 0x2dd, 0x2db, 0x2c7,
188 };
189 };
190 template <>
191 class TableDrivenCodePageConverter_<WellKnownCodePages::kPC> {
192 public:
193 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
194 {
195 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
196 }
197 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
198 {
199 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
200 }
201
202 private:
203 static constexpr char16_t kMap_[256] = {
204 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
205 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
206 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
207 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
208 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
209 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
210 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
211 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
212 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
213 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xa2, 0xa3, 0xa5, 0x20a7, 0x192,
214 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, 0xbf, 0x2310, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,
215 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
216 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
217 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
218 0x3b1, 0xdf, 0x393, 0x3c0, 0x3a3, 0x3c3, 0xb5, 0x3c4, 0x3a6, 0x398, 0x3a9, 0x3b4, 0x221e, 0x3c6, 0x3b5, 0x2229,
219 0x2261, 0xb1, 0x2265, 0x2264, 0x2320, 0x2321, 0xf7, 0x2248, 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0,
220 };
221 };
222 template <>
223 class TableDrivenCodePageConverter_<WellKnownCodePages::kPCA> {
224 public:
225 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
226 {
227 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
228 }
229 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
230 {
231 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
232 }
233
234 private:
235 static constexpr char16_t kMap_[256] = {
236 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
237 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
238 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
239 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
240 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
241 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
242 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
243 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
244 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
245 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x192,
246 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,
247 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0xc0, 0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510,
248 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4,
249 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x131, 0xcd, 0xce, 0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580,
250 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0xb4,
251 0xad, 0xb1, 0x2017, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0,
252 };
253 };
254 template <>
255 class TableDrivenCodePageConverter_<WellKnownCodePages::kGreek> {
256 public:
257 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
258 {
259 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
260 }
261 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
262 {
263 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
264 }
265
266 private:
267 static constexpr char16_t kMap_[256] = {
268 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
269 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
270 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
271 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
272 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
273 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
274 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
275 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
276 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x88, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
277 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
278 0xa0, 0x385, 0x386, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xf8f9, 0xab, 0xac, 0xad, 0xae, 0x2015,
279 0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0xb5, 0xb6, 0xb7, 0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f,
280 0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, 0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f,
281 0x3a0, 0x3a1, 0xf8fa, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af,
282 0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf,
283 0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xf8fb,
284 };
285 };
286 template <>
287 class TableDrivenCodePageConverter_<WellKnownCodePages::kTurkish> {
288 public:
289 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
290 {
291 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
292 }
293 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
294 {
295 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
296 }
297
298 private:
299 static constexpr char16_t kMap_[256] = {
300 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
301 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
302 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
303 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
304 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
305 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
306 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
307 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
308 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
309 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
310 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
311 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
312 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
313 0x11e, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x130, 0x15e, 0xdf,
314 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
315 0x11f, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff,
316 };
317 };
318 template <>
319 class TableDrivenCodePageConverter_<WellKnownCodePages::kHebrew> {
320 public:
321 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
322 {
323 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
324 }
325 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
326 {
327 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
328 }
329
330 private:
331 static constexpr char16_t kMap_[256] = {
332 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
333 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
334 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
335 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
336 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
337 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
338 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
339 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
340 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
341 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
342 0xa0, 0xa1, 0xa2, 0xa3, 0x20aa, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf,
343 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
344 0x5b0, 0x5b1, 0x5b2, 0x5b3, 0x5b4, 0x5b5, 0x5b6, 0x5b7, 0x5b8, 0x5b9, 0x5ba, 0x5bb, 0x5bc, 0x5bd, 0x5be, 0x5bf,
345 0x5c0, 0x5c1, 0x5c2, 0x5c3, 0x5f0, 0x5f1, 0x5f2, 0x5f3, 0x5f4, 0xf88d, 0xf88e, 0xf88f, 0xf890, 0xf891, 0xf892, 0xf893,
346 0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7, 0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df,
347 0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7, 0x5e8, 0x5e9, 0x5ea, 0xf894, 0xf895, 0x200e, 0x200f, 0xf896,
348 };
349 };
350 template <>
351 class TableDrivenCodePageConverter_<WellKnownCodePages::kArabic> {
352 public:
353 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
354 {
355 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
356 }
357 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
358 {
359 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
360 }
361
362 private:
363 static constexpr char16_t kMap_[256] = {
364 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
365 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
366 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
367 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
368 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
369 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
370 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
371 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
372 0x20ac, 0x67e, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x679, 0x2039, 0x152, 0x686, 0x698, 0x688,
373 0x6af, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x6a9, 0x2122, 0x691, 0x203a, 0x153, 0x200c, 0x200d, 0x6ba,
374 0xa0, 0x60c, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x6be, 0xab, 0xac, 0xad, 0xae, 0xaf,
375 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0x61b, 0xbb, 0xbc, 0xbd, 0xbe, 0x61f,
376 0x6c1, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627, 0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f,
377 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0xd7, 0x637, 0x638, 0x639, 0x63a, 0x640, 0x641, 0x642, 0x643,
378 0xe0, 0x644, 0xe2, 0x645, 0x646, 0x647, 0x648, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0x649, 0x64a, 0xee, 0xef,
379 0x64b, 0x64c, 0x64d, 0x64e, 0xf4, 0x64f, 0x650, 0xf7, 0x651, 0xf9, 0x652, 0xfb, 0xfc, 0x200e, 0x200f, 0x6d2,
380 };
381 };
382}
383
384namespace {
385
386#if qBuildInTableDrivenCodePageBuilderProc
387 static void WriteCodePageTable (CodePage codePage);
388
389 struct DoRunIt {
390 DoRunIt ()
391 {
392 //WriteCodePageTable (WellKnownCodePages::kANSI);
393 //WriteCodePageTable (WellKnownCodePages::kMAC);
394 //WriteCodePageTable (WellKnownCodePages::kPC);
395 //WriteCodePageTable (WellKnownCodePages::kPCA);
396 //WriteCodePageTable (WellKnownCodePages::kGreek);
397 //WriteCodePageTable (WellKnownCodePages::kTurkish);
398 //WriteCodePageTable (WellKnownCodePages::kHebrew);
399 //WriteCodePageTable (WellKnownCodePages::kArabic);
400 }
401 } gRunIt;
402#endif
403
404#if qBuildMemoizedISXXXBuilderProc
/*
 *  Code generator: writes, to the file "IsXXXProc.txt" in the current directory, the C++ source of
 *  a function 'bool <functionName> (wchar_t c)' that reproduces the results of 'function' for every
 *  character value in [0, 65536) as a chain of range comparisons.
 *
 *  function            predicate being memoized (called with each wchar_t value; result used as bool)
 *  origFunctionName    name of that predicate, used in the generated comment and Assert() checks
 *  functionName        name given to the generated function
 *
 *  The value space is emitted as three segments - [0,255), [255,5000), [5000,65536) - so lookups of
 *  low-valued characters only have to test the first (short) group of ranges.
 */
template <typename FUNCTION>
void WriteMemoizedIsXXXProc (FUNCTION function, const std::string& origFunctionName, const std::string& functionName)
{
    std::ofstream outStream ("IsXXXProc.txt");

    outStream << "bool " << functionName << " (wchar_t c)\n";
    outStream << "{\n";

    outStream << "\t// ********** " << functionName << " (AUTOGENERATED memoize of " << origFunctionName << " - " << __DATE__ << ") ***********\n";
    outStream << "\t// Hack for SPR#1220 and SPR#1306\n";

    // SPR#1308 - the generated if/then skipchain can be somewhat long, and therefore inefficient.
    // This is needlessly so. In principle - we could break up the long skipchain into many smaller ones
    // nested hierarchically. This would involve first pre-computing the entire list of regions, and
    // then generating the if/then code recursively since before you can generate INNER code you need
    // to know the full range of codepoints contained in all contained ifs.
    //
    // Anyhow - there is a trivial implementation that gets us most of the speedup we seek - since most
    // characters looked up fall in the low range. So - just handle that specially.
    //

    const wchar_t kBoundaryForSpecialLoop1 = 255;
    const wchar_t kBoundaryForSpecialLoop2 = 5000;

    for (int j = 0; j <= 2; ++j) {

        // Emit the guard for this segment and pick the [startLoop, endLoop) values it covers
        size_t startLoop = 0;
        size_t endLoop   = 0;
        if (j == 0) {
            outStream << "\tif (c < " << int (kBoundaryForSpecialLoop1) << ") {\n";
            endLoop = kBoundaryForSpecialLoop1;
        }
        else if (j == 1) {
            outStream << "\telse if (c < " << int (kBoundaryForSpecialLoop2) << ") {\n";
            startLoop = kBoundaryForSpecialLoop1;
            endLoop   = kBoundaryForSpecialLoop2;
        }
        else {
            outStream << "\telse {\n";
            startLoop = kBoundaryForSpecialLoop2;
            endLoop   = 256 * 256;
        }

        outStream << "\t\tif (";
        bool   firstTime = true;  // no range emitted yet for this segment
        bool   inRun     = false; // currently inside a run of values for which 'function' is true
        size_t runStart  = 0;
        for (size_t i = startLoop; i < endLoop; ++i) {
            const bool isT = function (static_cast<wchar_t> (i));

            // Open the run BEFORE testing for its end: otherwise a run consisting of just the last
            // value of the segment would be opened but never written out.
            if (isT and not inRun) {
                runStart = i;
                inRun    = true;
            }
            if (inRun and ((not isT) or (i + 1 == endLoop))) {
                // then emit the range...
                if (not firstTime) {
                    outStream << "\t\t\t||";
                }
                const size_t rangeEnd = isT ? i : i - 1;
                outStream << "(" << runStart << " <= c && c <= " << rangeEnd << ")";
                outStream << "\n";
                firstTime = false;
                inRun     = false;
            }
        }
        if (firstTime) {
            // nothing in this segment matched - keep the generated 'if' well-formed
            outStream << "false";
        }

        outStream << "\t\t\t) {\n";
        outStream << "\t\t\t#if qTestMyISWXXXFunctions\n";
        outStream << "\t\t\tAssert (" << origFunctionName << "(c));\n";
        outStream << "\t\t\t#endif\n";
        outStream << "\t\t\treturn true;\n";
        outStream << "\t\t}\n";

        outStream << "\t}\n";
    }
    outStream << "\t#if qTestMyISWXXXFunctions\n";
    outStream << "\tAssert (!" << origFunctionName << "(c));\n";
    outStream << "\t#endif\n";
    outStream << "\treturn false;\n";
    outStream << "}\n";
    outStream << "\n";
}
497
498 struct DoRunIt {
499 DoRunIt ()
500 {
501 WriteMemoizedIsXXXProc (iswalpha, "iswalpha", "CharacterProperties::IsAlpha_M");
502 //WriteMemoizedIsXXXProc (iswalnum, "iswalnum", "CharacterProperties::Ialnum_M");
503 //WriteMemoizedIsXXXProc (iswpunct, "iswpunct", "CharacterProperties::IsPunct_M");
504 //WriteMemoizedIsXXXProc (iswspace, "iswspace", "CharacterProperties::IsSpace_M");
505 //WriteMemoizedIsXXXProc (iswcntrl, "iswcntrl", "CharacterProperties::IsCntrl_M");
506 //WriteMemoizedIsXXXProc (iswdigit, "iswdigit", "CharacterProperties::IsDigit_M");
507 }
508 } gRunIt;
509#endif
510
511#if qTestMyISWXXXFunctions
512 class MyIsWXXXTesterFunctions {
513 public:
514 MyIsWXXXTesterFunctions ()
515 {
516 for (wchar_t c = 0; c < 0xffff; ++c) {
517 Assert (CharacterProperties::IsAlpha_M (c) == !!iswalpha (c));
518 Assert (CharacterProperties::IsAlnum_M (c) == !!iswalnum (c));
519 Assert (CharacterProperties::IsPunct_M (c) == !!iswpunct (c));
520 Assert (CharacterProperties::IsSpace_M (c) == !!iswspace (c));
521 Assert (CharacterProperties::IsCntrl_M (c) == !!iswcntrl (c));
522 Assert (CharacterProperties::IsDigit_M (c) == !!iswdigit (c));
523 }
524 }
525 } sMyIsWXXXTesterFunctions;
526#endif
527}
528
529/*
530 ********************************************************************************
531 ******************** CodePageNotSupportedException *****************************
532 ********************************************************************************
533 */
/*
 *  Construct the exception for the given (unsupported) code page.
 *
 *  The message text "Code page <codePage> not supported" is formatted once here and stored in
 *  fMsg_ as a narrow string; the code page itself is stored in fCodePage_.
 */
534CodePageNotSupportedException::CodePageNotSupportedException (CodePage codePage)
535 : fMsg_{"Code page {} not supported"_f(codePage).AsNarrowSDKString ()}
536 , fCodePage_{codePage}
537{
538}
539
/*
 *  Return the message built by the constructor.
 *
 *  The returned pointer refers to the character data held by fMsg_ (no copy is made here).
 */
540const char* CodePageNotSupportedException::what () const noexcept
541{
542 return fMsg_.c_str ();
543}
544
545/*
546 ********************************************************************************
547 ******************************** CodePageConverter *****************************
548 ********************************************************************************
549 */
550DISABLE_COMPILER_MSC_WARNING_START (4996);
551DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
552DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
553size_t CodePageConverter::MapFromUNICODE_QuickComputeOutBufSize (const wchar_t* inChars, size_t inCharCnt) const
554{
555 size_t resultSize;
556 switch (fCodePage) {
557 case WellKnownCodePages::kANSI:
558 resultSize = inCharCnt * 1;
559 break;
560 case WellKnownCodePages::kMAC:
561 resultSize = inCharCnt * 1;
562 break;
563 case WellKnownCodePages::kPC:
564 resultSize = inCharCnt * 1;
565 break;
566 case WellKnownCodePages::kPCA:
567 resultSize = inCharCnt * 1;
568 break;
569 case WellKnownCodePages::kSJIS:
570 resultSize = inCharCnt * 2;
571 break;
572 break; // ITHINK thats right... BOM appears to be 5 chars long? LGP 2001-09-11
573 case WellKnownCodePages::kUTF8:
574 resultSize = UTFConvert::ComputeTargetBufferSize<char8_t> (span{inChars, inChars + inCharCnt});
575 default:
576 resultSize = inCharCnt * 8;
577 break; // I THINK that should always be enough - but who knows...
578 }
579 if (GetHandleBOM ()) {
580 switch (fCodePage) {
581 case WellKnownCodePages::kUNICODE_WIDE:
582 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
583 // BOM (byte order mark)
584 resultSize += 2;
585 } break;
586 case WellKnownCodePages::kUTF8: {
587 resultSize += 3; // BOM (byte order mark)
588 }
589 }
590 }
591 return resultSize;
592}
593
/*
 *  Convert inMBCharCnt bytes at inMBChars, encoded in this converter's code page (fCodePage), to
 *  char16_t values written to outChars.
 *
 *  inMBChars / inMBCharCnt     input bytes (inMBChars may be null only if inMBCharCnt is 0)
 *  outChars                    destination buffer (may be null only if *outCharCnt is 0)
 *  outCharCnt                  must not be null; on return holds the number of characters produced.
 *                              For the table-driven and UTF-8 cases the incoming value is used as the
 *                              capacity of outChars.
 */
594void CodePageConverter::MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt) const
595{
596 Require (inMBCharCnt == 0 or inMBChars != nullptr);
597 RequireNotNull (outCharCnt);
598 Require (*outCharCnt == 0 or outChars != nullptr);
599
// When BOM handling is enabled: if the guesser's result for the input equals our code page, skip
// over the number of leading bytes it reports (bytesToStrip) before converting.
600 if (GetHandleBOM ()) {
601 size_t bytesToStrip = 0;
602 if (CodePagesGuesser{}.Guess (inMBChars, inMBCharCnt, nullptr, &bytesToStrip) == fCodePage) {
603 Assert (inMBCharCnt >= bytesToStrip);
604 inMBChars += bytesToStrip;
605 inMBCharCnt -= bytesToStrip;
606 }
607 }
608 switch (fCodePage) {
// Single-byte code pages with baked-in 256-entry tables
609 case WellKnownCodePages::kANSI:
610 TableDrivenCodePageConverter_<WellKnownCodePages::kANSI>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
611 break;
612 case WellKnownCodePages::kMAC:
613 TableDrivenCodePageConverter_<WellKnownCodePages::kMAC>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
614 break;
615 case WellKnownCodePages::kPC:
616 TableDrivenCodePageConverter_<WellKnownCodePages::kPC>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
617 break;
618 case WellKnownCodePages::kPCA:
619 TableDrivenCodePageConverter_<WellKnownCodePages::kPCA>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
620 break;
621 case WellKnownCodePages::kGreek:
622 TableDrivenCodePageConverter_<WellKnownCodePages::kGreek>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
623 break;
624 case WellKnownCodePages::kTurkish:
625 TableDrivenCodePageConverter_<WellKnownCodePages::kTurkish>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
626 break;
627 case WellKnownCodePages::kHebrew:
628 TableDrivenCodePageConverter_<WellKnownCodePages::kHebrew>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
629 break;
630 case WellKnownCodePages::kArabic:
631 TableDrivenCodePageConverter_<WellKnownCodePages::kArabic>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
632 break;
// Input already is wide characters in native byte order: straight copy.
// NOTE(review): the element count and byte count are computed with sizeof (wchar_t) although the
// destination is char16_t, and the copy is not limited by the incoming *outCharCnt - verify this
// on platforms where wchar_t is not 16 bits and for undersized output buffers.
633 case WellKnownCodePages::kUNICODE_WIDE: {
634 const wchar_t* inWChars = reinterpret_cast<const wchar_t*> (inMBChars);
635 size_t inWCharCnt = (inMBCharCnt / sizeof (wchar_t));
636 *outCharCnt = inWCharCnt;
637 (void)::memcpy (outChars, inWChars, inWCharCnt * sizeof (wchar_t));
638 } break;
// Same as above, but the two low-order bytes of each character are exchanged while copying.
// NOTE(review): same sizeof (wchar_t) / capacity caveats as the kUNICODE_WIDE case.
639 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
640 const wchar_t* inWChars = reinterpret_cast<const wchar_t*> (inMBChars);
641 size_t inWCharCnt = (inMBCharCnt / sizeof (wchar_t));
642 *outCharCnt = inWCharCnt;
643 for (size_t i = 0; i < inWCharCnt; ++i) {
644 wchar_t c = inWChars[i];
645 // byteswap
646 c = ((c & 0xff) << 8) + (c >> 8);
647 outChars[i] = c;
648 }
649 } break;
// UTF-8: delegate to UTFConvert, bounded by the incoming *outCharCnt; report what it produced
650 case WellKnownCodePages::kUTF8: {
651 *outCharCnt = UTFConvert::kThe.Convert (span{inMBChars, inMBChars + inMBCharCnt}, span{outChars, *outCharCnt}).fTargetProduced;
652 } break;
// Any other code page: on Windows defer to the platform converter.
// NOTE(review): the non-Windows (#else) branch is empty in this listing - confirm against the
// real source what happens for unsupported code pages on other platforms.
653 default: {
654#if qStroika_Foundation_Common_Platform_Windows
655 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapToUNICODE (inMBChars, inMBCharCnt,
656 SAFE_WIN_WCHART_CAST_ (outChars), outCharCnt);
657#else
659#endif
660 } break;
661 }
662
// Disabled (&& 0) cross-check of the result against the Win32 platform converter
663#if qStroika_Foundation_Common_Platform_Windows && 0
665 // Assure my baked tables (and UTF8 converters) perform the same as the builtin Win32 API
666 size_t tstCharCnt = *outCharCnt;
667 StackBuffer<wchar_t> tstBuf{Memory::eUninitialized, *outCharCnt};
668 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapToUNICODE (inMBChars, inMBCharCnt, tstBuf, &tstCharCnt);
669 Assert (tstCharCnt == *outCharCnt);
670 Assert (memcmp (tstBuf, outChars, sizeof (wchar_t) * tstCharCnt) == 0);
671 }
672#endif
673}
674
675void CodePageConverter::MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char32_t* outChars, size_t* outCharCnt) const
676{
677 // Not really right - but hopefully adquate for starters -- LGP 2011-09-06
678 StackBuffer<char16_t> tmpBuf{Memory::eUninitialized, *outCharCnt};
679 MapToUNICODE (inMBChars, inMBCharCnt, tmpBuf.data (), outCharCnt);
680 for (size_t i = 0; i < *outCharCnt; ++i) {
681 outChars[i] = tmpBuf[i];
682 }
683}
684
// CodePageConverter::MapFromUNICODE (char16_t overload)
//
// Converts inCharCnt char16_t units starting at inChars into bytes stored in outChars; the conversion
// used is selected by switching on fCodePage.
//
//   inChars / inCharCnt - source text; inChars may be nullptr only when inCharCnt == 0.
//   outChars            - destination buffer; may be nullptr only when *outCharCnt == 0.
//   outCharCnt          - must not be null. The value on entry is saved as outBufferSize (presumably the
//                         capacity of outChars in bytes - confirm against the header); every branch stores
//                         its resulting count back through this pointer.
//
// When GetHandleBOM () is true, the kUNICODE_WIDE, kUNICODE_WIDE_BIGENDIAN and kUTF8 branches write a byte
// order mark at the front of outChars.
//
// NOTE(review): unlike the MapToUNICODE switch earlier in this file, there are no kGreek / kTurkish /
// kHebrew / kArabic cases here, so those code pages take the default branch.
685 void CodePageConverter::MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt) const
686 {
687 Require (inCharCnt == 0 or inChars != nullptr);
688 RequireNotNull (outCharCnt);
689 Require (*outCharCnt == 0 or outChars != nullptr);
690
// Remember the caller-supplied size; it is only consumed by the Windows cross-check at the end.
691 [[maybe_unused]] size_t outBufferSize = *outCharCnt;
692#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
693 size_t countOfBOMCharsAdded = 0; // just for the Windows debug check at the end
694#endif
695
696 switch (fCodePage) {
// Code pages handled by the table-driven converters.
697 case WellKnownCodePages::kANSI:
698 TableDrivenCodePageConverter_<WellKnownCodePages::kANSI>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
699 break;
700 case WellKnownCodePages::kMAC:
701 TableDrivenCodePageConverter_<WellKnownCodePages::kMAC>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
702 break;
703 case WellKnownCodePages::kPC:
704 TableDrivenCodePageConverter_<WellKnownCodePages::kPC>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
705 break;
706 case WellKnownCodePages::kPCA:
707 TableDrivenCodePageConverter_<WellKnownCodePages::kPCA>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
708 break;
// Wide characters in native byte order: optional FF FE mark, then a raw memcpy of the input.
// NOTE(review): only "*outCharCnt >= 2" is checked before writing; nothing visible here verifies that the
// buffer can hold inCharCnt * sizeof (wchar_t) bytes plus a BOM - confirm callers size the buffer.
// NOTE(review): the input is char16_t but all sizes use sizeof (wchar_t); where the two differ the memcpy
// length would not match the input length - verify.
// NOTE(review): with BOM handling on, outWBytes advances by one wchar_t while the mark written is 2 bytes,
// and outByteCount is decreased by 2 rather than increased - looks suspicious, verify.
709 case WellKnownCodePages::kUNICODE_WIDE: {
710 if (*outCharCnt >= 2) {
711 wchar_t* outWBytes = reinterpret_cast<wchar_t*> (outChars);
712 size_t outByteCount = (inCharCnt * sizeof (wchar_t));
713 if (GetHandleBOM ()) {
714 ++outWBytes; // skip BOM
715 outByteCount -= 2; // subtract for BOM
716 }
717 *outCharCnt = outByteCount;
718 if (GetHandleBOM ()) {
719 outChars[0] = '\xff';
720 outChars[1] = '\xfe';
721#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
722 countOfBOMCharsAdded = 2;
723#endif
724 }
725 (void)::memcpy (outWBytes, inChars, inCharCnt * sizeof (wchar_t));
726 }
727 else {
728 *outCharCnt = 0;
729 }
730 } break;
// Same layout as the kUNICODE_WIDE branch, but the mark is FE FF and the two low bytes of every unit are
// swapped before being stored. The review notes on the kUNICODE_WIDE branch apply here as well.
731 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
732 if (*outCharCnt >= 2) {
733 wchar_t* outWBytes = reinterpret_cast<wchar_t*> (outChars);
734 size_t outByteCount = (inCharCnt * sizeof (wchar_t));
735 if (GetHandleBOM ()) {
736 ++outWBytes; // skip BOM
737 outByteCount -= 2; // subtract for BOM
738 }
739 *outCharCnt = outByteCount;
740 if (GetHandleBOM ()) {
741 outChars[0] = '\xfe';
742 outChars[1] = '\xff';
743#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
744 countOfBOMCharsAdded = 2;
745#endif
746 }
747 for (size_t i = 0; i < inCharCnt; ++i) {
748 wchar_t c = inChars[i];
749 // byteswap
750 c = ((c & 0xff) << 8) + (c >> 8);
751 outWBytes[i] = c;
752 }
753 }
754 else {
755 *outCharCnt = 0;
756 }
757 } break;
// UTF-8: optional EF BB BF mark, then UTFConvert::kThe.Convert writes into the space after the mark.
// NOTE(review): when GetHandleBOM () is true but fewer than 3 bytes are available, no mark is written and
// the target span is empty, yet 3 is still added to the reported count below - looks wrong, verify.
758 case WellKnownCodePages::kUTF8: {
759 char* useOutChars = outChars;
760 size_t useOutCharCount = *outCharCnt;
761 if (GetHandleBOM ()) {
762 if (*outCharCnt >= 3) {
763 useOutChars += 3; // skip BOM
764 useOutCharCount -= 3;
765 reinterpret_cast<unsigned char*> (outChars)[0] = 0xef;
766 reinterpret_cast<unsigned char*> (outChars)[1] = 0xbb;
767 reinterpret_cast<unsigned char*> (outChars)[2] = 0xbf;
768#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
769 countOfBOMCharsAdded = 3;
770#endif
771 }
772 else {
773 useOutCharCount = 0;
774 }
775 }
776 useOutCharCount =
777 UTFConvert::kThe.Convert (span{inChars, inCharCnt}, span{reinterpret_cast<char8_t*> (useOutChars), useOutCharCount}).fTargetProduced;
778 if (GetHandleBOM ()) {
779 useOutCharCount += 3;
780 }
781 *outCharCnt = useOutCharCount;
782 } break;
// Any other code page: on Windows, defer to the platform converter.
// NOTE(review): no statement is visible in the non-Windows (#else) branch of this listing - check the
// real source for what happens there.
783 default: {
784#if qStroika_Foundation_Common_Platform_Windows
785 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapFromUNICODE (SAFE_WIN_WCHART_CAST_ (inChars), inCharCnt,
786 outChars, outCharCnt);
787#else
789#endif
790 }
791 }
792
// Windows-only cross-check: repeat the conversion with the platform converter into a scratch buffer of the
// original size, then (when assertions are enabled) compare the count and bytes with what was produced
// above, skipping over any BOM that was added.
// NOTE(review): the line opening the scope closed by the lone '}' below is not visible in this listing.
793#if qStroika_Foundation_Common_Platform_Windows
795 // Assure my baked tables perform the same as the builtin Win32 API
796 size_t win32TstCharCnt = outBufferSize;
797 StackBuffer<char> win32TstBuf{Memory::eUninitialized, win32TstCharCnt};
798
799 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapFromUNICODE (SAFE_WIN_WCHART_CAST_ (inChars), inCharCnt,
800 win32TstBuf.data (), &win32TstCharCnt);
801
802// SPR#0813 (and SPR#1277) - assert this produces the right result OR a '?' character -
803// used for bad conversions. Reason is cuz for characters that don't map - our table and
804// the system table can differ in how they map depending on current OS code page.
805#if qStroika_Foundation_Debug_AssertionsChecked
806 Assert ((win32TstCharCnt + countOfBOMCharsAdded) == *outCharCnt or outChars[0] == '?');
807 Assert (memcmp (win32TstBuf.data (), outChars + countOfBOMCharsAdded, win32TstCharCnt) == 0 or outChars[0] == '?');
808#endif
809 }
810#endif
811}
812
813void CodePageConverter::MapFromUNICODE (const char32_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt) const
814{
815 char* useOutChars = outChars;
816 size_t useOutCharCount = *outCharCnt;
817 bool addBOM = GetHandleBOM () and useOutCharCount >= 3;
818 if (addBOM) {
819 useOutChars += 3; // skip BOM
820 useOutCharCount -= 3;
821 reinterpret_cast<unsigned char*> (outChars)[0] = 0xef;
822 reinterpret_cast<unsigned char*> (outChars)[1] = 0xbb;
823 reinterpret_cast<unsigned char*> (outChars)[2] = 0xbf;
824 }
825 *outCharCnt = UTFConvert::kThe.Convert (span{inChars, inCharCnt}, span{reinterpret_cast<char8_t*> (useOutChars), useOutCharCount}).fTargetProduced;
826 if (addBOM) {
827 *outCharCnt += 3;
828 }
829}
830DISABLE_COMPILER_MSC_WARNING_END (4996);
831DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
832DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
833
834/*
835 ********************************************************************************
836 ***************************** CodePagesInstalled *******************************
837 ********************************************************************************
838 */
namespace {
#if qStroika_Foundation_Common_Platform_Windows
    // Set the callback below inserts into. CodePagesInstalled's constructor points it at its own set
    // around the ::EnumSystemCodePages call and resets it afterwards.
    shared_ptr<set<CodePage>> s_EnumCodePagesProc_Accumulator_;

    // Callback passed to ::EnumSystemCodePages: parses the code page identifier string it is given and
    // records the resulting number in the accumulator.
    BOOL FAR PASCAL EnumCodePagesProc_ (LPTSTR lpCodePageString)
    {
        const auto parsedCodePage = _ttoi (lpCodePageString);
        s_EnumCodePagesProc_Accumulator_->insert (parsedCodePage);
        return 1; // nonzero result
    }
#endif
}
849
850CodePagesInstalled::CodePagesInstalled ()
851{
852 Assert (fCodePages_.size () == 0);
853
854 shared_ptr<set<CodePage>> accum = make_shared<set<CodePage>> ();
855#if qStroika_Foundation_Common_Platform_Windows
856 static mutex sCritSec_;
857 {
858 [[maybe_unused]] lock_guard critSec{sCritSec_};
859 Assert (s_EnumCodePagesProc_Accumulator_.get () == nullptr);
860 s_EnumCodePagesProc_Accumulator_ = accum;
861 ::EnumSystemCodePages (EnumCodePagesProc_, CP_INSTALLED);
862 s_EnumCodePagesProc_Accumulator_.reset ();
863 }
864#endif
865 // Add these 'fake' code pages - which I believe are always available, but never listed by EnumSystemCodePages()
866 accum->insert (WellKnownCodePages::kUNICODE_WIDE);
867 accum->insert (WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN);
868 accum->insert (WellKnownCodePages::kUTF8);
869 fCodePages_ = vector<CodePage>{accum->begin (), accum->end ()};
870}
871
872/*
873 ********************************************************************************
874 ********************************** CodePagesGuesser ****************************
875 ********************************************************************************
876 */
877DISABLE_COMPILER_MSC_WARNING_START (4996);
878DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
879DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
880CodePage CodePagesGuesser::Guess (const void* input, size_t nBytes, Confidence* confidence, size_t* bytesFromFrontToStrip)
881{
882 if (confidence != nullptr) {
883 *confidence = Confidence::eLow;
884 }
885 if (bytesFromFrontToStrip != nullptr) {
886 *bytesFromFrontToStrip = 0;
887 }
888 if (nBytes >= 2) {
889 unsigned char c0 = reinterpret_cast<const unsigned char*> (input)[0];
890 unsigned char c1 = reinterpret_cast<const unsigned char*> (input)[1];
891 if (c0 == 0xff and c1 == 0xfe) {
892 if (confidence != nullptr) {
893 *confidence = Confidence::eHigh;
894 }
895 if (bytesFromFrontToStrip != nullptr) {
896 *bytesFromFrontToStrip = 2;
897 }
898 return WellKnownCodePages::kUNICODE_WIDE;
899 }
900 if (c0 == 0xfe and c1 == 0xff) {
901 if (confidence != nullptr) {
902 *confidence = Confidence::eHigh;
903 }
904 if (bytesFromFrontToStrip != nullptr) {
905 *bytesFromFrontToStrip = 2;
906 }
907 return WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN;
908 }
909 if (nBytes >= 3) {
910 unsigned char c2 = reinterpret_cast<const unsigned char*> (input)[2];
911 if (c0 == 0xef and c1 == 0xbb and c2 == 0xbf) {
912 if (confidence != nullptr) {
913 *confidence = Confidence::eHigh;
914 }
915 if (bytesFromFrontToStrip != nullptr) {
916 *bytesFromFrontToStrip = 3;
917 }
918 return WellKnownCodePages::kUTF8;
919 }
920 }
921 }
922
923 /*
924 * Final ditch efforts if we don't recognize any prefix.
925 */
926 if (confidence != nullptr) {
927 *confidence = Confidence::eLow;
928 }
929 return Characters::GetDefaultSDKCodePage ();
930}
931DISABLE_COMPILER_MSC_WARNING_END (4996);
932DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
933DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
934
935/*
936 ********************************************************************************
937 ****************************** CodePagePrettyNameMapper ************************
938 ********************************************************************************
939 */
// Static table of display names that CodePagePrettyNameMapper::GetName reads from; its initial value is
// whatever MakeDefaultCodePageNames () returns.
940CodePagePrettyNameMapper::CodePageNames CodePagePrettyNameMapper::sCodePageNames_ = CodePagePrettyNameMapper::MakeDefaultCodePageNames ();
941
942CodePagePrettyNameMapper::CodePageNames CodePagePrettyNameMapper::MakeDefaultCodePageNames ()
943{
944 CodePageNames codePageNames;
945 codePageNames.fUNICODE_WIDE = L"UNICODE {wide characters}"sv;
946 codePageNames.fUNICODE_WIDE_BIGENDIAN = L"UNICODE {wide characters - big endian}"sv;
947 codePageNames.fANSI = L"ANSI (1252)"sv;
948 codePageNames.fMAC = L"MAC (2)"sv;
949 codePageNames.fPC = L"IBM PC United States code page (437)"sv;
950 codePageNames.fSJIS = L"Japanese SJIS {932}"sv;
951 codePageNames.fUTF8 = L"UNICODE {UTF-8}"sv;
952 codePageNames.f850 = L"Latin I - MS-DOS Multilingual (850)"sv;
953 codePageNames.f851 = L"Latin II - MS-DOS Slavic (850)"sv;
954 codePageNames.f866 = L"Russian - MS-DOS (866)"sv;
955 codePageNames.f936 = L"Chinese {Simplfied} (936)"sv;
956 codePageNames.f949 = L"Korean (949)"sv;
957 codePageNames.f950 = L"Chinese {Traditional} (950)"sv;
958 codePageNames.f1250 = L"Eastern European Windows (1250)"sv;
959 codePageNames.f1251 = L"Cyrilic (1251)"sv;
960 codePageNames.f10000 = L"Roman {Macintosh} (10000)"sv;
961 codePageNames.f10001 = L"Japanese {Macintosh} (10001)"sv;
962 codePageNames.f50220 = L"Japanese JIS (50220)"sv;
963 return codePageNames;
964}
965
966wstring CodePagePrettyNameMapper::GetName (CodePage cp)
967{
968 switch (cp) {
969 case WellKnownCodePages::kUNICODE_WIDE:
970 return sCodePageNames_.fUNICODE_WIDE;
971 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN:
972 return sCodePageNames_.fUNICODE_WIDE_BIGENDIAN;
973 case WellKnownCodePages::kANSI:
974 return sCodePageNames_.fANSI;
975 case WellKnownCodePages::kMAC:
976 return sCodePageNames_.fMAC;
977 case WellKnownCodePages::kPC:
978 return sCodePageNames_.fPC;
979 case WellKnownCodePages::kSJIS:
980 return sCodePageNames_.fSJIS;
981 case WellKnownCodePages::kUTF8:
982 return sCodePageNames_.fUTF8;
983 case 850:
984 return sCodePageNames_.f850;
985 case 851:
986 return sCodePageNames_.f851;
987 case 866:
988 return sCodePageNames_.f866;
989 case 936:
990 return sCodePageNames_.f936;
991 case 949:
992 return sCodePageNames_.f949;
993 case 950:
994 return sCodePageNames_.f950;
995 case 1250:
996 return sCodePageNames_.f1250;
997 case WellKnownCodePages::kCyrilic:
998 return sCodePageNames_.f1251;
999 case 10000:
1000 return sCodePageNames_.f10000;
1001 case 10001:
1002 return sCodePageNames_.f10001;
1003 case 50220:
1004 return sCodePageNames_.f50220;
1005 default: {
1006 return Characters::CString::Format (L"%d", cp);
1007 }
1008 }
1009}
1010
1011/*
1012 ********************************************************************************
1013 *********************** Characters::WideStringToNarrow *************************
1014 ********************************************************************************
1015 */
1016void Characters::WideStringToNarrow (const wchar_t* wsStart, const wchar_t* wsEnd, CodePage codePage, string* intoResult)
1017{
1018 RequireNotNull (intoResult);
1019 Require (wsStart <= wsEnd);
1020#if qStroika_Foundation_Common_Platform_Windows
1021 Platform::Windows::WideStringToNarrow (wsStart, wsEnd, codePage, intoResult);
1022#else
1023 *intoResult = CodeCvt<wchar_t>{codePage}.String2Bytes<string> (span{wsStart, wsEnd});
1024#endif
1025}
1026
1027/*
1028 ********************************************************************************
1029 *********************** Characters::NarrowStringToWide *************************
1030 ********************************************************************************
1031 */
// Disabled (#if 0) helper: converts narrow text to a wstring through CodePageConverter, sizing the result
// with MapToUNICODE_QuickComputeOutBufSize and then shrinking it to the count MapToUNICODE reports.
1032#if 0
1033namespace {
1034 void PortableNarrowStringToWide_ (const char* sStart, const char* sEnd, CodePage codePage, wstring* intoResult)
1035 {
1036 RequireNotNull (intoResult);
1037 Require (sStart <= sEnd);
1038 size_t inSize = sEnd - sStart;
1039 CodePageConverter cc{codePage};
1040 // this grossly overestimates size - which is a problem for the RESIZE below!!! COULD pointlessly run out of memory and initialize data to good values...
1041 size_t outSizeBuf = cc.MapToUNICODE_QuickComputeOutBufSize (sStart, inSize);
1042 intoResult->resize (outSizeBuf);
1043 size_t actualOutSize = 0;
1044 if (inSize != 0) {
1045 actualOutSize = outSizeBuf;
1046 cc.MapToUNICODE (sStart, inSize, Containers::Start (*intoResult), &actualOutSize);
1047 if (intoResult->size () != actualOutSize) {
1048 // shrink
1049 Assert (intoResult->size () > actualOutSize);
1050 intoResult->resize (actualOutSize);
1051 }
1052 }
1053 }
1054}
1055#endif
1056void Characters::NarrowStringToWide (const char* sStart, const char* sEnd, CodePage codePage, wstring* intoResult)
1057{
1058 RequireNotNull (intoResult);
1059 Require (sStart <= sEnd);
1060#if qStroika_Foundation_Common_Platform_Windows
1061 Platform::Windows::NarrowStringToWide (sStart, sEnd, codePage, intoResult);
1062#else
1063 *intoResult =
1064 CodeCvt<wchar_t>{codePage}.Bytes2String<wstring> (span{reinterpret_cast<const byte*> (sStart), static_cast<size_t> (sEnd - sStart)});
1065#endif
1066}
1067
1068/*
1069 ********************************************************************************
1070 *********************** MapUNICODETextWithMaybeBOMTowstring ********************
1071 ********************************************************************************
1072 */
1073DISABLE_COMPILER_MSC_WARNING_START (4996);
1074DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1075DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
1076wstring Characters::MapUNICODETextWithMaybeBOMTowstring (const char* start, const char* end)
1077{
1078 // THIS IMPL DEPRECATED SO GO AWAY SOON
1079 Require (start <= end);
1080 if (start == end) {
1081 return wstring{};
1082 }
1083 else {
1084 size_t outBufSize = end - start;
1085 StackBuffer<wchar_t> wideBuf{Memory::eUninitialized, outBufSize};
1086 size_t outCharCount = outBufSize;
1087 MapSBUnicodeTextWithMaybeBOMToUNICODE (start, end - start, wideBuf.data (), &outCharCount);
1088 Assert (outCharCount <= outBufSize);
1089 if (outCharCount == 0) {
1090 return wstring{};
1091 }
1092
1093 // The wideBuf may be NUL-terminated or not (depending on whether the input was NUL-terminated or not).
1094 // Be sure to construct the resuting string with the right end-of-string pointer (the length doesn't include
1095 // the NUL-char)
1096 return wstring{wideBuf.data (), wideBuf[outCharCount - 1] == '\0' ? (outCharCount - 1) : outCharCount};
1097 }
1098}
1099DISABLE_COMPILER_MSC_WARNING_END (4996);
1100DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1101DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
1102
1103/*
1104 ********************************************************************************
1105 *********************** MapUNICODETextToSerializedFormat ***********************
1106 ********************************************************************************
1107 */
1108DISABLE_COMPILER_MSC_WARNING_START (4996);
1109DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1110DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
1111vector<byte> Characters::MapUNICODETextToSerializedFormat (const wchar_t* start, const wchar_t* end, CodePage useCP)
1112{
1113 // THIS IMPL DEPRECATED SO GO AWAY SOON
1114 CodePageConverter cpc{useCP, CodePageConverter::eHandleBOM};
1115 size_t outCharCount = cpc.MapFromUNICODE_QuickComputeOutBufSize (start, end - start);
1116 StackBuffer<char> buf{Memory::eUninitialized, outCharCount};
1117 cpc.MapFromUNICODE (start, end - start, buf.data (), &outCharCount);
1118 const byte* bs = reinterpret_cast<const byte*> (static_cast<const char*> (buf));
1119 return vector<byte>{bs, bs + outCharCount};
1120}
1121DISABLE_COMPILER_MSC_WARNING_END (4996);
1122DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1123DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
#define qStroika_Foundation_Debug_AssertionsChecked
The qStroika_Foundation_Debug_AssertionsChecked flag determines if assertions are checked and validat...
Definition Assertions.h:48
#define RequireNotNull(p)
Definition Assertions.h:347
CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt,...
Definition CodeCvt.h:118
nonvirtual STRINGISH Bytes2String(span< const byte > from) const
nonvirtual BLOBISH String2Bytes(span< const CHAR_T > from) const
virtual const char * what() const noexcept override
Definition CodePage.cpp:540
nonvirtual ConversionResult Convert(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded strings/spans (including the special case of ASCII, and Latin1) (e....
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Definition UTFConvert.h:369
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
CONTAINER::value_type * Start(CONTAINER &c)
For a contiguous container (such as a vector or basic_string) - find the pointer to the start of the ...
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43