Stroika Library 3.0d23x
 
Loading...
Searching...
No Matches
CodePage.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <algorithm>
7#include <mutex>
8#include <set>
9
14#include "Stroika/Foundation/Common/Common.h"
15#include "Stroika/Foundation/Containers/Common.h"
16#include "Stroika/Foundation/Execution/Common.h"
17#include "Stroika/Foundation/Execution/Exceptions.h"
20
21#include "CodePage.h"
22
23using std::byte;
24
25using namespace Stroika::Foundation;
27using namespace Stroika::Foundation::Memory;
28
29// Comment this in to turn on aggressive noisy DbgTrace in this module
30//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
31
32#ifndef qBuildInTableDrivenCodePageBuilderProc
33#define qBuildInTableDrivenCodePageBuilderProc 0
34#endif
35
36/*
37 * Hack to build 'myiswalpha' and 'myiswpunct' for SPR#1220 (revised and moved here for SPR#1306 and class
38 * 'CharacterProperties').
39 */
40#ifndef qBuildMemoizedISXXXBuilderProc
41#define qBuildMemoizedISXXXBuilderProc 0
42#endif
43
44/*
45 * Use this to test my IsWXXX functions produce the right results. Only test under WinXP,
46 * since that is the reference they are copying (SPR#1229).
47 */
48#ifndef qTestMyISWXXXFunctions
49#define qTestMyISWXXXFunctions 0
50#endif
51
52#if qBuildInTableDrivenCodePageBuilderProc || qBuildMemoizedISXXXBuilderProc
53#include <fstream>
54#endif
55
56using namespace Characters;
57
58#if qStroika_Foundation_Common_Platform_Windows
namespace {
    /*
     *  Pointer adapters between char16_t and wchar_t buffers, used when handing
     *  char16_t data to APIs that take wchar_t (and vice versa). Only compiled when
     *  qStroika_Foundation_Common_Platform_Windows is set.
     *
     *  NOTE(review): the casts assume both types have the same size and representation;
     *  nothing here checks that - confirm it holds for every Windows toolchain in use.
     */
    template <typename TO, typename FROM>
    inline TO* ReinterpretCodeUnits_ (FROM* p)
    {
        return reinterpret_cast<TO*> (p);
    }
    inline const wchar_t* SAFE_WIN_WCHART_CAST_ (const char16_t* t)
    {
        return ReinterpretCodeUnits_<const wchar_t> (t);
    }
    inline wchar_t* SAFE_WIN_WCHART_CAST_ (char16_t* t)
    {
        return ReinterpretCodeUnits_<wchar_t> (t);
    }
    inline const char16_t* SAFE_WIN_WCHART_CAST_ (const wchar_t* t)
    {
        return ReinterpretCodeUnits_<const char16_t> (t);
    }
    inline char16_t* SAFE_WIN_WCHART_CAST_ (wchar_t* t)
    {
        return ReinterpretCodeUnits_<char16_t> (t);
    }
}
77#endif
78
79/*
80 ********************************************************************************
81 **************************** Characters::GetCharsetString **********************
82 ********************************************************************************
83 */
84wstring Characters::GetCharsetString (CodePage cp)
85{
86 switch (cp) {
87 case WellKnownCodePages::kUTF8:
88 return L"utf-8";
89 default:
90 return Characters::CString::Format (L"CodePage %d", cp);
91 }
92}
93
94namespace {
95 template <CodePage CODEPAGE>
96 class TableDrivenCodePageConverter_ {
97 public:
98 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt);
99 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt);
100 };
/*
 *  Translate single-byte text to char16_t values with a 256-entry lookup table.
 *
 *  kMap_        table indexed by the (unsigned) byte value
 *  inMBChars    source bytes; inMBCharCnt of them
 *  outChars     destination buffer
 *  outCharCnt   in: capacity of outChars; out: number of elements written,
 *               which is min (inMBCharCnt, capacity) - excess input is dropped.
 */
inline void MapToUNICODEFromTable_ (const char16_t kMap_[256], const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
{
    const size_t produced = std::min (inMBCharCnt, *outCharCnt);
    // Index through unsigned char so bytes >= 0x80 never yield a negative subscript.
    std::transform (inMBChars, inMBChars + produced, outChars,
                    [kMap_] (char byteValue) { return kMap_[static_cast<unsigned char> (byteValue)]; });
    *outCharCnt = produced;
}
/*
 *  Translate char16_t values to single-byte text by reverse lookup in a 256-entry table.
 *
 *  Each input value is searched for in kMap_; the index of the first matching entry is
 *  the output byte. Values not present in the table are written as '?'.
 *
 *  outCharCnt   in: capacity of outChars; out: number of bytes written,
 *               which is min (inCharCnt, capacity).
 */
inline void MapFromUNICODEFromTable_ (const char16_t kMap_[256], const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
{
    const char16_t* const tableEnd = kMap_ + 256;
    const size_t          produced = std::min (inCharCnt, *outCharCnt);
    for (size_t idx = 0; idx != produced; ++idx) {
        const char16_t* const hit = std::find (kMap_, tableEnd, inChars[idx]);
        // Perhaps this function should take extra args - as the Win32 API does - for the missing-char case.
        outChars[idx] = (hit == tableEnd) ? '?' : static_cast<char> (hit - kMap_);
    }
    *outCharCnt = produced;
}
127 template <>
128 class TableDrivenCodePageConverter_<WellKnownCodePages::kANSI> {
129 public:
130 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
131 {
132 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
133 }
134 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
135 {
136 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
137 }
138
139 private:
140 static constexpr char16_t kMap_[256] = {
141 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
142 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
143 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
144 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
145 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
146 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
147 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
148 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
149 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x17d, 0x8f,
150 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x17e, 0x178,
151 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
152 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
153 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
154 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
155 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
156 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
157 };
158 };
159 template <>
160 class TableDrivenCodePageConverter_<WellKnownCodePages::kMAC> {
161 public:
162 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
163 {
164 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
165 }
166 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
167 {
168 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
169 }
170
171 private:
172 static constexpr char16_t kMap_[256] = {
173 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
174 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
175 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
176 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
177 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
178 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
179 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
180 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
181 0xc4, 0xc5, 0xc7, 0xc9, 0xd1, 0xd6, 0xdc, 0xe1, 0xe0, 0xe2, 0xe4, 0xe3, 0xe5, 0xe7, 0xe9, 0xe8,
182 0xea, 0xeb, 0xed, 0xec, 0xee, 0xef, 0xf1, 0xf3, 0xf2, 0xf4, 0xf6, 0xf5, 0xfa, 0xf9, 0xfb, 0xfc,
183 0x2020, 0xb0, 0xa2, 0xa3, 0xa7, 0x2022, 0xb6, 0xdf, 0xae, 0xa9, 0x2122, 0xb4, 0xa8, 0x2260, 0xc6, 0xd8,
184 0x221e, 0xb1, 0x2264, 0x2265, 0xa5, 0xb5, 0x2202, 0x2211, 0x220f, 0x3c0, 0x222b, 0xaa, 0xba, 0x2126, 0xe6, 0xf8,
185 0xbf, 0xa1, 0xac, 0x221a, 0x192, 0x2248, 0x2206, 0xab, 0xbb, 0x2026, 0xa0, 0xc0, 0xc3, 0xd5, 0x152, 0x153,
186 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0xf7, 0x25ca, 0xff, 0x178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
187 0x2021, 0xb7, 0x201a, 0x201e, 0x2030, 0xc2, 0xca, 0xc1, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 0xcc, 0xd3, 0xd4,
188 0xf8ff, 0xd2, 0xda, 0xdb, 0xd9, 0x131, 0x2c6, 0x2dc, 0xaf, 0x2d8, 0x2d9, 0x2da, 0xb8, 0x2dd, 0x2db, 0x2c7,
189 };
190 };
191 template <>
192 class TableDrivenCodePageConverter_<WellKnownCodePages::kPC> {
193 public:
194 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
195 {
196 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
197 }
198 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
199 {
200 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
201 }
202
203 private:
204 static constexpr char16_t kMap_[256] = {
205 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
206 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
207 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
208 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
209 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
210 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
211 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
212 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
213 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
214 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xa2, 0xa3, 0xa5, 0x20a7, 0x192,
215 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, 0xbf, 0x2310, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,
216 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
217 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
218 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
219 0x3b1, 0xdf, 0x393, 0x3c0, 0x3a3, 0x3c3, 0xb5, 0x3c4, 0x3a6, 0x398, 0x3a9, 0x3b4, 0x221e, 0x3c6, 0x3b5, 0x2229,
220 0x2261, 0xb1, 0x2265, 0x2264, 0x2320, 0x2321, 0xf7, 0x2248, 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0,
221 };
222 };
223 template <>
224 class TableDrivenCodePageConverter_<WellKnownCodePages::kPCA> {
225 public:
226 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
227 {
228 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
229 }
230 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
231 {
232 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
233 }
234
235 private:
236 static constexpr char16_t kMap_[256] = {
237 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
238 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
239 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
240 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
241 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
242 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
243 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
244 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
245 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
246 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x192,
247 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,
248 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0xc0, 0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510,
249 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4,
250 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x131, 0xcd, 0xce, 0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580,
251 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0xb4,
252 0xad, 0xb1, 0x2017, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0,
253 };
254 };
255 template <>
256 class TableDrivenCodePageConverter_<WellKnownCodePages::kGreek> {
257 public:
258 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
259 {
260 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
261 }
262 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
263 {
264 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
265 }
266
267 private:
268 static constexpr char16_t kMap_[256] = {
269 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
270 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
271 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
272 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
273 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
274 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
275 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
276 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
277 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x88, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
278 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
279 0xa0, 0x385, 0x386, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xf8f9, 0xab, 0xac, 0xad, 0xae, 0x2015,
280 0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0xb5, 0xb6, 0xb7, 0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f,
281 0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, 0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f,
282 0x3a0, 0x3a1, 0xf8fa, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af,
283 0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf,
284 0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xf8fb,
285 };
286 };
287 template <>
288 class TableDrivenCodePageConverter_<WellKnownCodePages::kTurkish> {
289 public:
290 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
291 {
292 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
293 }
294 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
295 {
296 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
297 }
298
299 private:
300 static constexpr char16_t kMap_[256] = {
301 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
302 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
303 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
304 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
305 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
306 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
307 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
308 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
309 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
310 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
311 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
312 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
313 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
314 0x11e, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x130, 0x15e, 0xdf,
315 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
316 0x11f, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff,
317 };
318 };
319 template <>
320 class TableDrivenCodePageConverter_<WellKnownCodePages::kHebrew> {
321 public:
322 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
323 {
324 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
325 }
326 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
327 {
328 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
329 }
330
331 private:
332 static constexpr char16_t kMap_[256] = {
333 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
334 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
335 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
336 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
337 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
338 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
339 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
340 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
341 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
342 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
343 0xa0, 0xa1, 0xa2, 0xa3, 0x20aa, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf,
344 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
345 0x5b0, 0x5b1, 0x5b2, 0x5b3, 0x5b4, 0x5b5, 0x5b6, 0x5b7, 0x5b8, 0x5b9, 0x5ba, 0x5bb, 0x5bc, 0x5bd, 0x5be, 0x5bf,
346 0x5c0, 0x5c1, 0x5c2, 0x5c3, 0x5f0, 0x5f1, 0x5f2, 0x5f3, 0x5f4, 0xf88d, 0xf88e, 0xf88f, 0xf890, 0xf891, 0xf892, 0xf893,
347 0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7, 0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df,
348 0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7, 0x5e8, 0x5e9, 0x5ea, 0xf894, 0xf895, 0x200e, 0x200f, 0xf896,
349 };
350 };
351 template <>
352 class TableDrivenCodePageConverter_<WellKnownCodePages::kArabic> {
353 public:
354 static void MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
355 {
356 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
357 }
358 static void MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt)
359 {
360 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
361 }
362
363 private:
364 static constexpr char16_t kMap_[256] = {
365 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
366 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
367 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
368 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
369 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
370 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
371 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
372 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
373 0x20ac, 0x67e, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x679, 0x2039, 0x152, 0x686, 0x698, 0x688,
374 0x6af, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x6a9, 0x2122, 0x691, 0x203a, 0x153, 0x200c, 0x200d, 0x6ba,
375 0xa0, 0x60c, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x6be, 0xab, 0xac, 0xad, 0xae, 0xaf,
376 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0x61b, 0xbb, 0xbc, 0xbd, 0xbe, 0x61f,
377 0x6c1, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627, 0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f,
378 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0xd7, 0x637, 0x638, 0x639, 0x63a, 0x640, 0x641, 0x642, 0x643,
379 0xe0, 0x644, 0xe2, 0x645, 0x646, 0x647, 0x648, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0x649, 0x64a, 0xee, 0xef,
380 0x64b, 0x64c, 0x64d, 0x64e, 0xf4, 0x64f, 0x650, 0xf7, 0x651, 0xf9, 0x652, 0xfb, 0xfc, 0x200e, 0x200f, 0x6d2,
381 };
382 };
383}
384
385namespace {
386
387#if qBuildInTableDrivenCodePageBuilderProc
388 static void WriteCodePageTable (CodePage codePage);
389
/*
 *  Developer-only hook, compiled when qBuildInTableDrivenCodePageBuilderProc is set:
 *  a file-local object whose constructor runs during static initialization and can
 *  regenerate a baked code page table by uncommenting one of the calls below.
 *
 *  Given distinct names (rather than DoRunIt/gRunIt) because the
 *  qBuildMemoizedISXXXBuilderProc hook in this same anonymous namespace uses those
 *  names, and enabling both build flags together would otherwise be a redefinition.
 */
struct CodePageTableBuilderRunner_ {
    CodePageTableBuilderRunner_ ()
    {
        //WriteCodePageTable (WellKnownCodePages::kANSI);
        //WriteCodePageTable (WellKnownCodePages::kMAC);
        //WriteCodePageTable (WellKnownCodePages::kPC);
        //WriteCodePageTable (WellKnownCodePages::kPCA);
        //WriteCodePageTable (WellKnownCodePages::kGreek);
        //WriteCodePageTable (WellKnownCodePages::kTurkish);
        //WriteCodePageTable (WellKnownCodePages::kHebrew);
        //WriteCodePageTable (WellKnownCodePages::kArabic);
    }
} gCodePageTableBuilderRunner_;
403#endif
404
405#if qBuildMemoizedISXXXBuilderProc
/*
 *  Generate C++ source for a memoized character-classification function and write it
 *  to the file "IsXXXProc.txt" in the current directory.
 *
 *  function          the reference predicate (e.g. iswalpha); called once for every
 *                    value in [0, 65536)
 *  origFunctionName  name of the reference predicate, used in the generated comment
 *                    and in the generated self-check Asserts
 *  functionName      name given to the generated function
 *
 *  The generated function tests 'c' against the ranges of values for which 'function'
 *  returned true (hack for SPR#1220 and SPR#1306).
 *
 *  SPR#1308 - the generated if/then skipchain can be somewhat long, and therefore
 *  inefficient. In principle it could be broken into many smaller, hierarchically
 *  nested chains, but that requires pre-computing all regions first. A trivial
 *  alternative gets most of the speedup: most characters looked up are small, so the
 *  range is split into three segments ([0,255), [255,5000), [5000,65536)) and each
 *  gets its own chain.
 */
template <typename FUNCTION>
void WriteMemoizedIsXXXProc (FUNCTION function, const std::string& origFunctionName, const std::string& functionName)
{
    std::ofstream outStream ("IsXXXProc.txt");

    outStream << "bool " << functionName << " (wchar_t c)\n";
    outStream << "{\n";

    outStream << "\t// ********** " << functionName << " (AUTOGENERATED memoize of " << origFunctionName << " - " << __DATE__ << ") ***********\n";
    outStream << "\t// Hack for SPR#1220 and SPR#1306\n";

    constexpr size_t kBoundaryForSpecialLoop1 = 255;
    constexpr size_t kBoundaryForSpecialLoop2 = 5000;
    constexpr size_t kEndOfAllCodePoints      = 256 * 256;
    const size_t     kSegmentBounds[]         = {0, kBoundaryForSpecialLoop1, kBoundaryForSpecialLoop2, kEndOfAllCodePoints};

    for (size_t j = 0; j <= 2; ++j) {
        const size_t startLoop = kSegmentBounds[j];
        const size_t endLoop   = kSegmentBounds[j + 1];

        if (j == 0) {
            outStream << "\tif (c < " << kBoundaryForSpecialLoop1 << ") {\n";
        }
        else if (j == 1) {
            outStream << "\telse if (c < " << kBoundaryForSpecialLoop2 << ") {\n";
        }
        else {
            outStream << "\telse {\n";
        }

        outStream << "\t\tif (";
        bool firstTime = true;
        // Emits one "(first <= c && c <= last)" term, '||'-chained to any previous term.
        auto emitRange = [&] (size_t first, size_t last) {
            if (not firstTime) {
                outStream << "\t\t\t||";
            }
            outStream << "(" << first << " <= c && c <= " << last << ")";
            outStream << "\n";
            firstTime = false;
        };

        bool   inRun    = false;
        size_t runStart = 0;
        for (size_t i = startLoop; i < endLoop; ++i) {
            if (function (static_cast<wchar_t> (i))) {
                if (not inRun) {
                    runStart = i;
                    inRun    = true;
                }
            }
            else if (inRun) {
                emitRange (runStart, i - 1);
                inRun = false;
            }
        }
        // Flush a run that reaches the end of the segment - including a run that only
        // STARTS on the segment's last value, which would otherwise be silently dropped.
        if (inRun) {
            emitRange (runStart, endLoop - 1);
        }
        if (firstTime) {
            outStream << "false";
        }

        outStream << "\t\t\t) {\n";
        outStream << "\t\t\t#if qTestMyISWXXXFunctions\n";
        outStream << "\t\t\tAssert (" << origFunctionName << "(c));\n";
        outStream << "\t\t\t#endif\n";
        outStream << "\t\t\treturn true;\n";
        outStream << "\t\t}\n";

        outStream << "\t}\n";
    }
    outStream << "\t#if qTestMyISWXXXFunctions\n";
    outStream << "\tAssert (!" << origFunctionName << "(c));\n";
    outStream << "\t#endif\n";
    outStream << "\treturn false;\n";
    outStream << "}\n";
    outStream << "\n";
}
498
499 struct DoRunIt {
500 DoRunIt ()
501 {
502 WriteMemoizedIsXXXProc (iswalpha, "iswalpha", "CharacterProperties::IsAlpha_M");
503 //WriteMemoizedIsXXXProc (iswalnum, "iswalnum", "CharacterProperties::Ialnum_M");
504 //WriteMemoizedIsXXXProc (iswpunct, "iswpunct", "CharacterProperties::IsPunct_M");
505 //WriteMemoizedIsXXXProc (iswspace, "iswspace", "CharacterProperties::IsSpace_M");
506 //WriteMemoizedIsXXXProc (iswcntrl, "iswcntrl", "CharacterProperties::IsCntrl_M");
507 //WriteMemoizedIsXXXProc (iswdigit, "iswdigit", "CharacterProperties::IsDigit_M");
508 }
509 } gRunIt;
510#endif
511
512#if qTestMyISWXXXFunctions
513 class MyIsWXXXTesterFunctions {
514 public:
515 MyIsWXXXTesterFunctions ()
516 {
517 for (wchar_t c = 0; c < 0xffff; ++c) {
518 Assert (CharacterProperties::IsAlpha_M (c) == !!iswalpha (c));
519 Assert (CharacterProperties::IsAlnum_M (c) == !!iswalnum (c));
520 Assert (CharacterProperties::IsPunct_M (c) == !!iswpunct (c));
521 Assert (CharacterProperties::IsSpace_M (c) == !!iswspace (c));
522 Assert (CharacterProperties::IsCntrl_M (c) == !!iswcntrl (c));
523 Assert (CharacterProperties::IsDigit_M (c) == !!iswdigit (c));
524 }
525 }
526 } sMyIsWXXXTesterFunctions;
527#endif
528}
529
530/*
531 ********************************************************************************
532 ******************** CodePageNotSupportedException *****************************
533 ********************************************************************************
534 */
535CodePageNotSupportedException::CodePageNotSupportedException (CodePage codePage)
536 : fMsg_{"Code page {} not supported"_f(codePage).AsNarrowSDKString ()}
537 , fCodePage_{codePage}
538{
539}
540
541const char* CodePageNotSupportedException::what () const noexcept
542{
543 return fMsg_.c_str ();
544}
545
546/*
547 ********************************************************************************
548 ******************************** CodePageConverter *****************************
549 ********************************************************************************
550 */
551DISABLE_COMPILER_MSC_WARNING_START (4996);
552DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
553DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
554size_t CodePageConverter::MapFromUNICODE_QuickComputeOutBufSize (const wchar_t* inChars, size_t inCharCnt) const
555{
556 size_t resultSize;
557 switch (fCodePage) {
558 case WellKnownCodePages::kANSI:
559 resultSize = inCharCnt * 1;
560 break;
561 case WellKnownCodePages::kMAC:
562 resultSize = inCharCnt * 1;
563 break;
564 case WellKnownCodePages::kPC:
565 resultSize = inCharCnt * 1;
566 break;
567 case WellKnownCodePages::kPCA:
568 resultSize = inCharCnt * 1;
569 break;
570 case WellKnownCodePages::kSJIS:
571 resultSize = inCharCnt * 2;
572 break;
573 break; // ITHINK thats right... BOM appears to be 5 chars long? LGP 2001-09-11
574 case WellKnownCodePages::kUTF8:
575 resultSize = UTFConvert::ComputeTargetBufferSize<char8_t> (span{inChars, inChars + inCharCnt});
576 default:
577 resultSize = inCharCnt * 8;
578 break; // I THINK that should always be enough - but who knows...
579 }
580 if (GetHandleBOM ()) {
581 switch (fCodePage) {
582 case WellKnownCodePages::kUNICODE_WIDE:
583 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
584 // BOM (byte order mark)
585 resultSize += 2;
586 } break;
587 case WellKnownCodePages::kUTF8: {
588 resultSize += 3; // BOM (byte order mark)
589 }
590 }
591 }
592 return resultSize;
593}
594
595void CodePageConverter::MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt) const
596{
597 Require (inMBCharCnt == 0 or inMBChars != nullptr);
598 RequireNotNull (outCharCnt);
599 Require (*outCharCnt == 0 or outChars != nullptr);
600
601 if (GetHandleBOM ()) {
602 size_t bytesToStrip = 0;
603 if (CodePagesGuesser{}.Guess (inMBChars, inMBCharCnt, nullptr, &bytesToStrip) == fCodePage) {
604 Assert (inMBCharCnt >= bytesToStrip);
605 inMBChars += bytesToStrip;
606 inMBCharCnt -= bytesToStrip;
607 }
608 }
609 switch (fCodePage) {
610 case WellKnownCodePages::kANSI:
611 TableDrivenCodePageConverter_<WellKnownCodePages::kANSI>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
612 break;
613 case WellKnownCodePages::kMAC:
614 TableDrivenCodePageConverter_<WellKnownCodePages::kMAC>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
615 break;
616 case WellKnownCodePages::kPC:
617 TableDrivenCodePageConverter_<WellKnownCodePages::kPC>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
618 break;
619 case WellKnownCodePages::kPCA:
620 TableDrivenCodePageConverter_<WellKnownCodePages::kPCA>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
621 break;
622 case WellKnownCodePages::kGreek:
623 TableDrivenCodePageConverter_<WellKnownCodePages::kGreek>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
624 break;
625 case WellKnownCodePages::kTurkish:
626 TableDrivenCodePageConverter_<WellKnownCodePages::kTurkish>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
627 break;
628 case WellKnownCodePages::kHebrew:
629 TableDrivenCodePageConverter_<WellKnownCodePages::kHebrew>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
630 break;
631 case WellKnownCodePages::kArabic:
632 TableDrivenCodePageConverter_<WellKnownCodePages::kArabic>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
633 break;
634 case WellKnownCodePages::kUNICODE_WIDE: {
635 const wchar_t* inWChars = reinterpret_cast<const wchar_t*> (inMBChars);
636 size_t inWCharCnt = (inMBCharCnt / sizeof (wchar_t));
637 *outCharCnt = inWCharCnt;
638 (void)::memcpy (outChars, inWChars, inWCharCnt * sizeof (wchar_t));
639 } break;
640 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
641 const wchar_t* inWChars = reinterpret_cast<const wchar_t*> (inMBChars);
642 size_t inWCharCnt = (inMBCharCnt / sizeof (wchar_t));
643 *outCharCnt = inWCharCnt;
644 for (size_t i = 0; i < inWCharCnt; ++i) {
645 wchar_t c = inWChars[i];
646 // byteswap
647 c = ((c & 0xff) << 8) + (c >> 8);
648 outChars[i] = c;
649 }
650 } break;
651 case WellKnownCodePages::kUTF8: {
652 *outCharCnt = UTFConvert::kThe.Convert (span{inMBChars, inMBChars + inMBCharCnt}, span{outChars, *outCharCnt}).fTargetProduced;
653 } break;
654 default: {
655#if qStroika_Foundation_Common_Platform_Windows
656 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapToUNICODE (inMBChars, inMBCharCnt,
657 SAFE_WIN_WCHART_CAST_ (outChars), outCharCnt);
658#else
660#endif
661 } break;
662 }
663
664#if qStroika_Foundation_Common_Platform_Windows && 0
666 // Assure my baked tables (and UTF8 converters) perform the same as the builtin Win32 API
667 size_t tstCharCnt = *outCharCnt;
668 StackBuffer<wchar_t> tstBuf{Memory::eUninitialized, *outCharCnt};
669 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapToUNICODE (inMBChars, inMBCharCnt, tstBuf, &tstCharCnt);
670 Assert (tstCharCnt == *outCharCnt);
671 Assert (memcmp (tstBuf, outChars, sizeof (wchar_t) * tstCharCnt) == 0);
672 }
673#endif
674}
675
676void CodePageConverter::MapToUNICODE (const char* inMBChars, size_t inMBCharCnt, char32_t* outChars, size_t* outCharCnt) const
677{
678 // Not really right - but hopefully adquate for starters -- LGP 2011-09-06
679 StackBuffer<char16_t> tmpBuf{Memory::eUninitialized, *outCharCnt};
680 MapToUNICODE (inMBChars, inMBCharCnt, tmpBuf.data (), outCharCnt);
681 for (size_t i = 0; i < *outCharCnt; ++i) {
682 outChars[i] = tmpBuf[i];
683 }
684}
685
/*
 *  Convert inCharCnt char16_t characters starting at inChars into bytes in this converter's
 *  code page (fCodePage), writing them to outChars.
 *
 *  \param inChars      source characters (may be nullptr only if inCharCnt == 0)
 *  \param inCharCnt    number of source characters
 *  \param outChars     destination buffer (may be nullptr only if *outCharCnt == 0)
 *  \param outCharCnt   required; in: value saved as the output buffer size, out: count set by the selected conversion branch
 *
 *  The kANSI, kMAC, kPC and kPCA code pages are handled by TableDrivenCodePageConverter_; the two
 *  wide UNICODE code pages and kUTF8 are handled inline (optionally emitting a BOM when
 *  GetHandleBOM () is true); anything else is forwarded to the Windows PlatformCodePageConverter
 *  (on Windows builds).
 */
686void CodePageConverter::MapFromUNICODE (const char16_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt) const
687{
688 Require (inCharCnt == 0 or inChars != nullptr);
689 RequireNotNull (outCharCnt);
690 Require (*outCharCnt == 0 or outChars != nullptr);
691
 // remember the caller's buffer size: the individual branches overwrite *outCharCnt
692 [[maybe_unused]] size_t outBufferSize = *outCharCnt;
693#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
694 size_t countOfBOMCharsAdded = 0; // just for the Windows debug check at the end
695#endif
696
697 switch (fCodePage) {
698 case WellKnownCodePages::kANSI:
699 TableDrivenCodePageConverter_<WellKnownCodePages::kANSI>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
700 break;
701 case WellKnownCodePages::kMAC:
702 TableDrivenCodePageConverter_<WellKnownCodePages::kMAC>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
703 break;
704 case WellKnownCodePages::kPC:
705 TableDrivenCodePageConverter_<WellKnownCodePages::kPC>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
706 break;
707 case WellKnownCodePages::kPCA:
708 TableDrivenCodePageConverter_<WellKnownCodePages::kPCA>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
709 break;
 // Wide characters, copied without byte swapping; optional BOM bytes 0xff 0xfe written first.
 // NOTE(review): the only capacity check here is *outCharCnt >= 2, yet the memcpy below writes
 // inCharCnt * sizeof (wchar_t) bytes - presumably callers size the buffer with
 // MapFromUNICODE_QuickComputeOutBufSize; confirm.
 // NOTE(review): when a BOM is written the reported byte count is DEcreased by 2, although two
 // extra bytes are emitted - looks like it should be '+= 2' (and it wraps if inCharCnt == 0); confirm.
 // NOTE(review): '++outWBytes' advances by sizeof (wchar_t) bytes and the memcpy reads
 // inCharCnt * sizeof (wchar_t) bytes from a char16_t buffer - both only line up where
 // sizeof (wchar_t) == 2; confirm behavior on other platforms.
710 case WellKnownCodePages::kUNICODE_WIDE: {
711 if (*outCharCnt >= 2) {
712 wchar_t* outWBytes = reinterpret_cast<wchar_t*> (outChars);
713 size_t outByteCount = (inCharCnt * sizeof (wchar_t));
714 if (GetHandleBOM ()) {
715 ++outWBytes; // skip BOM
716 outByteCount -= 2; // subtract for BOM
717 }
718 *outCharCnt = outByteCount;
719 if (GetHandleBOM ()) {
720 outChars[0] = '\xff';
721 outChars[1] = '\xfe';
722#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
723 countOfBOMCharsAdded = 2;
724#endif
725 }
726 (void)::memcpy (outWBytes, inChars, inCharCnt * sizeof (wchar_t));
727 }
728 else {
729 *outCharCnt = 0;
730 }
731 } break;
 // Same as kUNICODE_WIDE, except the BOM bytes are 0xfe 0xff and the two low-order bytes of each
 // character are exchanged on the way out. The NOTE(review) remarks on kUNICODE_WIDE above
 // (capacity check, BOM byte-count arithmetic, sizeof (wchar_t)) appear to apply here too.
732 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
733 if (*outCharCnt >= 2) {
734 wchar_t* outWBytes = reinterpret_cast<wchar_t*> (outChars);
735 size_t outByteCount = (inCharCnt * sizeof (wchar_t));
736 if (GetHandleBOM ()) {
737 ++outWBytes; // skip BOM
738 outByteCount -= 2; // subtract for BOM
739 }
740 *outCharCnt = outByteCount;
741 if (GetHandleBOM ()) {
742 outChars[0] = '\xfe';
743 outChars[1] = '\xff';
744#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
745 countOfBOMCharsAdded = 2;
746#endif
747 }
748 for (size_t i = 0; i < inCharCnt; ++i) {
749 wchar_t c = inChars[i];
750 // byteswap
751 c = ((c & 0xff) << 8) + (c >> 8);
752 outWBytes[i] = c;
753 }
754 }
755 else {
756 *outCharCnt = 0;
757 }
758 } break;
 // UTF-8: optionally write the 3 byte BOM (0xef 0xbb 0xbf), then let UTFConvert fill the rest of
 // the buffer; the reported count is the bytes UTFConvert produced, plus 3 whenever GetHandleBOM () is true.
 // NOTE(review): if GetHandleBOM () is true but fewer than 3 bytes are available, no BOM is
 // written, yet the trailing 'useOutCharCount += 3' still runs - confirm this is intended.
759 case WellKnownCodePages::kUTF8: {
760 char* useOutChars = outChars;
761 size_t useOutCharCount = *outCharCnt;
762 if (GetHandleBOM ()) {
763 if (*outCharCnt >= 3) {
764 useOutChars += 3; // skip BOM
765 useOutCharCount -= 3;
766 reinterpret_cast<unsigned char*> (outChars)[0] = 0xef;
767 reinterpret_cast<unsigned char*> (outChars)[1] = 0xbb;
768 reinterpret_cast<unsigned char*> (outChars)[2] = 0xbf;
769#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
770 countOfBOMCharsAdded = 3;
771#endif
772 }
773 else {
774 useOutCharCount = 0;
775 }
776 }
777 useOutCharCount =
778 UTFConvert::kThe.Convert (span{inChars, inCharCnt}, span{reinterpret_cast<char8_t*> (useOutChars), useOutCharCount}).fTargetProduced;
779 if (GetHandleBOM ()) {
780 useOutCharCount += 3;
781 }
782 *outCharCnt = useOutCharCount;
783 } break;
 // Any other code page: defer to the operating system converter on Windows.
 // NOTE(review): the non-Windows branch is empty in this listing (its line appears to be elided).
784 default: {
785#if qStroika_Foundation_Common_Platform_Windows
786 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapFromUNICODE (SAFE_WIN_WCHART_CAST_ (inChars), inCharCnt,
787 outChars, outCharCnt);
788#else
790#endif
791 }
792 }
793
 // Windows-only cross-check: redo the conversion with the platform converter into a scratch
 // buffer of the original size, and assert both results agree (see SPR notes below).
794#if qStroika_Foundation_Common_Platform_Windows
796 // Assure my baked tables perform the same as the builtin Win32 API
797 size_t win32TstCharCnt = outBufferSize;
798 StackBuffer<char> win32TstBuf{Memory::eUninitialized, win32TstCharCnt};
799
800 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapFromUNICODE (SAFE_WIN_WCHART_CAST_ (inChars), inCharCnt,
801 win32TstBuf.data (), &win32TstCharCnt);
802
803// SPR#0813 (and SPR#1277) - assert this produces the right result OR a '?' character -
804// used for bad conversions. Reason is cuz for characters that don't map - our table and
805// the system table can differ in how they map depending on current OS code page.
806#if qStroika_Foundation_Debug_AssertionsChecked
807 Assert ((win32TstCharCnt + countOfBOMCharsAdded) == *outCharCnt or outChars[0] == '?');
808 Assert (memcmp (win32TstBuf.data (), outChars + countOfBOMCharsAdded, win32TstCharCnt) == 0 or outChars[0] == '?');
809#endif
810 }
811#endif
812}
813
814void CodePageConverter::MapFromUNICODE (const char32_t* inChars, size_t inCharCnt, char* outChars, size_t* outCharCnt) const
815{
816 char* useOutChars = outChars;
817 size_t useOutCharCount = *outCharCnt;
818 bool addBOM = GetHandleBOM () and useOutCharCount >= 3;
819 if (addBOM) {
820 useOutChars += 3; // skip BOM
821 useOutCharCount -= 3;
822 reinterpret_cast<unsigned char*> (outChars)[0] = 0xef;
823 reinterpret_cast<unsigned char*> (outChars)[1] = 0xbb;
824 reinterpret_cast<unsigned char*> (outChars)[2] = 0xbf;
825 }
826 *outCharCnt = UTFConvert::kThe.Convert (span{inChars, inCharCnt}, span{reinterpret_cast<char8_t*> (useOutChars), useOutCharCount}).fTargetProduced;
827 if (addBOM) {
828 *outCharCnt += 3;
829 }
830}
831DISABLE_COMPILER_MSC_WARNING_END (4996);
832DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
833DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
834
835/*
836 ********************************************************************************
837 ***************************** CodePagesInstalled *******************************
838 ********************************************************************************
839 */
namespace {
#if qStroika_Foundation_Common_Platform_Windows
    // Set the enumeration callback below inserts into; expected to be non-null
    // for the duration of an ::EnumSystemCodePages () call.
    shared_ptr<set<CodePage>> s_EnumCodePagesProc_Accumulator_;

    // Callback handed to ::EnumSystemCodePages (): parses the code page number out of the
    // string it is given, and records it in s_EnumCodePagesProc_Accumulator_.
    BOOL FAR PASCAL EnumCodePagesProc_ (LPTSTR lpCodePageString)
    {
        const auto parsedCodePage = _ttoi (lpCodePageString);
        s_EnumCodePagesProc_Accumulator_->insert (parsedCodePage);
        return TRUE; // keep enumerating
    }
#endif
}
850
851CodePagesInstalled::CodePagesInstalled ()
852{
853 Assert (fCodePages_.size () == 0);
854
855 shared_ptr<set<CodePage>> accum = MakeSharedPtr<set<CodePage>> ();
856#if qStroika_Foundation_Common_Platform_Windows
857 static mutex sCritSec_;
858 {
859 [[maybe_unused]] lock_guard critSec{sCritSec_};
860 Assert (s_EnumCodePagesProc_Accumulator_.get () == nullptr);
861 s_EnumCodePagesProc_Accumulator_ = accum;
862 ::EnumSystemCodePages (EnumCodePagesProc_, CP_INSTALLED);
863 s_EnumCodePagesProc_Accumulator_.reset ();
864 }
865#endif
866 // Add these 'fake' code pages - which I believe are always available, but never listed by EnumSystemCodePages()
867 accum->insert (WellKnownCodePages::kUNICODE_WIDE);
868 accum->insert (WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN);
869 accum->insert (WellKnownCodePages::kUTF8);
870 fCodePages_ = vector<CodePage>{accum->begin (), accum->end ()};
871}
872
873/*
874 ********************************************************************************
875 ********************************** CodePagesGuesser ****************************
876 ********************************************************************************
877 */
878DISABLE_COMPILER_MSC_WARNING_START (4996);
879DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
880DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
881CodePage CodePagesGuesser::Guess (const void* input, size_t nBytes, Confidence* confidence, size_t* bytesFromFrontToStrip)
882{
883 if (confidence != nullptr) {
884 *confidence = Confidence::eLow;
885 }
886 if (bytesFromFrontToStrip != nullptr) {
887 *bytesFromFrontToStrip = 0;
888 }
889 if (nBytes >= 2) {
890 unsigned char c0 = reinterpret_cast<const unsigned char*> (input)[0];
891 unsigned char c1 = reinterpret_cast<const unsigned char*> (input)[1];
892 if (c0 == 0xff and c1 == 0xfe) {
893 if (confidence != nullptr) {
894 *confidence = Confidence::eHigh;
895 }
896 if (bytesFromFrontToStrip != nullptr) {
897 *bytesFromFrontToStrip = 2;
898 }
899 return WellKnownCodePages::kUNICODE_WIDE;
900 }
901 if (c0 == 0xfe and c1 == 0xff) {
902 if (confidence != nullptr) {
903 *confidence = Confidence::eHigh;
904 }
905 if (bytesFromFrontToStrip != nullptr) {
906 *bytesFromFrontToStrip = 2;
907 }
908 return WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN;
909 }
910 if (nBytes >= 3) {
911 unsigned char c2 = reinterpret_cast<const unsigned char*> (input)[2];
912 if (c0 == 0xef and c1 == 0xbb and c2 == 0xbf) {
913 if (confidence != nullptr) {
914 *confidence = Confidence::eHigh;
915 }
916 if (bytesFromFrontToStrip != nullptr) {
917 *bytesFromFrontToStrip = 3;
918 }
919 return WellKnownCodePages::kUTF8;
920 }
921 }
922 }
923
924 /*
925 * Final ditch efforts if we don't recognize any prefix.
926 */
927 if (confidence != nullptr) {
928 *confidence = Confidence::eLow;
929 }
930 return Characters::GetDefaultSDKCodePage ();
931}
932DISABLE_COMPILER_MSC_WARNING_END (4996);
933DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
934DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
935
936/*
937 ********************************************************************************
938 ****************************** CodePagePrettyNameMapper ************************
939 ********************************************************************************
940 */
941CodePagePrettyNameMapper::CodePageNames CodePagePrettyNameMapper::sCodePageNames_ = CodePagePrettyNameMapper::MakeDefaultCodePageNames ();
942
943CodePagePrettyNameMapper::CodePageNames CodePagePrettyNameMapper::MakeDefaultCodePageNames ()
944{
945 CodePageNames codePageNames;
946 codePageNames.fUNICODE_WIDE = L"UNICODE {wide characters}"sv;
947 codePageNames.fUNICODE_WIDE_BIGENDIAN = L"UNICODE {wide characters - big endian}"sv;
948 codePageNames.fANSI = L"ANSI (1252)"sv;
949 codePageNames.fMAC = L"MAC (2)"sv;
950 codePageNames.fPC = L"IBM PC United States code page (437)"sv;
951 codePageNames.fSJIS = L"Japanese SJIS {932}"sv;
952 codePageNames.fUTF8 = L"UNICODE {UTF-8}"sv;
953 codePageNames.f850 = L"Latin I - MS-DOS Multilingual (850)"sv;
954 codePageNames.f851 = L"Latin II - MS-DOS Slavic (850)"sv;
955 codePageNames.f866 = L"Russian - MS-DOS (866)"sv;
956 codePageNames.f936 = L"Chinese {Simplfied} (936)"sv;
957 codePageNames.f949 = L"Korean (949)"sv;
958 codePageNames.f950 = L"Chinese {Traditional} (950)"sv;
959 codePageNames.f1250 = L"Eastern European Windows (1250)"sv;
960 codePageNames.f1251 = L"Cyrilic (1251)"sv;
961 codePageNames.f10000 = L"Roman {Macintosh} (10000)"sv;
962 codePageNames.f10001 = L"Japanese {Macintosh} (10001)"sv;
963 codePageNames.f50220 = L"Japanese JIS (50220)"sv;
964 return codePageNames;
965}
966
967wstring CodePagePrettyNameMapper::GetName (CodePage cp)
968{
969 switch (cp) {
970 case WellKnownCodePages::kUNICODE_WIDE:
971 return sCodePageNames_.fUNICODE_WIDE;
972 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN:
973 return sCodePageNames_.fUNICODE_WIDE_BIGENDIAN;
974 case WellKnownCodePages::kANSI:
975 return sCodePageNames_.fANSI;
976 case WellKnownCodePages::kMAC:
977 return sCodePageNames_.fMAC;
978 case WellKnownCodePages::kPC:
979 return sCodePageNames_.fPC;
980 case WellKnownCodePages::kSJIS:
981 return sCodePageNames_.fSJIS;
982 case WellKnownCodePages::kUTF8:
983 return sCodePageNames_.fUTF8;
984 case 850:
985 return sCodePageNames_.f850;
986 case 851:
987 return sCodePageNames_.f851;
988 case 866:
989 return sCodePageNames_.f866;
990 case 936:
991 return sCodePageNames_.f936;
992 case 949:
993 return sCodePageNames_.f949;
994 case 950:
995 return sCodePageNames_.f950;
996 case 1250:
997 return sCodePageNames_.f1250;
998 case WellKnownCodePages::kCyrilic:
999 return sCodePageNames_.f1251;
1000 case 10000:
1001 return sCodePageNames_.f10000;
1002 case 10001:
1003 return sCodePageNames_.f10001;
1004 case 50220:
1005 return sCodePageNames_.f50220;
1006 default: {
1007 return Characters::CString::Format (L"%d", cp);
1008 }
1009 }
1010}
1011
1012/*
1013 ********************************************************************************
1014 *********************** Characters::WideStringToNarrow *************************
1015 ********************************************************************************
1016 */
1017void Characters::WideStringToNarrow (const wchar_t* wsStart, const wchar_t* wsEnd, CodePage codePage, string* intoResult)
1018{
1019 RequireNotNull (intoResult);
1020 Require (wsStart <= wsEnd);
1021#if qStroika_Foundation_Common_Platform_Windows
1022 Platform::Windows::WideStringToNarrow (wsStart, wsEnd, codePage, intoResult);
1023#else
1024 *intoResult = CodeCvt<wchar_t>{codePage}.String2Bytes<string> (span{wsStart, wsEnd});
1025#endif
1026}
1027
1028/*
1029 ********************************************************************************
1030 *********************** Characters::NarrowStringToWide *************************
1031 ********************************************************************************
1032 */
1033#if 0
1034namespace {
 /*
  *  NOTE: this helper is compiled out (the enclosing '#if 0').
  *
  *  Convert the bytes in [sStart, sEnd) from the given code page into *intoResult using
  *  CodePageConverter: size the result with MapToUNICODE_QuickComputeOutBufSize, convert in place,
  *  then shrink the result to the number of characters actually produced.
  */
1035 void PortableNarrowStringToWide_ (const char* sStart, const char* sEnd, CodePage codePage, wstring* intoResult)
1036 {
1037 RequireNotNull (intoResult);
1038 Require (sStart <= sEnd);
1039 size_t inSize = sEnd - sStart;
1040 CodePageConverter cc{codePage};
1041 // this grossly overestimates size - which is a problem for the RESIZE below!!! COULD pointlessly run out of memory and initialize data to good values...
1042 size_t outSizeBuf = cc.MapToUNICODE_QuickComputeOutBufSize (sStart, inSize);
1043 intoResult->resize (outSizeBuf);
1044 size_t actualOutSize = 0;
 // empty input: skip the conversion; the shrink step is then skipped as well
1045 if (inSize != 0) {
1046 actualOutSize = outSizeBuf;
1047 cc.MapToUNICODE (sStart, inSize, Containers::Start (*intoResult), &actualOutSize);
1048 if (intoResult->size () != actualOutSize) {
1049 // shrink
1050 Assert (intoResult->size () > actualOutSize);
1051 intoResult->resize (actualOutSize);
1052 }
1053 }
1054 }
1055}
1056#endif
1057void Characters::NarrowStringToWide (const char* sStart, const char* sEnd, CodePage codePage, wstring* intoResult)
1058{
1059 RequireNotNull (intoResult);
1060 Require (sStart <= sEnd);
1061#if qStroika_Foundation_Common_Platform_Windows
1062 Platform::Windows::NarrowStringToWide (sStart, sEnd, codePage, intoResult);
1063#else
1064 *intoResult =
1065 CodeCvt<wchar_t>{codePage}.Bytes2String<wstring> (span{reinterpret_cast<const byte*> (sStart), static_cast<size_t> (sEnd - sStart)});
1066#endif
1067}
1068
1069/*
1070 ********************************************************************************
1071 *********************** MapUNICODETextWithMaybeBOMTowstring ********************
1072 ********************************************************************************
1073 */
1074DISABLE_COMPILER_MSC_WARNING_START (4996);
1075DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1076DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
1077wstring Characters::MapUNICODETextWithMaybeBOMTowstring (const char* start, const char* end)
1078{
1079 // THIS IMPL DEPRECATED SO GO AWAY SOON
1080 Require (start <= end);
1081 if (start == end) {
1082 return wstring{};
1083 }
1084 else {
1085 size_t outBufSize = end - start;
1086 StackBuffer<wchar_t> wideBuf{Memory::eUninitialized, outBufSize};
1087 size_t outCharCount = outBufSize;
1088 MapSBUnicodeTextWithMaybeBOMToUNICODE (start, end - start, wideBuf.data (), &outCharCount);
1089 Assert (outCharCount <= outBufSize);
1090 if (outCharCount == 0) {
1091 return wstring{};
1092 }
1093
1094 // The wideBuf may be NUL-terminated or not (depending on whether the input was NUL-terminated or not).
1095 // Be sure to construct the resuting string with the right end-of-string pointer (the length doesn't include
1096 // the NUL-char)
1097 return wstring{wideBuf.data (), wideBuf[outCharCount - 1] == '\0' ? (outCharCount - 1) : outCharCount};
1098 }
1099}
1100DISABLE_COMPILER_MSC_WARNING_END (4996);
1101DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1102DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
1103
1104/*
1105 ********************************************************************************
1106 ************************ MapUNICODETextToSerializedFormat **********************
1107 ********************************************************************************
1108 */
1109DISABLE_COMPILER_MSC_WARNING_START (4996);
1110DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1111DISABLE_COMPILER_CLANG_WARNING_START ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
1112vector<byte> Characters::MapUNICODETextToSerializedFormat (const wchar_t* start, const wchar_t* end, CodePage useCP)
1113{
1114 // THIS IMPL DEPRECATED SO GO AWAY SOON
1115 CodePageConverter cpc{useCP, CodePageConverter::eHandleBOM};
1116 size_t outCharCount = cpc.MapFromUNICODE_QuickComputeOutBufSize (start, end - start);
1117 StackBuffer<char> buf{Memory::eUninitialized, outCharCount};
1118 cpc.MapFromUNICODE (start, end - start, buf.data (), &outCharCount);
1119 const byte* bs = reinterpret_cast<const byte*> (static_cast<const char*> (buf));
1120 return vector<byte>{bs, bs + outCharCount};
1121}
1122DISABLE_COMPILER_MSC_WARNING_END (4996);
1123DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1124DISABLE_COMPILER_CLANG_WARNING_END ("clang diagnostic ignored \"-Wdeprecated-declarations\"");
#define qStroika_Foundation_Debug_AssertionsChecked
The qStroika_Foundation_Debug_AssertionsChecked flag determines if assertions are checked and validat...
Definition Assertions.h:48
#define RequireNotNull(p)
Definition Assertions.h:347
CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt,...
Definition CodeCvt.h:118
nonvirtual STRINGISH Bytes2String(span< const byte > from) const
nonvirtual BLOBISH String2Bytes(span< const CHAR_T > from) const
virtual const char * what() const noexcept override
Definition CodePage.cpp:541
nonvirtual ConversionResult Convert(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded strings/spans (including the special case of ASCII, and Latin1) (e....
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Definition UTFConvert.h:369
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
CONTAINER::value_type * Start(CONTAINER &c)
For a contiguous container (such as a vector or basic_string) - find the pointer to the start of the ...
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43