4#include "Stroika/Foundation/StroikaPreComp.h"
14#include "Stroika/Foundation/Common/Common.h"
15#include "Stroika/Foundation/Containers/Common.h"
16#include "Stroika/Foundation/Execution/Common.h"
17#include "Stroika/Foundation/Execution/Exceptions.h"
26using namespace Stroika::Foundation::Memory;
// Build-time switch, 0 unless predefined by the build: when non-zero it enables the
// WriteCodePageTable declaration further down in this file.
31#ifndef qBuildInTableDrivenCodePageBuilderProc
32#define qBuildInTableDrivenCodePageBuilderProc 0
// Build-time switch, 0 unless predefined by the build: when non-zero it enables the
// WriteMemoizedIsXXXProc source generator further down in this file.
39#ifndef qBuildMemoizedISXXXBuilderProc
40#define qBuildMemoizedISXXXBuilderProc 0
// Build-time switch, 0 unless predefined by the build: when non-zero it enables the
// MyIsWXXXTesterFunctions self-check class further down in this file (the same macro name is
// also written into the text emitted by WriteMemoizedIsXXXProc).
47#ifndef qTestMyISWXXXFunctions
48#define qTestMyISWXXXFunctions 0
51#if qBuildInTableDrivenCodePageBuilderProc || qBuildMemoizedISXXXBuilderProc
55using namespace Characters;
57#if qStroika_Foundation_Common_Platform_Windows
// Pointer casts between char16_t and wchar_t buffers, used below when UTF-16 buffers are handed
// to the wchar_t-based PlatformCodePageConverter API. Nothing is copied or converted: each
// overload returns the same address viewed through the other character type, preserving constness.
// NOTE(review): these are only meaningful where wchar_t and char16_t have the same size; the
// enclosing #if restricts them to the Windows build - confirm that assumption holds there.
inline const wchar_t* SAFE_WIN_WCHART_CAST_ (const char16_t* t)
{
    return reinterpret_cast<const wchar_t*> (t);
}
inline wchar_t* SAFE_WIN_WCHART_CAST_ (char16_t* t)
{
    return reinterpret_cast<wchar_t*> (t);
}
inline const char16_t* SAFE_WIN_WCHART_CAST_ (const wchar_t* t)
{
    return reinterpret_cast<const char16_t*> (t);
}
inline char16_t* SAFE_WIN_WCHART_CAST_ (wchar_t* t)
{
    return reinterpret_cast<char16_t*> (t);
}
// Returns a charset-name string for the given code page.
// Only two pieces are visible in this excerpt: a case label for WellKnownCodePages::kUTF8 (its
// result is not visible here) and a return that formats the numeric code page as
// "CodePage <n>" (presumably the fallback for code pages without a dedicated name - confirm).
83wstring Characters::GetCharsetString (
CodePage cp)
86 case WellKnownCodePages::kUTF8:
89 return Characters::CString::Format (L
"CodePage %d", cp);
// Primary template for the single-byte, table-driven converters, parameterized on the code page.
// It only declares the two static entry points (code-page bytes -> UTF-16, and UTF-16 ->
// code-page bytes); the per-code-page specializations later in this file provide the bodies.
// In both functions *outCharCnt is an in/out parameter (see the table helpers that follow).
94 template <CodePage CODEPAGE>
95 class TableDrivenCodePageConverter_ {
97 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt);
98 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt);
/*
 *  Convert single-byte code-page text to UTF-16 code units by table lookup.
 *
 *  kMap_        256-entry table: index is the input byte value, entry is the UTF-16 code unit.
 *  inMBChars    input bytes (may be nullptr only if nothing is converted).
 *  inMBCharCnt  number of input bytes available.
 *  outChars     destination buffer for the converted code units.
 *  outCharCnt   in: capacity of outChars (in code units); out: number of code units written.
 *
 *  Converts min (inMBCharCnt, *outCharCnt) characters; excess input is silently left unconverted.
 */
inline void MapToUNICODEFromTable_ (const char16_t kMap_[256], const char* inMBChars, size_t inMBCharCnt, char16_t* outChars, size_t* outCharCnt)
{
    // Never write past the caller's buffer: convert only what both sides can hold.
    const size_t nCharsToCopy = (*outCharCnt < inMBCharCnt) ? *outCharCnt : inMBCharCnt;
    for (size_t i = 0; i < nCharsToCopy; ++i) {
        // Go through unsigned char so bytes >= 0x80 index entries 128..255 even where plain char is signed.
        outChars[i] = kMap_[static_cast<unsigned char> (inMBChars[i])];
    }
    *outCharCnt = nCharsToCopy;
}
// Inverse of MapToUNICODEFromTable_: converts UTF-16 code units to single-byte code-page text.
// For each input code unit the 256-entry table is searched for an equal entry, and the index of
// the match is stored as the output byte. At most min (inCharCnt, *outCharCnt) characters are
// processed, and *outCharCnt is set to that number on return.
// NOTE(review): this is a linear scan of up to 256 entries per input character.
// NOTE(review): the declaration of j and the handling of code units that have no entry in the
// table are on lines not visible in this excerpt - confirm before relying on either.
108 inline void MapFromUNICODEFromTable_ (
const char16_t kMap_[256],
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
110 size_t nCharsToCopy = min (inCharCnt, *outCharCnt);
111 for (
size_t i = 0; i < nCharsToCopy; ++i) {
113 for (; j < 256; ++j) {
114 if (kMap_[j] == inChars[i]) {
// the table index is the code-page byte value
115 outChars[i] =
static_cast<char> (j);
124 *outCharCnt = nCharsToCopy;
// Specialization for WellKnownCodePages::kANSI. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
127 class TableDrivenCodePageConverter_<WellKnownCodePages::kANSI> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
129 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
131 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
133 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
135 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
139 static constexpr char16_t kMap_[256] = {
140 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
141 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
142 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
143 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
144 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
145 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
146 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
147 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
148 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x17d, 0x8f,
149 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x17e, 0x178,
150 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
151 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
152 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
153 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
154 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
155 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
// Specialization for WellKnownCodePages::kMAC. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
159 class TableDrivenCodePageConverter_<WellKnownCodePages::kMAC> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
161 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
163 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
165 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
167 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
171 static constexpr char16_t kMap_[256] = {
172 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
173 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
174 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
175 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
176 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
177 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
178 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
179 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
180 0xc4, 0xc5, 0xc7, 0xc9, 0xd1, 0xd6, 0xdc, 0xe1, 0xe0, 0xe2, 0xe4, 0xe3, 0xe5, 0xe7, 0xe9, 0xe8,
181 0xea, 0xeb, 0xed, 0xec, 0xee, 0xef, 0xf1, 0xf3, 0xf2, 0xf4, 0xf6, 0xf5, 0xfa, 0xf9, 0xfb, 0xfc,
182 0x2020, 0xb0, 0xa2, 0xa3, 0xa7, 0x2022, 0xb6, 0xdf, 0xae, 0xa9, 0x2122, 0xb4, 0xa8, 0x2260, 0xc6, 0xd8,
183 0x221e, 0xb1, 0x2264, 0x2265, 0xa5, 0xb5, 0x2202, 0x2211, 0x220f, 0x3c0, 0x222b, 0xaa, 0xba, 0x2126, 0xe6, 0xf8,
184 0xbf, 0xa1, 0xac, 0x221a, 0x192, 0x2248, 0x2206, 0xab, 0xbb, 0x2026, 0xa0, 0xc0, 0xc3, 0xd5, 0x152, 0x153,
185 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0xf7, 0x25ca, 0xff, 0x178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
186 0x2021, 0xb7, 0x201a, 0x201e, 0x2030, 0xc2, 0xca, 0xc1, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 0xcc, 0xd3, 0xd4,
187 0xf8ff, 0xd2, 0xda, 0xdb, 0xd9, 0x131, 0x2c6, 0x2dc, 0xaf, 0x2d8, 0x2d9, 0x2da, 0xb8, 0x2dd, 0x2db, 0x2c7,
// Specialization for WellKnownCodePages::kPC. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
191 class TableDrivenCodePageConverter_<WellKnownCodePages::kPC> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
193 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
195 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
197 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
199 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
203 static constexpr char16_t kMap_[256] = {
204 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
205 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
206 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
207 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
208 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
209 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
210 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
211 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
212 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
213 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xa2, 0xa3, 0xa5, 0x20a7, 0x192,
214 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, 0xbf, 0x2310, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,
215 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
216 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
217 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
218 0x3b1, 0xdf, 0x393, 0x3c0, 0x3a3, 0x3c3, 0xb5, 0x3c4, 0x3a6, 0x398, 0x3a9, 0x3b4, 0x221e, 0x3c6, 0x3b5, 0x2229,
219 0x2261, 0xb1, 0x2265, 0x2264, 0x2320, 0x2321, 0xf7, 0x2248, 0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0,
// Specialization for WellKnownCodePages::kPCA. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
223 class TableDrivenCodePageConverter_<WellKnownCodePages::kPCA> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
225 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
227 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
229 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
231 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
235 static constexpr char16_t kMap_[256] = {
236 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
237 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
238 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
239 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
240 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
241 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
242 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
243 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
244 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
245 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x192,
246 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,
247 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0xc1, 0xc2, 0xc0, 0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510,
248 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4,
249 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x131, 0xcd, 0xce, 0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580,
250 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0xb4,
251 0xad, 0xb1, 0x2017, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0,
// Specialization for WellKnownCodePages::kGreek. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
255 class TableDrivenCodePageConverter_<WellKnownCodePages::kGreek> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
257 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
259 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
261 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
263 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
267 static constexpr char16_t kMap_[256] = {
268 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
269 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
270 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
271 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
272 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
273 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
274 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
275 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
276 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x88, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
277 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
278 0xa0, 0x385, 0x386, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xf8f9, 0xab, 0xac, 0xad, 0xae, 0x2015,
279 0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0xb5, 0xb6, 0xb7, 0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f,
280 0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, 0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f,
281 0x3a0, 0x3a1, 0xf8fa, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af,
282 0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf,
283 0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xf8fb,
// Specialization for WellKnownCodePages::kTurkish. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
287 class TableDrivenCodePageConverter_<WellKnownCodePages::kTurkish> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
289 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
291 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
293 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
295 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
299 static constexpr char16_t kMap_[256] = {
300 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
301 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
302 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
303 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
304 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
305 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
306 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
307 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
308 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
309 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
310 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
311 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
312 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
313 0x11e, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x130, 0x15e, 0xdf,
314 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
315 0x11f, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff,
// Specialization for WellKnownCodePages::kHebrew. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
319 class TableDrivenCodePageConverter_<WellKnownCodePages::kHebrew> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
321 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
323 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
325 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
327 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
331 static constexpr char16_t kMap_[256] = {
332 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
333 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
334 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
335 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
336 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
337 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
338 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
339 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
340 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
341 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x2dc, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
342 0xa0, 0xa1, 0xa2, 0xa3, 0x20aa, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf,
343 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
344 0x5b0, 0x5b1, 0x5b2, 0x5b3, 0x5b4, 0x5b5, 0x5b6, 0x5b7, 0x5b8, 0x5b9, 0x5ba, 0x5bb, 0x5bc, 0x5bd, 0x5be, 0x5bf,
345 0x5c0, 0x5c1, 0x5c2, 0x5c3, 0x5f0, 0x5f1, 0x5f2, 0x5f3, 0x5f4, 0xf88d, 0xf88e, 0xf88f, 0xf890, 0xf891, 0xf892, 0xf893,
346 0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7, 0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df,
347 0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7, 0x5e8, 0x5e9, 0x5ea, 0xf894, 0xf895, 0x200e, 0x200f, 0xf896,
// Specialization for WellKnownCodePages::kArabic. Both directions forward to the shared table
// helpers, passing this class's kMap_ (indexed by byte value, yielding a UTF-16 code unit).
351 class TableDrivenCodePageConverter_<WellKnownCodePages::kArabic> {
// code-page bytes -> UTF-16; *outCharCnt is capacity on entry, count written on return
353 static void MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
355 MapToUNICODEFromTable_ (kMap_, inMBChars, inMBCharCnt, outChars, outCharCnt);
// UTF-16 -> code-page bytes; *outCharCnt is capacity on entry, count written on return
357 static void MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
359 MapFromUNICODEFromTable_ (kMap_, inChars, inCharCnt, outChars, outCharCnt);
363 static constexpr char16_t kMap_[256] = {
364 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
365 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
366 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
367 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
368 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
369 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
370 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
371 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
372 0x20ac, 0x67e, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x679, 0x2039, 0x152, 0x686, 0x698, 0x688,
373 0x6af, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x6a9, 0x2122, 0x691, 0x203a, 0x153, 0x200c, 0x200d, 0x6ba,
374 0xa0, 0x60c, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x6be, 0xab, 0xac, 0xad, 0xae, 0xaf,
375 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0x61b, 0xbb, 0xbc, 0xbd, 0xbe, 0x61f,
376 0x6c1, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627, 0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f,
377 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0xd7, 0x637, 0x638, 0x639, 0x63a, 0x640, 0x641, 0x642, 0x643,
378 0xe0, 0x644, 0xe2, 0x645, 0x646, 0x647, 0x648, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0x649, 0x64a, 0xee, 0xef,
379 0x64b, 0x64c, 0x64d, 0x64e, 0xf4, 0x64f, 0x650, 0xf7, 0x651, 0xf9, 0x652, 0xfb, 0xfc, 0x200e, 0x200f, 0x6d2,
386#if qBuildInTableDrivenCodePageBuilderProc
// Forward declaration, compiled only when qBuildInTableDrivenCodePageBuilderProc is non-zero.
// NOTE(review): presumably the developer tool that emits a kMap_-style table for the given code
// page; its definition is not visible in this excerpt - confirm.
387 static void WriteCodePageTable (
CodePage codePage);
404#if qBuildMemoizedISXXXBuilderProc
// Developer-only source generator (compiled only when qBuildMemoizedISXXXBuilderProc is non-zero).
// Writes to the file "IsXXXProc.txt" the C++ text of a function `bool <functionName> (wchar_t c)`
// that reproduces `function` as a list of closed value ranges for which `function` returned true.
//
//   function          predicate evaluated on each wchar_t value (e.g. iswalpha)
//   origFunctionName  name of that predicate, used in the emitted header comment and Assert lines
//   functionName      name given to the emitted function
//
// The emitted body has three sections, split at the two boundary constants below:
// `if (c < 255)`, `else if (c < 5000)` and `else`. Each section contains one `if (...)` whose
// condition is the `||` of the ranges found in that section, returning true on a match; after the
// sections the emitted function returns false. The emitted text also carries
// `#if qTestMyISWXXXFunctions` Assert checks against the original predicate.
// NOTE(review): the declaration of endLoop and its value for the last section are on lines not
// visible in this excerpt.
405 template <
typename FUNCTION>
406 void WriteMemoizedIsXXXProc (FUNCTION function,
const string& origFunctionName,
const string& functionName)
408 ofstream outStream (
"IsXXXProc.txt");
410 outStream <<
"bool " << functionName <<
" (wchar_t c)\n";
413 outStream <<
"\t// ********** " << functionName <<
" (AUTOGENERATED memoize of " << origFunctionName <<
" - " << __DATE__ <<
") ***********\n";
414 outStream <<
"\t// Hack for SPR#1220 and SPR#1306\n";
// section boundaries: [0, 255), [255, 5000), and the remainder
426 const wchar_t kBoundaryForSpecialLoop1 = 255;
427 const wchar_t kBoundaryForSpecialLoop2 = 5000;
// one pass per section (j = 0, 1, 2)
429 for (
int j = 0; j <= 2; ++j) {
432 outStream <<
"\tif (c < " << int (kBoundaryForSpecialLoop1) <<
") {\n";
435 outStream <<
"\telse if (c < " << int (kBoundaryForSpecialLoop2) <<
") {\n";
438 outStream <<
"\telse {\n";
441 outStream <<
"\t\tif (";
// firstTime: no range emitted yet in this section; hasLastTrue: currently inside a run of true
// values that began at firstRangeIdxTrue
442 bool firstTime =
true;
443 bool hasLastTrue =
false;
444 size_t firstRangeIdxTrue = 0;
445 size_t startLoop = 0;
448 endLoop = kBoundaryForSpecialLoop1;
451 startLoop = kBoundaryForSpecialLoop1;
452 endLoop = kBoundaryForSpecialLoop2;
455 startLoop = kBoundaryForSpecialLoop2;
458 for (
size_t i = startLoop; i < endLoop; ++i) {
459 bool isT = function (
static_cast<wchar_t> (i));
// an open run ends here: either this value is false, or this is the last index of the section
461 if (((not isT) or (i + 1 == endLoop)) and hasLastTrue) {
464 outStream <<
"\t\t\t||";
// if the current value is true (last-index case) it belongs to the run; otherwise the run
// ended at the previous index
466 size_t rangeEnd = isT ? i : i - 1;
467 outStream <<
"(" << firstRangeIdxTrue <<
" <= c && c <= " << rangeEnd <<
")";
// a new run of true values starts at i
472 if (isT and not hasLastTrue) {
473 firstRangeIdxTrue = i;
// NOTE(review): presumably emitted only when the section produced no ranges, so the generated
// `if (` still has a condition - the guarding line is not visible here
478 outStream <<
"false";
481 outStream <<
"\t\t\t) {\n";
482 outStream <<
"\t\t\t#if qTestMyISWXXXFunctions\n";
483 outStream <<
"\t\t\tAssert (" << origFunctionName <<
"(c));\n";
484 outStream <<
"\t\t\t#endif\n";
485 outStream <<
"\t\t\treturn true;\n";
486 outStream <<
"\t\t}\n";
488 outStream <<
"\t}\n";
490 outStream <<
"\t#if qTestMyISWXXXFunctions\n";
491 outStream <<
"\tAssert (!" << origFunctionName <<
"(c));\n";
492 outStream <<
"\t#endif\n";
493 outStream <<
"\treturn false;\n";
501 WriteMemoizedIsXXXProc (iswalpha,
"iswalpha",
"CharacterProperties::IsAlpha_M");
511#if qTestMyISWXXXFunctions
// Self-check object, compiled only when qTestMyISWXXXFunctions is non-zero. Its constructor
// asserts, for every wchar_t value from 0 up to (but not including) 0xffff, that each
// CharacterProperties::IsXxx_M function agrees with the corresponding C library iswxxx function.
// `!!` normalizes the int returned by iswxxx to a bool before comparing.
// One instance, sMyIsWXXXTesterFunctions, is defined together with the class, so the checks run
// when that object is constructed.
512 class MyIsWXXXTesterFunctions {
514 MyIsWXXXTesterFunctions ()
516 for (
wchar_t c = 0; c < 0xffff; ++c) {
517 Assert (CharacterProperties::IsAlpha_M (c) == !!iswalpha (c));
518 Assert (CharacterProperties::IsAlnum_M (c) == !!iswalnum (c));
519 Assert (CharacterProperties::IsPunct_M (c) == !!iswpunct (c));
520 Assert (CharacterProperties::IsSpace_M (c) == !!iswspace (c));
521 Assert (CharacterProperties::IsCntrl_M (c) == !!iswcntrl (c));
522 Assert (CharacterProperties::IsDigit_M (c) == !!iswdigit (c));
525 } sMyIsWXXXTesterFunctions;
// Constructs the exception for an unsupported code page: pre-formats the message
// "Code page {} not supported" with the code page substituted (stored as a narrow SDK string in
// fMsg_) and remembers the code page itself in fCodePage_.
534CodePageNotSupportedException::CodePageNotSupportedException (
CodePage codePage)
535 : fMsg_{
"Code page {} not supported"_f(codePage).AsNarrowSDKString ()}
536 , fCodePage_{codePage}
// Body of a separate member whose signature is not visible in this excerpt: returns the stored
// message as a C string (the pointer refers to fMsg_'s storage).
542 return fMsg_.c_str ();
550DISABLE_COMPILER_MSC_WARNING_START (4996);
551DISABLE_COMPILER_GCC_WARNING_START (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
552DISABLE_COMPILER_CLANG_WARNING_START (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
// Computes an output-buffer size (in bytes) for converting inCharCnt wide characters with
// MapFromUNICODE, chosen per code page without performing the conversion:
//   kANSI / kMAC / kPC / kPCA  -> 1 byte per input character
//   kSJIS                      -> 2 bytes per input character
//   kUTF8                      -> UTFConvert::ComputeTargetBufferSize<char8_t> over the input
//   otherwise                  -> 8 bytes per input character
// NOTE(review): presumably intended as an upper bound rather than an exact size - confirm.
// NOTE(review): when GetHandleBOM () is true a further per-code-page adjustment follows for the
// wide and UTF-8 code pages (presumably room for the BOM); those lines are not visible here.
553size_t CodePageConverter::MapFromUNICODE_QuickComputeOutBufSize (
const wchar_t* inChars,
size_t inCharCnt)
const
557 case WellKnownCodePages::kANSI:
558 resultSize = inCharCnt * 1;
560 case WellKnownCodePages::kMAC:
561 resultSize = inCharCnt * 1;
563 case WellKnownCodePages::kPC:
564 resultSize = inCharCnt * 1;
566 case WellKnownCodePages::kPCA:
567 resultSize = inCharCnt * 1;
569 case WellKnownCodePages::kSJIS:
570 resultSize = inCharCnt * 2;
573 case WellKnownCodePages::kUTF8:
574 resultSize = UTFConvert::ComputeTargetBufferSize<char8_t> (span{inChars, inChars + inCharCnt});
576 resultSize = inCharCnt * 8;
579 if (GetHandleBOM ()) {
581 case WellKnownCodePages::kUNICODE_WIDE:
582 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
586 case WellKnownCodePages::kUTF8: {
// Converts inMBCharCnt bytes of text in this converter's code page (fCodePage) to UTF-16.
//   inMBChars / inMBCharCnt  input bytes; the pointer may be null only when the count is 0
//   outChars                 destination buffer; may be null only when *outCharCnt is 0
//   outCharCnt               in: capacity of outChars; out: number of code units produced
// Dispatch: the eight single-byte code pages use the table-driven converters; the two wide code
// pages copy (or byte-swap) the input; kUTF8 uses UTFConvert; on Windows the
// PlatformCodePageConverter is also used (presumably for all remaining code pages - the case
// structure around it is not visible in this excerpt).
594void CodePageConverter::MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char16_t* outChars,
size_t* outCharCnt)
const
596 Require (inMBCharCnt == 0 or inMBChars !=
nullptr);
598 Require (*outCharCnt == 0 or outChars !=
nullptr);
// With BOM handling on, skip a leading byte-order mark - but only if the BOM that the guesser
// recognizes corresponds to this converter's own code page.
600 if (GetHandleBOM ()) {
601 size_t bytesToStrip = 0;
602 if (CodePagesGuesser{}.Guess (inMBChars, inMBCharCnt,
nullptr, &bytesToStrip) == fCodePage) {
603 Assert (inMBCharCnt >= bytesToStrip);
604 inMBChars += bytesToStrip;
605 inMBCharCnt -= bytesToStrip;
609 case WellKnownCodePages::kANSI:
610 TableDrivenCodePageConverter_<WellKnownCodePages::kANSI>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
612 case WellKnownCodePages::kMAC:
613 TableDrivenCodePageConverter_<WellKnownCodePages::kMAC>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
615 case WellKnownCodePages::kPC:
616 TableDrivenCodePageConverter_<WellKnownCodePages::kPC>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
618 case WellKnownCodePages::kPCA:
619 TableDrivenCodePageConverter_<WellKnownCodePages::kPCA>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
621 case WellKnownCodePages::kGreek:
622 TableDrivenCodePageConverter_<WellKnownCodePages::kGreek>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
624 case WellKnownCodePages::kTurkish:
625 TableDrivenCodePageConverter_<WellKnownCodePages::kTurkish>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
627 case WellKnownCodePages::kHebrew:
628 TableDrivenCodePageConverter_<WellKnownCodePages::kHebrew>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
630 case WellKnownCodePages::kArabic:
631 TableDrivenCodePageConverter_<WellKnownCodePages::kArabic>::MapToUNICODE (inMBChars, inMBCharCnt, outChars, outCharCnt);
// Wide input in native byte order: reinterpret the bytes as wchar_t and copy them through.
// NOTE(review): the element count and the memcpy size are computed with sizeof (wchar_t) while
// the destination is char16_t*; the two only agree where wchar_t is 2 bytes - confirm for
// non-Windows builds.
// NOTE(review): *outCharCnt is overwritten with the input count before the copy; no check
// against the capacity passed in is visible in this excerpt.
633 case WellKnownCodePages::kUNICODE_WIDE: {
634 const wchar_t* inWChars =
reinterpret_cast<const wchar_t*
> (inMBChars);
635 size_t inWCharCnt = (inMBCharCnt /
sizeof (wchar_t));
636 *outCharCnt = inWCharCnt;
637 (void)::memcpy (outChars, inWChars, inWCharCnt *
sizeof (
wchar_t));
// Wide input in big-endian order: same as above, but each element has its low two bytes swapped.
// NOTE(review): the swap expression assumes a 16-bit value; with a wider wchar_t, `c >> 8` keeps
// higher bits - confirm. The line that stores c into outChars is not visible here.
639 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
640 const wchar_t* inWChars =
reinterpret_cast<const wchar_t*
> (inMBChars);
641 size_t inWCharCnt = (inMBCharCnt /
sizeof (wchar_t));
642 *outCharCnt = inWCharCnt;
643 for (
size_t i = 0; i < inWCharCnt; ++i) {
644 wchar_t c = inWChars[i];
646 c = ((c & 0xff) << 8) + (c >> 8);
// UTF-8 input: delegate to UTFConvert, bounded by the capacity passed in via *outCharCnt; the
// number of code units it reports as produced becomes the new *outCharCnt.
650 case WellKnownCodePages::kUTF8: {
651 *outCharCnt =
UTFConvert::kThe.
Convert (span{inMBChars, inMBChars + inMBCharCnt}, span{outChars, *outCharCnt}).fTargetProduced;
654#if qStroika_Foundation_Common_Platform_Windows
655 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapToUNICODE (inMBChars, inMBCharCnt,
656 SAFE_WIN_WCHART_CAST_ (outChars), outCharCnt);
// Disabled (`&& 0`) cross-check: would redo the conversion with the Windows platform converter
// and assert that the count and the produced data match.
663#if qStroika_Foundation_Common_Platform_Windows && 0
666 size_t tstCharCnt = *outCharCnt;
668 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapToUNICODE (inMBChars, inMBCharCnt, tstBuf, &tstCharCnt);
669 Assert (tstCharCnt == *outCharCnt);
670 Assert (memcmp (tstBuf, outChars,
sizeof (
wchar_t) * tstCharCnt) == 0);
// char32_t overload: converts into a temporary buffer with the char16_t overload, then copies
// each resulting element into outChars, widening it to char32_t. On return *outCharCnt is the
// count reported by the char16_t conversion.
// NOTE(review): elements are widened one-for-one, so a UTF-16 surrogate pair would become two
// separate char32_t values rather than one code point - confirm whether that matters to callers.
// NOTE(review): the declaration and size of tmpBuf are on a line not visible in this excerpt.
675void CodePageConverter::MapToUNICODE (
const char* inMBChars,
size_t inMBCharCnt,
char32_t* outChars,
size_t* outCharCnt)
const
679 MapToUNICODE (inMBChars, inMBCharCnt, tmpBuf.data (), outCharCnt);
680 for (
size_t i = 0; i < *outCharCnt; ++i) {
681 outChars[i] = tmpBuf[i];
// Converts inCharCnt UTF-16 code units to bytes in this converter's code page (fCodePage).
//   inChars / inCharCnt  input code units; the pointer may be null only when the count is 0
//   outChars             destination byte buffer; may be null only when *outCharCnt is 0
//   outCharCnt           in: capacity of outChars in bytes; out: number of bytes produced
// Dispatch: kANSI, kMAC, kPC and kPCA use the table-driven converters; the two wide code pages
// copy (or byte-swap) the input, optionally preceded by a 2-byte BOM; kUTF8 uses UTFConvert,
// optionally preceded by the 3-byte BOM; on Windows the PlatformCodePageConverter is also used
// (presumably for the remaining code pages - the case structure is not visible in this excerpt).
// NOTE(review): unlike MapToUNICODE, no table-driven cases for kGreek/kTurkish/kHebrew/kArabic
// are visible here.
685void CodePageConverter::MapFromUNICODE (
const char16_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
const
687 Require (inCharCnt == 0 or inChars !=
nullptr);
689 Require (*outCharCnt == 0 or outChars !=
nullptr);
// remember the capacity passed in; it is used by the Windows cross-check at the end
691 [[maybe_unused]]
size_t outBufferSize = *outCharCnt;
692#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
// number of BOM bytes written in front of the payload, so the cross-check can skip them
693 size_t countOfBOMCharsAdded = 0;
697 case WellKnownCodePages::kANSI:
698 TableDrivenCodePageConverter_<WellKnownCodePages::kANSI>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
700 case WellKnownCodePages::kMAC:
701 TableDrivenCodePageConverter_<WellKnownCodePages::kMAC>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
703 case WellKnownCodePages::kPC:
704 TableDrivenCodePageConverter_<WellKnownCodePages::kPC>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
706 case WellKnownCodePages::kPCA:
707 TableDrivenCodePageConverter_<WellKnownCodePages::kPCA>::MapFromUNICODE (inChars, inCharCnt, outChars, outCharCnt);
// Wide output in native byte order, with an FF FE byte-order mark when BOM handling is on.
// NOTE(review): the byte count and the memcpy size use sizeof (wchar_t) although the source is
// char16_t*; where wchar_t is wider than 2 bytes this reads past the input - confirm for
// non-Windows builds.
// NOTE(review): the lines between the two GetHandleBOM () tests (presumably the pointer/size
// adjustment for the BOM and the capacity check) are not visible in this excerpt.
709 case WellKnownCodePages::kUNICODE_WIDE: {
710 if (*outCharCnt >= 2) {
711 wchar_t* outWBytes =
reinterpret_cast<wchar_t*
> (outChars);
712 size_t outByteCount = (inCharCnt *
sizeof (wchar_t));
713 if (GetHandleBOM ()) {
717 *outCharCnt = outByteCount;
718 if (GetHandleBOM ()) {
719 outChars[0] =
'\xff';
720 outChars[1] =
'\xfe';
721#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
722 countOfBOMCharsAdded = 2;
725 (void)::memcpy (outWBytes, inChars, inCharCnt *
sizeof (
wchar_t));
// Wide output in big-endian order, with an FE FF byte-order mark when BOM handling is on; each
// element has its low two bytes swapped.
// NOTE(review): same sizeof (wchar_t) concern as the native-order case; the line storing the
// swapped value is not visible in this excerpt.
731 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN: {
732 if (*outCharCnt >= 2) {
733 wchar_t* outWBytes =
reinterpret_cast<wchar_t*
> (outChars);
734 size_t outByteCount = (inCharCnt *
sizeof (wchar_t));
735 if (GetHandleBOM ()) {
739 *outCharCnt = outByteCount;
740 if (GetHandleBOM ()) {
741 outChars[0] =
'\xfe';
742 outChars[1] =
'\xff';
743#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
744 countOfBOMCharsAdded = 2;
747 for (
size_t i = 0; i < inCharCnt; ++i) {
748 wchar_t c = inChars[i];
750 c = ((c & 0xff) << 8) + (c >> 8);
// UTF-8 output: when BOM handling is on and at least 3 bytes are available, write EF BB BF first
// and reduce the space offered to the converter by 3.
758 case WellKnownCodePages::kUTF8: {
759 char* useOutChars = outChars;
760 size_t useOutCharCount = *outCharCnt;
761 if (GetHandleBOM ()) {
762 if (*outCharCnt >= 3) {
764 useOutCharCount -= 3;
765 reinterpret_cast<unsigned char*
> (outChars)[0] = 0xef;
766 reinterpret_cast<unsigned char*
> (outChars)[1] = 0xbb;
767 reinterpret_cast<unsigned char*
> (outChars)[2] = 0xbf;
768#if qStroika_Foundation_Debug_AssertionsChecked && qStroika_Foundation_Common_Platform_Windows
769 countOfBOMCharsAdded = 3;
777 UTFConvert::kThe.
Convert (span{inChars, inCharCnt}, span{
reinterpret_cast<char8_t*
> (useOutChars), useOutCharCount}).fTargetProduced;
// account for the BOM bytes in the reported output size
// NOTE(review): this +3 is guarded only by GetHandleBOM (), while the BOM write above is also
// guarded by *outCharCnt >= 3 (as far as visible) - confirm the two stay consistent.
778 if (GetHandleBOM ()) {
779 useOutCharCount += 3;
781 *outCharCnt = useOutCharCount;
784#if qStroika_Foundation_Common_Platform_Windows
785 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapFromUNICODE (SAFE_WIN_WCHART_CAST_ (inChars), inCharCnt,
786 outChars, outCharCnt);
// Windows-only cross-check: redo the conversion with the platform converter into a scratch
// buffer and (when assertions are enabled) compare size and bytes, skipping any BOM written
// above. A result starting with '?' is exempted from both assertions.
793#if qStroika_Foundation_Common_Platform_Windows
796 size_t win32TstCharCnt = outBufferSize;
799 Characters::Platform::Windows::PlatformCodePageConverter{fCodePage}.MapFromUNICODE (SAFE_WIN_WCHART_CAST_ (inChars), inCharCnt,
800 win32TstBuf.data (), &win32TstCharCnt);
805#if qStroika_Foundation_Debug_AssertionsChecked
806 Assert ((win32TstCharCnt + countOfBOMCharsAdded) == *outCharCnt or outChars[0] ==
'?');
807 Assert (memcmp (win32TstBuf.data (), outChars + countOfBOMCharsAdded, win32TstCharCnt) == 0 or outChars[0] ==
'?');
// char32_t overload. In the lines visible here the output is always produced by UTFConvert as
// UTF-8 (char8_t), optionally preceded by the 3-byte UTF-8 BOM; no use of fCodePage is visible.
//   outCharCnt  in: capacity of outChars in bytes; out: set from the converter's produced count
// NOTE(review): *outCharCnt is assigned the converter's count; whether the 3 BOM bytes are added
// to it afterwards, and where useOutChars is advanced past the BOM, are on lines not visible in
// this excerpt - confirm.
813void CodePageConverter::MapFromUNICODE (
const char32_t* inChars,
size_t inCharCnt,
char* outChars,
size_t* outCharCnt)
const
815 char* useOutChars = outChars;
816 size_t useOutCharCount = *outCharCnt;
// a BOM is written only if BOM handling is on and there is room for its 3 bytes
817 bool addBOM = GetHandleBOM () and useOutCharCount >= 3;
820 useOutCharCount -= 3;
821 reinterpret_cast<unsigned char*
> (outChars)[0] = 0xef;
822 reinterpret_cast<unsigned char*
> (outChars)[1] = 0xbb;
823 reinterpret_cast<unsigned char*
> (outChars)[2] = 0xbf;
825 *outCharCnt =
UTFConvert::kThe.
Convert (span{inChars, inCharCnt}, span{
reinterpret_cast<char8_t*
> (useOutChars), useOutCharCount}).fTargetProduced;
830DISABLE_COMPILER_MSC_WARNING_END (4996);
831DISABLE_COMPILER_GCC_WARNING_END (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
832DISABLE_COMPILER_CLANG_WARNING_END (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
840#if qStroika_Foundation_Common_Platform_Windows
// File-level accumulator that the enumeration callback below inserts into. The callback's only
// parameter is the code-page string, so the destination has to be reached through this variable.
841 shared_ptr<set<CodePage>> s_EnumCodePagesProc_Accumulator_;
// Callback passed to ::EnumSystemCodePages: parses the code-page string as an integer and adds
// it to the accumulator. The accumulator must be non-null while enumeration is running.
842 BOOL FAR PASCAL EnumCodePagesProc_ (LPTSTR lpCodePageString)
844 s_EnumCodePagesProc_Accumulator_->insert (_ttoi (lpCodePageString));
// Builds fCodePages_, the list of available code pages.
// On Windows the installed system code pages are enumerated with ::EnumSystemCodePages; on every
// platform the two wide UNICODE code pages and UTF-8 are then added. Collecting into a std::set
// first means the resulting vector is sorted and free of duplicates.
850CodePagesInstalled::CodePagesInstalled ()
852 Assert (fCodePages_.size () == 0);
854 shared_ptr<set<CodePage>> accum = make_shared<set<CodePage>> ();
855#if qStroika_Foundation_Common_Platform_Windows
// The callback writes through a single file-level accumulator, so concurrent constructions are
// serialized with this function-local static mutex while the accumulator is installed.
856 static mutex sCritSec_;
858 [[maybe_unused]] lock_guard critSec{sCritSec_};
859 Assert (s_EnumCodePagesProc_Accumulator_.get () ==
nullptr);
860 s_EnumCodePagesProc_Accumulator_ = accum;
861 ::EnumSystemCodePages (EnumCodePagesProc_, CP_INSTALLED);
// detach the shared accumulator again so the next caller starts from a null pointer
862 s_EnumCodePagesProc_Accumulator_.reset ();
866 accum->insert (WellKnownCodePages::kUNICODE_WIDE);
867 accum->insert (WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN);
868 accum->insert (WellKnownCodePages::kUTF8);
869 fCodePages_ = vector<CodePage>{accum->begin (), accum->end ()};
877DISABLE_COMPILER_MSC_WARNING_START (4996);
878DISABLE_COMPILER_GCC_WARNING_START (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
879DISABLE_COMPILER_CLANG_WARNING_START (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
// Guesses the code page of a byte buffer from a leading byte-order mark.
//   input / nBytes          the buffer to inspect
//   confidence              optional out: eHigh when a BOM was recognized, eLow otherwise
//   bytesFromFrontToStrip   optional out: length of the recognized BOM (2 or 3), else 0
// Recognized marks: FF FE -> kUNICODE_WIDE, FE FF -> kUNICODE_WIDE_BIGENDIAN,
// EF BB BF -> kUTF8. With no recognized mark the default SDK code page is returned.
// NOTE(review): the reads of input[0..2] need nBytes >= 2 / >= 3; the guarding conditions are on
// lines not visible in this excerpt - confirm.
880CodePage CodePagesGuesser::Guess (
const void* input,
size_t nBytes, Confidence* confidence,
size_t* bytesFromFrontToStrip)
// start from the "nothing recognized" answer for both optional outputs
882 if (confidence !=
nullptr) {
883 *confidence = Confidence::eLow;
885 if (bytesFromFrontToStrip !=
nullptr) {
886 *bytesFromFrontToStrip = 0;
889 unsigned char c0 =
reinterpret_cast<const unsigned char*
> (input)[0];
890 unsigned char c1 =
reinterpret_cast<const unsigned char*
> (input)[1];
// FF FE: 2-byte mark for native-order wide text
891 if (c0 == 0xff and c1 == 0xfe) {
892 if (confidence !=
nullptr) {
893 *confidence = Confidence::eHigh;
895 if (bytesFromFrontToStrip !=
nullptr) {
896 *bytesFromFrontToStrip = 2;
898 return WellKnownCodePages::kUNICODE_WIDE;
// FE FF: 2-byte mark for big-endian wide text
900 if (c0 == 0xfe and c1 == 0xff) {
901 if (confidence !=
nullptr) {
902 *confidence = Confidence::eHigh;
904 if (bytesFromFrontToStrip !=
nullptr) {
905 *bytesFromFrontToStrip = 2;
907 return WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN;
910 unsigned char c2 =
reinterpret_cast<const unsigned char*
> (input)[2];
// EF BB BF: 3-byte UTF-8 mark
911 if (c0 == 0xef and c1 == 0xbb and c2 == 0xbf) {
912 if (confidence !=
nullptr) {
913 *confidence = Confidence::eHigh;
915 if (bytesFromFrontToStrip !=
nullptr) {
916 *bytesFromFrontToStrip = 3;
918 return WellKnownCodePages::kUTF8;
// no mark recognized: fall back to the default SDK code page with low confidence
926 if (confidence !=
nullptr) {
927 *confidence = Confidence::eLow;
929 return Characters::GetDefaultSDKCodePage ();
931DISABLE_COMPILER_MSC_WARNING_END (4996);
932DISABLE_COMPILER_GCC_WARNING_END (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
933DISABLE_COMPILER_CLANG_WARNING_END (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
// Static table of display names, initialized from MakeDefaultCodePageNames (); GetName () reads from it.
940CodePagePrettyNameMapper::CodePageNames CodePagePrettyNameMapper::sCodePageNames_ = CodePagePrettyNameMapper::MakeDefaultCodePageNames ();
942CodePagePrettyNameMapper::CodePageNames CodePagePrettyNameMapper::MakeDefaultCodePageNames ()
944 CodePageNames codePageNames;
945 codePageNames.fUNICODE_WIDE = L
"UNICODE {wide characters}"sv;
946 codePageNames.fUNICODE_WIDE_BIGENDIAN = L
"UNICODE {wide characters - big endian}"sv;
947 codePageNames.fANSI = L
"ANSI (1252)"sv;
948 codePageNames.fMAC = L
"MAC (2)"sv;
949 codePageNames.fPC = L
"IBM PC United States code page (437)"sv;
950 codePageNames.fSJIS = L
"Japanese SJIS {932}"sv;
951 codePageNames.fUTF8 = L
"UNICODE {UTF-8}"sv;
952 codePageNames.f850 = L
"Latin I - MS-DOS Multilingual (850)"sv;
953 codePageNames.f851 = L
"Latin II - MS-DOS Slavic (850)"sv;
954 codePageNames.f866 = L
"Russian - MS-DOS (866)"sv;
955 codePageNames.f936 = L
"Chinese {Simplfied} (936)"sv;
956 codePageNames.f949 = L
"Korean (949)"sv;
957 codePageNames.f950 = L
"Chinese {Traditional} (950)"sv;
958 codePageNames.f1250 = L
"Eastern European Windows (1250)"sv;
959 codePageNames.f1251 = L
"Cyrilic (1251)"sv;
960 codePageNames.f10000 = L
"Roman {Macintosh} (10000)"sv;
961 codePageNames.f10001 = L
"Japanese {Macintosh} (10001)"sv;
962 codePageNames.f50220 = L
"Japanese JIS (50220)"sv;
963 return codePageNames;
// Return the display name for code page cp, taken from the static sCodePageNames_ table.
// Each branch returns the table field that corresponds to the selected code page.
966wstring CodePagePrettyNameMapper::GetName (
CodePage cp)
969 case WellKnownCodePages::kUNICODE_WIDE:
970 return sCodePageNames_.fUNICODE_WIDE;
971 case WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN:
972 return sCodePageNames_.fUNICODE_WIDE_BIGENDIAN;
973 case WellKnownCodePages::kANSI:
974 return sCodePageNames_.fANSI;
975 case WellKnownCodePages::kMAC:
976 return sCodePageNames_.fMAC;
977 case WellKnownCodePages::kPC:
978 return sCodePageNames_.fPC;
979 case WellKnownCodePages::kSJIS:
980 return sCodePageNames_.fSJIS;
981 case WellKnownCodePages::kUTF8:
982 return sCodePageNames_.fUTF8;
// The following branches return the names stored in the numerically named fields
// (f850, f851, ...); presumably each is selected by the code page number in the field name.
984 return sCodePageNames_.f850;
986 return sCodePageNames_.f851;
988 return sCodePageNames_.f866;
990 return sCodePageNames_.f936;
992 return sCodePageNames_.f949;
994 return sCodePageNames_.f950;
996 return sCodePageNames_.f1250;
997 case WellKnownCodePages::kCyrilic:
998 return sCodePageNames_.f1251;
1000 return sCodePageNames_.f10000;
1002 return sCodePageNames_.f10001;
1004 return sCodePageNames_.f50220;
// Last return: formats the numeric code page value with "%d" - presumably the fallback for
// code pages that have no stored name.
1006 return Characters::CString::Format (L
"%d", cp);
// Convert the wide characters in [wsStart, wsEnd) to a narrow string in the given code page.
//   wsStart, wsEnd - range of wide characters; wsStart must not be past wsEnd
//   codePage       - target code page
//   intoResult     - receives the converted string
// On Windows the work is delegated to Platform::Windows::WideStringToNarrow.
1016void Characters::WideStringToNarrow (
const wchar_t* wsStart,
const wchar_t* wsEnd,
CodePage codePage,
string* intoResult)
1019 Require (wsStart <= wsEnd);
1020#if qStroika_Foundation_Common_Platform_Windows
1021 Platform::Windows::WideStringToNarrow (wsStart, wsEnd, codePage, intoResult);
// Convert the narrow characters in [sStart, sEnd), encoded in codePage, to a wide string
// using CodePageConverter. The result is written to *intoResult.
1034 void PortableNarrowStringToWide_ (
const char* sStart,
const char* sEnd,
CodePage codePage, wstring* intoResult)
1037 Require (sStart <= sEnd);
1038 size_t inSize = sEnd - sStart;
1039 CodePageConverter cc{codePage};
// Size the output with the converter's quick estimate of the needed buffer size.
1041 size_t outSizeBuf = cc.MapToUNICODE_QuickComputeOutBufSize (sStart, inSize);
1042 intoResult->resize (outSizeBuf);
1043 size_t actualOutSize = 0;
// actualOutSize is in/out: buffer capacity going in, characters actually produced coming out.
1045 actualOutSize = outSizeBuf;
1046 cc.MapToUNICODE (sStart, inSize,
Containers::Start (*intoResult), &actualOutSize);
// Trim the string when fewer characters were produced than were reserved; the Assert records
// that the estimate is never expected to be too small.
1047 if (intoResult->size () != actualOutSize) {
1049 Assert (intoResult->size () > actualOutSize);
1050 intoResult->resize (actualOutSize);
// Convert the narrow characters in [sStart, sEnd), encoded in codePage, to a wide string.
//   sStart, sEnd - range of narrow characters; sStart must not be past sEnd
//   codePage     - code page the input is encoded in
//   intoResult   - receives the converted string
// On Windows the work is delegated to Platform::Windows::NarrowStringToWide.
1056void Characters::NarrowStringToWide (
const char* sStart,
const char* sEnd,
CodePage codePage, wstring* intoResult)
1059 Require (sStart <= sEnd);
1060#if qStroika_Foundation_Common_Platform_Windows
1061 Platform::Windows::NarrowStringToWide (sStart, sEnd, codePage, intoResult);
// Deprecation warnings (MSVC 4996, -Wdeprecated-declarations) are suppressed around this definition.
1073DISABLE_COMPILER_MSC_WARNING_START (4996);
1074DISABLE_COMPILER_GCC_WARNING_START (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1075DISABLE_COMPILER_CLANG_WARNING_START (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
// Convert the bytes in [start, end) to a wstring via MapSBUnicodeTextWithMaybeBOMToUNICODE.
// As the function name indicates, the input may or may not begin with a byte-order mark.
1076wstring Characters::MapUNICODETextWithMaybeBOMTowstring (
const char* start,
const char* end)
1079 Require (start <= end);
// The output capacity is set to the input byte count; the Assert below checks that the
// conversion did not report more characters than that.
1084 size_t outBufSize = end - start;
// outCharCount is in/out: capacity going in, characters produced coming out.
1086 size_t outCharCount = outBufSize;
1087 MapSBUnicodeTextWithMaybeBOMToUNICODE (start, end - start, wideBuf.data (), &outCharCount);
1088 Assert (outCharCount <= outBufSize);
// Empty result is handled separately, which also keeps the [outCharCount - 1] index below valid.
1089 if (outCharCount == 0) {
// Drop one trailing NUL character, if the converted text ends with one.
1096 return wstring{wideBuf.data (), wideBuf[outCharCount - 1] ==
'\0' ? (outCharCount - 1) : outCharCount};
1099DISABLE_COMPILER_MSC_WARNING_END (4996);
1100DISABLE_COMPILER_GCC_WARNING_END (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1101DISABLE_COMPILER_CLANG_WARNING_END (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
// Deprecation warnings (MSVC 4996, -Wdeprecated-declarations) are suppressed around this definition.
1108DISABLE_COMPILER_MSC_WARNING_START (4996);
1109DISABLE_COMPILER_GCC_WARNING_START (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1110DISABLE_COMPILER_CLANG_WARNING_START (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
// Serialize the wide characters in [start, end) into bytes in code page useCP.
// The converter is constructed with CodePageConverter::eHandleBOM.
// Returns the produced bytes as a vector<byte>.
1111vector<byte> Characters::MapUNICODETextToSerializedFormat (
const wchar_t* start,
const wchar_t* end,
CodePage useCP)
1114 CodePageConverter cpc{useCP, CodePageConverter::eHandleBOM};
// Start from the converter's quick estimate of the output size; MapFromUNICODE then updates
// outCharCount to the number of bytes actually written.
1115 size_t outCharCount = cpc.MapFromUNICODE_QuickComputeOutBufSize (start, end - start);
1117 cpc.MapFromUNICODE (start, end - start, buf.data (), &outCharCount);
// Reinterpret the char buffer as bytes and copy exactly outCharCount of them into the result.
1118 const byte* bs =
reinterpret_cast<const byte*
> (
static_cast<const char*
> (buf));
1119 return vector<byte>{bs, bs + outCharCount};
1121DISABLE_COMPILER_MSC_WARNING_END (4996);
1122DISABLE_COMPILER_GCC_WARNING_END (
"GCC diagnostic ignored \"-Wdeprecated-declarations\"");
1123DISABLE_COMPILER_CLANG_WARNING_END (
"clang diagnostic ignored \"-Wdeprecated-declarations\"");
#define qStroika_Foundation_Debug_AssertionsChecked
The qStroika_Foundation_Debug_AssertionsChecked flag determines if assertions are checked and validat...
#define RequireNotNull(p)
CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt,...
nonvirtual STRINGISH Bytes2String(span< const byte > from) const
nonvirtual BLOBISH String2Bytes(span< const CHAR_T > from) const
virtual const char * what() const noexcept override
nonvirtual ConversionResult Convert(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded strings/spans (including the special case of ASCII, and Latin1) (e....
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
CONTAINER::value_type * Start(CONTAINER &c)
For a contiguous container (such as a vector or basic_string) - find the pointer to the start of the ...
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...