Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
UTFConvert.inl
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#if qStroika_Foundation_Common_Platform_Windows
5#include <windows.h>
6#endif
7
10//#include "Stroika/Foundation/Memory/BlockAllocated.h" // causes include embrace problems
11#include "Stroika/Foundation/Memory/Common.h"
13
15
16 namespace Private_ {
17 // Memory::ValueOf() - but avoid #include
18 template <typename T>
19 inline T ValueOf_ (const optional<T>& t)
20 {
21 Require (t);
22 return *t;
23 }
24 }
25
26 /*
27 ********************************************************************************
28 *************************** Characters::UTFConvert *****************************
29 ********************************************************************************
30 */
#if qCompilerAndStdLib_DefaultMemberInitializerNeededEnclosingForDefaultFunArg_Buggy
// Only compiled when the named compiler-bug workaround macro is set: an explicit
// no-argument constructor that delegates to the Options-taking constructor with
// default-constructed Options.
constexpr UTFConvert::UTFConvert ()
    : UTFConvert{Options{}}
{
}
#endif
37 constexpr UTFConvert::UTFConvert (const Options& options)
38 : fOriginalOptions_{options}
39 , fUsingOptions{options}
40 {
41#if qStroika_Foundation_Common_Platform_Windows
42 if (fUsingOptions.fPreferredImplementation == nullopt and options.fInvalidCharacterReplacement == nullopt) {
43 fUsingOptions.fPreferredImplementation = Options::Implementation::eWindowsAPIWide2FromMultibyte;
44 }
45#endif
46 if (fUsingOptions.fPreferredImplementation == nullopt) {
47 fUsingOptions.fPreferredImplementation = Options::Implementation::eStroikaPortable;
48 }
49 if (options.fInvalidCharacterReplacement) {
50 // For now, thats all that supports fInvalidCharacterReplacement, but could do others pretty easily - probably
51 // Windows Wide2Multipbyte etc doesnt work with this, so dont use there.
52 // - LGP 2023-08-07
53 Require (fUsingOptions.fPreferredImplementation == Options::Implementation::eStroikaPortable);
54 }
55 }
56 inline constexpr auto UTFConvert::GetOptions () const -> Options
57 {
58 return fOriginalOptions_;
59 }
60 inline constexpr UTFConvert UTFConvert::kThe;
61 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
62 constexpr optional<size_t> UTFConvert::NextCharacter (span<const CHAR_T> s)
63 {
64 if (s.empty ()) {
65 return optional<size_t>{};
66 }
67 // Logic based on table from https://en.wikipedia.org/wiki/UTF-8#Encoding
68 if constexpr (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1>) {
69 return 1;
70 }
71 else if constexpr (same_as<CHAR_T, char8_t>) {
72 auto i = s.begin ();
73 // starting first byte
74 uint8_t firstByte = static_cast<uint8_t> (*i);
75 if (Memory::BitSubstring (firstByte, 7, 8) == 0b0) {
76 return 1;
77 }
78 if (i != s.end ()) {
79 ++i;
80 if (Memory::BitSubstring (firstByte, 5, 8) == 0b110) {
81 return i == s.end () ? optional<size_t>{} : 2;
82 }
83 }
84 if (i != s.end ()) {
85 ++i;
86 if (Memory::BitSubstring (firstByte, 4, 8) == 0b1110) {
87 return i == s.end () ? optional<size_t>{} : 3;
88 }
89 }
90 if (i != s.end ()) {
91 ++i;
92 if (Memory::BitSubstring (firstByte, 3, 8) == 0b11110) {
93 return i == s.end () ? optional<size_t>{} : 4;
94 }
95 }
96 return nullopt;
97 }
98 else if constexpr (sizeof (CHAR_T) == 2) {
99 // @todo - must find docs
100 auto i = s.begin ();
101 // starting first char16_t
103 return nullopt;
104 }
105 else if constexpr (sizeof (CHAR_T) == 4) {
106 return 1;
107 }
109 return nullopt;
110 }
111 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
112 constexpr optional<size_t> UTFConvert::ComputeCharacterLength (span<const CHAR_T> s)
113 {
114 if constexpr (sizeof (CHAR_T) == 4) {
115 return s.size ();
116 }
117 else {
118 size_t charCount{};
119 size_t i = 0;
120 while (optional<size_t> nOctets = NextCharacter (s.subspan (i))) {
121 ++charCount;
122 i += *nOctets;
123 if (i == s.size ()) [[unlikely]] {
124 break; // used up all the input span, so we're done
125 }
126 }
127 if (s.size () == i) [[likely]] {
128 return charCount;
129 }
130 else {
131 return nullopt; // didn't end evenly at end of span, so something went wrong
132 }
133 }
134 }
135 template <IUNICODECanUnambiguouslyConvertTo TO, IUNICODECanUnambiguouslyConvertFrom FROM>
136 constexpr size_t UTFConvert::ComputeTargetBufferSize (size_t srcSize)
137 {
138 if constexpr (sizeof (FROM) == sizeof (TO)) {
139 if constexpr (same_as<FROM, TO>) {
140 return srcSize; // not super useful to do this conversion, but given how if constexpr works/evaluates, its often important than this code compiles, even if it doesn't execute
141 }
142 if constexpr (same_as<FROM, Latin1> and same_as<TO, char8_t>) {
143 return srcSize * 2; // some latin1 characters - such as 0xb5 - MICRO SIGN - goto two UTF bytes
144 }
145 return srcSize; // I think this is right, but less certain than before I noticed the Latin1/UTF issue above
146 }
147 if constexpr (sizeof (FROM) == 1) {
148 // worst case is each src byte is a character: for small buffers, not worth computing tighter limit but for larger, could
149 // plausibly avoid a malloc, and even without, more likely to avoid wasted RAM/fragmentation for larger allocations
150 return srcSize;
151 }
152 else if constexpr (sizeof (FROM) == 2) {
153 if constexpr (sizeof (TO) == 1) {
154 // From https://stackoverflow.com/questions/9533258/what-is-the-maximum-number-of-bytes-for-a-utf-8-encoded-character
155 // answer if translating only characters from UTF-16 to UTF-8: 4 bytes
156 // @todo fix this is really smaller... I think 3 - look at https://en.wikipedia.org/wiki/UTF-8 more closely
157
158 // also - for larger strings - MIGHT be worth a closer estimate?
159 return 4 * srcSize;
160 }
161 else {
162 Require (sizeof (TO) == 4);
163 return srcSize; // worst case is no surrogate pairs
164 }
165 }
166 else if constexpr (sizeof (FROM) == 4) {
167 if constexpr (sizeof (TO) == 1) {
168 // From https://stackoverflow.com/questions/9533258/what-is-the-maximum-number-of-bytes-for-a-utf-8-encoded-character
169 // the maximum number of bytes for a character in UTF-8 is ... 4 (really 4 safe now so use that - was 6 bytes)
170 return 4 * srcSize;
171 }
172 else if constexpr (sizeof (TO) == 2) {
173 return 2 * srcSize;
174 }
175 else {
176 return srcSize;
177 }
178 }
179 else {
181 return 0;
182 }
183 }
184 template <IUNICODECanUnambiguouslyConvertTo TO, IUNICODECanUnambiguouslyConvertFrom FROM>
185 constexpr size_t UTFConvert::ComputeTargetBufferSize (span<const FROM> src)
186 requires (not is_const_v<TO>)
187 {
188 // NOTE - most of these routines could be (easily) optimized to actually compute the number
189 // of characters, instead of using an upper bound, but that would involve walking the source
190 // and counting characters. NOT clearly worth while. MAYBE as a hueristic, if src > 50 or so
191 // its worth counting, but anything smaller will just end up in a fixed sized buffer, so the exact
192 // count doesn't matter
193 // @small performance todo!!!!
194
195 // This code just checks cases where we peek at characters, then falls through to ComputeTargetBufferSize (size_t)
196 if constexpr (sizeof (FROM) != sizeof (TO)) {
197 if constexpr (sizeof (FROM) == 1) {
198 // worst case is each src byte is a character: for small buffers, not worth computing tighter limit but for larger, could
199 // plausibly avoid a malloc, and even without, more likely to avoid wasted RAM/fragmentation for larger allocations
200 if constexpr (sizeof (TO) == 4) {
201 if (src.size () * sizeof (TO) > Memory::kStackBuffer_TargetInlineByteBufferSize) {
202 if (auto i = ComputeCharacterLength (src)) {
203 return *i;
204 }
205 }
206 }
207 }
208 else if constexpr (sizeof (FROM) == 4) {
209 if constexpr (sizeof (TO) == 1) {
210 // From https://stackoverflow.com/questions/9533258/what-is-the-maximum-number-of-bytes-for-a-utf-8-encoded-character
211 // the maximum number of bytes for a character in UTF-8 is ... 4 (really 4 safe now so use that - was 6 bytes)
212
213 // @todo this is probably worth walking the characters and doing a better estimate
214 if (src.size () * 4 > Memory::kStackBuffer_TargetInlineByteBufferSize) {
215 // walk the characters, and see how much space each will use when encoded
216 size_t sz{};
217 for (auto c : src) {
218 if (isascii (static_cast<char32_t> (c))) {
219 ++sz;
220 }
221 else {
222 sz += 4; // @todo look at cases/ranges - not too hard to do better than this - very frequently just two bytes
223 }
224 }
225 return sz;
226 }
227 }
228 }
229 }
230 return ComputeTargetBufferSize<TO, FROM> (src.size ());
231 }
232 template <IUNICODECanUnambiguouslyConvertTo TO, IUNICODECanUnambiguouslyConvertFrom FROM>
233 constexpr size_t UTFConvert::ComputeTargetBufferSize (span<FROM> src)
234 requires (not is_const_v<TO>)
235 {
236 return ComputeTargetBufferSize<TO> (Memory::ConstSpan (src));
237 }
238 template <IUNICODECanUnambiguouslyConvertFrom CHAR_T>
239 constexpr bool UTFConvert::AllFitsInTwoByteEncoding (span<const CHAR_T> s) noexcept
240 {
241 if constexpr (same_as<CHAR_T, ASCII> or same_as<CHAR_T, Latin1>) {
242 return true;
243 }
244 // see https://en.wikipedia.org/wiki/UTF-16
245 // @todo - THIS IS VERY WRONG - and MUCH MORE COMPLEX - but will only return false negatives so OK to start
246
247 // note - tried to simplify with conditional_t but both sides evaluated
248 if constexpr (same_as<remove_cv_t<CHAR_T>, Character>) {
249 for (Character c : s) {
250 if (c.GetCharacterCode () > 0xd7ff) [[unlikely]] {
251 return false;
252 }
253 }
254 }
255 else if constexpr (same_as<remove_cv_t<CHAR_T>, Latin1>) {
256 return true;
257 }
258 else if constexpr (same_as<remove_cv_t<CHAR_T>, char8_t>) {
259 const char8_t* b = s.data ();
260 const char8_t* e = b + s.size ();
261 for (const char8_t* i = b; i < e;) {
262 auto n = NextCharacter (span<const char8_t>{i, e});
263 if (not n.has_value () or *n > 2) [[unlikely]] {
264 return false;
265 }
266 i += *n;
267 }
268 }
269 else {
270 for (CHAR_T c : s) {
271 if (static_cast<make_unsigned_t<CHAR_T>> (c) > 0xd7ff) [[unlikely]] {
272 return false;
273 }
274 }
275 }
276 return true;
277 }
278 template <IUNICODECanUnambiguouslyConvertFrom SRC_T, IUNICODECanUnambiguouslyConvertTo TRG_T>
279 inline auto UTFConvert::Convert (span<const SRC_T> source, span<TRG_T> target) const -> ConversionResult
280 {
281 Require ((target.size () >= ComputeTargetBufferSize<TRG_T> (source)));
282 ConversionResultWithStatus result = ConvertQuietly (source, target);
283 ThrowIf_ (result.fStatus, result.fSourceConsumed);
284 return result; // slice - no need to return 'status' - we throw on any status but success
285 }
286 template <IUNICODECanUnambiguouslyConvertFrom SRC_T, IUNICODECanUnambiguouslyConvertTo TRG_T>
287 inline auto UTFConvert::Convert (span<SRC_T> source, span<TRG_T> target) const -> ConversionResult
288 {
289 return Convert (Memory::ConstSpan (source), target);
290 }
291 template <typename TO, typename FROM>
292 inline TO UTFConvert::Convert (const FROM& from) const
293 requires ((same_as<TO, string> or same_as<TO, wstring> or same_as<TO, u8string> or same_as<TO, u16string> or same_as<TO, u32string>) and
294 (same_as<FROM, string> or same_as<FROM, wstring> or same_as<FROM, u8string> or same_as<FROM, u16string> or same_as<FROM, u32string>))
295 {
296 if constexpr (same_as<TO, FROM>) {
297 return from;
298 }
299 else {
300 size_t cvtBufSize = ComputeTargetBufferSize<typename TO::value_type> (span{from});
301 Memory::StackBuffer<typename TO::value_type> buf{Memory::eUninitialized, cvtBufSize};
302 return TO{buf.begin (), get<1> (Convert (span{from}, span{buf}))};
303 }
304 }
305 template <IUNICODECanUnambiguouslyConvertFrom SRC_T, IUNICODECanUnambiguouslyConvertTo TRG_T>
306 inline span<TRG_T> UTFConvert::ConvertSpan (span<const SRC_T> source, span<TRG_T> target) const
307 requires (not is_const_v<TRG_T>)
308 {
309 return span{target.data (), Convert (source, target).fTargetProduced};
310 }
311 template <IUNICODECanUnambiguouslyConvertFrom SRC_T, IUNICODECanUnambiguouslyConvertTo TRG_T>
312 inline span<TRG_T> UTFConvert::ConvertSpan (span<SRC_T> source, span<TRG_T> target) const
313 requires (not is_const_v<TRG_T>)
314 {
315 return ConvertSpan (Memory::ConstSpan (source), target);
316 }
317 template <IUNICODECanUnambiguouslyConvertFrom SRC_T, IUNICODECanUnambiguouslyConvertTo TRG_T>
318 inline auto UTFConvert::ConvertQuietly (span<const SRC_T> source, span<TRG_T> target) const -> ConversionResultWithStatus
319 requires (not is_const_v<TRG_T>)
320 {
321 Require ((target.size () >= ComputeTargetBufferSize<TRG_T> (source)));
322 using PRIMITIVE_SRC_T = typename decltype (this->ConvertToPrimitiveSpan_ (source))::value_type;
323 using PRIMITIVE_TRG_T = typename decltype (this->ConvertToPrimitiveSpan_ (target))::value_type;
324 if constexpr (same_as<SRC_T, TRG_T>) {
325 Memory::CopyBytes (source, target);
326 return ConversionResultWithStatus{{.fSourceConsumed = source.size (), .fTargetProduced = source.size ()}, ConversionStatusFlag::ok};
327 }
328 else if constexpr (same_as<PRIMITIVE_SRC_T, PRIMITIVE_TRG_T> and sizeof (PRIMITIVE_SRC_T) != 1) {
329 static_assert (not same_as<SRC_T, TRG_T>);
330 static_assert (sizeof (SRC_T) == sizeof (TRG_T)); // I THINK - else this needs rethinking...
331 Memory::CopyBytes (Memory::SpanBytesCast<span<const TRG_T>> (source), target);
332 return ConversionResultWithStatus{{.fSourceConsumed = source.size (), .fTargetProduced = source.size ()}, ConversionStatusFlag::ok};
333 }
334 else if constexpr (same_as<SRC_T, Latin1>) {
335 if constexpr (same_as<TRG_T, char8_t>) {
336 // Based on https://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
337 char8_t* outPtr = target.data ();
338 for (const SRC_T ch : source) {
339 if (ch < 0x80) {
340 *outPtr++ = ch;
341 }
342 else {
343 *outPtr++ = 0xc0 | ch >> 6;
344 *outPtr++ = 0x80 | (ch & 0x3f);
345 }
346 }
347 Assert (outPtr <= target.data () + target.size ());
348 return ConversionResultWithStatus{
349 {.fSourceConsumed = source.size (), .fTargetProduced = static_cast<size_t> (outPtr - target.data ())}, ConversionStatusFlag::ok};
350 }
351 else {
352 // ALL TRG_T (but UTF8 and ASCII) have Latin1 as a strict subset so simply copy
353 Memory::CopySpanData (source, target);
354 return ConversionResultWithStatus{{.fSourceConsumed = source.size (), .fTargetProduced = source.size ()}, ConversionStatusFlag::ok};
355 }
356 }
357 else {
358 switch (Private_::ValueOf_ (fUsingOptions.fPreferredImplementation)) {
359 case Options::Implementation::eStroikaPortable: {
360 return ConvertQuietly_StroikaPortable_ (fUsingOptions.fInvalidCharacterReplacement, ConvertToPrimitiveSpan_ (source),
361 ConvertToPrimitiveSpan_ (target));
362 }
363#if qStroika_Foundation_Common_Platform_Windows
364 case Options::Implementation::eWindowsAPIWide2FromMultibyte: {
365 if constexpr ((sizeof (SRC_T) == 1 and sizeof (TRG_T) == 2) or (sizeof (SRC_T) == 2 and sizeof (TRG_T) == 1)) {
366 return ConvertQuietly_Win32_ (ConvertToPrimitiveSpan_ (source), ConvertToPrimitiveSpan_ (target));
367 }
368 }
369#endif
370#if __has_include("boost/locale/encoding_utf.hpp")
371 case Options::Implementation::eBoost_Locale: {
372 if constexpr (same_as<SRC_T, char8_t> and same_as<TRG_T, char16_t>) {
373 return ConvertQuietly_boost_locale_ (ConvertToPrimitiveSpan_ (source), ConvertToPrimitiveSpan_ (target));
374 }
375 }
376#endif
377 case Options::Implementation::eCodeCVT: {
378 if constexpr ((same_as<SRC_T, char16_t> or same_as<SRC_T, char32_t>) and same_as<TRG_T, char8_t>) {
379 return ConvertQuietly_codeCvt_ (source, target);
380 }
381 if constexpr (same_as<SRC_T, char8_t> and (same_as<TRG_T, char16_t> or same_as<SRC_T, char32_t>)) {
382 return ConvertQuietly_codeCvt_ (source, target);
383 }
384 }
385 }
386 return ConvertQuietly_StroikaPortable_ (fUsingOptions.fInvalidCharacterReplacement, ConvertToPrimitiveSpan_ (source),
387 ConvertToPrimitiveSpan_ (target)); // default if preferred not available
388 }
389 }
390 template <IUNICODECanUnambiguouslyConvertTo TRG_T, IUNICODECanUnambiguouslyConvertFrom SRC_T>
391 size_t UTFConvert::ConvertOffset (span<const SRC_T> source, size_t srcIndex) const
392 {
393 static_assert (not is_const_v<TRG_T>);
394 // needlessly costly way to compute, but hopefully adequate for now -- LGP 2022-12-27
395 // @todo redo using CountCharacters...
396 Require (srcIndex <= source.size ());
397 span<const SRC_T> fakeSrc{source.begin (), srcIndex};
398 Memory::StackBuffer<TRG_T> fakeOut{ComputeTargetBufferSize<TRG_T> (fakeSrc)};
399 ConversionResult r = Convert (fakeSrc, span{fakeOut});
400 return r.fTargetProduced;
401 }
402 template <IUNICODECanUnambiguouslyConvertFrom FromT>
403 constexpr auto UTFConvert::ConvertToPrimitiveSpan_ (span<FromT> f) -> span<CompatibleT_<FromT>>
404 {
405 return span{(CompatibleT_<FromT>*)f.data (), f.size ()};
406 }
407#if qStroika_Foundation_Common_Platform_Windows
408 inline auto UTFConvert::ConvertQuietly_Win32_ (span<const char8_t> source, span<char16_t> target) -> ConversionResultWithStatus
409 {
410 if (source.begin () == source.end ()) {
411 return ConversionResultWithStatus{{0, 0}, ConversionStatusFlag::ok};
412 }
413 else {
414 int srcLen = static_cast<int> (source.size ());
415 int trgLen = static_cast<int> (target.size ());
416 int convertedLength = ::MultiByteToWideChar (CP_UTF8, 0, reinterpret_cast<const char*> (source.data ()), srcLen,
417 reinterpret_cast<WCHAR*> (&*target.begin ()), trgLen);
418 return ConversionResultWithStatus{{static_cast<size_t> (srcLen), // wag - dont think WideCharToMultiByte tells us how much source consumed
419 static_cast<size_t> (convertedLength)},
420 convertedLength == 0 ? ConversionStatusFlag::sourceIllegal : ConversionStatusFlag::ok};
421 }
422 }
423 inline auto UTFConvert::ConvertQuietly_Win32_ (span<const char16_t> source, span<char8_t> target) -> ConversionResultWithStatus
424 {
425 if (source.begin () == source.end ()) {
426 return ConversionResultWithStatus{{0, 0}, ConversionStatusFlag::ok};
427 }
428 else {
429 int srcLen = static_cast<int> (source.size ());
430 int trgLen = static_cast<int> (target.size ());
431 int convertedLength = ::WideCharToMultiByte (CP_UTF8, 0, reinterpret_cast<const WCHAR*> (source.data ()), srcLen,
432 reinterpret_cast<char*> (target.data ()), trgLen, nullptr, nullptr);
433 return ConversionResultWithStatus{{static_cast<size_t> (srcLen), // wag - dont think WideCharToMultiByte tells us how much source consumed
434 static_cast<size_t> (convertedLength)},
435 convertedLength == 0 ? ConversionStatusFlag::sourceIllegal : ConversionStatusFlag::ok};
436 }
437 }
438#endif
439 inline void UTFConvert::ThrowIf_ (ConversionStatusFlag cr, size_t errorAtSourceOffset)
440 {
441 switch (cr) {
443 break;
444 default:
445 Throw (cr, errorAtSourceOffset);
446 }
447 }
448#if __has_include("boost/locale/encoding_utf.hpp")
449 inline auto UTFConvert::ConvertQuietly_boost_locale_ (span<const char8_t> source, const span<char16_t> target) -> ConversionResultWithStatus
450 {
451 if (source.empty ()) {
452 return ConversionResultWithStatus{{0, 0}, ConversionStatusFlag::ok};
453 }
454 basic_string<char8_t> src = basic_string<char8_t>{source.data (), source.size ()};
455 u16string r = boost::locale::conv::utf_to_utf<char16_t> (src.c_str ());
456 Memory::CopyBytes (span<char16_t>{r}, target);
457 return ConversionResultWithStatus{{source.size (), r.size ()}, ConversionStatusFlag::ok};
458#if 0
459 utf::code_point c;
460 char16_t* p = target.begin ();
461 while(begin!=end) {
462 c=utf::utf_traits<char8_t>::template decode<char8_t const *>(begin,end);
463 if(c==utf::illegal || c==utf::incomplete) {
464 // throw or ignore
465
466 }
467 else {
468 utf::utf_traits<char16_t>::template encode(c,p);
469 }
470 }
471
472 // do something more like this loop/inserter stuff
473 std::basic_string<CharOut> result;
474 result.reserve(end-begin);
475 typedef std::back_insert_iterator<std::basic_string<CharOut> > inserter_type;
476 inserter_type inserter(result);
477 utf::code_point c;
478 while(begin!=end) {
479 c=utf::utf_traits<CharIn>::template decode<CharIn const *>(begin,end);
480 if(c==utf::illegal || c==utf::incomplete) {
481 if(how==stop)
482 throw conversion_error();
483 }
484 else {
485 utf::utf_traits<CharOut>::template encode<inserter_type>(c,inserter);
486 }
487 }
488 return result;
489#endif
490 //tmphack to test
491 return ConversionResultWithStatus{{0, 0}, ConversionStatusFlag::ok};
492 }
493#endif
494
495}
#define AssertNotImplemented()
Definition Assertions.h:401
#define AssertNotReached()
Definition Assertions.h:355
Profile Convert(const VariantValue &v)
Definition Profile.cpp:50
nonvirtual ConversionResult Convert(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded strings/spans (including the special case of ASCII, and Latin1) (e....
static const UTFConvert kThe
Nearly always use this default UTFConvert.
Definition UTFConvert.h:369
nonvirtual ConversionResultWithStatus ConvertQuietly(span< const SRC_T > source, span< TRG_T > target) const
Convert UTF encoded (char8_t, char16_t, char32_t, char, wchar_t, ASCII, Latin1) characters to from ea...
static constexpr bool AllFitsInTwoByteEncoding(span< const CHAR_T > s) noexcept
static constexpr optional< size_t > ComputeCharacterLength(span< const CHAR_T > s)
static constexpr optional< size_t > NextCharacter(span< const CHAR_T > s)
nonvirtual span< TRG_T > ConvertSpan(span< const SRC_T > source, span< TRG_T > target) const
Convert between UTF-N encoded (including the special case of ASCII, and Latin1) character spans (e....
nonvirtual size_t ConvertOffset(span< const SRC_T > source, size_t srcIndex) const
static constexpr size_t ComputeTargetBufferSize(span< const FROM > src)
constexpr UTFConvert(const Options &options=Options{})