Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
CodeCvt.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Characters_CodeCvt_h_
5#define _Stroika_Foundation_Characters_CodeCvt_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <bit>
10#include <locale>
11#include <optional>
12#include <span>
13#include <variant>
14
19
20/**
21 * \file
22 * Simple wrapper on std::codecvt, abstracting commonalities between std::codecvt and UTFConvert, to map characters (UNICODE) <--> bytes
23 */
24
26
27 using namespace std;
28
29 class String; // forward declare to reference without deadly embrace
30
31 namespace Private_ {
 // Implementation detail of the IStdCodeCVT concept: a variable-template trait, queried with the type 'pointer to const T'.
 // Primary template: any type not matched by one of the partial specializations below is not a std::codecvt.
32 template <typename>
33 inline constexpr bool IsStdCodeCvt_ = false;
 // std::codecvt_byname needs its own specialization: partial-specialization matching is exact (no derived-to-base
 // pointer conversion is considered), so a 'const codecvt_byname<...>*' never matches the std::codecvt specialization below.
34 template <class _Elem, class _Byte, class _Statype>
35 inline constexpr bool IsStdCodeCvt_<const std::codecvt_byname<_Elem, _Byte, _Statype>*> = true; // required - see explanation just above this specialization
 // Any specialization of std::codecvt itself.
36 template <class _Elem, class _Byte, class _Statype>
37 inline constexpr bool IsStdCodeCvt_<const std::codecvt<_Elem, _Byte, _Statype>*> = true;
38 }
39
40 /**
41 * \brief Satisfied when STD_CODECVT_T is a specialization of std::codecvt or of std::codecvt_byname.
 *
 * \note Detection is done through the Private_::IsStdCodeCvt_ partial specializations, so only those two
 *       class templates are recognized; as written, some other (e.g. user-defined) class derived from
 *       std::codecvt does NOT satisfy this concept.
42 */
43 template <typename STD_CODECVT_T>
44 concept IStdCodeCVT = Private_::IsStdCodeCvt_<const STD_CODECVT_T*>;
 // Compile-time sanity checks of the concept. The char8_t based specializations are wrapped in an MSVC
 // warning-4996 suppression because they are deprecated (see the LWG issue referenced on the next line).
45 DISABLE_COMPILER_MSC_WARNING_START (4996) // https://cplusplus.github.io/LWG/issue3767 DEPRECATED
46 static_assert (IStdCodeCVT<std::codecvt<char16_t, char8_t, std::mbstate_t>>);
47 static_assert (IStdCodeCVT<std::codecvt<char32_t, char8_t, std::mbstate_t>>);
48 DISABLE_COMPILER_MSC_WARNING_END (4996)
49 static_assert (IStdCodeCVT<std::codecvt<wchar_t, char, std::mbstate_t>>);
50 static_assert (IStdCodeCVT<std::codecvt_byname<wchar_t, char, std::mbstate_t>>);
51
52 /**
53 * \brief CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt, as well as UTFConvert etc, to map between span<bytes> and a span<UNICODE code-point>
54 *
55 * Note, UTFConvert is probably a slightly better API, and better designed, and faster. HOWEVER, it ONLY converts to/from UNICODE. std::codecvt can convert to/from
56 * any locale code page, and is more general.
57 *
58 * Use the CodeCvt<> API when your code conversions may involve non UNICODE byte representations.
59 *
60 * Note that this class - like codecvt - can be used to 'page' over an input, and incrementally convert it (though how it does this
61 * differs from codecvt - not maintaining a partial state - but instead adjusting the amount consumed from the input to reflect
62 * full-character conversions).
63 *
64 * \note - if encountering invalid data in the input (invalid characters) - this will 'THROW' and not just fill in special bogus replacement
65 * characters.
66 *
67 * \note - the BINARY format character is OPAQUE given this API (you get/set bytes). The CHAR_T in the template argument
68 * refers to the 'CHARACTER' format you map to/from binary format (so typically wchar_t, or char32_t maybe).
69 *
70 * Enhancements over std::codecvt:
71 * o this is a span<> based API
72 * o You can subclass IRep (to provide your own CodeCvt implementation) and copy CodeCvt objects.
73 * (unless I'm missing something, you can do one or the other with std::codecvt, but not both)
74 * o Simpler backend virtual API, so easier to create your own compliant CodeCvt object.
75 * o CodeCvt leverages these two things via UTFConvert (which uses different library backends to do
76 * the UTF code conversion, hopefully enuf faster to make up for the virtual call overhead this
77 * class introduces).
78 * o Don't support 'partial' conversion.
79 * If there is insufficient space in the target buffer, this is an ASSERTION error - UNSUPPORTED.
80 * ALL 'srcSpan' CHARACTER data MUST be consumed/converted. For byte data, we allow only a single
81 * partial character to remain at the end: the Bytes2Characters overload taking a pointer to the span
82 * updates that span to reflect the remaining (unconsumed) bytes.
83 * o Don't bother templating output byte type (std::codecvt supports all the useless
84 * ones but misses the most useful, at least for fileIO, binary IO)
85 * o Don't support mbstate_t. Its opaque, and a PITA. And redundant.
86 * o lots of templated combinations (codecvt) dont make sense and dont work and there is no hint/validation
87 * clarity about which you can use/make sense and which you cannot with std::codecvt. Hopefully
88 * this class will make more sense.
89 * It can be used to convert (abstract API) between ANY combination of 'target hidden in implementation'
90 * and exposed CHAR_T characters (reading or writing). DEFAULT CTORS only provide the combinations
91 * supported by stdc++ (and a little more). To get other combinations, you must use subclass.
92 * o 'equivalent' code-point types automatically supported (e.g. wchar_t == char16_t or char32_t, and
93 * Character==char32_t).
94 * o No explicit 'external_type' exposed. Just bytes go in and out vs (CHAR_T) UNICODE characters.
95 * This erasure of the 'encoding' type from the CodeCvt<CHAR_T> allows it to be used generically
96 * where its hidden in the 'rep' what kind of encoding is used.
97 *
98 * Difference:
99 * o Maybe enhancement, maybe step back:
100 * Must call ComputeTargetCharacterBufferSize/ComputeTargetByteBufferSize and provide
101 * an output buffer large enough. This way, you can NEVER get a partial conversion due to lack of output buffer space (which simplifies a lot
102 * within this API). NOTE - large enuf doesn't necessarily mean as large as ComputeTargetCharacterBufferSize/ComputeTargetByteBufferSize would say, as those
103 * provide safe estimate. If you know for special reasons, you can use a smaller size, but the call must always FIT - no 'targetExhausted' exceptions thrown.
104 * o no 'noconv' error code (better in that simpler, but worse in that forces throw on bad characters)
105 *
106 * Enhancements over UTFConvert:
107 * o UTFConvert only supports UNICODE <-> UNICODE translations, even if in different
108 * UNICODE encodings. This API supports UNICODE <-> any arbitrary output binary format.
109 * o So in particular, it supports translating between UNICODE characters and locale encodings (e.g. SHIFT_JIS, or whatever).
110 *
111 * And:
112 * o All the existing codecvt objects (which map to/from UNICODE) can easily be wrapped in a CodeCvt
113 *
114 * CodeCvt is a smart pointer class, wrapping an 'abstract class' (IRep); only for some CHAR_T types
115 * can it be instantiated directly (the ones std c++ supports: char16_t, char32_t, and wchar_t with locale).
116 */
117 template <IUNICODECanAlwaysConvertTo CHAR_T = Character>
118 class CodeCvt {
119 public:
120 using intern_type = CHAR_T; // what codecvt calls the (internal/CHAR_T) character type
121
122 public:
123 struct IRep;
124
125 public:
126 /**
 * \brief Options controlling how a CodeCvt converts (currently just the policy for invalid characters).
127 */
128 struct Options {
129 /**
 * \brief Produce an Options for this CodeCvt<CHAR_T> from the Options 'o' of a CodeCvt with a (possibly) different character type FROM_CHAR_T_OPTIONS.
 *
130 * NOTE - do as New not CTOR, so can still construct Options using designated initializers.
 *
 * \note (review) presumably this just carries over fInvalidCharacterReplacement - definition not visible here; confirm in CodeCvt.inl
131 */
132 template <qCompilerAndStdLib_ConstraintDiffersInTemplateRedeclaration_BWA (IUNICODECanAlwaysConvertTo) FROM_CHAR_T_OPTIONS>
133 static constexpr inline Options New (typename CodeCvt<FROM_CHAR_T_OPTIONS>::Options o);
134
135 /**
136 * If fInvalidCharacterReplacement is nullopt (the default) - conversion APIs throw on invalid characters;
137 * otherwise they use the value provided in fInvalidCharacterReplacement as the replacement for invalid characters.
138 *
139 * Note - CodeCvt allows the Character to be comprised of multiple CHAR_T characters potentially.
140 */
141 optional<Character> fInvalidCharacterReplacement;
142 };
143
144 public:
145 /**
146 * Default CTOR:
147 * Produces the fastest available CodeCvt(), between the templated UNICODE code-point
148 * and UTF-8 (as the binary format).
149 *
150 * CodeCvt (const locale& l):
151 * Produces a CodeCvt which maps (back and forth) between bytes in the 'locale' character set, and
152 * UNICODE Characters.
153 *
154 * CodeCvt (const string& localeName):
155 * Is equivalent to mkFromStdCodeCvt<...> (std::codecvt_byname {localeName}) - so it can throw if no such locale name
156 *
157 * CodeCvt (span<const byte>* guessFormatFrom)
158 * the initial part of the span data (up to kMaxBOMSize bytes) are examined and used to select the CodeCvt to create (else default CodeCvt created).
159 * If a BOM is found, guessFormatFrom is adjusted to skip it.
160 *
161 * CodeCvt (CodePage):
162 * Can throw if the code page is not recognized. NOTE - CodePage is a Windows concept, and though many code pages
163 * are provided portably (@todo list) - many more are not, and will fail on non-windows, and succeed only on windows.
164 * The (Windows) code pages which are always (portably) provided include:
165 * WellKnownCodePages::kANSI
166 * WellKnownCodePages::kMAC
167 * WellKnownCodePages::PC, // IBM PC code page 437
168 * WellKnownCodePages::PCA, // IBM PC code page 850, used by IBM Personal System/2
169 * WellKnownCodePages::kGreek,
170 * WellKnownCodePages::kTurkish,
171 * WellKnownCodePages::kHebrew,
172 * WellKnownCodePages::kArabic,
173 * WellKnownCodePages::kUNICODE_WIDE, // Standard UNICODE for MS-Windows
174 * WellKnownCodePages::kUNICODE_WIDE_BIGENDIAN,
175 * WellKnownCodePages::UTF8
176 *
177 * To use (wrap) existing std::codecvt<A,B,C> class:
178 * Quirky, because classes not generally directly instantiable, so instead specify CLASS as template param
179 * and ARGS to CTOR.
180 * CodeCvt<CHAR_T,std::codecvt<CHAR_T, BINARY_T, MBSTATE_T>> {args to that class}
181 * Note works with subclasses of std::codecvt like std::codecvt_byname
182 *
183 * To get OTHER conversions, say between char16_t, and char32_t (combines/chains CodeCvt's):
184 * CodeCvt<CHAR_T>{UnicodeExternalEncodings} - Uses UTFConvert, along with any needed byte swapping
185 * CodeCvt<CHAR_T>{const CodeCvt<OTHER_CHAR_T> basedOn} - Use this to combine CodeCvt's (helpful for locale one)
186 *
187 * \par Example Usage:
188 * \code
189 * CodeCvt cc{"en_US.UTF8"};
190 * constexpr char8_t someRandomText[] = u8"hello mom";
191 * span<const byte> someRandomTextBinarySpan = as_bytes (span<const char8_t> {someRandomText, Characters::CString::Length (someRandomText)});
192 * StackBuffer<Character> buf{cc.ComputeTargetCharacterBufferSize (someRandomTextBinarySpan)};
193 * auto b = cc.Bytes2Characters (&someRandomTextBinarySpan, span{buf});
194 * EXPECT_TRUE (someRandomTextBinarySpan.size () == 0); // ALL CONSUMED
195 * EXPECT_TRUE (b.size () == 9 and b[0] == 'h');
196 * \endcode
197 *
198 * \par Example Usage:
199 * \code
200 * // codeCvt Between UTF16 Characters And UTF8BinaryFormat, best/fastest way
201 * CodeCvt<char16_t> codeCvt1{};
202 *
203 * // codeCvt Between UTF16 Characters And UTF8BinaryFormat using std::codecvt<char16_t, char8_t, std::mbstate_t>
204 * CodeCvt<char16_t> codeCvt2 = CodeCvt<char16_t>::mkFromStdCodeCvt<std::codecvt<char16_t, char8_t, std::mbstate_t>> ();
205 *
206 * // codeCvt Between UTF16 Characters using codecvt_byname
207 * CodeCvt<char16_t> codeCvt3 = CodeCvt<char16_t>::mkFromStdCodeCvt<std::codecvt_byname<char16_t, char8_t, std::mbstate_t>> ({}, "en_US.UTF8");
208 *
209 * // or equivalently
210 * CodeCvt<char16_t> codeCvt4{"en_US.UTF8"};
211 * \endcode
212 *
213 *
214 * @todo - ASSURE THESE 'locale-names' always work:
215 * (case insensitive equals)
216 * utf-8
217 * ISO-8859-1
218 */
219 CodeCvt (const Options& options = Options{});
220 CodeCvt (const CodeCvt& src) = default;
221 CodeCvt (CodeCvt&& src) = default;
222 CodeCvt (const locale& l, const Options& options = Options{});
223 CodeCvt (const Charset& charset, const Options& options = Options{});
224 CodeCvt (UnicodeExternalEncodings e, const Options& options = Options{});
225 CodeCvt (span<const byte>* guessFormatFrom, const optional<CodeCvt>& useElse = nullopt, const Options& options = Options{});
226 CodeCvt (CodePage e, const Options& options = Options{});
227 template <IUNICODECanAlwaysConvertTo INTERMEDIATE_CHAR_T>
228 CodeCvt (const CodeCvt<INTERMEDIATE_CHAR_T>& basedOn);
229 CodeCvt (const shared_ptr<IRep>& rep);
230
231 public:
232 /**
 * \brief Factory: build a CodeCvt that wraps the std::codecvt-family class STD_CODECVT (named as an explicit template argument).
 *
233 * Note, though logically this should be a CodeCvt constructor, since you cannot directly construct
234 * the STD_CODECVT, it cannot be passed by argument to the constructor. And so there
235 * appears no way to deduce or specify those constructor template arguments. But that can be done
236 * explicitly with a static function, and that is what we do with mkFromStdCodeCvt.
237 *
 * STD_CODECVT must satisfy IStdCodeCVT, and its intern_type must be exactly CHAR_T (see the requires clause).
 * 'args' are the constructor arguments intended for STD_CODECVT (presumably passed through to its constructor - confirm in CodeCvt.inl).
 *
238 * \note - everything else has options last argument, but since we use ... parameter pack, options must be first here.
239 */
240 template <IStdCodeCVT STD_CODECVT, typename... ARGS>
241 static CodeCvt mkFromStdCodeCvt (const Options& options = {}, ARGS... args)
242 requires (same_as<CHAR_T, typename STD_CODECVT::intern_type>);
243
244 public:
245 nonvirtual CodeCvt& operator= (const CodeCvt& rhs) = default;
246 nonvirtual CodeCvt& operator= (CodeCvt&& rhs) = default;
247
248 public:
249 /**
 * \brief Return the Options in effect for this CodeCvt.
 *
 * \note (review) presumably obtained from the underlying rep (IRep declares a matching GetOptions) - definition not visible here; confirm in CodeCvt.inl
250 */
251 nonvirtual Options GetOptions () const;
252
253 public:
254 /**
255 * \brief convert span byte (external serialized format) parameters to characters (like std::codecvt<>::in () - but with spans, and simpler api)
256 *
257 * Convert bytes 'from' to characters 'to'.
258 *
259 * Arguments:
260 * o span<byte> from - initially all of which will be converted or an exception thrown (only if data corrupt/unconvertable) (updated to point to bytes which form part of a single additional character)
261 * o span<CHAR_T> to - buffer to have data converted 'into'
262 * NOTE - all we require is that the result fit into 'to'. But we offer a quick way to compute a buffer 'large enough' - (call ComputeTargetCharacterBufferSize).
263 * But (a more expensive) way is to call Bytes2Characters/1 and that will tell you exactly how many needed.
264 * Returns:
265 * subspan of 'to', with converted characters.
266 * Throws on failure (corrupt source content).
267 * And '*from' updated to reflect any remaining bytes that are part of the next character.
268 *
269 * Source bytes must begin on a valid character boundary (unlike codecvt - no mbstate).
270 * If the input buffer ends with any incomplete characters, *from will refer to those characters
271 * on function completion.
272 *
273 * The overload taking pointer to from returns the amount left. The overload taking span<> - not pointer -
274 * throws if not all consumed.
275 *
276 * The caller typically will wish to save those, and resubmit their Bytes2Characters call
277 * with a new buffer, starting with those (but there is no requirement to do so).
278 *
279 * No state is maintained. ALL the input is converted except possibly a few bytes at the end of the input
280 * which constitute a partial character.
281 *
282 * This implies that given a 'lead byte' as argument to Bytes2Characters, this function can return
283 * an EMPTY span, and that would not be an error (so no throw).
284 *
285 * \note we use the name 'Bytes' - because its suggestive of meaning, and in every case I'm aware of
286 * the target type will be char, or char8_t, or byte. But its certainly not guaranteed to be serialized
287 * to byte, and the codecvt API calls this extern_type
288 *
289 * /2 overload \pre to.size () >= min(Bytes2Characters(*from), ComputeTargetCharacterBufferSize (*from)) on input.
290 * span<const byte>* \post from->size () very small on return (at most partial character)
291 *
292 * \see also Bytes2String for similar function, but operating on strings
293 */
294 nonvirtual size_t Bytes2Characters (span<const byte> from) const;
295 nonvirtual span<CHAR_T> Bytes2Characters (span<const byte>* from, span<CHAR_T> to) const;
296 nonvirtual span<CHAR_T> Bytes2Characters (span<const byte> from, span<CHAR_T> to) const;
297
298 public:
299 /**
300 * \brief convert span<character> parameter to a span of bytes (like std::codecvt<>::out () - but with spans, and otherwise simpler API)
301 *
302 * Convert characters 'from' to bytes 'to'.
303 *
304 * Arguments:
305 * o span<character> from - all of which will be converted or an exception thrown (only if data corrupt/unconvertable).
306 * o OPTIONAL span<byte> to - buffer to have data converted 'into', which MUST be large enuf (call ComputeTargetByteBufferSize)
307 * o span<byte> to - buffer to have data converted 'into'
308 * NOTE - all we require is that the result fit into 'to'. But we offer a quick way to compute a buffer 'large enough' - (call ComputeTargetByteBufferSize).
309 * But (a more expensive) way is to call Characters2Bytes/1 and that will tell you exactly how many needed.
310 *
311 * Returns:
312 * (sub)span of 'to' (if provided), with characters converted to the appropriate span of bytes.
313 * Else (/1 overload) returns the number of bytes the characters would have been converted to.
314 * Throws on failure.
315 *
316 * No state is maintained. ALL the input is converted to all the output, on character
317 * boundaries.
318 *
319 * \note we use the name 'Bytes' - because its suggestive of meaning, and in most cases
320 * the target type will be char, or char8_t, or byte. But its certainly not guaranteed to be serialized
321 * to byte, and the codecvt API calls this extern_type
322 *
323 * /2 overload \pre to.size () >= min(ComputeTargetByteBufferSize (from),Characters2Bytes(from)) on input.
324 *
325 * \see also String2Bytes for similar function, but operating on strings
326 */
327 nonvirtual size_t Characters2Bytes (span<const CHAR_T> from) const;
328 nonvirtual span<byte> Characters2Bytes (span<const CHAR_T> from, span<byte> to) const;
329
330 public:
331 /**
332 * \brief Compute the size of 'to' span to use in call to Bytes2Characters
333 *
334 * \note this may guess a size too large, but will always guess a size large enuf
335 * In the case of the size_t overload, its obviously a worst-case guess
336 */
337 nonvirtual size_t ComputeTargetCharacterBufferSize (span<const byte> src) const;
338 nonvirtual size_t ComputeTargetCharacterBufferSize (size_t srcSize) const;
339
340 public:
341 /**
342 * \brief Compute the size of 'to' span to use in call to Characters2Bytes
343 *
344 * \note this may guess a size too large, but will always guess a size large enuf
345 * In the case of the size_t overload, its obviously a worst-case guess
346 */
347 nonvirtual size_t ComputeTargetByteBufferSize (span<const CHAR_T> src) const;
348 nonvirtual size_t ComputeTargetByteBufferSize (size_t srcSize) const;
349
350 public:
351 /**
352 * Convert a span of bytes (in an encoding defined by the constructor to CodeCvt) to a 'string' like object - anything constructible from a [begin, end) pair of const CHAR_T* pointers (e.g. String or wstring)
353 *
354 * NOTE - when converting Bytes2String, the String must be encoded using CHAR_T characters.
355 * The binary rep - can be anything - of course.
356 *
357 * \par Example Usage
358 * \code
359 * span<const byte> bytes = from_somewhere;
360 * static const CodeCvt<wchar_t> kCvt_{UnicodeExternalEncodings::eUTF8};
361 * wstring result = kCvt_.Bytes2String<wstring> (bytes);
362 * \endcode
363 *
364 * \par Example Usage
365 * \code
366 * span<const byte> bytes = from_somewhere;
367 * wstring result = CodeCvt<wchar_t>{locale{}}.Bytes2String<wstring> (bytes);
368 * \endcode
369 */
370 template <constructible_from<const CHAR_T*, const CHAR_T*> STRINGISH>
371 nonvirtual STRINGISH Bytes2String (span<const byte> from) const;
372
373 public:
374 /**
375 * Convert a span of characters ('string') to a BLOB-like object - anything constructible from a [begin, end) pair of const byte* pointers; note that the container of bytes may be a 'string' (special case).
376 *
377 * NOTE - when converting String2Bytes, the String must be encoded using CHAR_T characters.
378 * The binary rep - can be anything - of course.
379 *
380 * \par Example Usage
381 * \code
382 * span<const wchar_t> s = from_somewhere;
383 * static const CodeCvt<wchar_t> kCvt_{UnicodeExternalEncodings::eUTF8};
384 * string utf8String = kCvt_.String2Bytes<string> (s);
385 * \endcode
386 *
387 * \par Example Usage
388 * \code
389 * span<const wchar_t> s = from_somewhere;
390 * Memory::BLOB localeFormatRenderingOfUnicodeInputAsLocaleFormatByteStream = CodeCvt<wchar_t>{locale{}}.String2Bytes<Memory::BLOB> (s);
391 * \endcode
392 *
393 * \par Example Usage
394 * \code
395 * span<const wchar_t> s = from_somewhere;
396 * string localeFormatRenderingOfUnicodeInputAsLocaleFormatByteStream = CodeCvt<wchar_t>{locale{}}.String2Bytes<string> (s);
397 * \endcode
398 */
399 template <constructible_from<const byte*, const byte*> BLOBISH>
400 nonvirtual BLOBISH String2Bytes (span<const CHAR_T> from) const;
401
402 private:
 // Shared implementation ("rep") object. Since it is a shared_ptr and CodeCvt's copy operations are defaulted,
 // copies of a CodeCvt share the same rep rather than cloning it.
403 shared_ptr<IRep> fRep_;
404
 // Forward declarations of the private rep types (defined elsewhere - presumably CodeCvt.inl).
 // Both preprocessor branches declare the same set of names; they differ only in whether the template
 // parameters are constrained with IUNICODECanAlwaysConvertTo (#else branch) or left unconstrained
 // (workaround branch, used when qCompilerAndStdLib_template_second_concept_Buggy is set).
405#if qCompilerAndStdLib_template_second_concept_Buggy
406 private:
407 template <typename SERIALIZED_CHAR_T>
408 struct UTFConvertRep_;
409
410 private:
411 struct Latin1ConvertRep_;
412
413 private:
414 template <typename SERIALIZED_CHAR_T>
415 struct UTFConvertSwappedRep_;
416
417 private:
418 template <typename INTERMEDIATE_CHAR_T>
419 struct UTF2UTFRep_;
420#else
421 private:
422 template <IUNICODECanAlwaysConvertTo SERIALIZED_CHAR_T>
423 struct UTFConvertRep_;
424
425 private:
426 struct Latin1ConvertRep_;
427
428 private:
429 template <IUNICODECanAlwaysConvertTo SERIALIZED_CHAR_T>
430 struct UTFConvertSwappedRep_;
431
432 private:
433 template <IUNICODECanAlwaysConvertTo INTERMEDIATE_CHAR_T>
434 struct UTF2UTFRep_;
435#endif
436
437 private:
 // Rep wrapping a std::codecvt-family class (by its name; presumably what mkFromStdCodeCvt instantiates - confirm in CodeCvt.inl).
438 // requires CHAR_T = typename STD_CODE_CVT_T::intern_type
439 template <typename STD_CODE_CVT_T>
440 struct CodeCvt_WrapStdCodeCvt_;
441 };
442
/**
 * \brief Abstract backend ("rep") interface of CodeCvt<CHAR_T>; CodeCvt holds one via shared_ptr<IRep>.
 *
 * Every conversion entry point is pure virtual, so a concrete encoding is supplied by subclassing IRep.
 * The virtual signatures parallel the public CodeCvt methods of the same names (presumably with the same
 * contracts - the forwarding code is not visible here; confirm in CodeCvt.inl).
 */
443 template <IUNICODECanAlwaysConvertTo CHAR_T>
444 struct CodeCvt<CHAR_T>::IRep {
445 virtual ~IRep () = default;
 // Options associated with this rep.
446 virtual Options GetOptions () const = 0;
 // bytes -> characters: 'from' is passed by pointer (in/out span), 'to' is the destination buffer; returns a span<CHAR_T>.
447 virtual span<CHAR_T> Bytes2Characters (span<const byte>* from, span<CHAR_T> to) const = 0;
 // characters -> bytes: converts 'from' into the destination buffer 'to'; returns a span<byte>.
448 virtual span<byte> Characters2Bytes (span<const CHAR_T> from, span<byte> to) const = 0;
 // Buffer-size estimators: 'src' is EITHER the actual source span OR just its size (hence the variant),
 // matching the two public overloads (span and size_t) of the same-named CodeCvt methods.
449 virtual size_t ComputeTargetCharacterBufferSize (variant<span<const byte>, size_t> src) const = 0;
450 virtual size_t ComputeTargetByteBufferSize (variant<span<const CHAR_T>, size_t> src) const = 0;
451
452 protected:
453 // handy utility for subclasses / requires calls
 // (non-virtual; each returns a size_t - presumably the count the corresponding conversion would produce,
 // compare the one-argument CodeCvt::Bytes2Characters / Characters2Bytes overloads - TODO confirm in CodeCvt.inl)
454 nonvirtual size_t _Bytes2Characters (span<const byte> from) const;
455 nonvirtual size_t _Characters2Bytes (span<const CHAR_T> from) const;
456 };
457
458}
459
460/*
461 ********************************************************************************
462 ***************************** Implementation Details ***************************
463 ********************************************************************************
464 */
465#include "CodeCvt.inl"
466
467#endif /*_Stroika_Foundation_Characters_CodeCvt_h_*/
CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt,...
Definition CodeCvt.h:118
static CodeCvt mkFromStdCodeCvt(const Options &options={}, ARGS... args)
nonvirtual STRINGISH Bytes2String(span< const byte > from) const
nonvirtual size_t Bytes2Characters(span< const byte > from) const
convert span byte (external serialized format) parameters to characters (like std::codecvt<>::in () -...
Definition CodeCvt.inl:750
nonvirtual BLOBISH String2Bytes(span< const CHAR_T > from) const
UNICODE string can be always be converted into array of this type.
Definition Character.h:132
UnicodeExternalEncodings
list of external UNICODE character encodings, for file IO (eDEFAULT = eUTF8)
Definition UTFConvert.h:31
STL namespace.