Stroika Library 3.0d23x
 
Loading...
Searching...
No Matches
Variant/JSON/Reader.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <optional>
7#include <stack>
8
9#if __has_include("boost/json.hpp")
10#include <boost/json.hpp>
11#include <boost/json/basic_parser_impl.hpp>
12#endif
13
14#include "Stroika/Foundation/Characters/FloatConversion.h"
16#include "Stroika/Foundation/Characters/String2Int.h"
18#include "Stroika/Foundation/Containers/Concrete/Mapping_HashTable.h"
19#include "Stroika/Foundation/Containers/Support/ReserveTweaks.h"
20#include "Stroika/Foundation/DataExchange/BadFormatException.h"
27
28#include "Reader.h"
29
30using namespace Stroika::Foundation;
33using namespace Stroika::Foundation::Execution;
34using namespace Stroika::Foundation::Memory;
35using namespace Stroika::Foundation::Streams;
36using namespace Stroika::Foundation::Traversal;
37
39using Memory::MakeSharedPtr;
40
41// Comment this in to turn on aggressive noisy DbgTrace in this module
42//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
43
44namespace {
45
46 /*
47 * Parse strategy:
48 * o Pre-pass to map all input to UNICODE - and then just handle a sequence of unicode strings
49 * o Classic recursive descent parser.
50 * o Inline lexical analysis (cuz very simple)
51 */
52
/*
 *  Names for the lexical situations the hand-written parser can be in.
 *  NOTE(review): none of the parsing functions visible in this file reference this enum - confirm it is still needed.
 */
enum ReadState_ {
    eNeutral_ = 0, // numbering starts at zero, as it would by default
    eInObject_,
    eInArray_,
    eInNumber_,
    eInString_,
};
60
61 // use StreamReader to get buffering of input data (performance tweak), and a couple helper methods
62 struct MyBufferedStreamReader_ final : StreamReader<Character> {
63 MyBufferedStreamReader_ (const InputStream::Ptr<ElementType>& underlyingReadFromStreamAdopted)
64 : StreamReader<Character>{underlyingReadFromStreamAdopted}
65 {
66 }
67 [[nodiscard]] inline char32_t NextChar ()
68 {
69 Require (not IsAtEOF ());
70 return ReadBlocking ()->As<char32_t> ();
71 }
72 inline void AdvanceOne ()
73 {
74 Require (not IsAtEOF ());
75 Seek (eFromCurrent, 1);
76 }
77 inline void BackupOne ()
78 {
79 Seek (eFromCurrent, -1);
80 }
81 };
82
83 VariantValue Reader_value_ (MyBufferedStreamReader_& in);
84
/*
 *  True iff 'c' is one of the four whitespace characters JSON allows between tokens
 *  (see https://www.json.org/json-en.html):
 *      ws = *(
 *              %x20 /              ; Space
 *              %x09 /              ; Horizontal tab
 *              %x0A /              ; Line feed or New line
 *              %x0D                ; Carriage return
 *          )
 *
 *  Hand-coded because iswspace was pretty slow (on windoze) - showing up as significant in performance profiling.
 */
inline bool IsJSONSpace_ (char32_t c)
{
    return c == ' ' or c == '\t' or c == '\r' or c == '\n';
}
109
/*
 *  True iff 'c' is one of the decimal digits 0..9 - the only digits JSON recognizes
 *  (see https://www.json.org/json-en.html).
 *
 *  Hand-coded as an iswdigit replacement (iswdigit showed up as significant - 6% of full runtime).
 *  A simple range test suffices because C++ guarantees the digit characters are contiguous and ascending.
 */
inline bool IsJSONDigit_ (char32_t c)
{
    return '0' <= c and c <= '9';
}
133
134 // throw if bad hex digit
135 uint8_t HexChar2Num_ (char c)
136 {
137 if ('0' <= c and c <= '9') [[likely]] {
138 return static_cast<uint8_t> (c - '0');
139 }
140 if ('a' <= c and c <= 'f') [[likely]] {
141 return static_cast<uint8_t> ((c - 'a') + 10);
142 }
143 if ('A' <= c and c <= 'F') [[likely]] {
144 return static_cast<uint8_t> ((c - 'A') + 10);
145 }
146 static const auto kException_{BadFormatException{"JSON: bad hex digit after \\u"sv}};
147 Throw (kException_);
148 }
149
150 // 'in' is positioned to the start of string, and we read, leaving in positioned just after the end of the string
151 String Reader_String_ (MyBufferedStreamReader_& in)
152 {
153 Require (not in.IsAtEOF ());
154 char32_t c = in.NextChar ();
155 if (c != '\"') [[unlikely]] {
156 static const auto kException_{BadFormatException{"JSON: Expected quoted string"sv}};
157 Throw (kException_);
158 }
159 /*
160 * accumulate chars, and check for close-quote
161 */
162 StringBuilder<StringBuilder_Options<char8_t, 64>> result; // StringBuilder_Options: text often ascii, empirically a bit faster with on windows on jsonparser regtest
163 while (true) {
164 if (in.IsAtEOF ()) [[unlikely]] {
165 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
166 Throw (kException_);
167 }
168 c = in.NextChar ();
169 if (c == '\"') [[unlikely]] {
170 return result.str ();
171 }
172 else if (c == '\\') [[unlikely]] {
173 // quoted character read...
174 if (in.IsAtEOF ()) [[unlikely]] {
175 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
176 Throw (kException_);
177 }
178 c = in.NextChar ();
179 switch (c) {
180 case 'b':
181 c = '\b';
182 break;
183 case 'f':
184 c = '\f';
185 break;
186 case 'n':
187 c = '\n';
188 break;
189 case 'r':
190 c = '\r';
191 break;
192 case 't':
193 c = '\t';
194 break;
195 case 'u': {
196 // Not sure this is right -- But I hope so ... -- LGP 2012-11-29
197 char32_t newC = '\0';
198 for (int n = 0; n < 4; ++n) {
199 if (in.IsAtEOF ()) [[unlikely]] {
200 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
201 Throw (kException_);
202 }
203 newC += HexChar2Num_ (static_cast<char> (in.NextChar ()));
204 if (n != 3) {
205 newC <<= 4;
206 }
207 }
208 c = newC;
209 } break;
210 default: {
211 // if we have \N for any unrecognized N, just treat it as N
212 }
213 }
214 }
215 result += c; // must handle other character quoting (besides \u which was preflighted)
216 }
217 }
218
219 // 'in' is positioned to the second character of number (first passed as arg), and we read, leaving in positioned just after end of number
220 static constexpr Character kDash_{'-'};
221 VariantValue Reader_Number_ (char32_t initialChar, MyBufferedStreamReader_& in)
222 {
223 Require (initialChar == '-' or IsJSONDigit_ (initialChar));
224
225 bool containsDot = false;
226 // ACCUMULATE STRING, and then call builtin number parsing functions...
227 // This accumulation is NOT as restrictive as it could be - but should accept all valid numbers
228 StringBuilder tmp;
229 for (char32_t c = initialChar; c != '\0'; c = in.ReadBlocking ().value_or ('\0').As<char32_t> ()) {
230 if (IsJSONDigit_ (c) or c == '.' or c == 'e' or c == 'E' or c == '+' or c == '-') [[likely]] {
231 tmp += c;
232 if (c == '.') [[unlikely]] {
233 containsDot = true;
234 }
235 }
236 else {
237 // any other character signals end of number (not a syntax error)
238 // but backup - don't consume next character - not part of number
239 Assert (not tmp.empty ()); // at least consumed 'initialChar'
240 in.BackupOne ();
241 break;
242 }
243 }
244 Assert (not tmp.empty ());
245 StackBuffer<char32_t> ignoreBuf;
246 span<const char32_t> tmpData = tmp.GetData (&ignoreBuf);
247 if (containsDot) {
248 return VariantValue{FloatConversion::ToFloat<long double> (tmpData)};
249 }
250 else {
251 // if no - use unsigned since has wider range (if no -)
252 return (initialChar == kDash_) ? VariantValue{String2Int<long long int> (tmpData)}
253 : VariantValue{String2Int<unsigned long long int> (tmpData)};
254 }
255 }
256
257 // NOTE: THIS STARTS SEEKED JUST PAST OPENING '{'
258 VariantValue Reader_Object_ (MyBufferedStreamReader_& in)
259 {
260 Mapping_HashTable<String, VariantValue>::DEFAULT_HASHTABLE<> result; // slight tweak using stl map, and move-construct Stroika map at the end
261
262 // accumulate elements, and check for close-array
263 enum LookingFor {
264 eName, // this means looking for start of next field/member of the object
265 eValue,
266 eColon,
267 eComma // when looking for comma could either find it, or } marking end of object
268 };
269 LookingFor lf = eName;
270 optional<String> curName;
271 while (true) {
272 optional<Character> oNextChar = in.Peek ();
273 if (not oNextChar.has_value ()) [[unlikely]] {
274 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading object (looking for '}')"sv}};
275 Throw (kException_);
276 }
277 char32_t nextChar = oNextChar->As<char32_t> ();
278 if (IsJSONSpace_ (nextChar)) [[likely]] {
279 in.AdvanceOne (); // skip char
280 continue;
281 }
282 switch (lf) {
283 case eName: {
284 Assert (curName == nullopt);
285 if (nextChar == '}') {
286 in.AdvanceOne (); // finished object
288 }
289 else if (nextChar == '\"') [[likely]] {
290 curName = Reader_String_ (in); // starting a new data member (with a string)
291 lf = eColon;
292 }
293 else {
294 static const auto kException_{
295 BadFormatException{"JSON: Reading object, looking for a name, didn't find a close brace or open quote"sv}};
296 Throw (kException_);
297 }
298 } break;
299 case eComma: {
300 Assert (curName == nullopt);
301 if (nextChar == '}') {
302 in.AdvanceOne (); // finished object
304 }
305 else if (nextChar == ',') [[likely]] {
306 in.AdvanceOne (); // consume it, and look for name next (start of next object member)
307 lf = eName;
308 }
309 else {
310 static const auto kException_{BadFormatException{"JSON: Reading object, looking for a comma, but found something else"sv}};
311 Throw (kException_);
312 }
313 } break;
314 case eColon: {
315 Assert (curName);
316 if (nextChar == ':') [[likely]] {
317 in.AdvanceOne (); // consume the separator, and look the the value
318 lf = eValue;
319 }
320 else {
321 static const auto kException_{BadFormatException{"JSON: Reading object, looking for a colon, but found something else"sv}};
322 Throw (kException_);
323 }
324 } break;
325 case eValue: {
326 Assert (curName);
327 // dont care what the character is, read a new value
328 result.insert ({ValueOf (curName), Reader_value_ (in)});
329 curName = nullopt;
330 lf = eComma; // and look for another field/data member
331 } break;
332 }
333 }
334 }
335
336 // NOTE - called with OPENING '[' already read
337 VariantValue Reader_Array_ (MyBufferedStreamReader_& in)
338 {
339 vector<VariantValue> result;
340
341 // accumulate elements, and check for close-array
342 bool lookingForElt = true;
343 while (true) {
344 if (in.IsAtEOF ()) [[unlikely]] {
345 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading array (looking for ']')"sv}};
346 Throw (kException_);
347 }
348 char32_t peekedChar = in.Peek ()->As<char32_t> ();
349 if (peekedChar == ']') {
350 if (lookingForElt) {
351 // allow ending ',' - harmless - could be more aggressive - but if so - careful of zero-sized array special case
352 }
353 in.AdvanceOne ();
355 }
356 else if (peekedChar == ',') {
357 if (lookingForElt) [[unlikely]] {
358 static const auto kException_{BadFormatException{"JSON: Unexpected second ',' in reading array"sv}};
359 Throw (kException_);
360 }
361 else {
362 lookingForElt = true;
363 }
364 in.AdvanceOne ();
365 }
366 else if (IsJSONSpace_ (peekedChar)) {
367 in.AdvanceOne ();
368 }
369 else {
370 // not looking at whitespace, in midst of array, and array not terminated, so better be looking at a value
371 if (lookingForElt) [[likely]] {
372 Containers::Support::ReserveTweaks::Reserve4Add1 (result);
373 result.push_back (Reader_value_ (in));
374 lookingForElt = false;
375 }
376 else {
377 static const auto kException_{BadFormatException{"JSON: Unexpected character (missing ',' ?) in reading array"sv}};
378 Throw (kException_);
379 }
380 }
381 }
382 }
383
384 VariantValue Reader_SpecialToken_ (char32_t initialChar, MyBufferedStreamReader_& in)
385 {
386 switch (initialChar) {
387 case 'f': {
388 Character buf[4];
389 if (in.ReadAll (begin (buf), end (buf)) == 4 and buf[0] == 'a' and buf[1] == 'l' and buf[2] == 's' and buf[3] == 'e') {
390 return VariantValue{false};
391 }
392 } break;
393 case 't': {
394 Character buf[3];
395 if (in.ReadAll (begin (buf), end (buf)) == 3 and buf[0] == 'r' and buf[1] == 'u' and buf[2] == 'e') {
396 return VariantValue{true};
397 }
398 } break;
399 case 'n': {
400 Character buf[3];
401 if (in.ReadAll (begin (buf), end (buf)) == 3 and buf[0] == 'u' and buf[1] == 'l' and buf[2] == 'l') {
402 return VariantValue{};
403 }
404 } break;
405 }
406 static const auto kException_{BadFormatException{"JSON: Unrecognized token"sv}};
407 Throw (kException_);
408 }
409
410 VariantValue Reader_value_ (MyBufferedStreamReader_& in)
411 {
412 // Skip initial whitespace, and look for any value:
413 // string
414 // number
415 // object
416 // array
417 // true
418 // false
419 // null
420 for (optional<Character> oc = in.ReadBlocking (); oc; oc = in.ReadBlocking ()) {
421 switch (oc->As<char32_t> ()) {
422 case '\"':
423 in.BackupOne ();
424 return Reader_String_ (in);
425
426 case '0':
427 case '1':
428 case '2':
429 case '3':
430 case '4':
431 case '5':
432 case '6':
433 case '7':
434 case '8':
435 case '9':
436 case '-':
437 return Reader_Number_ (oc->As<char32_t> (), in);
438
439 case '{':
440 return Reader_Object_ (in);
441 case '[':
442 return Reader_Array_ (in);
443
444 case 't':
445 case 'f':
446 case 'n':
447 return Reader_SpecialToken_ (oc->As<char32_t> (), in);
448
449 default: {
450 if (IsJSONSpace_ (oc->As<char32_t> ())) [[likely]] {
451 // ignore
452 }
453 else {
454 static const auto kException_{BadFormatException{"JSON: Unexpected character looking for start of value"sv}};
455 Throw (kException_);
456 }
457 }
458 }
459 }
460 // if we get here - nothing found
461 static const auto kException_{BadFormatException{"JSON: Unexpected EOF looking for value"sv}};
462 Throw (kException_);
463 }
464}
465
466/*
467 ********************************************************************************
468 ************************* Variant::JSON::Reader ********************************
469 ********************************************************************************
470 */
471class Variant::JSON::Reader::NativeRep_ : public Variant::Reader::_IRep {
472public:
473 NativeRep_ () = default;
474 NativeRep_ (const NativeRep_&) = default;
475 virtual _SharedPtrIRep Clone () const override
476 {
477 return MakeSharedPtr<NativeRep_> (*this);
478 }
479 virtual optional<filesystem::path> GetDefaultFileSuffix () const override
480 {
481 return ".json"sv;
482 }
483 virtual VariantValue Read (const InputStream::Ptr<byte>& in) const override
484 {
485 return Read (BinaryToText::Reader::New (in, nullopt, SeekableFlag::eSeekable));
486 }
487 virtual VariantValue Read (const InputStream::Ptr<Character>& in) const override
488 {
489#if USE_NOISY_TRACE_IN_THIS_MODULE_
490 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::NativeRep_::Read"};
491#endif
492 MyBufferedStreamReader_ reader{in.IsSeekable () ? in : BufferedInputStream::New (in, SeekableFlag::eSeekable)};
493 return Reader_value_ (reader);
494 }
495};
496#if __has_include("boost/json.hpp")
497
498namespace {
499 // Based on example: https://www.boost.org/doc/libs/1_83_0/libs/json/doc/html/json/examples.html#json.examples.validate
500 struct BoostSAXHandler_ {
501 /// The maximum number of elements allowed in an array
502 static constexpr std::size_t max_array_size = static_cast<size_t> (-1);
503
504 /// The maximum number of elements allowed in an object
505 static constexpr std::size_t max_object_size = static_cast<size_t> (-1);
506
507 /// The maximum number of characters allowed in a string
508 static constexpr std::size_t max_string_size = static_cast<size_t> (-1);
509
510 /// The maximum number of characters allowed in a key
511 static constexpr std::size_t max_key_size = static_cast<size_t> (-1);
512
513 BoostSAXHandler_ () = default;
514 BoostSAXHandler_ (const BoostSAXHandler_&) = delete;
515
516 bool on_document_begin ([[maybe_unused]] error_code& ec)
517 {
518 Assert (fStack_.empty ());
519 fStack_.emplace (Context_::eSimple);
520 return true;
521 }
522 bool on_document_end ([[maybe_unused]] error_code& ec)
523 {
524 Assert (fStack_.size () == 1);
525 this->PopContext_ ();
526 Assert (fStack_.empty ());
527 return true;
528 }
529
530 bool on_array_begin ([[maybe_unused]] error_code& ec)
531 {
532 fStack_.emplace (Context_::eArray);
533 return true;
534 }
535 bool on_array_end ([[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
536 {
537 Assert (fStack_.top ().GetContextType () == Context_::eArray);
538 Assert (fStack_.top ().PeekAccumVector_ ().size () == n);
539 PopContext_ ();
540 return true;
541 }
542
543 bool on_object_begin ([[maybe_unused]] error_code& ec)
544 {
545 fStack_.emplace (Context_::eMap);
546 return true;
547 }
548 bool on_object_end ([[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
549 {
550 Assert (fStack_.top ().GetContextType () == Context_::eMap);
551 Assert (fStack_.top ().PeekAccumObj_ ().size () == n);
552 PopContext_ ();
553 return true;
554 }
555
556 bool on_string_part (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
557 {
558 fPartSaver_.push_back (span<const char>{s});
559 return true;
560 }
561 bool on_string (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
562 {
563 // Tricky - not really documented (I can find) - but seems if n != s.size() - we must use fPartSaver from on_key_part
564 if (s.size () == n) {
565 AddCompleteValue_ (VariantValue{toStroikaString_ (s)});
566 }
567 else {
568 fPartSaver_.push_back (span<const char>{s});
569 String res = toStroikaString_ (fPartSaver_);
570 fPartSaver_.clear ();
571 AddCompleteValue_ (VariantValue{res});
572 }
573 return true;
574 }
575
576 bool on_key_part (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
577 {
578 // tricky - save text in buffer, for use on subsequent onKey
579 Assert (fStack_.top ().GetContextType () == Context_::eMap);
580 fPartSaver_.push_back (span<const char>{s});
581 return true;
582 }
583 bool on_key (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
584 {
585 Assert (fStack_.top ().GetContextType () == Context_::eMap);
586 Assert (s.size () <= n);
587 // Tricky - not really documented (I can find) - but seems if n != s.size() - we must use fPartSaver from on_key_part
588 if (s.size () == n) {
589 fStack_.top ().fKey = toStroikaString_ (s);
590 }
591 else {
592 fPartSaver_.push_back (span<const char>{s});
593 fStack_.top ().fKey = toStroikaString_ (fPartSaver_);
594 fPartSaver_.clear ();
595 }
596 return true;
597 }
598
599 bool on_number_part ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
600 {
601 // No need to track anything for numbers, as boost appears to incrementally parse and keep its state internally
602 return true;
603 }
604 bool on_int64 (int64_t i, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
605 {
606 AddCompleteValue_ (VariantValue{i});
607 return true;
608 }
609 bool on_uint64 (uint64_t u, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
610 {
611 AddCompleteValue_ (VariantValue{u});
612 return true;
613 }
614 bool on_double (double d, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
615 {
616 AddCompleteValue_ (VariantValue{d});
617 return true;
618 }
619
620 bool on_bool (bool b, [[maybe_unused]] error_code& ec)
621 {
622 AddCompleteValue_ (VariantValue{b});
623 return true;
624 }
625
626 bool on_null ([[maybe_unused]] error_code& ec)
627 {
628 AddCompleteValue_ (VariantValue{});
629 return true;
630 }
631
632 bool on_comment_part ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
633 {
634 // ignore comments
635 return true;
636 }
637 bool on_comment ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
638 {
639 // ignore comments
640 return true;
641 }
642
643 // Careful not to use string_view directly cuz Stroika assumes we can keep that pointer (String_Constant) - so map to span, and also character to use char8_t
644 // to signal character set, and not ASCII
645 template <typename CONTAINER_OF_CHAR_BUT_REALLY_UTF8>
646 static String toStroikaString_ (CONTAINER_OF_CHAR_BUT_REALLY_UTF8 sv)
647 requires requires (CONTAINER_OF_CHAR_BUT_REALLY_UTF8 t) {
648 { span<const char>{t} };
649 }
650 {
651 return String{SpanBytesCast<span<const char8_t>> (span<const char>{sv})};
652 }
653
654 VariantValue GetConstructedValue () const
655 {
656 return fCompletedFinalValue_;
657 }
658 void AddCompleteValue_ (VariantValue v)
659 {
660 Assert (not fStack_.empty ());
661 Context_& t = fStack_.top ();
662 switch (t.GetContextType ()) {
663 case Context_::eArray:
664 t.PeekAccumVector_ ().push_back (v);
665 break;
666 case Context_::eMap:
667 t.PeekAccumObj_ ().insert ({t.fKey, v});
668 break;
669 case Context_::eSimple:
670 t.PeekSimpleValue_ () = v;
671 break;
672 default:
674 }
675 }
676 void PopContext_ ()
677 {
678 Assert (not fStack_.empty ());
679 // complete what is at the top of the stack and do AddCompleteValue_ to the new top of the stack
680 Context_& t = fStack_.top ();
681 VariantValue vv = [&t] () {
682 switch (t.GetContextType ()) {
683 case Context_::eArray:
684 return VariantValue{move (t.PeekAccumVector_ ())};
685 case Context_::eMap:
686 return VariantValue{Mapping_HashTable<String, VariantValue>{move (t.PeekAccumObj_ ())}};
687 case Context_::eSimple:
688 return t.PeekSimpleValue_ ();
689 default:
691 return VariantValue{};
692 }
693 }();
694 fStack_.pop ();
695 if (fStack_.empty ()) {
696 fCompletedFinalValue_ = vv;
697 }
698 else {
699 AddCompleteValue_ (vv);
700 }
701 }
702
703 /*
704 * We have a stack of context objects for in progress parses. This cheaply maintains the data at each point of the stack.
705 */
706 struct Context_ {
707
708 // NOTE - critical these enums correspond to index values of std::variant<> below
709 enum ContextType_ {
710 eSimple,
711 eArray,
712 eMap
713 };
714
715 Context_ () = delete;
716 Context_ (ContextType_ ct)
717 {
718 switch (ct) {
719 case eSimple:
720 fVV_ = VariantValue{};
721 break;
722 case eArray:
723 fVV_ = vector<VariantValue>{};
724 break;
725 case eMap:
727 break;
728 }
729 Ensure (ct == GetContextType ()); // ensure ContextType_ enum in same order as variant<> arguments
730 }
731 ~Context_ () = default;
732 Context_ (const Context_&) = delete;
733 Context_ (Context_&&) = default;
734
735 ContextType_ GetContextType () const
736 {
737 return static_cast<ContextType_> (fVV_.index ());
738 }
739
740 vector<VariantValue>& PeekAccumVector_ ()
741 {
742 Require (GetContextType () == eArray);
743 return get<vector<VariantValue>> (fVV_);
744 }
746 {
747 Require (GetContextType () == eMap);
748 return get<Mapping_HashTable<String, VariantValue>::DEFAULT_HASHTABLE<>> (fVV_);
749 }
750 VariantValue& PeekSimpleValue_ ()
751 {
752 Require (GetContextType () == eSimple);
753 return get<VariantValue> (fVV_);
754 }
755
756 // use variant to save construct/destruct of unneeded parts
757 variant<VariantValue, vector<VariantValue>, Mapping_HashTable<String, VariantValue>::DEFAULT_HASHTABLE<>> fVV_;
758
759 String fKey; // only allowed of context type = eMap (so COULD embed in above variant, but KISS) - also could use optional<String> which would help some things and make others worse...
760 };
761
762 std::stack<Context_> fStack_;
763 VariantValue fCompletedFinalValue_; // only filled in when stack is empty
764
765 // doesn't need to be in stack context cuz cannot fill partial string/key/etc with intervening pop/push
766 // Not using StringBuilder here cuz could contain partial strings
767 InlineBuffer<char, 512> fPartSaver_;
768 };
769}
770class Variant::JSON::Reader::BoostRep_ : public Variant::Reader::_IRep {
771public:
772 BoostRep_ () = default;
773 BoostRep_ (const BoostRep_&) = default;
774 virtual _SharedPtrIRep Clone () const override
775 {
776 return MakeSharedPtr<BoostRep_> (*this);
777 }
778 virtual optional<filesystem::path> GetDefaultFileSuffix () const override
779 {
780 return ".json"sv;
781 }
782 virtual VariantValue Read (const InputStream::Ptr<byte>& in) const override
783 {
784#if USE_NOISY_TRACE_IN_THIS_MODULE_
785 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::BoostRep_::Read"};
786#endif
787 using namespace boost;
788 InputStream::Ptr<byte> useInStream = in.IsSeekable () ? in : BufferedInputStream::New (in, SeekableFlag::eSeekable);
789 Assert (useInStream.IsSeekable ());
790 byte buf[8 * 1024]; // experimentally - larger buffers didn't help speed
791 constexpr bool kUseSAX_ = true; // experimentally, on windows, sax about 10% faster than stream_parser/convert way
792 try {
793 if constexpr (kUseSAX_) {
794 json::basic_parser<BoostSAXHandler_> p{json::parse_options{}};
795 const size_t targetChunkSize = std::size (buf);
796 size_t actualChunkSize;
797 boost::system::error_code ec;
798 while ((actualChunkSize = useInStream.ReadBlocking (span{buf, targetChunkSize}).size ()) != 0) {
799 ec.clear ();
800 size_t nParsed = p.write_some (true, reinterpret_cast<const char*> (begin (buf)), actualChunkSize, ec);
801 Assert (nParsed <= actualChunkSize);
802 if (nParsed < actualChunkSize) {
803 useInStream.Seek (eFromCurrent, static_cast<SignedSeekOffsetType> (nParsed) - static_cast<SignedSeekOffsetType> (actualChunkSize));
804 break;
805 }
806 if (p.done ()) {
807 break; // good parse
808 }
809 else if (ec) {
810 Throw (BadFormatException{String::FromNarrowSDKString (ec.what ())});
811 }
812 }
813 if (not p.done ()) {
814 p.write_some (false, nullptr, 0, ec);
815 if (ec and ec != boost::json::error::extra_data /*and ec != boost::json::error::incomplete*/) {
816 Throw (BadFormatException{String::FromNarrowSDKString (ec.what ())});
817 }
818 }
819 return p.handler ().GetConstructedValue ();
820 }
821 else {
822 json::stream_parser p;
823 const size_t targetChunkSize = std::size (buf);
824 size_t actualChunkSize;
825 while ((actualChunkSize = useInStream.ReadBlocking (span{buf, targetChunkSize}).size ()) != 0) {
826 boost::system::error_code ec;
827 size_t nParsed = p.write_some (reinterpret_cast<const char*> (begin (buf)), actualChunkSize, ec);
828 Assert (nParsed <= actualChunkSize);
829 if (nParsed < actualChunkSize) {
830 useInStream.Seek (eFromCurrent, static_cast<SignedSeekOffsetType> (nParsed) - static_cast<SignedSeekOffsetType> (actualChunkSize));
831 break;
832 }
833 if (p.done ()) {
834 break; // good parse
835 }
836 else if (ec) {
837 Throw (BadFormatException{String::FromNarrowSDKString (ec.what ())});
838 }
839 }
840 if (not p.done ()) {
841 p.finish (); // in case wrote text like '3' to buffer, ambiguous if done
842 }
843 Assert (p.done ());
844 return DataExchange::VariantValue{p.release ()}; // Transform boost objects to Stroika objects
845 }
846 }
847 catch (...) {
848 Throw (BadFormatException{Characters::ToString (current_exception ())});
849 }
850 }
851 virtual VariantValue Read (const InputStream::Ptr<Character>& in) const override
852 {
853#if USE_NOISY_TRACE_IN_THIS_MODULE_
854 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::BoostRep_::Read"};
855#endif
856 Require (in.IsSeekable ());
857 return Read (_ToByteReader (in));
858 }
859};
860#endif
861
862inline auto Variant::JSON::Reader::mk_ (const ReaderOptions& options) -> shared_ptr<_IRep>
863{
864 switch (options.fPreferredAlgorithm.value_or (ReaderOptions::Algorithm::eDEFAULT)) {
865 case ReaderOptions::Algorithm::eStroikaNative:
866 return MakeSharedPtr<NativeRep_> ();
867#if __has_include("boost/json.hpp")
868 case ReaderOptions::Algorithm::eBoost:
869 return MakeSharedPtr<BoostRep_> ();
870#endif
871 default:
873 return nullptr;
874 }
875}
876
// Construct a JSON Reader: the rep produced by mk_ () for 'options' is handed to the inherited constructor.
877Variant::JSON::Reader::Reader (const ReaderOptions& options)
878 : inherited{mk_ (options)}
879{
880}
#define AssertNotReached()
Definition Assertions.h:355
constexpr const T & ValueOf(const optional< T > &t)
Same as *t, but Requires that 't' is engaged.
Definition Optional.inl:156
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
nonvirtual span< const CHAR_T > GetData(Memory::StackBuffer< CHAR_T > *probablyIgnoredBuf) const
access a span of data located inside the StringBuilder. Return internal pointer, or pointer internal ...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
Mapping_HashTable<KEY_TYPE, MAPPED_VALUE_TYPE, TRAITS> is a HashTable based concrete implementation o...
Sequence_stdvector<T> is an std::vector-based concrete implementation of the Sequence<T> container pa...
nonvirtual void insert(ArgByValueType< value_type > kvp)
Definition Mapping.inl:456
Simple variant-value (case variant union) object, with (variant) basic types analogous to a value in ...
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
nonvirtual optional< ElementType > ReadBlocking() const
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset) const
nonvirtual bool IsSeekable() const
Returns true iff this object was constructed with a seekable input stream rep.
Definition Stream.inl:44
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43
Ptr New(const InputStream::Ptr< byte > &src, optional< AutomaticCodeCvtFlags > codeCvtFlags={}, optional< SeekableFlag > seekable={}, ReadAhead readAhead=eReadAheadAllowed)
Create an InputStream::Ptr<Character> from the arguments (usually binary source) - which can be used ...
StreamReader is an non-essential Stream utility, adding simplicity of use for a common use case,...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it m...
nonvirtual optional< ElementType > ReadBlocking()
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...