Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
Variant/JSON/Reader.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <optional>
7#include <stack>
8
9#if __has_include("boost/json.hpp")
10#include <boost/json.hpp>
11#include <boost/json/basic_parser_impl.hpp>
12#endif
13
14#include "Stroika/Foundation/Characters/FloatConversion.h"
16#include "Stroika/Foundation/Characters/String2Int.h"
18#include "Stroika/Foundation/Containers/Concrete/Mapping_stdhashmap.h"
19#include "Stroika/Foundation/Containers/Support/ReserveTweaks.h"
20#include "Stroika/Foundation/DataExchange/BadFormatException.h"
25
26#include "Reader.h"
27
28using namespace Stroika::Foundation;
31using namespace Stroika::Foundation::Streams;
32using namespace Stroika::Foundation::Traversal;
33
35
36// Comment this in to turn on aggressive noisy DbgTrace in this module
37//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
38
39namespace {
40
41 /*
42 * Parse strategy:
43 * o Pre-pass to map all input to UNICODE - and then just handle a sequence of unicode strings
44 * o Classic recursive descent parser.
45 * o Inline lexical analysis (cuz very simple)
46 */
47
// States a JSON reader can be in while scanning input.
// Unscoped enum: the enumerators are visible directly in the enclosing namespace.
enum ReadState_ {
    eNeutral_   = 0, // not inside any construct
    eInObject_  = 1, // inside { ... }
    eInArray_   = 2, // inside [ ... ]
    eInNumber_  = 3, // inside a numeric literal
    eInString_  = 4, // inside a quoted string
};
55
56 // use StreamReader to get buffering of input data (performance tweak), and a couple helper methods
57 struct MyBufferedStreamReader_ : StreamReader<Character> {
58 MyBufferedStreamReader_ (const InputStream::Ptr<ElementType>& underlyingReadFromStreamAdopted)
59 : StreamReader<Character>{underlyingReadFromStreamAdopted}
60 {
61 }
62 [[nodiscard]] inline char32_t NextChar ()
63 {
64 Require (not IsAtEOF ());
65 return ReadBlocking ()->As<char32_t> ();
66 }
67 inline void AdvanceOne ()
68 {
69 Require (not IsAtEOF ());
70 Seek (eFromCurrent, 1);
71 }
72 inline void BackupOne ()
73 {
74 Seek (eFromCurrent, -1);
75 }
76 };
77
78 VariantValue Reader_value_ (MyBufferedStreamReader_& in);
79
/*
 *  Return true iff 'c' is JSON insignificant whitespace.
 *
 *  iswspace was pretty slow (on windows) - it showed up as significant in performance profiling.
 *  According to https://www.json.org/json-en.html the only whitespace allowed in JSON source is:
 *      ws = *(
 *          %x20 /      ; Space
 *          %x09 /      ; Horizontal tab
 *          %x0A /      ; Line feed or New line
 *          %x0D        ; Carriage return
 *      )
 */
inline bool IsJSONSpace_ (char32_t c)
{
    return c == ' ' or c == '\t' or c == '\n' or c == '\r';
}
104
/*
 *  Return true iff 'c' is one of the ten decimal digits '0'..'9'.
 *
 *  Replacement for iswdigit (which showed up as significant - 6% of full runtime).
 *  According to https://www.json.org/json-en.html only digits 0..9 are allowed.
 *  C++ guarantees the digit characters are contiguous and ascending, so a range test
 *  accepts exactly the same ten values as enumerating them one by one.
 */
inline bool IsJSONDigit_ (char32_t c)
{
    return '0' <= c and c <= '9';
}
128
129 // throw if bad hex digit
130 uint8_t HexChar2Num_ (char c)
131 {
132 if ('0' <= c and c <= '9') [[likely]] {
133 return static_cast<uint8_t> (c - '0');
134 }
135 if ('a' <= c and c <= 'f') [[likely]] {
136 return static_cast<uint8_t> ((c - 'a') + 10);
137 }
138 if ('A' <= c and c <= 'F') [[likely]] {
139 return static_cast<uint8_t> ((c - 'A') + 10);
140 }
141 static const auto kException_{BadFormatException{"JSON: bad hex digit after \\u"sv}};
142 Execution::Throw (kException_);
143 }
144
145 // 'in' is positioned to the start of string, and we read, leaving in positioned just after the end of the string
146 String Reader_String_ (MyBufferedStreamReader_& in)
147 {
148 Require (not in.IsAtEOF ());
149 char32_t c = in.NextChar ();
150 if (c != '\"') [[unlikely]] {
151 static const auto kException_{BadFormatException{"JSON: Expected quoted string"sv}};
152 Execution::Throw (kException_);
153 }
154 /*
155 * accumulate chars, and check for close-quote
156 */
157 StringBuilder<StringBuilder_Options<char8_t, 64>> result; // StringBuilder_Options: text often ascii, empirically a bit faster with on windows on jsonparser regtest
158 while (true) {
159 if (in.IsAtEOF ()) [[unlikely]] {
160 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
161 Execution::Throw (kException_);
162 }
163 c = in.NextChar ();
164 if (c == '\"') [[unlikely]] {
165 return result.str ();
166 }
167 else if (c == '\\') [[unlikely]] {
168 // quoted character read...
169 if (in.IsAtEOF ()) [[unlikely]] {
170 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
171 Execution::Throw (kException_);
172 }
173 c = in.NextChar ();
174 switch (c) {
175 case 'b':
176 c = '\b';
177 break;
178 case 'f':
179 c = '\f';
180 break;
181 case 'n':
182 c = '\n';
183 break;
184 case 'r':
185 c = '\r';
186 break;
187 case 't':
188 c = '\t';
189 break;
190 case 'u': {
191 // Not sure this is right -- But I hope so ... -- LGP 2012-11-29
192 char32_t newC = '\0';
193 for (int n = 0; n < 4; ++n) {
194 if (in.IsAtEOF ()) [[unlikely]] {
195 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
196 Execution::Throw (kException_);
197 }
198 newC += HexChar2Num_ (static_cast<char> (in.NextChar ()));
199 if (n != 3) {
200 newC <<= 4;
201 }
202 }
203 c = newC;
204 } break;
205 default: {
206 // if we have \N for any unrecognized N, just treat it as N
207 }
208 }
209 }
210 result += c; // must handle other character quoting (besides \u which was preflighted)
211 }
212 }
213
214 // 'in' is positioned to the second character of number (first passed as arg), and we read, leaving in positioned just after end of number
215 static constexpr Character kDash_{'-'};
216 VariantValue Reader_Number_ (char32_t initialChar, MyBufferedStreamReader_& in)
217 {
218 Require (initialChar == '-' or IsJSONDigit_ (initialChar));
219
220 bool containsDot = false;
221 // ACCUMULATE STRING, and then call builtin number parsing functions...
222 // This accumulation is NOT as restrictive as it could be - but should accept all valid numbers
223 StringBuilder tmp;
224 for (char32_t c = initialChar; c != '\0'; c = in.ReadBlocking ().value_or ('\0').As<char32_t> ()) {
225 if (IsJSONDigit_ (c) or c == '.' or c == 'e' or c == 'E' or c == '+' or c == '-') [[likely]] {
226 tmp += c;
227 if (c == '.') [[unlikely]] {
228 containsDot = true;
229 }
230 }
231 else {
232 // any other character signals end of number (not a syntax error)
233 // but backup - don't consume next character - not part of number
234 Assert (not tmp.empty ()); // at least consumed 'initialChar'
235 in.BackupOne ();
236 break;
237 }
238 }
239 Assert (not tmp.empty ());
241 span<const char32_t> tmpData = tmp.GetData (&ignoreBuf);
242 if (containsDot) {
243 return VariantValue{FloatConversion::ToFloat<long double> (tmpData)};
244 }
245 else {
246 // if no - use unsigned since has wider range (if no -)
247 return (initialChar == kDash_) ? VariantValue{String2Int<long long int> (tmpData)}
248 : VariantValue{String2Int<unsigned long long int> (tmpData)};
249 }
250 }
251
252 // NOTE: THIS STARTS SEEKED JUST PAST OPENING '{'
253 VariantValue Reader_Object_ (MyBufferedStreamReader_& in)
254 {
255 Mapping_stdhashmap<String, VariantValue>::STDHASHMAP<> result; // slight tweak using stl map, and move-construct Stroika map at the end
256
257 // accumulate elements, and check for close-array
258 enum LookingFor {
259 eName, // this means looking for start of next field/member of the object
260 eValue,
261 eColon,
262 eComma // when looking for comma could either find it, or } marking end of object
263 };
264 LookingFor lf = eName;
265 optional<String> curName;
266 while (true) {
267 optional<Character> oNextChar = in.Peek ();
268 if (not oNextChar.has_value ()) [[unlikely]] {
269 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading object (looking for '}')"sv}};
270 Execution::Throw (kException_);
271 }
272 char32_t nextChar = oNextChar->As<char32_t> ();
273 if (IsJSONSpace_ (nextChar)) [[likely]] {
274 in.AdvanceOne (); // skip char
275 continue;
276 }
277 switch (lf) {
278 case eName: {
279 Assert (curName == nullopt);
280 if (nextChar == '}') {
281 in.AdvanceOne (); // finished object
283 }
284 else if (nextChar == '\"') [[likely]] {
285 curName = Reader_String_ (in); // starting a new data member (with a string)
286 lf = eColon;
287 }
288 else {
289 static const auto kException_{
290 BadFormatException{"JSON: Reading object, looking for a name, didn't find a close brace or open quote"sv}};
291 Execution::Throw (kException_);
292 }
293 } break;
294 case eComma: {
295 Assert (curName == nullopt);
296 if (nextChar == '}') {
297 in.AdvanceOne (); // finished object
299 }
300 else if (nextChar == ',') [[likely]] {
301 in.AdvanceOne (); // consume it, and look for name next (start of next object member)
302 lf = eName;
303 }
304 else {
305 static const auto kException_{BadFormatException{"JSON: Reading object, looking for a comma, but found something else"sv}};
306 Execution::Throw (kException_);
307 }
308 } break;
309 case eColon: {
310 Assert (curName);
311 if (nextChar == ':') [[likely]] {
312 in.AdvanceOne (); // consume the separator, and look the the value
313 lf = eValue;
314 }
315 else {
316 static const auto kException_{BadFormatException{"JSON: Reading object, looking for a colon, but found something else"sv}};
317 Execution::Throw (kException_);
318 }
319 } break;
320 case eValue: {
321 Assert (curName);
322 // dont care what the character is, read a new value
323 result.insert ({Memory::ValueOf (curName), Reader_value_ (in)});
324 curName = nullopt;
325 lf = eComma; // and look for another field/data member
326 } break;
327 }
328 }
329 }
330
331 // NOTE - called with OPENING '[' already read
332 VariantValue Reader_Array_ (MyBufferedStreamReader_& in)
333 {
334 vector<VariantValue> result;
335
336 // accumulate elements, and check for close-array
337 bool lookingForElt = true;
338 while (true) {
339 if (in.IsAtEOF ()) [[unlikely]] {
340 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading array (looking for ']')"sv}};
341 Execution::Throw (kException_);
342 }
343 char32_t peekedChar = in.Peek ()->As<char32_t> ();
344 if (peekedChar == ']') {
345 if (lookingForElt) {
346 // allow ending ',' - harmless - could be more aggressive - but if so - careful of zero-sized array special case
347 }
348 in.AdvanceOne ();
350 }
351 else if (peekedChar == ',') {
352 if (lookingForElt) [[unlikely]] {
353 static const auto kException_{BadFormatException{"JSON: Unexpected second ',' in reading array"sv}};
354 Execution::Throw (kException_);
355 }
356 else {
357 lookingForElt = true;
358 }
359 in.AdvanceOne ();
360 }
361 else if (IsJSONSpace_ (peekedChar)) {
362 in.AdvanceOne ();
363 }
364 else {
365 // not looking at whitespace, in midst of array, and array not terminated, so better be looking at a value
366 if (lookingForElt) [[likely]] {
367 Containers::Support::ReserveTweaks::Reserve4Add1 (result);
368 result.push_back (Reader_value_ (in));
369 lookingForElt = false;
370 }
371 else {
372 static const auto kException_{BadFormatException{"JSON: Unexpected character (missing ',' ?) in reading array"sv}};
373 Execution::Throw (kException_);
374 }
375 }
376 }
377 }
378
379 VariantValue Reader_SpecialToken_ (char32_t initialChar, MyBufferedStreamReader_& in)
380 {
381 switch (initialChar) {
382 case 'f': {
383 Character buf[4];
384 if (in.ReadAll (begin (buf), end (buf)) == 4 and buf[0] == 'a' and buf[1] == 'l' and buf[2] == 's' and buf[3] == 'e') {
385 return VariantValue{false};
386 }
387 } break;
388 case 't': {
389 Character buf[3];
390 if (in.ReadAll (begin (buf), end (buf)) == 3 and buf[0] == 'r' and buf[1] == 'u' and buf[2] == 'e') {
391 return VariantValue{true};
392 }
393 } break;
394 case 'n': {
395 Character buf[3];
396 if (in.ReadAll (begin (buf), end (buf)) == 3 and buf[0] == 'u' and buf[1] == 'l' and buf[2] == 'l') {
397 return VariantValue{};
398 }
399 } break;
400 }
401 static const auto kException_{BadFormatException{"JSON: Unrecognized token"sv}};
402 Execution::Throw (kException_);
403 }
404
405 VariantValue Reader_value_ (MyBufferedStreamReader_& in)
406 {
407 // Skip initial whitespace, and look for any value:
408 // string
409 // number
410 // object
411 // array
412 // true
413 // false
414 // null
415 for (optional<Character> oc = in.ReadBlocking (); oc; oc = in.ReadBlocking ()) {
416 switch (oc->As<char32_t> ()) {
417 case '\"':
418 in.BackupOne ();
419 return Reader_String_ (in);
420
421 case '0':
422 case '1':
423 case '2':
424 case '3':
425 case '4':
426 case '5':
427 case '6':
428 case '7':
429 case '8':
430 case '9':
431 case '-':
432 return Reader_Number_ (oc->As<char32_t> (), in);
433
434 case '{':
435 return Reader_Object_ (in);
436 case '[':
437 return Reader_Array_ (in);
438
439 case 't':
440 case 'f':
441 case 'n':
442 return Reader_SpecialToken_ (oc->As<char32_t> (), in);
443
444 default: {
445 if (IsJSONSpace_ (oc->As<char32_t> ())) [[likely]] {
446 // ignore
447 }
448 else {
449 static const auto kException_{BadFormatException{"JSON: Unexpected character looking for start of value"sv}};
450 Execution::Throw (kException_);
451 }
452 }
453 }
454 }
455 // if we get here - nothing found
456 static const auto kException_{BadFormatException{"JSON: Unexpected EOF looking for value"sv}};
457 Execution::Throw (kException_);
458 }
459}
460
461/*
462 ********************************************************************************
463 ************************* Variant::JSON::Reader ********************************
464 ********************************************************************************
465 */
466class Variant::JSON::Reader::NativeRep_ : public Variant::Reader::_IRep {
467public:
468 virtual _SharedPtrIRep Clone () const override
469 {
470 return make_shared<NativeRep_> (); // no instance data
471 }
472 virtual optional<filesystem::path> GetDefaultFileSuffix () const override
473 {
474 return ".json"sv;
475 }
476 virtual VariantValue Read (const Streams::InputStream::Ptr<byte>& in) override
477 {
478 using namespace Streams;
479 return Read (BinaryToText::Reader::New (in, nullopt, SeekableFlag::eSeekable));
480 }
481 virtual VariantValue Read (const Streams::InputStream::Ptr<Character>& in) override
482 {
483#if USE_NOISY_TRACE_IN_THIS_MODULE_
484 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::NativeRep_::Read"};
485#endif
486 Require (in.IsSeekable ());
487 MyBufferedStreamReader_ reader{in};
488 return Reader_value_ (reader);
489 }
490};
491#if __has_include("boost/json.hpp")
492
493namespace {
494 // Based on example: https://www.boost.org/doc/libs/1_83_0/libs/json/doc/html/json/examples.html#json.examples.validate
495 struct BoostSAXHandler_ {
496 /// The maximum number of elements allowed in an array
497 static constexpr std::size_t max_array_size = static_cast<size_t> (-1);
498
499 /// The maximum number of elements allowed in an object
500 static constexpr std::size_t max_object_size = static_cast<size_t> (-1);
501
502 /// The maximum number of characters allowed in a string
503 static constexpr std::size_t max_string_size = static_cast<size_t> (-1);
504
505 /// The maximum number of characters allowed in a key
506 static constexpr std::size_t max_key_size = static_cast<size_t> (-1);
507
508 BoostSAXHandler_ () = default;
509 BoostSAXHandler_ (const BoostSAXHandler_&) = delete;
510
511 bool on_document_begin ([[maybe_unused]] error_code& ec)
512 {
513 Assert (fStack_.empty ());
514 fStack_.emplace (Context_::eSimple);
515 return true;
516 }
517 bool on_document_end ([[maybe_unused]] error_code& ec)
518 {
519 Assert (fStack_.size () == 1);
520 this->PopContext_ ();
521 Assert (fStack_.empty ());
522 return true;
523 }
524
525 bool on_array_begin ([[maybe_unused]] error_code& ec)
526 {
527 fStack_.emplace (Context_::eArray);
528 return true;
529 }
530 bool on_array_end ([[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
531 {
532 Assert (fStack_.top ().GetContextType () == Context_::eArray);
533 Assert (fStack_.top ().PeekAccumVector_ ().size () == n);
534 PopContext_ ();
535 return true;
536 }
537
538 bool on_object_begin ([[maybe_unused]] error_code& ec)
539 {
540 fStack_.emplace (Context_::eMap);
541 return true;
542 }
543 bool on_object_end ([[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
544 {
545 Assert (fStack_.top ().GetContextType () == Context_::eMap);
546 Assert (fStack_.top ().PeekAccumObj_ ().size () == n);
547 PopContext_ ();
548 return true;
549 }
550
551 bool on_string_part (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
552 {
553 fPartSaver_.push_back (span<const char>{s});
554 return true;
555 }
556 bool on_string (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
557 {
558 // Tricky - not really documented (I can find) - but seems if n != s.size() - we must use fPartSaver from on_key_part
559 if (s.size () == n) {
560 AddCompleteValue_ (VariantValue{toStroikaString_ (s)});
561 }
562 else {
563 fPartSaver_.push_back (span<const char>{s});
564 String res = toStroikaString_ (fPartSaver_);
565 fPartSaver_.clear ();
566 AddCompleteValue_ (VariantValue{res});
567 }
568 return true;
569 }
570
571 bool on_key_part (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
572 {
573 // tricky - save text in buffer, for use on subsequent onKey
574 Assert (fStack_.top ().GetContextType () == Context_::eMap);
575 fPartSaver_.push_back (span<const char>{s});
576 return true;
577 }
578 bool on_key (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
579 {
580 Assert (fStack_.top ().GetContextType () == Context_::eMap);
581 Assert (s.size () <= n);
582 // Tricky - not really documented (I can find) - but seems if n != s.size() - we must use fPartSaver from on_key_part
583 if (s.size () == n) {
584 fStack_.top ().fKey = toStroikaString_ (s);
585 }
586 else {
587 fPartSaver_.push_back (span<const char>{s});
588 fStack_.top ().fKey = toStroikaString_ (fPartSaver_);
589 fPartSaver_.clear ();
590 }
591 return true;
592 }
593
594 bool on_number_part ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
595 {
596 // No need to track anything for numbers, as boost appears to incrementally parse and keep its state internally
597 return true;
598 }
599 bool on_int64 (int64_t i, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
600 {
601 AddCompleteValue_ (VariantValue{i});
602 return true;
603 }
604 bool on_uint64 (uint64_t u, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
605 {
606 AddCompleteValue_ (VariantValue{u});
607 return true;
608 }
609 bool on_double (double d, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
610 {
611 AddCompleteValue_ (VariantValue{d});
612 return true;
613 }
614
615 bool on_bool (bool b, [[maybe_unused]] error_code& ec)
616 {
617 AddCompleteValue_ (VariantValue{b});
618 return true;
619 }
620
621 bool on_null ([[maybe_unused]] error_code& ec)
622 {
623 AddCompleteValue_ (VariantValue{});
624 return true;
625 }
626
627 bool on_comment_part ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
628 {
629 // ignore comments
630 return true;
631 }
632 bool on_comment ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
633 {
634 // ignore comments
635 return true;
636 }
637
638 // Careful not to use string_view directly cuz Stroika assumes we can keep that pointer (String_Constant) - so map to span, and also character to use char8_t
639 // to signal character set, and not ASCII
640 template <typename CONTAINER_OF_CHAR_BUT_REALLY_UTF8>
641 static String toStroikaString_ (CONTAINER_OF_CHAR_BUT_REALLY_UTF8 sv)
642 requires requires (CONTAINER_OF_CHAR_BUT_REALLY_UTF8 t) {
643 { span<const char>{t} };
644 }
645 {
646 return String{Memory::SpanBytesCast<span<const char8_t>> (span<const char>{sv})};
647 }
648
649 VariantValue GetConstructedValue () const
650 {
651 return fCompletedFinalValue_;
652 }
653 void AddCompleteValue_ (VariantValue v)
654 {
655 Assert (not fStack_.empty ());
656 Context_& t = fStack_.top ();
657 switch (t.GetContextType ()) {
658 case Context_::eArray:
659 t.PeekAccumVector_ ().push_back (v);
660 break;
661 case Context_::eMap:
662 t.PeekAccumObj_ ().insert ({t.fKey, v});
663 break;
664 case Context_::eSimple:
665 t.PeekSimpleValue_ () = v;
666 break;
667 default:
669 }
670 }
671 void PopContext_ ()
672 {
673 Assert (not fStack_.empty ());
674 // complete what is at the top of the stack and do AddCompleteValue_ to the new top of the stack
675 Context_& t = fStack_.top ();
676 VariantValue vv = [&t] () {
677 switch (t.GetContextType ()) {
678 case Context_::eArray:
679 return VariantValue{move (t.PeekAccumVector_ ())};
680 case Context_::eMap:
681 return VariantValue{Mapping_stdhashmap<String, VariantValue>{move (t.PeekAccumObj_ ())}};
682 case Context_::eSimple:
683 return t.PeekSimpleValue_ ();
684 default:
686 return VariantValue{};
687 }
688 }();
689 fStack_.pop ();
690 if (fStack_.empty ()) {
691 fCompletedFinalValue_ = vv;
692 }
693 else {
694 AddCompleteValue_ (vv);
695 }
696 }
697
698 /*
699 * We have a stack of context objects for in progress parses. This cheaply maintains the data at each point of the stack.
700 */
701 struct Context_ {
702
703 // NOTE - critical these enums correspond to index values of std::variant<> below
704 enum ContextType_ {
705 eSimple,
706 eArray,
707 eMap
708 };
709
710 Context_ () = delete;
711 Context_ (ContextType_ ct)
712 {
713 switch (ct) {
714 case eSimple:
715 fVV_ = VariantValue{};
716 break;
717 case eArray:
718 fVV_ = vector<VariantValue>{};
719 break;
720 case eMap:
722 break;
723 }
724 Ensure (ct == GetContextType ()); // ensure ContextType_ enum in same order as variant<> arguments
725 }
726 ~Context_ () = default;
727 Context_ (const Context_&) = delete;
728 Context_ (Context_&&) = default;
729
730 ContextType_ GetContextType () const
731 {
732 return static_cast<ContextType_> (fVV_.index ());
733 }
734
735 vector<VariantValue>& PeekAccumVector_ ()
736 {
737 Require (GetContextType () == eArray);
738 return get<vector<VariantValue>> (fVV_);
739 }
741 {
742 Require (GetContextType () == eMap);
743 return get<Mapping_stdhashmap<String, VariantValue>::STDHASHMAP<>> (fVV_);
744 }
745 VariantValue& PeekSimpleValue_ ()
746 {
747 Require (GetContextType () == eSimple);
748 return get<VariantValue> (fVV_);
749 }
750
751 // use variant to save construct/destruct of unneeded parts
752 variant<VariantValue, vector<VariantValue>, Mapping_stdhashmap<String, VariantValue>::STDHASHMAP<>> fVV_;
753
754 String fKey; // only allowed of context type = eMap (so COULD embed in above variant, but KISS) - also could use optional<String> which would help some things and make others worse...
755 };
756
757 std::stack<Context_> fStack_;
758 VariantValue fCompletedFinalValue_; // only filled in when stack is empty
759
760 // doesn't need to be in stack context cuz cannot fill partial string/key/etc with intervening pop/push
761 // Not using StringBuilder here cuz could contain partial strings
763 };
764}
765class Variant::JSON::Reader::BoostRep_ : public Variant::Reader::_IRep {
766public:
767 virtual _SharedPtrIRep Clone () const override
768 {
769 return make_shared<BoostRep_> (); // no instance data
770 }
771 virtual optional<filesystem::path> GetDefaultFileSuffix () const override
772 {
773 return ".json"sv;
774 }
775 virtual VariantValue Read (const Streams::InputStream::Ptr<byte>& in) override
776 {
777#if USE_NOISY_TRACE_IN_THIS_MODULE_
778 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::BoostRep_::Read"};
779#endif
780 using namespace Streams;
781 using namespace boost;
782 bool inSeekable = in.IsSeekable ();
783 constexpr bool kUseSAX_ = true; // experimentally, on windows, sax about 10% faster than stream_parser/convert way
784 try {
785 if constexpr (kUseSAX_) {
786 json::basic_parser<BoostSAXHandler_> p{json::parse_options{}};
787 byte buf[8 * 1024]; // experimentally - larger buffers didn't help speed
788 const size_t targetChunkSize = inSeekable ? Memory::NEltsOf (buf) : 1;
789 size_t actualChunkSize;
790 boost::system::error_code ec;
791 while ((actualChunkSize = in.ReadBlocking (span{buf, targetChunkSize}).size ()) != 0) {
792 ec.clear ();
793 size_t nParsed = p.write_some (true, reinterpret_cast<const char*> (begin (buf)), actualChunkSize, ec);
794 Assert (nParsed <= actualChunkSize);
795 if (nParsed < actualChunkSize) {
796 in.Seek (eFromCurrent, static_cast<SignedSeekOffsetType> (nParsed) - static_cast<SignedSeekOffsetType> (actualChunkSize));
797 break;
798 }
799 if (p.done ()) {
800 break; // good parse
801 }
802 else if (ec) {
803 Execution::Throw (DataExchange::BadFormatException{String::FromNarrowSDKString (ec.what ())});
804 }
805 }
806 if (not p.done ()) {
807 p.write_some (false, nullptr, 0, ec);
808 if (ec and ec != boost::json::error::extra_data /*and ec != boost::json::error::incomplete*/) {
809 Execution::Throw (DataExchange::BadFormatException{String::FromNarrowSDKString (ec.what ())});
810 }
811 }
812 return p.handler ().GetConstructedValue ();
813 }
814 else {
815 json::stream_parser p;
816 byte buf[8 * 1024];
817 const size_t targetChunkSize = inSeekable ? Memory::NEltsOf (buf) : 1;
818 size_t actualChunkSize;
819 while ((actualChunkSize = in.ReadBlocking (span{buf, targetChunkSize}).size ()) != 0) {
820 boost::system::error_code ec;
821 size_t nParsed = p.write_some (reinterpret_cast<const char*> (begin (buf)), actualChunkSize, ec);
822 Assert (nParsed <= actualChunkSize);
823 if (nParsed < actualChunkSize) {
824 in.Seek (eFromCurrent, static_cast<SignedSeekOffsetType> (nParsed) - static_cast<SignedSeekOffsetType> (actualChunkSize));
825 break;
826 }
827 if (p.done ()) {
828 break; // good parse
829 }
830 else if (ec) {
831 Execution::Throw (DataExchange::BadFormatException{String::FromNarrowSDKString (ec.what ())});
832 }
833 }
834 if (not p.done ()) {
835 p.finish (); // in case wrote text like '3' to buffer, ambiguous if done
836 }
837 Assert (p.done ());
838 return DataExchange::VariantValue{p.release ()}; // Transform boost objects to Stroika objects
839 }
840 }
841 catch (...) {
842 Execution::Throw (DataExchange::BadFormatException{Characters::ToString (current_exception ())});
843 }
844 }
845 virtual VariantValue Read (const Streams::InputStream::Ptr<Character>& in) override
846 {
847#if USE_NOISY_TRACE_IN_THIS_MODULE_
848 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::BoostRep_::Read"};
849#endif
850 Require (in.IsSeekable ());
851 using namespace Streams;
852 return Read (_ToByteReader (in));
853 }
854};
855#endif
856
857inline auto Variant::JSON::Reader::mk_ (const ReaderOptions& options) -> shared_ptr<_IRep>
858{
859 switch (options.fPreferredAlgorithm.value_or (ReaderOptions::Algorithm::eDEFAULT)) {
860 case ReaderOptions::Algorithm::eStroikaNative:
861 return make_shared<NativeRep_> ();
862#if __has_include("boost/json.hpp")
863 case ReaderOptions::Algorithm::eBoost:
864 return make_shared<BoostRep_> ();
865#endif
866 default:
868 return nullptr;
869 }
870}
871
// Construct a JSON Reader; the implementation object handed to the base class is chosen by mk_ () from 'options'.
872Variant::JSON::Reader::Reader (const ReaderOptions& options)
873 : inherited{mk_ (options)}
874{
875}
#define AssertNotReached()
Definition Assertions.h:355
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
nonvirtual span< const CHAR_T > GetData(Memory::StackBuffer< CHAR_T > *probablyIgnoredBuf) const
access a span of data located inside the StringBuilder. Return internal pointer, or pointer internal ...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
Mapping_stdhashmap<KEY_TYPE, MAPPED_VALUE_TYPE, TRAITS> is an std::map-based concrete implementation ...
Sequence_stdvector<T> is an std::vector-based concrete implementation of the Sequence<T> container pa...
nonvirtual void insert(ArgByValueType< value_type > kvp)
Definition Mapping.inl:426
Simple variant-value (case variant union) object, with (variant) basic types analogous to a value in ...
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
nonvirtual optional< ElementType > ReadBlocking() const
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset) const
nonvirtual bool IsSeekable() const
Returns true iff this object was constructed with a seekable input stream rep.
Definition Stream.inl:44
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43
Ptr New(const InputStream::Ptr< byte > &src, optional< AutomaticCodeCvtFlags > codeCvtFlags={}, optional< SeekableFlag > seekable={}, ReadAhead readAhead=eReadAheadAllowed)
Create an InputStream::Ptr<Character> from the arguments (usually binary source) - which can be used ...
StreamReader is an non-essential Stream utility, adding simplicity of use for a common use case,...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it m...
nonvirtual optional< ElementType > ReadBlocking()
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...