Stroika Library 3.0d21
 
Loading...
Searching...
No Matches
Variant/JSON/Reader.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <optional>
7#include <stack>
8
9#if __has_include("boost/json.hpp")
10#include <boost/json.hpp>
11#include <boost/json/basic_parser_impl.hpp>
12#endif
13
14#include "Stroika/Foundation/Characters/FloatConversion.h"
16#include "Stroika/Foundation/Characters/String2Int.h"
18#include "Stroika/Foundation/Containers/Concrete/Mapping_HashTable.h"
19#include "Stroika/Foundation/Containers/Support/ReserveTweaks.h"
20#include "Stroika/Foundation/DataExchange/BadFormatException.h"
26
27#include "Reader.h"
28
29using namespace Stroika::Foundation;
32using namespace Stroika::Foundation::Execution;
33using namespace Stroika::Foundation::Memory;
34using namespace Stroika::Foundation::Streams;
35using namespace Stroika::Foundation::Traversal;
36
38
39// Comment this in to turn on aggressive noisy DbgTrace in this module
40//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
41
42namespace {
43
44 /*
45 * Parse strategy:
46 * o Pre-pass to map all input to UNICODE - and then just handle a sequence of unicode strings
47 * o Classic recursive descent parser.
48 * o Inline lexical analysis (cuz very simple)
49 */
50
// Names for the lexical contexts a JSON parse can be in.
// NOTE(review): no use of this enum is visible in this section of the file - confirm whether it is still needed.
51 enum ReadState_ {
52 eNeutral_,
53 eInObject_,
54 eInArray_,
55 eInNumber_,
56 eInString_,
57 };
58
59 // use StreamReader to get buffering of input data (performance tweak), and a couple helper methods
60 struct MyBufferedStreamReader_ : StreamReader<Character> {
61 MyBufferedStreamReader_ (const InputStream::Ptr<ElementType>& underlyingReadFromStreamAdopted)
62 : StreamReader<Character>{underlyingReadFromStreamAdopted}
63 {
64 }
65 [[nodiscard]] inline char32_t NextChar ()
66 {
67 Require (not IsAtEOF ());
68 return ReadBlocking ()->As<char32_t> ();
69 }
70 inline void AdvanceOne ()
71 {
72 Require (not IsAtEOF ());
73 Seek (eFromCurrent, 1);
74 }
75 inline void BackupOne ()
76 {
77 Seek (eFromCurrent, -1);
78 }
79 };
80
81 VariantValue Reader_value_ (MyBufferedStreamReader_& in);
82
/*
 *  Return true iff 'c' is one of the four whitespace characters JSON allows
 *  (see https://www.json.org/json-en.html):
 *      %x20 (space), %x09 (horizontal tab), %x0A (line feed), %x0D (carriage return)
 *
 *  Hand-rolled rather than calling iswspace, which the original author measured as slow (on Windows) in profiling.
 */
inline bool IsJSONSpace_ (char32_t c)
{
    return c == ' ' or c == '\t' or c == '\n' or c == '\r';
}
107
/*
 *  Return true iff 'c' is one of the decimal digits '0'..'9' - the only digits JSON allows
 *  (see https://www.json.org/json-en.html).
 *
 *  Replaces iswdigit, which the original author measured at ~6% of full runtime.
 *  A range test is valid without assuming ASCII: C++ guarantees '0'..'9' are contiguous and increasing.
 */
inline bool IsJSONDigit_ (char32_t c)
{
    return '0' <= c and c <= '9';
}
131
132 // throw if bad hex digit
133 uint8_t HexChar2Num_ (char c)
134 {
135 if ('0' <= c and c <= '9') [[likely]] {
136 return static_cast<uint8_t> (c - '0');
137 }
138 if ('a' <= c and c <= 'f') [[likely]] {
139 return static_cast<uint8_t> ((c - 'a') + 10);
140 }
141 if ('A' <= c and c <= 'F') [[likely]] {
142 return static_cast<uint8_t> ((c - 'A') + 10);
143 }
144 static const auto kException_{BadFormatException{"JSON: bad hex digit after \\u"sv}};
145 Throw (kException_);
146 }
147
148 // 'in' is positioned to the start of string, and we read, leaving in positioned just after the end of the string
149 String Reader_String_ (MyBufferedStreamReader_& in)
150 {
151 Require (not in.IsAtEOF ());
152 char32_t c = in.NextChar ();
153 if (c != '\"') [[unlikely]] {
154 static const auto kException_{BadFormatException{"JSON: Expected quoted string"sv}};
155 Throw (kException_);
156 }
157 /*
158 * accumulate chars, and check for close-quote
159 */
160 StringBuilder<StringBuilder_Options<char8_t, 64>> result; // StringBuilder_Options: text often ascii, empirically a bit faster with on windows on jsonparser regtest
161 while (true) {
162 if (in.IsAtEOF ()) [[unlikely]] {
163 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
164 Throw (kException_);
165 }
166 c = in.NextChar ();
167 if (c == '\"') [[unlikely]] {
168 return result.str ();
169 }
170 else if (c == '\\') [[unlikely]] {
171 // quoted character read...
172 if (in.IsAtEOF ()) [[unlikely]] {
173 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
174 Throw (kException_);
175 }
176 c = in.NextChar ();
177 switch (c) {
178 case 'b':
179 c = '\b';
180 break;
181 case 'f':
182 c = '\f';
183 break;
184 case 'n':
185 c = '\n';
186 break;
187 case 'r':
188 c = '\r';
189 break;
190 case 't':
191 c = '\t';
192 break;
193 case 'u': {
194 // Not sure this is right -- But I hope so ... -- LGP 2012-11-29
195 char32_t newC = '\0';
196 for (int n = 0; n < 4; ++n) {
197 if (in.IsAtEOF ()) [[unlikely]] {
198 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading string (looking for close quote)"sv}};
199 Throw (kException_);
200 }
201 newC += HexChar2Num_ (static_cast<char> (in.NextChar ()));
202 if (n != 3) {
203 newC <<= 4;
204 }
205 }
206 c = newC;
207 } break;
208 default: {
209 // if we have \N for any unrecognized N, just treat it as N
210 }
211 }
212 }
213 result += c; // must handle other character quoting (besides \u which was preflighted)
214 }
215 }
216
217 // 'in' is positioned to the second character of number (first passed as arg), and we read, leaving in positioned just after end of number
218 static constexpr Character kDash_{'-'};
219 VariantValue Reader_Number_ (char32_t initialChar, MyBufferedStreamReader_& in)
220 {
221 Require (initialChar == '-' or IsJSONDigit_ (initialChar));
222
223 bool containsDot = false;
224 // ACCUMULATE STRING, and then call builtin number parsing functions...
225 // This accumulation is NOT as restrictive as it could be - but should accept all valid numbers
226 StringBuilder tmp;
227 for (char32_t c = initialChar; c != '\0'; c = in.ReadBlocking ().value_or ('\0').As<char32_t> ()) {
228 if (IsJSONDigit_ (c) or c == '.' or c == 'e' or c == 'E' or c == '+' or c == '-') [[likely]] {
229 tmp += c;
230 if (c == '.') [[unlikely]] {
231 containsDot = true;
232 }
233 }
234 else {
235 // any other character signals end of number (not a syntax error)
236 // but backup - don't consume next character - not part of number
237 Assert (not tmp.empty ()); // at least consumed 'initialChar'
238 in.BackupOne ();
239 break;
240 }
241 }
242 Assert (not tmp.empty ());
243 StackBuffer<char32_t> ignoreBuf;
244 span<const char32_t> tmpData = tmp.GetData (&ignoreBuf);
245 if (containsDot) {
246 return VariantValue{FloatConversion::ToFloat<long double> (tmpData)};
247 }
248 else {
249 // if no - use unsigned since has wider range (if no -)
250 return (initialChar == kDash_) ? VariantValue{String2Int<long long int> (tmpData)}
251 : VariantValue{String2Int<unsigned long long int> (tmpData)};
252 }
253 }
254
// Parses the members of a JSON object into 'result', using a small state machine (LookingFor) that cycles
//      eName -> eColon -> eValue -> eComma -> eName ...
// Whitespace between tokens is skipped. Throws BadFormatException on EOF or on an unexpected character.
255 // NOTE: THIS STARTS SEEKED JUST PAST OPENING '{'
256 VariantValue Reader_Object_ (MyBufferedStreamReader_& in)
257 {
258 Mapping_HashTable<String, VariantValue>::DEFAULT_HASHTABLE<> result; // slight tweak using stl map, and move-construct Stroika map at the end
259
260 // accumulate elements, and check for close-object ('}')
261 enum LookingFor {
262 eName, // this means looking for start of next field/member of the object
263 eValue,
264 eColon,
265 eComma // when looking for comma could either find it, or } marking end of object
266 };
267 LookingFor lf = eName;
268 optional<String> curName; // name of the member currently being parsed; engaged only between reading the name and storing its value
269 while (true) {
270 optional<Character> oNextChar = in.Peek ();
271 if (not oNextChar.has_value ()) [[unlikely]] {
272 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading object (looking for '}')"sv}};
273 Throw (kException_);
274 }
275 char32_t nextChar = oNextChar->As<char32_t> ();
276 if (IsJSONSpace_ (nextChar)) [[likely]] {
277 in.AdvanceOne (); // skip char
278 continue;
279 }
280 switch (lf) {
281 case eName: {
282 Assert (curName == nullopt);
283 if (nextChar == '}') {
284 in.AdvanceOne (); // finished object
// NOTE(review): no return of the accumulated object is visible here in this listing - presumably 'result' is returned at this point; confirm
286 }
287 else if (nextChar == '\"') [[likely]] {
288 curName = Reader_String_ (in); // starting a new data member (with a string)
289 lf = eColon;
290 }
291 else {
292 static const auto kException_{
293 BadFormatException{"JSON: Reading object, looking for a name, didn't find a close brace or open quote"sv}};
294 Throw (kException_);
295 }
296 } break;
297 case eComma: {
298 Assert (curName == nullopt);
299 if (nextChar == '}') {
300 in.AdvanceOne (); // finished object
// NOTE(review): as above - the statement returning the finished object is not visible in this listing; confirm
302 }
303 else if (nextChar == ',') [[likely]] {
304 in.AdvanceOne (); // consume it, and look for name next (start of next object member)
305 lf = eName;
306 }
307 else {
308 static const auto kException_{BadFormatException{"JSON: Reading object, looking for a comma, but found something else"sv}};
309 Throw (kException_);
310 }
311 } break;
312 case eColon: {
313 Assert (curName);
314 if (nextChar == ':') [[likely]] {
315 in.AdvanceOne (); // consume the separator, and look for the value
316 lf = eValue;
317 }
318 else {
319 static const auto kException_{BadFormatException{"JSON: Reading object, looking for a colon, but found something else"sv}};
320 Throw (kException_);
321 }
322 } break;
323 case eValue: {
324 Assert (curName);
325 // don't care what the character is (it was only peeked, not consumed), read a new value
326 result.insert ({ValueOf (curName), Reader_value_ (in)});
327 curName = nullopt;
328 lf = eComma; // and look for another field/data member
329 } break;
330 }
331 }
332 }
333
// Parses the elements of a JSON array into 'result'. 'lookingForElt' is true when an element (or the closing ']')
// is expected next, and false right after an element - when only ',' or ']' may follow.
// Throws BadFormatException on EOF, on two consecutive commas, or on a missing comma between elements.
334 // NOTE - called with OPENING '[' already read
335 VariantValue Reader_Array_ (MyBufferedStreamReader_& in)
336 {
337 vector<VariantValue> result;
338
339 // accumulate elements, and check for close-array
340 bool lookingForElt = true;
341 while (true) {
342 if (in.IsAtEOF ()) [[unlikely]] {
343 static const auto kException_{BadFormatException{"JSON: Unexpected EOF reading array (looking for ']')"sv}};
344 Throw (kException_);
345 }
346 char32_t peekedChar = in.Peek ()->As<char32_t> ();
347 if (peekedChar == ']') {
348 if (lookingForElt) {
349 // allow ending ',' - harmless - could be more aggressive - but if so - careful of zero-sized array special case
350 }
351 in.AdvanceOne ();
// NOTE(review): no return of the accumulated array is visible here in this listing - presumably 'result' is returned at this point; confirm
353 }
354 else if (peekedChar == ',') {
355 if (lookingForElt) [[unlikely]] {
356 static const auto kException_{BadFormatException{"JSON: Unexpected second ',' in reading array"sv}};
357 Throw (kException_);
358 }
359 else {
360 lookingForElt = true;
361 }
362 in.AdvanceOne ();
363 }
364 else if (IsJSONSpace_ (peekedChar)) {
365 in.AdvanceOne (); // whitespace between tokens is skipped
366 }
367 else {
368 // not looking at whitespace, in midst of array, and array not terminated, so better be looking at a value
369 if (lookingForElt) [[likely]] {
370 Containers::Support::ReserveTweaks::Reserve4Add1 (result);
371 result.push_back (Reader_value_ (in));
372 lookingForElt = false;
373 }
374 else {
375 static const auto kException_{BadFormatException{"JSON: Unexpected character (missing ',' ?) in reading array"sv}};
376 Throw (kException_);
377 }
378 }
379 }
380 }
381
382 VariantValue Reader_SpecialToken_ (char32_t initialChar, MyBufferedStreamReader_& in)
383 {
384 switch (initialChar) {
385 case 'f': {
386 Character buf[4];
387 if (in.ReadAll (begin (buf), end (buf)) == 4 and buf[0] == 'a' and buf[1] == 'l' and buf[2] == 's' and buf[3] == 'e') {
388 return VariantValue{false};
389 }
390 } break;
391 case 't': {
392 Character buf[3];
393 if (in.ReadAll (begin (buf), end (buf)) == 3 and buf[0] == 'r' and buf[1] == 'u' and buf[2] == 'e') {
394 return VariantValue{true};
395 }
396 } break;
397 case 'n': {
398 Character buf[3];
399 if (in.ReadAll (begin (buf), end (buf)) == 3 and buf[0] == 'u' and buf[1] == 'l' and buf[2] == 'l') {
400 return VariantValue{};
401 }
402 } break;
403 }
404 static const auto kException_{BadFormatException{"JSON: Unrecognized token"sv}};
405 Throw (kException_);
406 }
407
408 VariantValue Reader_value_ (MyBufferedStreamReader_& in)
409 {
410 // Skip initial whitespace, and look for any value:
411 // string
412 // number
413 // object
414 // array
415 // true
416 // false
417 // null
418 for (optional<Character> oc = in.ReadBlocking (); oc; oc = in.ReadBlocking ()) {
419 switch (oc->As<char32_t> ()) {
420 case '\"':
421 in.BackupOne ();
422 return Reader_String_ (in);
423
424 case '0':
425 case '1':
426 case '2':
427 case '3':
428 case '4':
429 case '5':
430 case '6':
431 case '7':
432 case '8':
433 case '9':
434 case '-':
435 return Reader_Number_ (oc->As<char32_t> (), in);
436
437 case '{':
438 return Reader_Object_ (in);
439 case '[':
440 return Reader_Array_ (in);
441
442 case 't':
443 case 'f':
444 case 'n':
445 return Reader_SpecialToken_ (oc->As<char32_t> (), in);
446
447 default: {
448 if (IsJSONSpace_ (oc->As<char32_t> ())) [[likely]] {
449 // ignore
450 }
451 else {
452 static const auto kException_{BadFormatException{"JSON: Unexpected character looking for start of value"sv}};
453 Throw (kException_);
454 }
455 }
456 }
457 }
458 // if we get here - nothing found
459 static const auto kException_{BadFormatException{"JSON: Unexpected EOF looking for value"sv}};
460 Throw (kException_);
461 }
462}
463
464/*
465 ********************************************************************************
466 ************************* Variant::JSON::Reader ********************************
467 ********************************************************************************
468 */
469class Variant::JSON::Reader::NativeRep_ : public Variant::Reader::_IRep {
470public:
471 NativeRep_ () = default;
472 NativeRep_ (const NativeRep_&) = default;
473 virtual _SharedPtrIRep Clone () const override
474 {
475 return make_shared<NativeRep_> (*this);
476 }
477 virtual optional<filesystem::path> GetDefaultFileSuffix () const override
478 {
479 return ".json"sv;
480 }
481 virtual VariantValue Read (const InputStream::Ptr<byte>& in) override
482 {
483 return Read (BinaryToText::Reader::New (in, nullopt, SeekableFlag::eSeekable));
484 }
485 virtual VariantValue Read (const InputStream::Ptr<Character>& in) override
486 {
487#if USE_NOISY_TRACE_IN_THIS_MODULE_
488 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::NativeRep_::Read"};
489#endif
490 MyBufferedStreamReader_ reader{in.IsSeekable () ? in : BufferedInputStream::New (in, SeekableFlag::eSeekable)};
491 return Reader_value_ (reader);
492 }
493};
494#if __has_include("boost/json.hpp")
495
496namespace {
// Event handler used with boost::json::basic_parser<BoostSAXHandler_> (see BoostRep_::Read below).
// The on_XXX callbacks build up a VariantValue: fStack_ holds one Context_ per container currently being parsed
// (plus a bottom eSimple entry for the document itself); completed values are added to the context on top of the stack,
// and when the last context is popped the result lands in fCompletedFinalValue_ (fetch with GetConstructedValue).
497 // Based on example: https://www.boost.org/doc/libs/1_83_0/libs/json/doc/html/json/examples.html#json.examples.validate
498 struct BoostSAXHandler_ {
499 /// The maximum number of elements allowed in an array
500 static constexpr std::size_t max_array_size = static_cast<size_t> (-1);
501
502 /// The maximum number of elements allowed in an object
503 static constexpr std::size_t max_object_size = static_cast<size_t> (-1);
504
505 /// The maximum number of characters allowed in a string
506 static constexpr std::size_t max_string_size = static_cast<size_t> (-1);
507
508 /// The maximum number of characters allowed in a key
509 static constexpr std::size_t max_key_size = static_cast<size_t> (-1);
510
511 BoostSAXHandler_ () = default;
512 BoostSAXHandler_ (const BoostSAXHandler_&) = delete;
513
// document begin/end: push, and later pop, the bottom (eSimple) context
514 bool on_document_begin ([[maybe_unused]] error_code& ec)
515 {
516 Assert (fStack_.empty ());
517 fStack_.emplace (Context_::eSimple);
518 return true;
519 }
520 bool on_document_end ([[maybe_unused]] error_code& ec)
521 {
522 Assert (fStack_.size () == 1);
523 this->PopContext_ ();
524 Assert (fStack_.empty ());
525 return true;
526 }
527
528 bool on_array_begin ([[maybe_unused]] error_code& ec)
529 {
530 fStack_.emplace (Context_::eArray);
531 return true;
532 }
533 bool on_array_end ([[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
534 {
535 Assert (fStack_.top ().GetContextType () == Context_::eArray);
536 Assert (fStack_.top ().PeekAccumVector_ ().size () == n);
537 PopContext_ ();
538 return true;
539 }
540
541 bool on_object_begin ([[maybe_unused]] error_code& ec)
542 {
543 fStack_.emplace (Context_::eMap);
544 return true;
545 }
546 bool on_object_end ([[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
547 {
548 Assert (fStack_.top ().GetContextType () == Context_::eMap);
549 Assert (fStack_.top ().PeekAccumObj_ ().size () == n);
550 PopContext_ ();
551 return true;
552 }
553
554 bool on_string_part (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
555 {
// save partial text in buffer, for use in the subsequent on_string
556 fPartSaver_.push_back (span<const char>{s});
557 return true;
558 }
559 bool on_string (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
560 {
561 // Tricky - not really documented (I can find) - but seems if n != s.size() - we must use fPartSaver from on_string_part
562 if (s.size () == n) {
563 AddCompleteValue_ (VariantValue{toStroikaString_ (s)});
564 }
565 else {
566 fPartSaver_.push_back (span<const char>{s});
567 String res = toStroikaString_ (fPartSaver_);
568 fPartSaver_.clear ();
569 AddCompleteValue_ (VariantValue{res});
570 }
571 return true;
572 }
573
574 bool on_key_part (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
575 {
576 // tricky - save text in buffer, for use on subsequent on_key
577 Assert (fStack_.top ().GetContextType () == Context_::eMap);
578 fPartSaver_.push_back (span<const char>{s});
579 return true;
580 }
581 bool on_key (string_view s, [[maybe_unused]] std::size_t n, [[maybe_unused]] error_code& ec)
582 {
583 Assert (fStack_.top ().GetContextType () == Context_::eMap);
584 Assert (s.size () <= n);
585 // Tricky - not really documented (I can find) - but seems if n != s.size() - we must use fPartSaver from on_key_part
586 if (s.size () == n) {
587 fStack_.top ().fKey = toStroikaString_ (s);
588 }
589 else {
590 fPartSaver_.push_back (span<const char>{s});
591 fStack_.top ().fKey = toStroikaString_ (fPartSaver_);
592 fPartSaver_.clear ();
593 }
594 return true;
595 }
596
597 bool on_number_part ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
598 {
599 // No need to track anything for numbers, as boost appears to incrementally parse and keep its state internally
600 return true;
601 }
602 bool on_int64 (int64_t i, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
603 {
604 AddCompleteValue_ (VariantValue{i});
605 return true;
606 }
607 bool on_uint64 (uint64_t u, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
608 {
609 AddCompleteValue_ (VariantValue{u});
610 return true;
611 }
612 bool on_double (double d, [[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
613 {
614 AddCompleteValue_ (VariantValue{d});
615 return true;
616 }
617
618 bool on_bool (bool b, [[maybe_unused]] error_code& ec)
619 {
620 AddCompleteValue_ (VariantValue{b});
621 return true;
622 }
623
624 bool on_null ([[maybe_unused]] error_code& ec)
625 {
626 AddCompleteValue_ (VariantValue{});
627 return true;
628 }
629
630 bool on_comment_part ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
631 {
632 // ignore comments
633 return true;
634 }
635 bool on_comment ([[maybe_unused]] string_view s, [[maybe_unused]] error_code& ec)
636 {
637 // ignore comments
638 return true;
639 }
640
641 // Careful not to use string_view directly cuz Stroika assumes we can keep that pointer (String_Constant) - so map to span, and also character to use char8_t
642 // to signal character set, and not ASCII
643 template <typename CONTAINER_OF_CHAR_BUT_REALLY_UTF8>
644 static String toStroikaString_ (CONTAINER_OF_CHAR_BUT_REALLY_UTF8 sv)
645 requires requires (CONTAINER_OF_CHAR_BUT_REALLY_UTF8 t) {
646 { span<const char>{t} };
647 }
648 {
649 return String{SpanBytesCast<span<const char8_t>> (span<const char>{sv})};
650 }
651
// The fully parsed value: only filled in once the last context has been popped (see PopContext_)
652 VariantValue GetConstructedValue () const
653 {
654 return fCompletedFinalValue_;
655 }
// Store a finished value 'v' into whatever is on top of the stack: append to an array,
// insert into an object under the most recently seen key (fKey), or set the simple (document level) value.
656 void AddCompleteValue_ (VariantValue v)
657 {
658 Assert (not fStack_.empty ());
659 Context_& t = fStack_.top ();
660 switch (t.GetContextType ()) {
661 case Context_::eArray:
662 t.PeekAccumVector_ ().push_back (v);
663 break;
664 case Context_::eMap:
665 t.PeekAccumObj_ ().insert ({t.fKey, v});
666 break;
667 case Context_::eSimple:
668 t.PeekSimpleValue_ () = v;
669 break;
670 default:
672 }
673 }
674 void PopContext_ ()
675 {
676 Assert (not fStack_.empty ());
677 // complete what is at the top of the stack and do AddCompleteValue_ to the new top of the stack
678 Context_& t = fStack_.top ();
679 VariantValue vv = [&t] () {
680 switch (t.GetContextType ()) {
681 case Context_::eArray:
682 return VariantValue{move (t.PeekAccumVector_ ())};
683 case Context_::eMap:
684 return VariantValue{Mapping_HashTable<String, VariantValue>{move (t.PeekAccumObj_ ())}};
685 case Context_::eSimple:
686 return t.PeekSimpleValue_ ();
687 default:
689 return VariantValue{};
690 }
691 }();
692 fStack_.pop ();
693 if (fStack_.empty ()) {
694 fCompletedFinalValue_ = vv; // popped the last (document) context - this is the final result
695 }
696 else {
697 AddCompleteValue_ (vv);
698 }
699 }
700
701 /*
702 * We have a stack of context objects for in progress parses. This cheaply maintains the data at each point of the stack.
703 */
704 struct Context_ {
705
706 // NOTE - critical these enums correspond to index values of std::variant<> below
707 enum ContextType_ {
708 eSimple,
709 eArray,
710 eMap
711 };
712
713 Context_ () = delete;
714 Context_ (ContextType_ ct)
715 {
716 switch (ct) {
717 case eSimple:
718 fVV_ = VariantValue{};
719 break;
720 case eArray:
721 fVV_ = vector<VariantValue>{};
722 break;
723 case eMap:
725 break;
726 }
727 Ensure (ct == GetContextType ()); // ensure ContextType_ enum in same order as variant<> arguments
728 }
729 ~Context_ () = default;
730 Context_ (const Context_&) = delete;
731 Context_ (Context_&&) = default;
732
// context type is not stored separately: it is derived from which alternative of fVV_ is active
733 ContextType_ GetContextType () const
734 {
735 return static_cast<ContextType_> (fVV_.index ());
736 }
737
738 vector<VariantValue>& PeekAccumVector_ ()
739 {
740 Require (GetContextType () == eArray);
741 return get<vector<VariantValue>> (fVV_);
742 }
744 {
745 Require (GetContextType () == eMap);
746 return get<Mapping_HashTable<String, VariantValue>::DEFAULT_HASHTABLE<>> (fVV_);
747 }
748 VariantValue& PeekSimpleValue_ ()
749 {
750 Require (GetContextType () == eSimple);
751 return get<VariantValue> (fVV_);
752 }
753
754 // use variant to save construct/destruct of unneeded parts
755 variant<VariantValue, vector<VariantValue>, Mapping_HashTable<String, VariantValue>::DEFAULT_HASHTABLE<>> fVV_;
756
757 String fKey; // only allowed of context type = eMap (so COULD embed in above variant, but KISS) - also could use optional<String> which would help some things and make others worse...
758 };
759
760 std::stack<Context_> fStack_;
761 VariantValue fCompletedFinalValue_; // only filled in when stack is empty
762
763 // doesn't need to be in stack context cuz cannot fill partial string/key/etc with intervening pop/push
764 // Not using StringBuilder here cuz could contain partial strings
765 InlineBuffer<char, 512> fPartSaver_;
766 };
767}
// Variant::Reader rep implemented on top of boost::json (only compiled when boost/json.hpp is available).
768class Variant::JSON::Reader::BoostRep_ : public Variant::Reader::_IRep {
769public:
770 BoostRep_ () = default;
771 BoostRep_ (const BoostRep_&) = default;
772 virtual _SharedPtrIRep Clone () const override
773 {
774 return make_shared<BoostRep_> (*this);
775 }
776 virtual optional<filesystem::path> GetDefaultFileSuffix () const override
777 {
778 return ".json"sv;
779 }
// Parse one JSON value from a binary stream, feeding the boost parser in chunks of up to sizeof (buf) bytes.
// If the parser consumes less than a full chunk, the stream is seeked back so the unconsumed bytes remain unread.
// Any exception raised inside is rethrown as a BadFormatException (see catch (...) below).
780 virtual VariantValue Read (const InputStream::Ptr<byte>& in) override
781 {
782#if USE_NOISY_TRACE_IN_THIS_MODULE_
783 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::BoostRep_::Read"};
784#endif
785 using namespace boost;
786 InputStream::Ptr<byte> useInStream = in.IsSeekable () ? in : BufferedInputStream::New (in, SeekableFlag::eSeekable);
787 Assert (useInStream.IsSeekable ());
788 byte buf[8 * 1024]; // experimentally - larger buffers didn't help speed
789 constexpr bool kUseSAX_ = true; // experimentally, on windows, sax about 10% faster than stream_parser/convert way
790 try {
791 if constexpr (kUseSAX_) {
// SAX style: BoostSAXHandler_ callbacks build the VariantValue directly
792 json::basic_parser<BoostSAXHandler_> p{json::parse_options{}};
793 const size_t targetChunkSize = std::size (buf);
794 size_t actualChunkSize;
795 boost::system::error_code ec;
796 while ((actualChunkSize = useInStream.ReadBlocking (span{buf, targetChunkSize}).size ()) != 0) {
797 ec.clear ();
798 size_t nParsed = p.write_some (true, reinterpret_cast<const char*> (begin (buf)), actualChunkSize, ec);
799 Assert (nParsed <= actualChunkSize);
800 if (nParsed < actualChunkSize) {
// parser stopped early: put back the bytes it did not consume
801 useInStream.Seek (eFromCurrent, static_cast<SignedSeekOffsetType> (nParsed) - static_cast<SignedSeekOffsetType> (actualChunkSize));
802 break;
803 }
804 if (p.done ()) {
805 break; // good parse
806 }
807 else if (ec) {
808 Throw (BadFormatException{String::FromNarrowSDKString (ec.what ())});
809 }
810 }
811 if (not p.done ()) {
// tell the parser there is no more input (more=false), so it can complete or report an error
812 p.write_some (false, nullptr, 0, ec);
813 if (ec and ec != boost::json::error::extra_data /*and ec != boost::json::error::incomplete*/) {
814 Throw (BadFormatException{String::FromNarrowSDKString (ec.what ())});
815 }
816 }
817 return p.handler ().GetConstructedValue ();
818 }
819 else {
// alternative (currently disabled by kUseSAX_): build a boost json value, then convert it to VariantValue
820 json::stream_parser p;
821 const size_t targetChunkSize = std::size (buf);
822 size_t actualChunkSize;
823 while ((actualChunkSize = useInStream.ReadBlocking (span{buf, targetChunkSize}).size ()) != 0) {
824 boost::system::error_code ec;
825 size_t nParsed = p.write_some (reinterpret_cast<const char*> (begin (buf)), actualChunkSize, ec);
826 Assert (nParsed <= actualChunkSize);
827 if (nParsed < actualChunkSize) {
828 useInStream.Seek (eFromCurrent, static_cast<SignedSeekOffsetType> (nParsed) - static_cast<SignedSeekOffsetType> (actualChunkSize));
829 break;
830 }
831 if (p.done ()) {
832 break; // good parse
833 }
834 else if (ec) {
835 Throw (BadFormatException{String::FromNarrowSDKString (ec.what ())});
836 }
837 }
838 if (not p.done ()) {
839 p.finish (); // in case wrote text like '3' to buffer, ambiguous if done
840 }
841 Assert (p.done ());
842 return DataExchange::VariantValue{p.release ()}; // Transform boost objects to Stroika objects
843 }
844 }
845 catch (...) {
// normalize every failure (including the BadFormatExceptions thrown above) to a BadFormatException carrying the original's text
846 Throw (BadFormatException{Characters::ToString (current_exception ())});
847 }
848 }
// Text input: requires a seekable stream; converted to a byte reader and delegated to the byte overload above.
849 virtual VariantValue Read (const InputStream::Ptr<Character>& in) override
850 {
851#if USE_NOISY_TRACE_IN_THIS_MODULE_
852 Debug::TraceContextBumper ctx{"DataExchange::JSON::Reader::BoostRep_::Read"};
853#endif
854 Require (in.IsSeekable ());
855 return Read (_ToByteReader (in));
856 }
857};
858#endif
859
// Create the reader rep selected by options.fPreferredAlgorithm (ReaderOptions::Algorithm::eDEFAULT when unset).
// The eBoost case exists only when boost/json.hpp is available at compile time.
860inline auto Variant::JSON::Reader::mk_ (const ReaderOptions& options) -> shared_ptr<_IRep>
861{
862 switch (options.fPreferredAlgorithm.value_or (ReaderOptions::Algorithm::eDEFAULT)) {
863 case ReaderOptions::Algorithm::eStroikaNative:
864 return make_shared<NativeRep_> ();
865#if __has_include("boost/json.hpp")
866 case ReaderOptions::Algorithm::eBoost:
867 return make_shared<BoostRep_> ();
868#endif
869 default:
// any other algorithm value yields a null rep
871 return nullptr;
872 }
873}
874
// Construct a JSON Reader whose rep is chosen by mk_ (options).
875Variant::JSON::Reader::Reader (const ReaderOptions& options)
876 : inherited{mk_ (options)}
877{
878}
#define AssertNotReached()
Definition Assertions.h:355
constexpr const T & ValueOf(const optional< T > &t)
Same as *t, but Requires that 't' is engaged.
Definition Optional.inl:156
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
nonvirtual span< const CHAR_T > GetData(Memory::StackBuffer< CHAR_T > *probablyIgnoredBuf) const
access a span of data located inside the StringBuilder. Return internal pointer, or pointer internal ...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
Mapping_HashTable<KEY_TYPE, MAPPED_VALUE_TYPE, TRAITS> is a HashTable based concrete implementation o...
Sequence_stdvector<T> is an std::vector-based concrete implementation of the Sequence<T> container pa...
nonvirtual void insert(ArgByValueType< value_type > kvp)
Definition Mapping.inl:456
Simple variant-value (case variant union) object, with (variant) basic types analogous to a value in ...
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
nonvirtual optional< ElementType > ReadBlocking() const
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset) const
nonvirtual bool IsSeekable() const
Returns true iff this object was constructed with a seekable input stream rep.
Definition Stream.inl:44
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43
Ptr New(const InputStream::Ptr< byte > &src, optional< AutomaticCodeCvtFlags > codeCvtFlags={}, optional< SeekableFlag > seekable={}, ReadAhead readAhead=eReadAheadAllowed)
Create an InputStream::Ptr<Character> from the arguments (usually binary source) - which can be used ...
StreamReader is an non-essential Stream utility, adding simplicity of use for a common use case,...
nonvirtual SeekOffsetType Seek(SeekOffsetType offset)
Logically the same as InputStream::Ptr<ELEMENT_TYPE>::Seek () - but without being 'synchronized' it m...
nonvirtual optional< ElementType > ReadBlocking()
ReadBlocking () reads either a single element, or fills in argument intoBuffer - but never blocks (no...