Stroika Library 3.0d23x
 
Loading...
Searching...
No Matches
Xerces.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <fstream>
7
8#include <xercesc/validators/schema/identity/XPathException.hpp>
9
11#include "Stroika/Foundation/DataExchange/BadFormatException.h"
15#include "Stroika/Foundation/Execution/Exceptions.h"
16#include "Stroika/Foundation/Execution/Throw.h"
19#include "Stroika/Foundation/Memory/Common.h"
23
24#include "Xerces.h"
25
26using namespace Stroika::Foundation;
29using namespace Stroika::Foundation::DataExchange::XML;
30using namespace Stroika::Foundation::DataExchange::XML::DOM;
31using namespace Stroika::Foundation::DataExchange::XML::Schema;
32using namespace Stroika::Foundation::DataExchange::XML::Providers::Xerces;
33using namespace Stroika::Foundation::Debug;
34using namespace Stroika::Foundation::Execution;
35using namespace Stroika::Foundation::Streams;
36
37XERCES_CPP_NAMESPACE_USE;
38
39using std::byte;
40
41using Memory::MakeSharedPtr;
42
43// Comment this in to turn on aggressive noisy DbgTrace in this module
44//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
45
46static_assert (qStroika_HasComponent_xerces, "Don't compile this file if qStroika_HasComponent_xerces not set");
47
48CompileTimeFlagChecker_SOURCE (Stroika::Foundation::DataExchange::XML, qStroika_HasComponent_xerces, qStroika_HasComponent_xerces);
49
50namespace {
51 // From https://www.w3.org/TR/xml-names/
52 // In a namespace declaration, the URI reference is the normalized value of the attribute, so replacement of XML
53 // character and entity references has already been done before any comparison.
54 //
55 // Not 100% sure, but I think that means decode %x stuff too (at least that fixes bug I'm encountering with ASTM-CCR files)
56 // --LGP 2024-01-31
57 constexpr auto kUseURIEncodingFlag_ = URI::StringPCTEncodedFlag::eDecoded;
58}
59
/*
 *  START_LIB_EXCEPTION_MAPPER_ / END_LIB_EXCEPTION_MAPPER_ bracket a region of code that calls into Xerces.
 *
 *  START opens a try block; END closes it and supplies the handlers:
 *      -   xercesc::OutOfMemoryException is translated into std::bad_alloc
 *      -   anything else is passed along unchanged through Execution::ReThrow ()
 *
 *  The two macros must always be used as a matched pair within one function body.
 */
#define START_LIB_EXCEPTION_MAPPER_ try {
#define END_LIB_EXCEPTION_MAPPER_                                                               \
    }                                                                                           \
    catch (const xercesc::OutOfMemoryException&) {                                              \
        Execution::Throw (bad_alloc{}, "xerces OutOfMemoryException - throwing bad_alloc");     \
    }                                                                                           \
    catch (...) {                                                                               \
        Execution::ReThrow ();                                                                  \
    }
73
74#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
75/*
76 * A helpful class to isolate Xerces (etc) memory management calls. Could be the basis
77 * of future performance/memory optimizations, but for now, just a helpful debugging/tracking
78 * class.
79 */
80struct Provider::MyXercesMemMgr_ : public MemoryManager {
81public:
82 MyXercesMemMgr_ ()
83 : fAllocator{fBaseAllocator}
84 {
85 }
86 ~MyXercesMemMgr_ ()
87 {
88#if qStroika_Foundation_Debug_AssertionsChecked
89 auto snapshot = fAllocator.GetSnapshot ();
90 Assert (snapshot.fAllocations.empty ()); // else we have a memory leak which can be perused here in the debugger
91#endif
92 }
93
94public:
97 mutex fLastSnapshot_CritSection;
98 Memory::LeakTrackingGeneralPurposeAllocator::Snapshot fLastSnapshot;
99
100public:
101 void DUMPCurMemStats ()
102 {
103 TraceContextBumper ctx{"Xerces::MyXercesMemMgr_::DUMPCurMemStats"};
104 [[maybe_unused]] lock_guard critSec{fLastSnapshot_CritSection};
105 fAllocator.DUMPCurMemStats (fLastSnapshot);
106 // now copy current map to prev for next time this gets called
107 fLastSnapshot = fAllocator.GetSnapshot ();
108 }
109
110public:
111 virtual MemoryManager* getExceptionMemoryManager () override
112 {
113 return this;
114 }
115 virtual void* allocate (XMLSize_t size) override
116 {
117 try {
118 return fAllocator.Allocate (size);
119 }
120 catch (...) {
121 // NB: use throw not Exception::Throw () since that requires its a subclass of exception (or SilentException)
122 throw (OutOfMemoryException{}); // quirk cuz this is the class Xerces expects and catches internally (why not bad_alloc?) - sigh...
123 }
124 }
125 virtual void deallocate (void* p) override
126 {
127 if (p != nullptr) {
128 return fAllocator.Deallocate (p);
129 }
130 }
131};
132#endif
133
134namespace {
135 struct MySchemaResolver_ : public XMLEntityResolver {
136 private:
137 Resource::ResolverPtr fResolver_;
138
139 public:
140 MySchemaResolver_ (const Resource::ResolverPtr& resolver)
141 : fResolver_{resolver}
142 {
143 }
144 virtual InputSource* resolveEntity (XMLResourceIdentifier* resourceIdentifier) override
145 {
146 // @todo consider exposting this API outside the module, and/or providing option to wget missing namespaces, or have option for where to fetch from?
147 TraceContextBumper ctx{"Xerces::{}::MySchemaResolver_::resolveEntity"};
148 RequireNotNull (resourceIdentifier);
149
150 if (fResolver_ != nullptr) {
151 if (auto o = fResolver_.Lookup (Resource::Name{.fNamespace = xercesString2String (resourceIdentifier->getNameSpace ()),
152 .fPublicID = xercesString2String (resourceIdentifier->getPublicId ()),
153 .fSystemID = xercesString2String (resourceIdentifier->getSystemId ())})) {
154 return mkMemInputSrc_ (o->fData);
155 }
156 }
157 return nullptr;
158 }
159
160 private:
161 static InputSource* mkMemInputSrc_ (const Memory::BLOB& schemaData)
162 {
163 if (schemaData.empty ()) [[unlikely]] {
164 // not sure this is useful case? Should assert/throw?
165 return new MemBufInputSource{nullptr, 0, "", true};
166 }
167 else {
168 XMLByte* useBuf = new XMLByte[schemaData.GetSize ()];
169 memcpy (useBuf, schemaData.begin (), schemaData.GetSize ());
170 return new MemBufInputSource{useBuf, schemaData.GetSize (), "", true};
171 }
172 }
173 };
174}
175
176namespace {
177 void SetupCommonParserFeatures_ (SAX2XMLReader& reader)
178 {
179 reader.setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
180 reader.setFeature (XMLUni::fgXercesDynamic, false);
181 reader.setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, false); // false: * *Do not report attributes used for Namespace declarations, and optionally do not report original prefixed names
182 }
183 void SetupCommonParserFeatures_ (SAX2XMLReader& reader, bool validatingWithSchema)
184 {
185 reader.setFeature (XMLUni::fgXercesSchema, validatingWithSchema);
186 reader.setFeature (XMLUni::fgSAX2CoreValidation, validatingWithSchema);
187
188 // The purpose of this maybe to find errors with the schema itself, in which case,
189 // we shouldn't bother (esp for release builds)
190 // (leave for now til we performance test)
191 // -- LGP 2007-06-21
192 reader.setFeature (XMLUni::fgXercesSchemaFullChecking, validatingWithSchema);
193 reader.setFeature (XMLUni::fgXercesUseCachedGrammarInParse, validatingWithSchema);
194 reader.setFeature (XMLUni::fgXercesIdentityConstraintChecking, validatingWithSchema);
195
196 // we only want to use loaded schemas - don't save any more into the grammar cache, since that
197 // is global/shared.
198 reader.setFeature (XMLUni::fgXercesCacheGrammarFromParse, false);
199 }
200}
201
202namespace {
203 struct Map2StroikaExceptionsErrorReporter_ : public XMLErrorReporter, public ErrorHandler {
204 public:
205 virtual void error ([[maybe_unused]] const unsigned int errCode, [[maybe_unused]] const XMLCh* const errDomain,
206 [[maybe_unused]] const ErrTypes type, const XMLCh* const errorText, [[maybe_unused]] const XMLCh* const systemId,
207 [[maybe_unused]] const XMLCh* const publicId, const XMLFileLoc lineNum, const XMLFileLoc colNum) override
208 {
209 Execution::Throw (BadFormatException{errorText, static_cast<unsigned int> (lineNum), static_cast<unsigned int> (colNum), 0});
210 }
211 virtual void resetErrors () override
212 {
213 }
214 virtual void warning ([[maybe_unused]] const SAXParseException& exc) override
215 {
216 // ignore
217 }
218 virtual void error (const SAXParseException& exc) override
219 {
220 Execution::Throw (BadFormatException{exc.getMessage (), static_cast<unsigned int> (exc.getLineNumber ()),
221 static_cast<unsigned int> (exc.getColumnNumber ()), 0});
222 }
223 virtual void fatalError (const SAXParseException& exc) override
224 {
225 Execution::Throw (BadFormatException{exc.getMessage (), static_cast<unsigned int> (exc.getLineNumber ()),
226 static_cast<unsigned int> (exc.getColumnNumber ()), 0});
227 }
228 };
229}
230
231namespace {
232 struct SchemaRep_ : IXercesSchemaRep {
233#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
234 static inline atomic<unsigned int> sLiveCnt{0};
235#endif
236 SchemaRep_ (const Streams::InputStream::Ptr<byte>& schemaData, const Resource::ResolverPtr& resolver)
237 : fTargetNamespace{}
238 , fResolver{resolver}
239 , fSchemaData{schemaData.ReadAll ()}
240 {
241 AssertNotNull (XMLPlatformUtils::fgMemoryManager);
242 XMLGrammarPoolImpl* grammarPool = new (XMLPlatformUtils::fgMemoryManager) XMLGrammarPoolImpl{XMLPlatformUtils::fgMemoryManager};
243 try {
244 Require (not fSchemaData.empty ());
245 MemBufInputSource mis{reinterpret_cast<const XMLByte*> (fSchemaData.begin ()), fSchemaData.GetSize (), u""};
246
247 MySchemaResolver_ mySchemaResolver{resolver};
248 // Directly construct SAX2XMLReaderImpl so we can use XMLEntityResolver - which passes along namespace (regular
249 // EntityResolve just passes systemID
250 // shared_ptr<SAX2XMLReader> reader = shared_ptr<SAX2XMLReader> (XMLReaderFactory::createXMLReader (XMLPlatformUtils::fgMemoryManager, grammarPool));
251 //
252 shared_ptr<SAX2XMLReaderImpl> reader = shared_ptr<SAX2XMLReaderImpl> (
253 new (XMLPlatformUtils::fgMemoryManager) SAX2XMLReaderImpl{XMLPlatformUtils::fgMemoryManager, grammarPool});
254 reader->setXMLEntityResolver (&mySchemaResolver);
255
256 SetupCommonParserFeatures_ (*reader, true);
257
258 // Reset fgXercesCacheGrammarFromParse to TRUE so we actually load the XSD here
259 reader->setFeature (XMLUni::fgXercesCacheGrammarFromParse, true);
260 reader->setErrorHandler (&fErrorReporter_);
261 xercesc::Grammar* g = reader->loadGrammar (mis, Grammar::SchemaGrammarType, true);
262 AssertNotNull (g);
263 const XMLCh* ts = g->getTargetNamespace ();
264 if (ts and *ts) {
265 fTargetNamespace = URI{xercesString2String (ts)};
266 }
267 }
268 catch (...) {
269 delete grammarPool;
270 Execution::ReThrow ();
271 }
272 fCachedGrammarPool = grammarPool;
273#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
274 ++sLiveCnt;
275#endif
276 }
277 SchemaRep_ (const SchemaRep_&) = delete;
278 virtual ~SchemaRep_ ()
279 {
280 delete fCachedGrammarPool;
281#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
282 Assert (sLiveCnt > 0);
283 --sLiveCnt;
284#endif
285 }
286 optional<URI> fTargetNamespace;
287 Resource::ResolverPtr fResolver;
288 Memory::BLOB fSchemaData;
289 xercesc::XMLGrammarPool* fCachedGrammarPool{nullptr};
290 Map2StroikaExceptionsErrorReporter_ fErrorReporter_;
291
292 virtual const Providers::ISchemaProvider* GetProvider () const override
293 {
294 return &XML::Providers::Xerces::kDefaultProvider;
295 }
296 virtual optional<URI> GetTargetNamespace () const override
297 {
298 return fTargetNamespace;
299 }
300 virtual Memory::BLOB GetData () override
301 {
302 return fSchemaData;
303 }
304 // not super useful, except if you want to clone
305 virtual Resource::ResolverPtr GetResolver () override
306 {
307 return fResolver;
308 }
309 virtual xercesc::XMLGrammarPool* GetCachedGrammarPool () override
310 {
311 return fCachedGrammarPool;
312 }
313 };
314}
315
316namespace {
317 class StdIStream_InputSource_ : public InputSource {
318 protected:
319 class StdIStream_InputStream : public XERCES_CPP_NAMESPACE_QUALIFIER BinInputStream {
320 public:
321 StdIStream_InputStream (InputStream::Ptr<byte> in)
322 : fSource{in}
323 {
324 }
325 ~StdIStream_InputStream () = default;
326
327 public:
328 virtual XMLFilePos curPos () const override
329 {
330 return fSource.GetOffset ();
331 }
332 virtual XMLSize_t readBytes (XMLByte* const toFill, const XMLSize_t maxToRead) override
333 {
334 return fSource.ReadBlocking (span{reinterpret_cast<byte*> (toFill), maxToRead}).size ();
335 }
336 virtual const XMLCh* getContentType () const override
337 {
338 return nullptr;
339 }
340
341 protected:
343 };
344
345 public:
346 StdIStream_InputSource_ (InputStream::Ptr<byte> in, const XMLCh* const bufId = nullptr)
347 : InputSource{bufId}
348 , fSource{in}
349 {
350 }
351 virtual BinInputStream* makeStream () const override
352 {
353 return new (getMemoryManager ()) StdIStream_InputStream{fSource};
354 }
355
356 protected:
358 };
359}
360
361namespace {
362 class SAX2PrintHandlers_ : public DefaultHandler {
363 private:
365
366 public:
367 SAX2PrintHandlers_ (StructuredStreamEvents::IConsumer* callback)
368 : fCallback_{callback}
369 {
370 }
371
372 public:
373 virtual void startDocument () override
374 {
375 if (fCallback_ != nullptr) {
376 fCallback_->StartDocument ();
377 }
378 }
379 virtual void endDocument () override
380 {
381 if (fCallback_ != nullptr) {
382 fCallback_->EndDocument ();
383 }
384 }
385 virtual void startElement (const XMLCh* const uri, const XMLCh* const localName, const XMLCh* const /*qname*/, const Attributes& attributes) override
386 {
387 Require (uri != nullptr);
388 Require (localName != nullptr);
389 if (fCallback_ != nullptr) {
390 using Name = StructuredStreamEvents::Name;
391 Mapping<Name, String> useAttrs;
392 size_t attributesLen = attributes.getLength ();
393 for (XMLSize_t i = 0; i < attributesLen; ++i) {
394 Name attrName{xercesString2String (attributes.getURI (i)), xercesString2String (attributes.getLocalName (i)), Name::eAttribute};
395 useAttrs.Add (attrName, xercesString2String (attributes.getValue (i)));
396 }
397 fCallback_->StartElement (Name{xercesString2String (uri), xercesString2String (localName)}, useAttrs);
398 }
399 }
400 virtual void endElement (const XMLCh* const uri, const XMLCh* const localName, [[maybe_unused]] const XMLCh* const qname) override
401 {
402 Require (uri != nullptr);
403 Require (localName != nullptr);
404 Require (qname != nullptr);
405 if (fCallback_ != nullptr) {
406 fCallback_->EndElement (StructuredStreamEvents::Name{xercesString2String (uri), xercesString2String (localName)});
407 }
408 }
409 virtual void characters (const XMLCh* const chars, const XMLSize_t length) override
410 {
411 Require (chars != nullptr);
412 Require (length != 0);
413 if (fCallback_ != nullptr) {
414 fCallback_->TextInsideElement (xercesString2String (chars, chars + length));
415 }
416 }
417 };
418}
419
420namespace {
421 /*
422 * Short lifetime. Don't save these iterator objects. Just use them to enumerate a collection and then let them
423 * go. They (could) become invalid after a call to update the database.
424 */
425 class SubNodeIterator_ {
426 public:
427 class Rep {
428 public:
429 Rep () = default;
430 virtual ~Rep () = default;
431
432 public:
433 virtual bool IsAtEnd () const = 0;
434 virtual void Next () = 0;
435 virtual Node::Ptr Current () const = 0;
436 virtual size_t GetLength () const = 0;
437 };
438 explicit SubNodeIterator_ (const shared_ptr<Rep>& from)
439 : fRep{from}
440 {
441 }
442
443 public:
444 nonvirtual bool NotDone () const
445 {
446 return not fRep->IsAtEnd ();
447 }
448 nonvirtual bool IsAtEnd () const
449 {
450 return fRep->IsAtEnd ();
451 }
452 nonvirtual void Next ()
453 {
454 fRep->Next ();
455 }
456 nonvirtual Node::Ptr Current () const
457 {
458 return fRep->Current ();
459 }
460 nonvirtual size_t GetLength () const
461 {
462 return fRep->GetLength ();
463 }
464 nonvirtual void operator++ ()
465 {
466 Next ();
467 }
468 nonvirtual void operator++ (int)
469 {
470 Next ();
471 }
472 nonvirtual Node::Ptr operator* () const
473 {
474 return Current ();
475 }
476
477 protected:
478 shared_ptr<Rep> fRep;
479 };
480}
481
namespace {
    /*
     *  Non-copyable scope guard for objects that are disposed of by calling release () on them.
     *  On destruction (or when replaced through set ()), a non-null held pointer has release () called on it.
     */
    template <class TYPE>
    class AutoRelease_ {
    public:
        AutoRelease_ (TYPE* p)
            : p_{p}
        {
        }
        AutoRelease_ (const AutoRelease_<TYPE>&)                     = delete;
        AutoRelease_<TYPE>& operator= (const AutoRelease_<TYPE>&) = delete;
        ~AutoRelease_ ()
        {
            ReleaseHeld_ ();
        }

        // Access to the held pointer; ownership is unaffected.
        TYPE* get () const
        {
            return p_;
        }
        TYPE* operator->() const
        {
            return p_;
        }
        TYPE& operator* () const
        {
            return *p_;
        }
        operator TYPE* () const
        {
            return p_;
        }

        // Give up ownership: returns the held pointer and leaves this object empty (nothing is released).
        TYPE* adopt ()
        {
            return swap (nullptr);
        }
        // Replace the held pointer WITHOUT releasing the old one; the old pointer is returned.
        TYPE* swap (TYPE* p)
        {
            TYPE* const previous = p_;
            p_                   = p;
            return previous;
        }
        // Release the currently held pointer (if any), then take ownership of p.
        void set (TYPE* p)
        {
            ReleaseHeld_ ();
            p_ = p;
        }

    private:
        void ReleaseHeld_ ()
        {
            if (p_ != nullptr) {
                p_->release ();
            }
        }

    private:
        TYPE* p_;
    };
}
537
538namespace {
539 DOMImplementation& GetDOMIMPL_ ()
540 {
541 static constexpr XMLCh kDOMImplFeatureDeclaration_[] = u"Core";
542 // safe to save in a static var? -- LGP 2007-05-20
543 // from perusing implementation - this appears safe to cache and re-use in differnt threads
544 static DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation (kDOMImplFeatureDeclaration_);
545 AssertNotNull (impl);
546 return *impl;
547 }
548
549 constexpr bool qDumpXMLOnValidationError_ = qStroika_Foundation_Debug_AssertionsChecked;
550
551#if qStroika_Foundation_Debug_AssertionsChecked
552 bool ValidNewNodeName_ (const String& n)
553 {
554 if (n.empty ()) {
555 return false;
556 }
557 if (n.find (':') != wstring::npos) { // if triggered, you probably used XPath as arg for CreateElement call!!!
558 return false;
559 }
560 return true;
561 }
562#endif
563}
564
565namespace {
566 void DoWrite2Stream_ (xercesc::DOMNode* node2Write, const Streams::OutputStream::Ptr<byte>& to, const SerializationOptions& options)
567 {
568 AutoRelease_<DOMLSOutput> theOutputDesc = GetDOMIMPL_ ().createLSOutput ();
569 theOutputDesc->setEncoding (XMLUni::fgUTF8EncodingString);
570 AutoRelease_<DOMLSSerializer> writer = GetDOMIMPL_ ().createLSSerializer ();
571 DOMConfiguration* dc = writer->getDomConfig ();
572 dc->setParameter (XMLUni::fgDOMWRTFormatPrettyPrint, options.fPrettyPrint);
573 dc->setParameter (XMLUni::fgDOMWRTBOM, true);
574 class myOutputter : public XMLFormatTarget {
575 public:
577 myOutputter (const Streams::OutputStream::Ptr<byte>& to)
578 : fOut{to}
579 {
580 }
581 virtual void writeChars (const XMLByte* const toWrite, const XMLSize_t count, [[maybe_unused]] XMLFormatter* const formatter) override
582 {
583 fOut.Write (span<const byte>{reinterpret_cast<const byte*> (toWrite), count});
584 }
585 virtual void flush () override
586 {
587 fOut.Flush ();
588 }
589 };
590 myOutputter dest{to};
591 theOutputDesc->setByteStream (&dest);
592 writer->write (node2Write, theOutputDesc);
593 }
594 // Currently unused but maybe needed again if we support 'moving' nodes from one doc to another
595 DOMNode* RecursivelySetNamespace_ (DOMNode* n, const XMLCh* namespaceURI)
596 {
597 RequireNotNull (n);
598 // namespaceURI CAN be nullptr
599 switch (n->getNodeType ()) {
600 case DOMNode::ELEMENT_NODE: {
601 xercesc::DOMDocument* doc = n->getOwnerDocument ();
602 AssertNotNull (doc);
603 n = doc->renameNode (n, namespaceURI, n->getNodeName ());
604 ThrowIfNull (n);
605 for (DOMNode* child = n->getFirstChild (); child != nullptr; child = child->getNextSibling ()) {
606 child = RecursivelySetNamespace_ (child, namespaceURI);
607 }
608 } break;
609 }
610 return n;
611 }
612 constexpr XMLCh* kXerces2XMLDBDocumentKey_ = nullptr; // just a unique key to lookup our doc object from the xerces doc object.
613 // Could use real str, then xerces does strcmp() - but this appears slightly faster
614 // so long as no conflict....
615 String GetTextForDOMNode_ (const DOMNode* node)
616 {
617 RequireNotNull (node);
618 if (node->getNodeType () == DOMNode::COMMENT_NODE) {
619 // The below hack doesn't seem to work for comment nodes - at least in one case - they had
620 // no children nodes so just returned empty string...
621 //
622 // The above comemnt about comment-nodes not working is old, and undated. Revisit this at some point. Though this backup mode isn't so bad.
623 goto BackupMode;
624 }
625
626 {
627 // try quick impl if all children are textnodes
628 //
629 // This trick is from:
630 // http://www.codesynthesis.com/~boris/blog/category/xerces-c/
631 // 2007-06-13, because default impl appears to allocate strings and never frees (til owning doc freed)
632 // for getTextContent calls...
633 //
634 using xercesc::DOMNode;
635 using xercesc::DOMText;
637 for (DOMNode* n = node->getFirstChild (); n != nullptr; n = n->getNextSibling ()) {
638 switch (n->getNodeType ()) {
639 case DOMNode::TEXT_NODE:
640 case DOMNode::CDATA_SECTION_NODE: {
641 DOMText* t (static_cast<DOMText*> (n));
642 // Note - we don't do the usual 'reserve' trick here because the string is generally made up of a single text node
643 r += t->getData ();
644 break;
645 }
646 case DOMNode::ELEMENT_NODE: {
647 goto BackupMode;
648 }
649 }
650 }
651 return r.str ();
652 }
653 BackupMode:
654 /*
655 * Note that this is SOMETHING OF A (temporary) memory leak. Xerces does free the memory when the document is freed.
656 */
657 DbgTrace ("WARNING: GetTextForDOMNode_::BackupMode used"_f);
658 return node->getTextContent ();
659 }
660
661 Node::Ptr WrapXercesNodeInStroikaNode_ (DOMNode* n);
662 Element::Ptr WrapXercesNodeInStroikaNode_ (DOMElement* n);
663}
664
665namespace {
666 class SubNodeIteratorOver_SiblingList_Rep_ : public SubNodeIterator_::Rep,
667 Memory::UseBlockAllocationIfAppropriate<SubNodeIteratorOver_SiblingList_Rep_> {
668 public:
669 // Called iterates over CHILDREN of given parentNode
670 SubNodeIteratorOver_SiblingList_Rep_ (DOMNode* nodeParent)
671 : fParentNode{nodeParent}
672 , fCachedMainListLen{static_cast<size_t> (-1)}
673 {
674 RequireNotNull (nodeParent);
675 START_LIB_EXCEPTION_MAPPER_
676 {
677 fCurNode_ = nodeParent->getFirstChild ();
678 }
679 END_LIB_EXCEPTION_MAPPER_
680 }
681 virtual bool IsAtEnd () const override
682 {
683 return fCurNode_ == nullptr;
684 }
685 virtual void Next () override
686 {
687 Require (not IsAtEnd ());
688 AssertNotNull (fCurNode_);
689 START_LIB_EXCEPTION_MAPPER_
690 {
691 fCurNode_ = fCurNode_->getNextSibling ();
692 }
693 END_LIB_EXCEPTION_MAPPER_
694 }
695 virtual Node::Ptr Current () const override
696 {
697 return WrapXercesNodeInStroikaNode_ (fCurNode_);
698 }
699 virtual size_t GetLength () const override
700 {
701 if (fCachedMainListLen == static_cast<size_t> (-1)) {
702 size_t n = 0;
703 START_LIB_EXCEPTION_MAPPER_
704 {
705 for (DOMNode* i = fParentNode->getFirstChild (); i != nullptr; (i = i->getNextSibling ()), ++n)
706 ;
707 }
708 END_LIB_EXCEPTION_MAPPER_
709 fCachedMainListLen = n;
710 }
711 return fCachedMainListLen;
712 }
713
714 private:
715 DOMNode* fParentNode{nullptr};
716 DOMNode* fCurNode_{nullptr};
717 mutable size_t fCachedMainListLen{};
718 };
719}
720
721namespace {
722 struct NodeRep_ : IXercesNodeRep, Memory::UseBlockAllocationIfAppropriate<NodeRep_> {
723 NodeRep_ (DOMNode* n)
724 : fNode_{n}
725 {
726 RequireNotNull (n);
727 }
728 virtual const Providers::IDOMProvider* GetProvider () const override
729 {
730 return &Providers::Xerces::kDefaultProvider;
731 }
732 virtual bool Equals (const IRep* rhs) const override
733 {
734 RequireNotNull (fNode_);
735 RequireNotNull (rhs);
736 return fNode_ == dynamic_cast<const NodeRep_*> (rhs)->fNode_;
737 }
738 virtual Node::Type GetNodeType () const override
739 {
740 AssertNotNull (fNode_);
741 START_LIB_EXCEPTION_MAPPER_
742 {
743 switch (fNode_->getNodeType ()) {
744 case DOMNode::ELEMENT_NODE:
745 return Node::eElementNT;
746 case DOMNode::ATTRIBUTE_NODE:
747 return Node::eAttributeNT;
748 case DOMNode::TEXT_NODE:
749 return Node::eTextNT;
750 case DOMNode::COMMENT_NODE:
751 return Node::eCommentNT;
752 default:
753 return Node::eOtherNT;
754 }
755 }
756 END_LIB_EXCEPTION_MAPPER_
757 }
758 virtual NameWithNamespace GetName () const override
759 {
760 AssertNotNull (fNode_);
761 Require (GetNodeType () == Node::eElementNT or GetNodeType () == Node::eAttributeNT);
762 START_LIB_EXCEPTION_MAPPER_
763 {
764 AssertNotNull (fNode_->getNodeName ());
765 const XMLCh* n = fNode_->getNamespaceURI ();
766 return NameWithNamespace{n == nullptr ? optional<URI>{} : URI{xercesString2String (n)}, fNode_->getNodeName ()};
767 }
768 END_LIB_EXCEPTION_MAPPER_
769 }
770 virtual void SetName (const NameWithNamespace& name) override
771 {
772 AssertNotNull (fNode_);
773#if qStroika_Foundation_Debug_AssertionsChecked
774 Require (ValidNewNodeName_ (name.fName));
775#endif
776 START_LIB_EXCEPTION_MAPPER_
777 {
778 xercesc::DOMDocument* doc = fNode_->getOwnerDocument ();
779 AssertNotNull (doc);
780 fNode_ = doc->renameNode (
781 fNode_, name.fNamespace == nullopt ? nullptr : name.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
782 name.fName.As<u16string> ().c_str ());
783 AssertNotNull (fNode_);
784 }
785 END_LIB_EXCEPTION_MAPPER_
786 }
787 virtual String GetValue () const override
788 {
789 AssertNotNull (fNode_);
790 START_LIB_EXCEPTION_MAPPER_
791 {
792 return GetTextForDOMNode_ (fNode_);
793 }
794 END_LIB_EXCEPTION_MAPPER_
795 }
796 virtual void SetValue (const String& v) override
797 {
798 AssertNotNull (fNode_);
799 START_LIB_EXCEPTION_MAPPER_
800 {
801 fNode_->setTextContent (v.empty () ? nullptr : v.As<u16string> ().c_str ());
802 }
803 END_LIB_EXCEPTION_MAPPER_
804 }
805 virtual void DeleteNode () override
806 {
807 START_LIB_EXCEPTION_MAPPER_
808 {
809 DOMNode* selNode = fNode_;
810 ThrowIfNull (selNode);
811 DOMNode* parentNode = selNode->getParentNode ();
812 if (parentNode == nullptr) {
813 // This happens if the selected node is an attribute
814 if (fNode_ != nullptr) {
815 const XMLCh* ln = selNode->getNodeName ();
816 AssertNotNull (ln);
817 DOMElement* de = dynamic_cast<DOMElement*> (fNode_);
818 de->removeAttribute (ln);
819 }
820 }
821 else {
822 (void)parentNode->removeChild (selNode);
823 }
824 }
825 END_LIB_EXCEPTION_MAPPER_
826 }
827 virtual Node::Ptr GetParentNode () const override
828 {
829 AssertNotNull (fNode_);
830 START_LIB_EXCEPTION_MAPPER_
831 {
832 auto p = fNode_->getParentNode ();
833 return p == nullptr ? nullptr : WrapXercesNodeInStroikaNode_ (p);
834 }
835 END_LIB_EXCEPTION_MAPPER_
836 }
837 virtual void Write (const Streams::OutputStream::Ptr<byte>& to, const SerializationOptions& options) const override
838 {
839 START_LIB_EXCEPTION_MAPPER_
840 {
841 DoWrite2Stream_ (fNode_, to, options);
842 }
843 END_LIB_EXCEPTION_MAPPER_
844 }
845 virtual xercesc::DOMNode* GetInternalTRep () override
846 {
847 return fNode_;
848 }
849 // must carefully think out mem managment here - cuz not ref counted - around as long as owning doc...
850 DOMNode* fNode_;
851 };
852}
853
854namespace {
855 DISABLE_COMPILER_MSC_WARNING_START (4250) // inherits via dominance warning
856 struct ElementRep_ : Element::IRep, Memory::InheritAndUseBlockAllocationIfAppropriate<ElementRep_, NodeRep_> {
858 ElementRep_ (DOMNode* n)
859 : inherited{n}
860 {
861 RequireNotNull (n);
862 Require (n->getNodeType () == DOMNode::ELEMENT_NODE);
863 }
864 virtual Node::Type GetNodeType () const override
865 {
866 AssertNotNull (fNode_);
867 Assert (fNode_->getNodeType () == DOMNode::ELEMENT_NODE);
868 return Node::eElementNT;
869 }
870 virtual optional<String> GetAttribute (const NameWithNamespace& attrName) const override
871 {
872 AssertNotNull (fNode_);
873 START_LIB_EXCEPTION_MAPPER_
874 {
875 if (fNode_->getNodeType () == DOMNode::ELEMENT_NODE) {
876 DOMElement* elt = Debug::UncheckedDynamicCast<DOMElement*> (fNode_);
877 const XMLCh* s = attrName.fNamespace
878 ? elt->getAttributeNS (attrName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
879 attrName.fName.As<u16string> ().c_str ())
880 : elt->getAttribute (attrName.fName.As<u16string> ().c_str ());
881 AssertNotNull (s);
882 if (*s != '\0') {
883 return s;
884 }
885 }
886 return nullopt;
887 }
888 END_LIB_EXCEPTION_MAPPER_
889 }
890 virtual void SetAttribute (const NameWithNamespace& attrName, const optional<String>& v) override
891 {
892 Require (GetNodeType () == Node::eElementNT);
893 AssertNotNull (fNode_);
894 START_LIB_EXCEPTION_MAPPER_
895 {
896 DOMElement* element = dynamic_cast<DOMElement*> (fNode_);
897 ThrowIfNull (element);
898 /*
899 * SINCE STROIKA V3.0d5 - if no namespace given on attrName, we use nullptr as the namespace (not do any inheritance)
900 * attributes usually have no namespace!!!!
901 */
902 if (v) {
903 /*
904 * For reasons that elude maybe (maybe because it was standard for XML early on)
905 * all my attributes are free of namespaces. So why use setAttributeNS? Because otherwise
906 * the XQilla code fails to match on the attribute names at all in its XPath stuff.
907 * Considered copying the namespace from the parent element (fNode_->getNamespaceURI()),
908 * but XQilla didnt like that either (maybe then I needed M: on xpath).
909 * A different subclass object of DOMAttrNode is created - one that doesnt have a getLocalName,
910 * or something like that. Anyhow - this appears to do the right thing for now...
911 * -- LGP 2007-06-13
912 *
913 * Not using XQilla anymore. And may have been another misunderstanding anyhow. So have attributes
914 * inherit the namespace of the element they are from, by default --LGP 2024-01-09
915 *
916 * MAYBE related to http://stroika-bugs.sophists.com/browse/STK-999 - diff symptoms but similar workaround
917 */
918 element->setAttributeNS (attrName.fNamespace ? attrName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str () : nullptr,
919 attrName.fName.As<u16string> ().c_str (), v->As<u16string> ().c_str ());
920 }
921 else {
922 element->removeAttributeNS (
923 attrName.fNamespace ? attrName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str () : nullptr,
924 attrName.fName.As<u16string> ().c_str ());
925 }
926 }
927 END_LIB_EXCEPTION_MAPPER_
928 }
929 virtual Element::Ptr InsertElement (const NameWithNamespace& eltName, const Element::Ptr& afterNode) override
930 {
931#if qStroika_Foundation_Debug_AssertionsChecked
932 Require (ValidNewNodeName_ (eltName.fName));
933#endif
934 START_LIB_EXCEPTION_MAPPER_
935 {
936 xercesc::DOMDocument* doc = fNode_->getOwnerDocument ();
937 // unsure if we should use smartpointer here - thinkout xerces & smart ptrs & mem management
938 // unclear if we should set namespace to existing node namespace or nullptr here?
939 DOMNode* child = doc->createElementNS ((eltName.fNamespace == nullopt)
940 ? fNode_->getNamespaceURI ()
941 : eltName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
942 eltName.fName.As<u16string> ().c_str ());
943 DOMNode* refChildNode = nullptr;
944 if (afterNode == nullptr) {
945 // this means PREPEND.
946 // If there is a first element, then insert before it. If no elements, then append is the same thing.
947 refChildNode = fNode_->getFirstChild ();
948 }
949 else {
950 refChildNode = dynamic_cast<NodeRep_&> (*afterNode.GetRep ()).GetInternalTRep ()->getNextSibling ();
951 }
952 DOMNode* childx = fNode_->insertBefore (child, refChildNode);
953 ThrowIfNull (childx);
954 return WrapXercesNodeInStroikaNode_ (childx);
955 }
956 END_LIB_EXCEPTION_MAPPER_
957 }
958 virtual Element::Ptr AppendElement (const NameWithNamespace& eltName) override
959 {
960#if qStroika_Foundation_Debug_AssertionsChecked
961 Require (ValidNewNodeName_ (eltName.fName));
962#endif
963 START_LIB_EXCEPTION_MAPPER_
964 {
965 xercesc::DOMDocument* doc = fNode_->getOwnerDocument ();
966 DOMNode* child{};
967 if (eltName.fNamespace) {
968 u16string namespaceURI = eltName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ();
969 child = doc->createElementNS (namespaceURI.c_str (), eltName.fName.As<u16string> ().c_str ());
970 }
971 else {
972 const XMLCh* namespaceURI = fNode_->getNamespaceURI (); //? or should be null?
973 child = doc->createElementNS (namespaceURI, eltName.fName.As<u16string> ().c_str ());
974 }
975 DOMNode* childx = fNode_->appendChild (child);
976 ThrowIfNull (childx);
977 return WrapXercesNodeInStroikaNode_ (childx);
978 }
979 END_LIB_EXCEPTION_MAPPER_
980 }
981 virtual Iterable<Node::Ptr> GetChildren () const override
982 {
983 AssertNotNull (fNode_);
984 START_LIB_EXCEPTION_MAPPER_
985 {
986 return Traversal::CreateGenerator<Node::Ptr> (
987 [sni = SubNodeIterator_{MakeSharedPtr<SubNodeIteratorOver_SiblingList_Rep_> (fNode_)}] () mutable -> optional<Node::Ptr> {
988 if (sni.IsAtEnd ()) {
989 return optional<Node::Ptr>{};
990 }
991 Node::Ptr r = *sni;
992 ++sni;
993 return r;
994 });
995 }
996 END_LIB_EXCEPTION_MAPPER_
997 }
998 struct XPathQueryHelper_ {
999 optional<AutoRelease_<DOMXPathNSResolver>> resolver;
1000 DOMXPathResult::ResultType rt{};
1001 optional<AutoRelease_<DOMXPathExpression>> expr;
1002 XPathQueryHelper_ (DOMNode* n, const XPath::Expression& e, bool firstOnly)
1003 {
1004 xercesc::DOMDocument* doc = n->getOwnerDocument ();
1005 resolver.emplace (doc->createNSResolver (nullptr));
1006 auto namespaceDefs = e.GetOptions ().fNamespaces;
1007 if (namespaceDefs.GetDefaultNamespace ()) {
1008 (*resolver)->addNamespaceBinding (
1009 u"", namespaceDefs.GetDefaultNamespace ()->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str ());
1010 }
1011 for (Common::KeyValuePair ni : namespaceDefs.GetPrefixedNamespaces ()) {
1012 (*resolver)->addNamespaceBinding (ni.fKey.As<u16string> ().c_str (),
1013 ni.fValue.As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str ());
1014 }
1015 try {
1016 expr.emplace (doc->createExpression (e.GetExpression ().As<u16string> ().c_str (), *resolver));
1017 }
1018 catch (const xercesc::DOMXPathException&) {
1019 // MANY basic things are not supported in Xerces XPath - like a[1] - brackets not supported.
1020 Execution::Throw (XPath::XPathExpressionNotSupported::kThe);
1021 }
1022 switch (e.GetOptions ().fResultTypeIndex.value_or (DOMXPathResult::ANY_TYPE)) {
1023 case XPath::ResultTypeIndex_v<Node::Ptr>: {
1024 auto o = e.GetOptions ();
1025 if (firstOnly) {
1026 rt = e.GetOptions ().fOrdered ? DOMXPathResult::FIRST_ORDERED_NODE_TYPE : DOMXPathResult::ANY_UNORDERED_NODE_TYPE;
1027 }
1028 else if (o.fSnapshot) {
1029 rt = e.GetOptions ().fOrdered ? DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE : DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE;
1030 }
1031 else {
1032 // Would make sense given docs, but appears unsupported by Xerces... --LGP 2024-01-12
1033 // rt = e.GetOptions ().fOrdered ? DOMXPathResult::ORDERED_NODE_ITERATOR_TYPE : DOMXPathResult::UNORDERED_NODE_ITERATOR_TYPE;
1034 rt = e.GetOptions ().fOrdered ? DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE : DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE;
1035 }
1036 } break;
1037 default:
1039 }
1040 }
1041 static optional<XPath::Result> ToResult_ (const xercesc::DOMXPathResult* r)
1042 {
1043 RequireNotNull (r);
1044 switch (r->getResultType ()) {
1045 case DOMXPathResult::NUMBER_TYPE:
1046 return XPath::Result{r->getNumberValue ()};
1047 case DOMXPathResult::BOOLEAN_TYPE:
1048 return XPath::Result{r->getBooleanValue ()};
1049 case DOMXPathResult::STRING_TYPE:
1050 return XPath::Result{xercesString2String (r->getStringValue ())};
1051 case DOMXPathResult::ANY_UNORDERED_NODE_TYPE:
1052 case DOMXPathResult::FIRST_ORDERED_NODE_TYPE:
1053 case DOMXPathResult::UNORDERED_NODE_ITERATOR_TYPE:
1054 case DOMXPathResult::ORDERED_NODE_ITERATOR_TYPE:
1055 case DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE:
1056 case DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE: {
1057 auto n = r->getNodeValue ();
1058 return n == nullptr ? optional<XPath::Result>{} : XPath::Result{Node::Ptr{WrapXercesNodeInStroikaNode_ (n)}};
1059 }
1060 default:
1062 }
1063 return nullopt;
1064 }
1065 };
1066 virtual optional<XPath::Result> LookupOne (const XPath::Expression& e) override
1067 {
1068 if constexpr (false) {
1069 // quickie impl just to test Lookup Code
1070 for (auto i : Lookup (e)) {
1071 return i;
1072 }
1073 return nullopt;
1074 }
1075 START_LIB_EXCEPTION_MAPPER_
1076 {
1077 XPathQueryHelper_ xpHelp{fNode_, e, true};
1078 AutoRelease_<xercesc::DOMXPathResult> r = (*xpHelp.expr)->evaluate (fNode_, xpHelp.rt, nullptr);
1079 return XPathQueryHelper_::ToResult_ (r);
1080 }
1081 END_LIB_EXCEPTION_MAPPER_
1082 }
1083 virtual Traversal::Iterable<XPath::Result> Lookup (const XPath::Expression& e) override
1084 {
1085 if (e.GetOptions ().fSnapshot) {
1086 XPath::Expression::Options e2o = e.GetOptions ();
1087 e2o.fSnapshot = false;
1088 return Sequence<XPath::Result>{this->Lookup (XPath::Expression{e.GetExpression (), e2o})};
1089 }
1090 shared_ptr<XPathQueryHelper_> xpHelp = MakeSharedPtr<XPathQueryHelper_> (fNode_, e, false);
1091 shared_ptr<AutoRelease_<xercesc::DOMXPathResult>> r =
1092 MakeSharedPtr<AutoRelease_<xercesc::DOMXPathResult>> ((*xpHelp->expr)->evaluate (fNode_, xpHelp->rt, nullptr));
1093 Assert (not e.GetOptions ().fSnapshot);
1094 if (xpHelp->rt == DOMXPathResult::UNORDERED_NODE_ITERATOR_TYPE or xpHelp->rt == DOMXPathResult::ORDERED_NODE_ITERATOR_TYPE) [[unlikely]] {
1095 return Traversal::CreateGenerator<XPath::Result> ([xpHelp, r, firstTime = true] () mutable -> optional<XPath::Result> {
1096 if (firstTime) {
1097 firstTime = false;
1098 return XPathQueryHelper_::ToResult_ (*r);
1099 }
1100 if ((*r)->iterateNext () == false) {
1101 return nullopt;
1102 }
1103 return XPathQueryHelper_::ToResult_ (*r);
1104 });
1105 }
1106 if (xpHelp->rt == DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE or xpHelp->rt == DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE) [[likely]] {
1107 return Traversal::CreateGenerator<XPath::Result> ([xpHelp, r, snapIdx = 0] () mutable -> optional<XPath::Result> {
1108 if (not(*r)->snapshotItem (snapIdx)) {
1109 return nullopt;
1110 }
1111 ++snapIdx;
1112 return XPathQueryHelper_::ToResult_ (*r);
1113 });
1114 }
1117 }
1118 virtual Element::Ptr GetChildElementByID (const String& id) const override
1119 {
1120 AssertNotNull (fNode_);
1121 START_LIB_EXCEPTION_MAPPER_
1122 {
1123 for (DOMNode* i = fNode_->getFirstChild (); i != nullptr; i = i->getNextSibling ()) {
1124 if (i->getNodeType () == DOMNode::ELEMENT_NODE) {
1125 DOMElement* elt = Debug::UncheckedDynamicCast<DOMElement*> (i);
1126 const XMLCh* s = elt->getAttribute (u"id");
1127 AssertNotNull (s);
1128 if (CString::Equals (s, id.As<u16string> ().c_str ())) {
1129 return WrapXercesNodeInStroikaNode_ (elt);
1130 }
1131 }
1132 }
1133 return Element::Ptr{nullptr};
1134 }
1135 END_LIB_EXCEPTION_MAPPER_
1136 }
1137 };
1138 DISABLE_COMPILER_MSC_WARNING_END (4250) // inherits via dominance warning
1139}
1140
1141namespace {
1142 inline void MakeXMLDoc_ (shared_ptr<xercesc::DOMDocument>& newXMLDoc)
1143 {
1144 Require (newXMLDoc == nullptr);
1145 newXMLDoc = shared_ptr<xercesc::DOMDocument> (GetDOMIMPL_ ().createDocument (0, nullptr, 0));
1146 newXMLDoc->setXmlStandalone (true);
1147 }
1148}
1149
1150namespace {
1151 class MyMaybeSchemaDOMParser_ {
1152 public:
1153 Map2StroikaExceptionsErrorReporter_ myErrReporter;
1154 shared_ptr<XercesDOMParser> fParser;
1155 Schema::Ptr fSchema{nullptr};
1156
1157 MyMaybeSchemaDOMParser_ () = delete;
1158 MyMaybeSchemaDOMParser_ (const MyMaybeSchemaDOMParser_&) = delete;
1159 MyMaybeSchemaDOMParser_ (const Schema::Ptr& schema)
1160 : fSchema{schema}
1161 {
1162 shared_ptr<IXercesSchemaRep> accessSchema = dynamic_pointer_cast<IXercesSchemaRep> (schema.GetRep ());
1163 if (accessSchema != nullptr) {
1164 fParser = MakeSharedPtr<XercesDOMParser> (nullptr, XMLPlatformUtils::fgMemoryManager, accessSchema->GetCachedGrammarPool ());
1165 fParser->cacheGrammarFromParse (false);
1166 fParser->useCachedGrammarInParse (true);
1167 fParser->setDoSchema (true);
1168 fParser->setValidationScheme (AbstractDOMParser::Val_Always);
1169 fParser->setValidationSchemaFullChecking (true);
1170 fParser->setIdentityConstraintChecking (true);
1171 }
1172 else {
1173 fParser = MakeSharedPtr<XercesDOMParser> ();
1174 }
1175 fParser->setDoNamespaces (true);
1176 fParser->setErrorHandler (&myErrReporter);
1177
1178 // @todo make load-external DTD OPTION specified in NEW for document!!! - parser! --LGP 2023-12-16
1179
1180 // LGP added 2009-09-07 - so must test carefully!
1181 {
1182 // I THINK this prevents loading URL-based DTDs - like the one refered to in http://demo.healthframeowrks.com/ when I load the xhmtl as xml
1183 // (it tkaes forever)
1184 fParser->setLoadExternalDTD (false);
1185 // I THINK this prevents loading URL-based schemas (not sure if/how that would have ever happened so I'm not sure that
1186 // this is for) - guessing a bit...
1187 // -- LGP 2009-09-04
1188 fParser->setLoadSchema (false);
1189 }
1190 }
1191 };
1192}
1193
1194namespace {
1195 Element::Ptr WrapXercesNodeInStroikaNode_ (DOMElement* n);
1196}
1197namespace {
1198 struct DocRep_ : DataExchange::XML::DOM::Document::IRep {
1199#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1200 static inline atomic<unsigned int> sLiveCnt{0};
1201#endif
1202 //
1203 // If this function is passed a nullptr exceptionResult - it will throw on bad validation.
1204 // If it is passed a non-nullptr exceptionResult - then it will map BadFormatException to being ignored, but filling in this
1205 // parameter with the exception details. This is used to allow 'advisory' read xsd validation failure, without actually fully
1206 // failing the read (for http://bugzilla/show_bug.cgi?id=513).
1207 //
1208 DocRep_ (const Streams::InputStream::Ptr<byte>& in, const Schema::Ptr& schema)
1209 {
1210 [[maybe_unused]] int ignoreMe = 0; // workaround quirk in clang-format
1211 START_LIB_EXCEPTION_MAPPER_
1212 {
1213 MakeXMLDoc_ (fXMLDoc);
1214 fXMLDoc->setUserData (kXerces2XMLDBDocumentKey_, this, nullptr);
1215 if (in != nullptr) {
1216 MyMaybeSchemaDOMParser_ myDOMParser{schema};
1217 myDOMParser.fParser->parse (StdIStream_InputSource_{in, u"XMLDB"});
1218 fXMLDoc.reset ();
1219 fXMLDoc = shared_ptr<xercesc::DOMDocument>{myDOMParser.fParser->adoptDocument ()};
1220 fXMLDoc->setXmlStandalone (true);
1221 fXMLDoc->setUserData (kXerces2XMLDBDocumentKey_, this, nullptr);
1222 }
1223 }
1224 END_LIB_EXCEPTION_MAPPER_
1225#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1226 ++sLiveCnt;
1227#endif
1228 }
1229 DocRep_ (const DocRep_& from)
1230 {
1231 START_LIB_EXCEPTION_MAPPER_
1232 {
1233 fXMLDoc = shared_ptr<xercesc::DOMDocument> (dynamic_cast<xercesc::DOMDocument*> (from.fXMLDoc->cloneNode (true)));
1234 fXMLDoc->setXmlStandalone (true);
1235 fXMLDoc->setUserData (kXerces2XMLDBDocumentKey_, this, nullptr);
1236 }
1237 END_LIB_EXCEPTION_MAPPER_
1238 EnsureNotNull (fXMLDoc);
1239#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1240 ++sLiveCnt;
1241#endif
1242 }
1243 virtual ~DocRep_ ()
1244 {
1245#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1246 Assert (sLiveCnt > 0);
1247 --sLiveCnt;
1248#endif
1249 }
1250 virtual const Providers::IDOMProvider* GetProvider () const override
1251 {
1252 return &Providers::Xerces::kDefaultProvider;
1253 }
1254 virtual bool GetStandalone () const override
1255 {
1256 AssertNotNull (fXMLDoc);
1257 return fXMLDoc->getXmlStandalone ();
1258 }
1259 virtual void SetStandalone (bool standalone) override
1260 {
1261 AssertNotNull (fXMLDoc);
1262 fXMLDoc->setXmlStandalone (standalone);
1263 }
1264 virtual Iterable<Node::Ptr> GetChildren () const override
1265 {
1266 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
1267 AssertNotNull (fXMLDoc);
1268 START_LIB_EXCEPTION_MAPPER_
1269 return Traversal::CreateGenerator<Node::Ptr> (
1270 [sni = SubNodeIterator_{MakeSharedPtr<SubNodeIteratorOver_SiblingList_Rep_> (fXMLDoc.get ())}] () mutable -> optional<Node::Ptr> {
1271 if (sni.IsAtEnd ()) {
1272 return optional<Node::Ptr>{};
1273 }
1274 Node::Ptr r = *sni;
1275 ++sni;
1276 return r;
1277 });
1278 END_LIB_EXCEPTION_MAPPER_
1279 }
1280 virtual Element::Ptr ReplaceRootElement (const NameWithNamespace& newEltName, bool childrenInheritNS) override
1281 {
1282 DOMElement* n = newEltName.fNamespace == nullopt
1283 ? fXMLDoc->createElement (newEltName.fName.As<u16string> ().c_str ())
1284 : fXMLDoc->createElementNS (newEltName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
1285 newEltName.fName.As<u16string> ().c_str ());
1286 AssertNotNull (n);
1287 DOMElement* oldRoot = fXMLDoc->getDocumentElement ();
1288 if (oldRoot == nullptr) {
1289 (void)fXMLDoc->insertBefore (n, nullptr);
1290 }
1291 else {
1292 (void)fXMLDoc->replaceChild (n, oldRoot);
1293 /*
1294 * I THOGUHT this was a memory leak, but that appears to have been wrong. First, the
1295 * DOMNode objects get associated with the document, and when the document is destroyed
1296 * this is cleaned up. Secondly, there are enough other memory leaks - its unclear if this
1297 * actually helped. Plus the memory management pattern used by Xerces - with its own sub-chunking etc,
1298 * makes it hard to tell.
1299 *
1300 * More importantly - this caused a regression in HealthFrame - which I didn't debug. The OHSD reports
1301 * like AAFP CCR report - will be rejected by our 'valid HTML' tester. Unclear if that's cuz we generate
1302 * different HTML, but more likely a bug with the load/checker code. Still - not worth worrying
1303 * about why at this stage (especially as we are about to upgrade our Xerces version - could get fixed
1304 * by that?).
1305 *
1306 * -- LGP 2009-05-15
1307 *
1308 * oldRoot->release ();
1309 */
1310 }
1311 Assert (fXMLDoc->getDocumentElement () == n);
1312 Element::Ptr r{WrapXercesNodeInStroikaNode_ (n)};
1313 if (childrenInheritNS and newEltName.fNamespace) {
1314 r.SetAttribute (kXMLNS, newEltName.fNamespace->As<String> ());
1315 }
1316 return r;
1317 }
1318 virtual void Write (const Streams::OutputStream::Ptr<byte>& to, const SerializationOptions& options) const override
1319 {
1320 TraceContextBumper ctx{"Xerces::DocRep_::Write"};
1321 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
1322 AssertNotNull (fXMLDoc);
1323 START_LIB_EXCEPTION_MAPPER_
1324 {
1325 DoWrite2Stream_ (fXMLDoc.get (), to, options);
1326 }
1327 END_LIB_EXCEPTION_MAPPER_
1328 }
1329#if qCompilerAndStdLib_arm_asan_FaultStackUseAfterScope_Buggy
1330 Stroika_Foundation_Debug_ATTRIBUTE_NO_SANITIZE_ADDRESS
1331#endif
1332 virtual void
1333 Validate (const Schema::Ptr& schema) const override
1334 {
1335 TraceContextBumper ctx{"Xerces::DocRep_::Validate"};
1336 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
1337 RequireNotNull (schema);
1338 START_LIB_EXCEPTION_MAPPER_
1339 {
1340 try {
1341 DbgTrace ("Validating against schema (target namespace '{}')"_f, schema.GetTargetNamespace ());
1342 // As this CAN be expensive - especially if we need to externalize the file, and re-parse it!!! - just shortcut by
1343 // checking the top-level DOM-node and assure that has the right namespace. At least quickie first check that works when
1344 // reading files (doesn't help in pre-save check, of course)
1345 DOMNode* docNode = fXMLDoc->getDocumentElement ();
1346 if (docNode == nullptr) [[unlikely]] {
1347 Execution::Throw (BadFormatException{"No document", 0, 0, 0});
1348 }
1349 optional<URI> docURI = docNode->getNamespaceURI () == nullptr ? optional<URI>{} : docNode->getNamespaceURI ();
1350 if (docURI != schema.GetTargetNamespace ()) {
1351 Execution::Throw (BadFormatException{
1352 Format ("Wrong document namespace (found '{}' and expected '{}')"_f, docURI, schema.GetTargetNamespace ()), 0, 0, 0});
1353 }
1354
1355 // EXTERNALIZE, AND THEN RE-PARSE USING CACHED SAX PARSER WTIH LOADED GRAMMAR
1356 {
1357 MemBufFormatTarget destination;
1358 {
1359 AutoRelease_<DOMLSOutput> theOutputDesc = GetDOMIMPL_ ().createLSOutput ();
1360 theOutputDesc->setEncoding (XMLUni::fgUTF8EncodingString);
1361 AutoRelease_<DOMLSSerializer> writer = GetDOMIMPL_ ().createLSSerializer ();
1362 theOutputDesc->setByteStream (&destination);
1363 theOutputDesc->setEncoding (XMLUni::fgUTF8EncodingString);
1364 Assert (fXMLDoc->getXmlStandalone ());
1365 writer->write (fXMLDoc.get (), theOutputDesc);
1366 }
1367 MemBufInputSource readReadSrc{destination.getRawBuffer (), destination.getLen (), u"tmp"};
1368 readReadSrc.setEncoding (XMLUni::fgUTF8EncodingString);
1369 shared_ptr<IXercesSchemaRep> accessSchema = dynamic_pointer_cast<IXercesSchemaRep> (schema.GetRep ());
1370 {
1371 AssertNotNull (accessSchema); // for now only rep supported
1372 shared_ptr<SAX2XMLReader> parser = shared_ptr<SAX2XMLReader> (
1373 XMLReaderFactory::createXMLReader (XMLPlatformUtils::fgMemoryManager, accessSchema->GetCachedGrammarPool ()));
1374 SetupCommonParserFeatures_ (*parser, true);
1375 Map2StroikaExceptionsErrorReporter_ myErrorReporter;
1376 parser->setErrorHandler (&myErrorReporter);
1377 parser->parse (readReadSrc);
1378 }
1379 }
1380 }
1381 catch (...) {
1382 if constexpr (qDumpXMLOnValidationError_) {
1383#if !qCompilerAndStdLib_arm_asan_FaultStackUseAfterScope_Buggy
1384 // Generate temp file (each with differnet names), and write out the bad XML.
1385 // Then - re-validate (with line#s) - and print the results of the validation to ANOTHER
1386 // temporary file
1387 //
1388 filesystem::path tmpFileName = IO::FileSystem::AppTmpFileManager::sThe.GetTmpFile ("FAILED_VALIDATION_.xml");
1389 DbgTrace ("Error validating - so writing out temporary file = '{}'"_f, tmpFileName);
1390 Write (IO::FileSystem::FileOutputStream::New (tmpFileName), SerializationOptions{.fPrettyPrint = true, .fIndent = 4});
1391 try {
1392 ValidateFile (tmpFileName, schema);
1393 }
1394 catch (const BadFormatException& vf) {
1395 String tmpFileNameStr = String{tmpFileName};
1396 size_t idx = tmpFileNameStr.find (".xml");
1397 String newTmpFile = tmpFileNameStr.substr (0, idx) + "_MSG.txt";
1398 ofstream msgOut{newTmpFile.AsNarrowSDKString (eIgnoreErrors).c_str ()};
1399 msgOut << "Reason:" << vf.GetDetails () << endl;
1400 optional<unsigned int> lineNum;
1401 optional<unsigned int> colNumber;
1402 optional<uint64_t> fileOffset;
1403 vf.GetPositionInfo (&lineNum, &colNumber, &fileOffset);
1404 if (lineNum) {
1405 msgOut << "Line:" << *lineNum << endl;
1406 }
1407 if (colNumber) {
1408 msgOut << "Col: " << *colNumber << endl;
1409 }
1410 if (fileOffset) {
1411 msgOut << "FilePos: " << *fileOffset << endl;
1412 }
1413 }
1414 catch (...) {
1415 }
1416#endif
1417 }
1418 Execution::ReThrow ();
1419 }
1420 }
1421 END_LIB_EXCEPTION_MAPPER_
1422 }
1423 shared_ptr<xercesc::DOMDocument> fXMLDoc;
1424 [[no_unique_address]] Debug::AssertExternallySynchronizedMutex fThisAssertExternallySynchronized_;
1425 };
1426}
1427
1428namespace {
1429 Node::Ptr WrapXercesNodeInStroikaNode_ (DOMNode* n)
1430 {
1431 RequireNotNull (n);
1432 if (n->getNodeType () == DOMNode::ELEMENT_NODE) {
1433 return Node::Ptr{MakeSharedPtr<ElementRep_> (n)};
1434 }
1435 else {
1436 return Node::Ptr{MakeSharedPtr<NodeRep_> (n)};
1437 }
1438 }
1439 Element::Ptr WrapXercesNodeInStroikaNode_ (DOMElement* n)
1440 {
1441 RequireNotNull (n);
1442 return Element::Ptr{MakeSharedPtr<ElementRep_> (n)};
1443 }
1444}
1445
1446/*
1447 ********************************************************************************
1448 ********************* Provider::Xerces::xercesString2String ********************
1449 ********************************************************************************
1450 */
1451String Providers::Xerces::xercesString2String (const XMLCh* s, const XMLCh* e)
1452{
1453 if constexpr (same_as<XMLCh, char16_t>) {
1454 return String{span{s, e}};
1455 }
1456 // nb: casts required cuz Xerces doesn't (currently) use wchar_t/char16_t/char32_t but something the sizeof char16_t
1457 // --LGP 2016-07-29
1458 if constexpr (sizeof (XMLCh) == sizeof (char16_t)) {
1459 return String{span{reinterpret_cast<const char16_t*> (s), reinterpret_cast<const char16_t*> (e)}};
1460 }
1461 else if constexpr (sizeof (XMLCh) == sizeof (char32_t)) {
1462 return String{span{reinterpret_cast<const char32_t*> (s), reinterpret_cast<const char32_t*> (e)}};
1463 }
1464 else {
1466 return String{};
1467 }
1468}
1469
1470String Providers::Xerces::xercesString2String (const XMLCh* t)
1471{
1472 if constexpr (same_as<XMLCh, char16_t>) {
1473 return String{t};
1474 }
1475 // nb: casts required cuz Xerces doesn't (currently) use wchar_t/char16_t/char32_t but something the sizeof char16_t
1476 // --LGP 2016-07-29
1477 if constexpr (sizeof (XMLCh) == sizeof (char16_t)) {
1478 return String{reinterpret_cast<const char16_t*> (t)};
1479 }
1480 else if constexpr (sizeof (XMLCh) == sizeof (char32_t)) {
1481 return String{reinterpret_cast<const char32_t*> (t)};
1482 }
1483 else {
1485 return String{};
1486 }
1487}
1488
1489/*
1490 ********************************************************************************
1491 ************************** XML::Providers::Xerces::Provider ********************
1492 ********************************************************************************
1493 */
1494Providers::Xerces::Provider::Provider ()
1495{
1496 TraceContextBumper ctx{"Xerces::Provider::CTOR"};
1497#if qStroika_Foundation_Debug_AssertionsChecked
1498 static unsigned int sNProvidersCreated_{0}; // don't create multiple of these - will lead to confusion
1499 Assert (++sNProvidersCreated_ == 1);
1500#endif
1501#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1502 fUseXercesMemoryManager_ = new MyXercesMemMgr_{};
1503 XMLPlatformUtils::Initialize (XMLUni::fgXercescDefaultLocale, 0, 0, fUseXercesMemoryManager_);
1504#else
1505 XMLPlatformUtils::Initialize (XMLUni::fgXercescDefaultLocale, 0, 0);
1506#endif
1507}
1508
1509Providers::Xerces::Provider::~Provider ()
1510{
1511 TraceContextBumper ctx{"Xerces::Provider::DTOR"};
1512 XMLPlatformUtils::Terminate ();
1513#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1514 Require (SchemaRep_::sLiveCnt == 0); // Check for leaks but better/clearer than memory leaks check below
1515 Require (DocRep_::sLiveCnt == 0); // ""
1516 delete fUseXercesMemoryManager_; // checks for leaks
1517#endif
1518}
1519
1520shared_ptr<Schema::IRep> Providers::Xerces::Provider::SchemaFactory (const InputStream::Ptr<byte>& schemaData, const Resource::ResolverPtr& resolver) const
1521{
1522 return MakeSharedPtr<SchemaRep_> (schemaData, resolver);
1523}
1524
1525shared_ptr<DOM::Document::IRep> Providers::Xerces::Provider::DocumentFactory (const Streams::InputStream::Ptr<byte>& in,
1526 const Schema::Ptr& schemaToValidateAgainstWhileReading) const
1527{
1528 return MakeSharedPtr<DocRep_> (in, schemaToValidateAgainstWhileReading);
1529}
1530
1531void Providers::Xerces::Provider::SAXParse (const Streams::InputStream::Ptr<byte>& in, StructuredStreamEvents::IConsumer* callback,
1532 const Schema::Ptr& schema) const
1533{
1534 SAX2PrintHandlers_ handler{callback};
1535 shared_ptr<IXercesSchemaRep> accessSchema;
1536 if (schema != nullptr) {
1537 accessSchema = dynamic_pointer_cast<IXercesSchemaRep> (schema.GetRep ());
1538 }
1539 shared_ptr<SAX2XMLReader> parser{XMLReaderFactory::createXMLReader (
1540 XMLPlatformUtils::fgMemoryManager, accessSchema == nullptr ? nullptr : accessSchema->GetCachedGrammarPool ())};
1541 SetupCommonParserFeatures_ (*parser, accessSchema != nullptr);
1542 parser->setContentHandler (&handler);
1543 Map2StroikaExceptionsErrorReporter_ mErrorReproter_;
1544 parser->setErrorHandler (&mErrorReproter_);
1545 parser->parse (StdIStream_InputSource_{in});
1546}
#define AssertNotNull(p)
Definition Assertions.h:333
#define EnsureNotNull(p)
Definition Assertions.h:340
#define AssertNotImplemented()
Definition Assertions.h:401
#define qStroika_Foundation_Debug_AssertionsChecked
The qStroika_Foundation_Debug_AssertionsChecked flag determines if assertions are checked and validat...
Definition Assertions.h:48
#define RequireNotNull(p)
Definition Assertions.h:347
#define AssertNotReached()
Definition Assertions.h:355
conditional_t< qStroika_Foundation_Memory_PreferBlockAllocation and andTrueCheck, BlockAllocationUseHelper< T >, Common::Empty > UseBlockAllocationIfAppropriate
Use this to enable block allocation for a particular class. Beware of subclassing.
bool Equals(const T *lhs, const T *rhs)
strcmp or wcscmp() as appropriate == 0
#define CompileTimeFlagChecker_SOURCE(NS_PREFIX, NAME, VALUE)
#define DbgTrace
Definition Trace.h:309
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual string AsNarrowSDKString() const
Definition String.inl:834
nonvirtual String substr(size_t from, size_t count=npos) const
Definition String.inl:1092
nonvirtual size_t find(Character c, size_t startAt=0) const
Definition String.inl:1067
nonvirtual bool Add(ArgByValueType< key_type > key, ArgByValueType< mapped_type > newElt, AddReplaceMode addReplaceMode=AddReplaceMode::eAddReplaces)
Definition Mapping.inl:188
A generalization of a vector: a container whose elements are keyed by the natural numbers.
virtual void StartElement(const Name &name, const Mapping< Name, String > &attributes)
Definition IConsumer.cpp:24
nonvirtual void SetAttribute(const NameWithNamespace &attrName, const optional< String > &v)
Definition DOM.inl:148
nonvirtual shared_ptr< IRep > GetRep() const
return the associated shared_ptr (cannot be nullptr)
Node::Ptr is a smart pointer to a Node::IRep.
Definition DOM.h:210
NOT a real mutex - just a debugging infrastructure support tool so in debug builds can be assured thr...
shared_lock< const AssertExternallySynchronizedMutex > ReadContext
Instantiate AssertExternallySynchronizedMutex::ReadContext to designate an area of code where protect...
nonvirtual filesystem::path GetTmpFile(const String &fileBaseName)
nonvirtual bool empty() const
Definition BLOB.inl:251
nonvirtual const byte * begin() const
Definition BLOB.inl:258
nonvirtual size_t GetSize() const
Definition BLOB.inl:269
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
nonvirtual SeekOffsetType GetOffset() const
OutputStream<>::Ptr is Smart pointer to a stream-based sink of data.
nonvirtual void Write(span< ELEMENT_TYPE2, EXTENT_2 > elts) const
nonvirtual void Flush() const
forces any data contained in this stream to be written.
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
void ThrowIfNull(const Private_::ConstVoidStar &p, const HRESULT &hr)
Template specialization for ThrowIfNull (), for thing being thrown HRESULT - really throw HRESULTErro...