Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
Xerces.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <fstream>
7
8#include <xercesc/validators/schema/identity/XPathException.hpp>
9
11#include "Stroika/Foundation/DataExchange/BadFormatException.h"
15#include "Stroika/Foundation/Execution/Exceptions.h"
16#include "Stroika/Foundation/Execution/Throw.h"
19#include "Stroika/Foundation/Memory/Common.h"
23
24#include "Xerces.h"
25
26using namespace Stroika::Foundation;
29using namespace Stroika::Foundation::DataExchange::XML;
30using namespace Stroika::Foundation::DataExchange::XML::DOM;
31using namespace Stroika::Foundation::DataExchange::XML::Schema;
32using namespace Stroika::Foundation::DataExchange::XML::Providers::Xerces;
33using namespace Stroika::Foundation::Debug;
34using namespace Stroika::Foundation::Execution;
35using namespace Stroika::Foundation::Streams;
36
37XERCES_CPP_NAMESPACE_USE;
38
39using std::byte;
40
41// Comment this in to turn on aggressive noisy DbgTrace in this module
42//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
43
44static_assert (qStroika_HasComponent_xerces, "Don't compile this file if qStroika_HasComponent_xerces not set");
45
46CompileTimeFlagChecker_SOURCE (Stroika::Foundation::DataExchange::XML, qStroika_HasComponent_xerces, qStroika_HasComponent_xerces);
47
48namespace {
49 // From https://www.w3.org/TR/xml-names/
50 // In a namespace declaration, the URI reference is the normalized value of the attribute, so replacement of XML
51 // character and entity references has already been done before any comparison.
52 //
53 // Not 100% sure, but I think that means decode %x stuff too (at least that fixes bug I'm encountering with ASTM-CCR files)
54 // --LGP 2024-01-31
55 constexpr auto kUseURIEncodingFlag_ = URI::StringPCTEncodedFlag::eDecoded;
56}
57
/*
 *  START_LIB_EXCEPTION_MAPPER_ / END_LIB_EXCEPTION_MAPPER_
 *
 *  Always used as a pair, bracketing a block of code that calls into Xerces:
 *      o   START_LIB_EXCEPTION_MAPPER_ opens a try block
 *      o   END_LIB_EXCEPTION_MAPPER_ closes it, and supplies the handlers:
 *              -   xercesc::OutOfMemoryException is replaced by bad_alloc
 *              -   anything else is handed to Execution::ReThrow ()
 */
#define START_LIB_EXCEPTION_MAPPER_ try {
#define END_LIB_EXCEPTION_MAPPER_                                                                                                          \
    }                                                                                                                                      \
    catch (const xercesc::OutOfMemoryException&)                                                                                           \
    {                                                                                                                                      \
        Execution::Throw (bad_alloc{}, "xerces OutOfMemoryException - throwing bad_alloc");                                                \
    }                                                                                                                                      \
    catch (...)                                                                                                                            \
    {                                                                                                                                      \
        Execution::ReThrow ();                                                                                                             \
    }
71
#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
/*
 *  A helpful class to isolate Xerces (etc) memory management calls. Could be the basis
 *  of future performance/memory optimizations, but for now, just a helpful debugging/tracking
 *  class.
 *
 *  Every allocate ()/deallocate () request from Xerces is forwarded to fAllocator, whose snapshots
 *  are used to report outstanding allocations (see DUMPCurMemStats () and the destructor).
 */
struct Provider::MyXercesMemMgr_ : public MemoryManager {
public:
    MyXercesMemMgr_ ()
        : fAllocator{fBaseAllocator}
    {
    }
    ~MyXercesMemMgr_ ()
    {
#if qStroika_Foundation_Debug_AssertionsChecked
        auto snapshot = fAllocator.GetSnapshot ();
        Assert (snapshot.fAllocations.empty ()); // else we have a memory leak which can be perused here in the debugger
#endif
    }

public:
    // fBaseAllocator is declared BEFORE fAllocator on purpose: the constructor initializes fAllocator from it,
    // and members are initialized in declaration order.
    // NOTE(review): these two declarations were missing from this copy of the file; the type of fBaseAllocator
    // is not visible here (fAllocator's type follows from fLastSnapshot's) - confirm against Memory/Common.h.
    Memory::SimpleAllocator_CallLIBCNewDelete             fBaseAllocator;
    Memory::LeakTrackingGeneralPurposeAllocator           fAllocator;
    mutex                                                 fLastSnapshot_CritSection; // guards fLastSnapshot
    Memory::LeakTrackingGeneralPurposeAllocator::Snapshot fLastSnapshot;

public:
    /*
     *  Hand the previously saved snapshot to the allocator's DUMPCurMemStats (), then save the current
     *  snapshot for the next call.
     */
    void DUMPCurMemStats ()
    {
        TraceContextBumper          ctx{"Xerces::MyXercesMemMgr_::DUMPCurMemStats"};
        [[maybe_unused]] lock_guard critSec{fLastSnapshot_CritSection};
        fAllocator.DUMPCurMemStats (fLastSnapshot);
        // now copy current map to prev for next time this gets called
        fLastSnapshot = fAllocator.GetSnapshot ();
    }

public:
    virtual MemoryManager* getExceptionMemoryManager () override
    {
        return this;
    }
    virtual void* allocate (XMLSize_t size) override
    {
        try {
            return fAllocator.Allocate (size);
        }
        catch (...) {
            // NB: use throw not Exception::Throw () since that requires its a subclass of exception (or SilentException)
            throw (OutOfMemoryException{}); // quirk cuz this is the class Xerces expects and catches internally (why not bad_alloc?) - sigh...
        }
    }
    virtual void deallocate (void* p) override
    {
        // nullptr is silently accepted (nothing to free)
        if (p != nullptr) {
            return fAllocator.Deallocate (p);
        }
    }
};
#endif
131
132namespace {
133 struct MySchemaResolver_ : public XMLEntityResolver {
134 private:
135 Resource::ResolverPtr fResolver_;
136
137 public:
138 MySchemaResolver_ (const Resource::ResolverPtr& resolver)
139 : fResolver_{resolver}
140 {
141 }
142 virtual InputSource* resolveEntity (XMLResourceIdentifier* resourceIdentifier) override
143 {
144 // @todo consider exposting this API outside the module, and/or providing option to wget missing namespaces, or have option for where to fetch from?
145 TraceContextBumper ctx{"Xerces::{}::MySchemaResolver_::resolveEntity"};
146 RequireNotNull (resourceIdentifier);
147
148 if (fResolver_ != nullptr) {
149 if (auto o = fResolver_.Lookup (Resource::Name{.fNamespace = xercesString2String (resourceIdentifier->getNameSpace ()),
150 .fPublicID = xercesString2String (resourceIdentifier->getPublicId ()),
151 .fSystemID = xercesString2String (resourceIdentifier->getSystemId ())})) {
152 return mkMemInputSrc_ (o->fData);
153 }
154 }
155 return nullptr;
156 }
157
158 private:
159 static InputSource* mkMemInputSrc_ (const Memory::BLOB& schemaData)
160 {
161 if (schemaData.empty ()) [[unlikely]] {
162 // not sure this is useful case? Should assert/throw?
163 return new MemBufInputSource{nullptr, 0, "", true};
164 }
165 else {
166 XMLByte* useBuf = new XMLByte[schemaData.GetSize ()];
167 memcpy (useBuf, schemaData.begin (), schemaData.GetSize ());
168 return new MemBufInputSource{useBuf, schemaData.GetSize (), "", true};
169 }
170 }
171 };
172}
173
174namespace {
175 void SetupCommonParserFeatures_ (SAX2XMLReader& reader)
176 {
177 reader.setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
178 reader.setFeature (XMLUni::fgXercesDynamic, false);
179 reader.setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, false); // false: * *Do not report attributes used for Namespace declarations, and optionally do not report original prefixed names
180 }
181 void SetupCommonParserFeatures_ (SAX2XMLReader& reader, bool validatingWithSchema)
182 {
183 reader.setFeature (XMLUni::fgXercesSchema, validatingWithSchema);
184 reader.setFeature (XMLUni::fgSAX2CoreValidation, validatingWithSchema);
185
186 // The purpose of this maybe to find errors with the schema itself, in which case,
187 // we shouldn't bother (esp for release builds)
188 // (leave for now til we performance test)
189 // -- LGP 2007-06-21
190 reader.setFeature (XMLUni::fgXercesSchemaFullChecking, validatingWithSchema);
191 reader.setFeature (XMLUni::fgXercesUseCachedGrammarInParse, validatingWithSchema);
192 reader.setFeature (XMLUni::fgXercesIdentityConstraintChecking, validatingWithSchema);
193
194 // we only want to use loaded schemas - don't save any more into the grammar cache, since that
195 // is global/shared.
196 reader.setFeature (XMLUni::fgXercesCacheGrammarFromParse, false);
197 }
198}
199
200namespace {
201 struct Map2StroikaExceptionsErrorReporter_ : public XMLErrorReporter, public ErrorHandler {
202 public:
203 virtual void error ([[maybe_unused]] const unsigned int errCode, [[maybe_unused]] const XMLCh* const errDomain,
204 [[maybe_unused]] const ErrTypes type, const XMLCh* const errorText, [[maybe_unused]] const XMLCh* const systemId,
205 [[maybe_unused]] const XMLCh* const publicId, const XMLFileLoc lineNum, const XMLFileLoc colNum) override
206 {
207 Execution::Throw (BadFormatException{errorText, static_cast<unsigned int> (lineNum), static_cast<unsigned int> (colNum), 0});
208 }
209 virtual void resetErrors () override
210 {
211 }
212 virtual void warning ([[maybe_unused]] const SAXParseException& exc) override
213 {
214 // ignore
215 }
216 virtual void error (const SAXParseException& exc) override
217 {
218 Execution::Throw (BadFormatException{exc.getMessage (), static_cast<unsigned int> (exc.getLineNumber ()),
219 static_cast<unsigned int> (exc.getColumnNumber ()), 0});
220 }
221 virtual void fatalError (const SAXParseException& exc) override
222 {
223 Execution::Throw (BadFormatException{exc.getMessage (), static_cast<unsigned int> (exc.getLineNumber ()),
224 static_cast<unsigned int> (exc.getColumnNumber ()), 0});
225 }
226 };
227}
228
229namespace {
230 struct SchemaRep_ : IXercesSchemaRep {
231#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
232 static inline atomic<unsigned int> sLiveCnt{0};
233#endif
234 SchemaRep_ (const Streams::InputStream::Ptr<byte>& schemaData, const Resource::ResolverPtr& resolver)
235 : fTargetNamespace{}
236 , fResolver{resolver}
237 , fSchemaData{schemaData.ReadAll ()}
238 {
239 AssertNotNull (XMLPlatformUtils::fgMemoryManager);
240 XMLGrammarPoolImpl* grammarPool = new (XMLPlatformUtils::fgMemoryManager) XMLGrammarPoolImpl{XMLPlatformUtils::fgMemoryManager};
241 try {
242 Require (not fSchemaData.empty ());
243 MemBufInputSource mis{reinterpret_cast<const XMLByte*> (fSchemaData.begin ()), fSchemaData.GetSize (), u""};
244
245 MySchemaResolver_ mySchemaResolver{resolver};
246 // Directly construct SAX2XMLReaderImpl so we can use XMLEntityResolver - which passes along namespace (regular
247 // EntityResolve just passes systemID
248 // shared_ptr<SAX2XMLReader> reader = shared_ptr<SAX2XMLReader> (XMLReaderFactory::createXMLReader (XMLPlatformUtils::fgMemoryManager, grammarPool));
249 //
250 shared_ptr<SAX2XMLReaderImpl> reader = shared_ptr<SAX2XMLReaderImpl> (
251 new (XMLPlatformUtils::fgMemoryManager) SAX2XMLReaderImpl{XMLPlatformUtils::fgMemoryManager, grammarPool});
252 reader->setXMLEntityResolver (&mySchemaResolver);
253
254 SetupCommonParserFeatures_ (*reader, true);
255
256 // Reset fgXercesCacheGrammarFromParse to TRUE so we actually load the XSD here
257 reader->setFeature (XMLUni::fgXercesCacheGrammarFromParse, true);
258 reader->setErrorHandler (&fErrorReporter_);
259 xercesc::Grammar* g = reader->loadGrammar (mis, Grammar::SchemaGrammarType, true);
260 AssertNotNull (g);
261 const XMLCh* ts = g->getTargetNamespace ();
262 if (ts and *ts) {
263 fTargetNamespace = URI{xercesString2String (ts)};
264 }
265 }
266 catch (...) {
267 delete grammarPool;
269 }
270 fCachedGrammarPool = grammarPool;
271#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
272 ++sLiveCnt;
273#endif
274 }
275 SchemaRep_ (const SchemaRep_&) = delete;
276 virtual ~SchemaRep_ ()
277 {
278 delete fCachedGrammarPool;
279#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
280 Assert (sLiveCnt > 0);
281 --sLiveCnt;
282#endif
283 }
284 optional<URI> fTargetNamespace;
285 Resource::ResolverPtr fResolver;
286 Memory::BLOB fSchemaData;
287 xercesc::XMLGrammarPool* fCachedGrammarPool{nullptr};
288 Map2StroikaExceptionsErrorReporter_ fErrorReporter_;
289
290 virtual const Providers::ISchemaProvider* GetProvider () const override
291 {
292 return &XML::Providers::Xerces::kDefaultProvider;
293 }
294 virtual optional<URI> GetTargetNamespace () const override
295 {
296 return fTargetNamespace;
297 }
298 virtual Memory::BLOB GetData () override
299 {
300 return fSchemaData;
301 }
302 // not super useful, except if you want to clone
303 virtual Resource::ResolverPtr GetResolver () override
304 {
305 return fResolver;
306 }
307 virtual xercesc::XMLGrammarPool* GetCachedGrammarPool () override
308 {
309 return fCachedGrammarPool;
310 }
311 };
312}
313
314namespace {
315 class StdIStream_InputSource_ : public InputSource {
316 protected:
317 class StdIStream_InputStream : public XERCES_CPP_NAMESPACE_QUALIFIER BinInputStream {
318 public:
319 StdIStream_InputStream (InputStream::Ptr<byte> in)
320 : fSource{in}
321 {
322 }
323 ~StdIStream_InputStream () = default;
324
325 public:
326 virtual XMLFilePos curPos () const override
327 {
328 return fSource.GetOffset ();
329 }
330 virtual XMLSize_t readBytes (XMLByte* const toFill, const XMLSize_t maxToRead) override
331 {
332 return fSource.ReadBlocking (span{reinterpret_cast<byte*> (toFill), maxToRead}).size ();
333 }
334 virtual const XMLCh* getContentType () const override
335 {
336 return nullptr;
337 }
338
339 protected:
341 };
342
343 public:
344 StdIStream_InputSource_ (InputStream::Ptr<byte> in, const XMLCh* const bufId = nullptr)
345 : InputSource{bufId}
346 , fSource{in}
347 {
348 }
349 virtual BinInputStream* makeStream () const override
350 {
351 return new (getMemoryManager ()) StdIStream_InputStream{fSource};
352 }
353
354 protected:
356 };
357}
358
359namespace {
360 class SAX2PrintHandlers_ : public DefaultHandler {
361 private:
363
364 public:
365 SAX2PrintHandlers_ (StructuredStreamEvents::IConsumer* callback)
366 : fCallback_{callback}
367 {
368 }
369
370 public:
371 virtual void startDocument () override
372 {
373 if (fCallback_ != nullptr) {
374 fCallback_->StartDocument ();
375 }
376 }
377 virtual void endDocument () override
378 {
379 if (fCallback_ != nullptr) {
380 fCallback_->EndDocument ();
381 }
382 }
383 virtual void startElement (const XMLCh* const uri, const XMLCh* const localName, const XMLCh* const /*qname*/, const Attributes& attributes) override
384 {
385 Require (uri != nullptr);
386 Require (localName != nullptr);
387 if (fCallback_ != nullptr) {
388 using Name = StructuredStreamEvents::Name;
389 Mapping<Name, String> useAttrs;
390 size_t attributesLen = attributes.getLength ();
391 for (XMLSize_t i = 0; i < attributesLen; ++i) {
392 Name attrName{xercesString2String (attributes.getURI (i)), xercesString2String (attributes.getLocalName (i)), Name::eAttribute};
393 useAttrs.Add (attrName, xercesString2String (attributes.getValue (i)));
394 }
395 fCallback_->StartElement (Name{xercesString2String (uri), xercesString2String (localName)}, useAttrs);
396 }
397 }
398 virtual void endElement (const XMLCh* const uri, const XMLCh* const localName, [[maybe_unused]] const XMLCh* const qname) override
399 {
400 Require (uri != nullptr);
401 Require (localName != nullptr);
402 Require (qname != nullptr);
403 if (fCallback_ != nullptr) {
404 fCallback_->EndElement (StructuredStreamEvents::Name{xercesString2String (uri), xercesString2String (localName)});
405 }
406 }
407 virtual void characters (const XMLCh* const chars, const XMLSize_t length) override
408 {
409 Require (chars != nullptr);
410 Require (length != 0);
411 if (fCallback_ != nullptr) {
412 fCallback_->TextInsideElement (xercesString2String (chars, chars + length));
413 }
414 }
415 };
416}
417
418namespace {
419 /*
420 * Short lifetime. Don't save these iterator objects. Just use them to enumerate a collection and then let them
421 * go. They (could) become invalid after a call to update the database.
422 */
423 class SubNodeIterator_ {
424 public:
425 class Rep {
426 public:
427 Rep () = default;
428 virtual ~Rep () = default;
429
430 public:
431 virtual bool IsAtEnd () const = 0;
432 virtual void Next () = 0;
433 virtual Node::Ptr Current () const = 0;
434 virtual size_t GetLength () const = 0;
435 };
436 explicit SubNodeIterator_ (const shared_ptr<Rep>& from)
437 : fRep{from}
438 {
439 }
440
441 public:
442 nonvirtual bool NotDone () const
443 {
444 return not fRep->IsAtEnd ();
445 }
446 nonvirtual bool IsAtEnd () const
447 {
448 return fRep->IsAtEnd ();
449 }
450 nonvirtual void Next ()
451 {
452 fRep->Next ();
453 }
454 nonvirtual Node::Ptr Current () const
455 {
456 return fRep->Current ();
457 }
458 nonvirtual size_t GetLength () const
459 {
460 return fRep->GetLength ();
461 }
462 nonvirtual void operator++ ()
463 {
464 Next ();
465 }
466 nonvirtual void operator++ (int)
467 {
468 Next ();
469 }
470 nonvirtual Node::Ptr operator* () const
471 {
472 return Current ();
473 }
474
475 protected:
476 shared_ptr<Rep> fRep;
477 };
478}
479
namespace {
    /*
     *  Scoped holder for an object that is disposed of by calling its release () method.
     *
     *  Not copyable. The destructor (and set ()) call release () on the held pointer, if it is not null.
     */
    template <class TYPE>
    class AutoRelease_ {
    public:
        AutoRelease_ (TYPE* p)
            : p_{p}
        {
        }
        AutoRelease_ (const AutoRelease_<TYPE>&)                     = delete;
        AutoRelease_<TYPE>& operator= (const AutoRelease_<TYPE>&) = delete;
        ~AutoRelease_ ()
        {
            releaseHeld_ ();
        }

    public:
        // plain access - none of these change what is held
        TYPE* get () const
        {
            return p_;
        }
        TYPE* operator->() const
        {
            return p_;
        }
        TYPE& operator* () const
        {
            return *p_;
        }
        operator TYPE* () const
        {
            return p_;
        }

    public:
        // Give up the held pointer WITHOUT releasing it; afterwards nothing is held
        TYPE* adopt ()
        {
            TYPE* const formerlyHeld = p_;
            p_                       = nullptr;
            return formerlyHeld;
        }
        // Hold p instead, returning (NOT releasing) the previously held pointer
        TYPE* swap (TYPE* p)
        {
            TYPE* const formerlyHeld = p_;
            p_                       = p;
            return formerlyHeld;
        }
        // Release whatever is held, then hold p
        void set (TYPE* p)
        {
            releaseHeld_ ();
            p_ = p;
        }

    private:
        void releaseHeld_ ()
        {
            if (p_ != nullptr) {
                p_->release ();
            }
        }

    private:
        TYPE* p_;
    };
}
535
536namespace {
537 DOMImplementation& GetDOMIMPL_ ()
538 {
539 static constexpr XMLCh kDOMImplFeatureDeclaration_[] = u"Core";
540 // safe to save in a static var? -- LGP 2007-05-20
541 // from perusing implementation - this appears safe to cache and re-use in differnt threads
542 static DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation (kDOMImplFeatureDeclaration_);
543 AssertNotNull (impl);
544 return *impl;
545 }
546
547 constexpr bool qDumpXMLOnValidationError_ = qStroika_Foundation_Debug_AssertionsChecked;
548
549#if qStroika_Foundation_Debug_AssertionsChecked
550 bool ValidNewNodeName_ (const String& n)
551 {
552 if (n.empty ()) {
553 return false;
554 }
555 if (n.find (':') != wstring::npos) { // if triggered, you probably used XPath as arg for CreateElement call!!!
556 return false;
557 }
558 return true;
559 }
560#endif
561}
562
563namespace {
564 void DoWrite2Stream_ (xercesc::DOMNode* node2Write, const Streams::OutputStream::Ptr<byte>& to, const SerializationOptions& options)
565 {
566 AutoRelease_<DOMLSOutput> theOutputDesc = GetDOMIMPL_ ().createLSOutput ();
567 theOutputDesc->setEncoding (XMLUni::fgUTF8EncodingString);
568 AutoRelease_<DOMLSSerializer> writer = GetDOMIMPL_ ().createLSSerializer ();
569 DOMConfiguration* dc = writer->getDomConfig ();
570 dc->setParameter (XMLUni::fgDOMWRTFormatPrettyPrint, options.fPrettyPrint);
571 dc->setParameter (XMLUni::fgDOMWRTBOM, true);
572 class myOutputter : public XMLFormatTarget {
573 public:
575 myOutputter (const Streams::OutputStream::Ptr<byte>& to)
576 : fOut{to}
577 {
578 }
579 virtual void writeChars (const XMLByte* const toWrite, const XMLSize_t count, [[maybe_unused]] XMLFormatter* const formatter) override
580 {
581 fOut.Write (span<const byte>{reinterpret_cast<const byte*> (toWrite), count});
582 }
583 virtual void flush () override
584 {
585 fOut.Flush ();
586 }
587 };
588 myOutputter dest{to};
589 theOutputDesc->setByteStream (&dest);
590 writer->write (node2Write, theOutputDesc);
591 }
592 // Currently unused but maybe needed again if we support 'moving' nodes from one doc to another
593 DOMNode* RecursivelySetNamespace_ (DOMNode* n, const XMLCh* namespaceURI)
594 {
595 RequireNotNull (n);
596 // namespaceURI CAN be nullptr
597 switch (n->getNodeType ()) {
598 case DOMNode::ELEMENT_NODE: {
599 xercesc::DOMDocument* doc = n->getOwnerDocument ();
600 AssertNotNull (doc);
601 n = doc->renameNode (n, namespaceURI, n->getNodeName ());
602 ThrowIfNull (n);
603 for (DOMNode* child = n->getFirstChild (); child != nullptr; child = child->getNextSibling ()) {
604 child = RecursivelySetNamespace_ (child, namespaceURI);
605 }
606 } break;
607 }
608 return n;
609 }
610 constexpr XMLCh* kXerces2XMLDBDocumentKey_ = nullptr; // just a unique key to lookup our doc object from the xerces doc object.
611 // Could use real str, then xerces does strcmp() - but this appears slightly faster
612 // so long as no conflict....
613 String GetTextForDOMNode_ (const DOMNode* node)
614 {
615 RequireNotNull (node);
616 if (node->getNodeType () == DOMNode::COMMENT_NODE) {
617 // The below hack doesn't seem to work for comment nodes - at least in one case - they had
618 // no children nodes so just returned empty string...
619 //
620 // The above comemnt about comment-nodes not working is old, and undated. Revisit this at some point. Though this backup mode isn't so bad.
621 goto BackupMode;
622 }
623
624 {
625 // try quick impl if all children are textnodes
626 //
627 // This trick is from:
628 // http://www.codesynthesis.com/~boris/blog/category/xerces-c/
629 // 2007-06-13, because default impl appears to allocate strings and never frees (til owning doc freed)
630 // for getTextContent calls...
631 //
632 using xercesc::DOMNode;
633 using xercesc::DOMText;
635 for (DOMNode* n = node->getFirstChild (); n != nullptr; n = n->getNextSibling ()) {
636 switch (n->getNodeType ()) {
637 case DOMNode::TEXT_NODE:
638 case DOMNode::CDATA_SECTION_NODE: {
639 DOMText* t (static_cast<DOMText*> (n));
640 // Note - we don't do the usual 'reserve' trick here because the string is generally made up of a single text node
641 r += t->getData ();
642 break;
643 }
644 case DOMNode::ELEMENT_NODE: {
645 goto BackupMode;
646 }
647 }
648 }
649 return r.str ();
650 }
651 BackupMode:
652 /*
653 * Note that this is SOMETHING OF A (temporary) memory leak. Xerces does free the memory when the document is freed.
654 */
655 DbgTrace ("WARNING: GetTextForDOMNode_::BackupMode used"_f);
656 return node->getTextContent ();
657 }
658
659 Node::Ptr WrapXercesNodeInStroikaNode_ (DOMNode* n);
660 Element::Ptr WrapXercesNodeInStroikaNode_ (DOMElement* n);
661}
662
663namespace {
664 class SubNodeIteratorOver_SiblingList_Rep_ : public SubNodeIterator_::Rep,
665 Memory::UseBlockAllocationIfAppropriate<SubNodeIteratorOver_SiblingList_Rep_> {
666 public:
667 // Called iterates over CHILDREN of given parentNode
668 SubNodeIteratorOver_SiblingList_Rep_ (DOMNode* nodeParent)
669 : fParentNode{nodeParent}
670 , fCachedMainListLen{static_cast<size_t> (-1)}
671 {
672 RequireNotNull (nodeParent);
673 START_LIB_EXCEPTION_MAPPER_
674 {
675 fCurNode_ = nodeParent->getFirstChild ();
676 }
677 END_LIB_EXCEPTION_MAPPER_
678 }
679 virtual bool IsAtEnd () const override
680 {
681 return fCurNode_ == nullptr;
682 }
683 virtual void Next () override
684 {
685 Require (not IsAtEnd ());
686 AssertNotNull (fCurNode_);
687 START_LIB_EXCEPTION_MAPPER_
688 {
689 fCurNode_ = fCurNode_->getNextSibling ();
690 }
691 END_LIB_EXCEPTION_MAPPER_
692 }
693 virtual Node::Ptr Current () const override
694 {
695 return WrapXercesNodeInStroikaNode_ (fCurNode_);
696 }
697 virtual size_t GetLength () const override
698 {
699 if (fCachedMainListLen == static_cast<size_t> (-1)) {
700 size_t n = 0;
701 START_LIB_EXCEPTION_MAPPER_
702 {
703 for (DOMNode* i = fParentNode->getFirstChild (); i != nullptr; (i = i->getNextSibling ()), ++n)
704 ;
705 }
706 END_LIB_EXCEPTION_MAPPER_
707 fCachedMainListLen = n;
708 }
709 return fCachedMainListLen;
710 }
711
712 private:
713 DOMNode* fParentNode{nullptr};
714 DOMNode* fCurNode_{nullptr};
715 mutable size_t fCachedMainListLen{};
716 };
717}
718
719namespace {
720 struct NodeRep_ : IXercesNodeRep, Memory::UseBlockAllocationIfAppropriate<NodeRep_> {
721 NodeRep_ (DOMNode* n)
722 : fNode_{n}
723 {
724 RequireNotNull (n);
725 }
726 virtual const Providers::IDOMProvider* GetProvider () const override
727 {
728 return &Providers::Xerces::kDefaultProvider;
729 }
730 virtual bool Equals (const IRep* rhs) const override
731 {
732 RequireNotNull (fNode_);
733 RequireNotNull (rhs);
734 return fNode_ == dynamic_cast<const NodeRep_*> (rhs)->fNode_;
735 }
736 virtual Node::Type GetNodeType () const override
737 {
738 AssertNotNull (fNode_);
739 START_LIB_EXCEPTION_MAPPER_
740 {
741 switch (fNode_->getNodeType ()) {
742 case DOMNode::ELEMENT_NODE:
743 return Node::eElementNT;
744 case DOMNode::ATTRIBUTE_NODE:
745 return Node::eAttributeNT;
746 case DOMNode::TEXT_NODE:
747 return Node::eTextNT;
748 case DOMNode::COMMENT_NODE:
749 return Node::eCommentNT;
750 default:
751 return Node::eOtherNT;
752 }
753 }
754 END_LIB_EXCEPTION_MAPPER_
755 }
756 virtual NameWithNamespace GetName () const override
757 {
758 AssertNotNull (fNode_);
759 Require (GetNodeType () == Node::eElementNT or GetNodeType () == Node::eAttributeNT);
760 START_LIB_EXCEPTION_MAPPER_
761 {
762 AssertNotNull (fNode_->getNodeName ());
763 const XMLCh* n = fNode_->getNamespaceURI ();
764 return NameWithNamespace{n == nullptr ? optional<URI>{} : URI{xercesString2String (n)}, fNode_->getNodeName ()};
765 }
766 END_LIB_EXCEPTION_MAPPER_
767 }
768 virtual void SetName (const NameWithNamespace& name) override
769 {
770 AssertNotNull (fNode_);
771#if qStroika_Foundation_Debug_AssertionsChecked
772 Require (ValidNewNodeName_ (name.fName));
773#endif
774 START_LIB_EXCEPTION_MAPPER_
775 {
776 xercesc::DOMDocument* doc = fNode_->getOwnerDocument ();
777 AssertNotNull (doc);
778 fNode_ = doc->renameNode (
779 fNode_, name.fNamespace == nullopt ? nullptr : name.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
780 name.fName.As<u16string> ().c_str ());
781 AssertNotNull (fNode_);
782 }
783 END_LIB_EXCEPTION_MAPPER_
784 }
785 virtual String GetValue () const override
786 {
787 AssertNotNull (fNode_);
788 START_LIB_EXCEPTION_MAPPER_
789 {
790 return GetTextForDOMNode_ (fNode_);
791 }
792 END_LIB_EXCEPTION_MAPPER_
793 }
794 virtual void SetValue (const String& v) override
795 {
796 AssertNotNull (fNode_);
797 START_LIB_EXCEPTION_MAPPER_
798 {
799 fNode_->setTextContent (v.empty () ? nullptr : v.As<u16string> ().c_str ());
800 }
801 END_LIB_EXCEPTION_MAPPER_
802 }
803 virtual void DeleteNode () override
804 {
805 START_LIB_EXCEPTION_MAPPER_
806 {
807 DOMNode* selNode = fNode_;
808 ThrowIfNull (selNode);
809 DOMNode* parentNode = selNode->getParentNode ();
810 if (parentNode == nullptr) {
811 // This happens if the selected node is an attribute
812 if (fNode_ != nullptr) {
813 const XMLCh* ln = selNode->getNodeName ();
814 AssertNotNull (ln);
815 DOMElement* de = dynamic_cast<DOMElement*> (fNode_);
816 de->removeAttribute (ln);
817 }
818 }
819 else {
820 (void)parentNode->removeChild (selNode);
821 }
822 }
823 END_LIB_EXCEPTION_MAPPER_
824 }
825 virtual Node::Ptr GetParentNode () const override
826 {
827 AssertNotNull (fNode_);
828 START_LIB_EXCEPTION_MAPPER_
829 {
830 auto p = fNode_->getParentNode ();
831 return p == nullptr ? nullptr : WrapXercesNodeInStroikaNode_ (p);
832 }
833 END_LIB_EXCEPTION_MAPPER_
834 }
835 virtual void Write (const Streams::OutputStream::Ptr<byte>& to, const SerializationOptions& options) const override
836 {
837 START_LIB_EXCEPTION_MAPPER_
838 {
839 DoWrite2Stream_ (fNode_, to, options);
840 }
841 END_LIB_EXCEPTION_MAPPER_
842 }
843 virtual xercesc::DOMNode* GetInternalTRep () override
844 {
845 return fNode_;
846 }
847 // must carefully think out mem managment here - cuz not ref counted - around as long as owning doc...
848 DOMNode* fNode_;
849 };
850}
851
852namespace {
853 DISABLE_COMPILER_MSC_WARNING_START (4250) // inherits via dominance warning
854 struct ElementRep_ : Element::IRep, Memory::InheritAndUseBlockAllocationIfAppropriate<ElementRep_, NodeRep_> {
856 ElementRep_ (DOMNode* n)
857 : inherited{n}
858 {
859 RequireNotNull (n);
860 Require (n->getNodeType () == DOMNode::ELEMENT_NODE);
861 }
862 virtual Node::Type GetNodeType () const override
863 {
864 AssertNotNull (fNode_);
865 Assert (fNode_->getNodeType () == DOMNode::ELEMENT_NODE);
866 return Node::eElementNT;
867 }
868 virtual optional<String> GetAttribute (const NameWithNamespace& attrName) const override
869 {
870 AssertNotNull (fNode_);
871 START_LIB_EXCEPTION_MAPPER_
872 {
873 if (fNode_->getNodeType () == DOMNode::ELEMENT_NODE) {
874 DOMElement* elt = Debug::UncheckedDynamicCast<DOMElement*> (fNode_);
875 const XMLCh* s = attrName.fNamespace
876 ? elt->getAttributeNS (attrName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
877 attrName.fName.As<u16string> ().c_str ())
878 : elt->getAttribute (attrName.fName.As<u16string> ().c_str ());
879 AssertNotNull (s);
880 if (*s != '\0') {
881 return s;
882 }
883 }
884 return nullopt;
885 }
886 END_LIB_EXCEPTION_MAPPER_
887 }
888 virtual void SetAttribute (const NameWithNamespace& attrName, const optional<String>& v) override
889 {
890 Require (GetNodeType () == Node::eElementNT);
891 AssertNotNull (fNode_);
892 START_LIB_EXCEPTION_MAPPER_
893 {
894 DOMElement* element = dynamic_cast<DOMElement*> (fNode_);
895 ThrowIfNull (element);
896 /*
897 * SINCE STROIKA V3.0d5 - if no namespace given on attrName, we use nullptr as the namespace (not do any inheritance)
898 * attributes usually have no namespace!!!!
899 */
900 if (v) {
901 /*
902 * For reasons that elude maybe (maybe because it was standard for XML early on)
903 * all my attributes are free of namespaces. So why use setAttributeNS? Because otherwise
904 * the XQilla code fails to match on the attribute names at all in its XPath stuff.
905 * Considered copying the namespace from the parent element (fNode_->getNamespaceURI()),
906 * but XQilla didnt like that either (maybe then I needed M: on xpath).
907 * A different subclass object of DOMAttrNode is created - one that doesnt have a getLocalName,
908 * or something like that. Anyhow - this appears to do the right thing for now...
909 * -- LGP 2007-06-13
910 *
911 * Not using XQilla anymore. And may have been another misunderstanding anyhow. So have attributes
912 * inherit the namespace of the element they are from, by default --LGP 2024-01-09
913 *
914 * MAYBE related to http://stroika-bugs.sophists.com/browse/STK-999 - diff symptoms but similar workaround
915 */
916 element->setAttributeNS (attrName.fNamespace ? attrName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str () : nullptr,
917 attrName.fName.As<u16string> ().c_str (), v->As<u16string> ().c_str ());
918 }
919 else {
920 element->removeAttributeNS (
921 attrName.fNamespace ? attrName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str () : nullptr,
922 attrName.fName.As<u16string> ().c_str ());
923 }
924 }
925 END_LIB_EXCEPTION_MAPPER_
926 }
927 virtual Element::Ptr InsertElement (const NameWithNamespace& eltName, const Element::Ptr& afterNode) override
928 {
929#if qStroika_Foundation_Debug_AssertionsChecked
930 Require (ValidNewNodeName_ (eltName.fName));
931#endif
932 START_LIB_EXCEPTION_MAPPER_
933 {
934 xercesc::DOMDocument* doc = fNode_->getOwnerDocument ();
935 // unsure if we should use smartpointer here - thinkout xerces & smart ptrs & mem management
936 // unclear if we should set namespace to existing node namespace or nullptr here?
937 DOMNode* child = doc->createElementNS ((eltName.fNamespace == nullopt)
938 ? fNode_->getNamespaceURI ()
939 : eltName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
940 eltName.fName.As<u16string> ().c_str ());
941 DOMNode* refChildNode = nullptr;
942 if (afterNode == nullptr) {
943 // this means PREPEND.
944 // If there is a first element, then insert before it. If no elements, then append is the same thing.
945 refChildNode = fNode_->getFirstChild ();
946 }
947 else {
948 refChildNode = dynamic_cast<NodeRep_&> (*afterNode.GetRep ()).GetInternalTRep ()->getNextSibling ();
949 }
950 DOMNode* childx = fNode_->insertBefore (child, refChildNode);
951 ThrowIfNull (childx);
952 return WrapXercesNodeInStroikaNode_ (childx);
953 }
954 END_LIB_EXCEPTION_MAPPER_
955 }
956 virtual Element::Ptr AppendElement (const NameWithNamespace& eltName) override
957 {
958#if qStroika_Foundation_Debug_AssertionsChecked
959 Require (ValidNewNodeName_ (eltName.fName));
960#endif
961 START_LIB_EXCEPTION_MAPPER_
962 {
963 xercesc::DOMDocument* doc = fNode_->getOwnerDocument ();
964 DOMNode* child{};
965 if (eltName.fNamespace) {
966 u16string namespaceURI = eltName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ();
967 child = doc->createElementNS (namespaceURI.c_str (), eltName.fName.As<u16string> ().c_str ());
968 }
969 else {
970 const XMLCh* namespaceURI = fNode_->getNamespaceURI (); //? or should be null?
971 child = doc->createElementNS (namespaceURI, eltName.fName.As<u16string> ().c_str ());
972 }
973 DOMNode* childx = fNode_->appendChild (child);
974 ThrowIfNull (childx);
975 return WrapXercesNodeInStroikaNode_ (childx);
976 }
977 END_LIB_EXCEPTION_MAPPER_
978 }
979 virtual Iterable<Node::Ptr> GetChildren () const override
980 {
981 AssertNotNull (fNode_);
982 START_LIB_EXCEPTION_MAPPER_
983 {
984 return Traversal::CreateGenerator<Node::Ptr> (
985 [sni = SubNodeIterator_{Memory::MakeSharedPtr<SubNodeIteratorOver_SiblingList_Rep_> (fNode_)}] () mutable -> optional<Node::Ptr> {
986 if (sni.IsAtEnd ()) {
987 return optional<Node::Ptr>{};
988 }
989 Node::Ptr r = *sni;
990 ++sni;
991 return r;
992 });
993 }
994 END_LIB_EXCEPTION_MAPPER_
995 }
996 struct XPathQueryHelper_ {
997 optional<AutoRelease_<DOMXPathNSResolver>> resolver;
998 DOMXPathResult::ResultType rt{};
999 optional<AutoRelease_<DOMXPathExpression>> expr;
1000 XPathQueryHelper_ (DOMNode* n, const XPath::Expression& e, bool firstOnly)
1001 {
1002 xercesc::DOMDocument* doc = n->getOwnerDocument ();
1003 resolver.emplace (doc->createNSResolver (nullptr));
1004 auto namespaceDefs = e.GetOptions ().fNamespaces;
1005 if (namespaceDefs.GetDefaultNamespace ()) {
1006 (*resolver)->addNamespaceBinding (
1007 u"", namespaceDefs.GetDefaultNamespace ()->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str ());
1008 }
1009 for (Common::KeyValuePair ni : namespaceDefs.GetPrefixedNamespaces ()) {
1010 (*resolver)->addNamespaceBinding (ni.fKey.As<u16string> ().c_str (),
1011 ni.fValue.As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str ());
1012 }
1013 try {
1014 expr.emplace (doc->createExpression (e.GetExpression ().As<u16string> ().c_str (), *resolver));
1015 }
1016 catch (const xercesc::DOMXPathException&) {
1017 // MANY basic things are not supported in Xerces XPath - like a[1] - brackets not supported.
1018 Execution::Throw (XPath::XPathExpressionNotSupported::kThe);
1019 }
1020 switch (e.GetOptions ().fResultTypeIndex.value_or (DOMXPathResult::ANY_TYPE)) {
1021 case XPath::ResultTypeIndex_v<Node::Ptr>: {
1022 auto o = e.GetOptions ();
1023 if (firstOnly) {
1024 rt = e.GetOptions ().fOrdered ? DOMXPathResult::FIRST_ORDERED_NODE_TYPE : DOMXPathResult::ANY_UNORDERED_NODE_TYPE;
1025 }
1026 else if (o.fSnapshot) {
1027 rt = e.GetOptions ().fOrdered ? DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE : DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE;
1028 }
1029 else {
1030 // Would make sense given docs, but appears unsupported by Xerces... --LGP 2024-01-12
1031 // rt = e.GetOptions ().fOrdered ? DOMXPathResult::ORDERED_NODE_ITERATOR_TYPE : DOMXPathResult::UNORDERED_NODE_ITERATOR_TYPE;
1032 rt = e.GetOptions ().fOrdered ? DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE : DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE;
1033 }
1034 } break;
1035 default:
1037 }
1038 }
1039 static optional<XPath::Result> ToResult_ (const xercesc::DOMXPathResult* r)
1040 {
1041 RequireNotNull (r);
1042 switch (r->getResultType ()) {
1043 case DOMXPathResult::NUMBER_TYPE:
1044 return XPath::Result{r->getNumberValue ()};
1045 case DOMXPathResult::BOOLEAN_TYPE:
1046 return XPath::Result{r->getBooleanValue ()};
1047 case DOMXPathResult::STRING_TYPE:
1048 return XPath::Result{xercesString2String (r->getStringValue ())};
1049 case DOMXPathResult::ANY_UNORDERED_NODE_TYPE:
1050 case DOMXPathResult::FIRST_ORDERED_NODE_TYPE:
1051 case DOMXPathResult::UNORDERED_NODE_ITERATOR_TYPE:
1052 case DOMXPathResult::ORDERED_NODE_ITERATOR_TYPE:
1053 case DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE:
1054 case DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE: {
1055 auto n = r->getNodeValue ();
1056 return n == nullptr ? optional<XPath::Result>{} : XPath::Result{Node::Ptr{WrapXercesNodeInStroikaNode_ (n)}};
1057 }
1058 default:
1060 }
1061 return nullopt;
1062 }
1063 };
1064 virtual optional<XPath::Result> LookupOne (const XPath::Expression& e) override
1065 {
1066 if constexpr (false) {
1067 // quickie impl just to test Lookup Code
1068 for (auto i : Lookup (e)) {
1069 return i;
1070 }
1071 return nullopt;
1072 }
1073 START_LIB_EXCEPTION_MAPPER_
1074 {
1075 XPathQueryHelper_ xpHelp{fNode_, e, true};
1076 AutoRelease_<xercesc::DOMXPathResult> r = (*xpHelp.expr)->evaluate (fNode_, xpHelp.rt, nullptr);
1077 return XPathQueryHelper_::ToResult_ (r);
1078 }
1079 END_LIB_EXCEPTION_MAPPER_
1080 }
1081 virtual Traversal::Iterable<XPath::Result> Lookup (const XPath::Expression& e) override
1082 {
1083 if (e.GetOptions ().fSnapshot) {
1084 XPath::Expression::Options e2o = e.GetOptions ();
1085 e2o.fSnapshot = false;
1086 return Sequence<XPath::Result>{this->Lookup (XPath::Expression{e.GetExpression (), e2o})};
1087 }
1088 shared_ptr<XPathQueryHelper_> xpHelp = make_shared<XPathQueryHelper_> (fNode_, e, false);
1089 shared_ptr<AutoRelease_<xercesc::DOMXPathResult>> r =
1090 make_shared<AutoRelease_<xercesc::DOMXPathResult>> ((*xpHelp->expr)->evaluate (fNode_, xpHelp->rt, nullptr));
1091 Assert (not e.GetOptions ().fSnapshot);
1092 if (xpHelp->rt == DOMXPathResult::UNORDERED_NODE_ITERATOR_TYPE or xpHelp->rt == DOMXPathResult::ORDERED_NODE_ITERATOR_TYPE) [[unlikely]] {
1093 return Traversal::CreateGenerator<XPath::Result> ([xpHelp, r, firstTime = true] () mutable -> optional<XPath::Result> {
1094 if (firstTime) {
1095 firstTime = false;
1096 return XPathQueryHelper_::ToResult_ (*r);
1097 }
1098 if ((*r)->iterateNext () == false) {
1099 return nullopt;
1100 }
1101 return XPathQueryHelper_::ToResult_ (*r);
1102 });
1103 }
1104 if (xpHelp->rt == DOMXPathResult::UNORDERED_NODE_SNAPSHOT_TYPE or xpHelp->rt == DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE) [[likely]] {
1105 return Traversal::CreateGenerator<XPath::Result> ([xpHelp, r, snapIdx = 0] () mutable -> optional<XPath::Result> {
1106 if (not(*r)->snapshotItem (snapIdx)) {
1107 return nullopt;
1108 }
1109 ++snapIdx;
1110 return XPathQueryHelper_::ToResult_ (*r);
1111 });
1112 }
1115 }
1116 virtual Element::Ptr GetChildElementByID (const String& id) const override
1117 {
1118 AssertNotNull (fNode_);
1119 START_LIB_EXCEPTION_MAPPER_
1120 {
1121 for (DOMNode* i = fNode_->getFirstChild (); i != nullptr; i = i->getNextSibling ()) {
1122 if (i->getNodeType () == DOMNode::ELEMENT_NODE) {
1123 DOMElement* elt = Debug::UncheckedDynamicCast<DOMElement*> (i);
1124 const XMLCh* s = elt->getAttribute (u"id");
1125 AssertNotNull (s);
1126 if (CString::Equals (s, id.As<u16string> ().c_str ())) {
1127 return WrapXercesNodeInStroikaNode_ (elt);
1128 }
1129 }
1130 }
1131 return Element::Ptr{nullptr};
1132 }
1133 END_LIB_EXCEPTION_MAPPER_
1134 }
1135 };
1136 DISABLE_COMPILER_MSC_WARNING_END (4250) // inherits via dominance warning
1137}
1138
1139namespace {
1140 inline void MakeXMLDoc_ (shared_ptr<xercesc::DOMDocument>& newXMLDoc)
1141 {
1142 Require (newXMLDoc == nullptr);
1143 newXMLDoc = shared_ptr<xercesc::DOMDocument> (GetDOMIMPL_ ().createDocument (0, nullptr, 0));
1144 newXMLDoc->setXmlStandalone (true);
1145 }
1146}
1147
1148namespace {
1149 class MyMaybeSchemaDOMParser_ {
1150 public:
1151 Map2StroikaExceptionsErrorReporter_ myErrReporter;
1152 shared_ptr<XercesDOMParser> fParser;
1153 Schema::Ptr fSchema{nullptr};
1154
1155 MyMaybeSchemaDOMParser_ () = delete;
1156 MyMaybeSchemaDOMParser_ (const MyMaybeSchemaDOMParser_&) = delete;
1157 MyMaybeSchemaDOMParser_ (const Schema::Ptr& schema)
1158 : fSchema{schema}
1159 {
1160 shared_ptr<IXercesSchemaRep> accessSchema = dynamic_pointer_cast<IXercesSchemaRep> (schema.GetRep ());
1161 if (accessSchema != nullptr) {
1162 fParser = Memory::MakeSharedPtr<XercesDOMParser> (nullptr, XMLPlatformUtils::fgMemoryManager, accessSchema->GetCachedGrammarPool ());
1163 fParser->cacheGrammarFromParse (false);
1164 fParser->useCachedGrammarInParse (true);
1165 fParser->setDoSchema (true);
1166 fParser->setValidationScheme (AbstractDOMParser::Val_Always);
1167 fParser->setValidationSchemaFullChecking (true);
1168 fParser->setIdentityConstraintChecking (true);
1169 }
1170 else {
1171 fParser = Memory::MakeSharedPtr<XercesDOMParser> ();
1172 }
1173 fParser->setDoNamespaces (true);
1174 fParser->setErrorHandler (&myErrReporter);
1175
1176 // @todo make load-external DTD OPTION specified in NEW for document!!! - parser! --LGP 2023-12-16
1177
1178 // LGP added 2009-09-07 - so must test carefully!
1179 {
1180 // I THINK this prevents loading URL-based DTDs - like the one refered to in http://demo.healthframeowrks.com/ when I load the xhmtl as xml
1181 // (it tkaes forever)
1182 fParser->setLoadExternalDTD (false);
1183 // I THINK this prevents loading URL-based schemas (not sure if/how that would have ever happened so I'm not sure that
1184 // this is for) - guessing a bit...
1185 // -- LGP 2009-09-04
1186 fParser->setLoadSchema (false);
1187 }
1188 }
1189 };
1190}
1191
1192namespace {
1193 Element::Ptr WrapXercesNodeInStroikaNode_ (DOMElement* n);
1194}
1195namespace {
1196 struct DocRep_ : DataExchange::XML::DOM::Document::IRep {
1197#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1198 static inline atomic<unsigned int> sLiveCnt{0};
1199#endif
1200 //
1201 // If this function is passed a nullptr exceptionResult - it will throw on bad validation.
1202 // If it is passed a non-nullptr exceptionResult - then it will map BadFormatException to being ignored, but filling in this
1203 // parameter with the exception details. This is used to allow 'advisory' read xsd validation failure, without actually fully
1204 // failing the read (for http://bugzilla/show_bug.cgi?id=513).
1205 //
1206 DocRep_ (const Streams::InputStream::Ptr<byte>& in, const Schema::Ptr& schema)
1207 {
1208 [[maybe_unused]] int ignoreMe = 0; // workaround quirk in clang-format
1209 START_LIB_EXCEPTION_MAPPER_
1210 {
1211 MakeXMLDoc_ (fXMLDoc);
1212 fXMLDoc->setUserData (kXerces2XMLDBDocumentKey_, this, nullptr);
1213 if (in != nullptr) {
1214 MyMaybeSchemaDOMParser_ myDOMParser{schema};
1215 myDOMParser.fParser->parse (StdIStream_InputSource_{in, u"XMLDB"});
1216 fXMLDoc.reset ();
1217 fXMLDoc = shared_ptr<xercesc::DOMDocument>{myDOMParser.fParser->adoptDocument ()};
1218 fXMLDoc->setXmlStandalone (true);
1219 fXMLDoc->setUserData (kXerces2XMLDBDocumentKey_, this, nullptr);
1220 }
1221 }
1222 END_LIB_EXCEPTION_MAPPER_
1223#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1224 ++sLiveCnt;
1225#endif
1226 }
1227 DocRep_ (const DocRep_& from)
1228 {
1229 START_LIB_EXCEPTION_MAPPER_
1230 {
1231 fXMLDoc = shared_ptr<xercesc::DOMDocument> (dynamic_cast<xercesc::DOMDocument*> (from.fXMLDoc->cloneNode (true)));
1232 fXMLDoc->setXmlStandalone (true);
1233 fXMLDoc->setUserData (kXerces2XMLDBDocumentKey_, this, nullptr);
1234 }
1235 END_LIB_EXCEPTION_MAPPER_
1236 EnsureNotNull (fXMLDoc);
1237#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1238 ++sLiveCnt;
1239#endif
1240 }
1241 virtual ~DocRep_ ()
1242 {
1243#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1244 Assert (sLiveCnt > 0);
1245 --sLiveCnt;
1246#endif
1247 }
1248 virtual const Providers::IDOMProvider* GetProvider () const override
1249 {
1250 return &Providers::Xerces::kDefaultProvider;
1251 }
1252 virtual bool GetStandalone () const override
1253 {
1254 AssertNotNull (fXMLDoc);
1255 return fXMLDoc->getXmlStandalone ();
1256 }
1257 virtual void SetStandalone (bool standalone) override
1258 {
1259 AssertNotNull (fXMLDoc);
1260 fXMLDoc->setXmlStandalone (standalone);
1261 }
1262 virtual Iterable<Node::Ptr> GetChildren () const override
1263 {
1264 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
1265 AssertNotNull (fXMLDoc);
1266 START_LIB_EXCEPTION_MAPPER_
1267 return Traversal::CreateGenerator<Node::Ptr> (
1268 [sni = SubNodeIterator_{Memory::MakeSharedPtr<SubNodeIteratorOver_SiblingList_Rep_> (fXMLDoc.get ())}] () mutable -> optional<Node::Ptr> {
1269 if (sni.IsAtEnd ()) {
1270 return optional<Node::Ptr>{};
1271 }
1272 Node::Ptr r = *sni;
1273 ++sni;
1274 return r;
1275 });
1276 END_LIB_EXCEPTION_MAPPER_
1277 }
1278 virtual Element::Ptr ReplaceRootElement (const NameWithNamespace& newEltName, bool childrenInheritNS) override
1279 {
1280 DOMElement* n = newEltName.fNamespace == nullopt
1281 ? fXMLDoc->createElement (newEltName.fName.As<u16string> ().c_str ())
1282 : fXMLDoc->createElementNS (newEltName.fNamespace->As<String> (kUseURIEncodingFlag_).As<u16string> ().c_str (),
1283 newEltName.fName.As<u16string> ().c_str ());
1284 AssertNotNull (n);
1285 DOMElement* oldRoot = fXMLDoc->getDocumentElement ();
1286 if (oldRoot == nullptr) {
1287 (void)fXMLDoc->insertBefore (n, nullptr);
1288 }
1289 else {
1290 (void)fXMLDoc->replaceChild (n, oldRoot);
1291 /*
1292 * I THOGUHT this was a memory leak, but that appears to have been wrong. First, the
1293 * DOMNode objects get associated with the document, and when the document is destroyed
1294 * this is cleaned up. Secondly, there are enough other memory leaks - its unclear if this
1295 * actually helped. Plus the memory management pattern used by Xerces - with its own sub-chunking etc,
1296 * makes it hard to tell.
1297 *
1298 * More importantly - this caused a regression in HealthFrame - which I didn't debug. The OHSD reports
1299 * like AAFP CCR report - will be rejected by our 'valid HTML' tester. Unclear if that's cuz we generate
1300 * different HTML, but more likely a bug with the load/checker code. Still - not worth worrying
1301 * about why at this stage (especially as we are about to upgrade our Xerces version - could get fixed
1302 * by that?).
1303 *
1304 * -- LGP 2009-05-15
1305 *
1306 * oldRoot->release ();
1307 */
1308 }
1309 Assert (fXMLDoc->getDocumentElement () == n);
1310 Element::Ptr r{WrapXercesNodeInStroikaNode_ (n)};
1311 if (childrenInheritNS and newEltName.fNamespace) {
1312 r.SetAttribute (kXMLNS, newEltName.fNamespace->As<String> ());
1313 }
1314 return r;
1315 }
1316 virtual void Write (const Streams::OutputStream::Ptr<byte>& to, const SerializationOptions& options) const override
1317 {
1318 TraceContextBumper ctx{"Xerces::DocRep_::Write"};
1319 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
1320 AssertNotNull (fXMLDoc);
1321 START_LIB_EXCEPTION_MAPPER_
1322 {
1323 DoWrite2Stream_ (fXMLDoc.get (), to, options);
1324 }
1325 END_LIB_EXCEPTION_MAPPER_
1326 }
1327#if qCompilerAndStdLib_arm_asan_FaultStackUseAfterScope_Buggy
1328 Stroika_Foundation_Debug_ATTRIBUTE_NO_SANITIZE_ADDRESS
1329#endif
1330 virtual void
1331 Validate (const Schema::Ptr& schema) const override
1332 {
1333 TraceContextBumper ctx{"Xerces::DocRep_::Validate"};
1334 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
1335 RequireNotNull (schema);
1336 START_LIB_EXCEPTION_MAPPER_
1337 {
1338 try {
1339 DbgTrace ("Validating against schema (target namespace '{}')"_f, schema.GetTargetNamespace ());
1340 // As this CAN be expensive - especially if we need to externalize the file, and re-parse it!!! - just shortcut by
1341 // checking the top-level DOM-node and assure that has the right namespace. At least quickie first check that works when
1342 // reading files (doesn't help in pre-save check, of course)
1343 DOMNode* docNode = fXMLDoc->getDocumentElement ();
1344 if (docNode == nullptr) [[unlikely]] {
1345 Execution::Throw (BadFormatException{"No document", 0, 0, 0});
1346 }
1347 optional<URI> docURI = docNode->getNamespaceURI () == nullptr ? optional<URI>{} : docNode->getNamespaceURI ();
1348 if (docURI != schema.GetTargetNamespace ()) {
1350 Format ("Wrong document namespace (found '{}' and expected '{}')"_f, docURI, schema.GetTargetNamespace ()), 0, 0, 0});
1351 }
1352
1353 // EXTERNALIZE, AND THEN RE-PARSE USING CACHED SAX PARSER WTIH LOADED GRAMMAR
1354 {
1355 MemBufFormatTarget destination;
1356 {
1357 AutoRelease_<DOMLSOutput> theOutputDesc = GetDOMIMPL_ ().createLSOutput ();
1358 theOutputDesc->setEncoding (XMLUni::fgUTF8EncodingString);
1359 AutoRelease_<DOMLSSerializer> writer = GetDOMIMPL_ ().createLSSerializer ();
1360 theOutputDesc->setByteStream (&destination);
1361 theOutputDesc->setEncoding (XMLUni::fgUTF8EncodingString);
1362 Assert (fXMLDoc->getXmlStandalone ());
1363 writer->write (fXMLDoc.get (), theOutputDesc);
1364 }
1365 MemBufInputSource readReadSrc{destination.getRawBuffer (), destination.getLen (), u"tmp"};
1366 readReadSrc.setEncoding (XMLUni::fgUTF8EncodingString);
1367 shared_ptr<IXercesSchemaRep> accessSchema = dynamic_pointer_cast<IXercesSchemaRep> (schema.GetRep ());
1368 {
1369 AssertNotNull (accessSchema); // for now only rep supported
1370 shared_ptr<SAX2XMLReader> parser = shared_ptr<SAX2XMLReader> (
1371 XMLReaderFactory::createXMLReader (XMLPlatformUtils::fgMemoryManager, accessSchema->GetCachedGrammarPool ()));
1372 SetupCommonParserFeatures_ (*parser, true);
1373 Map2StroikaExceptionsErrorReporter_ myErrorReporter;
1374 parser->setErrorHandler (&myErrorReporter);
1375 parser->parse (readReadSrc);
1376 }
1377 }
1378 }
1379 catch (...) {
1380 if constexpr (qDumpXMLOnValidationError_) {
1381#if !qCompilerAndStdLib_arm_asan_FaultStackUseAfterScope_Buggy
1382 // Generate temp file (each with differnet names), and write out the bad XML.
1383 // Then - re-validate (with line#s) - and print the results of the validation to ANOTHER
1384 // temporary file
1385 //
1386 filesystem::path tmpFileName = IO::FileSystem::AppTmpFileManager::sThe.GetTmpFile ("FAILED_VALIDATION_.xml");
1387 DbgTrace ("Error validating - so writing out temporary file = '{}'"_f, tmpFileName);
1388 Write (IO::FileSystem::FileOutputStream::New (tmpFileName), SerializationOptions{.fPrettyPrint = true, .fIndent = 4});
1389 try {
1390 ValidateFile (tmpFileName, schema);
1391 }
1392 catch (const BadFormatException& vf) {
1393 String tmpFileNameStr = String{tmpFileName};
1394 size_t idx = tmpFileNameStr.find (".xml");
1395 String newTmpFile = tmpFileNameStr.substr (0, idx) + "_MSG.txt";
1396 ofstream msgOut{newTmpFile.AsNarrowSDKString ().c_str ()};
1397 msgOut << "Reason:" << vf.GetDetails () << endl;
1398 optional<unsigned int> lineNum;
1399 optional<unsigned int> colNumber;
1400 optional<uint64_t> fileOffset;
1401 vf.GetPositionInfo (&lineNum, &colNumber, &fileOffset);
1402 if (lineNum) {
1403 msgOut << "Line:" << *lineNum << endl;
1404 }
1405 if (colNumber) {
1406 msgOut << "Col: " << *colNumber << endl;
1407 }
1408 if (fileOffset) {
1409 msgOut << "FilePos: " << *fileOffset << endl;
1410 }
1411 }
1412 catch (...) {
1413 }
1414#endif
1415 }
1417 }
1418 }
1419 END_LIB_EXCEPTION_MAPPER_
1420 }
1421 shared_ptr<xercesc::DOMDocument> fXMLDoc;
1422 [[no_unique_address]] Debug::AssertExternallySynchronizedMutex fThisAssertExternallySynchronized_;
1423 };
1424}
1425
1426namespace {
1427 Node::Ptr WrapXercesNodeInStroikaNode_ (DOMNode* n)
1428 {
1429 RequireNotNull (n);
1430 if (n->getNodeType () == DOMNode::ELEMENT_NODE) {
1431 return Node::Ptr{Memory::MakeSharedPtr<ElementRep_> (n)};
1432 }
1433 else {
1434 return Node::Ptr{Memory::MakeSharedPtr<NodeRep_> (n)};
1435 }
1436 }
1437 Element::Ptr WrapXercesNodeInStroikaNode_ (DOMElement* n)
1438 {
1439 RequireNotNull (n);
1440 return Element::Ptr{Memory::MakeSharedPtr<ElementRep_> (n)};
1441 }
1442}
1443
1444/*
1445 ********************************************************************************
1446 ********************* Provider::Xerces::xercesString2String ********************
1447 ********************************************************************************
1448 */
1449String Providers::Xerces::xercesString2String (const XMLCh* s, const XMLCh* e)
1450{
1451 if constexpr (same_as<XMLCh, char16_t>) {
1452 return String{span{s, e}};
1453 }
1454 // nb: casts required cuz Xerces doesn't (currently) use wchar_t/char16_t/char32_t but something the sizeof char16_t
1455 // --LGP 2016-07-29
1456 if constexpr (sizeof (XMLCh) == sizeof (char16_t)) {
1457 return String{span{reinterpret_cast<const char16_t*> (s), reinterpret_cast<const char16_t*> (e)}};
1458 }
1459 else if constexpr (sizeof (XMLCh) == sizeof (char32_t)) {
1460 return String{span{reinterpret_cast<const char32_t*> (s), reinterpret_cast<const char32_t*> (e)}};
1461 }
1462 else {
1464 return String{};
1465 }
1466}
1467
1468String Providers::Xerces::xercesString2String (const XMLCh* t)
1469{
1470 if constexpr (same_as<XMLCh, char16_t>) {
1471 return String{t};
1472 }
1473 // nb: casts required cuz Xerces doesn't (currently) use wchar_t/char16_t/char32_t but something the sizeof char16_t
1474 // --LGP 2016-07-29
1475 if constexpr (sizeof (XMLCh) == sizeof (char16_t)) {
1476 return String{reinterpret_cast<const char16_t*> (t)};
1477 }
1478 else if constexpr (sizeof (XMLCh) == sizeof (char32_t)) {
1479 return String{reinterpret_cast<const char32_t*> (t)};
1480 }
1481 else {
1483 return String{};
1484 }
1485}
1486
1487/*
1488 ********************************************************************************
1489 ************************* XML::Providers::Xerces::Provider *********************
1490 ********************************************************************************
1491 */
1492Providers::Xerces::Provider::Provider ()
1493{
1494 TraceContextBumper ctx{"Xerces::Provider::CTOR"};
1495#if qStroika_Foundation_Debug_AssertionsChecked
1496 static unsigned int sNProvidersCreated_{0}; // don't create multiple of these - will lead to confusion
1497 Assert (++sNProvidersCreated_ == 1);
1498#endif
1499#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1500 fUseXercesMemoryManager_ = new MyXercesMemMgr_{};
1501 XMLPlatformUtils::Initialize (XMLUni::fgXercescDefaultLocale, 0, 0, fUseXercesMemoryManager_);
1502#else
1503 XMLPlatformUtils::Initialize (XMLUni::fgXercescDefaultLocale, 0, 0);
1504#endif
1505}
1506
1507Providers::Xerces::Provider::~Provider ()
1508{
1509 TraceContextBumper ctx{"Xerces::Provider::DTOR"};
1510 XMLPlatformUtils::Terminate ();
1511#if qStroika_Foundation_DataExchange_XML_DebugMemoryAllocations
1512 Require (SchemaRep_::sLiveCnt == 0); // Check for leaks but better/clearer than memory leaks check below
1513 Require (DocRep_::sLiveCnt == 0); // ""
1514 delete fUseXercesMemoryManager_; // checks for leaks
1515#endif
1516}
1517
1518shared_ptr<Schema::IRep> Providers::Xerces::Provider::SchemaFactory (const InputStream::Ptr<byte>& schemaData, const Resource::ResolverPtr& resolver) const
1519{
1520 return Memory::MakeSharedPtr<SchemaRep_> (schemaData, resolver);
1521}
1522
1523shared_ptr<DOM::Document::IRep> Providers::Xerces::Provider::DocumentFactory (const Streams::InputStream::Ptr<byte>& in,
1524 const Schema::Ptr& schemaToValidateAgainstWhileReading) const
1525{
1526 return Memory::MakeSharedPtr<DocRep_> (in, schemaToValidateAgainstWhileReading);
1527}
1528
1529void Providers::Xerces::Provider::SAXParse (const Streams::InputStream::Ptr<byte>& in, StructuredStreamEvents::IConsumer* callback,
1530 const Schema::Ptr& schema) const
1531{
1532 SAX2PrintHandlers_ handler{callback};
1533 shared_ptr<IXercesSchemaRep> accessSchema;
1534 if (schema != nullptr) {
1535 accessSchema = dynamic_pointer_cast<IXercesSchemaRep> (schema.GetRep ());
1536 }
1537 shared_ptr<SAX2XMLReader> parser{XMLReaderFactory::createXMLReader (
1538 XMLPlatformUtils::fgMemoryManager, accessSchema == nullptr ? nullptr : accessSchema->GetCachedGrammarPool ())};
1539 SetupCommonParserFeatures_ (*parser, accessSchema != nullptr);
1540 parser->setContentHandler (&handler);
1541 Map2StroikaExceptionsErrorReporter_ mErrorReproter_;
1542 parser->setErrorHandler (&mErrorReproter_);
1543 parser->parse (StdIStream_InputSource_{in});
1544}
#define AssertNotNull(p)
Definition Assertions.h:333
#define EnsureNotNull(p)
Definition Assertions.h:340
#define AssertNotImplemented()
Definition Assertions.h:401
#define qStroika_Foundation_Debug_AssertionsChecked
The qStroika_Foundation_Debug_AssertionsChecked flag determines if assertions are checked and validat...
Definition Assertions.h:48
#define RequireNotNull(p)
Definition Assertions.h:347
#define AssertNotReached()
Definition Assertions.h:355
conditional_t< qStroika_Foundation_Memory_PreferBlockAllocation and andTrueCheck, BlockAllocationUseHelper< T >, Common::Empty > UseBlockAllocationIfAppropriate
Use this to enable block allocation for a particular class. Beware of subclassing.
bool Equals(const T *lhs, const T *rhs)
strcmp or wsccmp() as appropriate == 0
#define CompileTimeFlagChecker_SOURCE(NS_PREFIX, NAME, VALUE)
#define DbgTrace
Definition Trace.h:309
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual string AsNarrowSDKString() const
Definition String.inl:830
nonvirtual String substr(size_t from, size_t count=npos) const
Definition String.inl:1086
nonvirtual size_t find(Character c, size_t startAt=0) const
Definition String.inl:1061
nonvirtual bool Add(ArgByValueType< key_type > key, ArgByValueType< mapped_type > newElt, AddReplaceMode addReplaceMode=AddReplaceMode::eAddReplaces)
Definition Mapping.inl:190
A generalization of a vector: a container whose elements are keyed by the natural numbers.
Definition Sequence.h:187
virtual void StartElement(const Name &name, const Mapping< Name, String > &attributes)
Definition IConsumer.cpp:24
nonvirtual void SetAttribute(const NameWithNamespace &attrName, const optional< String > &v)
Definition DOM.inl:148
nonvirtual shared_ptr< IRep > GetRep() const
return the associated shared_ptr (cannot be nullptr)
Node::Ptr is a smart pointer to a Node::IRep.
Definition DOM.h:210
NOT a real mutex - just a debugging infrastructure support tool so in debug builds can be assured thr...
shared_lock< const AssertExternallySynchronizedMutex > ReadContext
Instantiate AssertExternallySynchronizedMutex::ReadContext to designate an area of code where protect...
nonvirtual filesystem::path GetTmpFile(const String &fileBaseName)
nonvirtual bool empty() const
Definition BLOB.inl:246
nonvirtual const byte * begin() const
Definition BLOB.inl:253
nonvirtual size_t GetSize() const
Definition BLOB.inl:264
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
nonvirtual SeekOffsetType GetOffset() const
OutputStream<>::Ptr is Smart pointer to a stream-based sink of data.
nonvirtual void Write(span< ELEMENT_TYPE2, EXTENT_2 > elts) const
nonvirtual void Flush() const
forces any data contained in this stream to be written.
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43
void ThrowIfNull(const Private_::ConstVoidStar &p, const HRESULT &hr)
Template specialization for ThrowIfNull (), for thing being thrown HRESULT - really throw HRESULTErro...