Stroika Library 3.0d23
 
Loading...
Searching...
No Matches
InternetMediaTypeRegistry.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <filesystem>
7
8#include "Stroika/Foundation/Cache/LRUCache.h"
13#if qStroika_Foundation_Common_Platform_Windows
14#include "Stroika/Foundation/Common/Platform/Windows/Registry.h"
15#endif
17#if qStroika_Foundation_Common_Platform_Windows
18#include "Stroika/Foundation/Execution/Platform/Windows/Exception.h"
19#endif
23
25
26using namespace Stroika::Foundation;
27using namespace Stroika::Foundation::Cache;
31using namespace Stroika::Foundation::Execution;
32
33using Memory::MakeSharedPtr;
34using Memory::NullCoalesce;
35
36// Comment this in to turn on aggressive noisy DbgTrace in this module
37//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
38
40
41/*
42 ********************************************************************************
43 ****************** InternetMediaTypeRegistry::OverrideRecord *******************
44 ********************************************************************************
45 */
47{
// Builds a "{...}" description of this record, appending only the optional members that are set.
// NOTE(review): the function signature and the declaration of `sb` are not visible in this listing;
// presumably this is the record's string-conversion method - confirm against the header.
49 sb << "{"sv;
50 if (fTypePrintName) {
51 sb << "TypePrintName: " << fTypePrintName;
52 }
// NOTE(review): the ", " prefix below is emitted even when fTypePrintName was not written,
// so the text can start with "{, FileSuffixes: ..." - confirm whether that is intended.
53 if (fFileSuffixes) {
54 sb << ", FileSuffixes: " << fFileSuffixes;
55 }
56 if (fPreferredSuffix) {
57 sb << ", PreferredSuffix: " << fPreferredSuffix;
58 }
59 sb << "}"sv;
60 return sb;
61}
62
63/*
64 ********************************************************************************
65 ******************** InternetMediaTypeRegistry::FrontendRep_ *******************
66 ********************************************************************************
67 */
68
69/**
70 * @todo UPDATING the frontend is only partly done: SetOverrides () and AddOverride () exist below, but still need to be sure copying the
71 * InternetMediaTypeRegistry and using that copy in isolation works as well (use COW)
72 *
73 * \note - Structurally, we do caching in the backend (as needed) because each backend stores data differently
74 * and the logic of what makes sense to cache changes.
75 *
76 * The frontend simply defines 'API-Driven OVERRIDES' of the values returned (maintained via SetOverrides () / AddOverride ()).
77 */
78struct InternetMediaTypeRegistry::FrontendRep_ : InternetMediaTypeRegistry::IFrontendRep_ {
79
81
82 // Baked in predefined initial user-overrides.
83 // These are adjustable by API, serve the purpose of providing a default on systems with no MIME content database -- LGP 2020-07-27
84
// Default override table used when a FrontendRep_ is constructed without an explicit overrides mapping.
// Each OverrideRecord is initialized positionally; judging by the members this file reads from OverrideRecord,
// these are presumably (type print name, file suffixes, preferred suffix) - TODO confirm against the header.
85 static inline const Mapping<InternetMediaType, OverrideRecord> kDefaults_{initializer_list<KeyValuePair<InternetMediaType, OverrideRecord>>{
86 {InternetMediaTypes::kText_PLAIN, OverrideRecord{nullopt, Containers::Set<String>{".txt"sv}, ".txt"sv}},
87 {InternetMediaTypes::kCSS, OverrideRecord{nullopt, Containers::Set<String>{".css"sv}, ".css"sv}},
88 {InternetMediaTypes::kHTML, OverrideRecord{nullopt, Containers::Set<String>{".htm"sv, ".html"sv}, ".htm"sv}},
89 {InternetMediaTypes::kJavascript, OverrideRecord{nullopt, Containers::Set<String>{".js"sv}, ".js"sv}},
90 {InternetMediaTypes::kJSON, OverrideRecord{nullopt, Containers::Set<String>{".json"sv}, ".json"sv}},
91 {InternetMediaTypes::kPNG, OverrideRecord{nullopt, Containers::Set<String>{".png"sv}, ".png"sv}},
92 {InternetMediaTypes::kXML, OverrideRecord{nullopt, Containers::Set<String>{".xml"sv}, ".xml"sv}},
93 }};
94
95 // OVERRIDE values (take precedence over backend) and any other data we need to keep locked (synchronized)
// All mutable state of the frontend, kept together so it is only reachable through the Synchronized<> wrapper fData_.
96 struct Data_ {
97 shared_ptr<IBackendRep> fBackendRep; // lazy construct on first call to usage (since that construction can be slow)
98
// NOTE(review): the methods below also use lockedData->fOverrides and lockedData->fSuffix2MediaTypeMap;
// their declarations are not visible in this listing - presumably they belong here.
101 };
// mutable so that const accessors can lock it (and lazily fill in fBackendRep via CheckData_)
102 mutable Synchronized<Data_> fData_;
103
104 // NULL backendRep IS allowed - use that to on-demand construct the backend
105 FrontendRep_ (const shared_ptr<IBackendRep>& backendRep)
106 : FrontendRep_{backendRep, kDefaults_}
107 {
108 }
109 FrontendRep_ (const shared_ptr<IBackendRep>& backendRep, const Mapping<InternetMediaType, OverrideRecord>& overrides)
110 : fData_{Data_{.fBackendRep = backendRep}}
111 {
112 SetOverrides (overrides);
113 }
114 virtual Mapping<InternetMediaType, OverrideRecord> GetOverrides () const override
115 {
116 auto lockedData = fData_.rwget ();
117 return lockedData->fOverrides;
118 }
119 virtual void SetOverrides (const Mapping<InternetMediaType, OverrideRecord>& overrides) override
120 {
121 auto lockedData = fData_.rwget ();
122 lockedData->fOverrides = overrides;
123 lockedData->fSuffix2MediaTypeMap.clear ();
124 for (const auto& i : lockedData->fOverrides) {
125 if (i.fValue.fFileSuffixes) {
126 for (const auto& si : *i.fValue.fFileSuffixes) {
127 lockedData->fSuffix2MediaTypeMap.Add (si, i.fKey, AddReplaceMode::eAddIfMissing);
128 }
129 }
130 }
131 }
132 virtual void AddOverride (const InternetMediaType& mediaType, const OverrideRecord& overrideRec) override
133 {
134 auto lockedData = fData_.rwget ();
135 lockedData->fOverrides.Add (mediaType, overrideRec);
136 lockedData->fSuffix2MediaTypeMap.clear ();
137 for (const auto& i : lockedData->fOverrides) {
138 if (i.fValue.fFileSuffixes) {
139 for (const auto& si : *i.fValue.fFileSuffixes) {
140 lockedData->fSuffix2MediaTypeMap.Add (si, i.fKey, AddReplaceMode::eAddIfMissing);
141 }
142 }
143 }
144 }
145 virtual shared_ptr<IBackendRep> GetBackendRep () const override
146 {
147 auto lockedData = fData_.rwget ();
148 return lockedData->fBackendRep;
149 }
150 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
151 {
152 using AtomType = InternetMediaType::AtomType;
153 auto lockedData = fData_.rwget ();
154 CheckData_ (&lockedData);
155 Containers::Set<InternetMediaType> result = lockedData->fBackendRep->GetMediaTypes (majorType);
156 if (majorType == nullopt) {
157 result += lockedData->fOverrides.Keys ();
158 }
159 else {
160 lockedData->fOverrides.Keys ().Apply ([&] (const InternetMediaType& i) {
161 if (i.GetType<AtomType> () == majorType) {
162 result += i;
163 }
164 });
165 }
166 return result;
167 }
168 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
169 {
170 auto lockedData = fData_.rwget ();
171 CheckData_ (&lockedData);
172 if (auto o = lockedData->fOverrides.Lookup (ct)) {
173 if (o->fPreferredSuffix) {
174 return *o->fPreferredSuffix;
175 }
176 }
177 return lockedData->fBackendRep->GetPreferredAssociatedFileSuffix (ct);
178 }
179 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
180 {
181 auto lockedData = fData_.rwget ();
182 CheckData_ (&lockedData);
183 Containers::Set<String> result = lockedData->fOverrides.LookupValue (ct).fFileSuffixes.value_or (Containers::Set<FileSuffixType>{});
184 result += lockedData->fBackendRep->GetAssociatedFileSuffixes (ct);
185 return result;
186 }
187 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
188 {
189 auto lockedData = fData_.rwget ();
190 CheckData_ (&lockedData);
191 if (auto o = lockedData->fOverrides.Lookup (ct)) {
192 if (o->fTypePrintName) {
193 return *o->fTypePrintName;
194 }
195 }
196 return lockedData->fBackendRep->GetAssociatedPrettyName (ct);
197 }
198 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
199 {
200 Require (fileSuffix[0] == '.');
201 auto lockedData = fData_.rwget ();
202 CheckData_ (&lockedData);
203 if (auto o = lockedData->fSuffix2MediaTypeMap.Lookup (fileSuffix)) {
204 return *o;
205 }
206 return lockedData->fBackendRep->GetAssociatedContentType (fileSuffix);
207 }
208 virtual bool IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const override
209 {
210 /**
211 * Generally simple to compare because AtomType code and parser handle case and breaking off bits like +xml, and ; parameters
212 *
213 * Only trick is that no good way to tell more general relationships between types, but doesn't appear well defined (like CCR is a kind of XML).
214 */
215 using AtomType = InternetMediaType::AtomType;
216 AtomType generalType = moreGeneralType.GetType<AtomType> ();
217 AtomType generalSubType = moreGeneralType.GetSubType<AtomType> ();
218 AtomType specificType = moreSpecificType.GetType<AtomType> ();
219 AtomType specificSubType = moreSpecificType.GetSubType<AtomType> ();
220
221 if (specificType == generalType and specificSubType == generalSubType) {
222 return true;
223 }
224
225 // Handle wildcard 'moreGeneralType' - if its exactly Type/'empty' - treating empty as wildcard for IsA...
226 if (moreGeneralType == InternetMediaType{generalType, {}}) {
227 if (specificType == generalType) {
228 return true;
229 }
230 }
231
232 // @todo find a better way - generalize... But for now - Stroika v3.0d12x... - just copy old logic for a bunch of special cases we had - then later
233 // maybe add "override" records for this too....
234 if (moreGeneralType == InternetMediaTypes::Wildcards::kText) {
235 if (IsA (InternetMediaTypes::kXML, moreSpecificType)) {
236 return true;
237 }
238 if (IsA (InternetMediaTypes::kJSON, moreSpecificType)) {
239 return true;
240 }
241 // well known types that can be treated as text (@todo need some way to extend this API)? - Maybe not here but in REGISTRY
242 if (specificType == InternetMediaTypes::Types::kApplication) {
243 Assert (InternetMediaTypes::kRTF.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
244 if (specificSubType == InternetMediaTypes::kRTF.GetSubType<AtomType> ()) {
245 return true;
246 }
247 }
248 }
249 else if (moreGeneralType == InternetMediaTypes::kXML) {
250 if (specificType == InternetMediaTypes::Types::kApplication) {
251 Assert (InternetMediaTypes::kXML.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
252 if (specificSubType == InternetMediaTypes::kXML.GetSubType<AtomType> ()) {
253 return true;
254 }
255 Assert (InternetMediaTypes::kXSLT.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
256 if (specificSubType == InternetMediaTypes::kXSLT.GetSubType<AtomType> ()) {
257 return true;
258 }
259 }
260 if (specificType == InternetMediaTypes::Types::kText) {
261 static const AtomType kXMLAtom_ = "xml"sv;
262 if (specificSubType == kXMLAtom_) {
263 return true;
264 }
265 }
266 }
267
268 // look for suffixes
269 if (auto suffix = moreSpecificType.GetSuffix<AtomType> ()) {
270 if (moreGeneralType == InternetMediaTypes::kJSON) {
271 static const AtomType kSuffix_{"json"sv};
272 if (suffix == kSuffix_) {
273 return true;
274 }
275 }
276 else if (moreGeneralType == InternetMediaTypes::kXML) {
277 static const AtomType kSuffix_{"xml"sv};
278 if (suffix == kSuffix_) {
279 return true;
280 }
281 }
282 }
283
284 return false;
285 }
286 static void CheckData_ (Synchronized<Data_>::WritableReference* lockedData)
287 {
288 if (lockedData->rwref ().fBackendRep == nullptr) {
289 lockedData->rwref ().fBackendRep = InternetMediaTypeRegistry::DefaultBackend ();
290 }
291 }
292};
// Shared frontend, built with a null backend, used by registry objects whose own fFrontEndRep_ is null.
// Its backend is filled in on demand by FrontendRep_::CheckData_.
293inline InternetMediaTypeRegistry::FrontendRep_ InternetMediaTypeRegistry::kDefaultFrontEndForNoBackend_{nullptr};
294
295/*
296 ********************************************************************************
297 *************************** InternetMediaTypeRegistry **************************
298 ********************************************************************************
299 */
300InternetMediaTypeRegistry::InternetMediaTypeRegistry (const shared_ptr<IBackendRep>& backendRep)
301 // note because can be constructed before main () - not safe to Memory::MakeSharedPtr<FrontendRep_> - so delay construction and use kDefaultFrontEndForNoBackend_ if needed
302 : fFrontEndRep_{backendRep == nullptr ? nullptr : MakeSharedPtr<FrontendRep_> (backendRep)}
303{
304}
305
307{
// Delegates to this object's frontend, or to the shared default frontend when fFrontEndRep_ is null.
// NOTE(review): the signature line is not visible in this listing.
308 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetOverrides ();
309}
310
312{
// NOTE(review): the signature line is not visible in this listing.
// Before mutating, give this object its own frontend (built from the shared default) so that
// the shared kDefaultFrontEndForNoBackend_ itself is never modified.
313 if (fFrontEndRep_ == nullptr) {
314 fFrontEndRep_ = MakeSharedPtr<FrontendRep_> (kDefaultFrontEndForNoBackend_);
315 }
316 AssertNotNull (fFrontEndRep_);
317 fFrontEndRep_->SetOverrides (overrides);
318}
319
321{
// NOTE(review): the signature line is not visible in this listing.
// Same lazy-private-frontend step as for setting all overrides: the shared default frontend is copied, not modified.
322 if (fFrontEndRep_ == nullptr) {
323 fFrontEndRep_ = MakeSharedPtr<FrontendRep_> (kDefaultFrontEndForNoBackend_);
324 }
325 AssertNotNull (fFrontEndRep_);
326 fFrontEndRep_->AddOverride (mediaType, overrideRec);
327}
328
329optional<InternetMediaTypeRegistry::FileSuffixType> InternetMediaTypeRegistry::GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const
330{
331 return Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetPreferredAssociatedFileSuffix (ct);
332}
333
335{
// NOTE(review): the signature line and the declaration of `r` are not visible in this listing.
// Starts from the frontend's suffix set for ct ...
337 r = Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedFileSuffixes (ct);
338 // if a MediaType has a builtin suffix, include that as well...
// NOTE(review): the value is added exactly as GetSuffix<String> () returns it; other code in this file
// requires file suffixes to start with '.' - confirm the two conventions agree.
339 if (auto os = ct.GetSuffix<String> ()) {
340 r += *os;
341 }
342 return r;
343}
344
346{
// Delegates to this object's frontend, or to the shared default frontend when fFrontEndRep_ is null.
// NOTE(review): the signature line is not visible in this listing.
347 return Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedPrettyName (ct);
348}
349
// Pick the backend for the current platform: the Windows registry backend on Windows; otherwise the
// /usr/share/mime backend if that directory exists, then /etc/mime.types if that file exists, and
// finally the baked-in (empty) backend. Exceptions from the two file-based attempts are swallowed
// so the next candidate can be tried.
350shared_ptr<InternetMediaTypeRegistry::IBackendRep> InternetMediaTypeRegistry::DefaultBackend ()
351{
352 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::DefaultBackend"};
353#if qStroika_Foundation_Common_Platform_Windows
354 return WindowsRegistryDefaultBackend ();
355#endif
// On Windows everything below follows an unconditional return.
356 // @todo fix for MacOS - which doesn't support these - http://stroika-bugs.sophists.com/browse/STK-795
357 if (filesystem::exists ("/usr/share/mime"sv)) {
358 try {
359 return UsrSharedDefaultBackend ();
360 }
361 catch (...) {
362 // LOG/WRN
363 }
364 }
365 if (filesystem::exists ("/etc/mime.types"sv)) {
366 try {
// NOTE(review): the statement inside this try is not visible in this listing
// (presumably it returns the /etc/mime.types backend).
368 }
369 catch (...) {
370 // LOG/WRN
371 }
372 }
373 return BakedInDefaultBackend (); // always works (but sucks)
374}
375
377{
// NOTE(review): the function signature is not visible in this listing; the name is taken from the trace string below.
378 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::EtcMimeTypesDefaultBackend"};
379 /*
380 * Use the file /etc/mime.types
381 *
382 * not sure this is useful - not sure who uses it that doesn't support /usr/share/mime...
383 *
384 * Preload the entire DB since it's not practical to scan looking for the intended record (due to the time this would take).
385 */
386 struct EtcMimeTypesRep_ : IBackendRep {
// Lookup tables filled once by the constructor and only read afterwards.
387 Mapping<FileSuffixType, InternetMediaType> fSuffix2MediaTypeMap_;
388 Mapping<InternetMediaType, FileSuffixType> fMediaType2PreferredSuffixMap_;
// NOTE(review): fMediaType2SuffixesMap_ is used below but its declaration is not visible in this listing.
390
391 EtcMimeTypesRep_ ()
392 {
393#if USE_NOISY_TRACE_IN_THIS_MODULE_
394 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::{}::EtcMimeTypesRep_::CTOR"};
395#endif
// NOTE(review): the head of the loop over the parsed lines of /etc/mime.types is not visible in this listing.
397 IO::FileSystem::FileInputStream::New ("/etc/mime.types"sv))) {
// only lines with at least two fields, and not starting with '#', are considered
398 if (line.length () >= 2 and not line[0].StartsWith ("#"_k)) {
// NOTE(review): the declaration of `ct` is not visible. When the parse below throws, the exception is only
// traced and the code continues with whatever value `ct` holds - confirm that is intended.
400 try {
401 ct = InternetMediaType{line[0]};
402 }
403 catch (...) {
404 DbgTrace ("Ignoring exception looking parsing potential media type entry ({}): {}"_f, line[0], current_exception ());
405 }
406 // a line starts with a content type, but then contains any number of file suffixes (without the leading .)
// NOTE(review): the declaration of `fileSuffixes` is not visible in this listing.
408 for (size_t i = 1; i < line.length (); ++i) {
409 if (line[i].empty ()) {
410 DbgTrace ("Ignoring bad looking parsing potential media type entry ({})"_f, line);
411 }
412 else {
413 Assert (not line[i].empty ());
414 String suffix = "."sv + line[i];
// default Add mode for suffix -> type; eAddIfMissing for the preferred suffix, so the first suffix listed is kept
415 fSuffix2MediaTypeMap_.Add (suffix, ct);
416 fMediaType2PreferredSuffixMap_.Add (ct, suffix, AddReplaceMode::eAddIfMissing);
417 fileSuffixes.Add (suffix);
418 }
419 }
420 fMediaType2SuffixesMap_.Add (ct, fileSuffixes);
421 }
422 }
423 // Because on raspberrypi/debian, this comes out with a crazy default for text\plain -- LGP 2020-07-27
424 fMediaType2PreferredSuffixMap_.Add (InternetMediaTypes::kText_PLAIN, ".txt"sv);
425#if USE_NOISY_TRACE_IN_THIS_MODULE_
426 DbgTrace (L"succeeded with {} fSuffix2MediaTypeMap entries, and {} fMediaType2PreferredSuffixMap entries"_f,
427 fSuffix2MediaTypeMap_.size (), fMediaType2PreferredSuffixMap_.size ());
428#endif
429 }
// Returns the keys of the preferred-suffix map, optionally restricted to one major type.
430 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
431 {
// NOTE(review): the declaration of `results` is not visible in this listing.
433 for (const InternetMediaType& imt : fMediaType2PreferredSuffixMap_.Keys ()) {
434 if (majorType != nullopt and (imt.GetType<InternetMediaType::AtomType> () != *majorType)) {
435 continue;
436 }
437 results += imt;
438 }
439 return results;
440 }
441 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
442 {
443 if (auto o = fMediaType2PreferredSuffixMap_.Lookup (ct)) {
444 return *o;
445 }
446 return nullopt;
447 }
448 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
449 {
450 if (auto i = fMediaType2SuffixesMap_.Lookup (ct)) {
451 return *i;
452 }
// NOTE(review): the fall-through return statement is not visible in this listing.
454 }
455 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& /*ct*/) const override
456 {
457 return nullopt; // not supported in this file
458 }
// fileSuffix must start with '.'
459 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
460 {
461 Require (fileSuffix[0] == '.');
462 if (auto o = fSuffix2MediaTypeMap_.Lookup (fileSuffix)) {
463 return *o;
464 }
465 return nullopt;
466 }
467 };
468 return MakeSharedPtr<EtcMimeTypesRep_> ();
469}
470
472{
473 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::UsrSharedDefaultBackend"};
474 /*
475 * Documented to some small degree in https://www.linuxtopia.org/online_books/linux_desktop_guides/gnome_2.14_admin_guide/mimetypes-database.html
476 */
477 struct UsrShareMIMERep_ : IBackendRep {
// Directories searched for a "globs" file, in this order (earlier entries win, see the constructor).
// NOTE(review): std::filesystem does not expand a leading "~", so the first entry presumably never
// matches an existing file unless it is expanded elsewhere - confirm.
478 Iterable<filesystem::path> fDataRoots_{"~/.local/share/mime/"sv, "/usr/local/share/mime/"sv, "/usr/share/mime"sv};
479
480 /*
481 * NOTE - for fSuffix2MediaTypeMap_ and fMediaType2PreferredSuffixMap, we cannot use Bijection,
482 * because multiple media-types can map to a single filetype and not all mediatypes have a filetype.
483 *
484 * We CANNOT use a cache, or dynamically fetch this data from files, because the data for each file suffix
485 * is not indexed (by file suffix) - it is indexed by content type (so those lookups COULD be dynamic). But
486 * we can easily construct both at the same time reading the summary file, so we do.
487 */
488 Mapping<FileSuffixType, InternetMediaType> fSuffix2MediaTypeMap_;
489 Mapping<InternetMediaType, FileSuffixType> fMediaType2PreferredSuffixMap_;
491
492 mutable Synchronized<Mapping<InternetMediaType, String>> fMediaType2PrettyNameCache; // incrementally build as needed
493
// Loads the "globs" file from each directory in fDataRoots_ and fills the suffix/media-type maps.
494 UsrShareMIMERep_ ()
495 {
496#if USE_NOISY_TRACE_IN_THIS_MODULE_
497 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::{}UsrShareMIMERep_::CTOR"};
498#endif
499 // @todo consider using globs2 file support, but little point since they seem to be written in priority order
// Reads one ':'-delimited globs file (if it exists); every failure while reading is swallowed.
500 auto loadGlobsFromFile = [&] (const filesystem::path& fn) {
501 if (filesystem::exists (fn)) {
502 Debug::TraceContextBumper ctx1{"UsrShareMIMERep_::CTOR::loadGlobsFromFile", "exists=true,fn={}"_f, fn};
503 try {
504 for (Sequence<String> line :
505 DataExchange::Variant::CharacterDelimitedLines::Reader{{':'}}.ReadMatrix (IO::FileSystem::FileInputStream::New (fn))) {
// only "mediatype:glob" lines (exactly two fields) are used
506 if (line.length () == 2) {
507 String glob = line[1];
// turn a glob such as "*.txt" into the suffix ".txt"
508 if (glob.StartsWith ('*')) {
509 glob = glob.SubString (1);
510 }
511 // Use AddReplaceMode::eAddIfMissing - so first (appears empirically to be the preferred value) wins
// NOTE(review): the declaration of `imt` is not visible. When the parse below throws, the exception is only
// traced and the code continues with whatever value `imt` holds - confirm that is intended.
513 try {
514 imt = InternetMediaType{line[0]};
515 }
516 catch (...) {
517 DbgTrace ("Ignoring exception looking parsing potential media type entry ({}): {}"_f, line[0], current_exception ());
518 }
519 fSuffix2MediaTypeMap_.Add (glob, imt, AddReplaceMode::eAddIfMissing);
520 fMediaType2PreferredSuffixMap_.Add (imt, glob, AddReplaceMode::eAddIfMissing);
521
522 // update the set of mapped suffixes
523 Containers::Set<FileSuffixType> prevSuffixes = fMediaType2SuffixesMap_.LookupValue (imt);
524 prevSuffixes.Add (glob);
525 fMediaType2SuffixesMap_.Add (imt, prevSuffixes);
526 }
527 }
528
529 // Because on raspberrypi/Debian, this comes out with a crazy default for text\plain -- LGP 2020-07-27
530 fMediaType2PreferredSuffixMap_.Add (InternetMediaTypes::kText_PLAIN, ".txt"_k);
531 }
532 catch (...) {
533 // log error
534 }
535 }
536 };
537 // override files loaded first, tied to use of AddReplaceMode::eAddIfMissing - not replacing
538 for (const auto& p : fDataRoots_) {
539 loadGlobsFromFile (p / "globs");
540 }
541
542#if USE_NOISY_TRACE_IN_THIS_MODULE_
543 DbgTrace ("succeeded with {} fSuffix2MediaTypeMap_ entries, and {} fMediaType2PreferredSuffixMap entries"_f,
544 fSuffix2MediaTypeMap_.size (), fMediaType2PreferredSuffixMap_.size ());
545#endif
546 }
// Returns the keys of the preferred-suffix map, optionally restricted to one major type.
547 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
548 {
549#if USE_NOISY_TRACE_IN_THIS_MODULE_
550 Debug::TraceContextBumper ctx{Stroika_Foundation_Debug_OptionalizeTraceArgs ("UsrShareMIMERep_::GetMediaTypes", "majorType={}"_f, majorType)};
551#endif
// NOTE(review): the declaration of `results` is not visible in this listing.
553 for (const auto& imt : fMediaType2PreferredSuffixMap_.Keys ()) {
554 if (majorType) {
555 if (imt.GetType<InternetMediaType::AtomType> () != *majorType) {
556 continue; // skip non-matching types
557 }
558 }
559 results += imt;
560 }
561 return results;
562 }
563 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
564 {
565 if (auto o = fMediaType2PreferredSuffixMap_.Lookup (ct)) {
566 return *o;
567 }
568 return nullopt;
569 }
// Returns every suffix recorded for ct while loading the globs files.
570 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
571 {
572 if (auto i = fMediaType2SuffixesMap_.Lookup (ct)) {
573 return *i;
574 }
// NOTE(review): the fall-through return statement is not visible in this listing.
576 }
577 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
578 {
579 return LookupAndUpdateFromUsrShareMimePrettyName_ (ct);
580 }
581 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
582 {
583 Require (fileSuffix[0] == '.');
584 if (auto o = fSuffix2MediaTypeMap_.Lookup (fileSuffix)) {
585 return *o;
586 }
587 return nullopt;
588 }
589 optional<String> LookupAndUpdateFromUsrShareMimePrettyName_ (const InternetMediaType& ct) const
590 {
591#if USE_NOISY_TRACE_IN_THIS_MODULE_
592 Debug::TraceContextBumper ctx{"{}MIMEDB_::LookupAndUpdateFromUsrShareMimePrettyName"};
593#endif
594 // @todo combine lock calls in this procedure
595 if (auto o = fMediaType2PrettyNameCache.cget ()->Lookup (ct)) {
596 return *o;
597 }
598 // SAX parse /usr/share/mime/TYPE/SUBTYPE.xml file and look for <comment> element (default with no language for now)
599 // Simpler - just take the first - seems empirically fine/OK
600#if qStroika_Foundation_DataExchange_XML_SupportParsing
601 try {
602 using Name = StructuredStreamEvents::Name;
603 struct myHander_ : StructuredStreamEvents::IConsumer {
604 optional<String> fResult;
605 bool onContentElt{false};
606 StringBuilder<> fAccum;
607 virtual void StartElement (const Name& name, [[maybe_unused]] const Mapping<Name, String>& attributes) override
608 {
609 if (name == Name{"content"_k} and not fResult.has_value ()) {
610 onContentElt = true;
611 }
612 }
613 virtual void EndElement ([[maybe_unused]] const Name& name) override
614 {
615 if (onContentElt) {
616 Assert (not fResult);
617 fResult = fAccum.str ();
618 }
619 }
620 virtual void TextInsideElement (const String& t) override
621 {
622 if (onContentElt) {
623 fAccum << t;
624 }
625 }
626 };
627 filesystem::path mimeRoot{"/usr/share/mime/"sv};
628 myHander_ handler;
629 // @todo validate ct.GetType () to make sure not a ../../ ATTACK
630 DataExchange::XML::SAXParse (IO::FileSystem::FileInputStream::New (
631 mimeRoot / (ct.GetType () + "/"_k + ct.GetSubType () + ".xml"_k).As<filesystem::path> ()),
632 &handler);
633 if (handler.fResult) {
634 fMediaType2PrettyNameCache.rwget ()->Add (ct, *handler.fResult);
635 return *handler.fResult;
636 }
637 }
638 catch (...) {
639#if USE_NOISY_TRACE_IN_THIS_MODULE_
640 DbgTrace ("failure ignored"_f);
641#endif
642 }
643#else
644 DbgTrace ("/usr/share/mime/ ignored cuz no xml reader - not compiled with libxml2 or Xerces"_f);
645#endif
646 return nullopt;
647 }
648 };
649 return MakeSharedPtr<UsrShareMIMERep_> ();
650}
651
// Backend of last resort: it reads no external database. The visible lookups all return nullopt.
652auto InternetMediaTypeRegistry::BakedInDefaultBackend () -> shared_ptr<IBackendRep>
653{
654 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::BakedInDefaultBackend"};
655 struct DefaultEmptyBackendRep_ : IBackendRep {
656 virtual Containers::Set<InternetMediaType> GetMediaTypes ([[maybe_unused]] optional<InternetMediaType::AtomType> majorType) const override
657 {
// NOTE(review): the return statement is not visible in this listing (presumably an empty set).
659 }
660 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix ([[maybe_unused]] const InternetMediaType& ct) const override
661 {
662 return nullopt;
663 }
664 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes ([[maybe_unused]] const InternetMediaType& ct) const override
665 {
// NOTE(review): the return statement is not visible in this listing (presumably an empty set).
667 }
668 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& /*ct*/) const override
669 {
670 return nullopt;
671 }
// fileSuffix must start with '.', even though nothing is looked up
672 virtual optional<InternetMediaType> GetAssociatedContentType ([[maybe_unused]] const FileSuffixType& fileSuffix) const override
673 {
674 Require (fileSuffix[0] == '.');
675 return nullopt;
676 }
677 };
678 return MakeSharedPtr<DefaultEmptyBackendRep_> ();
679}
680
681auto InternetMediaTypeRegistry::CloneAsSharedPtr_ (const IFrontendRep_& t) -> shared_ptr<IFrontendRep_>
682{
683 return Memory::MakeSharedPtr<FrontendRep_> (t.GetBackendRep (), t.GetOverrides ());
684}
685
686namespace {
// File-local alias for the cache type used by the Windows backend's members (constructed there as {25, 7}).
// NOTE(review): the right-hand side of this alias is not visible in this listing.
687 template <typename K, typename V>
688 using SynchronizedHashingCache_ =
690}
691
692#if qStroika_Foundation_Common_Platform_Windows
// Backend that answers media type questions from the Windows registry (HKEY_CLASSES_ROOT).
693auto InternetMediaTypeRegistry::WindowsRegistryDefaultBackend () -> shared_ptr<IBackendRep>
694{
695 /*
696 * I can find no documentation on how this works, but at least https://stackoverflow.com/questions/3442607/mime-types-in-the-windows-registry
697 * mentions it.
698 *
699 * Empirically you can usually find:
700 * HKEY_CLASSES_ROOT\MIME\Database
701 * Content Type\CT\Extension
702 * This layout does not appear to accommodate ever having more than one extension for a given mime type
703 *
704 * HKEY_CLASSES_ROOT\FILE_SUFFIX
705 * {default} pretty name
706 * Content Type: 'internet media type'
707 *
708 * \note On Docker windows server core images, this is often missing! (but addressed with the default values baked into the frontend) -- LGP 2020-07-28
709 */
710 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::WindowsRegistryDefaultBackend"};
711 struct WinRep_ : IBackendRep {
712 // underlying windows code fast so use small cache sizes
713 mutable SynchronizedHashingCache_<FileSuffixType, optional<String>> fFileSuffix2PrettyNameCache_{25, 7};
714 mutable SynchronizedHashingCache_<FileSuffixType, optional<InternetMediaType>> fSuffix2MediaTypeCache_{25, 7};
715 mutable SynchronizedHashingCache_<InternetMediaType, optional<FileSuffixType>> fContentType2FileSuffixCache_{25, 7};
716 mutable SynchronizedHashingCache_<InternetMediaType, Containers::Set<FileSuffixType>> fContentType2FileSuffixesCache_{25, 7};
717
// Enumerates the sub-keys of HKEY_CLASSES_ROOT whose name starts with '.', and collects their "Content Type" values.
718 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
719 {
// NOTE(review): the declaration of `result` is not visible in this listing.
721 //
722 // rarely do we fetch all MIME types, so don't cache - just re-fetch each time
723 //
724 // On Windows, in registry, easiest way appears to be to enumerate ALL registry entries in HKCR that start with .,
725 // and look for sub-field 'Content-type'
726 //
727 using RegistryKey = Common::Platform::Windows::RegistryKey;
728 for (shared_ptr<RegistryKey> sk : RegistryKey{HKEY_CLASSES_ROOT}.EnumerateSubKeys ()) {
// last path component of the key, e.g. ".txt"
729 String name = sk->GetFullPathOfKey ().Tokenize ({'\\'}).LastValue ();
730 if (name.StartsWith ('.')) {
731 if (auto o = sk->Lookup ("Content Type"sv)) {
// NOTE(review): the declaration of `imt` is not visible in this listing.
733 try {
734 imt = InternetMediaType{o.As<String> ()};
735 }
736 catch (...) {
737 // ignore bad format - such as .sqlproj has Content-Type "string" which my read of the RFC says is illegal
738 DbgTrace ("Ignoring exception parsing registry key ({}): {}"_f, o, current_exception ());
739 continue;
740 }
741 if (majorType) {
742 if (imt.GetType<InternetMediaType::AtomType> () != *majorType) {
743 continue; // skip non-matching types
744 }
745 }
746 result.Add (imt);
747 }
748 }
749 }
750 return result;
751 }
// Cached lookup of MIME\Database\Content Type\<ct>\Extension
752 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
753 {
754 return fContentType2FileSuffixCache_.LookupValue (ct, [] (const InternetMediaType& ct) -> optional<FileSuffixType> {
755 if (auto fs = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup ("MIME\\Database\\Content Type\\{}\\Extension"_f(ct))) {
756 return fs.As<String> ();
757 }
758 return nullopt;
759 });
760 }
761 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
762 {
763 // This is expensive to compute, and we could compute all and cache, but I don't think we will need to lookup very often, so just
764 // compute as needed and cache a few
765 return fContentType2FileSuffixesCache_.LookupValue (ct, [] (const InternetMediaType& ct) -> Containers::Set<FileSuffixType> {
// NOTE(review): the declarations of `result` and of the RegistryKey alias are not visible in this listing.
768 for (shared_ptr<RegistryKey> sk : RegistryKey{HKEY_CLASSES_ROOT}.EnumerateSubKeys ()) {
769 String name = sk->GetFullPathOfKey ().Tokenize ({'\\'}).LastValue ();
770 if (name.StartsWith ("."_k)) {
771 if (auto o = sk->Lookup ("Content Type"sv)) {
// NOTE(review): the declaration of `imt` is not visible in this listing.
773 try {
774 imt = InternetMediaType{o.As<String> ()};
775 }
776 catch (...) {
777 // ignore bad format - such as .sqlproj has Content-Type "string" which my read of the RFC says is illegal
778 DbgTrace ("Ignoring exception parsing registry key ({}): {}"_f, o, current_exception ());
779 continue;
780 }
// match on type and subtype only
781 if (ct.GetType () == imt.GetType () and ct.GetSubType () == imt.GetSubType ()) {
782 result += name;
783 }
784 }
785 }
786 }
787 return result;
788 });
789 }
// Pretty name = default value of the file-type key that the preferred suffix's key points at.
790 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
791 {
792 if (optional<FileSuffixType> fileSuffix = GetPreferredAssociatedFileSuffix (ct)) {
793 return fFileSuffix2PrettyNameCache_.LookupValue (*fileSuffix, [] (const String& suffix) -> optional<String> {
794 if (auto fileTypeID = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup (suffix + "\\"_k)) {
795 if (auto prettyName = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup (fileTypeID.As<String> () + "\\"_k)) {
796 return prettyName.As<String> ();
797 }
798 }
799 return nullopt;
800 });
801 }
802 return nullopt;
803 }
// fileSuffix must start with '.'
804 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
805 {
806 Require (fileSuffix[0] == '.');
807 return fSuffix2MediaTypeCache_.LookupValue (fileSuffix, [] (const FileSuffixType& fileSuffix) -> optional<InternetMediaType> {
// NOTE(review): the RegistryKey alias declaration is not visible in this listing.
809 // only do registry lookup if needed, since (probably) more costly than local map lookup
810 if (auto oct = RegistryKey{HKEY_CLASSES_ROOT}.Lookup ("{}\\Content Type"_f(fileSuffix))) {
// NOTE(review): unlike the enumerating methods above, a malformed "Content Type" value is not caught here,
// so a parse exception would propagate to the caller - confirm that is intended.
811 InternetMediaType mediaType{oct.As<String> ()};
812 return mediaType;
813 }
814 return nullopt;
815 });
816 }
817 };
818 return MakeSharedPtr<WinRep_> ();
819}
820#endif
821
822Set<InternetMediaType> InternetMediaTypeRegistry::GetMediaTypes () const
823{
824 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetMediaTypes (nullopt);
825}
826
827Set<InternetMediaType> InternetMediaTypeRegistry::GetMediaTypes (InternetMediaType::AtomType majorType) const
828{
829 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetMediaTypes (majorType);
830}
831
833{
// Collects, into one result, the associated file suffixes of every media type in `mediaTypes`.
// NOTE(review): the signature line and the declaration of `result` are not visible in this listing.
835 for (const auto& ct : mediaTypes) {
836 for (const auto& i : GetAssociatedFileSuffixes (ct)) {
837 result += i;
838 }
839 }
840 return result;
841}
842
843optional<InternetMediaType> InternetMediaTypeRegistry::GetAssociatedContentType (const FileSuffixType& fileSuffix) const
844{
845 if (fileSuffix.empty ()) {
846 return nullopt;
847 }
848 Assert (fileSuffix[0] == '.');
849 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedContentType (fileSuffix);
850}
851
853{
// True when ct IsA the text wildcard type.
// NOTE(review): the signature line is not visible in this listing.
854 return IsA (InternetMediaTypes::Wildcards::kText, ct);
855}
856
858{
// True when ct IsA the image wildcard type.
// NOTE(review): the signature line is not visible in this listing.
859 return IsA (InternetMediaTypes::Wildcards::kImage, ct);
860}
861
863{
// True when ct IsA InternetMediaTypes::kXML.
// NOTE(review): the signature line is not visible in this listing.
864 return IsA (InternetMediaTypes::kXML, ct);
865}
866
867bool InternetMediaTypeRegistry::IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const
868{
869 using AtomType = InternetMediaType::AtomType;
870 // shortcut this one case
871 if (moreSpecificType.GetType<AtomType> () == moreGeneralType.GetType<AtomType> () and
872 moreSpecificType.GetSubType<AtomType> () == moreGeneralType.GetSubType<AtomType> ()) {
873 return true;
874 }
875 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).IsA (moreGeneralType, moreSpecificType);
876}
#define AssertNotNull(p)
Definition Assertions.h:334
const OT & NullCoalesce(const OT &l, const OT &r)
return one of l, or r, with first preference for which is engaged, and second preference for left-to-...
Definition Optional.inl:134
#define DbgTrace
Definition Trace.h:317
#define Stroika_Foundation_Debug_OptionalizeTraceArgs(...)
Definition Trace.h:278
LRUCache implements a simple least-recently-used caching strategy, with optional hashing (of keys) to...
Definition LRUCache.h:224
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual String SubString(SZ from) const
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1060
nonvirtual Containers::Sequence< String > Tokenize() const
Definition String.cpp:1235
nonvirtual DataExchange::VariantValue Lookup(const Characters::String &valuePath) const
Definition Registry.cpp:143
nonvirtual bool Add(ArgByValueType< key_type > key, ArgByValueType< mapped_type > newElt, AddReplaceMode addReplaceMode=AddReplaceMode::eAddReplaces)
Definition Mapping.inl:188
nonvirtual optional< mapped_type > Lookup(ArgByValueType< key_type > key) const
Definition Mapping.inl:142
nonvirtual mapped_type LookupValue(ArgByValueType< key_type > key, ArgByValueType< mapped_type > defaultValue=mapped_type{}) const
Definition Mapping.inl:166
nonvirtual Iterable< key_type > Keys() const
Definition Mapping.inl:111
A generalization of a vector: a container whose elements are keyed by the natural numbers.
Set<T> is a container of T, where once an item is added, adding it again does nothing.
nonvirtual void Add(ArgByValueType< value_type > item)
Definition Set.inl:138
nonvirtual RETURN_TYPE GetType() const
Gets the primary (major) type of the full internet media type (as a string or atom)
nonvirtual T As() const
convert to type T; supported types: String, wstring
nonvirtual optional< RETURN_TYPE > GetSuffix() const
this is the +XXX part of the internet media type (e.g. +xml) and is often omitted (but note this omit...
nonvirtual Mapping< InternetMediaType, OverrideRecord > GetOverrides() const
static shared_ptr< IBackendRep > DefaultBackend()
Generally no need to use this - handled automatically - but returns the default, OS-provided MIME Int...
nonvirtual bool IsXMLFormat(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > UsrSharedDefaultBackend()
Generally no need to use this - handled automatically.
nonvirtual bool IsA(const InternetMediaType &moreGeneralType, const InternetMediaType &moreSpecificType) const
return true if moreSpecificType 'isa' moreGeneralType
nonvirtual void SetOverrides(const Mapping< InternetMediaType, OverrideRecord > &overrides)
nonvirtual optional< InternetMediaType > GetAssociatedContentType(const FileSuffixType &fileSuffix) const
nonvirtual optional< String > GetAssociatedPrettyName(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > EtcMimeTypesDefaultBackend()
Generally no need to use this - handled automatically.
static shared_ptr< IBackendRep > BakedInDefaultBackend()
Generally no need to use this - handled automatically.
InternetMediaTypeRegistry(const shared_ptr< IBackendRep > &backendRep=nullptr)
nonvirtual optional< FileSuffixType > GetPreferredAssociatedFileSuffix(const InternetMediaType &ct) const
nonvirtual Containers::Set< FileSuffixType > GetAssociatedFileSuffixes(const InternetMediaType &ct) const
nonvirtual void AddOverride(const InternetMediaType &mediaType, const OverrideRecord &overrideRec)
bool IsTextFormat(const InternetMediaType &ct) const
returns true if you can expect to treat as some sort of text and reasonably view - like text/html,...
This COULD be easily used to read CSV files, or tab-delimited files, for example.
nonvirtual Iterable< Sequence< String > > ReadMatrix(const Streams::InputStream::Ptr< byte > &in) const
Wrap any object with Synchronized<> and it can be used similarly to the base type,...
nonvirtual WritableReference rwget()
get a read-write smart pointer to the underlying Synchronized<> object, holding the full lock the who...
nonvirtual ReadableReference cget() const
get a read-only smart pointer to the underlying Synchronized<> object, holding the readlock the whole...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
nonvirtual void Apply(const function< void(ArgByValueType< T > item)> &doToElement, Execution::SequencePolicy seq=Execution::SequencePolicy::eDEFAULT) const
Run the argument function (or lambda) on each element of the container.
nonvirtual size_t size() const
Returns the number of items contained.
Definition Iterable.inl:303
const InternetMediaType::AtomType kApplication
'application'
for OS facilities not updatable - or controllable - just usable.