Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
InternetMediaTypeRegistry.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <filesystem>
7
8#include "Stroika/Foundation/Cache/SynchronizedLRUCache.h"
13#if qStroika_Foundation_Common_Platform_Windows
14#include "Stroika/Foundation/Common/Platform/Windows/Registry.h"
15#endif
17#if qStroika_Foundation_Common_Platform_Windows
18#include "Stroika/Foundation/Execution/Platform/Windows/Exception.h"
19#endif
22
24
25using namespace Stroika::Foundation;
29using namespace Stroika::Foundation::Execution;
30
31using Memory::NullCoalesce;
32
33// Comment this in to turn on aggressive noisy DbgTrace in this module
34//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
35
37
38/*
39 ********************************************************************************
40 ****************** InternetMediaTypeRegistry::OverrideRecord *******************
41 ********************************************************************************
42 */
44{
46 sb << "{"sv;
47 if (fTypePrintName) {
48 sb << "TypePrintName: " << fTypePrintName;
49 }
50 if (fFileSuffixes) {
51 sb << ", FileSuffixes: " << fFileSuffixes;
52 }
53 if (fPreferredSuffix) {
54 sb << ", PreferredSuffix: " << fPreferredSuffix;
55 }
56 sb << "}"sv;
57 return sb;
58}
59
60/*
61 ********************************************************************************
62 ******************** InternetMediaTypeRegistry::FrontendRep_ *******************
63 ********************************************************************************
64 */
65
66/**
67 * @todo NYI UPDATING the frontend. Implement APIs to externally add mappings and be sure copying the InternetMediaTypeRegistry and using that
68 * in isolation works as well (use COW)
69 *
70 * \note - Structurally, we do caching in the backend (as needed) because each backend stores data differently
71 * and the logic of what makes sense to cache changes.
72 *
73 * The frontend simply defines 'API-Driven OVERRIDES' of the values returned. (so far not fully implemented - no setters/manipulators)
74 */
75struct InternetMediaTypeRegistry::FrontendRep_ : InternetMediaTypeRegistry::IFrontendRep_ {
76
78
79 // Baked in predefined initial user-overrides.
80 // These are adjustable by API, serve the purpose of providing a default on systems with no MIME content database -- LGP 2020-07-27
81
82 static inline const Mapping<InternetMediaType, OverrideRecord> kDefaults_{initializer_list<KeyValuePair<InternetMediaType, OverrideRecord>>{
83 {InternetMediaTypes::kText_PLAIN, OverrideRecord{nullopt, Containers::Set<String>{".txt"sv}, ".txt"sv}},
84 {InternetMediaTypes::kCSS, OverrideRecord{nullopt, Containers::Set<String>{".css"sv}, ".css"sv}},
85 {InternetMediaTypes::kHTML, OverrideRecord{nullopt, Containers::Set<String>{".htm"sv, ".html"sv}, ".htm"sv}},
86 {InternetMediaTypes::kJavascript, OverrideRecord{nullopt, Containers::Set<String>{".js"sv}, ".js"sv}},
87 {InternetMediaTypes::kJSON, OverrideRecord{nullopt, Containers::Set<String>{".json"sv}, ".json"sv}},
88 {InternetMediaTypes::kPNG, OverrideRecord{nullopt, Containers::Set<String>{".png"sv}, ".png"sv}},
89 {InternetMediaTypes::kXML, OverrideRecord{nullopt, Containers::Set<String>{".xml"sv}, ".xml"sv}},
90 }};
91
92 // OVERRIDE values (take precedence over backend) and any other data we need to keep locked (synchronized)
93 struct Data_ {
94 shared_ptr<IBackendRep> fBackendRep; // lazy construct on first call to usage (since that construction can be slow)
95
98 };
99 mutable Synchronized<Data_> fData_;
100
101 // NULL backendRep IS allowed - use that to on-demand construct the backend
102 FrontendRep_ (const shared_ptr<IBackendRep>& backendRep)
103 : FrontendRep_{backendRep, kDefaults_}
104 {
105 }
106 FrontendRep_ (const shared_ptr<IBackendRep>& backendRep, const Mapping<InternetMediaType, OverrideRecord>& overrides)
107 : fData_{Data_{.fBackendRep = backendRep}}
108 {
109 SetOverrides (overrides);
110 }
111 virtual Mapping<InternetMediaType, OverrideRecord> GetOverrides () const override
112 {
113 auto lockedData = fData_.rwget ();
114 return lockedData->fOverrides;
115 }
116 virtual void SetOverrides (const Mapping<InternetMediaType, OverrideRecord>& overrides) override
117 {
118 auto lockedData = fData_.rwget ();
119 lockedData->fOverrides = overrides;
120 lockedData->fSuffix2MediaTypeMap.clear ();
121 for (const auto& i : lockedData->fOverrides) {
122 if (i.fValue.fFileSuffixes) {
123 for (const auto& si : *i.fValue.fFileSuffixes) {
124 lockedData->fSuffix2MediaTypeMap.Add (si, i.fKey, AddReplaceMode::eAddIfMissing);
125 }
126 }
127 }
128 }
129 virtual void AddOverride (const InternetMediaType& mediaType, const OverrideRecord& overrideRec) override
130 {
131 auto lockedData = fData_.rwget ();
132 lockedData->fOverrides.Add (mediaType, overrideRec);
133 lockedData->fSuffix2MediaTypeMap.clear ();
134 for (const auto& i : lockedData->fOverrides) {
135 if (i.fValue.fFileSuffixes) {
136 for (const auto& si : *i.fValue.fFileSuffixes) {
137 lockedData->fSuffix2MediaTypeMap.Add (si, i.fKey, AddReplaceMode::eAddIfMissing);
138 }
139 }
140 }
141 }
142 virtual shared_ptr<IBackendRep> GetBackendRep () const override
143 {
144 auto lockedData = fData_.rwget ();
145 return lockedData->fBackendRep;
146 }
147 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
148 {
149 using AtomType = InternetMediaType::AtomType;
150 auto lockedData = fData_.rwget ();
151 CheckData_ (&lockedData);
152 Containers::Set<InternetMediaType> result = lockedData->fBackendRep->GetMediaTypes (majorType);
153 if (majorType == nullopt) {
154 result += lockedData->fOverrides.Keys ();
155 }
156 else {
157 lockedData->fOverrides.Keys ().Apply ([&] (const InternetMediaType& i) {
158 if (i.GetType<AtomType> () == majorType) {
159 result += i;
160 }
161 });
162 }
163 return result;
164 }
165 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
166 {
167 auto lockedData = fData_.rwget ();
168 CheckData_ (&lockedData);
169 if (auto o = lockedData->fOverrides.Lookup (ct)) {
170 if (o->fPreferredSuffix) {
171 return *o->fPreferredSuffix;
172 }
173 }
174 return lockedData->fBackendRep->GetPreferredAssociatedFileSuffix (ct);
175 }
176 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
177 {
178 auto lockedData = fData_.rwget ();
179 CheckData_ (&lockedData);
180 Containers::Set<String> result = lockedData->fOverrides.LookupValue (ct).fFileSuffixes.value_or (Containers::Set<FileSuffixType>{});
181 result += lockedData->fBackendRep->GetAssociatedFileSuffixes (ct);
182 return result;
183 }
184 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
185 {
186 auto lockedData = fData_.rwget ();
187 CheckData_ (&lockedData);
188 if (auto o = lockedData->fOverrides.Lookup (ct)) {
189 if (o->fTypePrintName) {
190 return *o->fTypePrintName;
191 }
192 }
193 return lockedData->fBackendRep->GetAssociatedPrettyName (ct);
194 }
195 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
196 {
197 Require (fileSuffix[0] == '.');
198 auto lockedData = fData_.rwget ();
199 CheckData_ (&lockedData);
200 if (auto o = lockedData->fSuffix2MediaTypeMap.Lookup (fileSuffix)) {
201 return *o;
202 }
203 return lockedData->fBackendRep->GetAssociatedContentType (fileSuffix);
204 }
205 virtual bool IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const override
206 {
207 /**
208 * Generally simple to compare because AtomType code and parser handle case and breaking off bits like +xml, and ; parameters
209 *
210 * Only trick is that no good way to tell more general relationships between types, but doesn't appear well defined (like CCR is a kind of XML).
211 */
212 using AtomType = InternetMediaType::AtomType;
213 AtomType generalType = moreGeneralType.GetType<AtomType> ();
214 AtomType generalSubType = moreGeneralType.GetSubType<AtomType> ();
215 AtomType specificType = moreSpecificType.GetType<AtomType> ();
216 AtomType specificSubType = moreSpecificType.GetSubType<AtomType> ();
217
218 if (specificType == generalType and specificSubType == generalSubType) {
219 return true;
220 }
221
222 // Handle wildcard 'moreGeneralType' - if its exactly Type/'empty' - treating empty as wildcard for IsA...
223 if (moreGeneralType == InternetMediaType{generalType, {}}) {
224 if (specificType == generalType) {
225 return true;
226 }
227 }
228
229 // @todo find a better way - generalize... But for now - Stroika v3.0d12x... - just copy old logic for a bunch of special cases we had - then later
230 // maybe add "override" records for this too....
231 if (moreGeneralType == InternetMediaTypes::Wildcards::kText) {
232 if (IsA (InternetMediaTypes::kXML, moreSpecificType)) {
233 return true;
234 }
235 if (IsA (InternetMediaTypes::kJSON, moreSpecificType)) {
236 return true;
237 }
238 // well known types that can be treated as text (@todo need some way to extend this API)? - Maybe not here but in REGISTRY
239 if (specificType == InternetMediaTypes::Types::kApplication) {
240 Assert (InternetMediaTypes::kRTF.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
241 if (specificSubType == InternetMediaTypes::kRTF.GetSubType<AtomType> ()) {
242 return true;
243 }
244 }
245 }
246 else if (moreGeneralType == InternetMediaTypes::kXML) {
247 if (specificType == InternetMediaTypes::Types::kApplication) {
248 Assert (InternetMediaTypes::kXML.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
249 if (specificSubType == InternetMediaTypes::kXML.GetSubType<AtomType> ()) {
250 return true;
251 }
252 Assert (InternetMediaTypes::kXSLT.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
253 if (specificSubType == InternetMediaTypes::kXSLT.GetSubType<AtomType> ()) {
254 return true;
255 }
256 }
257 if (specificType == InternetMediaTypes::Types::kText) {
258 static const AtomType kXMLAtom_ = "xml"sv;
259 if (specificSubType == kXMLAtom_) {
260 return true;
261 }
262 }
263 }
264
265 // look for suffixes
266 if (auto suffix = moreSpecificType.GetSuffix<AtomType> ()) {
267 if (moreGeneralType == InternetMediaTypes::kJSON) {
268 static const AtomType kSuffix_{"json"sv};
269 if (suffix == kSuffix_) {
270 return true;
271 }
272 }
273 else if (moreGeneralType == InternetMediaTypes::kXML) {
274 static const AtomType kSuffix_{"xml"sv};
275 if (suffix == kSuffix_) {
276 return true;
277 }
278 }
279 }
280
281 return false;
282 }
283 static void CheckData_ (Synchronized<Data_>::WritableReference* lockedData)
284 {
285 if (lockedData->rwref ().fBackendRep == nullptr) {
286 lockedData->rwref ().fBackendRep = InternetMediaTypeRegistry::DefaultBackend ();
287 }
288 }
289};
290inline InternetMediaTypeRegistry::FrontendRep_ InternetMediaTypeRegistry::kDefaultFrontEndForNoBackend_{nullptr};
291
292/*
293 ********************************************************************************
294 ******************** InternetMediaTypeRegistry::Rep_Cloner_ ********************
295 ********************************************************************************
296 */
297auto InternetMediaTypeRegistry::Rep_Cloner_::operator() (const IFrontendRep_& t) const -> shared_ptr<IFrontendRep_>
298{
299 return make_shared<FrontendRep_> (t.GetBackendRep (), t.GetOverrides ());
300};
301
302/*
303 ********************************************************************************
304 *************************** InternetMediaTypeRegistry **************************
305 ********************************************************************************
306 */
307InternetMediaTypeRegistry::InternetMediaTypeRegistry (const shared_ptr<IBackendRep>& backendRep)
308 // note because can be constructed before main () - not safe to make_shared<FrontendRep_> - so delay construction and use kDefaultFrontEndForNoBackend_ if needed
309 : fFrontEndRep_{backendRep == nullptr ? nullptr : make_shared<FrontendRep_> (backendRep)}
310{
311}
312
314{
315 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetOverrides ();
316}
317
319{
320 if (fFrontEndRep_ == nullptr) {
321 fFrontEndRep_ = make_shared<FrontendRep_> (kDefaultFrontEndForNoBackend_);
322 }
323 AssertNotNull (fFrontEndRep_);
324 fFrontEndRep_->SetOverrides (overrides);
325}
326
328{
329 if (fFrontEndRep_ == nullptr) {
330 fFrontEndRep_ = make_shared<FrontendRep_> (kDefaultFrontEndForNoBackend_);
331 }
332 AssertNotNull (fFrontEndRep_);
333 fFrontEndRep_->AddOverride (mediaType, overrideRec);
334}
335
336optional<InternetMediaTypeRegistry::FileSuffixType> InternetMediaTypeRegistry::GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const
337{
338 return Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetPreferredAssociatedFileSuffix (ct);
339}
340
342{
344 r = Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedFileSuffixes (ct);
345 // if a MediaType has a builtin suffix, include that as well...
346 if (auto os = ct.GetSuffix<String> ()) {
347 r += *os;
348 }
349 return r;
350}
351
353{
354 return Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedPrettyName (ct);
355}
356
357shared_ptr<InternetMediaTypeRegistry::IBackendRep> InternetMediaTypeRegistry::DefaultBackend ()
358{
359 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::DefaultBackend"};
360#if qStroika_Foundation_Common_Platform_Windows
361 return WindowsRegistryDefaultBackend ();
362#endif
363 // @todo fix for MacOS - which doesn't support these - http://stroika-bugs.sophists.com/browse/STK-795
364 if (filesystem::exists ("/usr/share/mime"sv)) {
365 try {
366 return UsrSharedDefaultBackend ();
367 }
368 catch (...) {
369 // LOG/WRN
370 }
371 }
372 if (filesystem::exists ("/etc/mime.types"sv)) {
373 try {
375 }
376 catch (...) {
377 // LOG/WRN
378 }
379 }
380 return BakedInDefaultBackend (); // always works (but sucks)
381}
382
384{
385 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::EtcMimeTypesDefaultBackend"};
386 /*
387 * Use the file /etc/mime.types
388 *
389 * not sure this is useful - not sure who uses it that doesn't support /usr/share/mime...
390 *
391 * Preload the entire DB since its not practical to scan looking for the intended record (due to the time this would take).
392 */
393 struct EtcMimeTypesRep_ : IBackendRep {
394 Mapping<FileSuffixType, InternetMediaType> fSuffix2MediaTypeMap_;
395 Mapping<InternetMediaType, FileSuffixType> fMediaType2PreferredSuffixMap_;
397
398 EtcMimeTypesRep_ ()
399 {
400#if USE_NOISY_TRACE_IN_THIS_MODULE_
401 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::{}::EtcMimeTypesRep_::CTOR"};
402#endif
404 IO::FileSystem::FileInputStream::New ("/etc/mime.types"sv))) {
405 if (line.length () >= 2 and not line[0].StartsWith ("#"_k)) {
407 try {
408 ct = InternetMediaType{line[0]};
409 }
410 catch (...) {
411 DbgTrace ("Ignoring exception looking parsing potential media type entry ({}): {}"_f, line[0], current_exception ());
412 }
413 // a line starts with a content type, but then contains any number of file suffixes (without the leading .)
415 for (size_t i = 1; i < line.length (); ++i) {
416 Assert (not line[i].empty ());
417 String suffix = "."sv + line[i];
418 fSuffix2MediaTypeMap_.Add (suffix, ct);
419 fMediaType2PreferredSuffixMap_.Add (ct, suffix, AddReplaceMode::eAddIfMissing);
420 fileSuffixes.Add (suffix);
421 }
422 fMediaType2SuffixesMap_.Add (ct, fileSuffixes);
423 }
424 }
425 // Because on raspberrypi/debian, this comes out with a crazy default for text\plain -- LGP 2020-07-27
426 fMediaType2PreferredSuffixMap_.Add (InternetMediaTypes::kText_PLAIN, ".txt"sv);
427#if USE_NOISY_TRACE_IN_THIS_MODULE_
428 DbgTrace (L"succeeded with {} fSuffix2MediaTypeMap entries, and {} fMediaType2PreferredSuffixMap entries"_f,
429 fSuffix2MediaTypeMap_.size (), fMediaType2PreferredSuffixMap_.size ());
430#endif
431 }
432 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
433 {
435 for (const InternetMediaType& imt : fMediaType2PreferredSuffixMap_.Keys ()) {
436 if (majorType != nullopt and (imt.GetType<InternetMediaType::AtomType> () != *majorType)) {
437 continue;
438 }
439 results += imt;
440 }
441 return results;
442 }
443 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
444 {
445 if (auto o = fMediaType2PreferredSuffixMap_.Lookup (ct)) {
446 return *o;
447 }
448 return nullopt;
449 }
450 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
451 {
452 if (auto i = fMediaType2SuffixesMap_.Lookup (ct)) {
453 return *i;
454 }
456 }
457 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& /*ct*/) const override
458 {
459 return nullopt; // not supported in this file
460 }
461 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
462 {
463 Require (fileSuffix[0] == '.');
464 if (auto o = fSuffix2MediaTypeMap_.Lookup (fileSuffix)) {
465 return *o;
466 }
467 return nullopt;
468 }
469 };
470 return make_shared<EtcMimeTypesRep_> ();
471}
472
474{
475 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::UsrSharedDefaultBackend"};
476 /*
477 * Documented to some small degree in https://www.linuxtopia.org/online_books/linux_desktop_guides/gnome_2.14_admin_guide/mimetypes-database.html
478 */
479 struct UsrShareMIMERep_ : IBackendRep {
480 Iterable<filesystem::path> fDataRoots_{"~/.local/share/mime/"sv, "/usr/local/share/mime/"sv, "/usr/share/mime"sv};
481
482 /*
483 * NOTE - for fSuffix2MediaTypeMap_ and fMediaType2PreferredSuffixMap, we cannot use Bijection,
484 * because multiple media-types can map to a single filetype and not all mediatypes have a filetype.
485 *
486 * We CANNOT use a cache, or dynamically fetch this data from files, because the data for each file suffix
487 * is not indexed (by file suffix) - it is indexed by content type (so those lookups COULD be dynamic). But
488 * we can easily construct both at the same time reading the summary file, so we do.
489 */
490 Mapping<FileSuffixType, InternetMediaType> fSuffix2MediaTypeMap_;
491 Mapping<InternetMediaType, FileSuffixType> fMediaType2PreferredSuffixMap_;
493
494 mutable Synchronized<Mapping<InternetMediaType, String>> fMediaType2PrettyNameCache; // incrementally build as needed
495
496 UsrShareMIMERep_ ()
497 {
498#if USE_NOISY_TRACE_IN_THIS_MODULE_
499 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::{}UsrShareMIMERep_::CTOR"};
500#endif
501 // @todo consider using globs2 file support, but little point since they seem to be written in priority order
502 auto loadGlobsFromFile = [&] (const filesystem::path& fn) {
503 if (filesystem::exists (fn)) {
504 Debug::TraceContextBumper ctx1{"UsrShareMIMERep_::CTOR::loadGlobsFromFile", "exists=true,fn={}"_f, fn};
505 try {
506 for (Sequence<String> line :
507 DataExchange::Variant::CharacterDelimitedLines::Reader{{':'}}.ReadMatrix (IO::FileSystem::FileInputStream::New (fn))) {
508 if (line.length () == 2) {
509 String glob = line[1];
510 if (glob.StartsWith ('*')) {
511 glob = glob.SubString (1);
512 }
513 // Use AddReplaceMode::eAddIfMissing - so first (appears empirically to be the preferred value) wins
515 try {
516 imt = InternetMediaType{line[0]};
517 }
518 catch (...) {
519 DbgTrace ("Ignoring exception looking parsing potential media type entry ({}): {}"_f, line[0], current_exception ());
520 }
521 fSuffix2MediaTypeMap_.Add (glob, imt, AddReplaceMode::eAddIfMissing);
522 fMediaType2PreferredSuffixMap_.Add (imt, glob, AddReplaceMode::eAddIfMissing);
523
524 // update the set of mapped suffixes
525 Containers::Set<FileSuffixType> prevSuffixes = fMediaType2SuffixesMap_.LookupValue (imt);
526 prevSuffixes.Add (glob);
527 fMediaType2SuffixesMap_.Add (imt, prevSuffixes);
528 }
529 }
530
531 // Because on raspberrypi/Debian, this comes out with a crazy default for text\plain -- LGP 2020-07-27
532 fMediaType2PreferredSuffixMap_.Add (InternetMediaTypes::kText_PLAIN, ".txt"_k);
533 }
534 catch (...) {
535 // log error
536 }
537 }
538 };
539 // override files loaded first, tied to use of AddReplaceMode::eAddIfMissing - not replacing
540 for (const auto& p : fDataRoots_) {
541 loadGlobsFromFile (p / "globs");
542 }
543
544#if USE_NOISY_TRACE_IN_THIS_MODULE_
545 DbgTrace ("succeeded with {} fSuffix2MediaTypeMap_ entries, and {} fMediaType2PreferredSuffixMap entries"_f,
546 fSuffix2MediaTypeMap_.size (), fMediaType2PreferredSuffixMap_.size ());
547#endif
548 }
549 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
550 {
551#if USE_NOISY_TRACE_IN_THIS_MODULE_
552 Debug::TraceContextBumper ctx{Stroika_Foundation_Debug_OptionalizeTraceArgs ("UsrShareMIMERep_::GetMediaTypes", "majorType={}"_f, majorType)};
553#endif
555 for (const auto& imt : fMediaType2PreferredSuffixMap_.Keys ()) {
556 if (majorType) {
557 if (imt.GetType<InternetMediaType::AtomType> () != *majorType) {
558 continue; // skip non-matching types
559 }
560 }
561 results += imt;
562 }
563 return results;
564 }
565 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
566 {
567 if (auto o = fMediaType2PreferredSuffixMap_.Lookup (ct)) {
568 return *o;
569 }
570 return nullopt;
571 }
572 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
573 {
574 if (auto i = fMediaType2SuffixesMap_.Lookup (ct)) {
575 return *i;
576 }
578 }
579 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
580 {
581 return LookupAndUpdateFromUsrShareMimePrettyName_ (ct);
582 }
583 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
584 {
585 Require (fileSuffix[0] == '.');
586 if (auto o = fSuffix2MediaTypeMap_.Lookup (fileSuffix)) {
587 return *o;
588 }
589 return nullopt;
590 }
591 optional<String> LookupAndUpdateFromUsrShareMimePrettyName_ (const InternetMediaType& ct) const
592 {
593#if USE_NOISY_TRACE_IN_THIS_MODULE_
594 Debug::TraceContextBumper ctx{"{}MIMEDB_::LookupAndUpdateFromUsrShareMimePrettyName"};
595#endif
596 // @todo combine lock calls in this procedure
597 if (auto o = fMediaType2PrettyNameCache.cget ()->Lookup (ct)) {
598 return *o;
599 }
600 // SAX parse /usr/share/mime/TYPE/SUBTYPE.xml file and look for <comment> element (default with no language for now)
601 // Simpler - just take the first - seems empirically fine/OK
602#if qStroika_Foundation_DataExchange_XML_SupportParsing
603 try {
604 using Name = StructuredStreamEvents::Name;
605 struct myHander_ : StructuredStreamEvents::IConsumer {
606 optional<String> fResult;
607 bool onContentElt{false};
608 StringBuilder<> fAccum;
609 virtual void StartElement (const Name& name, [[maybe_unused]] const Mapping<Name, String>& attributes) override
610 {
611 if (name == Name{"content"_k} and not fResult.has_value ()) {
612 onContentElt = true;
613 }
614 }
615 virtual void EndElement ([[maybe_unused]] const Name& name) override
616 {
617 if (onContentElt) {
618 Assert (not fResult);
619 fResult = fAccum.str ();
620 }
621 }
622 virtual void TextInsideElement (const String& t) override
623 {
624 if (onContentElt) {
625 fAccum << t;
626 }
627 }
628 };
629 filesystem::path mimeRoot{"/usr/share/mime/"sv};
630 myHander_ handler;
631 // @todo validate ct.GetType () to make sure not a ../../ ATTACK
632 DataExchange::XML::SAXParse (IO::FileSystem::FileInputStream::New (
633 mimeRoot / (ct.GetType () + "/"_k + ct.GetSubType () + ".xml"_k).As<filesystem::path> ()),
634 &handler);
635 if (handler.fResult) {
636 fMediaType2PrettyNameCache.rwget ()->Add (ct, *handler.fResult);
637 return *handler.fResult;
638 }
639 }
640 catch (...) {
641#if USE_NOISY_TRACE_IN_THIS_MODULE_
642 DbgTrace ("failure ignored");
643#endif
644 }
645#else
646 DbgTrace ("/usr/share/mime/ ignored cuz no xml reader - not compiled with Xerces");
647#endif
648 return nullopt;
649 }
650 };
651 return make_shared<UsrShareMIMERep_> ();
652}
653
654auto InternetMediaTypeRegistry::BakedInDefaultBackend () -> shared_ptr<IBackendRep>
655{
656 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::BakedInDefaultBackend"};
657 struct DefaultEmptyBackendRep_ : IBackendRep {
658 virtual Containers::Set<InternetMediaType> GetMediaTypes ([[maybe_unused]] optional<InternetMediaType::AtomType> majorType) const override
659 {
661 }
662 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix ([[maybe_unused]] const InternetMediaType& ct) const override
663 {
664 return nullopt;
665 }
666 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes ([[maybe_unused]] const InternetMediaType& ct) const override
667 {
669 }
670 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& /*ct*/) const override
671 {
672 return nullopt;
673 }
674 virtual optional<InternetMediaType> GetAssociatedContentType ([[maybe_unused]] const FileSuffixType& fileSuffix) const override
675 {
676 Require (fileSuffix[0] == '.');
677 return nullopt;
678 }
679 };
680 return make_shared<DefaultEmptyBackendRep_> ();
681}
682
683#if qStroika_Foundation_Common_Platform_Windows
684auto InternetMediaTypeRegistry::WindowsRegistryDefaultBackend () -> shared_ptr<IBackendRep>
685{
686 /*
687 * I can find no documentation on how this works, but at least https://stackoverflow.com/questions/3442607/mime-types-in-the-windows-registry
688 * mentions it.
689 *
690 * Empirically you can usually find:
691 * HKEY_CLASSES_ROOT\MIME\Database
692 * Content Type\CT\Extension
693 * This layout does not appear to accommodate ever having more than one extension for a given mime type
694 *
695 * HKEY_CLASSES_ROOT\FILE_SUFFIX
696 * {default} pretty name
697 * Content Type: 'internet media type'
698 *
699 * \note On Docker windows server core images, this is often missing! (but addressed with the default values baked into the frontend) -- LGP 2020-07-28
700 */
701 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::WindowsRegistryDefaultBackend"};
702 struct WinRep_ : IBackendRep {
703 // underlying windows code fast so use small cache sizes
704 mutable Cache::SynchronizedLRUCache<FileSuffixType, optional<String>, equal_to<FileSuffixType>, hash<FileSuffixType>> fFileSuffix2PrettyNameCache_{
705 25, 7};
706 mutable Cache::SynchronizedLRUCache<FileSuffixType, optional<InternetMediaType>, equal_to<FileSuffixType>, hash<FileSuffixType>> fSuffix2MediaTypeCache_{
707 25, 7};
708 mutable Cache::SynchronizedLRUCache<InternetMediaType, optional<FileSuffixType>, equal_to<InternetMediaType>, hash<InternetMediaType>> fContentType2FileSuffixCache_{
709 25, 7};
710 mutable Cache::SynchronizedLRUCache<InternetMediaType, Containers::Set<FileSuffixType>, equal_to<InternetMediaType>, hash<InternetMediaType>> fContentType2FileSuffixesCache_{
711 25, 7};
712
713 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
714 {
716 //
717 // rarely do we fetch all MIME types, so don't cache - just re-fetch each time
718 //
719 // On Windows, in registry, easiest way appears to be to enumerate ALL registry entries in HKCR that start with .,
720 // and look for sub-field 'Content-type'
721 //
722 using RegistryKey = Common::Platform::Windows::RegistryKey;
723 for (shared_ptr<RegistryKey> sk : RegistryKey{HKEY_CLASSES_ROOT}.EnumerateSubKeys ()) {
724 String name = sk->GetFullPathOfKey ().Tokenize ({'\\'}).LastValue ();
725 if (name.StartsWith ('.')) {
726 if (auto o = sk->Lookup ("Content Type"sv)) {
728 try {
729 imt = InternetMediaType{o.As<String> ()};
730 }
731 catch (...) {
732 // ignore bad format - such as .sqlproj has Content-Type "string" which my read of the RFC says is illegal
733 DbgTrace ("Ignoring exception parsing registry key ({}): {}"_f, o, current_exception ());
734 continue;
735 }
736 if (majorType) {
737 if (imt.GetType<InternetMediaType::AtomType> () != *majorType) {
738 continue; // skip non-matching types
739 }
740 }
741 result.Add (imt);
742 }
743 }
744 }
745 return result;
746 }
747 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
748 {
749 return fContentType2FileSuffixCache_.LookupValue (ct, [] (const InternetMediaType& ct) -> optional<FileSuffixType> {
750 if (auto fs = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup ("MIME\\Database\\Content Type\\{}\\Extension"_f(ct))) {
751 return fs.As<String> ();
752 }
753 return nullopt;
754 });
755 }
756 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
757 {
758 // This is expensive to compute, and we could compute all and cache, but I don't think we will need to lookup very often, so just
759 // compute as needed and cache a few
760 return fContentType2FileSuffixesCache_.LookupValue (ct, [] (const InternetMediaType& ct) -> Containers::Set<FileSuffixType> {
763 for (shared_ptr<RegistryKey> sk : RegistryKey{HKEY_CLASSES_ROOT}.EnumerateSubKeys ()) {
764 String name = sk->GetFullPathOfKey ().Tokenize ({'\\'}).LastValue ();
765 if (name.StartsWith ("."_k)) {
766 if (auto o = sk->Lookup ("Content Type"sv)) {
768 try {
769 imt = InternetMediaType{o.As<String> ()};
770 }
771 catch (...) {
772 // ignore bad format - such as .sqlproj has Content-Type "string" which my read of the RFC says is illegal
773 DbgTrace ("Ignoring exception parsing registry key ({}): {}"_f, o, current_exception ());
774 continue;
775 }
776 if (ct.GetType () == imt.GetType () and ct.GetSubType () == imt.GetSubType ()) {
777 result += name;
778 }
779 }
780 }
781 }
782 return result;
783 });
784 }
785 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
786 {
787 if (optional<FileSuffixType> fileSuffix = GetPreferredAssociatedFileSuffix (ct)) {
788 return fFileSuffix2PrettyNameCache_.LookupValue (*fileSuffix, [] (const String& suffix) -> optional<String> {
789 if (auto fileTypeID = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup (suffix + "\\"_k)) {
790 if (auto prettyName = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup (fileTypeID.As<String> () + "\\"_k)) {
791 return prettyName.As<String> ();
792 }
793 }
794 return nullopt;
795 });
796 }
797 return nullopt;
798 }
799 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
800 {
801 Require (fileSuffix[0] == '.');
802 return fSuffix2MediaTypeCache_.LookupValue (fileSuffix, [] (const FileSuffixType& fileSuffix) -> optional<InternetMediaType> {
804 // only do registry lookup if needed, since (probably) more costly than local map lookup
805 if (auto oct = RegistryKey{HKEY_CLASSES_ROOT}.Lookup ("{}\\Content Type"_f(fileSuffix))) {
806 InternetMediaType mediaType{oct.As<String> ()};
807 return mediaType;
808 }
809 return nullopt;
810 });
811 }
812 };
813 return make_shared<WinRep_> ();
814}
815#endif
816
817Set<InternetMediaType> InternetMediaTypeRegistry::GetMediaTypes () const
818{
819 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetMediaTypes (nullopt);
820}
821
822Set<InternetMediaType> InternetMediaTypeRegistry::GetMediaTypes (InternetMediaType::AtomType majorType) const
823{
824 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetMediaTypes (majorType);
825}
826
828{
830 for (const auto& ct : mediaTypes) {
831 for (const auto& i : GetAssociatedFileSuffixes (ct)) {
832 result += i;
833 }
834 }
835 return result;
836}
837
838optional<InternetMediaType> InternetMediaTypeRegistry::GetAssociatedContentType (const FileSuffixType& fileSuffix) const
839{
840 if (fileSuffix.empty ()) {
841 return nullopt;
842 }
843 Assert (fileSuffix[0] == '.');
844 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedContentType (fileSuffix);
845}
846
848{
849 return IsA (InternetMediaTypes::Wildcards::kText, ct);
850}
851
853{
854 return IsA (InternetMediaTypes::Wildcards::kImage, ct);
855}
856
858{
859 return IsA (InternetMediaTypes::kXML, ct);
860}
861
862bool InternetMediaTypeRegistry::IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const
863{
864 using AtomType = InternetMediaType::AtomType;
865 // shortcut this one case
866 if (moreSpecificType.GetType<AtomType> () == moreGeneralType.GetType<AtomType> () and
867 moreSpecificType.GetSubType<AtomType> () == moreGeneralType.GetSubType<AtomType> ()) {
868 return true;
869 }
870 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).IsA (moreGeneralType, moreSpecificType);
871}
#define AssertNotNull(p)
Definition Assertions.h:333
const OT & NullCoalesce(const OT &l, const OT &r)
return one of l, or r, with first preference for which is engaged, and second preference for left-to-...
Definition Optional.inl:134
#define DbgTrace
Definition Trace.h:309
#define Stroika_Foundation_Debug_OptionalizeTraceArgs(...)
Definition Trace.h:270
LRUCache implements a simple least-recently-used caching strategy, with optional hashing (of keys) to...
Definition LRUCache.h:94
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual String SubString(SZ from) const
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1059
nonvirtual Containers::Sequence< String > Tokenize() const
Definition String.cpp:1234
nonvirtual DataExchange::VariantValue Lookup(const Characters::String &valuePath) const
Definition Registry.cpp:141
nonvirtual bool Add(ArgByValueType< key_type > key, ArgByValueType< mapped_type > newElt, AddReplaceMode addReplaceMode=AddReplaceMode::eAddReplaces)
Definition Mapping.inl:190
nonvirtual optional< mapped_type > Lookup(ArgByValueType< key_type > key) const
Definition Mapping.inl:144
nonvirtual mapped_type LookupValue(ArgByValueType< key_type > key, ArgByValueType< mapped_type > defaultValue=mapped_type{}) const
Definition Mapping.inl:168
nonvirtual Iterable< key_type > Keys() const
Definition Mapping.inl:113
A generalization of a vector: a container whose elements are keyed by the natural numbers.
Definition Sequence.h:187
Set<T> is a container of T, where once an item is added, additional Add () calls for that item do nothing.
Definition Set.h:105
nonvirtual void Add(ArgByValueType< value_type > item)
Definition Set.inl:138
nonvirtual RETURN_TYPE GetType() const
Gets the primary (major) type of the full internet media type (as a string or atom)
nonvirtual T As() const
Convert to type T. Supported types: String, wstring.
nonvirtual optional< RETURN_TYPE > GetSuffix() const
this is the +XXX part of the internet media type (e.g. +xml) and is often omitted (but note this omit...
nonvirtual Mapping< InternetMediaType, OverrideRecord > GetOverrides() const
static shared_ptr< IBackendRep > DefaultBackend()
Generally no need to use this - handled automatically - but returns the default, OS-provided MIME Int...
nonvirtual bool IsXMLFormat(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > UsrSharedDefaultBackend()
Generally no need to use this - handled automatically.
nonvirtual bool IsA(const InternetMediaType &moreGeneralType, const InternetMediaType &moreSpecificType) const
return true if moreSpecificType 'isa' moreGeneralType
nonvirtual void SetOverrides(const Mapping< InternetMediaType, OverrideRecord > &overrides)
nonvirtual optional< InternetMediaType > GetAssociatedContentType(const FileSuffixType &fileSuffix) const
nonvirtual optional< String > GetAssociatedPrettyName(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > EtcMimeTypesDefaultBackend()
Generally no need to use this - handled automatically.
static shared_ptr< IBackendRep > BakedInDefaultBackend()
Generally no need to use this - handled automatically.
InternetMediaTypeRegistry(const shared_ptr< IBackendRep > &backendRep=nullptr)
nonvirtual optional< FileSuffixType > GetPreferredAssociatedFileSuffix(const InternetMediaType &ct) const
nonvirtual Containers::Set< FileSuffixType > GetAssociatedFileSuffixes(const InternetMediaType &ct) const
nonvirtual void AddOverride(const InternetMediaType &mediaType, const OverrideRecord &overrideRec)
bool IsTextFormat(const InternetMediaType &ct) const
returns true if you can expect to treat as some sort of text and reasonably view - like text/html,...
This COULD be easily used to read CSV files, or tab-delimited files, for example.
nonvirtual Iterable< Sequence< String > > ReadMatrix(const Streams::InputStream::Ptr< byte > &in) const
Wrap any object with Synchronized<> and it can be used similarly to the base type,...
nonvirtual WritableReference rwget()
get a read-write smart pointer to the underlying Synchronized<> object, holding the full lock the who...
nonvirtual ReadableReference cget() const
get a read-only smart pointer to the underlying Synchronized<> object, holding the readlock the whole...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
nonvirtual void Apply(const function< void(ArgByValueType< T > item)> &doToElement, Execution::SequencePolicy seq=Execution::SequencePolicy::eDEFAULT) const
Run the argument function (or lambda) on each element of the container.
nonvirtual size_t size() const
Returns the number of items contained.
Definition Iterable.inl:300
const InternetMediaType::AtomType kApplication
'application'
for OS facilities not updatable - or controllable - just usable.