Stroika Library 3.0d23
 
Loading...
Searching...
No Matches
InternetMediaTypeRegistry.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2026. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#include <filesystem>
7
8#include "Stroika/Foundation/Cache/SynchronizedLRUCache.h"
13#if qStroika_Foundation_Common_Platform_Windows
14#include "Stroika/Foundation/Common/Platform/Windows/Registry.h"
15#endif
17#if qStroika_Foundation_Common_Platform_Windows
18#include "Stroika/Foundation/Execution/Platform/Windows/Exception.h"
19#endif
23
25
26using namespace Stroika::Foundation;
30using namespace Stroika::Foundation::Execution;
31
32using Memory::MakeSharedPtr;
33using Memory::NullCoalesce;
34
35// Comment this in to turn on aggressive noisy DbgTrace in this module
36//#define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
37
39
40/*
41 ********************************************************************************
42 ****************** InternetMediaTypeRegistry::OverrideRecord *******************
43 ********************************************************************************
44 */
46{
48 sb << "{"sv;
49 if (fTypePrintName) {
50 sb << "TypePrintName: " << fTypePrintName;
51 }
52 if (fFileSuffixes) {
53 sb << ", FileSuffixes: " << fFileSuffixes;
54 }
55 if (fPreferredSuffix) {
56 sb << ", PreferredSuffix: " << fPreferredSuffix;
57 }
58 sb << "}"sv;
59 return sb;
60}
61
62/*
63 ********************************************************************************
64 ******************** InternetMediaTypeRegistry::FrontendRep_ *******************
65 ********************************************************************************
66 */
67
68/**
69 * @todo NYI UPDATING the frontend. Implement APIs to externally add mappings and be sure copying the InternetMediaTypeRegistry and using that
70 * in isolation works as well (use COW)
71 *
72 * \note - Structurally, we do caching in the backend (as needed) because each backend stores data differently
73 * and the logic of what makes sense to cache changes.
74 *
75 * The frontend simply defines 'API-Driven OVERRIDES' of the values returned. (so far not fully implemented - no setters/manipulators)
76 */
77struct InternetMediaTypeRegistry::FrontendRep_ : InternetMediaTypeRegistry::IFrontendRep_ {
78
80
81 // Baked in predefined initial user-overrides.
82 // These are adjustable by API, serve the purpose of providing a default on systems with no MIME content database -- LGP 2020-07-27
83
// Built-in override table: the single-argument constructor passes this as the initial set of overrides.
// Each entry pairs a well-known media type with an OverrideRecord built from three initializers; these appear to be
// (print name, set of file suffixes, preferred suffix) - NOTE(review): confirm the order against the OverrideRecord declaration.
// Every entry leaves the first initializer as nullopt, so only file-suffix information is supplied here.
// kHTML is the only entry listing two suffixes (".htm" and ".html").
84 static inline const Mapping<InternetMediaType, OverrideRecord> kDefaults_{initializer_list<KeyValuePair<InternetMediaType, OverrideRecord>>{
85 {InternetMediaTypes::kText_PLAIN, OverrideRecord{nullopt, Containers::Set<String>{".txt"sv}, ".txt"sv}},
86 {InternetMediaTypes::kCSS, OverrideRecord{nullopt, Containers::Set<String>{".css"sv}, ".css"sv}},
87 {InternetMediaTypes::kHTML, OverrideRecord{nullopt, Containers::Set<String>{".htm"sv, ".html"sv}, ".htm"sv}},
88 {InternetMediaTypes::kJavascript, OverrideRecord{nullopt, Containers::Set<String>{".js"sv}, ".js"sv}},
89 {InternetMediaTypes::kJSON, OverrideRecord{nullopt, Containers::Set<String>{".json"sv}, ".json"sv}},
90 {InternetMediaTypes::kPNG, OverrideRecord{nullopt, Containers::Set<String>{".png"sv}, ".png"sv}},
91 {InternetMediaTypes::kXML, OverrideRecord{nullopt, Containers::Set<String>{".xml"sv}, ".xml"sv}},
92 }};
93
94 // OVERRIDE values (take precedence over backend) and any other data we need to keep locked (synchronized)
95 struct Data_ {
96 shared_ptr<IBackendRep> fBackendRep; // lazy construct on first call to usage (since that construction can be slow)
97
100 };
101 mutable Synchronized<Data_> fData_;
102
103 // NULL backendRep IS allowed - use that to on-demand construct the backend
104 FrontendRep_ (const shared_ptr<IBackendRep>& backendRep)
105 : FrontendRep_{backendRep, kDefaults_}
106 {
107 }
108 FrontendRep_ (const shared_ptr<IBackendRep>& backendRep, const Mapping<InternetMediaType, OverrideRecord>& overrides)
109 : fData_{Data_{.fBackendRep = backendRep}}
110 {
111 SetOverrides (overrides);
112 }
113 virtual Mapping<InternetMediaType, OverrideRecord> GetOverrides () const override
114 {
115 auto lockedData = fData_.rwget ();
116 return lockedData->fOverrides;
117 }
118 virtual void SetOverrides (const Mapping<InternetMediaType, OverrideRecord>& overrides) override
119 {
120 auto lockedData = fData_.rwget ();
121 lockedData->fOverrides = overrides;
122 lockedData->fSuffix2MediaTypeMap.clear ();
123 for (const auto& i : lockedData->fOverrides) {
124 if (i.fValue.fFileSuffixes) {
125 for (const auto& si : *i.fValue.fFileSuffixes) {
126 lockedData->fSuffix2MediaTypeMap.Add (si, i.fKey, AddReplaceMode::eAddIfMissing);
127 }
128 }
129 }
130 }
131 virtual void AddOverride (const InternetMediaType& mediaType, const OverrideRecord& overrideRec) override
132 {
133 auto lockedData = fData_.rwget ();
134 lockedData->fOverrides.Add (mediaType, overrideRec);
135 lockedData->fSuffix2MediaTypeMap.clear ();
136 for (const auto& i : lockedData->fOverrides) {
137 if (i.fValue.fFileSuffixes) {
138 for (const auto& si : *i.fValue.fFileSuffixes) {
139 lockedData->fSuffix2MediaTypeMap.Add (si, i.fKey, AddReplaceMode::eAddIfMissing);
140 }
141 }
142 }
143 }
144 virtual shared_ptr<IBackendRep> GetBackendRep () const override
145 {
146 auto lockedData = fData_.rwget ();
147 return lockedData->fBackendRep;
148 }
149 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
150 {
151 using AtomType = InternetMediaType::AtomType;
152 auto lockedData = fData_.rwget ();
153 CheckData_ (&lockedData);
154 Containers::Set<InternetMediaType> result = lockedData->fBackendRep->GetMediaTypes (majorType);
155 if (majorType == nullopt) {
156 result += lockedData->fOverrides.Keys ();
157 }
158 else {
159 lockedData->fOverrides.Keys ().Apply ([&] (const InternetMediaType& i) {
160 if (i.GetType<AtomType> () == majorType) {
161 result += i;
162 }
163 });
164 }
165 return result;
166 }
167 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
168 {
169 auto lockedData = fData_.rwget ();
170 CheckData_ (&lockedData);
171 if (auto o = lockedData->fOverrides.Lookup (ct)) {
172 if (o->fPreferredSuffix) {
173 return *o->fPreferredSuffix;
174 }
175 }
176 return lockedData->fBackendRep->GetPreferredAssociatedFileSuffix (ct);
177 }
178 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
179 {
180 auto lockedData = fData_.rwget ();
181 CheckData_ (&lockedData);
182 Containers::Set<String> result = lockedData->fOverrides.LookupValue (ct).fFileSuffixes.value_or (Containers::Set<FileSuffixType>{});
183 result += lockedData->fBackendRep->GetAssociatedFileSuffixes (ct);
184 return result;
185 }
186 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
187 {
188 auto lockedData = fData_.rwget ();
189 CheckData_ (&lockedData);
190 if (auto o = lockedData->fOverrides.Lookup (ct)) {
191 if (o->fTypePrintName) {
192 return *o->fTypePrintName;
193 }
194 }
195 return lockedData->fBackendRep->GetAssociatedPrettyName (ct);
196 }
197 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
198 {
199 Require (fileSuffix[0] == '.');
200 auto lockedData = fData_.rwget ();
201 CheckData_ (&lockedData);
202 if (auto o = lockedData->fSuffix2MediaTypeMap.Lookup (fileSuffix)) {
203 return *o;
204 }
205 return lockedData->fBackendRep->GetAssociatedContentType (fileSuffix);
206 }
207 virtual bool IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const override
208 {
209 /**
210 * Generally simple to compare because AtomType code and parser handle case and breaking off bits like +xml, and ; parameters
211 *
212 * Only trick is that no good way to tell more general relationships between types, but doesn't appear well defined (like CCR is a kind of XML).
213 */
214 using AtomType = InternetMediaType::AtomType;
215 AtomType generalType = moreGeneralType.GetType<AtomType> ();
216 AtomType generalSubType = moreGeneralType.GetSubType<AtomType> ();
217 AtomType specificType = moreSpecificType.GetType<AtomType> ();
218 AtomType specificSubType = moreSpecificType.GetSubType<AtomType> ();
219
220 if (specificType == generalType and specificSubType == generalSubType) {
221 return true;
222 }
223
224 // Handle wildcard 'moreGeneralType' - if its exactly Type/'empty' - treating empty as wildcard for IsA...
225 if (moreGeneralType == InternetMediaType{generalType, {}}) {
226 if (specificType == generalType) {
227 return true;
228 }
229 }
230
231 // @todo find a better way - generalize... But for now - Stroika v3.0d12x... - just copy old logic for a bunch of special cases we had - then later
232 // maybe add "override" records for this too....
233 if (moreGeneralType == InternetMediaTypes::Wildcards::kText) {
234 if (IsA (InternetMediaTypes::kXML, moreSpecificType)) {
235 return true;
236 }
237 if (IsA (InternetMediaTypes::kJSON, moreSpecificType)) {
238 return true;
239 }
240 // well known types that can be treated as text (@todo need some way to extend this API)? - Maybe not here but in REGISTRY
241 if (specificType == InternetMediaTypes::Types::kApplication) {
242 Assert (InternetMediaTypes::kRTF.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
243 if (specificSubType == InternetMediaTypes::kRTF.GetSubType<AtomType> ()) {
244 return true;
245 }
246 }
247 }
248 else if (moreGeneralType == InternetMediaTypes::kXML) {
249 if (specificType == InternetMediaTypes::Types::kApplication) {
250 Assert (InternetMediaTypes::kXML.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
251 if (specificSubType == InternetMediaTypes::kXML.GetSubType<AtomType> ()) {
252 return true;
253 }
254 Assert (InternetMediaTypes::kXSLT.GetType<AtomType> () == InternetMediaTypes::Types::kApplication);
255 if (specificSubType == InternetMediaTypes::kXSLT.GetSubType<AtomType> ()) {
256 return true;
257 }
258 }
259 if (specificType == InternetMediaTypes::Types::kText) {
260 static const AtomType kXMLAtom_ = "xml"sv;
261 if (specificSubType == kXMLAtom_) {
262 return true;
263 }
264 }
265 }
266
267 // look for suffixes
268 if (auto suffix = moreSpecificType.GetSuffix<AtomType> ()) {
269 if (moreGeneralType == InternetMediaTypes::kJSON) {
270 static const AtomType kSuffix_{"json"sv};
271 if (suffix == kSuffix_) {
272 return true;
273 }
274 }
275 else if (moreGeneralType == InternetMediaTypes::kXML) {
276 static const AtomType kSuffix_{"xml"sv};
277 if (suffix == kSuffix_) {
278 return true;
279 }
280 }
281 }
282
283 return false;
284 }
285 static void CheckData_ (Synchronized<Data_>::WritableReference* lockedData)
286 {
287 if (lockedData->rwref ().fBackendRep == nullptr) {
288 lockedData->rwref ().fBackendRep = InternetMediaTypeRegistry::DefaultBackend ();
289 }
290 }
291};
// Shared fallback frontend: the registry methods in this file use it (via NullCoalesce) whenever fFrontEndRep_ is null.
// It is constructed with a null backend, so its backend is only created on first use (see FrontendRep_::CheckData_).
292inline InternetMediaTypeRegistry::FrontendRep_ InternetMediaTypeRegistry::kDefaultFrontEndForNoBackend_{nullptr};
293
294/*
295 ********************************************************************************
296 *************************** InternetMediaTypeRegistry **************************
297 ********************************************************************************
298 */
299InternetMediaTypeRegistry::InternetMediaTypeRegistry (const shared_ptr<IBackendRep>& backendRep)
300 // note because can be constructed before main () - not safe to Memory::MakeSharedPtr<FrontendRep_> - so delay construction and use kDefaultFrontEndForNoBackend_ if needed
301 : fFrontEndRep_{backendRep == nullptr ? nullptr : MakeSharedPtr<FrontendRep_> (backendRep)}
302{
303}
304
306{
307 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetOverrides ();
308}
309
311{
312 if (fFrontEndRep_ == nullptr) {
313 fFrontEndRep_ = MakeSharedPtr<FrontendRep_> (kDefaultFrontEndForNoBackend_);
314 }
315 AssertNotNull (fFrontEndRep_);
316 fFrontEndRep_->SetOverrides (overrides);
317}
318
320{
321 if (fFrontEndRep_ == nullptr) {
322 fFrontEndRep_ = MakeSharedPtr<FrontendRep_> (kDefaultFrontEndForNoBackend_);
323 }
324 AssertNotNull (fFrontEndRep_);
325 fFrontEndRep_->AddOverride (mediaType, overrideRec);
326}
327
328optional<InternetMediaTypeRegistry::FileSuffixType> InternetMediaTypeRegistry::GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const
329{
330 return Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetPreferredAssociatedFileSuffix (ct);
331}
332
334{
336 r = Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedFileSuffixes (ct);
337 // if a MediaType has a builtin suffix, include that as well...
338 if (auto os = ct.GetSuffix<String> ()) {
339 r += *os;
340 }
341 return r;
342}
343
345{
346 return Memory::NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedPrettyName (ct);
347}
348
349shared_ptr<InternetMediaTypeRegistry::IBackendRep> InternetMediaTypeRegistry::DefaultBackend ()
350{
351 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::DefaultBackend"};
352#if qStroika_Foundation_Common_Platform_Windows
353 return WindowsRegistryDefaultBackend ();
354#endif
355 // @todo fix for MacOS - which doesn't support these - http://stroika-bugs.sophists.com/browse/STK-795
356 if (filesystem::exists ("/usr/share/mime"sv)) {
357 try {
358 return UsrSharedDefaultBackend ();
359 }
360 catch (...) {
361 // LOG/WRN
362 }
363 }
364 if (filesystem::exists ("/etc/mime.types"sv)) {
365 try {
367 }
368 catch (...) {
369 // LOG/WRN
370 }
371 }
372 return BakedInDefaultBackend (); // always works (but sucks)
373}
374
376{
377 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::EtcMimeTypesDefaultBackend"};
378 /*
379 * Use the file /etc/mime.types
380 *
381 * not sure this is useful - not sure who uses it that doesn't support /usr/share/mime...
382 *
383 * Preload the entire DB since its not practical to scan looking for the intended record (due to the time this would take).
384 */
385 struct EtcMimeTypesRep_ : IBackendRep {
386 Mapping<FileSuffixType, InternetMediaType> fSuffix2MediaTypeMap_;
387 Mapping<InternetMediaType, FileSuffixType> fMediaType2PreferredSuffixMap_;
389
390 EtcMimeTypesRep_ ()
391 {
392#if USE_NOISY_TRACE_IN_THIS_MODULE_
393 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::{}::EtcMimeTypesRep_::CTOR"};
394#endif
396 IO::FileSystem::FileInputStream::New ("/etc/mime.types"sv))) {
397 if (line.length () >= 2 and not line[0].StartsWith ("#"_k)) {
399 try {
400 ct = InternetMediaType{line[0]};
401 }
402 catch (...) {
403 DbgTrace ("Ignoring exception looking parsing potential media type entry ({}): {}"_f, line[0], current_exception ());
404 }
405 // a line starts with a content type, but then contains any number of file suffixes (without the leading .)
407 for (size_t i = 1; i < line.length (); ++i) {
408 if (line[i].empty ()) {
409 DbgTrace ("Ignoring bad looking parsing potential media type entry ({})"_f, line);
410 }
411 else {
412 Assert (not line[i].empty ());
413 String suffix = "."sv + line[i];
414 fSuffix2MediaTypeMap_.Add (suffix, ct);
415 fMediaType2PreferredSuffixMap_.Add (ct, suffix, AddReplaceMode::eAddIfMissing);
416 fileSuffixes.Add (suffix);
417 }
418 }
419 fMediaType2SuffixesMap_.Add (ct, fileSuffixes);
420 }
421 }
422 // Because on raspberrypi/debian, this comes out with a crazy default for text\plain -- LGP 2020-07-27
423 fMediaType2PreferredSuffixMap_.Add (InternetMediaTypes::kText_PLAIN, ".txt"sv);
424#if USE_NOISY_TRACE_IN_THIS_MODULE_
425 DbgTrace (L"succeeded with {} fSuffix2MediaTypeMap entries, and {} fMediaType2PreferredSuffixMap entries"_f,
426 fSuffix2MediaTypeMap_.size (), fMediaType2PreferredSuffixMap_.size ());
427#endif
428 }
429 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
430 {
432 for (const InternetMediaType& imt : fMediaType2PreferredSuffixMap_.Keys ()) {
433 if (majorType != nullopt and (imt.GetType<InternetMediaType::AtomType> () != *majorType)) {
434 continue;
435 }
436 results += imt;
437 }
438 return results;
439 }
440 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
441 {
442 if (auto o = fMediaType2PreferredSuffixMap_.Lookup (ct)) {
443 return *o;
444 }
445 return nullopt;
446 }
447 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
448 {
449 if (auto i = fMediaType2SuffixesMap_.Lookup (ct)) {
450 return *i;
451 }
453 }
454 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& /*ct*/) const override
455 {
456 return nullopt; // not supported in this file
457 }
458 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
459 {
460 Require (fileSuffix[0] == '.');
461 if (auto o = fSuffix2MediaTypeMap_.Lookup (fileSuffix)) {
462 return *o;
463 }
464 return nullopt;
465 }
466 };
467 return MakeSharedPtr<EtcMimeTypesRep_> ();
468}
469
471{
472 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::UsrSharedDefaultBackend"};
473 /*
474 * Documented to some small degree in https://www.linuxtopia.org/online_books/linux_desktop_guides/gnome_2.14_admin_guide/mimetypes-database.html
475 */
476 struct UsrShareMIMERep_ : IBackendRep {
477 Iterable<filesystem::path> fDataRoots_{"~/.local/share/mime/"sv, "/usr/local/share/mime/"sv, "/usr/share/mime"sv};
478
479 /*
480 * NOTE - for fSuffix2MediaTypeMap_ and fMediaType2PreferredSuffixMap, we cannot use Bijection,
481 * because multiple media-types can map to a single filetype and not all mediatypes have a filetype.
482 *
483 * We CANNOT use a cache, or dynamically fetch this data from files, because the data for each file suffix
484 * is not indexed (by file suffix) - it is indexed by content type (so those lookups COULD be dynamic). But
485 * we can easily construct both at the same time reading the summary file, so we do.
486 */
487 Mapping<FileSuffixType, InternetMediaType> fSuffix2MediaTypeMap_;
488 Mapping<InternetMediaType, FileSuffixType> fMediaType2PreferredSuffixMap_;
490
491 mutable Synchronized<Mapping<InternetMediaType, String>> fMediaType2PrettyNameCache; // incrementally build as needed
492
493 UsrShareMIMERep_ ()
494 {
495#if USE_NOISY_TRACE_IN_THIS_MODULE_
496 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::{}UsrShareMIMERep_::CTOR"};
497#endif
498 // @todo consider using globs2 file support, but little point since they seem to be written in priority order
499 auto loadGlobsFromFile = [&] (const filesystem::path& fn) {
500 if (filesystem::exists (fn)) {
501 Debug::TraceContextBumper ctx1{"UsrShareMIMERep_::CTOR::loadGlobsFromFile", "exists=true,fn={}"_f, fn};
502 try {
503 for (Sequence<String> line :
504 DataExchange::Variant::CharacterDelimitedLines::Reader{{':'}}.ReadMatrix (IO::FileSystem::FileInputStream::New (fn))) {
505 if (line.length () == 2) {
506 String glob = line[1];
507 if (glob.StartsWith ('*')) {
508 glob = glob.SubString (1);
509 }
510 // Use AddReplaceMode::eAddIfMissing - so first (appears empirically to be the preferred value) wins
512 try {
513 imt = InternetMediaType{line[0]};
514 }
515 catch (...) {
516 DbgTrace ("Ignoring exception looking parsing potential media type entry ({}): {}"_f, line[0], current_exception ());
517 }
518 fSuffix2MediaTypeMap_.Add (glob, imt, AddReplaceMode::eAddIfMissing);
519 fMediaType2PreferredSuffixMap_.Add (imt, glob, AddReplaceMode::eAddIfMissing);
520
521 // update the set of mapped suffixes
522 Containers::Set<FileSuffixType> prevSuffixes = fMediaType2SuffixesMap_.LookupValue (imt);
523 prevSuffixes.Add (glob);
524 fMediaType2SuffixesMap_.Add (imt, prevSuffixes);
525 }
526 }
527
528 // Because on raspberrypi/Debian, this comes out with a crazy default for text\plain -- LGP 2020-07-27
529 fMediaType2PreferredSuffixMap_.Add (InternetMediaTypes::kText_PLAIN, ".txt"_k);
530 }
531 catch (...) {
532 // log error
533 }
534 }
535 };
536 // override files loaded first, tied to use of AddReplaceMode::eAddIfMissing - not replacing
537 for (const auto& p : fDataRoots_) {
538 loadGlobsFromFile (p / "globs");
539 }
540
541#if USE_NOISY_TRACE_IN_THIS_MODULE_
542 DbgTrace ("succeeded with {} fSuffix2MediaTypeMap_ entries, and {} fMediaType2PreferredSuffixMap entries"_f,
543 fSuffix2MediaTypeMap_.size (), fMediaType2PreferredSuffixMap_.size ());
544#endif
545 }
546 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
547 {
548#if USE_NOISY_TRACE_IN_THIS_MODULE_
549 Debug::TraceContextBumper ctx{Stroika_Foundation_Debug_OptionalizeTraceArgs ("UsrShareMIMERep_::GetMediaTypes", "majorType={}"_f, majorType)};
550#endif
552 for (const auto& imt : fMediaType2PreferredSuffixMap_.Keys ()) {
553 if (majorType) {
554 if (imt.GetType<InternetMediaType::AtomType> () != *majorType) {
555 continue; // skip non-matching types
556 }
557 }
558 results += imt;
559 }
560 return results;
561 }
562 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
563 {
564 if (auto o = fMediaType2PreferredSuffixMap_.Lookup (ct)) {
565 return *o;
566 }
567 return nullopt;
568 }
569 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
570 {
571 if (auto i = fMediaType2SuffixesMap_.Lookup (ct)) {
572 return *i;
573 }
575 }
576 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
577 {
578 return LookupAndUpdateFromUsrShareMimePrettyName_ (ct);
579 }
580 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
581 {
582 Require (fileSuffix[0] == '.');
583 if (auto o = fSuffix2MediaTypeMap_.Lookup (fileSuffix)) {
584 return *o;
585 }
586 return nullopt;
587 }
588 optional<String> LookupAndUpdateFromUsrShareMimePrettyName_ (const InternetMediaType& ct) const
589 {
590#if USE_NOISY_TRACE_IN_THIS_MODULE_
591 Debug::TraceContextBumper ctx{"{}MIMEDB_::LookupAndUpdateFromUsrShareMimePrettyName"};
592#endif
593 // @todo combine lock calls in this procedure
594 if (auto o = fMediaType2PrettyNameCache.cget ()->Lookup (ct)) {
595 return *o;
596 }
597 // SAX parse /usr/share/mime/TYPE/SUBTYPE.xml file and look for <comment> element (default with no language for now)
598 // Simpler - just take the first - seems empirically fine/OK
599#if qStroika_Foundation_DataExchange_XML_SupportParsing
600 try {
601 using Name = StructuredStreamEvents::Name;
602 struct myHander_ : StructuredStreamEvents::IConsumer {
603 optional<String> fResult;
604 bool onContentElt{false};
605 StringBuilder<> fAccum;
606 virtual void StartElement (const Name& name, [[maybe_unused]] const Mapping<Name, String>& attributes) override
607 {
608 if (name == Name{"content"_k} and not fResult.has_value ()) {
609 onContentElt = true;
610 }
611 }
612 virtual void EndElement ([[maybe_unused]] const Name& name) override
613 {
614 if (onContentElt) {
615 Assert (not fResult);
616 fResult = fAccum.str ();
617 }
618 }
619 virtual void TextInsideElement (const String& t) override
620 {
621 if (onContentElt) {
622 fAccum << t;
623 }
624 }
625 };
626 filesystem::path mimeRoot{"/usr/share/mime/"sv};
627 myHander_ handler;
628 // @todo validate ct.GetType () to make sure not a ../../ ATTACK
629 DataExchange::XML::SAXParse (IO::FileSystem::FileInputStream::New (
630 mimeRoot / (ct.GetType () + "/"_k + ct.GetSubType () + ".xml"_k).As<filesystem::path> ()),
631 &handler);
632 if (handler.fResult) {
633 fMediaType2PrettyNameCache.rwget ()->Add (ct, *handler.fResult);
634 return *handler.fResult;
635 }
636 }
637 catch (...) {
638#if USE_NOISY_TRACE_IN_THIS_MODULE_
639 DbgTrace ("failure ignored"_f);
640#endif
641 }
642#else
643 DbgTrace ("/usr/share/mime/ ignored cuz no xml reader - not compiled with libxml2 or Xerces"_f);
644#endif
645 return nullopt;
646 }
647 };
648 return MakeSharedPtr<UsrShareMIMERep_> ();
649}
650
651auto InternetMediaTypeRegistry::BakedInDefaultBackend () -> shared_ptr<IBackendRep>
652{
653 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::BakedInDefaultBackend"};
654 struct DefaultEmptyBackendRep_ : IBackendRep {
655 virtual Containers::Set<InternetMediaType> GetMediaTypes ([[maybe_unused]] optional<InternetMediaType::AtomType> majorType) const override
656 {
658 }
659 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix ([[maybe_unused]] const InternetMediaType& ct) const override
660 {
661 return nullopt;
662 }
663 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes ([[maybe_unused]] const InternetMediaType& ct) const override
664 {
666 }
667 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& /*ct*/) const override
668 {
669 return nullopt;
670 }
671 virtual optional<InternetMediaType> GetAssociatedContentType ([[maybe_unused]] const FileSuffixType& fileSuffix) const override
672 {
673 Require (fileSuffix[0] == '.');
674 return nullopt;
675 }
676 };
677 return MakeSharedPtr<DefaultEmptyBackendRep_> ();
678}
679
680auto InternetMediaTypeRegistry::CloneAsSharedPtr_ (const IFrontendRep_& t) -> shared_ptr<IFrontendRep_>
681{
682 return Memory::MakeSharedPtr<FrontendRep_> (t.GetBackendRep (), t.GetOverrides ());
683}
684
685#if qStroika_Foundation_Common_Platform_Windows
686auto InternetMediaTypeRegistry::WindowsRegistryDefaultBackend () -> shared_ptr<IBackendRep>
687{
688 /*
689 * I can find no documentation on how this works, but at least https://stackoverflow.com/questions/3442607/mime-types-in-the-windows-registry
690 * mentions it.
691 *
692 * Empirically you can usually find:
693 * HKEY_CLASSES_ROOT\MIME\Database
694 * Content Type\CT\Extension
695 * This layout does not appear to accommodate ever having more than one extension for a given mime type
696 *
697 * HKEY_CLASSES_ROOT\FILE_SUFFIX
698 * {default} pretty name
699 * Content Type: 'internet media type'
700 *
701 * \note On Docker windows server core images, this is often missing! (but addressed with the default values baked into the frontend) -- LGP 2020-07-28
702 */
703 Debug::TraceContextBumper ctx{"InternetMediaTypeRegistry::WindowsRegistryDefaultBackend"};
704 struct WinRep_ : IBackendRep {
705 // underlying windows code fast so use small cache sizes
706 mutable Cache::SynchronizedLRUCache<FileSuffixType, optional<String>, equal_to<FileSuffixType>, hash<FileSuffixType>> fFileSuffix2PrettyNameCache_{
707 25, 7};
708 mutable Cache::SynchronizedLRUCache<FileSuffixType, optional<InternetMediaType>, equal_to<FileSuffixType>, hash<FileSuffixType>> fSuffix2MediaTypeCache_{
709 25, 7};
710 mutable Cache::SynchronizedLRUCache<InternetMediaType, optional<FileSuffixType>, equal_to<InternetMediaType>, hash<InternetMediaType>> fContentType2FileSuffixCache_{
711 25, 7};
712 mutable Cache::SynchronizedLRUCache<InternetMediaType, Containers::Set<FileSuffixType>, equal_to<InternetMediaType>, hash<InternetMediaType>> fContentType2FileSuffixesCache_{
713 25, 7};
714
715 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const override
716 {
718 //
719 // rarely do we fetch all MIME types, so don't cache - just re-fetch each time
720 //
721 // On Windows, in registry, easiest way appears to be to enumerate ALL registry entries in HKCR that start with .,
722 // and look for sub-field 'Content-type'
723 //
724 using RegistryKey = Common::Platform::Windows::RegistryKey;
725 for (shared_ptr<RegistryKey> sk : RegistryKey{HKEY_CLASSES_ROOT}.EnumerateSubKeys ()) {
726 String name = sk->GetFullPathOfKey ().Tokenize ({'\\'}).LastValue ();
727 if (name.StartsWith ('.')) {
728 if (auto o = sk->Lookup ("Content Type"sv)) {
730 try {
731 imt = InternetMediaType{o.As<String> ()};
732 }
733 catch (...) {
734 // ignore bad format - such as .sqlproj has Content-Type "string" which my read of the RFC says is illegal
735 DbgTrace ("Ignoring exception parsing registry key ({}): {}"_f, o, current_exception ());
736 continue;
737 }
738 if (majorType) {
739 if (imt.GetType<InternetMediaType::AtomType> () != *majorType) {
740 continue; // skip non-matching types
741 }
742 }
743 result.Add (imt);
744 }
745 }
746 }
747 return result;
748 }
749 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const override
750 {
751 return fContentType2FileSuffixCache_.LookupValue (ct, [] (const InternetMediaType& ct) -> optional<FileSuffixType> {
752 if (auto fs = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup ("MIME\\Database\\Content Type\\{}\\Extension"_f(ct))) {
753 return fs.As<String> ();
754 }
755 return nullopt;
756 });
757 }
758 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const override
759 {
760 // This is expensive to compute, and we could compute all and cache, but I don't think we will need to lookup very often, so just
761 // compute as needed and cache a few
762 return fContentType2FileSuffixesCache_.LookupValue (ct, [] (const InternetMediaType& ct) -> Containers::Set<FileSuffixType> {
765 for (shared_ptr<RegistryKey> sk : RegistryKey{HKEY_CLASSES_ROOT}.EnumerateSubKeys ()) {
766 String name = sk->GetFullPathOfKey ().Tokenize ({'\\'}).LastValue ();
767 if (name.StartsWith ("."_k)) {
768 if (auto o = sk->Lookup ("Content Type"sv)) {
770 try {
771 imt = InternetMediaType{o.As<String> ()};
772 }
773 catch (...) {
774 // ignore bad format - such as .sqlproj has Content-Type "string" which my read of the RFC says is illegal
775 DbgTrace ("Ignoring exception parsing registry key ({}): {}"_f, o, current_exception ());
776 continue;
777 }
778 if (ct.GetType () == imt.GetType () and ct.GetSubType () == imt.GetSubType ()) {
779 result += name;
780 }
781 }
782 }
783 }
784 return result;
785 });
786 }
787 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const override
788 {
789 if (optional<FileSuffixType> fileSuffix = GetPreferredAssociatedFileSuffix (ct)) {
790 return fFileSuffix2PrettyNameCache_.LookupValue (*fileSuffix, [] (const String& suffix) -> optional<String> {
791 if (auto fileTypeID = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup (suffix + "\\"_k)) {
792 if (auto prettyName = Common::Platform::Windows::RegistryKey{HKEY_CLASSES_ROOT}.Lookup (fileTypeID.As<String> () + "\\"_k)) {
793 return prettyName.As<String> ();
794 }
795 }
796 return nullopt;
797 });
798 }
799 return nullopt;
800 }
801 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const override
802 {
803 Require (fileSuffix[0] == '.');
804 return fSuffix2MediaTypeCache_.LookupValue (fileSuffix, [] (const FileSuffixType& fileSuffix) -> optional<InternetMediaType> {
806 // only do registry lookup if needed, since (probably) more costly than local map lookup
807 if (auto oct = RegistryKey{HKEY_CLASSES_ROOT}.Lookup ("{}\\Content Type"_f(fileSuffix))) {
808 InternetMediaType mediaType{oct.As<String> ()};
809 return mediaType;
810 }
811 return nullopt;
812 });
813 }
814 };
815 return MakeSharedPtr<WinRep_> ();
816}
817#endif
818
819Set<InternetMediaType> InternetMediaTypeRegistry::GetMediaTypes () const
820{
821 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetMediaTypes (nullopt);
822}
823
824Set<InternetMediaType> InternetMediaTypeRegistry::GetMediaTypes (InternetMediaType::AtomType majorType) const
825{
826 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetMediaTypes (majorType);
827}
828
830{
832 for (const auto& ct : mediaTypes) {
833 for (const auto& i : GetAssociatedFileSuffixes (ct)) {
834 result += i;
835 }
836 }
837 return result;
838}
839
840optional<InternetMediaType> InternetMediaTypeRegistry::GetAssociatedContentType (const FileSuffixType& fileSuffix) const
841{
842 if (fileSuffix.empty ()) {
843 return nullopt;
844 }
845 Assert (fileSuffix[0] == '.');
846 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).GetAssociatedContentType (fileSuffix);
847}
848
850{
851 return IsA (InternetMediaTypes::Wildcards::kText, ct);
852}
853
855{
856 return IsA (InternetMediaTypes::Wildcards::kImage, ct);
857}
858
860{
861 return IsA (InternetMediaTypes::kXML, ct);
862}
863
864bool InternetMediaTypeRegistry::IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const
865{
866 using AtomType = InternetMediaType::AtomType;
867 // shortcut this one case
868 if (moreSpecificType.GetType<AtomType> () == moreGeneralType.GetType<AtomType> () and
869 moreSpecificType.GetSubType<AtomType> () == moreGeneralType.GetSubType<AtomType> ()) {
870 return true;
871 }
872 return NullCoalesce (fFrontEndRep_, kDefaultFrontEndForNoBackend_).IsA (moreGeneralType, moreSpecificType);
873}
#define AssertNotNull(p)
Definition Assertions.h:334
const OT & NullCoalesce(const OT &l, const OT &r)
return one of l, or r, with first preference for which is engaged, and second preference for left-to-...
Definition Optional.inl:134
#define DbgTrace
Definition Trace.h:317
#define Stroika_Foundation_Debug_OptionalizeTraceArgs(...)
Definition Trace.h:278
simple wrapper on LRUCache (with the same API) - but internally synchronized in a way that is more pe...
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual String SubString(SZ from) const
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1060
nonvirtual Containers::Sequence< String > Tokenize() const
Definition String.cpp:1235
nonvirtual DataExchange::VariantValue Lookup(const Characters::String &valuePath) const
Definition Registry.cpp:143
nonvirtual bool Add(ArgByValueType< key_type > key, ArgByValueType< mapped_type > newElt, AddReplaceMode addReplaceMode=AddReplaceMode::eAddReplaces)
Definition Mapping.inl:188
nonvirtual optional< mapped_type > Lookup(ArgByValueType< key_type > key) const
Definition Mapping.inl:142
nonvirtual mapped_type LookupValue(ArgByValueType< key_type > key, ArgByValueType< mapped_type > defaultValue=mapped_type{}) const
Definition Mapping.inl:166
nonvirtual Iterable< key_type > Keys() const
Definition Mapping.inl:111
A generalization of a vector: a container whose elements are keyed by the natural numbers.
Set<T> is a container of T, where once an item is added, additionally adds () do nothing.
nonvirtual void Add(ArgByValueType< value_type > item)
Definition Set.inl:138
nonvirtual RETURN_TYPE GetType() const
Gets the primary (major) type of the full internet media type (as a string or atom)
nonvirtual T As() const
convert to type T supported types: o String o wstring
nonvirtual optional< RETURN_TYPE > GetSuffix() const
this is the +XXX part of the internet media type (e.g. +xml) and is often omitted (but note this omit...
nonvirtual Mapping< InternetMediaType, OverrideRecord > GetOverrides() const
static shared_ptr< IBackendRep > DefaultBackend()
Generally no need to use this - handled automatically - but returns the default, OS-provided MIME Int...
nonvirtual bool IsXMLFormat(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > UsrSharedDefaultBackend()
Generally no need to use this - handled automatically.
nonvirtual bool IsA(const InternetMediaType &moreGeneralType, const InternetMediaType &moreSpecificType) const
return true if moreSpecificType 'isa' moreGeneralType
nonvirtual void SetOverrides(const Mapping< InternetMediaType, OverrideRecord > &overrides)
nonvirtual optional< InternetMediaType > GetAssociatedContentType(const FileSuffixType &fileSuffix) const
nonvirtual optional< String > GetAssociatedPrettyName(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > EtcMimeTypesDefaultBackend()
Generally no need to use this - handled automatically.
static shared_ptr< IBackendRep > BakedInDefaultBackend()
Generally no need to use this - handled automatically.
InternetMediaTypeRegistry(const shared_ptr< IBackendRep > &backendRep=nullptr)
nonvirtual optional< FileSuffixType > GetPreferredAssociatedFileSuffix(const InternetMediaType &ct) const
nonvirtual Containers::Set< FileSuffixType > GetAssociatedFileSuffixes(const InternetMediaType &ct) const
nonvirtual void AddOverride(const InternetMediaType &mediaType, const OverrideRecord &overrideRec)
bool IsTextFormat(const InternetMediaType &ct) const
returns true if you can expect to treat as some sort of text and reasonably view - like text/html,...
This COULD be easily used to read CSV files, or tab-delimited files, for example.
nonvirtual Iterable< Sequence< String > > ReadMatrix(const Streams::InputStream::Ptr< byte > &in) const
Wrap any object with Synchronized<> and it can be used similarly to the base type,...
nonvirtual WritableReference rwget()
get a read-write smart pointer to the underlying Synchronized<> object, holding the full lock the who...
nonvirtual ReadableReference cget() const
get a read-only smart pointer to the underlying Synchronized<> object, holding the readlock the whole...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
nonvirtual void Apply(const function< void(ArgByValueType< T > item)> &doToElement, Execution::SequencePolicy seq=Execution::SequencePolicy::eDEFAULT) const
Run the argument function (or lambda) on each element of the container.
nonvirtual size_t size() const
Returns the number of items contained.
Definition Iterable.inl:303
const InternetMediaType::AtomType kApplication
'application'
for OS facilities not updatable - or controllable - just usable.