Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
InternetMediaTypeRegistry.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_DataExchange_InternetMediaTypeRegistry_h_
5#define _Stroika_Foundation_DataExchange_InternetMediaTypeRegistry_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <filesystem>
10#include <optional>
11
13#include "Stroika/Foundation/Containers/Mapping.h"
14#include "Stroika/Foundation/Containers/Set.h"
16#include "Stroika/Foundation/DataExchange/InternetMediaTypeNotSupportedException.h"
19
20/**
21 * \file
22 *
23 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
24 *
25 */
26
28
29 using Characters::String;
30 using Containers::Mapping;
31
32 /**
33 * This leverages the os-dependent MIME databases
34 *
35 * \todo Cleanup internally when we do caching and where and how. Not bad now - but maybe smarter to do at
36 * frontend level not backend level? Unclear (since some backends have different cost structures).
37 *
38 * But sloppily done for now.
39 *
40 * \par Example Usage
41 * \code
42 * if (InternetMediaTypeRegistry::sThe->IsA (InternetMediaTypes::Wildcards::kText, InternetMediaType {some-string})) {
43 * handle_textfiles();
44 * }
45 * \endcode
46 *
47 * \par Example Usage
48 * \code
49 * DbgTrace ("SUFFIX: {}"_f, InternetMediaTypeRegistry::sThe->GetPreferredAssociatedFileSuffix (i));
50 * DbgTrace ("ASSOCFILESUFFIXES: {}"_f, InternetMediaTypeRegistry::sThe->GetAssociatedFileSuffixes (i));
51 * DbgTrace ("GetAssociatedPrettyName: {}"_f, InternetMediaTypeRegistry::sThe->GetAssociatedPrettyName (i));
52 * \endcode
53 *
54 * \par Example Usage
55 * \code
56 * // updating media type registry, create a new one and call Set
57 * InternetMediaTypeRegistry origRegistry = InternetMediaTypeRegistry::sThe.load ();
58 * InternetMediaTypeRegistry updatedRegistry = origRegistry;
59 * const auto kHFType_ = InternetMediaType{"application/fake-healthframe-phr+xml"};
60 * EXPECT_TRUE (not InternetMediaTypeRegistry::sThe->GetMediaTypes ().Contains (kHFType_));
61 * updatedRegistry.AddOverride (kHFType_, InternetMediaTypeRegistry::OverrideRecord{nullopt, Set<String>{".HPHR"}, L".HPHR"});
62 * InternetMediaTypeRegistry::sThe.store (updatedRegistry);
63 * Assert (InternetMediaTypeRegistry::sThe->IsA (InternetMediaTypes::kXML, kHFType_));
64 * Assert (InternetMediaTypeRegistry::sThe->GetMediaTypes ().Contains (kHFType_));
65 * Assert (not origRegistry.GetMediaTypes ().Contains (kHFType_));
66 * Assert (updatedRegistry.GetMediaTypes ().Contains (kHFType_));
67 * \endcode
68 *
69 * TODO:
70 * \todo http://stroika-bugs.sophists.com/browse/STK-714 - InternetMediaTypeRegistry Add mechanism to fetch subtypes more generally - enhance IsA
71 *
72 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety">C++-Standard-Thread-Safety</a>
73 *
74 */
76 public:
77 struct IBackendRep;
78
79 public:
80 /**
81 * \brief - Generally no need to use this - handled automatically - but returns the default, OS-provided MIME InternetMediaType registry.
82 *
83 * On Windows, this uses:
84 * HKEY_CLASSES_ROOT\MIME\Database\Content Type
85 *
86 * On Linux/BSD (but not MacOS), this uses:
87 * /usr/share/mime/globs
88 * /etc/mime.types
89 */
90 static shared_ptr<IBackendRep> DefaultBackend ();
91
92#if qStroika_Foundation_Common_Platform_Windows
93 public:
94 /**
95 * Use:
96 * HKEY_CLASSES_ROOT\MIME\Database\Content Type
97 */
98 static shared_ptr<IBackendRep> WindowsRegistryDefaultBackend ();
99#endif
100
101 public:
102 /**
103 * \brief - Generally no need to use this - handled automatically.
104 *
105 * Available on:
106 * o Linux
107 * o BSD
108 *
109 * /usr/share/mime/globs
110 *
111 * This is the preferred backend on UNIX systems
112 */
113 static shared_ptr<IBackendRep> UsrSharedDefaultBackend ();
114
115 public:
116 /**
117 * \brief - Generally no need to use this - handled automatically.
118 *
119 * Available on:
120 * o Linux
121 * o BSD
122 *
123 * /etc/mime.types
124 *
125 * This is not a very good choice, but will often work. It is fairly incomplete.
126 */
127 static shared_ptr<IBackendRep> EtcMimeTypesDefaultBackend ();
128
129 public:
130 /**
131 * \brief - Generally no need to use this - handled automatically.
132 *
133 * Provides a handful of hardwired values - enough to pass regression tests, but not a good choice.
134 *
135 * Available everywhere
136 *
137 * This is a terrible choice, but better than nothing.
138 */
139 static shared_ptr<IBackendRep> BakedInDefaultBackend ();
140
141 public:
142 /**
143 * file suffix includes the dot; This COULD have been defined as a filesystem::path, as path::extension() returns path.
144 * But I think this is generally more convenient as a string and this class provides overloads when passing in an extension
145 * taking a filesystem::path.
146 */
148
149 public:
150 /**
151 * Used to override InternetMediaType file suffix/pretty name entries from the OS, or to define custom ones per-application.
152 */
154 optional<String> fTypePrintName;
155 optional<Containers::Set<FileSuffixType>> fFileSuffixes;
156 optional<FileSuffixType> fPreferredSuffix;
157
158 /**
159 * @see Characters::ToString ()
160 */
161 nonvirtual Characters::String ToString () const;
162 };
163
164 public:
165 /**
166 * The default constructor makes a new (empty) copy of customizations, and uses DefaultBackend (). The constructor
167 * with the explicit backend, uses that backend.
168 */
169 InternetMediaTypeRegistry (const shared_ptr<IBackendRep>& backendRep = nullptr);
171
172 public:
173 nonvirtual InternetMediaTypeRegistry& operator= (InternetMediaTypeRegistry&& rhs) = default;
174 nonvirtual InternetMediaTypeRegistry& operator= (const InternetMediaTypeRegistry& rhs) = default;
175
176 public:
177 /**
178 * The current global variable - InternetMediaTypeRegistry. Typically - use this.
179 *
180 * \note copying InternetMediaTypeRegistry by value is cheap (shared-by-value), so avoiding the lock around sThe is easy - just copy the InternetMediaTypeRegistry::sThe.
181 *
182 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety-For-Envelope-Letter-Internally-Synchronized">C++-Standard-Thread-Safety-For-Envelope-Letter-Internally-Synchronized</a>
183 */
185
186 public:
187 /**
188 * Return the current override mappings (note - these are initialized per-OS, to provide sometimes better values than the OS,
189 * but this can be overridden/cleared).
190 */
192
193 public:
194 /**
195 * Set the current override mappings. Rarely called. More likely - call AddOverride()
196 */
197 nonvirtual void SetOverrides (const Mapping<InternetMediaType, OverrideRecord>& overrides);
198
199 public:
200 /**
201 * Typically used to add custom internet media type mappings to file names. But can be used to override operating system defaults.
202 */
203 nonvirtual void AddOverride (const InternetMediaType& mediaType, const OverrideRecord& overrideRec);
204
205 private:
206 struct IFrontendRep_;
207 struct FrontendRep_;
208
209 struct Rep_Cloner_ { // function object: maps an IFrontendRep_ to a shared_ptr<IFrontendRep_>; only declared here, defined out of line
210 shared_ptr<IFrontendRep_> operator() (const IFrontendRep_& t) const; // NOTE(review): presumably produces a copy of t for use by SharedRepByValuePtr_ - confirm against the definition
211 };
212 using SharedRepByValuePtr_ =
214
215 SharedRepByValuePtr_ fFrontEndRep_;
216
217 static FrontendRep_ kDefaultFrontEndForNoBackend_;
218
219 public:
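220 /** Return a set of InternetMediaTypes from this registry. NOTE(review): the overload taking majorType presumably restricts the result to that major type - confirm against the implementation.
221 */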
222 nonvirtual Containers::Set<InternetMediaType> GetMediaTypes () const;
223 nonvirtual Containers::Set<InternetMediaType> GetMediaTypes (InternetMediaType::AtomType majorType) const;
224
225 public:
226 /**
227 * There are frequently many file suffixes associated with a given filetype. This routine fetches the single best/preferred value.
228 */
229 nonvirtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const;
230
231 public:
232 /**
233 * There can be more than one file suffix associated with a content type.
234 */
237
238 public:
239 /**
240 * return nullopt if not found
241 */
242 nonvirtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const;
243
244 public:
245 /**
246 * return nullopt if not found
247 *
248 * \note Since Stroika v2.1b2 - this takes a fileSuffix argument - NOT fileNameOrSuffix - because path::extension () can be used to obtain the suffix from a file name.
249 *
250 * \pre fileSuffix.empty () or fileSuffix[0] == '.'
251 */
252 nonvirtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const;
253 nonvirtual optional<InternetMediaType> GetAssociatedContentType (const filesystem::path& fileSuffix) const;
254
255 public:
256 /**
257 * \brief returns true if you can expect to treat as some sort of text and reasonably view - like text/html, application/x-ccr, application/x-url, etc...
258 *
259 * This examines the 'Type' field, sometimes subtype field, as well as leverages the Suffix field (if present).
260 */
261 [[deprecated ("Since Stroika v3.0d12 - use IsA (InternetMediaTypes::Wildcards::kText, ct)")]] bool IsTextFormat (const InternetMediaType& ct) const;
262
263 public:
264 /**
265 * This returns true if the given type is known to be treatable as an image.
266 */
267 [[deprecated ("Since Stroika v3.0d12 - use IsA (InternetMediaTypes::Wildcards::kImage,ct)")]] bool IsImageFormat (const InternetMediaType& ct) const;
268
269 public:
270 /**
271 * This returns true if the given type is known to be treatable as XML.
272 *
273 * This examines the 'Type' field, sometimes subtype field, as well as leverages the Suffix field (if present).
274 *
275 * \note This returns true if 'ct.GetSuffix () == "xml"
276 */
277 [[deprecated ("Since Stroika v3.0d12 - use IsA (InternetMediaTypes::kXML,ct)")]] nonvirtual bool IsXMLFormat (const InternetMediaType& ct) const;
278
279 public:
280 /**
281 * \brief return true if moreSpecificType 'isa' moreGeneralType
282 *
283 * Still a bit of a work in progress (as of 2024-11-03). But basic idea is to make it easy to check categories of internet media types
284 * in a (someday) extensible fashion.
285 *
286 * To check if a given type 'ct' is a 'text' type:
287 * o IsA (InternetMediaTypes::Wildcards::kText, ct)
288 *
289 * To check if a given type 'ct' is an 'image' type:
290 * o IsA (InternetMediaTypes::Wildcards::kImage, ct)
291 *
292 * To check if a given type 'ct' is a 'json' type:
293 * o IsA (InternetMediaTypes::kJSON, ct) -- for example works for kJSONPatch
294 *
295 *
296 * This API replaces the deprecated Stroika v2.1 APIs:
297 * IsXMLFormat, IsImageFormat, IsTextFormat
298 *
299 *
300 *
301 * // OLD DOCS BELOW - @todo cleanup
302 *
303 * The HISTORICAL algorithm for this is:
304 *
305 * This function compares similar types, like
306 * application/healthframe-PHR-Format and
307 * application/healthframe-PHR-Format-2 etc
308 * and returns true iff the given type is a prefix (case insensitive)
309 * of the argument more general one. The types must match, and the
310 * parameters are ignored.
311 *
312 * Change as of Stroika v2.1d27 - now only checks prefix of
313 * subtype - type must match - and now ignores parameters.
314 *
315 * @todo REDO this - and don't count on the above old algorithm. Will add new mechanism EITHER based on what I can read from
316 * the MIME config files on each OS (except it appears windows), or from some registration;
317 * see http://stroika-bugs.sophists.com/browse/STK-714
318 *
319 * As of Stroika v3.0d5 - this just checks the type and subtype atoms for equality (which are already case insensitive) and
320 * ignores file suffix and parameters.
321 *
322 * However, unclear how to capture semantic things like XML is a kind of text, or CCR is a kind of XML. And certainly nothing like the -1, -2 stuff
323 * above with HEALTHFRAME PHR format.
324 */
325 nonvirtual bool IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const;
326
327 public:
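328 /** NOTE(review): presumably throws throwIfNot unless IsA (moreGeneralType, moreSpecificType) holds - confirm against the definition in the .inl file.
329 */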
330 template <typename EXCEPTION = InternetMediaTypeNotSupportedException>
331 nonvirtual void CheckIsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType,
332 const EXCEPTION& throwIfNot = InternetMediaTypeNotSupportedException::kThe) const;
333
334 public:
335 [[deprecated ("Since Stroika v3.0d10 - just access sThe->")]] static InternetMediaTypeRegistry Get () // deprecated accessor kept for older callers; returns the registry by value
336 {
337 return sThe.load (); // hand back whatever the global sThe currently holds
338 }
339 [[deprecated ("Since Stroika v3.0d10 - just set sThe")]] static void Set (const InternetMediaTypeRegistry& newRegistry) // deprecated setter kept for older callers
340 {
341 sThe = newRegistry; // assign newRegistry to the global sThe
342 }
343 };
344 inline Execution::Synchronized<InternetMediaTypeRegistry> InternetMediaTypeRegistry::sThe;
345
346 /**
347 * \brief for OS facilities not updatable - or controllable - just usable.
348 */
350 virtual ~IBackendRep () = default;
351 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const = 0;
352 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const = 0;
353 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const = 0;
354 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const = 0;
355 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileNameOrSuffix) const = 0;
356 };
357
358 /**
359 * \brief alternative to IBackendRep - which can be updated/amended.
360 */
362 virtual ~IFrontendRep_ () = default;
363 virtual Mapping<InternetMediaType, OverrideRecord> GetOverrides () const = 0;
364 virtual void SetOverrides (const Mapping<InternetMediaType, OverrideRecord>& overrides) = 0;
365 virtual void AddOverride (const InternetMediaType& mediaType, const OverrideRecord& overrideRec) = 0;
366 virtual shared_ptr<IBackendRep> GetBackendRep () const = 0;
367 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const = 0;
368 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const = 0;
369 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const = 0;
370 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const = 0;
371 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileNameOrSuffix) const = 0;
372 virtual bool IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const = 0;
373 };
374
375 /**
376 * \note
377 * @see http://www.iana.org/assignments/media-types/media-types.xhtml
378 */
379
380 /**
381 * The currently registered types are:
382 * application, audio, example, font, image, message, model, multipart, text and video
383 *
384 * \note Types - here - refers to MAJOR types - not InternetMediaTypes (so just top level stuff before the /)
385 */
386 namespace InternetMediaTypes::Types { // major-type atoms; only application, audio, image and text are defined in this namespace
387
388 /**
389 * \brief 'application'
390 *
391 * This is the major type (atom) making up a class of InternetMediaTypes.
392 */
393 inline const InternetMediaType::AtomType kApplication{"application"sv};
394
395 /**
396 * \brief 'audio'
397 *
398 * This is the major type (atom) making up a class of InternetMediaTypes.
399 */
400 inline const InternetMediaType::AtomType kAudio{"audio"sv};
401
402 /**
403 * \brief 'image'
404 *
405 * This is the major type (atom) making up a class of InternetMediaTypes.
406 */
407 inline const InternetMediaType::AtomType kImage{"image"sv};
408
409 /**
410 * \brief 'text'
411 *
412 * This is the major type (atom) making up a class of InternetMediaTypes.
413 */
414 inline const InternetMediaType::AtomType kText{"text"sv};
415
416 }
417
418 namespace InternetMediaTypes::Wildcards { // each constant pairs a major type from Types:: with a value-initialized ({}) second (subtype) argument
419
420 /**
421 * Generic audio content (used for IsA () primarily)
422 */
423 inline const InternetMediaType kAudio{Types::kAudio, {}};
424
425 /**
426 * Generic image content (used for IsA () primarily)
427 */
428 inline const InternetMediaType kImage{Types::kImage, {}};
429
430 /**
431 * Generic text content (used for IsA () primarily)
432 */
433 inline const InternetMediaType kText{Types::kText, {}};
434
435 }
436
437 namespace InternetMediaTypes {
438
439 /**
440 * \brief application/octet-stream
441 */
442 inline const InternetMediaType kOctetStream{Types::kApplication, "octet-stream"sv};
443
444 /**
445 * \brief audio/mp3
446 */
448
449 /**
450 * \brief audio/mp4
451 */
453
454 /**
455 * \brief audio/x-wav
456 */
457 inline const InternetMediaType kAudioWAV{Types::kAudio, "x-wav"sv};
458
459 /**
460 * \brief image/png
461 */
462 inline const InternetMediaType kPNG{Types::kImage, "png"sv};
463
464 /**
465 * \brief image/gif
466 */
467 inline const InternetMediaType kGIF{Types::kImage, "gif"sv};
468
469 /**
470 * \brief image/jpeg
471 */
472 inline const InternetMediaType kJPEG{Types::kImage, "jpeg"sv};
473
474 /**
475 * \brief text/html
476 */
477 inline const InternetMediaType kHTML{Types::kText, "html"sv};
478
479 /**
480 * \brief text/javascript
481 */
482 inline const InternetMediaType kJavascript{Types::kText, "javascript"sv};
483
484 /**
485 * \brief text/css
486 */
487 inline const InternetMediaType kCSS{Types::kText, "css"sv};
488
489 /**
490 * \brief text/plain
491 */
493
494 /**
495 * \brief text/csv
496 */
497 inline const InternetMediaType kCSV{Types::kText, "csv"sv};
498
499 /**
500 * \brief application/json
501 */
503
504 /**
505 * \brief application/json-patch+json
506 * \see https://datatracker.ietf.org/doc/html/rfc6902
507 */
508 inline const InternetMediaType kJSONPatch{Types::kApplication, "json-patch"sv, "json"sv};
509
510 /**
511 * \brief application/x-www-form-urlencoded
512 * \see https://www.ietf.org/rfc/rfc1867.txt
513 * \see https://www.ietf.org/rfc/rfc7578.txt
514 */
515 inline const InternetMediaType kWWWFormURLEncoded{Types::kApplication, "x-www-form-urlencoded"sv};
516
517 /**
518 * \brief application/pdf
519 */
521
522 /**
523 * \brief text/uri-list (@see https://tools.ietf.org/html/rfc2483#section-5)
524 *
525 * \note until Stroika 2.1b2, this was defined as application/x-url
526 */
527 inline const InternetMediaType kURL{Types::kText, "uri-list"sv};
528
529 /**
530 * \brief application/xml (@see https://www.w3.org/2006/02/son-of-3023/draft-murata-kohn-lilley-xml-04.html#rfc.section.3.2)
531 *
532 * \note use this because text/xml deprecated - https://www.w3.org/2006/02/son-of-3023/draft-murata-kohn-lilley-xml-04.html#:~:text=Text%2Fxml%20Registration%20(deprecated),-MIME%20media%20type&text=Although%20listed%20as%20an%20optional,based%20content%20negotiation%20in%20HTTP.
533 */
535
536 /**
537 * \brief application/x-xslt
538 */
539 inline const InternetMediaType kXSLT{Types::kApplication, "x-xslt"sv};
540
541 /** \brief application/java-archive
542 */
543 inline const InternetMediaType kJavaArchive{Types::kApplication, "java-archive"sv};
544
545 /**
546 * Microsoft RTF - Rich Text Format
547 */
549
550 /**
551 * \brief application/zip
552 */
554
555 }
556
557}
558
559/*
560 ********************************************************************************
561 ***************************** Implementation Details ***************************
562 ********************************************************************************
563 */
564#include "InternetMediaTypeRegistry.inl"
565
566#endif /*_Stroika_Foundation_DataExchange_InternetMediaTypeRegistry_h_*/
const InternetMediaType kWWWFormURLEncoded
application/x-www-form-urlencoded
const InternetMediaType kOctetStream
application/octet-stream
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
Set<T> is a container of T, where once an item is added, additionally adds () do nothing.
Definition Set.h:105
nonvirtual Mapping< InternetMediaType, OverrideRecord > GetOverrides() const
static shared_ptr< IBackendRep > DefaultBackend()
Generally no need to use this - handled automatically - but returns the default, OS-provided MIME Int...
nonvirtual bool IsXMLFormat(const InternetMediaType &ct) const
static Execution::Synchronized< InternetMediaTypeRegistry > sThe
static shared_ptr< IBackendRep > UsrSharedDefaultBackend()
Generally no need to use this - handled automatically.
nonvirtual bool IsA(const InternetMediaType &moreGeneralType, const InternetMediaType &moreSpecificType) const
return true if moreSpecificType 'isa' moreGeneralType
nonvirtual void SetOverrides(const Mapping< InternetMediaType, OverrideRecord > &overrides)
nonvirtual optional< InternetMediaType > GetAssociatedContentType(const FileSuffixType &fileSuffix) const
nonvirtual optional< String > GetAssociatedPrettyName(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > EtcMimeTypesDefaultBackend()
Generally no need to use this - handled automatically.
static shared_ptr< IBackendRep > BakedInDefaultBackend()
Generally no need to use this - handled automatically.
nonvirtual optional< FileSuffixType > GetPreferredAssociatedFileSuffix(const InternetMediaType &ct) const
nonvirtual Containers::Set< FileSuffixType > GetAssociatedFileSuffixes(const InternetMediaType &ct) const
nonvirtual void AddOverride(const InternetMediaType &mediaType, const OverrideRecord &overrideRec)
bool IsTextFormat(const InternetMediaType &ct) const
returns true if you can expect to treat as some sort of text and reasonably view - like text/html,...
Wrap any object with Synchronized<> and it can be used similarly to the base type,...
SharedByValue is a utility class to implement Copy-On-Write (aka COW) - sort of halfway between uniqu...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
const InternetMediaType::AtomType kApplication
'application'
for OS facilities not updatable - or controllable - just usable.