Stroika Library 3.0d20
 
Loading...
Searching...
No Matches
InternetMediaTypeRegistry.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_DataExchange_InternetMediaTypeRegistry_h_
5#define _Stroika_Foundation_DataExchange_InternetMediaTypeRegistry_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <filesystem>
10#include <optional>
11
13#include "Stroika/Foundation/Containers/Mapping.h"
14#include "Stroika/Foundation/Containers/Set.h"
16#include "Stroika/Foundation/DataExchange/InternetMediaTypeNotSupportedException.h"
19
20/**
21 * \file
22 *
23 * \note Code-Status: <a href="Code-Status.md#Beta">Beta</a>
24 *
25 */
26
28
29 using Characters::String;
30 using Containers::Mapping;
31
32 /**
33 * This leverages the os-dependent MIME databases
34 *
35 * \todo Clean up internally when, where, and how we do caching. Not bad now - but maybe smarter to do at the
36 * frontend level, not the backend level? Unclear (since some backends have different cost structures).
37 *
38 * But sloppily done for now.
39 *
40 * \par Example Usage
41 * \code
42 * if (InternetMediaTypeRegistry::sThe->IsA (InternetMediaTypes::Wildcards::kText, InternetMediaType {some-string})) {
43 * handle_textfiles ();
44 * }
45 * \endcode
46 *
47 * \par Example Usage
48 * \code
49 * DbgTrace ("SUFFIX: {}"_f, InternetMediaTypeRegistry::sThe->GetPreferredAssociatedFileSuffix (i));
50 * DbgTrace ("ASSOCFILESUFFIXES: {}"_f, InternetMediaTypeRegistry::sThe->GetAssociatedFileSuffixes (i));
51 * DbgTrace ("GetAssociatedPrettyName: {}"_f, InternetMediaTypeRegistry::sThe->GetAssociatedPrettyName (i));
52 * \endcode
53 *
54 * \par Example Usage
55 * \code
56 * // to update the media type registry: copy the current one, modify the copy, and store it back into sThe
57 * InternetMediaTypeRegistry origRegistry = InternetMediaTypeRegistry::sThe.load ();
58 * InternetMediaTypeRegistry updatedRegistry = origRegistry;
59 * const auto kHFType_ = InternetMediaType{"application/fake-healthframe-phr+xml"};
60 * EXPECT_TRUE (not InternetMediaTypeRegistry::sThe->GetMediaTypes ().Contains (kHFType_));
61 * updatedRegistry.AddOverride (kHFType_, InternetMediaTypeRegistry::OverrideRecord{nullopt, Set<String>{".HPHR"}, L".HPHR"});
62 * InternetMediaTypeRegistry::sThe.store (updatedRegistry);
63 * Assert (InternetMediaTypeRegistry::sThe->IsXMLFormat (kHFType_));
64 * Assert (InternetMediaTypeRegistry::sThe->GetMediaTypes ().Contains (kHFType_));
65 * Assert (not origRegistry.GetMediaTypes ().Contains (kHFType_));
66 * Assert (updatedRegistry.GetMediaTypes ().Contains (kHFType_));
67 * \endcode
68 *
69 * TODO:
70 * \todo http://stroika-bugs.sophists.com/browse/STK-714 - InternetMediaTypeRegistry Add mechanism to fetch subtypes more generally - enhance IsA
71 *
72 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety">C++-Standard-Thread-Safety</a>
73 *
74 */
76 public:
77 struct IBackendRep;
78
79 public:
80 /**
81 * \brief - Generally no need to use this - handled automatically - but returns the default, OS-provided MIME InternetMediaType registry.
82 *
83 * On Windows, this uses:
84 * HKEY_CLASSES_ROOT\MIME\Database\Content Type
85 *
86 * On Linux/BSD (but not MacOS), this uses:
87 * /usr/share/mime/globs
88 * /etc/mime.types
89 */
90 static shared_ptr<IBackendRep> DefaultBackend ();
91
92#if qStroika_Foundation_Common_Platform_Windows
93 public:
94 /**
95 * Use:
96 * HKEY_CLASSES_ROOT\MIME\Database\Content Type
97 */
98 static shared_ptr<IBackendRep> WindowsRegistryDefaultBackend ();
99#endif
100
101 public:
102 /**
103 * \brief - Generally no need to use this - handled automatically.
104 *
105 * Available on:
106 * o Linux
107 * o BSD
108 *
109 * /usr/share/mime/globs
110 *
111 * This is the preferred backend on UNIX systems
112 */
113 static shared_ptr<IBackendRep> UsrSharedDefaultBackend ();
114
115 public:
116 /**
117 * \brief - Generally no need to use this - handled automatically.
118 *
119 * Available on:
120 * o Linux
121 * o BSD
122 *
123 * /etc/mime.types
124 *
125 * This is not a very good choice, but will often work. It is fairly incomplete.
126 */
127 static shared_ptr<IBackendRep> EtcMimeTypesDefaultBackend ();
128
129 public:
130 /**
131 * \brief - Generally no need to use this - handled automatically.
132 *
133 * Provides a handful of hardwired values - enough to pass regression tests, but not a good choice.
134 *
135 * Available everywhere
136 *
137 * This is a terrible choice, but better than nothing.
138 */
139 static shared_ptr<IBackendRep> BakedInDefaultBackend ();
140
141 public:
142 /**
143 * file suffix includes the dot; This COULD have been defined as a filesystem::path, as path::extension() returns path.
144 * But I think this is generally more convenient as a string and this class provides overloads when passing in an extension
145 * taking a filesystem::path.
146 *
147 * \note FileSuffixType MUST be non-empty, and MUST start with a '.' (dot) character.
148 */
150
151 public:
152 /**
153 * Used to override InternetMediaType file suffix/pretty name entries from the OS, or to define custom ones per-application.
154 */
156 optional<String> fTypePrintName;
157 optional<Containers::Set<FileSuffixType>> fFileSuffixes;
158 optional<FileSuffixType> fPreferredSuffix;
159
160 /**
161 * @see Characters::ToString ()
162 */
163 nonvirtual Characters::String ToString () const;
164 };
165
166 public:
167 /**
168 * The default constructor makes a new (empty) copy of customizations, and uses DefaultBackend (). The constructor
169 * with the explicit backend, uses that backend.
170 */
171 InternetMediaTypeRegistry (const shared_ptr<IBackendRep>& backendRep = nullptr);
173
174 public:
175 nonvirtual InternetMediaTypeRegistry& operator= (InternetMediaTypeRegistry&& rhs) = default;
176 nonvirtual InternetMediaTypeRegistry& operator= (const InternetMediaTypeRegistry& rhs) = default;
177
178 public:
179 /**
180 * The current global variable - InternetMediaTypeRegistry. Typically - use this.
181 *
182 * \note copying InternetMediaTypeRegistry by value is cheap (shared-by-value), so avoiding the lock around sThe is easy - just copy InternetMediaTypeRegistry::sThe.
183 *
184 * \note \em Thread-Safety <a href="Thread-Safety.md#C++-Standard-Thread-Safety-For-Envelope-Letter-Internally-Synchronized">C++-Standard-Thread-Safety-For-Envelope-Letter-Internally-Synchronized</a>
185 */
187
188 public:
189 /**
190 * Return the current override mappings (note - these are initialized per-OS, to provide sometimes better values than that OS,
191 * but this can be overridden/cleared).
192 */
194
195 public:
196 /**
197 * Set the current override mappings. Rarely called. More likely - call AddOverride()
198 */
199 nonvirtual void SetOverrides (const Mapping<InternetMediaType, OverrideRecord>& overrides);
200
201 public:
202 /**
203 * Typically used to add custom internet media type mappings to file names. But can be used to override operating system defaults.
204 */
205 nonvirtual void AddOverride (const InternetMediaType& mediaType, const OverrideRecord& overrideRec);
206
207 private:
208 struct IFrontendRep_;
209 struct FrontendRep_;
210
211 struct Rep_Cloner_ {
212 shared_ptr<IFrontendRep_> operator() (const IFrontendRep_& t) const;
213 };
214 using SharedRepByValuePtr_ =
216
217 SharedRepByValuePtr_ fFrontEndRep_;
218
219 static FrontendRep_ kDefaultFrontEndForNoBackend_;
220
221 public:
222 /**
223 */
224 nonvirtual Containers::Set<InternetMediaType> GetMediaTypes () const;
225 nonvirtual Containers::Set<InternetMediaType> GetMediaTypes (InternetMediaType::AtomType majorType) const;
226
227 public:
228 /**
229 * There are frequently many file suffixes associated with a given filetype. This routine fetches the single best/preferred value.
230 */
231 nonvirtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const;
232
233 public:
234 /**
235 * There can be more than one file suffix associated with a content type.
236 */
239
240 public:
241 /**
242 * return nullopt if not found
243 */
244 nonvirtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const;
245
246 public:
247 /**
248 * return nullopt if not found
249 *
250 * \note Since Stroika v2.1b2 - this takes a fileSuffix argument - NOT fileNameOrSuffix (callers holding a file name can use path::extension () to obtain the suffix)
251 *
252 * \pre fileSuffix.empty () or fileSuffix[0] == '.'
253 */
254 nonvirtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileSuffix) const;
255 nonvirtual optional<InternetMediaType> GetAssociatedContentType (const filesystem::path& fileSuffix) const;
256
257 public:
258 /**
259 * \brief returns true if you can expect to treat as some sort of text and reasonably view - like text/html, application/x-ccr, application/x-url, etc...
260 *
261 * This examines the 'Type' field, sometimes subtype field, as well as leverages the Suffix field (if present).
262 */
263 [[deprecated ("Since Stroika v3.0d12 - use IsA (InternetMediaTypes::Wildcards::kText, ct)")]] bool IsTextFormat (const InternetMediaType& ct) const;
264
265 public:
266 /**
267 * This returns true if the given type is known to be treatable as an image.
268 */
269 [[deprecated ("Since Stroika v3.0d12 - use IsA (InternetMediaTypes::Wildcards::kImage,ct)")]] bool IsImageFormat (const InternetMediaType& ct) const;
270
271 public:
272 /**
273 * This returns true if the given type is known to be treatable as XML.
274 *
275 * This examines the 'Type' field, sometimes subtype field, as well as leverages the Suffix field (if present).
276 *
277 * \note This returns true if 'ct.GetSuffix () == "xml"'
278 */
279 [[deprecated ("Since Stroika v3.0d12 - use IsA (InternetMediaTypes::kXML,ct)")]] nonvirtual bool IsXMLFormat (const InternetMediaType& ct) const;
280
281 public:
282 /**
283 * \brief return true if moreSpecificType 'isa' moreGeneralType
284 *
285 * Still a bit of a work in progress (as of 2024-11-03). But basic idea is to make it easy to check categories of internet media types
286 * in a (someday) extensible fashion.
287 *
288 * To check if a given type 'ct' is a 'text' type:
289 * o IsA (InternetMediaTypes::Wildcards::kText, ct)
290 *
291 * To check if a given type 'ct' is a 'image' type:
292 * o IsA (InternetMediaTypes::Wildcards::kImage, ct)
293 *
294 * To check if a given type 'ct' is a 'json' type:
295 * o IsA (InternetMediaTypes::kJSON, ct) -- for example works for kJSONPatch
296 *
297 *
298 * This API replaces the deprecated Stroika v2.1 APIs:
299 * IsXMLFormat, IsImageFormat, IsTextFormat
300 *
301 *
302 *
303 * // OLD DOCS BELOW - @todo cleanup
304 *
305 * The HISTORICAL algorithm for this is:
306 *
307 * This function compares similar types, like
308 * application/healthframe-PHR-Format and
309 * application/healthframe-PHR-Format-2 etc
310 * and returns true iff the given type is a prefix (case insensitive)
311 * of the argument more general one. The types must match, and the
312 * parameters are ignored.
313 *
314 * Change as of Stroika v2.1d27 - now only checks prefix of
315 * subtype - type must match - and now ignores parameters.
316 *
317 * @todo REDO this - and don't count on the old algorithm above. Will add a new mechanism EITHER based on what I can read from
318 * the MIME config files on each OS (except it appears windows), or from some registration;
319 * see http://stroika-bugs.sophists.com/browse/STK-714
320 *
321 * As of Stroika v3.0d5 - this just checks the type and subtype atoms for equality (which are already case insensitive) and
322 * ignores file suffix and parameters.
323 *
324 * However, unclear how to capture semantic things like XML is a kind of text, or CCR is a kind of XML. And certainly nothing like the -1, -2 stuff
325 * above with the HEALTHFRAME PHR format.
326 */
327 nonvirtual bool IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const;
328
329 public:
330 /**
331 */
332 template <typename EXCEPTION = InternetMediaTypeNotSupportedException>
333 nonvirtual void CheckIsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType,
334 const EXCEPTION& throwIfNot = InternetMediaTypeNotSupportedException::kThe) const;
335
336 public:
337 [[deprecated ("Since Stroika v3.0d10 - just access sThe->")]] static InternetMediaTypeRegistry Get ()
338 {
339 return sThe.load ();
340 }
341 [[deprecated ("Since Stroika v3.0d10 - just set sThe")]] static void Set (const InternetMediaTypeRegistry& newRegistry)
342 {
343 sThe = newRegistry;
344 }
345 };
346 inline Execution::Synchronized<InternetMediaTypeRegistry> InternetMediaTypeRegistry::sThe;
347
348 /**
349 * \brief for OS facilities not updatable - or controllable - just usable.
350 */
352 virtual ~IBackendRep () = default;
353 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const = 0;
354 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const = 0;
355 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const = 0;
356 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const = 0;
357 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileNameOrSuffix) const = 0;
358 };
359
360 /**
361 * \brief alternative to IBackendRep - which can be updated/amended.
362 */
364 virtual ~IFrontendRep_ () = default;
365 virtual Mapping<InternetMediaType, OverrideRecord> GetOverrides () const = 0;
366 virtual void SetOverrides (const Mapping<InternetMediaType, OverrideRecord>& overrides) = 0;
367 virtual void AddOverride (const InternetMediaType& mediaType, const OverrideRecord& overrideRec) = 0;
368 virtual shared_ptr<IBackendRep> GetBackendRep () const = 0;
369 virtual Containers::Set<InternetMediaType> GetMediaTypes (optional<InternetMediaType::AtomType> majorType) const = 0;
370 virtual optional<FileSuffixType> GetPreferredAssociatedFileSuffix (const InternetMediaType& ct) const = 0;
371 virtual Containers::Set<FileSuffixType> GetAssociatedFileSuffixes (const InternetMediaType& ct) const = 0;
372 virtual optional<String> GetAssociatedPrettyName (const InternetMediaType& ct) const = 0;
373 virtual optional<InternetMediaType> GetAssociatedContentType (const FileSuffixType& fileNameOrSuffix) const = 0;
374 virtual bool IsA (const InternetMediaType& moreGeneralType, const InternetMediaType& moreSpecificType) const = 0;
375 };
376
377 /**
378 * \note
379 * @see http://www.iana.org/assignments/media-types/media-types.xhtml
380 */
381
382 /**
383 * The currently registered types are:
384 * application, audio, example, font, image, message, model, multipart, text and video
385 *
386 * \note Types - here - refers to MAJOR types - not InternetMediaTypes (so just top level stuff before the /)
387 */
388 namespace InternetMediaTypes::Types {
389
390 /**
391 * \brief 'application'
392 *
393 * This is the major type (atom) making up a class of InternetMediaTypes.
394 */
395 inline const InternetMediaType::AtomType kApplication{"application"sv};
396
397 /**
398 * \brief 'audio'
399 *
400 * This is the major type (atom) making up a class of InternetMediaTypes.
401 */
402 inline const InternetMediaType::AtomType kAudio{"audio"sv};
403
404 /**
405 * \brief 'image'
406 *
407 * This is the major type (atom) making up a class of InternetMediaTypes.
408 */
409 inline const InternetMediaType::AtomType kImage{"image"sv};
410
411 /**
412 * \brief 'text'
413 *
414 * This is the major type (atom) making up a class of InternetMediaTypes.
415 */
416 inline const InternetMediaType::AtomType kText{"text"sv};
417
418 }
419
420 namespace InternetMediaTypes::Wildcards {
421
422 /**
423 * Generic audio content (used for IsA () primarily)
424 */
425 inline const InternetMediaType kAudio{Types::kAudio, {}};
426
427 /**
428 * Generic image content (used for IsA () primarily)
429 */
430 inline const InternetMediaType kImage{Types::kImage, {}};
431
432 /**
433 * Generic text content (used for IsA () primarily)
434 */
435 inline const InternetMediaType kText{Types::kText, {}};
436
437 }
438
439 namespace InternetMediaTypes {
440
441 /**
442 * \brief application/octet-stream
443 */
444 inline const InternetMediaType kOctetStream{Types::kApplication, "octet-stream"sv};
445
446 /**
447 * \brief audio/mp3
448 */
450
451 /**
452 * \brief audio/mp4
453 */
455
456 /**
457 * \brief audio/x-wav
458 */
459 inline const InternetMediaType kAudioWAV{Types::kAudio, "x-wav"sv};
460
461 /**
462 * \brief image/png
463 */
464 inline const InternetMediaType kPNG{Types::kImage, "png"sv};
465
466 /**
467 * \brief image/gif
468 */
469 inline const InternetMediaType kGIF{Types::kImage, "gif"sv};
470
471 /**
472 * \brief image/jpeg
473 */
474 inline const InternetMediaType kJPEG{Types::kImage, "jpeg"sv};
475
476 /**
477 * \brief text/html
478 */
479 inline const InternetMediaType kHTML{Types::kText, "html"sv};
480
481 /**
482 * \brief text/javascript
483 */
484 inline const InternetMediaType kJavascript{Types::kText, "javascript"sv};
485
486 /**
487 * \brief text/css
488 */
489 inline const InternetMediaType kCSS{Types::kText, "css"sv};
490
491 /**
492 * \brief text/plain
493 */
495
496 /**
497 * \brief text/csv
498 */
499 inline const InternetMediaType kCSV{Types::kText, "csv"sv};
500
501 /**
502 * \brief application/json
503 */
505
506 /**
507 * \brief application/json-patch+json
508 * \see https://datatracker.ietf.org/doc/html/rfc6902
509 */
510 inline const InternetMediaType kJSONPatch{Types::kApplication, "json-patch"sv, "json"sv};
511
512 /**
513 * \brief application/x-www-form-urlencoded
514 * \see https://www.ietf.org/rfc/rfc1867.txt
515 * \see https://www.ietf.org/rfc/rfc7578.txt
516 */
517 inline const InternetMediaType kWWWFormURLEncoded{Types::kApplication, "x-www-form-urlencoded"sv};
518
519 /**
520 * \brief application/pdf
521 */
523
524 /**
525 * \brief text/uri-list (@see https://tools.ietf.org/html/rfc2483#section-5)
526 *
527 * \note until Stroika 2.1b2, this was defined as application/x-url
528 */
529 inline const InternetMediaType kURL{Types::kText, "uri-list"sv};
530
531 /**
532 * \brief application/xml (@see https://www.w3.org/2006/02/son-of-3023/draft-murata-kohn-lilley-xml-04.html#rfc.section.3.2)
533 *
534 * \note use this because text/xml deprecated - https://www.w3.org/2006/02/son-of-3023/draft-murata-kohn-lilley-xml-04.html#:~:text=Text%2Fxml%20Registration%20(deprecated),-MIME%20media%20type&text=Although%20listed%20as%20an%20optional,based%20content%20negotiation%20in%20HTTP.
535 */
537
538 /**
539 * \brief application/x-xslt
540 */
541 inline const InternetMediaType kXSLT{Types::kApplication, "x-xslt"sv};
542
543 /**
544 */
545 inline const InternetMediaType kJavaArchive{Types::kApplication, "java-archive"sv};
546
547 /**
548 * Microsoft RTF - Rich Text Format
549 */
551
552 /**
553 * \brief application/zip
554 */
556
557 }
558
559}
560
561/*
562 ********************************************************************************
563 ***************************** Implementation Details ***************************
564 ********************************************************************************
565 */
566#include "InternetMediaTypeRegistry.inl"
567
568#endif /*_Stroika_Foundation_DataExchange_InternetMediaTypeRegistry_h_*/
const InternetMediaType kWWWFormURLEncoded
application/x-www-form-urlencoded
const InternetMediaType kOctetStream
application/octet-stream
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
Set<T> is a container of T, where once an item is added, additionally adds () do nothing.
nonvirtual Mapping< InternetMediaType, OverrideRecord > GetOverrides() const
static shared_ptr< IBackendRep > DefaultBackend()
Generally no need to use this - handled automatically - but returns the default, OS-provided MIME Int...
nonvirtual bool IsXMLFormat(const InternetMediaType &ct) const
static Execution::Synchronized< InternetMediaTypeRegistry > sThe
static shared_ptr< IBackendRep > UsrSharedDefaultBackend()
Generally no need to use this - handled automatically.
nonvirtual bool IsA(const InternetMediaType &moreGeneralType, const InternetMediaType &moreSpecificType) const
return true if moreSpecificType 'isa' moreGeneralType
nonvirtual void SetOverrides(const Mapping< InternetMediaType, OverrideRecord > &overrides)
nonvirtual optional< InternetMediaType > GetAssociatedContentType(const FileSuffixType &fileSuffix) const
nonvirtual optional< String > GetAssociatedPrettyName(const InternetMediaType &ct) const
static shared_ptr< IBackendRep > EtcMimeTypesDefaultBackend()
Generally no need to use this - handled automatically.
static shared_ptr< IBackendRep > BakedInDefaultBackend()
Generally no need to use this - handled automatically.
nonvirtual optional< FileSuffixType > GetPreferredAssociatedFileSuffix(const InternetMediaType &ct) const
nonvirtual Containers::Set< FileSuffixType > GetAssociatedFileSuffixes(const InternetMediaType &ct) const
nonvirtual void AddOverride(const InternetMediaType &mediaType, const OverrideRecord &overrideRec)
bool IsTextFormat(const InternetMediaType &ct) const
returns true if you can expect to treat as some sort of text and reasonably view - like text/html,...
Wrap any object with Synchronized<> and it can be used similarly to the base type,...
SharedByValue is a utility class to implement Copy-On-Write (aka COW) - sort of halfway between uniqu...
Iterable<T> is a base class for containers which easily produce an Iterator<T> to traverse them.
Definition Iterable.h:237
const InternetMediaType::AtomType kApplication
'application'
for OS facilities not updatable - or controllable - just usable.