Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
UniformResourceIdentification.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_IO_Network_UniformResourceIdentification_h_
5#define _Stroika_Foundation_IO_Network_UniformResourceIdentification_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <compare>
10
11#include <string>
12
14#include "Stroika/Foundation/Common/Common.h"
16#include "Stroika/Foundation/Containers/Mapping.h"
19
20/**
21 * \file
22 *
23 * \note Code-Status: <a href="Code-Status.md#Release">Release</a>
24 *
25 * TODO:
26 * @todo Add PREDEFINED namespace and list predefined protocols
27 * o HTTP
28 * o HTTPS
29 * o FTP
30 * o FILE
31 * (only do with new stroika string class so we can use low-cost constants)
32 *
33 * @todo Need more regression tests
34 */
35
36namespace Stroika::Foundation::IO::Network::UniformResourceIdentification {
37
38 using Characters::String;
39
40 /**
41 * \par Base RFC References
42 * \note http://www.ietf.org/rfc/rfc1738.txt - Uniform Resource Locators (URL) - 1994
43 * original FULL URL spec
44 *
45 * \note http://tools.ietf.org/html/rfc1808 - Relative Uniform Resource Locators - 1995
46 * original relative URL spec
47 *
48 * \note https://tools.ietf.org/html/rfc3986 - Uniform Resource Identifier (URI): Generic Syntax - 2005
49 * Updates: 1738; Obsoletes 2732, 2396, 1808
50 * (so combines relative and absolute)
51 *
52 * Each RFC uses its own terminology for the parts of a URL. We use the most recent of these RFCs for our primary terminology:
53 * https://tools.ietf.org/html/rfc3986#section-3.2
54 *
55 * The generic URI syntax consists of a hierarchical sequence of
56 * components referred to as the scheme, authority, path, query, and
57 * fragment
58 * ...
59 * The following are two example URIs and their component parts:
60 *
61 * foo://example.com:8042/over/there?name=ferret#nose
62 * \_/ \______________/\_________/ \_________/ \__/
63 * | | | | |
64 * scheme authority path query fragment
65 * | _____________________|__
66 * / \ / \
67 * urn:example:animal:ferret:nose
68 *
69 * \par Empty String versus optional 'missing' !has_value
70 *
71 * For the various optional pieces of a URL (or URI), we could represent this as
72 * an empty string, or call the feature optional, and return nullopt.
73 *
74 * We COULD allow for EITHER, but that would be clearly needlessly confusing.
75 * PRO USE of optional
76 * o it's the normal way (since C++17) to represent that something is there, or not (string empty is a throwback
77 * sentinel value approach)
78 * o Some objects (like port#, or authority) have formatting constraints and using optional makes
79 * clear that IF it exists it's of the right form, and else it just doesn't exist (small point)
80 * CONS USE of optional
81 * o Backward compatibility with earlier versions of the API is a bit of a pain, as the old API used
82 * string and string.empty () in many places.
83 * Performance:
84 * o optional more often avoids allocating memory, but is larger, so probably a wash
85 * Choice:
86 * o API will use optional<> for sub-elements of URL and URI that may or may not be present, and
87 * if a string value is provided (not nullopt) - then it MUST be a legal value (not empty typically).
88 * So, for example, Authority.Get/SetUserInfo () - MUST be either nullopt or a non-empty string.
89 * o Exception - because the https://tools.ietf.org/html/rfc3986#page-11 explicitly says the path is not optional
90 *
91 * "A path is always defined for a URI, though the defined path may be empty (zero length)"
92 *
93 * So the Path in Stroika does not use optional.
94 *
95 * \note As () versus AsEncoded () versus AsDecoded ()
96 * Some objects (like Host, or UserInfo) make sense to access in either encoded or decoded form. Other objects, like an
97 * Authority, or URI, really don't make sense except in ENCODED form (because you couldn't parse out the pieces to
98 * get back to the original). So - for objects where it makes sense to access either way, we provide AsEncoded/AsDecoded
99 * methods, and for objects that really require the string form to be encoded - we just call that As<>.
100 *
101 * One SLIGHT exception is the ToString() method, which is just for debugging, and there we emit what will be easier/better
102 * for debugging, and don't worry about reversibility.
103 *
104 * \see also StringPCTEncodedFlag flag
105 */
106
107 /**
108 * \brief for some purposes, we may want to render objects PCT-encoded, and sometimes not (plain or decoded). This flag is just used
109 * to specify in some 'As<String>' APIs - which is preferred for output. The default often depends on the type being produced
110 * in the As<> method.
111 *
112 * \note - SEE http://stroika-bugs.sophists.com/browse/STK-1000 -- issue about maybe needing a more nuanced approach
113 * PERHAPS add another flag eDecodedUNICODEButEncodeSOMESTUFFNeededToUnparseDisambiguate (not sure that ever happens)?
114 * (OK - I do have an example - the # character - and maybe sometimes the ? character - e.g. a # in the query, since the query comes before the fragment).
115 */
117 eDecoded,
118 ePCTEncoded,
119 };
120
121 /**
122 * From https://tools.ietf.org/html/rfc3986#section-3.1
123 * Scheme names consist of a sequence of characters beginning with a
124 * letter and followed by any combination of letters, digits, plus
125 * ("+"), period ("."), or hyphen ("-"). Although schemes are case-
126 * insensitive, the canonical form is lowercase ...
127 *
128 * \note schemes cannot include a ':' character
129 *
130 * TODO:
131 * @todo MAYBE add "SCHEME REGISTER"
132 * { string, isSecure, isHttpIsh } - and if httpish, require certain methods (e.g. GetHost) for httpish schemes
133 *
134 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
135 * o Standard Stroika Comparison support (operator<=>,operator==, etc);
136 *
137 * o schemes are case-insensitive @see https://tools.ietf.org/html/rfc3986#section-3.1
138 */
139 class [[nodiscard]] SchemeType : public String {
140 using inherited = String;
141
142 public:
 /**
 * Construct from anything satisfying Characters::IConvertibleToString, or copy/move from another SchemeType.
 *
 * NOTE(review): whether the templated constructor validates or normalizes its argument is not visible
 * in this header - see Validate () / Normalize ().
 */
143 template <Characters::IConvertibleToString STRINGISH_T>
144 SchemeType (STRINGISH_T&& s);
145 SchemeType (const SchemeType&) noexcept = default;
146 SchemeType (SchemeType&&) noexcept = default;
147
148 public:
149 nonvirtual SchemeType& operator= (const SchemeType&) = default;
150 nonvirtual SchemeType& operator= (SchemeType&&) = default;
151
152 public:
153 /**
 * Returns a normalized copy of this scheme (this object is not modified - the method is const).
 *
 * NOTE(review): presumably the lower-cased form, since https://tools.ietf.org/html/rfc3986#section-3.1
 * gives lowercase as the canonical form - confirm against the implementation.
154 */
155 nonvirtual SchemeType Normalize () const;
156
157 public:
158 /**
 * Checks that this is a legal scheme name.
 *
 * NOTE(review): returns void, so an illegal scheme is presumably reported by throwing - confirm
 * against the implementation.
159 */
160 nonvirtual void Validate () const;
161
162 public:
163 /**
164 * Returns true iff its a KNOWN secure protocol, like https, ssh, ftps, etc. By secure, this generally means that it is TLS based.
165 * \note for unrecognized schema, this may produce a WAG so just take it as a hint.
166 */
167 nonvirtual bool IsSecure () const;
168
169 public:
170 /**
 * Three-way comparison with another scheme.
 *
 * NOTE(review): expected to be case-insensitive (schemes are case-insensitive per
 * https://tools.ietf.org/html/rfc3986#section-3.1) - confirm in TWC_.
171 */
172 nonvirtual strong_ordering operator<=> (const SchemeType& rhs) const;
173
174 public:
175 /**
 * Equality comparison with another scheme.
 *
 * NOTE(review): expected to be consistent with operator<=> (case-insensitive) - confirm.
176 */
177 nonvirtual bool operator== (const SchemeType& rhs) const;
178
179 private:
180 static strong_ordering TWC_ (const SchemeType& lhs, const SchemeType& rhs); // three-way-compare utility (historically shared between the c++17 and c++20 comparison code)
181
182 public:
 /**
 * Returns the default port associated with this scheme, if there is one.
 *
 * NOTE(review): presumably nullopt for schemes with no known default port - confirm.
 */
183 nonvirtual optional<PortType> GetDefaultPort () const;
184 };
185
186 /**
187 * FROM https://tools.ietf.org/html/rfc3986#section-3.2.2:
188 * The host subcomponent of authority is identified by an IP literal
189 * encapsulated within square brackets, an IPv4 address in dotted-
190 * decimal form, or a registered name. The host subcomponent is case-
191 * insensitive.
192 *
193 * This class is ALWAYS either (mutually exclusive) registerName, or internetAddress.
194 *
195 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
196 * o Standard Stroika Comparison support (operator<=>,operator==, etc);
197 *
198 * note that when comparing hosts, if they are registered names, they are compared case insensitively.
199 * @see https://tools.ietf.org/html/rfc3986#section-6.2.2.1
200 */
201 class [[nodiscard]] Host {
202 public:
 // Class-scope aliases for the StringPCTEncodedFlag values, so callers can write Host::eDecoded / Host::ePCTEncoded
203 static constexpr auto eDecoded = StringPCTEncodedFlag::eDecoded;
204 static constexpr auto ePCTEncoded = StringPCTEncodedFlag::ePCTEncoded;
205
206 public:
207 /**
208 * Technically according to https://tools.ietf.org/html/rfc3986#section-3.2.2, the registered-name
209 * may be empty, but for the sake of consistency with the rest of this module, we instead represent
210 * this using optional<Host> and say that the optional host is missing.
211 *
212 * So, \pre not registeredName.empty ()
213 *
214 * \todo http://stroika-bugs.sophists.com/browse/STK-750
215 * noexcept - unclear why I cannot declare copy constructor and copy assignment operators as noexcept
216 * on GCC. THIS compiles fine, but then later bits of code that use it fail to compile (g++ 9 at least).
217 */
218 Host (const String& registeredName);
219 Host (const InternetAddress& addr);
220 Host (const Host&) = default;
221 Host (Host&&) noexcept = default;
222
223 private:
 // Default construction is private. NOTE(review): presumably only used internally (e.g. by Parse ()) - confirm.
224 Host () = default;
225
226 public:
227 nonvirtual Host& operator= (const Host&) = default;
228 nonvirtual Host& operator= (Host&&) noexcept = default;
229
230 public:
231 /**
232 * This takes argument a possibly %-encoded name, or [] encoded internet addresses etc, and produces a properly parsed host object
233 * This may throw if given an invalid raw URL hostname value.
234 *
235 * Require (not rawURLHostnameText.empty ()); // use optional instead, and treat empty text as invalid. NB " " is OK.
236 */
237 static Host Parse (const String& rawURLHostnameText);
238
239 public:
240 /**
241 * See https://tools.ietf.org/html/rfc3986#section-6.2.2
 *
 * Returns a normalized copy (this object is not modified - the method is const).
242 */
243 nonvirtual Host Normalize () const;
244
245 public:
246 /**
247 * Returns missing if its not a registered name (DNS name).
248 *
249 * \note exactly one of AsRegisteredName () or AsInternetAddress () returns a value (a Host is always one or the other).
250 */
251 nonvirtual optional<String> AsRegisteredName () const;
252
253 public:
254 /**
255 * Returns missing if its not an InternetAddress
256 *
257 * \note exactly one of AsRegisteredName () or AsInternetAddress () returns a value (a Host is always one or the other).
258 */
259 nonvirtual optional<InternetAddress> AsInternetAddress () const;
260
261 public:
262 /**
263 * \brief Returns the hostname, either encoded or decoded (PCT encoding) as some form of printed derivative string. If the result type is 'std::string' - throws if data not ASCII;
264 *
265 * if (pctEncode == StringPCTEncodedFlag::eDecoded)
266 * Returns decoded (no PCT encoding etc) hostname (registered name, ipv4 or ipv6 address). Doesn't contain the
267 * [] decoration around ip addresses, etc. Suitable for passing to DNS::Default::GetHostAddress ().
268 * if (pctEncode == StringPCTEncodedFlag::ePCTEncoded)
269 * Returns encoded result (%-encoding host names, and wrapping [] around ipv6 addresses).
270 *
271 * if RESULT_TYPE==String, pctEncode defaults to eDecoded
272 * if RESULT_TYPE==string, pctEncode defaults to ePCTEncoded
273 *
274 * \par Example Usage
275 * \code
276 * auto locAddrs = IO::Network::DNS::kThe.GetHostAddresses (host.As<String> (StringPCTEncodedFlag::eDecoded));
277 * \endcode
278 */
279 template <typename RESULT_TYPE = String>
280 nonvirtual RESULT_TYPE As (optional<StringPCTEncodedFlag> pctEncode = {}) const
281 requires (same_as<RESULT_TYPE, String> or same_as<RESULT_TYPE, string>);
282
283 public:
284 /**
 * Three-way comparison with another Host.
 *
 * NOTE(review): registered names are expected to compare case-insensitively
 * (https://tools.ietf.org/html/rfc3986#section-6.2.2.1) - confirm in TWC_.
285 */
286 nonvirtual strong_ordering operator<=> (const Host& rhs) const;
287
288 public:
289 /**
 * Equality comparison with another Host.
 *
 * NOTE(review): expected to be consistent with operator<=> - confirm.
290 */
291 nonvirtual bool operator== (const Host& rhs) const;
292
293 private:
294 static strong_ordering TWC_ (const Host& lhs, const Host& rhs); // three-way-compare utility (historically shared between the c++17 and c++20 comparison code)
295
296 public:
297 /**
298 * For debugging purposes: don't count on the format.
299 * @see Characters::ToString ()
300 */
301 nonvirtual String ToString () const;
302
303 public:
 // Deprecated forwarding wrappers: each simply calls As<> () with the corresponding StringPCTEncodedFlag.
304 [[deprecated ("Since Stroika v3.0d5 - use As<String>(StringPCTEncodedFlag::eDecoded)")]] String AsDecoded () const
305 {
306 return As<String> (StringPCTEncodedFlag::eDecoded);
307 }
308 template <typename RESULT_TYPE = String>
309 [[deprecated ("Since Stroika v3.0d5 - use As<String>(StringPCTEncodedFlag::ePCTEncoded)")]] RESULT_TYPE AsEncoded () const
310 {
311 return As<RESULT_TYPE> (StringPCTEncodedFlag::ePCTEncoded);
312 }
313
314 private:
315 // Throws if cannot parse/illegal
 // NOTE(review): presumably exactly one element of the returned pair has a value (registered name or address) - confirm.
316 static pair<optional<String>, optional<InternetAddress>> ParseRaw_ (const String& raw);
317
318 private:
 // Produce the encoded (raw URL) text for a registered name or for an internet address
319 static String EncodeAsRawURL_ (const String& registeredName);
320 static String EncodeAsRawURL_ (const InternetAddress& ipAddr);
321
322 private:
 // NOTE(review): fEncodedName_ presumably caches the encoded (raw URL) form, and exactly one of
 // fRegisteredName_ / fInternetAddress_ is expected to have a value - confirm against the constructors.
323 String fEncodedName_;
324 optional<String> fRegisteredName_;
325 optional<InternetAddress> fInternetAddress_;
326 };
327
328 /**
329 * FROM https://tools.ietf.org/html/rfc3986#section-3.2.1:
330 * The userinfo subcomponent may consist of a user name and, optionally,
331 * scheme-specific information about how to gain authorization to access
332 * the resource.
333 *
334 * No claims are made about case sensitivity, so this is treated as case sensitive.
335 *
336 * \note UserInfo may not contain an empty string (use optional<UserInfo> and nullopt for that)
337 *
338 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
339 * o Standard Stroika Comparison support (operator<=>,operator==, etc);
340 *
341 * Because https://tools.ietf.org/html/rfc3986 says nothing about case sensitivity or comparing userInfo,
342 * these are compared as case-sensitive strings.
343 */
344 class [[nodiscard]] UserInfo {
345 public:
 // Class-scope aliases for the StringPCTEncodedFlag values, so callers can write UserInfo::eDecoded / UserInfo::ePCTEncoded
346 static constexpr auto eDecoded = StringPCTEncodedFlag::eDecoded;
347 static constexpr auto ePCTEncoded = StringPCTEncodedFlag::ePCTEncoded;
348
349 public:
350 /**
351 * Note, though https://tools.ietf.org/html/rfc3986#section-3.2.1 allows for an empty UserInfo, we instead
352 * handle that case with uses of UserInfo being optional<UserInfo>. So, we require that the decoded userInfo
353 * is not an empty string in this class.
354 *
355 * \pre not decodedUserInfo.empty ()
356 */
357 UserInfo (const String& decodedUserInfo);
358 UserInfo (const UserInfo&) noexcept = default;
359 UserInfo (UserInfo&&) noexcept = default;
360
361 private:
 // Default construction is private. NOTE(review): presumably only used internally (e.g. by Parse ()) - confirm.
362 UserInfo () = default;
363
364 public:
365 nonvirtual UserInfo& operator= (const UserInfo&) noexcept = default;
366 nonvirtual UserInfo& operator= (UserInfo&&) noexcept = default;
367
368 public:
369 /**
370 * This takes as argument a possibly %-encoded userinfo string (as it would appear in a raw URL), and produces a properly parsed UserInfo object.
371 * This may throw if given an invalid raw URL userinfo value.
372 *
373 * \pre not rawURLUserInfo.empty () // use optional<UserInfo> {} instead
374 */
375 static UserInfo Parse (const String& rawURLUserInfo);
376
377 public:
378 /**
379 * \brief Returns the userinfo, either encoded or decoded (PCT encoding) as some form of printed derivative string. If the result type is 'std::string' - throws if data not ASCII;
380 *
381 * if (pctEncoded == StringPCTEncodedFlag::eDecoded)
382 * Returns decoded (no PCT encoding etc) userInfo.
383 * if (pctEncoded == StringPCTEncodedFlag::ePCTEncoded)
384 * Returns encoded result (%-encoding user-info after converting to UTF8).
385 *
386 * if RESULT_TYPE==String, pctEncoded defaults to eDecoded
387 * if RESULT_TYPE==string, pctEncoded defaults to ePCTEncoded
388 *
389 */
390 template <typename RESULT_TYPE = String>
391 nonvirtual RESULT_TYPE As (optional<StringPCTEncodedFlag> pctEncoded = {}) const
392 requires (same_as<RESULT_TYPE, String> or same_as<RESULT_TYPE, string>);
393
394 public:
395 /**
 * Three-way comparison with another UserInfo.
 *
 * NOTE(review): expected to be a case-sensitive string comparison (RFC 3986 makes no claim
 * about userinfo case sensitivity) - confirm in TWC_.
396 */
397 nonvirtual strong_ordering operator<=> (const UserInfo& rhs) const;
398
399 public:
400 /**
 * Equality comparison with another UserInfo.
 *
 * NOTE(review): expected to be consistent with operator<=> - confirm.
401 */
402 nonvirtual bool operator== (const UserInfo& rhs) const;
403
404 private:
405 static strong_ordering TWC_ (const UserInfo& lhs, const UserInfo& rhs); // three-way-compare utility (historically shared between the c++17 and c++20 comparison code)
406
407 public:
408 /**
409 * For debugging purposes: don't count on the format.
410 * @see Characters::ToString ()
411 */
412 nonvirtual String ToString () const;
413
414 public:
 // Deprecated forwarding wrappers: each simply calls As<> () with the corresponding StringPCTEncodedFlag.
415 [[deprecated ("Since Stroika v3.0d5 - use As<String>(StringPCTEncodedFlag::eDecoded)")]] String AsDecoded () const
416 {
417 return As<String> (StringPCTEncodedFlag::eDecoded);
418 }
419 template <typename RESULT_TYPE = String>
420 [[deprecated ("Since Stroika v3.0d5 - use As<String>(StringPCTEncodedFlag::ePCTEncoded)")]] RESULT_TYPE AsEncoded () const
421 {
422 return As<RESULT_TYPE> (StringPCTEncodedFlag::ePCTEncoded);
423 }
424
425 private:
426 // Throws if cannot parse/illegal
427 static String ParseRaw_ (const String& raw);
428
429 private:
 // Produce the encoded (raw URL) text for a decoded userinfo string
430 static String EncodeAsRawURL_ (const String& decodedName);
431
432 private:
 // NOTE(review): presumably fEncodedUserInfo_ holds the %-encoded form and fUserInfo_ the decoded form - confirm against the constructor.
433 String fEncodedUserInfo_;
434 String fUserInfo_;
435 };
436
// Explicit specialization declarations of the (deprecated) UserInfo::AsEncoded<> member template
// for String and std::string. NOTE(review): the corresponding definitions are not visible in this
// header listing - confirm they exist in the .inl/.cpp, or remove these along with the deprecated API.
437 template <>
438 String UserInfo::AsEncoded () const;
439 template <>
440 string UserInfo::AsEncoded () const;
441
442 /**
443 * \brief Authority is roughly the part of a URL where you say the hostname (and portnumber etc) - part just after //
444 *
445 * Based on https://tools.ietf.org/html/rfc3986#section-3.2
446 *
447 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
448 * o Standard Stroika Comparison support (operator<=>,operator==, etc);
449 */
450 class [[nodiscard]] Authority {
451 public:
 // Class-scope aliases for the StringPCTEncodedFlag values, so callers can write Authority::eDecoded / Authority::ePCTEncoded
452 static constexpr auto eDecoded = StringPCTEncodedFlag::eDecoded;
453 static constexpr auto ePCTEncoded = StringPCTEncodedFlag::ePCTEncoded;
454
455 public:
456 /**
 * Construct from optional host, port and userInfo parts; each defaults to nullopt (missing).
 *
457 * \todo http://stroika-bugs.sophists.com/browse/STK-750
458 * noexcept - unclear why I cannot declare copy constructor and copy assignment operators as noexcept
459 * on GCC. THIS compiles fine, but then later bits of code that use it fail to compile (g++ 9 at least).
460 */
461 Authority (const optional<Host>& h = nullopt, const optional<PortType>& port = nullopt, const optional<UserInfo>& userInfo = nullopt);
462 Authority (const Authority&) = default;
463 Authority (Authority&&) noexcept = default;
464
465 public:
466 nonvirtual Authority& operator= (const Authority&) = default;
467 nonvirtual Authority& operator= (Authority&&) noexcept = default;
468
469 public:
470 /**
471 * This takes as argument the raw (possibly %-encoded) authority text of a URL (userinfo, host name or [] encoded internet address, port), and produces a properly parsed Authority object.
472 * This may throw if given an invalid raw URL hostname value. However, a 'missing' hostname is not an error, and will just
473 * return an Authority with HostName == nullopt.
474 *
475 * \note - the argument rawURLAuthorityText must be valid ASCII, or this will throw
476 *
477 * If the argument string is fully empty, this will return an empty optional authority. If its invalid/illegal, it will throw.
478 */
479 static optional<Authority> Parse (const String& rawURLAuthorityText);
480
481 public:
482 /**
483 * See https://tools.ietf.org/html/rfc3986#section-6.2.2
 *
 * Returns a normalized copy (this object is not modified - the method is const).
484 */
485 nonvirtual Authority Normalize () const;
486
487 public:
488 /**
489 * Supported conversion-targets (T):
490 * String - converts to the raw URI format (as it would appear in a web-browser or html link)
491 *
492 * if T==String, pctEncode defaults to eDecoded
493 * if T==string, pctEncode defaults to ePCTEncoded
 *
 * NOTE(review): std::string appears in the defaults above but not in the supported-targets list,
 * and (unlike Host::As / UserInfo::As) this template has no requires clause - confirm which targets are supported.
494 */
495 template <typename T>
496 nonvirtual T As (optional<StringPCTEncodedFlag> pctEncode = {}) const;
497
498 public:
499 /**
500 * note that https://tools.ietf.org/html/rfc3986#appendix-A sort of indicates that the host is NOT optional, but may be empty
501 * Because of how combining works with base urls and full URLs, I think its clearer to represent the empty case of an empty
502 * host as a missing host specification.
503 */
504 nonvirtual optional<Host> GetHost () const;
505
506 public:
507 /**
 * @see GetHost ()
 *
 * Pass nullopt to mark the host as missing.
508 */
509 nonvirtual void SetHost (const optional<Host>& host);
510
511 public:
512 /**
 * Returns the port, or nullopt if no port is specified.
513 */
514 nonvirtual optional<PortType> GetPort () const;
515
516 public:
517 /**
 * @see GetPort ()
 *
 * Pass nullopt to mark the port as missing.
518 */
519 nonvirtual void SetPort (const optional<PortType>& port);
520
521 public:
522 /**
523 * FROM https://tools.ietf.org/html/rfc3986#section-3.2.1:
524 * The userinfo subcomponent may consist of a user name and, optionally,
525 * scheme-specific information about how to gain authorization to access
526 * the resource
527 *
528 * \note This value is typically missing (nullopt)
529 */
530 nonvirtual optional<UserInfo> GetUserInfo () const;
531
532 public:
533 /**
534 * @see GetUserInfo ()
535 */
536 nonvirtual void SetUserInfo (const optional<UserInfo>& userInfo);
537
538 public:
539 /**
 * Three-way comparison with another Authority.
 *
 * NOTE(review): presumably compares the host, port and userInfo parts - the order and exact rules are in TWC_; confirm there.
540 */
541 nonvirtual strong_ordering operator<=> (const Authority& rhs) const;
542
543 public:
544 /**
 * Equality comparison with another Authority.
 *
 * NOTE(review): expected to be consistent with operator<=> - confirm.
545 */
546 nonvirtual bool operator== (const Authority& rhs) const;
547
548 private:
549 static strong_ordering TWC_ (const Authority& lhs, const Authority& rhs); // three-way-compare utility (historically shared between the c++17 and c++20 comparison code)
550
551 public:
552 /**
553 * For debugging purposes: don't count on the format.
554 * @see Characters::ToString ()
555 */
556 nonvirtual String ToString () const;
557
558 private:
 // The three optional parts of the authority; nullopt means that part is missing
559 optional<Host> fHost_;
560 optional<PortType> fPort_;
561 optional<UserInfo> fUserInfo_;
562 };
563
564 /**
565 * \note case sensitive - https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.1
566 *
567 * \note Map, not Association - since https://www.rfc-editor.org/rfc/rfc3986#section-3.4 doesn't clearly indicate
568 * that keys can be repeated, and common practice is to store query args in javascript 'object' which doesn't
569 * allow duplicates. Do likewise for now... --LGP 2024-11-20
570 *
571 * \note <a href="Design-Overview.md#Comparisons">Comparisons</a>:
572 * o Standard Stroika Comparison support (operator<=>,operator==, etc);
573 *
574 * comparing for equals makes full sense. But comparing < really doesn't, because there is no obvious preferred order for query strings
575 * So pick a preferred ordering (alphabetical) - and compare one after the other
576 * @todo see http://stroika-bugs.sophists.com/browse/STK-144 and fix when that is fixed
577 *
578 * According to http://tools.ietf.org/html/rfc3986 - URLs need to be treated as UTF-8 before doing % etc substitution, so support u8string overload.
579 */
580 class [[nodiscard]] Query {
581 public:
582 /**
 * Construct from the text of a query string (u8string or String), or copy/move from another Query.
 *
 * NOTE(review): presumably the argument is the raw (still %-encoded) query text, which is parsed
 * into the name/value map exposed by GetMap () - confirm against the implementation.
583 */
584 Query (const u8string& query);
585 Query (const String& query);
586 Query (const Query&) noexcept = default;
587 Query (Query&&) noexcept = default;
588
589 public:
590 nonvirtual Query& operator= (const Query&) = default;
591 nonvirtual Query& operator= (Query&&) = default;
592
593 public:
 /**
 * Returns (by const reference) the name => value mapping for this query.
 */
594 nonvirtual const Containers::Mapping<String, String>& GetMap () const;
595
596 public:
 /**
 * Returns the value associated with field idx.
 *
 * NOTE(review): the result for a field that is not present is not visible here (presumably an
 * empty String) - use Lookup () when presence needs to be distinguished.
 */
597 nonvirtual String operator() (const u8string& idx) const;
598 nonvirtual String operator() (const String& idx) const;
599
600 public:
 /**
 * Reports whether field idx is present. NOTE(review): semantics inferred from the name - confirm.
 */
601 nonvirtual bool HasField (const u8string& idx) const;
602 nonvirtual bool HasField (const String& idx) const;
603
604 public:
605 /**
606 * \brief lookup argument in map
 *
 * NOTE(review): presumably nullopt when idx is not present - confirm.
607 */
608 nonvirtual optional<String> Lookup (const String& idx) const;
609
610 public:
 /**
 * Adds the field idx with the given value.
 *
 * NOTE(review): since the query is stored as a Mapping (no repeated keys), adding an existing
 * idx presumably replaces its value - confirm.
 */
611 nonvirtual void AddField (const String& idx, const String& value);
612
613 public:
 /**
 * Removes field idx; as the name indicates, it is not an error if the field is not present.
 */
614 nonvirtual void RemoveFieldIfAny (const u8string& idx);
615 nonvirtual void RemoveFieldIfAny (const String& idx);
616
617 public:
618 // Returns the query as a String containing only ASCII characters (the return type is String, not std::string)
619 // See http://tools.ietf.org/html/rfc3986
620 nonvirtual String ComputeQueryString () const;
621
622 public:
623 /**
 * Three-way comparison with another Query.
 *
 * NOTE(review): there is no natural order for query strings; the implementation is expected to pick
 * a fixed (alphabetical) ordering of the fields and compare one after the other - confirm in TWC_.
624 */
625 nonvirtual strong_ordering operator<=> (const Query& rhs) const;
626
627 public:
628 /**
 * Equality comparison with another Query.
629 */
630 nonvirtual bool operator== (const Query& rhs) const;
631
632 private:
633 static strong_ordering TWC_ (const Query& lhs, const Query& rhs); // three-way-compare utility (historically shared between the c++17 and c++20 comparison code)
634
635 public:
636 /**
637 * For debugging purposes: don't count on the format.
638 * @see Characters::ToString ()
639 */
640 nonvirtual String ToString () const;
641
642 private:
644 };
645
646 /**
647 * See http://tools.ietf.org/html/rfc3986
648 * This doesn't encode an entire URL, just a particular field
 *
 * Takes the field text as a String and returns the encoded field as a u8string.
649 */
650 u8string EncodeURLQueryStringField (const String& s);
651
652 /**
653 * See https://tools.ietf.org/html/rfc3986#appendix-A for the meaning of gen-delims/sub-delims (the allowGenDelims/allowSubDelims options)
654 *
655 * When PCTEncode () is called with a String, the string is first UTF8 encoded.
656 *
657 * This is mostly used INTERNALLY in parsing URLs, and probably should not be used otherwise.
658 */
659 struct [[nodiscard]] PCTEncodeOptions {
 // Options passed to PCTEncode () / PCTEncode2String (); every flag defaults to false.
 // NOTE(review): each 'allow' flag presumably means that character class is passed through
 // un-encoded when true - confirm against the PCTEncode implementation.
660 // From https://tools.ietf.org/html/rfc3986#appendix-A
661 // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
662 // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
663 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
664 // we always allow 'unreserved'
665 bool allowSubDelims = false;
666 bool allowGenDelims = false;
667 bool allowPChar = false; // sub-delims + ":" / "@"
668 bool allowFragOrQueryChars = false; // pchar / "/" / "?"
669 bool allowPathCharacters = false; // COMPLICATED - I THINK this means sub-delims + '/' (@ and : maybe sometimes allowed, but I think always safe to encode)
670 };
// Percent-encode s according to the given PCTEncodeOptions.
// The first two overloads return a u8string; PCTEncode2String returns a String.
// NOTE(review): PCTEncode2String is presumably the same encoding, just converted to String - confirm.
671 u8string PCTEncode (const u8string& s, const PCTEncodeOptions& options);
672 u8string PCTEncode (const String& s, const PCTEncodeOptions& options);
673 String PCTEncode2String (const String& s, const PCTEncodeOptions& options);
674
675 /**
 * PCTDecode () takes a u8string and returns the decoded result as a u8string.
 *
676 * PCTDecode2String () takes the result of PCTDecode, treats it as UTF8 text, and converts it to a String.
677 */
678 u8string PCTDecode (const u8string& s);
679 String PCTDecode2String (const u8string& s);
680 String PCTDecode2String (const String& s);
681
682}
683
684namespace std {
685 template <>
687 public:
689 };
690}
691
692/*
693 ********************************************************************************
694 ***************************** Implementation Details ***************************
695 ********************************************************************************
696 */
697#include "UniformResourceIdentification.inl"
698
699#endif /*_Stroika_Foundation_IO_Network_UniformResourceIdentification_h_*/
StringPCTEncodedFlag
for some purposes, we may want to render objects PCT-encoded, and sometimes not (plain or decoded)....
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
Authority is roughly the part of a URL where you say the hostname (and portnumber etc) - part just af...
nonvirtual T As(optional< StringPCTEncodedFlag > pctEncode={}) const
nonvirtual RESULT_TYPE As(optional< StringPCTEncodedFlag > pctEncode={}) const
Returns the hostname, either encoded or decoded (PCT encoding) as some form of printed derivitive str...
nonvirtual RESULT_TYPE As(optional< StringPCTEncodedFlag > pctEncoded={}) const
Returns the hostname, either encoded or decoded (PCT encoding) as some form of printed derivitive str...
STL namespace.