Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
URI.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
9#include "Stroika/Foundation/Characters/String2Int.h"
12#include "Stroika/Foundation/Execution/Exceptions.h"
13#include "Stroika/Foundation/Execution/Throw.h"
14
15#include "URI.h"
16
17// Comment this in to turn on aggressive noisy DbgTrace in this module
18// #define USE_NOISY_TRACE_IN_THIS_MODULE_ 1
19
20using namespace Stroika::Foundation;
23using namespace Stroika::Foundation::IO;
25
27
28/*
29 ********************************************************************************
30 ************************************** URI *************************************
31 ********************************************************************************
32 */
33namespace {
34 String PatchOldStroikaURLPath2NewPath_ (const optional<URI::Authority>& a, const String& s)
35 {
36 if (s.empty ()) {
37 return s;
38 }
39 if (s.StartsWith (L"/")) {
40 return s;
41 }
42 if (a) {
43 return "/" + s;
44 }
45 return s;
46 }
47}
48
49namespace {
50 String remove_dot_segments_ (const String& p, URI::NormalizationStyle normalization = URI::NormalizationStyle::eRFC3986)
51 {
52 // @todo - this is a fairly inefficient implementation, but so far hasn't shown up in profiles
53#if USE_NOISY_TRACE_IN_THIS_MODULE_
54 //Debug::TraceContextBumper{"remove_dot_segments_", "p={},normalization={}"_f, p, normalization};
55#endif
56 // from https://tools.ietf.org/html/rfc3986#section-5.2.4
57 vector<String> segments; // for our purpose here, segments may (or not in case of first) contain a leading /
58 StringBuilder accumulatingSegment;
59 for (Character c : p) {
60 if (c == '/' and not accumulatingSegment.empty ()) {
61 segments.push_back (accumulatingSegment.str ());
62 accumulatingSegment.clear ();
63 }
64 accumulatingSegment << c;
65 }
66 if (not accumulatingSegment.empty ()) {
67 segments.push_back (accumulatingSegment.str ());
68 }
69 vector<String> segments2; // apply ../. removal
70 bool lastSegmentShouldHaveSlash{false}; // not sure about this
71 for (const String& segment : segments) {
72 lastSegmentShouldHaveSlash = false;
73 if (segment == "."sv or segment == "/."sv) {
74 // drop it on the floor
75 if (segment[0] == '/') {
76 lastSegmentShouldHaveSlash = true;
77 }
78 }
79 else if (segment == ".."sv or segment == "/.."sv) {
80 if (not segments2.empty ()) {
81 segments2.pop_back ();
82 }
83 if (segment[0] == '/') {
84 lastSegmentShouldHaveSlash = true;
85 }
86 }
87 else {
88 segments2.push_back (segment);
89 }
90 }
91
92 StringBuilder result;
93 bool soFarEndsWithSlash = false;
94 for (const String& segment : segments2) {
95 if (normalization == URI::NormalizationStyle::eAggressive) {
96 if (segment.StartsWith ('/') and soFarEndsWithSlash) {
97 String add = segment.SubString (1);
98 if (not add.empty ()) {
99 result << add;
100 soFarEndsWithSlash = add.EndsWith ('/');
101 }
102 }
103 else {
104 result << segment;
105 soFarEndsWithSlash = segment.EndsWith ('/');
106 }
107 }
108 else {
109 result << segment;
110 }
111 }
112 if (lastSegmentShouldHaveSlash and not result.str ().EndsWith ("/"sv)) {
113 result << "/"sv;
114 }
115 return result;
116 };
117}
118
119URI URI::Parse (const String& rawURL)
120{
121#if USE_NOISY_TRACE_IN_THIS_MODULE_
122 Debug::TraceContextBumper{"IO::Network::URI::Parse", "{}"_f, rawURL};
123#endif
124 // https://tools.ietf.org/html/rfc3986#appendix-B
125 static const RegularExpression kParseURLRegExp_{"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"_RegEx};
126 optional<String> scheme;
127 optional<String> authority;
128 optional<String> path;
129 optional<String> query;
130 optional<String> fragment;
131 auto emptyStr2Missing = [] (const optional<String>& s) -> optional<String> {
132 if (s) {
133 if (not s->empty ()) {
134 return s;
135 }
136 }
137 return nullopt;
138 };
139 (void)rawURL.AsASCII (); // for throw check side-effect
140 if (rawURL.Matches (kParseURLRegExp_, nullptr, &scheme, nullptr, &authority, &path, nullptr, &query, nullptr, &fragment)) {
141 return URI{emptyStr2Missing (scheme), Authority::Parse (authority.value_or (String{})),
142 UniformResourceIdentification::PCTDecode2String (path.value_or (String{})), emptyStr2Missing (query), emptyStr2Missing (fragment)};
143 }
144 else {
145 static const Execution::RuntimeErrorException kException_{"Ill-formed URI"sv};
146 Execution::Throw (kException_); // doesn't match regexp in https://tools.ietf.org/html/rfc3986#appendix-B
147 }
148}
149
150URI URI::ParseRelative (const String& rawRelativeURL)
151{
152#if USE_NOISY_TRACE_IN_THIS_MODULE_
153 Debug::TraceContextBumper{"IO::Network::URI::ParseRelative", "{}"_f, rawRelativeURL};
154#endif
155 // https://tools.ietf.org/html/rfc3986#appendix-B
156 static const RegularExpression kParseRelativeURLRegExp_{"([^?#]*)(\\?([^#]*))?(#(.*))?"_RegEx};
157 optional<String> scheme;
158 optional<String> authority;
159 optional<String> path;
160 optional<String> query;
161 optional<String> fragment;
162 auto emptyStr2Missing = [] (const optional<String>& s) -> optional<String> {
163 if (s) {
164 if (not s->empty ()) {
165 return s;
166 }
167 }
168 return nullopt;
169 };
170 (void)rawRelativeURL.AsASCII (); // for throw check side-effect
171 if (rawRelativeURL.Matches (kParseRelativeURLRegExp_, &path, nullptr, &query, nullptr, &fragment)) {
172 return URI{nullopt, nullopt, UniformResourceIdentification::PCTDecode2String (path.value_or (String{})), emptyStr2Missing (query),
173 emptyStr2Missing (fragment)};
174 }
175 else {
176 static const Execution::RuntimeErrorException kException_{"Ill-formed relative URI"sv};
177 Execution::Throw (kException_); // doesn't match regexp in https://tools.ietf.org/html/rfc3986#appendix-B
178 }
179}
180
181String URI::AsString_ (optional<StringPCTEncodedFlag> pctEncode) const
182{
183 // http://stroika-bugs.sophists.com/browse/STK-1000 -- issue about maybe needed more nuanced approach
184 StringPCTEncodedFlag usingPCTEncodeFlag = pctEncode.value_or (eDecoded);
185 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
186 StringBuilder result;
187 if (fScheme_) {
188 // From https://tools.ietf.org/html/rfc3986#appendix-A
189 // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
190 // no need to pct encode this
191 Assert (fScheme_->All ([] (Character c) { return c.IsASCII (); }));
192 result << *fScheme_ << ":"sv;
193 }
194 if (fAuthority_) {
195 Assert (fAuthority_->As<String> (usingPCTEncodeFlag).All ([] (Character c) { return c.IsASCII (); }));
196 result << "//"sv << fAuthority_->As<String> (usingPCTEncodeFlag);
197 }
198
199 if (fAuthority_ and not(fPath_.empty () or fPath_.StartsWith ("/"sv))) {
200 // NOT SURE HOW TO HANDLE
201 static const Execution::RuntimeErrorException kException_{"This is not a legal URI to encode (authority present, but path not empty or absolute)"sv};
202 Execution::Throw (kException_);
203 }
204
205 if (usingPCTEncodeFlag == eDecoded) {
206 result << fPath_;
207 }
208 else {
209 static constexpr UniformResourceIdentification::PCTEncodeOptions kPathEncodeOptions_{
210 .allowSubDelims = false, .allowGenDelims = false, .allowPChar = true, .allowFragOrQueryChars = false, .allowPathCharacters = true};
211 result << UniformResourceIdentification::PCTEncode2String (fPath_, kPathEncodeOptions_);
212 }
213
214 if (fQuery_) {
215 static constexpr UniformResourceIdentification::PCTEncodeOptions kQueryEncodeOptions_{
216 .allowSubDelims = false, .allowGenDelims = false, .allowPChar = false, .allowFragOrQueryChars = true};
217 if (usingPCTEncodeFlag == eDecoded) {
218 result << "?"sv << *fQuery_;
219 }
220 else {
221 result << "?"sv << UniformResourceIdentification::PCTEncode2String (*fQuery_, kQueryEncodeOptions_);
222 }
223 }
224 if (fFragment_) {
225 if (usingPCTEncodeFlag == eDecoded) {
226 result << "#"sv + *fFragment_;
227 }
228 else {
229 static constexpr UniformResourceIdentification::PCTEncodeOptions kFragmentEncodeOptions_{false, false, false, true};
230 result << "#"sv + UniformResourceIdentification::PCTEncode2String (*fFragment_, kFragmentEncodeOptions_);
231 }
232 }
233 Ensure (result.str ().All ([] (Character c) { return c.IsASCII (); }));
234 return result.str ();
235}
236
237URI::operator bool () const
238{
239 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
240 if (fScheme_) {
241 return true;
242 }
243 if (fAuthority_) {
244 return true;
245 }
246 if (not fPath_.empty ()) {
247 return true;
248 }
249 if (fQuery_) {
250 return true;
251 }
252 if (fFragment_) {
253 return true;
254 }
255 return false;
256}
257
259{
260 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
261 static const RegularExpression kSelectDir_ = "(.*\\/)[^\\/]*"_RegEx;
262 optional<String> baseDir;
263 (void)fPath_.Matches (kSelectDir_, &baseDir);
264 return baseDir.value_or (String{});
265}
266
268{
269 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
270 optional<SchemeType> scheme = fScheme_;
271 if (scheme) {
272 scheme = scheme->Normalize ();
273 }
274 optional<Authority> authority = fAuthority_;
275 if (authority) {
276 authority = authority->Normalize ();
277 }
278 String path = remove_dot_segments_ (fPath_, normalization); // review https://tools.ietf.org/html/rfc3986#section-6.2.2.3 - this algorithm for removing dots was from merge code, so not sure it applies here
279 return URI{scheme, authority, path, fQuery_, fFragment_};
280}
281
283{
284 // dont use As<String> () because this can throw if bad string - and no need to pct-encode here
285 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
286 StringBuilder result;
287 if (fScheme_) {
288 result << *fScheme_ << ":"sv;
289 }
290 if (fAuthority_) {
291 result << "//"sv << fAuthority_->As<String> ();
292 }
293 result << fPath_;
294 if (fQuery_) {
295 result << "?"sv << *fQuery_;
296 }
297 if (fFragment_) {
298 result << "#"sv << *fFragment_;
299 }
300 return result;
301}
302
303void URI::CheckValidPathForAuthority_ (const optional<Authority>& authority, const String& path)
304{
305 /*
306 * https://tools.ietf.org/html/rfc3986#section-3.3
307 * If a URI contains an authority component, then the path component
308 * must either be empty or begin with a slash ("/") character
309 */
310 if (authority and (not path.empty () and not path.StartsWith ("/"sv))) {
311 static const Execution::RuntimeErrorException kException_{"A URI with an authority must have an empty path, or an absolute path"sv};
312 Execution::Throw (kException_);
313 }
314}
315
316URI URI::Combine (const URI& overridingURI) const
317{
318 AssertExternallySynchronizedMutex::ReadContext declareContext{fThisAssertExternallySynchronized_};
319
320 /*
321 * This is not strictly according to Hoyle, but it avoids a common inconvenience with the Scheme check below. And avoids having to write a lot of
322 * code like:
323 * if (l) {
324 * return l.Combine (r);
325 * }
326 * else {
327 * return r;
328 * }
329 */
330 if (not*this) {
331 return overridingURI;
332 }
333
334 /*
335 * From https://tools.ietf.org/html/rfc3986#section-5
336 * "Note that only the scheme component is required to be present in a base URI; the other components may be empty or undefined."
337 */
338 URI baseURI = Normalize ();
339 if (not baseURI.GetScheme ()) {
340 static const Execution::RuntimeErrorException kException_{"Scheme is required in base URI to combine with another URI"sv};
341 Execution::Throw (kException_);
342 }
343 auto merge = [&] (const String& base, const String& rhs) -> String {
344 // @see https://tools.ietf.org/html/rfc3986#section-5.2.3
345 if (baseURI.GetAuthority () and base.empty ()) {
346 return "/"sv + rhs;
347 }
348 static const RegularExpression kSelectDir_ = "(.*\\/)[^\\/]*"_RegEx;
349 optional<String> baseDir;
350 (void)base.Matches (kSelectDir_, &baseDir);
351 return baseDir.value_or (String{}) + rhs;
352 };
353
354 Assert (remove_dot_segments_ ("/a/b/c/./../../g") == "/a/g"); // from https://tools.ietf.org/html/rfc3986#section-5.2.4
355 Assert (remove_dot_segments_ ("mid/content=5/../6") == "mid/6"); // ditto
356
357 // Algorithm copied from https://tools.ietf.org/html/rfc3986#section-5.2.2
358 URI result;
359
360 /*
361 * Skipped this part
362 * -- A non-strict parser may ignore a scheme in the reference
363 * -- if it is identical to the base URI's scheme.
364 * --
365 * if ((not strict) and (R.scheme == Base.scheme)) then
366 * undefine(R.scheme);
367 * endif;
368 */
369 if (overridingURI.GetScheme ()) {
370 result.SetScheme (overridingURI.GetScheme ());
371 result.SetAuthority (overridingURI.GetAuthority ());
372 result.SetPath (remove_dot_segments_ (overridingURI.GetPath ()));
373 result.SetQuery (overridingURI.GetQuery<String> ());
374 }
375 else {
376 result.SetScheme (baseURI.GetScheme ());
377 if (overridingURI.GetAuthority ()) {
378 result.SetAuthority (overridingURI.GetAuthority ());
379 result.SetPath (remove_dot_segments_ (overridingURI.GetPath ()));
380 result.SetQuery (overridingURI.GetQuery<String> ());
381 }
382 else {
383 result.SetAuthority (baseURI.GetAuthority ());
384 if (overridingURI.GetPath ().empty ()) {
385 result.SetPath (baseURI.GetPath ());
386 result.SetQuery (overridingURI.GetQuery<String> () ? overridingURI.GetQuery<String> () : baseURI.GetQuery<String> ());
387 }
388 else {
389 if (overridingURI.GetPath ().StartsWith ("/"sv)) {
390 result.SetPath (remove_dot_segments_ (overridingURI.GetPath ()));
391 }
392 else {
393 result.SetPath (remove_dot_segments_ (merge (baseURI.GetPath (), overridingURI.GetPath ())));
394 }
395 result.SetQuery (overridingURI.GetQuery<String> ());
396 }
397 }
398 }
399 result.SetFragment (overridingURI.GetFragment ());
400 return result;
401}
402
403strong_ordering URI::TWC_ (const URI& lhs, const URI& rhs)
404{
405 using namespace UniformResourceIdentification;
406 if (auto cmp = Common::StdCompat::compare_three_way{}(lhs.GetScheme (), rhs.GetScheme ()); cmp != strong_ordering::equal) {
407 return cmp;
408 }
409 if (auto cmp = Common::StdCompat::compare_three_way{}(lhs.GetAuthority (), rhs.GetAuthority ()); cmp != strong_ordering::equal) {
410 return cmp;
411 }
412 if (auto cmp = Common::StdCompat::compare_three_way{}(lhs.GetPath (), rhs.GetPath ()); cmp != strong_ordering::equal) {
413 return cmp;
414 }
415 if (auto cmp = Common::StdCompat::compare_three_way{}(lhs.GetQuery (), rhs.GetQuery ()); cmp != strong_ordering::equal) {
416 return cmp;
417 }
418 if (auto cmp = Common::StdCompat::compare_three_way{}(lhs.GetFragment (), rhs.GetFragment ()); cmp != strong_ordering::equal) {
419 return cmp;
420 }
421 return strong_ordering::equal;
422}
423
424/*
425 ********************************************************************************
426 *********** hash<Stroika::Foundation::IO::Network::URI> ************************
427 ********************************************************************************
428 */
429size_t std::hash<Stroika::Foundation::IO::Network::URI>::operator() (const Stroika::Foundation::IO::Network::URI& arg) const
430{
431 return hash<Characters::String> () (arg.As<Characters::String> ());
432}
RegularExpression is a compiled regular expression which can be used to match on a String class.
Similar to String, but intended to more efficiently construct a String. Mutable type (String is large...
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
nonvirtual bool Matches(const RegularExpression &regEx) const
Definition String.cpp:1133
nonvirtual bool EndsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1088
nonvirtual String SubString(SZ from) const
nonvirtual bool StartsWith(const Character &c, CompareOptions co=eWithCase) const
Definition String.cpp:1059
NOT a real mutex - just a debugging infrastructure support tool so in debug builds can be assured thr...
nonvirtual String GetAuthorityRelativeResourceDir() const
Return the path component, excluding any text after the final /.
Definition URI.cpp:258
static URI Parse(const String &rawURL)
Definition URI.cpp:119
nonvirtual String ToString() const
Definition URI.cpp:282
nonvirtual T As(optional< StringPCTEncodedFlag > pctEncoded={}) const
nonvirtual optional< SchemeType > GetScheme() const
Definition URI.inl:36
nonvirtual void SetPath(const String &path)
Definition URI.inl:84
nonvirtual URI Combine(const URI &overridingURI) const
Combine overridingURI possibly relative url with this base url, to produce a new URI.
Definition URI.cpp:316
static URI ParseRelative(const String &rawRelativeURL)
Definition URI.cpp:150
nonvirtual optional< Authority > GetAuthority() const
Definition URI.inl:55
nonvirtual URI Normalize(NormalizationStyle normalization=NormalizationStyle::eDefault) const
Produce a normalized representation of the URI.
Definition URI.cpp:267
nonvirtual void SetScheme(const optional< SchemeType > &scheme)
Definition URI.inl:41
static optional< Authority > Parse(const String &rawURLAuthorityText)
nonvirtual bool All(const function< bool(ArgByValueType< T >)> &testEachElt) const
return true iff argument predicate returns true for each element of the iterable
Definition Iterable.inl:940
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43