Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
RegularExpression.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Characters_RegularExpression_h_
5#define _Stroika_Foundation_Characters_RegularExpression_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <regex>
10
12#include "Stroika/Foundation/Containers/Sequence.h"
13
14/**
15 * \file
16 */
17
19
20 /**
21 * \brief RegularExpression is a compiled regular expression which can be used to match on a String class
22 *
23 * This class is a simple wrapper on the std::wregex class.
24 */
26 public:
27 /**
28 * \note We chose ECMAScript as the default, to match the default grammar of the C++ standard library (std::basic_regex).
29 */
30 enum class SyntaxType {
31 /**
32 * http://en.cppreference.com/w/cpp/regex/ecmascript
33 */
34 eECMAScript = regex_constants::ECMAScript,
35
36 /**
37 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03
38 */
39 eBasic = regex_constants::basic,
40
41 /**
42 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04
43 */
44 eExtended = regex_constants::extended,
45
46 /**
47 * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html#tag_20_06_13_04
48 */
49 eAwk = regex_constants::awk,
50
51 /**
52 * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
53 */
54 eGrep = regex_constants::grep,
55 eEGrep = regex_constants::egrep, // maps to the std::regex_constants::egrep grammar flag
56
57 eDEFAULT = eECMAScript, // alias for eECMAScript, the documented default syntax
58 };
59 using SyntaxType::eAwk;
62 using SyntaxType::eEGrep;
65
66 public:
67 /**
68 * \note RegularExpression {} creates a special regular expression that matches nothing.
69 * \note RegularExpression (String re) throws std::regex_error () if provided an invalid regular expression.
70 * \note The default syntax is ECMAScript.
71 *
72 * \par Example Usage
73 * \code
74 * // see http://en.cppreference.com/w/cpp/regex/ecmascript
75 * const auto kSingleWhitespaceChar = RegularExpression{"[[:space:]]"sv};
76 * const auto kSeriesOfWhitespaceCharacters = RegularExpression{"\\s+"sv};
77 * \endcode
78 */
79 explicit RegularExpression ();
80 explicit RegularExpression (SyntaxType syntaxType, const String& re, CompareOptions co = eWithCase);
81 explicit RegularExpression (const String& re, CompareOptions co = eWithCase);
82 RegularExpression (const wregex& regEx);
83 RegularExpression (wregex&& regEx);
84
85 public:
86 /**
87 * Predefined regular expression that matches nothing.
88 *
89 * \note Since this is a static object, beware, it cannot be (safely) used before or after main
90 * \note Equivalent to
91 * \code
92 * const RegularExpression kMatchNone ("(?!)"); // OR
93 * const RegularExpression kMatchNoneAlternative{};
94 * \endcode
95 */
97
98 public:
99 /**
100 * Predefined regular expression that matches anything.
101 *
102 * \note Since this is a static object, beware, it cannot be (safely) used before or after main
103 * \note Equivalent to
104 * \code
105 * const RegularExpression kAny {".*"};
106 * \endcode
107 */
109
110 public:
111 nonvirtual const wregex& GetCompiled () const;
112
113 public:
114 [[deprecated ("Since Stroika v3.0d14 - use SyntaxType before the regexp")]] // legacy argument order: pattern first, then SyntaxType
115 explicit RegularExpression (const String& re, SyntaxType syntaxType, CompareOptions co = eWithCase)
116 : RegularExpression (syntaxType, re, co) // delegates to the (SyntaxType, const String&, CompareOptions) overload
117 {
118 }
119
120 private:
121 wregex fCompiledRegExp_;
122 };
123 inline const RegularExpression RegularExpression::kNONE{"(?!)"sv}; // the pattern this header documents as matching nothing
124 inline const RegularExpression RegularExpression::kAny{".*"sv}; // the pattern this header documents as matching anything
125
126 /**
127 * This class doesn't do anything. It just collects together a match result/response.
128 */
130 public:
131 RegularExpressionMatch (const String& fullMatch);
132 RegularExpressionMatch (const String& fullMatch, const Containers::Sequence<String>& subMatches);
133
134 public:
135 nonvirtual String GetFullMatch () const;
136
137 public:
138 nonvirtual Containers::Sequence<String> GetSubMatches () const;
139
140 private:
141 String fFullMatch_;
142 Containers::Sequence<String> fSubMatches_;
143 };
144
145 inline namespace Literals {
146 /**
147 * \brief user defined literal for RegularExpression. These are always of type RegularExpression::SyntaxType::eDEFAULT (ie eECMAScript)
148 * \note Declared as operator""_RegEx (no space before the suffix): the spaced form is deprecated and would make _RegEx a reserved identifier.
149 * \pre For the 'char' overload, all text in the argument must be ASCII (uses String::FromStringConstant)
150 */
151 RegularExpression operator""_RegEx (const char* str, size_t len);
152 RegularExpression operator""_RegEx (const wchar_t* str, size_t len);
153 RegularExpression operator""_RegEx (const char8_t* str, size_t len);
154 RegularExpression operator""_RegEx (const char16_t* str, size_t len);
155 RegularExpression operator""_RegEx (const char32_t* str, size_t len);
156 }
157
158}
159
160/*
161 ********************************************************************************
162 ***************************** Implementation Details ***************************
163 ********************************************************************************
164 */
165#include "RegularExpression.inl"
166
167#endif /*_Stroika_Foundation_Characters_RegularExpression_h_*/
RegularExpression is a compiled regular expression which can be used to match on a String class.
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
A generalization of a vector: a container whose elements are keyed by the natural numbers.
Definition Sequence.h:187