Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
Hash.h
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Cryptography_Digest_Hash_h_
5#define _Stroika_Foundation_Cryptography_Digest_Hash_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <cstdint>
10
12#include "Stroika/Foundation/DataExchange/DefaultSerializer.h"
14
15#include "Digester.h"
16#include "HashBase.h"
17
18#include "Algorithm/SuperFastHash.h" // for default algorithm
19
20/*
21 * \note Code-Status: <a href="Code-Status.md#Alpha">Alpha</a>
22 *
23 */
24
25namespace Stroika::Foundation::Cryptography::Digest {
26
27 /**
28 * \brief use std::hash<T> to digest a type t. AKA stdhash_Digester
29 *
30 * This is not generally useful, as the default hash<> function is really not generally very good as a cryptographic digest,
31 * but it can be used as such (and for some implementations it may be quite good or fast).
32 *
33 * The more pressing reason to want to use it for a digest, is that many people support std::hash<T> for their types T, and
34 * this allows using those hash<> specializations.
35 *
36 * \note many of these overloads are redundant, but provided as a speed tweak for these situations.
37 */
38 template <typename T>
39 struct SystemHashDigester {
40 using ReturnType = size_t; // the same result type std::hash<T> produces
41
42 ReturnType operator() (const Streams::InputStream::Ptr<byte>& from) const; // input given as a byte input stream
43 ReturnType operator() (const byte* from, const byte* to) const; // input given as a pair of byte pointers (presumably the range [from, to) - confirm in Hash.inl)
44 ReturnType operator() (const BLOB& from) const; // input given as a BLOB
45 };
46
47 /**
48 * The Hash<> template takes an optional parameter of the digester. This simply provides a default
49 * which allows the Hash<> template to be used with the same parameters as std::hash<>, except that it provides an avenue to
50 * revise/enhance hashing to use a different digest algorithm.
51 */
52 using DefaultHashDigester = Digester<Algorithm::SuperFastHash>;
53
54 /**
55 * \brief Simple wrapper on (cryptographic) digest algorithms with fewer knobs, and easier construction - mimicking std::hash<T>
56 *
57 * A Hash is very much like a Digest - it takes a series of bytes and produces a
58 * (generally fixed size much shorter) series of bits which as closely as practical
59 * uniquely bijectively maps between the series of input bytes and the series of output bits.
60 *
61 * The main difference between a Digest and a Hash, is more a focus on how its used. A Hash
62 * is typically required to be more quick, and often works on a variety of input types (int, string
63 * etc), and maps to often smaller sequences of bits (say 32-bit number).
64 *
65 * A hash makes NO EFFORT to rem (mod) the results (though frequently the caller will % the result before use).
66 *
67 * So this class uses the SERIALIZER mechanism to allow users to easily map different types to
68 * a sequence of 'bytes' in normalized form, and then allows them to be digested, and then the digest
69 * mapped to a (typically) small number (32-bit integer for example).
70 *
71 * This Adapter takes care of the general part of mapping the inputs and outputs to/from
72 * common forms, and then makes generic the actual hash computing algorithm.
73 *
74 * \note Endianness - the digest algorithms logically return an array of bytes, so when (as in a typical use case)
75 * HASH_RETURN_TYPE is a numeric type like uint32_t, the hash's numerical value will depend on the
76 * integer endianness of the machine.
77 *
78 * Use the digest directly if you need a portable, externalizable digest value.
79 *
80 * \note Design Note:
81 * Considered reversing the first two template parameters, so
82 * template Hash<typename DIGESTER = DefaultHashDigester, typename T>...
83 * For the case of the constructor this MIGHT work better, allowing you to specify
84 * one parameter (the digester) and DEDUCE the T parameter.
85 *
86 * However, that makes worse the default behavior for the more common (in my current estimation)
87 * usage where there is no SALT (where now you can just provide T, and have DIGESTER defaulted).
88 *
89 * Mimic the behavior of std::hash<> - except hopefully pick a better default algorithm than gcc did.
90 * (https://news.ycombinator.com/item?id=13745383)
91 *
92 * You can construct your Hash object with a 'salt' value (a value of type T which is digested to combine/offset
93 * all other computed hash values). This can be used for cryptographic salt (as with passwords) or
94 * with rehashing, for example.
95 *
96 * This template is NOT fully default-defined for all T, but will work automatically for:
97 * o all builtin numeric types, int, char, unsigned int, long etc...
98 * o many other Stroika types, but see docs for that type to see if there is a specialization
99 *
100 * To use with types for which hash<> is already defined, you can use DIGESTER=SystemHashDigester (but then this
101 * class provides little additional value over direct use of std::hash)
102 *
103 * This works by default on any type T for which DefaultSerializer<T> is defined (so @see DefaultSerializer<T>).
104 * But callers can always provide a specific templated serializer (for performance, or because the default serializer doesn't support T).
105 *
106 * Other types should generate compile-time error.
107 *
108 * Supported values for HASH_RETURN_TYPE, depend on the DIGESTER::ReturnType. This can be any type
109 * cast convertible into HASH_RETURN_TYPE (typically an unsigned int), or std::string, or Characters::String, or Common::GUID, or array<byte, N> etc...
110 *
111 * \par Example Usage
112 * \code
113 * EXPECT_TRUE (Hash<int>{} (1) == someNumber);
114 * EXPECT_TRUE (Hash<string>{} ("1") == someNumber2);
115 * EXPECT_TRUE (Hash<String>{} ("1") == someNumber3);
116 * EXPECT_TRUE (Hash<String>{"somesalt"} ("1") == someNumber4);
117 * \endcode
118 *
119 * \par Example Usage
120 * \code
121 * using USE_DIGESTER_ = Digester<Algorithm::Jenkins>;
122 * EXPECT_TRUE (Hash<int, USE_DIGESTER_>{} (1) == 10338022);
123 * EXPECT_TRUE (Hash<string, USE_DIGESTER_>{} ("1") == 2154528969);
124 * EXPECT_TRUE (Hash<String, USE_DIGESTER_>{} ("1") == 2154528969);
125 * \endcode
126 *
127 * \par Example Usage (using explicitly specified serializer)
128 * \code
129 * EXPECT_TRUE ((Hash<String, DefaultHashDigester, DefaultHashDigester::ReturnType>{}(L"x") == Hash<String>{}(L"x")));
130 * struct altStringSerializer {
131 * auto operator () (const String& s) { return s.empty () ? Memory::BLOB{} : Memory::BLOB ((const byte*)s.c_str (), (const byte*)s.c_str () + 1); };
132 * };
133 * // NICE to figure out how to get this working instead of the 'struct' above - @todo
134 * //constexpr auto altStringSerializer = [] (const String& s) { return s.empty () ? Memory::BLOB{} : Memory::BLOB ((const byte*)s.c_str (), (const byte*)s.c_str () + 1); };
135 * EXPECT_TRUE ((Hash<String, DefaultHashDigester, DefaultHashDigester::ReturnType, altStringSerializer>{}("xxx") != Hash<String>{}(L"xxx")));
136 * EXPECT_TRUE ((Hash<String, DefaultHashDigester, DefaultHashDigester::ReturnType, altStringSerializer>{}("x1") == Hash<String, DefaultHashDigester, DefaultHashDigester::ReturnType, altStringSerializer>{}("x2")));
137 * \endcode
138 *
139 * \par Example Usage (using explicit result type - typically string)
140 * \code
141 * using namespace IO::Network;
142 * auto hasherWithResult_uint8_t = Hash<InternetAddress, Digester<Digest::Algorithm::SuperFastHash>, uint8_t>{};
143 * auto value2Hash = InternetAddress{"192.168.244.33"};
144 * uint8_t h2 = hasherWithResult_uint8_t (value2Hash);
145 * EXPECT_TRUE (h2 == 215);
146 * auto hasherWithResult_array40 = Hash<InternetAddress, Digester<Digest::Algorithm::SuperFastHash>, std::array<byte, 40>>{};
147 * std::array<byte, 40> h3 = hasherWithResult_array40 (value2Hash);
148 * EXPECT_TRUE ((Digester<Digest::Algorithm::MD5, String>{}(value2Hash) == "..."));
149 * \endcode
150 *
151 * AND see docs on DefaultSerializer<> for how to explicitly specialize it for a given type (often better than passing
152 * an explicit serializer as in this example).
153 */
154 template <typename T, typename DIGESTER = DefaultHashDigester, typename HASH_RETURN_TYPE = typename DIGESTER::ReturnType, typename SERIALIZER = DataExchange::DefaultSerializer<T>>
155 struct Hash { // members are only declared here; NOTE(review): definitions presumably live in Hash.inl (included at the end of this header)
156 /**
157 * Seed parameter to the hash 'starts it off' in some unique state so that computed hashes will all take that
158 * 'seed' into account and differ (statistically) from those with a different seed.
159 */
160 constexpr Hash () = default; // no seed: fSeed is left as an empty optional
161 constexpr Hash (const T& seed); // seed is a value of the hashed type T, not of HASH_RETURN_TYPE
162
163 /** Hash the value t and return the result as a HASH_RETURN_TYPE. This is a const member function, so one Hash object can be reused for many values.
164 * NOTE(review): how fSeed (when present) is folded into the result is decided by the definition, which is not visible in this header - confirm there. */
165 HASH_RETURN_TYPE operator() (const T& t) const;
166
167 optional<HASH_RETURN_TYPE> fSeed; // empty when default-constructed; NOTE(review): presumably the digest of the constructor's seed argument - confirm in Hash.inl
168 };
169
170 /**
171 * \brief combine two hash values to produce a new hash value (in a hopefully very random/uncorrelated way)
172 * Both inputs (lhs, rhs - taken by value) and the returned combined value share the single type RESULT_TYPE.
173 * inspired by https://en.cppreference.com/w/cpp/utility/hash - return h1 ^ (h2 << 1); // or use boost::hash_combine
174 */
175 template <typename RESULT_TYPE>
176 RESULT_TYPE HashValueCombine (RESULT_TYPE lhs, RESULT_TYPE rhs);
177
178}
179
180/*
181 ********************************************************************************
182 ***************************** Implementation Details ***************************
183 ********************************************************************************
184 */
185#include "Hash.inl"
186
187#endif /*_Stroika_Foundation_Cryptography_Digest_Hash_h_*/
Digester<ALGORITHM> is a function-object way to access the digest algorithm. Its generally almost the...
Definition Digester.h:238
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
Simple wrapper on (cryptographic) digest algorithms with fewer knobs, and easier construcion- mimicin...
Definition Hash.h:155
use std::hash<T> to digest a type t. AKA stdhash_Digester
Definition Hash.h:39