Stroika Library 3.0d16
 
BlockAllocator.inl
/*
 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
 */
#include <algorithm>
#include <mutex>
#include <vector>

#include "Stroika/Foundation/Execution/Common.h"
#include "Stroika/Foundation/Math/Common.h"
#include "Stroika/Foundation/Memory/Common.h"

/**
 * \brief qStroika_Foundation_Memory_BlockAllocator_UseLockFree_ provides a lock-free
 *        implementation of BlockAllocator (which should be faster)
 */
//#define qStroika_Foundation_Memory_BlockAllocator_UseLockFree_ 0
#if !defined(qStroika_Foundation_Memory_BlockAllocator_UseLockFree_)
#define qStroika_Foundation_Memory_BlockAllocator_UseLockFree_ 1
#endif
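/*
 * Because of the #if !defined guard above, the default can be overridden from the build
 * configuration; for example, passing -DqStroika_Foundation_Memory_BlockAllocator_UseLockFree_=0
 * to the compiler selects the mutex/spinlock-based implementation instead.
 */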

/*
 * qStroika_Foundation_Memory_BlockAllocator_UseMallocDirectly_ is currently experimental, as
 * a test to see if it's faster than calling operator new (for blocks of RAM).
 *
 * operator new[] does very little for us, and could in principle have call overhead and
 * overhead to track the number of entries in the array (which would be pointless).
 */
#if !defined(qStroika_Foundation_Memory_BlockAllocator_UseMallocDirectly_)
#define qStroika_Foundation_Memory_BlockAllocator_UseMallocDirectly_ 1
#endif

#if qStroika_Foundation_Memory_BlockAllocator_UseMallocDirectly_
#include <cstdlib>

#include "Stroika/Foundation/Execution/Throw.h"
#endif

namespace Stroika::Foundation::Memory {

    namespace Private_ {

#if qStroika_Foundation_Memory_BlockAllocator_UseLockFree_
        /**
         *  Basic idea is to use a sentinel value to indicate LOCKED - and use atomic exchange with
         *  the head-of-list pointer.
         *
         *  If we exchange and get back the sentinel, we didn't get the lock, so retry.
         *
         *  If we don't get the sentinel, we have the lock (and stored the sentinel so nobody else will get the lock),
         *  so go ahead and complete the operation.
         *
         *  \note I tried to make kLockedSentinel_ constexpr, but this generates errors, apparently because:
         *        http://en.cppreference.com/w/cpp/language/constant_expression
         *
         *        Address constant expression is a prvalue core constant expression (after conversions required by context)
         *        of type nullptr_t or of a pointer type, which points to an object with static storage duration, one past
         *        the end of an array with static storage duration, to a function, or is a null pointer
         *
         *  @todo COULD use UNION to workaround this...
         */
        static void* const kLockedSentinel_ = (void*)1; // any invalid pointer
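        /*
         *  A minimal self-contained sketch of this exchange-the-sentinel pattern (illustrative only;
         *  the names sHead, kLocked, AcquireHead, and ReleaseHead are hypothetical, not part of Stroika):
         *
         *      #include <atomic>
         *      #include <cassert>
         *      #include <thread>
         *
         *      static std::atomic<void*> sHead{nullptr};
         *      static void* const        kLocked = reinterpret_cast<void*> (1); // any invalid pointer
         *
         *      void* AcquireHead ()
         *      {
         *          // Swap the sentinel in; anything we get back that is NOT the sentinel is the real
         *          // head, and since the sentinel is now stored, no other thread can succeed until
         *          // we store a real value back.
         *          void* h = sHead.exchange (kLocked, std::memory_order_acq_rel);
         *          while (h == kLocked) {
         *              std::this_thread::yield ();
         *              h = sHead.exchange (kLocked, std::memory_order_acq_rel);
         *          }
         *          return h; // caller 'owns' the list until it calls ReleaseHead
         *      }
         *
         *      void ReleaseHead (void* newHead)
         *      {
         *          void* prev = sHead.exchange (newHead, std::memory_order_release);
         *          assert (prev == kLocked); // only the lock holder can release
         *          (void)prev;
         *      }
         */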
#else
        /*
         *  kUseSpinLock_ is probably best true (empirical tests with the
         *  performance regression test indicated that this helped considerably).
         *
         *  It should be generally pretty safe, because the locks are very narrow (so threads shouldn't spend much
         *  time spinning). And we could reduce this further during mallocs of new blocks.
         */
        constexpr bool kUseSpinLock_ = !qStroika_Foundation_Memory_BlockAllocator_UseLockFree_ and Execution::kSpinLock_IsFasterThan_mutex;

        using LockType_ = conditional_t<kUseSpinLock_, Execution::SpinLock, mutex>;

        inline LockType_& GetLock_ ()
        {
            static LockType_ sLock_;
            return sLock_;
        }

        void DoDeleteHandlingLocksExceptionsEtc_ (void* p, void** staticNextLinkP) noexcept;
#endif
    }

    namespace Private_ {
        /*
         *  BlockAllocationPool_ implements the core logic for allocation/deallocation of a particular pool. These are organized
         *  by size rather than type, to reduce fragmentation.
         */
        template <size_t SIZE>
        class BlockAllocationPool_ {
        public:
            // never returns nullptr - throws if memory exhausted
            static void* Allocate (size_t n);
            // require p != nullptr
            static void Deallocate (void* p) noexcept;
            static void Compact ();

        private:
            alignas (void*) static inline conditional_t<qStroika_Foundation_Memory_BlockAllocator_UseLockFree_, atomic<void*>, void*> sHeadLink_{nullptr};

        private:
            static constexpr size_t ComputeChunks_ ();
            static void** GetMem_Util_ ();
        };
    }
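    /*
     *  Illustration of the size-based pooling noted above (hypothetical types, for exposition only):
     *  BlockAllocator<T> maps T to a pool by rounded-up size (see AdjustSizeForPool_ below), so
     *  distinct types of the same adjusted size share one BlockAllocationPool_ instantiation:
     *
     *      struct A { int32_t a; int32_t b; };     // sizeof (A) == 8
     *      struct B { double d; };                 // sizeof (B) == 8
     *      // On a 64-bit target, both BlockAllocator<A> and BlockAllocator<B> draw from
     *      // Private_::BlockAllocationPool_<8> - one free list, less fragmentation.
     */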

    /*
     ********************************************************************************
     ***************** Private_::BlockAllocationPool_<SIZE> *************************
     ********************************************************************************
     */
    template <size_t SIZE>
    inline void* Private_::BlockAllocationPool_<SIZE>::Allocate ([[maybe_unused]] size_t n)
    {
        static_assert (SIZE >= sizeof (void*), "SIZE >= sizeof (void*)");
        Require (n <= SIZE);

#if qStroika_Foundation_Memory_BlockAllocator_UseLockFree_
        constexpr bool kTryMemoryOrderOptimizations_ = true; // experiment as of 2023-09-26

        /*
         *  Note - once we have stored Private_::kLockedSentinel_ in the sHeadLink_ and gotten back something other than that, we
         *  effectively have a lock for the scope below (because nobody else can get anything other than Private_::kLockedSentinel_ from exchange).
         */
        void* p{};
        {
        again:
            p = sHeadLink_.exchange (Private_::kLockedSentinel_, memory_order_acq_rel);
            if (p == Private_::kLockedSentinel_) [[unlikely]] {
                // we stored and retrieved a sentinel. So no lock. Try again!
                this_thread::yield (); // nb: until Stroika v2.0a209, this called Execution::Yield (), making this a cancelation point.
                goto again;
            }
        }
        // if we got here, p contains the real head, and we have a pseudo-lock
        if (p == nullptr) [[unlikely]] {
            p = GetMem_Util_ ();
        }
        void* result = p;
        AssertNotNull (result);
        /*
         *  Treat this as a linked list, and make head point to the next member.
         *
         *  Use an atomic load to guarantee the value isn't read from a stale cache line not yet synchronized across threads.
         */
        static_assert (sizeof (void*) == sizeof (atomic<void*>), "atomic doesn't change size");
        // even though we have 'acquired' a logical lock, the memory at address 'p' may not be synced to our local processor/thread
        void* next = reinterpret_cast<const atomic<void*>*> (p)->load (memory_order_acquire);
        if constexpr (kTryMemoryOrderOptimizations_) {
            Verify (sHeadLink_.exchange (next, memory_order_release) == Private_::kLockedSentinel_); // must return Private_::kLockedSentinel_ cuz we owned lock, so Private_::kLockedSentinel_ must be there
        }
        else {
            Verify (sHeadLink_.exchange (next, memory_order_acq_rel) == Private_::kLockedSentinel_); // must return Private_::kLockedSentinel_ cuz we owned lock, so Private_::kLockedSentinel_ must be there
        }
        return result;
#else
        [[maybe_unused]] lock_guard critSec{Private_::GetLock_ ()};
        /*
         *  To implement the linked list of BlockAllocated(T)'s before they are
         *  actually alloced, re-use the beginning of each block as a link pointer.
         */
        if (sHeadLink_ == nullptr) [[unlikely]] {
            sHeadLink_ = GetMem_Util_ ();
        }
        void* result = sHeadLink_;
        AssertNotNull (result);
        /*
         *  treat this as a linked list, and make head point to the next member
         */
        sHeadLink_ = (*(void**)sHeadLink_);
        return result;
#endif
    }
    template <size_t SIZE>
    inline void Private_::BlockAllocationPool_<SIZE>::Deallocate (void* p) noexcept
    {
        static_assert (SIZE >= sizeof (void*), "SIZE >= sizeof (void*)");
        RequireNotNull (p);
#if qStroika_Foundation_Memory_BlockAllocator_UseLockFree_

        constexpr bool kTryMemoryOrderOptimizations_ = true; // experiment as of 2023-09-26

        /*
         *  Note - once we have stored Private_::kLockedSentinel_ in the sHeadLink_ and gotten back something other than that, we
         *  effectively have a lock for the scope below (because nobody else can get anything other than Private_::kLockedSentinel_ from exchange).
         */
        void* prevHead{};
        {
        again:
            prevHead = sHeadLink_.exchange (Private_::kLockedSentinel_, memory_order_acq_rel);
            if (prevHead == Private_::kLockedSentinel_) [[unlikely]] {
                // we stored and retrieved a sentinel. So no lock. Try again!
                this_thread::yield (); // nb: until Stroika v2.0a209, this called Execution::Yield (), making this a cancelation point.
                goto again;
            }
        }
        /*
         *  Push p onto the head of the linked free list.
         *
         *  Use an atomic store to guarantee the value is published (flushed) so it is visible across threads.
         */
        static_assert (sizeof (void*) == sizeof (atomic<void*>), "atomic doesn't change size");
        void* newHead = p;
        reinterpret_cast<atomic<void*>*> (newHead)->store (prevHead, memory_order_release);
        if constexpr (kTryMemoryOrderOptimizations_) {
            Verify (sHeadLink_.exchange (newHead, memory_order_release) == Private_::kLockedSentinel_); // must return Private_::kLockedSentinel_ cuz we owned lock, so Private_::kLockedSentinel_ must be there
        }
        else {
            Verify (sHeadLink_.exchange (newHead, memory_order_acq_rel) == Private_::kLockedSentinel_); // must return Private_::kLockedSentinel_ cuz we owned lock, so Private_::kLockedSentinel_ must be there
        }
#else
        Private_::DoDeleteHandlingLocksExceptionsEtc_ (p, &sHeadLink_);
#endif
    }
    template <size_t SIZE>
    void Private_::BlockAllocationPool_<SIZE>::Compact ()
    {
#if qStroika_Foundation_Memory_BlockAllocator_UseLockFree_
        // cannot compact lock-free - no biggie
#else
        [[maybe_unused]] lock_guard critSec{Private_::GetLock_ ()};

        // step one: put all the links into a single, sorted vector
        const size_t kChunks = ComputeChunks_ ();
        Assert (kChunks >= 1);
        vector<void*> links;
        try {
            links.reserve (kChunks * 2);
            void* link = sHeadLink_;
            while (link != nullptr) {
                // probably should use Containers::ReserveSpeedTweekAddN - but want to avoid embrace of dependencies
                if (links.size () == links.capacity ()) {
                    links.reserve (links.size () * 2);
                }
                links.push_back (link);
                link = *(void**)link;
            }
        }
        catch (...) {
            return;
        }

        sort (links.begin (), links.end ());

        // now look for unused memory blocks. Since the vector is sorted by pointer value, the first pointer encountered is potentially the start of a block.
        // Look at the next N vector elements, where N is the number of elements that would have been allocated together (kChunks). If they are all there, then the
        // block is unused and can be freed. Otherwise, do the same thing starting from the first element found outside of the block.
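        /*
         *  Worked illustration (hypothetical numbers): suppose SIZE == 16 and, for exposition, kChunks == 3,
         *  so each malloc'd chunk holds 3 elements. If the sorted free list contains 0x1000, 0x1010, 0x1020
         *  (consecutive, each SIZE apart), that whole chunk is unused and can be freed. If it contains only
         *  0x1000 and 0x1020 (0x1010 is still handed out to a caller), the chunk must be kept, and the scan
         *  resumes at the first pointer past that chunk.
         */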
        size_t originalSize = links.size ();
        size_t index = 0;
        while (index < links.size ()) {
            byte* deleteCandidate = (byte*)links[index];
            bool canDelete = true;
            size_t i = 1;
            for (; i < kChunks; ++i) {
                if ((index + i) >= links.size ())
                    break;
                byte* next = (byte*)links[index + i];
                byte* prev = (byte*)links[index + i - 1];
                if (canDelete and ((next - prev) != SIZE)) {
                    canDelete = false; // don't break here, as we have to find the end of this block, which could be further on
                }
                if (static_cast<size_t> (abs (next - deleteCandidate) / SIZE) >= kChunks) {
                    Assert (not canDelete);
                    break;
                }
            }
            if (canDelete) {
                links.erase (links.begin () + index, links.begin () + index + kChunks);
                if constexpr (qStroika_Foundation_Memory_BlockAllocator_UseMallocDirectly_) {
                    ::free ((void*)deleteCandidate);
                }
                else {
                    delete[] static_cast<char*> ((void*)deleteCandidate); // pair with the new char[] in GetMem_Util_
                }
            }
            else {
                index += i;
            }
        }

        // finally, clean up the pool. The head link is probably wrong, and the elements are no longer linked up correctly
        sHeadLink_ = (links.size () == 0) ? nullptr : links[0];
        if (links.size () != originalSize and links.size () > 0) {
            // we deleted some, which means that the next pointers are bad
            void** curLink = (void**)sHeadLink_;
            for (size_t i = 1; i < links.size (); ++i) {
                *curLink = links[i];
                curLink = (void**)*curLink;
            }
            *curLink = nullptr;
        }
#endif
    }
    template <size_t SIZE>
    constexpr size_t Private_::BlockAllocationPool_<SIZE>::ComputeChunks_ ()
    {
        /*
         *  Picked this particular kTargetMallocSize_ since, together with malloc overhead, it is likely to turn out to be
         *  a chunk size the memory allocator can do a good job on.
         */
        constexpr size_t kTargetMallocSize_ = 16360; // 16384 = 16K - leave a few bytes of slack...
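        /*
         *  For example (illustrative arithmetic): with SIZE == 32, the computation below yields
         *  16360 / 32 == 511 elements per malloc'd chunk; for very large SIZE, the Math::AtLeast
         *  call keeps the result at a minimum of 10.
         */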

        return Math::AtLeast (static_cast<size_t> (kTargetMallocSize_ / SIZE), static_cast<size_t> (10));
    }
    template <size_t SIZE>
    inline void** Private_::BlockAllocationPool_<SIZE>::GetMem_Util_ ()
    {
        Require (SIZE >= sizeof (void*));

        constexpr size_t kChunks = ComputeChunks_ ();
        Assert (kChunks >= 1);

        /*
         *  Please note that the following line is NOT a memory leak. @see BlockAllocator<>
         */
        void** newLinks;
        if constexpr (qStroika_Foundation_Memory_BlockAllocator_UseMallocDirectly_) {
            DISABLE_COMPILER_GCC_WARNING_START ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") // crazy warning from g++-11
            newLinks = (void**)::malloc (kChunks * SIZE);
            Execution::ThrowIfNull (newLinks);
            DISABLE_COMPILER_GCC_WARNING_END ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") // crazy warning from g++-11
        }
        else {
            newLinks = (void**)new char[kChunks * SIZE];
        }
        AssertNotNull (newLinks);
        /*
         *  Create a 'linked list' of blocks - all of size SIZE. The first element of each block is a pointer to the next
         *  block. The last block must be NULL-terminated.
         *
         *  @todo MAYBE make this a member of the class, and use it elsewhere for the linked-list logic
         */
        {
            struct linkedListElt {
                linkedListElt* fNext;
            };
            auto nextLinkAlias = reinterpret_cast<linkedListElt*> (newLinks);
            // the first kChunks-1 links point to the next block
            for (size_t i = 1; i < kChunks; ++i) {
                auto nextNextLink = reinterpret_cast<linkedListElt*> (reinterpret_cast<byte*> (nextLinkAlias) + SIZE);
                nextLinkAlias->fNext = nextNextLink;
                nextLinkAlias = nextNextLink;
            }
            // The FINAL link must be NULL-terminated
            nextLinkAlias->fNext = nullptr;
        }
        return newLinks;
    }

    /*
     ********************************************************************************
     ******************************** BlockAllocator<T> *****************************
     ********************************************************************************
     */
    template <typename T>
    template <typename U>
    constexpr BlockAllocator<T>::BlockAllocator (const BlockAllocator<U>&) noexcept
    {
    }
    template <typename T>
    [[nodiscard]] inline T* BlockAllocator<T>::allocate ([[maybe_unused]] size_t n)
    {
        using Private_::BlockAllocationPool_;
        Require (n == 1);
#if qStroika_Foundation_Memory_PreferBlockAllocation
        T* result = reinterpret_cast<T*> (BlockAllocationPool_<AdjustSizeForPool_ ()>::Allocate (sizeof (T)));
#else
        T* result = reinterpret_cast<T*> (::operator new (sizeof (T)));
#endif
        EnsureNotNull (result);
        Ensure (reinterpret_cast<ptrdiff_t> (result) % alignof (T) == 0); // see http://stroika-bugs.sophists.com/browse/STK-511 - assure aligned
        return result;
    }
    template <typename T>
    inline void BlockAllocator<T>::deallocate (T* p, [[maybe_unused]] size_t n) noexcept
    {
        Require (n == 1);
        using Private_::BlockAllocationPool_;
#if qStroika_Foundation_Memory_PreferBlockAllocation
        if (p != nullptr) [[likely]] {
            BlockAllocationPool_<AdjustSizeForPool_ ()>::Deallocate (p);
        }
#else
        ::operator delete (p);
#endif
    }
    template <typename T>
    inline void BlockAllocator<T>::Compact ()
    {
        using Private_::BlockAllocationPool_;
#if qStroika_Foundation_Memory_PreferBlockAllocation
        BlockAllocationPool_<AdjustSizeForPool_ ()>::Compact ();
#endif
    }
    template <typename T>
    constexpr size_t BlockAllocator<T>::AdjustSizeForPool_ ()
    {
        return Math::RoundUpTo (sizeof (T), sizeof (void*));
    }
    template <typename T>
    inline bool BlockAllocator<T>::operator== (const BlockAllocator& rhs) const
    {
        return true;
    }
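    /*
     *  Example use of the allocator API implemented above (an illustrative sketch only - 'Foo' and
     *  'Demo' are hypothetical, not part of this library):
     *
     *      #include <memory> // std::construct_at / std::destroy_at
     *
     *      #include "Stroika/Foundation/Memory/BlockAllocator.h"
     *
     *      using Stroika::Foundation::Memory::BlockAllocator;
     *
     *      struct Foo {
     *          int fValue{};
     *      };
     *
     *      void Demo ()
     *      {
     *          BlockAllocator<Foo> alloc;
     *          Foo*                p = alloc.allocate (1); // n must be 1; never returns nullptr (throws on exhaustion)
     *          std::construct_at (p, Foo{42});
     *          std::destroy_at (p);
     *          alloc.deallocate (p, 1); // returns the block to the per-size free list (no call to ::free)
     *          alloc.Compact ();        // optionally release fully-unused chunks (no-op in the lock-free configuration)
     *      }
     */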

}