Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
TextStore.h
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Frameworks_Led_TextStore_h_
5#define _Stroika_Frameworks_Led_TextStore_h_ 1
6
7#include "Stroika/Frameworks/StroikaPreComp.h"
8
9#include <algorithm>
10#include <cstddef>
11#include <list>
12#include <vector>
13
14#include "Stroika/Foundation/Containers/Common.h"
17
18/*
19@MODULE: TextStore
20@DESCRIPTION:
21 <p>This module introduces one of Led's key abstractions - the @'TextStore'. A @'TextStore' - as the name suggests -
22 is where Led keeps track of text. It is abstract enough to allow many different representations of the text.</p>
23 <p>A @'TextStore' also keeps track of @'Marker's associated with the text.</p>
24 <h4>Important Design Notes:</h4>
25 <ul>
26 <li> Note that Led uses @'Led_tChar's as the meaning for the indexes. This means for
27 wide character sets it means characters, and for mbyte or single character set
28 builds it means bytes. In either case you can use CharacterToTCharIndex/
29 TCharToCharacterIndex () to get whichever you want.
30 </li>
31 </ul>
32 */
33
34#include "Stroika/Frameworks/Led/Marker.h"
35#include "Stroika/Frameworks/Led/Support.h"
36#include "Stroika/Frameworks/Led/TextBreaks.h"
37
38namespace Stroika::Frameworks::Led {
39
40/*
41 @CONFIGVAR: qUseWin32CompareStringCallForCaseInsensitiveSearch
42 @DESCRIPTION: <p>Produces better internationalized results - but - of course - is Win32 specific - and a bit slower.
43 Based on SPR#0864</p>
44 */
45#ifndef qUseWin32CompareStringCallForCaseInsensitiveSearch
46#define qUseWin32CompareStringCallForCaseInsensitiveSearch qStroika_Foundation_Common_Platform_Windows
47#endif
48
49 /*
50 @CLASS: TextStore
51 @BASES: @'MarkerOwner'
52 @DESCRIPTION: <p>An abstraction of something which contains text, and special objects
53 called markers, which can be used to represent embedded data, or
54 hypertext links, or font style runs. Markers have a left and right edge
55 (named by indexes into the text buffer), and conceptually stick to the
56 text they wrap as the text is modified (edited). </p>
57
58 <p>There are two concrete TextStore implementations you can use (with Led 3.0).
59 <ul style="margin-left: +6em">
60 <li>@'ChunkedArrayTextStore'</li>
61 <li>@'SimpleTextStore'</li>
62 </ul>
63 </p>
64
65 <p>Future versions of Led may include TextStore implementations which:
66 <ul style="margin-left: +6em">
67 <li>directly encode the markers into the stream of text</li><br>
68 Something along these lines is a common editor representation for styled text.
69 But in my case, Markers are arbitrary objects, stored by pointer.
70 So it's less clear how to accomplish that inline storage technique here.
71
72 <li>Store text on the disk, and then read in only the little bits it needed.</li><br>
73 This would be a sensible TextStore to use when implementing a 'programming'
74 style text editor, where you might reasonably want to edit many megabyte files,
75 and not really need to store many - if any - markers. By accessing the file
76 data on a demand-basis (memory mapped file?)- you could save a significant amount
77 of startup time, and time overall if not the entire file is viewed.
78 </ul>
79 */
80 class TextStore : public virtual MarkerOwner {
81 protected:
82 TextStore ();
83
84 public:
85 virtual ~TextStore ();
86
87 /*
88 @METHOD: TextStore::ConstructNewTextStore
89 @DESCRIPTION: <p>Constructs a new instance of an object of THIS type (whatever dynamic type the object has). The
90 newly constructed object bears no (other) association with the original (THIS) object. This function
91 is pure-virtual, and must be overridden in all subclasses.</p>
92 */
93 public:
94 virtual TextStore* ConstructNewTextStore () const = 0;
95
96 /*
97 * Register a markerOwner here, and it will be notified of text changes before
98 * all markers, and AFTER all markers. Never register more than once, and
99 * always delete iff registered.
100 */
101 public:
102 virtual void AddMarkerOwner (MarkerOwner* owner);
103 virtual void RemoveMarkerOwner (MarkerOwner* owner);
104 nonvirtual const vector<MarkerOwner*>& GetMarkerOwners () const noexcept;
105
106 private:
107 vector<MarkerOwner*> fMarkerOwners{};
108
109 // Retrieve the text
110 public:
111 /*
112 @METHOD: TextStore::GetLength
113 @DESCRIPTION: <p>Returns the number of @'Led_tChar's in this @'TextStore'.</p>
114 */
115 virtual size_t GetLength () const noexcept = 0;
116 /*
117 @METHOD: TextStore::CopyOut
118 @DESCRIPTION: <p>CopyOut does NOT null terminate. It is an error to call with 'count' causing
119 access past end of TextStore buffer.</p>
120 <p>Note that it IS NOT an error to call CopyOut for multibyte characters and split them.
121 This is one of the few API routines where that is so.</p>
122 */
123 virtual void CopyOut (size_t from, size_t count, Led_tChar* buffer) const noexcept = 0;
124
125 // utilities to simplify refering to beginning/end of buffer.
126 public:
127 static size_t GetStart ();
128 nonvirtual size_t GetEnd () const;
129
130 // Type of indexing conversion
131 //
132 // In Led - by default - we index in terms of Led_tChars. These are typically chars(bytes)
133 // but for wide-charactersets such as UNICODE, they can be larger. It is possibly useful to
134 // sometimes index in-terms of CHARACTERS instead of BYTES - even when using a multibyte
135 // character set. These routines allow you to always freely go back and forth between
136 // CHARACTER INDEXES and Led_tChar indexes (used throughout Led).
137 //
138 // NB: For MBYTE character sets - these CAN BE QUITE SLOW!
139 //
140 public:
141 nonvirtual size_t CharacterToTCharIndex (size_t i);
142 nonvirtual size_t TCharToCharacterIndex (size_t i);
143
144 // Update the text
145 public:
146 nonvirtual void Replace (size_t from, size_t to, const Led_tChar* withWhat, size_t withWhatCount);
147
148 /*
149 @METHOD: TextStore::ReplaceWithoutUpdate
150 @DESCRIPTION: <p>Similar to @'TextStore::Replace' except that this routine doesn't notify markers and markerowners about
151 the update. This should rarely be called directly - except in conjunction with @'TextStore::SimpleUpdater'</p>
152 <p>This method is pure-virtual, and implemented by a concrete subclass. This method is new to Led 3.1</p>
153 */
154 virtual void ReplaceWithoutUpdate (size_t from, size_t to, const Led_tChar* withWhat, size_t withWhatCount) = 0;
155
156 /*
157 * Access the markers.
158 */
159 public:
160 /*
161 @METHOD: TextStore::AddMarker
162 @DESCRIPTION: <p>Add the given marker to this TextStore, starting at position lhs, and give the marker
163 length 'length'. Record that its owner is 'owner'.</p>
164 <p>NB: the @'MarkerOwner' - must be a valid @'MarkerOwner'. Either this TextStore,
165 or a class which has been added to this @'TextStore' by @'TextStore::AddMarkerOwner'. This requirement is
166 new to Led 2.3 (but unlikely ever violated before).</p>
167 <p>It is an error to add a marker which is already added to some TextStore (even this TextStore).</p>
168 <p>It is an error to destroy a TextStore without first removing all markers you had added.
169 It is in order to make that more efficient that we provide the RemoveMarkers () method.</p>
170 <p>We are strict about markers extending outside valid index ranges - this is prohibited.</p>
171 */
172 virtual void AddMarker (Marker* marker, size_t lhs, size_t length, MarkerOwner* owner) = 0;
173
174 /*
175 @METHOD: TextStore::RemoveMarker
176 @DESCRIPTION: <p>Remove the given marker from the TextStore. It is an error if the given marker is not already
177 in this TextStore. See @'TextStore::AddMarker' for more details.</p>
178 */
179
180 nonvirtual void RemoveMarker (Marker* marker);
181 /*
182 @METHOD: TextStore::RemoveMarkers
183 @DESCRIPTION: <p>Remove the given markers from the TextStore. This is essentially the same as doing multiple
184 @'TextStore::RemoveMarker' calls, except that it may be more efficient for removing large numbers of markers.</p>
185 <p>See also @'TextStore::PreRemoveMarker'.</p>
186 <p>See also @'TextStore::RemoveTypedMarkers'.</p>
187 */
188 virtual void RemoveMarkers (Marker* const markerArray[], size_t markerCount) = 0;
189
190 template <typename T>
191 /*
192 @METHOD: TextStore::RemoveTypedMarkers
193 @DESCRIPTION: <p>A variant of @'TextStore::RemoveMarkers' which can be called with an array of any time 'T' that publicly
194 subclasses from @'Marker'.</p>
195 */
196 nonvirtual void RemoveTypedMarkers (T* const ma[], size_t mc)
197 {
198 vector<Marker*> v;
199 for (size_t i = 0; i < mc; ++i) {
200 v.push_back (ma[i]);
201 }
202 RemoveMarkers (Foundation::Containers::Start (v), mc);
203 }
204
205 template <typename T>
206 /*
207 @METHOD: TextStore::RemoveAndDeleteMarkers
208 @DESCRIPTION: <p>Calls @'TextStore::RemoveTypedMarkers' and then deletes each marker.</p>
209 */
210 nonvirtual void RemoveAndDeleteMarkers (T* const ma[], size_t mc)
211 {
212 RemoveTypedMarkers (ma, mc);
213 for (size_t i = 0; i < mc; ++i) {
214 delete (ma[i]);
215 }
216 }
217
218 /*
219 @METHOD: TextStore::PreRemoveMarker
220 @DESCRIPTION: <p>Don't entirely remove the marker (so it can still be queried for
221 size etc). But mark it so it will not appear in future CollectAllXXX methods.</p>
222 <p>This is <em>NOT</em> required before calling @'TextStore::RemoveMarker',
223 but can be handy from classes like @'MarkerMortuary<MARKER>'.</p>
224 <p>This method was added to fix SPR#0822 - see for details.</p>
225 */
226 virtual void PreRemoveMarker (Marker* marker) = 0;
227
228 public:
229 /*
230 @METHOD: TextStore::SetMarkerRange
231 @DESCRIPTION: <p>Set the bounds of the given marker. The given marker must already
232 be in this TextStore (see @'TextStore::AddMarker').
233 And it is required that the start/end values be within the valid marker
234 range for this buffer.</p>
235 */
236 virtual void SetMarkerRange (Marker* marker, size_t start, size_t end) noexcept = 0;
237 nonvirtual void SetMarkerStart (Marker* marker, size_t start) noexcept;
238 nonvirtual void SetMarkerEnd (Marker* marker, size_t end) noexcept;
239 nonvirtual void SetMarkerLength (Marker* marker, size_t length) noexcept;
240
241 /*
242 * Family of routines to retrieve markers of interest in a particular range of the text.
243 *
244 * The 'Overlap' method is what is used to see if a marker is considered to be in the
245 * given from/to range for the purpose of collection (does the obvious intersection test
246 * with the added caveat of not including markers which only overlap at one edge or the other
247 * - and not including any common characters).
248 *
249 * You can either specify a callback function/object to be called with each found marker.
250 * If you only need the first such, you can throw to terminate the search. There is a helper class
251 * and helper functions to allow you to fill an array with all the matching Markers.
252 *
253 * NB: this has changed somewhat since Led21 - see qSupportLed21CompatAPI, and SPRs#420,421,422
254 */
255 public:
256 /*
257 @CLASS: TextStore::MarkerSink
258 @DESCRIPTION:
259 <p>An abstract "callback class", used to be notified in calls to CollectAllMarkersInRangeInto.
260 To use this class, subclass, and OVERRIDE the Append () method. Pass an instance of your
261 subclass to TextStore::CollectAllMarkersInRangeInto (or some variant). Your classes Append
262 method will be called for each marker in the given range.</p>
263 */
264 class MarkerSink {
265 public:
266 /*
267 @METHOD: TextStore::MarkerSink::Append
268 @DESCRIPTION:
269 <p>Don't call directly. Called by TextStore::CollectAllMarkersInRangeInto (). Override this method
270 and pass an instance of your subclass to TextStore::CollectAllMarkersInRangeInto ().</p>
271 */
272 virtual void Append (Marker* m) = 0;
273 };
274 /*
275 @CLASS: TextStore::VectorMarkerSink
276 @BASES: @'TextStore::MarkerSink'
277 @DESCRIPTION:
278 <p>A utility class which gathers all the markers passed to it into an array (vector).</p>
279 */
280 class VectorMarkerSink : public MarkerSink {
281 public:
282 VectorMarkerSink (vector<Marker*>* markers);
283
284 virtual void Append (Marker* m) override;
285
286 private:
287 vector<Marker*>* fMarkers;
288 };
289
290 /*
291 @CLASS: TextStore::InlineBufferMarkerSink
292 @BASES: @'TextStore::MarkerSink'
293 @DESCRIPTION:
294 <p>A utility class which gathers all the markers passed to it into a @'Memory::InlineBuffer<Marker*>' (the public 'fMarkers' member).</p>
295 */
296 class InlineBufferMarkerSink : public MarkerSink {
297 public:
298 InlineBufferMarkerSink () = default;
299
300 virtual void Append (Marker* m) override;
301
302 public:
303 Foundation::Memory::InlineBuffer<Marker*> fMarkers;
304 };
305
306 public:
307 /*
308 @METHOD: TextStore::CollectAllMarkersInRangeInto
309 @DESCRIPTION: <p>Note - owner can be any valid MarkerOwner, or @'TextStore::kAnyMarkerOwner'.</p>
310 */
311 static const MarkerOwner* kAnyMarkerOwner;
312 virtual void CollectAllMarkersInRangeInto (size_t from, size_t to, const MarkerOwner* owner, MarkerSink& output) const = 0;
313
314 // Related helpers
315 // _OrSurroundings () versions include markers which overlapped just barely on an edge
316 public:
317 static bool Overlap (size_t mStart, size_t mEnd, size_t from, size_t to);
318 static bool Overlap (const Marker& m, size_t from, size_t to);
319
320 nonvirtual void CollectAllMarkersInRangeInto_OrSurroundings (size_t from, size_t to, const MarkerOwner* owner, MarkerSink& output) const;
321
322 nonvirtual void CollectAllMarkersInRangeInto (size_t from, size_t to, const MarkerOwner* owner, vector<Marker*>* markerList) const;
323 nonvirtual void CollectAllMarkersInRangeInto_OrSurroundings (size_t from, size_t to, const MarkerOwner* owner, vector<Marker*>* markerList) const;
324
325 nonvirtual vector<Marker*> CollectAllMarkersInRange (size_t from, size_t to, const MarkerOwner* owner = kAnyMarkerOwner) const;
326 nonvirtual vector<Marker*> CollectAllMarkersInRange_OrSurroundings (size_t from, size_t to, const MarkerOwner* owner = kAnyMarkerOwner) const;
327
328 /*
329 * NB: We consider that if there is NO text, there is still 1 line. We count the zero-length line
330 * at the end of the buffer as one line.
331 *
332 * GetLineContainingPosition () returns the lineNumber that fully contains the
333 * character at position charPosition. It is an error to call this if charPosition
334 * is not a valid position.
335 *
336 * GetEndOfLine () returns the position BEFORE the NL (or after the last character)
337 * if there is no NL. This corresponds to being AFTER the last displayed character.
338 */
339 public:
340 virtual size_t GetStartOfLine (size_t lineNumber) const;
341 virtual size_t GetStartOfLineContainingPosition (size_t charPosition) const;
342 virtual size_t GetEndOfLine (size_t lineNumber) const;
343 virtual size_t GetEndOfLineContainingPosition (size_t afterPos) const;
344 virtual size_t GetLineContainingPosition (size_t charPosition) const;
345 virtual size_t GetLineCount () const;
346 nonvirtual size_t GetLineLength (size_t lineNumber) const; // end-start
347
348 /*
349 * Char/Line navigating commands.
350 *
351 * These always return a valid character position. If they are pinned up against
352 * the beginning or end of the buffer, they just return that end position.
353 */
354 public:
355 nonvirtual size_t FindNextCharacter (size_t afterPos) const; // error to call in mid character - at end of buffer - we just return position after last character
356 nonvirtual size_t FindPreviousCharacter (size_t beforePos) const; // error to call in mid character - at start of buffer, we just return 1
357
358 public:
359 nonvirtual shared_ptr<TextBreaks> GetTextBreaker () const;
360 nonvirtual void SetTextBreaker (const shared_ptr<TextBreaks>& textBreaker);
361
362 private:
363 mutable shared_ptr<TextBreaks> fTextBreaker{};
364
365 public:
366 nonvirtual void FindWordBreaks (size_t afterPosition, size_t* wordStartResult, size_t* wordEndResult, bool* wordReal,
367 TextBreaks* useTextBreaker = nullptr);
368 nonvirtual void FindLineBreaks (size_t afterPosition, size_t* wordEndResult, bool* wordReal, TextBreaks* useTextBreaker = nullptr);
369
370 /*
371 * Some helpful word-break utility routines based on FindWordBreaks().
372 */
373 public:
374 nonvirtual size_t FindFirstWordStartBeforePosition (size_t position, bool wordMustBeReal = true);
375 // use for find-prev-word (can return position if position==1
376 // and if no previous word - return 1.
377 nonvirtual size_t FindFirstWordStartStrictlyBeforePosition (size_t position, bool wordMustBeReal = true);
378 // use for find-prev-word (can return position if position==1
379 // and if no previous word - return 1.
380 nonvirtual size_t FindFirstWordEndAfterPosition (size_t position, bool wordMustBeReal = true);
381 // Can return EndOfBuffer if no word-end after position.
382 // Also - might not be called in the context of a word!
383 // Can return position - if it is the end of a word.
384 nonvirtual size_t FindFirstWordStartAfterPosition (size_t position); // use for find-next-word.
385 // returns end-of-buffer if no following word-start
386 // Can return position - note we didn't say STRICTLY after
387
388 /*
389 * Search/Find/Replace support.
390 */
391 public:
392 struct SearchParameters {
393 SearchParameters (const Led_tString& searchString = LED_TCHAR_OF (""), bool wrap = true, bool wholeWord = false, bool caseSensative = false);
394
395 Led_tString fMatchString;
396 bool fWrapSearch;
397 bool fWholeWordSearch;
398 bool fCaseSensativeSearch;
399 };
400
401 enum {
402 eUseSearchParameters = kBadIndex
403 };
404 // return kBadIndex if no match found - otherwise, return index of first char in match.
405 // Start searching after markerPos searchFrom. if searchTo==eUseSearchParameters, then
406 // either search to end of buffer (fWrapSearch==false), or wrap search (fWrapSearch==true).
407 // If searchTo!=eUseSearchParameters, then consider IT to specify the end of the search (ignore
408 // fWrapSearch).
409 virtual size_t Find (const SearchParameters& params, size_t searchFrom, size_t searchTo = eUseSearchParameters);
410
411 // Helper functions, mainly for subclasses, but imagers may use too
412 public:
413 nonvirtual void DoAboutToUpdateCalls (const UpdateInfo& updateInfo, Marker* const* markersBegin, Marker* const* markersEnd);
414 nonvirtual void DoDidUpdateCalls (const UpdateInfo& updateInfo, Marker* const* markersBegin, Marker* const* markersEnd) noexcept;
415
416 public:
417 class SimpleUpdater;
418
419 public:
420 virtual TextStore* PeekAtTextStore () const override;
421
422 public:
423 /**
424 * Debugging support.
425 * Note that all these calls (either fail to compile with debug off,
426 * or produce NO-CODE when debug is off. So they can be called freely (though
427 * they might be QUITE expensive when debug on - so SOME conservatism may
428 * be in order).
429 */
430 nonvirtual void Invariant () const;
431
432#if qStroika_Foundation_Debug_AssertionsChecked
433 protected:
434 virtual void Invariant_ () const;
435#endif
436 };
437
438 /*
439 @CLASS: TextStore::SimpleUpdater
440 @DESCRIPTION: <p>Simple utility to make it a bit easier to make @'TextStore::DoAboutToUpdateCalls' /
441 @'TextStore::DoDidUpdateCalls'.</p>
442 <p>Constructing the object does the about-to-update call (collecting all the markers in
443 the range given by from/to or the explicit updateInfo object), and then the destructor calls the
444 didUpdates. So you would write code like:
445 <code><pre>
446 TextStore::SimpleUpdater updater (*ts, from, to);
447 try {
448 Do_Some_Code ();
449 }
450 catch (...) {
451 updater.Cancel ();
452 throw;
453 }
454 </pre></code>
455 </p>
456 <p>The did-updates are done when the updater object goes out of scope; even if it goes out
457 of scope because of an exception - which MAY NOT be desired. Because of this, you should
458 call the cancel method to prevent the didUpdates from happening on destruction.
459 </p>
460 */
461 class TextStore::SimpleUpdater : public Foundation::Memory::UseBlockAllocationIfAppropriate<SimpleUpdater> {
462 public:
463 SimpleUpdater (TextStore& ts, const UpdateInfo& updateInfo);
464 SimpleUpdater (TextStore& ts, size_t from, size_t to, bool realContentUpdate = true);
465 ~SimpleUpdater ();
466
467 public:
468 nonvirtual void Cancel ();
469
470 private:
471 TextStore& fTextStore;
472 InlineBufferMarkerSink fMarkerSink;
473 UpdateInfo fUpdateInfo;
474 bool fCanceled;
475 };
476
477 // Helpers for MarkerSink classes
478
479 /*
480 @CLASS: MarkerOfATypeMarkerSink<T>
481 @BASES: @'TextStore::MarkerSink'
482 @DESCRIPTION: <p>A MarkerSink template which grabs only Markers of subtype 'T' (using dynamic_cast<>). Similar to
483 @'MarkersOfATypeMarkerSink2Vector<T>' except that this class only grabs a SINGLE marker in that range
484 (which is stored in fResult). It is an error (detected via an assertion) if this class is ever used
485 to sink more than one marker (though its OK if it gets none - fResult will be nullptr).</p>
486 */
487 template <typename T>
488 class MarkerOfATypeMarkerSink : public TextStore::MarkerSink {
489 public:
490 MarkerOfATypeMarkerSink ();
491
492 virtual void Append (Marker* m) override;
493
494 T* fResult;
495 };
496
497 /*
498 @CLASS: MarkersOfATypeMarkerSink2Vector<T>
499 @BASES: @'TextStore::MarkerSink'
500 @DESCRIPTION: <p>A MarkerSink template which grabs only Markers of subtype 'T' (using dynamic_cast<>).
501 Dumps results into a vector named 'fResult'.</p>
502 */
503 template <typename T>
504 class MarkersOfATypeMarkerSink2Vector : public TextStore::MarkerSink {
505 public:
506 MarkersOfATypeMarkerSink2Vector ();
507
508 virtual void Append (Marker* m) override;
509
510 vector<T*> fResult;
511 };
512
513 /*
514 @CLASS: MarkersOfATypeMarkerSink2SmallStackBuffer<T>
515 @BASES: @'TextStore::MarkerSink'
516 @DESCRIPTION: <p>A MarkerSink template which grabs only Markers of subtype 'T' (using dynamic_cast<>).
517 Dumps results into a @'Memory::InlineBuffer<T*>' named 'fResult'.</p>
518 */
519 template <typename T>
520 class MarkersOfATypeMarkerSink2SmallStackBuffer : public TextStore::MarkerSink {
521 public:
522 MarkersOfATypeMarkerSink2SmallStackBuffer ();
523
524 virtual void Append (Marker* m) override;
525
526 Foundation::Memory::InlineBuffer<T*> fResult;
527 };
528
529}
530
531/*
532 ********************************************************************************
533 ***************************** Implementation Details ***************************
534 ********************************************************************************
535 */
536#include "TextStore.inl"
537
538#endif /*_Stroika_Frameworks_Led_TextStore_h_*/
CONTAINER::value_type * Start(CONTAINER &c)
For a contiguous container (such as a vector or basic_string) - find the pointer to the start of the ...