Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
TextBreaks.h
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Frameworks_Led_TextBreaks_h_
5#define _Stroika_Frameworks_Led_TextBreaks_h_ 1
6
7#include "Stroika/Frameworks/StroikaPreComp.h"
8
9#include "Stroika/Frameworks/Led/Support.h"
10
11/*
12@MODULE: TextBreaks
13@DESCRIPTION:
14 <p>This code handles finding word boundaries (for double click processing), and finding
15 points in a range of text where it is safe to word-wrap. This code knows nothing about the UI modules,
16 and only operates on strings of text (@'Led_tChar's).</p>
17 <p>Though this module contains a few implementations, you probably just want to use the
18 @'TextBreaks_DefaultImpl' implementation.</p>
19 */
20
21namespace Stroika::Frameworks::Led {
22
23 /*
24 @CLASS: TextBreaks
25 @DESCRIPTION:
26 <p>DOCS SOMEWHAT OBSOLETE - BASED ON ORIGINAL LED CODE.</p>
27
28 <p>Word/Row Break/Boundary calculation routines.</p>
29
30 <p>Note that because some script systems (notably Japanese) have different notions
31 of "words" for selection (e.g. double-click) and for row (aka line) breaking,
32 we have two correspondingly different notions supported here.</p>
33
34 <p>Also - we provide both static routines operating on strings, and routines similarly
35 named, operating on this textstore buffer. The reason for the string-based routines
36 is so that the EXACT same algorithm can be used on text not conveniently (or efficiently)
37 already stored in a TextStore buffer. We provide the TextStore based versions for
38 convenience - it is our main target of these sorts of operations.</p>
39
40 <p>One place where the behavior of these routines is not obvious from its name (and so we
41 must nail down by fiat) is what happens when asked to find the word starting from
42 a space character. Do we find the word BEFORE or AFTER the space(s)? Or do we consider
43 the string of spaces to be a word? In order to avoid the arbitrary choice of selecting
44 the forward or backward word (likely to subtly impact on GUI choices) we instead here
45 return a flag indicating if the word is REAL, or simply a run of spaces. If the caller
46 was looking for a real word and got a run of spaces - he can simply reset the position
47 we were looking from to the END of the run of spaces, or just before it.</p>
48 */
49 class TextBreaks {
50 protected:
51 TextBreaks () = default;
52
53 public:
54 virtual ~TextBreaks () = default;
55
56 public:
57 /*
58 @METHOD: TextBreaks::FindWordBreaks
59 @DESCRIPTION: <p>For the given text, and offset information, return the start and end of the next word. Here word
60 is defined rather losely. In particular, sequences of whitespace are considered a 'word'. In that case, the wordReal
61 value is returned false.</p>
62 <p>Must think a bit about generalizing this for other languages (though it was adequate for SJIS/Japanese).</p>
63 <p>NB: textOffsetToStartLookingForWord is zero-based.</p>
64 <p>No word was found - iff *wordStartResult and *wordEndResult are equal.</p>
65 <p> *wordStartResult and *wordEndResult are zero-based.</p>
66 */
67 virtual void FindWordBreaks (const Led_tChar* startOfText, size_t lengthOfText, size_t textOffsetToStartLookingForWord,
68 size_t* wordStartResult, size_t* wordEndResult, bool* wordReal) const = 0;
69
70 public:
71 /*
72 @METHOD: TextBreaks::FindLineBreaks
73 @DESCRIPTION: <p>For the given text, and offset information, return the next reasonable point in the text to break the row,
74 for word-wrapping (for example, a space). <code>wordReal</code> somehow flags if we were forced to break at an unnatural
75 place, but I cannot recall the details.</p>
76 <p>Must think a bit about generalizing this for other languages (though it was adequate for SJIS/Japanese, using the
77 Kinsoku rule, but that code no longer exists as part of Led). The basic API will probably persist, but the implemntation
78 will probably need to change significantly.</p>
79 */
80 virtual void FindLineBreaks (const Led_tChar* startOfText, size_t lengthOfText, size_t textOffsetToStartLookingForWord,
81 size_t* wordEndResult, bool* wordReal) const = 0;
82 };
83
84 /*
85 @CLASS: TextBreaks_Basic
86 @BASES: @'TextBreaks'
87 @DESCRIPTION: <p>The original text break algorithm I had implemented long ago. It's been touched up a bit in Led 3.0
88 to work better with UNICODE. But it's still not up to the 3.0 UNICODE spec - or even very close.</p>
89 */
90 class TextBreaks_Basic : public TextBreaks {
91 private:
92 using inherited = TextBreaks;
93
94 public:
95 TextBreaks_Basic ();
96
97 public:
98 virtual void FindWordBreaks (const Led_tChar* startOfText, size_t lengthOfText, size_t textOffsetToStartLookingForWord,
99 size_t* wordStartResult, size_t* wordEndResult, bool* wordReal) const override;
100 virtual void FindLineBreaks (const Led_tChar* startOfText, size_t lengthOfText, size_t textOffsetToStartLookingForWord,
101 size_t* wordEndResult, bool* wordReal) const override;
102
103 protected:
104 enum CharacterClasses {
105 eSpaceClass,
106 eSentinelClass, // Special character which always breaks on either side, and stands by itself
107 eWordClass,
108 eKanjiClass,
109 eKatakanaClass,
110 eHiraganaClass,
111 eRomanjiOrDigitClass,
112 eOtherCharacterClass // e.g. some punctuation... (;)
113 };
114
115 protected:
116 virtual CharacterClasses CharToCharacterClass (const Led_tChar* startOfText, size_t lengthOfText, const Led_tChar* charToExamine) const;
117
118#if qStroika_Foundation_Debug_AssertionsChecked
119 private:
120 nonvirtual void RegressionTest ();
121#endif
122 };
123
124 /*
125 @CLASS: TextBreaks_Basic_WP
126 @BASES: @'TextBreaks_Basic'
127 @DESCRIPTION: <p></p>
128 */
129 class TextBreaks_Basic_WP : public TextBreaks_Basic {
130 private:
131 using inherited = TextBreaks_Basic;
132
133 public:
134 TextBreaks_Basic_WP ();
135
136 protected:
137 virtual CharacterClasses CharToCharacterClass (const Led_tChar* startOfText, size_t lengthOfText, const Led_tChar* charToExamine) const override;
138
139#if qStroika_Foundation_Debug_AssertionsChecked
140 private:
141 nonvirtual void RegressionTest ();
142#endif
143 };
144
145 /*
146 @CLASS: TextBreaks_Basic_TextEditor
147 @BASES: @'TextBreaks_Basic'
148 @DESCRIPTION: <p></p>
149 */
150 class TextBreaks_Basic_TextEditor : public TextBreaks_Basic {
151 private:
152 using inherited = TextBreaks_Basic;
153
154 public:
155 TextBreaks_Basic_TextEditor ();
156
157 protected:
158 virtual CharacterClasses CharToCharacterClass (const Led_tChar* startOfText, size_t lengthOfText, const Led_tChar* charToExamine) const override;
159
160#if qStroika_Foundation_Debug_AssertionsChecked
161 private:
162 nonvirtual void RegressionTest ();
163#endif
164 };
165
166#if qStroika_Foundation_Common_Platform_MacOS
167 /*
168 @CLASS: TextBreaks_System
169 @BASES: @'TextBreaks'
170 @DESCRIPTION: <p>Similar to the behavior you got in Led 2.3 with the old 'qUseSystemWordBreakRoutine' define.</p>
171 <p>Right now - only implemented for MacOS.</p>
172 */
173 class TextBreaks_System : public TextBreaks {
174 public:
175 /*
176 */
177 public:
178 // NB: textOffsetToStartLookingForWord is zero-based.
179 // No word was found - iff *wordStartResult and *wordEndResult are equal.
180 // *wordStartResult and *wordEndResult are zero-based.
181 virtual void FindWordBreaks (const Led_tChar* startOfText, size_t lengthOfText, size_t textOffsetToStartLookingForWord,
182 size_t* wordStartResult, size_t* wordEndResult, bool* wordReal) const override;
183 virtual void FindLineBreaks (const Led_tChar* startOfText, size_t lengthOfText, size_t textOffsetToStartLookingForWord,
184 size_t* wordEndResult, bool* wordReal) const override;
185 };
186#endif
187
188 /*
189 @CLASS: TextBreaks_DefaultImpl
190 @BASES: @'TextBreaks'
191 @DESCRIPTION: <p>This alias maps to one of a number of different possible implementations of
192 the @'TextBreaks' abstract class. When you must construct a textbreaks class - this is
193 probably what you should use.</p>
194 <p>Note that it defaults to @'TextBreaks_Basic_WP'.</p>
195 */
196 using TextBreaks_DefaultImpl = TextBreaks_Basic_WP;
197
198}
199
200/*
201 ********************************************************************************
202 ***************************** Implementation Details ***************************
203 ********************************************************************************
204 */
205#include "TextBreaks.inl"
206
207#endif /*_Stroika_Frameworks_Led_TextBreaks_h_*/