Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
SpellCheckEngine_Basic.h
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Frameworks_Led_SpellCheckEngine_Basic_h_
5#define _Stroika_Frameworks_Led_SpellCheckEngine_Basic_h_ 1
6
7/*
8@MODULE: SpellCheckEngine_Basic
9@DESCRIPTION: <p></p>
10
11 */
12
13#include "Stroika/Frameworks/StroikaPreComp.h"
14
15#include <memory>
16#include <set>
17
18#include "SpellCheckEngine.h"
19#include "Support.h"
20#include "TextBreaks.h"
21
22namespace Stroika::Frameworks::Led {
23
24#if qFailToCompileLargeDataInitializedArraysBug
25#define qIncludeBakedInDictionaries 0 // forced to 0 whenever qFailToCompileLargeDataInitializedArraysBug is set
26#endif
27
28/*
29 @CONFIGVAR: qIncludeBakedInDictionaries
30 @DESCRIPTION: <p>Turning this on (it is currently OFF by default - see the #ifndef default below) includes into the binary pre-built dictionaries (currently just US-English).
31 Including this allows direct access to the US-English dictionary. However - it is large - adding about 2-3MB (depending
32 on whether you use UNICODE or single-byte - among other things) to the size of your binary.</p>
33 */
34#ifndef qIncludeBakedInDictionaries
35// DISABLE BY DEFAULT UNTIL WE PUT IN PLACE BETTER MECHANISM FOR 'RESOURCES' - as we do in HealthFrame for stuff like XSDs...
36#define qIncludeBakedInDictionaries 0
37#endif
38
39 /*
40 @CLASS: SpellCheckEngine_Basic
41 @DESCRIPTION: <p>
42 </p>
43 */
44 class SpellCheckEngine_Basic : public SpellCheckEngine {
45 private:
46 using inherited = SpellCheckEngine;
47
48 public:
49 class Dictionary;
50 class EditableDictionary;
51 class CompiledDictionary;
52
53 public:
54 SpellCheckEngine_Basic (const Dictionary* mainDictionary = NULL);
55 ~SpellCheckEngine_Basic ();
56
57 public:
58 virtual bool ScanForUndefinedWord (const Led_tChar* startBuf, const Led_tChar* endBuf, const Led_tChar** cursor,
59 const Led_tChar** wordStartResult, const Led_tChar** wordEndResult) override;
60
61 protected:
62 virtual bool LookupWord_ (const Led_tString& checkWord, Led_tString* matchedWordResult) override;
63
64 private:
65 nonvirtual bool LookupWordHelper_ (const Led_tString& checkWord, Led_tString* matchedWordResult) const;
66
67 protected:
68 virtual bool OtherStringToIgnore (const Led_tString& checkWord);
69 nonvirtual bool OtherStringToIgnore_AllPunctuation (const Led_tString& checkWord);
70 nonvirtual bool OtherStringToIgnore_Sentinels (const Led_tString& checkWord);
71 nonvirtual bool OtherStringToIgnore_Number (const Led_tString& checkWord);
72
73 public:
74 virtual vector<Led_tString> GenerateSuggestions (const Led_tString& misspelledWord) override;
75
76 public:
77 virtual UDInterface* GetUDInterface () override;
78
79 public:
80 virtual TextBreaks* PeekAtTextBreaksUsed () override;
81
82 private:
83 nonvirtual float Heuristic (const Led_tString& misspelledWord, const Led_tString& candidateWord, float atLeast);
84
85 private:
86 nonvirtual bool ScanForWord (const Led_tChar* startBuf, const Led_tChar* endBuf, const Led_tChar** cursor,
87 const Led_tChar** wordStartResult, const Led_tChar** wordEndResult);
88
89 public:
90 nonvirtual shared_ptr<TextBreaks> GetTextBreaker () const;
91 nonvirtual void SetTextBreaker (const shared_ptr<TextBreaks>& textBreaker);
92
93 private:
94 mutable shared_ptr<TextBreaks> fTextBreaker;
95
96 public:
97 struct InfoBlock {
98 unsigned int fIndex : 22;
99 unsigned int fWordLen : 8;
100 unsigned int fXXX : 2; // some flags - I forget the design - but I'll need this...
101 };
102
103#if qIncludeBakedInDictionaries
104 public:
105 static const CompiledDictionary kDictionary_US_English;
106#endif
107
108 public:
109 nonvirtual vector<const Dictionary*> GetDictionaries () const;
110 nonvirtual void SetDictionaries (const vector<const Dictionary*>& dictionaries);
111
112 private:
113 vector<const Dictionary*> fDictionaries;
114
115#if qStroika_Foundation_Debug_AssertionsChecked
116 protected:
117 virtual void Invariant_ () const override;
118#endif
119
120#if qStroika_Foundation_Debug_AssertionsChecked
121 public:
122 /*
123 @METHOD: SpellCheckEngine_Basic::RegressionTest
124 @DESCRIPTION: <p>This function only exists if @'qStroika_Foundation_Debug_AssertionsChecked' is on. When run, it performs a basic regression test.</p>
125 */
126 static void RegressionTest ();
127
128 private:
129 static void RegressionTest_1 ();
130#endif
131 };
132
133 class SpellCheckEngine_Basic::Dictionary {
134 public:
135 using InfoBlock = SpellCheckEngine_Basic::InfoBlock;
136
137 protected:
138 Dictionary ();
139
140 public:
141 virtual ~Dictionary ();
142
143 public:
144 virtual const Led_tChar* GetTextBase () const = 0;
145 virtual const Led_tChar* GetTextEnd () const = 0;
146 virtual void GetInfoBlocks (const InfoBlock** start, const InfoBlock** end) const = 0;
147 };
148
149 class SpellCheckEngine_Basic::EditableDictionary : public SpellCheckEngine_Basic::Dictionary {
150 private:
151 using inherited = SpellCheckEngine_Basic::Dictionary;
152
153 public:
154 EditableDictionary () = default;
155
156 public:
157 virtual ~EditableDictionary ();
158
159 public:
160 nonvirtual void AddWordToUserDictionary (const Led_tString& word);
161
162 public:
163 virtual const Led_tChar* GetTextBase () const override;
164 virtual const Led_tChar* GetTextEnd () const override;
165 virtual void GetInfoBlocks (const InfoBlock** start, const InfoBlock** end) const override;
166
167 public:
168 nonvirtual void ReadFromBuffer (const Led_tChar* readOnlyRAMDictStart, const Led_tChar* readOnlyRAMDictEnd);
169 nonvirtual vector<Led_tChar> SaveToBuffer () const;
170
171 private:
172 nonvirtual void ConstructInfoBlocksEtcFromWordList ();
173
174 private:
175 set<Led_tString> fSortedWordList;
176 Led_tChar* fDictBufStart{nullptr};
177 Led_tChar* fDictBufEnd{nullptr};
178 vector<InfoBlock> fInfoBlocks;
179 };
180
181 class SpellCheckEngine_Basic::CompiledDictionary : public SpellCheckEngine_Basic::Dictionary {
182 private:
183 using inherited = SpellCheckEngine_Basic::Dictionary;
184
185 public:
186 struct CompiledDictionaryData {
187 const Led_tChar* fTextDataStart;
188 const Led_tChar* fTextDataEnd;
189 const InfoBlock* fInfoBlocksStart;
190 const InfoBlock* fInfoBlocksEnd;
191 };
192
193 public:
194 CompiledDictionary (const CompiledDictionaryData& data);
195
196 public:
197 virtual const Led_tChar* GetTextBase () const override;
198 virtual const Led_tChar* GetTextEnd () const override;
199 virtual void GetInfoBlocks (const InfoBlock** start, const InfoBlock** end) const override;
200
201 private:
202 CompiledDictionaryData fData;
203 };
204
205 /*
206 @CLASS: TextBreaks_SpellChecker
207 @BASES: @'TextBreaks_Basic'
208 @DESCRIPTION: <p>Special purpose text-break implementation uses inside spell-checker. Not recomended for use elsewhere, but
209 it could be.</p>
210 */
211 class TextBreaks_SpellChecker : public TextBreaks_Basic {
212 private:
213 using inherited = TextBreaks_Basic;
214
215 public:
216 TextBreaks_SpellChecker ();
217
218 protected:
219 virtual CharacterClasses CharToCharacterClass (const Led_tChar* startOfText, size_t lengthOfText, const Led_tChar* charToExamine) const override;
220
221#if qStroika_Foundation_Debug_AssertionsChecked
222 private:
223 nonvirtual void RegressionTest ();
224#endif
225 };
226
227 /*
228 @CLASS: SpellCheckEngine_Basic_Simple
229 @DESCRIPTION: <p>This is a simple wrapper providing most pre-packaged spellcheck functionality a spellchekcing application would typically want.
230 You easily specify one pre-built 'system' dictionary, and one optional user-dictionary (you just specify the file name
231 and the rest is taken care of automatically).
232 </p>
233 */
234 class SpellCheckEngine_Basic_Simple : public SpellCheckEngine_Basic, private SpellCheckEngine::UDInterface {
235 private:
236 using inherited = SpellCheckEngine_Basic;
237
238 public:
239 using SpellCheckEngine::UDInterface;
240
241 public:
242 SpellCheckEngine_Basic_Simple ();
243 ~SpellCheckEngine_Basic_Simple ();
244
245 public:
246 virtual UDInterface* GetUDInterface () override;
247
248 // From SpellCheckEngine::UDInterface
249 public:
250 virtual bool AddWordToUserDictionarySupported () const override;
251 virtual void AddWordToUserDictionary (const Led_tString& word) override;
252
253 public:
254 nonvirtual const Dictionary* GetMainDictionary () const;
255 nonvirtual void SetMainDictionary (const Dictionary* mainDictionary);
256
257 private:
258 const Dictionary* fMainDictionary;
259
260 public:
261 nonvirtual filesystem::path GetUserDictionary () const;
262 nonvirtual void SetUserDictionary (const filesystem::path& userDictionary);
263
264 private:
265 filesystem::path fUDName;
266 EditableDictionary* fUD;
267
268 private:
269 nonvirtual void ReadFromUD ();
270 nonvirtual void WriteToUD ();
271 };
272
273}
274
275/*
276 ********************************************************************************
277 ***************************** Implementation Details ***************************
278 ********************************************************************************
279 */
280#include "SpellCheckEngine_Basic.inl"
281
282#endif /*_Stroika_Frameworks_Led_SpellCheckEngine_Basic_h_*/