4#include "Stroika/Frameworks/StroikaPreComp.h"
16#include "Stroika/Foundation/Streams/TextToBinary.h"
18#include "SpellCheckEngine_Basic.h"
25using namespace Stroika::Frameworks;
26using namespace Stroika::Frameworks::Led;
32#if qIncludeBakedInDictionaries
33#if qStroika_Foundation_Common_Platform_MacOS
35#include "Dictionary-Compiled-US-English."
37#include "Dictionary-Compiled-US-English.inc"
41 inline bool IsASCIIUpper (Led_tChar c)
43 return isascii (c) and isupper (c);
46 inline bool AsymmetricCaseInsensativeCompare (Led_tChar wordChar, Led_tChar dictChar)
48 if (wordChar == dictChar) {
51 if (isascii (wordChar) and isupper (wordChar)) {
52 return tolower (wordChar) == dictChar;
56 inline bool AsymmetricCaseInsensativeCompare (
const Led_tChar* word,
const Led_tChar* dictWord)
58 const Led_tChar* wi = word;
59 const Led_tChar* di = dictWord;
61 if (not AsymmetricCaseInsensativeCompare (*wi, *di)) {
64 if (*wi ==
'\0' and *di ==
'\0') {
67 if (*wi ==
'\0' or *di ==
'\0') {
73 struct DictLookup_Compare {
74 DictLookup_Compare (
const Led_tChar* base)
78 bool operator() (
const SpellCheckEngine_Basic::InfoBlock& _Left,
const Led_tString& _Right)
const
81#if qBasicString_Missing_CompareOverload_T
82 bool answer = _Right.compare (fBase + _Left.fIndex, 0, _Left.fWordLen) > 0;
84 bool answer = _Right.compare (0, _Right.length (), fBase + _Left.fIndex, _Left.fWordLen) > 0;
87 Led_tString left = Led_tString{fBase + _Left.fIndex, fBase + _Left.fIndex + _Left.fWordLen};
88 Assert (answer == (left < _Right));
92 const Led_tChar* fBase;
97 inline bool IsASCIISpace (Led_tChar c)
99 return isascii (c) and isspace (c);
101 inline bool IsASCIIAlnum (Led_tChar c)
103 return isascii (c) and isalnum (c);
112#if qIncludeBakedInDictionaries
113const SpellCheckEngine_Basic::CompiledDictionary SpellCheckEngine_Basic::kDictionary_US_English (Dictionary_US_English);
116SpellCheckEngine_Basic::SpellCheckEngine_Basic (
const Dictionary* mainDictionary)
120 if (mainDictionary != NULL) {
121 fDictionaries.push_back (mainDictionary);
125SpellCheckEngine_Basic::~SpellCheckEngine_Basic ()
133bool SpellCheckEngine_Basic::ScanForUndefinedWord (
const Led_tChar* startBuf,
const Led_tChar* endBuf,
const Led_tChar** cursor,
134 const Led_tChar** wordStartResult,
const Led_tChar** wordEndResult)
141 Require (*cursor == NULL or (*cursor >= startBuf and *cursor <= endBuf));
143 if (*cursor == NULL) {
148 while (ScanForWord (startBuf, endBuf, cursor, wordStartResult, wordEndResult)) {
149 if (not LookupWord (Led_tString{*wordStartResult, *wordEndResult}) and not OtherStringToIgnore (Led_tString{*wordStartResult, *wordEndResult})) {
161bool SpellCheckEngine_Basic::LookupWord_ (
const Led_tString& checkWord, Led_tString* matchedWordResult)
168 if (LookupWordHelper_ (checkWord, matchedWordResult)) {
172 if (checkWord.empty ()) {
182 if (checkWord[0] < 127 and isupper (checkWord[0])) {
183 Led_tString w2 = checkWord;
184 w2[0] =
static_cast<char> (tolower (w2[0]));
185 if (LookupWordHelper_ (w2, matchedWordResult)) {
194 bool allUpper =
true;
195 Led_tString caseFixedWord = checkWord;
196 for (
auto i = caseFixedWord.begin (); i != caseFixedWord.end (); ++i) {
197 if (IsASCIIUpper (*i)) {
198 *i =
static_cast<char> (tolower (*i));
205 if (allUpper and LookupWordHelper_ (caseFixedWord, matchedWordResult)) {
208 caseFixedWord[0] = checkWord[0];
209 if (allUpper and LookupWordHelper_ (caseFixedWord, matchedWordResult)) {
231 size_t lastFoundAt = 0;
232 bool mustCheckLastSeg =
false;
233 for (
size_t i = checkWord.find (
'-'); i != Led_tString::npos or mustCheckLastSeg; i = checkWord.find (
'-', i + 1)) {
234 if (i == Led_tString::npos) {
235 Assert (mustCheckLastSeg);
236 mustCheckLastSeg =
false;
237 i = checkWord.length ();
240 mustCheckLastSeg =
true;
242 Led_tString segWord = Led_tString{checkWord.substr (lastFoundAt, i - lastFoundAt)};
244 if (segWord.empty ()) {
248 else if (LookupWord_ (segWord, &tmpMWR)) {
249 if (lastFoundAt != 0) {
250 genMWR += LED_TCHAR_OF (
"-");
254 else if (i != checkWord.length () and LookupWordHelper_ (segWord + LED_TCHAR_OF (
"-"), &tmpMWR)) {
255 if (lastFoundAt != 0) {
256 genMWR += LED_TCHAR_OF (
"-");
258 Assert (tmpMWR.length () >= 2);
259 genMWR += tmpMWR.substr (0, tmpMWR.length () - 1);
270 if (not genMWR.empty ()) {
271 if (matchedWordResult != NULL) {
272 *matchedWordResult = genMWR;
279 const wchar_t kRightSingleQuotationMark = L
'\x2019';
280 size_t apos = checkWord.find (kRightSingleQuotationMark);
281 if (apos != Led_tString::npos) {
282 Led_tString tmp = checkWord;
284 return LookupWord_ (tmp, matchedWordResult);
302bool SpellCheckEngine_Basic::LookupWordHelper_ (
const Led_tString& checkWord, Led_tString* matchedWordResult)
const
306 for (
auto i = fDictionaries.begin (); i != fDictionaries.end (); ++i) {
307 const Dictionary* dict = *i;
310 const InfoBlock* ibsStart = NULL;
311 const InfoBlock* ibsEnd = NULL;
312 dict->GetInfoBlocks (&ibsStart, &ibsEnd);
313 const Led_tChar* dictBufStart = dict->GetTextBase ();
315 const InfoBlock* r = lower_bound (ibsStart, ibsEnd, checkWord, DictLookup_Compare (dictBufStart));
317 Led_tString x = Led_tString{dictBufStart + (*r).fIndex, dictBufStart + (*r).fIndex + (*r).fWordLen};
318 if (x == checkWord) {
319 if (matchedWordResult != NULL) {
320 *matchedWordResult = x;
336bool SpellCheckEngine_Basic::OtherStringToIgnore (
const Led_tString& checkWord)
338 return OtherStringToIgnore_AllPunctuation (checkWord) or OtherStringToIgnore_Sentinels (checkWord) or OtherStringToIgnore_Number (checkWord);
341bool SpellCheckEngine_Basic::OtherStringToIgnore_AllPunctuation (
const Led_tString& checkWord)
343 for (
size_t i = 0; i < checkWord.length (); ++i) {
344 Led_tChar c = checkWord[i];
345 if (not
Character (c).IsPunctuation ()) {
352bool SpellCheckEngine_Basic::OtherStringToIgnore_Number (
const Led_tString& checkWord)
354 for (
size_t i = 0; i < checkWord.length (); ++i) {
355 Led_tChar c = checkWord[i];
// Copies the range [first, last) so that it ends at `result`, walking from
// the back towards the front. Because elements are moved last-to-first, the
// destination may overlap the source as long as it lies further right, which
// is how the suggestion lists are shifted down by one slot.
// Returns the iterator to the first element written.
// NOTE(review): the final `return result;` was not visible in the listing this
// was recovered from - confirm against the upstream file.
template <class BidirectionalIterator1, class BidirectionalIterator2>
BidirectionalIterator2 My_copy_backward_ (BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 result)
{
    while (last != first) {
        *(--result) = *(--last);
    }
    return result;
}
380 void AddToListsHelper (
const size_t kMaxSug, Led_tString topSugs[],
float topSugScores[],
float* scoreCutOff,
float s,
const Led_tString& w)
383 if (s > *scoreCutOff) {
385 for (
size_t ii = 0; ii < kMaxSug; ++ii) {
386 if (s > topSugScores[ii]) {
387 My_copy_backward_ (topSugs + ii, topSugs + kMaxSug - 1, topSugs + kMaxSug);
388 My_copy_backward_ (topSugScores + ii, topSugScores + kMaxSug - 1, topSugScores + kMaxSug);
390 topSugScores[ii] = s;
394 *scoreCutOff = topSugScores[kMaxSug - 1];
399vector<Led_tString> SpellCheckEngine_Basic::GenerateSuggestions (
const Led_tString& misspelledWord)
401 const size_t kMaxSug = 8;
402 Led_tString topSugs[kMaxSug];
403 float topSugScores[kMaxSug];
404 float scoreCutOff = -100000.0f;
406 fill (topSugScores, topSugScores + kMaxSug, scoreCutOff);
409 if (misspelledWord.find (
'-') == Led_tString::npos) {
410 size_t maxInitSegSize = misspelledWord.size () - 1;
411 for (
size_t initialWordSize = 1; initialWordSize < maxInitSegSize; ++initialWordSize) {
412 if (LookupWord (misspelledWord.substr (0, initialWordSize)) and LookupWord (misspelledWord.substr (initialWordSize))) {
413 AddToListsHelper (kMaxSug, topSugs, topSugScores, &scoreCutOff, -1.2f,
414 misspelledWord.substr (0, initialWordSize) + LED_TCHAR_OF (
"-") + misspelledWord.substr (initialWordSize));
420 for (
auto i = fDictionaries.begin (); i != fDictionaries.end (); ++i) {
421 const Dictionary* dict = *i;
423 const InfoBlock* ibsStart = NULL;
424 const InfoBlock* ibsEnd = NULL;
425 dict->GetInfoBlocks (&ibsStart, &ibsEnd);
426 const Led_tChar* dictBufStart = dict->GetTextBase ();
428 for (
const InfoBlock* ib = ibsStart; ib != ibsEnd; ++ib) {
429 Led_tString w = Led_tString{dictBufStart + (*ib).fIndex, dictBufStart + (*ib).fIndex + (*ib).fWordLen};
430 float s = Heuristic (misspelledWord, w, scoreCutOff);
431 AddToListsHelper (kMaxSug, topSugs, topSugScores, &scoreCutOff, s, w);
438 if (not misspelledWord.empty ()) {
439 Led_tString topSug = topSugs[0];
440 if (not topSug.empty ()) {
441 bool capitalize =
false;
442 bool allCaps =
false;
444 if (
Character (misspelledWord[0]).IsAlphabetic () and
Character (topSug[0]).IsAlphabetic ()) {
445 if (isupper (misspelledWord[0]) and not isupper (topSug[0])) {
449 for (
size_t i = 0; i < misspelledWord.length (); ++i) {
450 if (
Character (misspelledWord[i]).IsAlphabetic ()) {
451 if (isupper (misspelledWord[i])) {
462 Led_tString newWord = topSug;
464 for (
size_t i = 0; i < newWord.length (); ++i) {
465 if (
Character (newWord[i]).IsAlphabetic ()) {
466 newWord[i] =
static_cast<char> (toupper (newWord[i]));
470 float newScore = topSugScores[0];
472 AddToListsHelper (kMaxSug, topSugs, topSugScores, &scoreCutOff, newScore, newWord);
474 else if (capitalize) {
475 Led_tString newWord = topSug;
477 if (
Character (newWord[0]).IsAlphabetic ()) {
478 newWord[0] =
static_cast<char> (toupper (newWord[0]));
481 float newScore = topSugScores[0];
483 AddToListsHelper (kMaxSug, topSugs, topSugScores, &scoreCutOff, newScore, newWord);
489 size_t endScoreList = kMaxSug;
491 const float kTrigger = 1.5f;
492 float lastScore = 0.0f;
493 for (
size_t i = 0; i < kMaxSug; ++i) {
495 if (fabs (lastScore - topSugScores[i]) > kTrigger) {
500 lastScore = topSugScores[i];
504 vector<Led_tString> result;
505 for (
size_t j = 0; j < endScoreList; ++j) {
506 if (topSugs[j].empty ()) {
510 result.push_back (topSugs[j]);
516SpellCheckEngine_Basic::UDInterface* SpellCheckEngine_Basic::GetUDInterface ()
521TextBreaks* SpellCheckEngine_Basic::PeekAtTextBreaksUsed ()
523 return GetTextBreaker ().get ();
526float SpellCheckEngine_Basic::Heuristic (
const Led_tString& misspelledWord,
const Led_tString& candidateWord,
float atLeast)
530 size_t mwl = misspelledWord.length ();
531 size_t cwl = candidateWord.length ();
532 float thisCharImportance = 2.0f;
533 for (
size_t i = 0; i < mwl; ++i) {
535 h -= thisCharImportance * 5.0f;
537 else if (misspelledWord[i] != candidateWord[i]) {
541 if (AsymmetricCaseInsensativeCompare (misspelledWord[i], candidateWord[i])) {
542 h -= thisCharImportance * 0.1f;
546 if (i > 0 and AsymmetricCaseInsensativeCompare (misspelledWord[i], candidateWord[i - 1])) {
547 h -= thisCharImportance * 0.5f;
549 else if (i + 1 < cwl and AsymmetricCaseInsensativeCompare (misspelledWord[i], candidateWord[i + 1])) {
550 h -= thisCharImportance * 0.5f;
553 h -= thisCharImportance * 3.0f;
559 if (i < mwl and (misspelledWord[i] == candidateWord[i + 1]) and (misspelledWord[i + 1] == candidateWord[i])) {
560 float thisLetterCost = h - prevH;
561 h += (-thisLetterCost) * 1.2f;
572 thisCharImportance *= 0.5f;
575 thisCharImportance *= 0.8f;
578 thisCharImportance *= 0.9f;
582 const float kPenaltyForLettersOffEnd = 0.5f;
583 const float kPenaltyForLettersOffEndGrowthFactor = 1.2f;
584 float curPenalty = kPenaltyForLettersOffEnd;
585 for (
size_t i = mwl; i < cwl; ++i) {
588 curPenalty *= kPenaltyForLettersOffEndGrowthFactor;
602bool SpellCheckEngine_Basic::ScanForWord (
const Led_tChar* startBuf,
const Led_tChar* endBuf,
const Led_tChar** cursor,
603 const Led_tChar** wordStartResult,
const Led_tChar** wordEndResult)
610 Require ((*cursor >= startBuf and *cursor <= endBuf));
612 if (*cursor >= endBuf) {
616 size_t bufLen = endBuf - startBuf;
618 size_t initialCrs = *cursor - startBuf;
619 size_t p = initialCrs;
620 size_t wordStart = 0;
622 bool wordReal =
false;
624 while (not wordReal or wordStart < initialCrs) {
625 GetTextBreaker ()->FindWordBreaks (startBuf, bufLen, p, &wordStart, &wordEnd, &wordReal);
626 if (not wordReal or wordStart < initialCrs) {
628 p = Led_NextChar (&startBuf[p]) - startBuf;
643 *cursor = startBuf + wordEnd;
648 Assert (wordStart < wordEnd);
651 Assert (*cursor <= startBuf + p);
652 Assert ((*cursor < startBuf + p) or (*cursor == endBuf));
653 *cursor = startBuf + p;
655 *wordStartResult = startBuf + wordStart;
656 *wordEndResult = startBuf + wordEnd;
661vector<const SpellCheckEngine_Basic::Dictionary*> SpellCheckEngine_Basic::GetDictionaries ()
const
663 return fDictionaries;
666void SpellCheckEngine_Basic::SetDictionaries (
const vector<const Dictionary*>& dictionaries)
668 fDictionaries = dictionaries;
671#if qStroika_Foundation_Debug_AssertionsChecked
676void SpellCheckEngine_Basic::Invariant_ ()
const
678 Assert (
sizeof (InfoBlock) ==
sizeof (
int));
680 for (
const Dictionary* dict : fDictionaries) {
682 const InfoBlock* ibsStart = NULL;
683 const InfoBlock* ibsEnd = NULL;
684 dict->GetInfoBlocks (&ibsStart, &ibsEnd);
685 const Led_tChar* dictBufStart = dict->GetTextBase ();
686 const Led_tChar* dictBufEnd = dict->GetTextEnd ();
687 if (dictBufStart != dictBufEnd) {
690 Assert (dictBufStart <= dictBufEnd);
691 size_t bufSize = dictBufEnd - dictBufStart;
692 Led_tString prevWord;
693 for (
const InfoBlock* i = ibsStart; i != ibsEnd; ++i) {
695 Assert ((*i).fIndex < bufSize);
696 Assert ((*i).fIndex + (*i).fWordLen <= bufSize);
697 Assert ((*i).fWordLen > 0);
698 Led_tString w = Led_tString{dictBufStart + (*i).fIndex, dictBufStart + (*i).fIndex + (*i).fWordLen};
699 Assert (not w.empty ());
702 Assert (Led_tStrCmp (prevWord.c_str (), w.c_str ()) < 0);
711#if qStroika_Foundation_Debug_AssertionsChecked
712void SpellCheckEngine_Basic::RegressionTest ()
724void SpellCheckEngine_Basic::RegressionTest_1 ()
726 const Led_tChar* xxx = LED_TCHAR_OF (
"IBM\na\napple\ndog\nfrog\ngood\nthis\nzipper\n");
727 SpellCheckEngine_Basic::EditableDictionary testerDict;
728 testerDict.ReadFromBuffer (xxx, xxx + Led_tStrlen (xxx));
729 SpellCheckEngine_Basic tester (&testerDict);
730 const Led_tChar* testText = LED_TCHAR_OF (
"This is a very good test. ");
735 bool r1 = tester.LookupWord (LED_TCHAR_OF (
"Frog"));
741 bool r2n = tester.LookupWord (LED_TCHAR_OF (
"ziPPer"), &r2S);
742 bool r2y = tester.LookupWord (LED_TCHAR_OF (
"Zipper"), &r2S);
743 Assert (not r2n and r2y and r2S == LED_TCHAR_OF (
"zipper"));
748 Assert (not tester.LookupWord (LED_TCHAR_OF (
"ibm")));
749 Assert (not tester.LookupWord (LED_TCHAR_OF (
"Ibm")));
750 bool r3 = tester.LookupWord (LED_TCHAR_OF (
"IBM"), &r3S);
751 Assert (r3 and r3S == LED_TCHAR_OF (
"IBM"));
755 const Led_tChar* wordStart = NULL;
756 const Led_tChar* wordEnd = NULL;
757 const Led_tChar* p = NULL;
758 bool result = tester.ScanForUndefinedWord (testText, testText + Led_tStrlen (testText), &p, &wordStart, &wordEnd);
759 Assert (result and (Led_tString{wordStart, wordEnd} == LED_TCHAR_OF (
"is")));
763 const Led_tChar* cursor = NULL;
764 const Led_tChar* wordStartResult = NULL;
765 const Led_tChar* wordEndResult = NULL;
767 while (tester.ScanForUndefinedWord (testText, testText + Led_tStrlen (testText), &cursor, &wordStartResult, &wordEndResult)) {
769 Led_tString word = Led_tString{wordStartResult, wordEndResult};
771 if (nWordsFound == 1) {
772 Assert ((Led_tString{wordStartResult, wordEndResult} == LED_TCHAR_OF (
"is")));
774 if (nWordsFound == 2) {
775 Assert ((Led_tString{wordStartResult, wordEndResult} == LED_TCHAR_OF (
"very")));
777 if (nWordsFound == 3) {
778 Assert ((Led_tString{wordStartResult, wordEndResult} == LED_TCHAR_OF (
"test")));
781 Assert (nWordsFound == 3);
791SpellCheckEngine_Basic::EditableDictionary::~EditableDictionary ()
793 delete[] fDictBufStart;
796void SpellCheckEngine_Basic::EditableDictionary::AddWordToUserDictionary (
const Led_tString& word)
798 fSortedWordList.insert (word);
799 ConstructInfoBlocksEtcFromWordList ();
802const Led_tChar* SpellCheckEngine_Basic::EditableDictionary::GetTextBase ()
const
804 return fDictBufStart;
807const Led_tChar* SpellCheckEngine_Basic::EditableDictionary::GetTextEnd ()
const
812void SpellCheckEngine_Basic::EditableDictionary::GetInfoBlocks (
const InfoBlock** start,
const InfoBlock** end)
const
820void SpellCheckEngine_Basic::EditableDictionary::ReadFromBuffer (
const Led_tChar* readOnlyRAMDictStart,
const Led_tChar* readOnlyRAMDictEnd)
822 fSortedWordList.clear ();
826 for (
const Led_tChar* p = readOnlyRAMDictStart; p < readOnlyRAMDictEnd;) {
828 const Led_tChar* i1 = Led_tStrChr (p,
'\r');
829 const Led_tChar* i2 = Led_tStrChr (p,
'\n');
831 i1 = p + Led_tStrlen (p);
834 i2 = p + Led_tStrlen (p);
836 const Led_tChar* wordStart = p;
837 const Led_tChar* wordEnd = min (i1, i2);
838 if (wordStart != wordEnd) {
839 fSortedWordList.insert (Led_tString{wordStart, wordEnd});
843 if (p < readOnlyRAMDictEnd) {
851 ConstructInfoBlocksEtcFromWordList ();
854vector<Led_tChar> SpellCheckEngine_Basic::EditableDictionary::SaveToBuffer ()
const
856 StackBuffer<Led_tChar> buf{1};
858#if qStroika_Foundation_Common_Platform_Windows
859 const Led_tChar kLineTerm[] = LED_TCHAR_OF (
"\r\n");
860#elif qStroika_Foundation_Common_Platform_MacOS
861 const Led_tChar kLineTerm[] = LED_TCHAR_OF (
"\r");
863 const Led_tChar kLineTerm[] = LED_TCHAR_OF (
"\n");
865 const size_t kLineTerm_Length = Memory::NEltsOf (kLineTerm) - 1;
866 size_t totalBufSizeSoFar = 0;
867 for (
auto i = fSortedWordList.begin (); i != fSortedWordList.end (); ++i) {
869 size_t prevCopyTo = totalBufSizeSoFar;
870 totalBufSizeSoFar += (*i).length ();
871 buf.GrowToSize (totalBufSizeSoFar);
872 (void)::memcpy (
static_cast<Led_tChar*
> (buf) + prevCopyTo,
Containers::Start (*i), i->size () *
sizeof (Led_tChar));
875 size_t prevCopyTo = totalBufSizeSoFar;
876 totalBufSizeSoFar += kLineTerm_Length;
877 buf.GrowToSize (totalBufSizeSoFar);
878 (void)::memcpy (
static_cast<Led_tChar*
> (buf) + prevCopyTo, kLineTerm, kLineTerm_Length *
sizeof (Led_tChar));
881 return vector<Led_tChar> (
static_cast<Led_tChar*
> (buf),
static_cast<Led_tChar*
> (buf) + totalBufSizeSoFar);
888 template <
class InputIterator,
class OutputIterator>
889 OutputIterator my_copy_ (InputIterator first, InputIterator last, OutputIterator result)
891 while (first != last) {
900void SpellCheckEngine_Basic::EditableDictionary::ConstructInfoBlocksEtcFromWordList ()
903 delete[] fDictBufStart;
904 fDictBufStart = NULL;
906 fInfoBlocks.clear ();
908 size_t totalBlockSize = 0;
910 for (
auto i = fSortedWordList.begin (); i != fSortedWordList.end (); ++i) {
911 totalBlockSize += (*i).length ();
915 fDictBufStart =
new Led_tChar[totalBlockSize];
916 fDictBufEnd = fDictBufStart + totalBlockSize;
922 Led_tChar* intoBufPtr = fDictBufStart;
923 for (
auto i = fSortedWordList.begin (); i != fSortedWordList.end (); ++i) {
924 my_copy_ ((*i).begin (), (*i).end (), intoBufPtr);
927 (void)::memset (&iB, 0,
sizeof (iB));
928 iB.fIndex = intoBufPtr - fDictBufStart;
929 iB.fWordLen = (*i).length ();
931 Assert (iB.fWordLen > 0);
932 fInfoBlocks.push_back (iB);
934 intoBufPtr += (*i).length ();
943SpellCheckEngine_Basic::CompiledDictionary::CompiledDictionary (
const CompiledDictionaryData& data)
948const Led_tChar* SpellCheckEngine_Basic::CompiledDictionary::GetTextBase ()
const
950 return fData.fTextDataStart;
953const Led_tChar* SpellCheckEngine_Basic::CompiledDictionary::GetTextEnd ()
const
955 return fData.fTextDataEnd;
958void SpellCheckEngine_Basic::CompiledDictionary::GetInfoBlocks (
const InfoBlock** start,
const InfoBlock** end)
const
962 *start = fData.fInfoBlocksStart;
963 *end = fData.fInfoBlocksEnd;
972TextBreaks_SpellChecker::TextBreaks_SpellChecker ()
974#if qStroika_Foundation_Debug_AssertionsChecked
983TextBreaks_SpellChecker::CharacterClasses TextBreaks_SpellChecker::CharToCharacterClass (
const Led_tChar* startOfText,
size_t lengthOfText,
984 const Led_tChar* charToExamine)
const
986 switch (*charToExamine) {
996 if (charToExamine > startOfText and charToExamine < &startOfText[lengthOfText]) {
997 const Led_tChar* prevChar = Led_PreviousChar (startOfText, charToExamine);
998 const Led_tChar* nextChar = charToExamine + 1;
1001 if ((IsASCIIAlnum (*prevChar) and *nextChar ==
's') or (*prevChar ==
's' and IsASCIISpace (*nextChar))) {
1002 return (eWordClass);
1007 return inherited::CharToCharacterClass (startOfText, lengthOfText, charToExamine);
1010#if qStroika_Foundation_Debug_AssertionsChecked
1011void TextBreaks_SpellChecker::RegressionTest ()
1014 const Led_tChar* kTest = LED_TCHAR_OF (
"This is a good test of Simone's bug with the 'word'.");
1015 size_t wordEndResult = 0;
1018 FindLineBreaks (kTest, Led_tStrlen (kTest), 25, &wordEndResult, &wordReal);
1019 Assert (wordEndResult == 31);
1020 Assert (wordReal ==
true);
1030SpellCheckEngine_Basic_Simple::SpellCheckEngine_Basic_Simple ()
1031 : fMainDictionary{nullptr}
1035#if qIncludeBakedInDictionaries
1036 SetMainDictionary (&kDictionary_US_English);
1040SpellCheckEngine_Basic_Simple::~SpellCheckEngine_Basic_Simple ()
1045SpellCheckEngine_Basic_Simple::UDInterface* SpellCheckEngine_Basic_Simple::GetUDInterface ()
1050bool SpellCheckEngine_Basic_Simple::AddWordToUserDictionarySupported ()
const
1055void SpellCheckEngine_Basic_Simple::AddWordToUserDictionary (
const Led_tString& word)
1058 fUD->AddWordToUserDictionary (word);
1062const SpellCheckEngine_Basic_Simple::Dictionary* SpellCheckEngine_Basic_Simple::GetMainDictionary ()
const
1064 return fMainDictionary;
1067void SpellCheckEngine_Basic_Simple::SetMainDictionary (
const Dictionary* mainDictionary)
1069 fMainDictionary = mainDictionary;
1070 vector<const Dictionary*> dicts;
1071 if (fMainDictionary != NULL) {
1072 dicts.push_back (fMainDictionary);
1075 dicts.push_back (fUD);
1077 SetDictionaries (dicts);
1080filesystem::path SpellCheckEngine_Basic_Simple::GetUserDictionary ()
const
1085void SpellCheckEngine_Basic_Simple::SetUserDictionary (
const filesystem::path& userDictionary)
1087 fUDName = userDictionary;
1088 bool noUD = userDictionary.empty ();
1093 fUD =
new EditableDictionary{};
1096 SetMainDictionary (fMainDictionary);
1099void SpellCheckEngine_Basic_Simple::ReadFromUD ()
1105 Memory::BLOB b = IO::FileSystem::FileInputStream::New (filesystem::path (fUDName)).ReadAll ();
1106 span<const byte> rawByteSpan{b};
1108 size_t outCharCnt = converter.ComputeTargetCharacterBufferSize (rawByteSpan);
1110 auto charsRead = converter.Bytes2Characters (&rawByteSpan, span{fileData2});
1111 fUD->ReadFromBuffer (charsRead.data (), charsRead.data () + charsRead.size ());
1117void SpellCheckEngine_Basic_Simple::WriteToUD ()
1120 vector<Led_tChar> data = fUD->SaveToBuffer ();
1121 IO::FileSystem::FileOutputStream::Ptr writer = IO::FileSystem::FileOutputStream::New (filesystem::path (fUDName));
1122 Streams::TextToBinary::Writer::New (writer, UnicodeExternalEncodings::eUTF8, ByteOrderMark::eInclude).Write (span{data});
#define qStroika_Foundation_Debug_AssertionsChecked
The qStroika_Foundation_Debug_AssertionsChecked flag determines if assertions are checked and validat...
#define RequireNotNull(p)
CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt,...
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
CONTAINER::value_type * End(CONTAINER &c)
For a contiguous container (such as a vector or basic_string) - find the pointer to the end of the co...
CONTAINER::value_type * Start(CONTAINER &c)
For a contiguous container (such as a vector or basic_string) - find the pointer to the start of the ...
void ThrowIfNull(const Private_::ConstVoidStar &p, const HRESULT &hr)
Template specialization for ThrowIfNull (), for thing being thrown HRESULT - really throw HRESULTErro...