Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
StyledTextIO_HTML.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Frameworks/StroikaPreComp.h"
5
6#include <cctype>
7#include <cstdio> // for sprintf()
8
12#include "Stroika/Foundation/Characters/LineEndings.h"
13#include "Stroika/Frameworks/Led/Config.h"
14#include "Stroika/Frameworks/Led/StyledTextEmbeddedObjects.h"
15
16#include "StyledTextIO_HTML.h"
17
18using namespace Stroika::Foundation;
20
21using namespace Stroika::Frameworks;
22using namespace Stroika::Frameworks::Led;
23using namespace Stroika::Frameworks::Led::StyledTextIO;
24
25namespace {
26
/*
 *  Parse the leading base-10 integer of 's' (strtol rules: optional leading whitespace and sign,
 *  trailing non-digit characters are ignored).
 *
 *  On success store the value in '*t' and return true. Return false - leaving '*t' untouched - when
 *  no digits could be consumed, or when the parsed value does not fit in an int (previously such a
 *  value was silently truncated on platforms where long is wider than int).
 */
bool ParseStringToInt_ (const string& s, int* t)
{
    const char* const start  = s.c_str ();
    char*             end    = nullptr;
    const long        parsed = strtol (start, &end, 10);
    if (end == start) {
        return false; // nothing that looks like a number
    }
    // Round-trip through int to detect values that would be truncated by the narrowing.
    const int narrowed = static_cast<int> (parsed);
    if (static_cast<long> (narrowed) != parsed) {
        return false;
    }
    *t = narrowed;
    return true;
}
40
41 inline bool IsASCIISpace_ (Led_tChar c)
42 {
43 return isascii (c) and isspace (c);
44 }
45
/*
 *  Color keyword => six hex digit "RRGGBB" value.
 *  Looked up (ignoring case) by ParseColorString_ () before it parses the hex digits.
 */
constexpr pair<string_view, string_view> kColorNameTable_[] = {
    {"black", "000000"},
    {"silver", "C0C0C0"},
    {"gray", "808080"},
    {"white", "FFFFFF"},
    {"maroon", "800000"},
    {"red", "FF0000"},
    {"purple", "800080"},
    {"fuchsia", "FF00FF"},
    {"green", "008000"},
    {"lime", "00FF00"},
    {"olive", "808000"},
    {"yellow", "FFFF00"},
    {"navy", "000080"},
    {"blue", "0000FF"},
    {"teal", "008080"},
    {"aqua", "00FFFF"},
};
51
52 inline char NumToHexChar_ (unsigned i)
53 {
54 Require (i <= 15);
55 if (i <= 9) {
56 return static_cast<char> (i + '0');
57 }
58 else {
59 return static_cast<char> (i - 10 + 'a');
60 }
61 }
/*
 *  Map a hexadecimal digit (either case) to its numeric value 0..15.
 *  Any character that is not a hex digit maps to 0.
 */
inline unsigned short HexCharToNum_ (char c)
{
    int value = 0;
    if ('0' <= c and c <= '9') {
        value = c - '0';
    }
    else if ('a' <= c and c <= 'f') {
        value = c - 'a' + 10;
    }
    else if ('A' <= c and c <= 'F') {
        value = c - 'A' + 10;
    }
    return static_cast<unsigned short> (value);
}
77 inline bool ParseColorString_ (string colorStr, Color* color)
78 {
79 for (size_t i = 0; i < Memory::NEltsOf (kColorNameTable_); ++i) {
80 if (Led_CasedStringsEqual (colorStr, kColorNameTable_[i].first)) {
81 colorStr = kColorNameTable_[i].second;
82 break;
83 }
84 }
85 // Cheezy quick hack color parser - for spr# 0622 - LGP990827
86 if (colorStr.length () == 6) {
87 unsigned short red = HexCharToNum_ (colorStr[0]) * 16 + HexCharToNum_ (colorStr[1]);
88 unsigned short green = HexCharToNum_ (colorStr[2]) * 16 + HexCharToNum_ (colorStr[3]);
89 unsigned short blue = HexCharToNum_ (colorStr[4]) * 16 + HexCharToNum_ (colorStr[5]);
90 *color = Color (red == 255 ? 0xffff : red << 8, green == 255 ? 0xffff : green << 8, blue == 255 ? 0xffff : blue << 8);
91 return true;
92 }
93 return false;
94 }
95}
96
97/*
98 ********************************************************************************
99 ************************************ HTMLInfo **********************************
100 ********************************************************************************
101 */
102HTMLInfo::EntityRefMapEntry HTMLInfo::sDefaultEntityRefMapTable[] = {
103
104 EntityRefMapEntry{"euro", 0x20ac}, // As part of bug request SPR#0852 - this number was given to us. Not in the HTML 4.0 spec (but must be somewhere!)
105
106 /*
107 * FROM http://www.w3.org/TR/WD-html40-970708/sgml/entities.html#h-10.5.1
108 */
109 EntityRefMapEntry{"nbsp", 0x00a0},
110 EntityRefMapEntry{"iexcl", 0x00a1},
111 EntityRefMapEntry{"cent", 0x00a2},
112 EntityRefMapEntry{"pound", 0x00a3},
113 EntityRefMapEntry{"curren", 0x00a4},
114 EntityRefMapEntry{"yen", 0x00a5},
115 EntityRefMapEntry{"brvbar", 0x00a6},
116 EntityRefMapEntry{"sect", 0x00a7},
117 EntityRefMapEntry{"uml", 0x00a8},
118 EntityRefMapEntry{"copy", 0x00a9},
119 EntityRefMapEntry{"ordf", 0x00aa},
120 EntityRefMapEntry{"laquo", 0x00ab},
121 EntityRefMapEntry{"not", 0x00ac},
122 EntityRefMapEntry{"shy", 0x00ad},
123 EntityRefMapEntry{"reg", 0x00ae},
124 EntityRefMapEntry{"macr", 0x00af},
125 EntityRefMapEntry{"deg", 0x00b0},
126 EntityRefMapEntry{"plusmn", 0x00b1},
127 EntityRefMapEntry{"sup2", 0x00b2},
128 EntityRefMapEntry{"sup3", 0x00b3},
129 EntityRefMapEntry{"acute", 0x00b4},
130 EntityRefMapEntry{"micro", 0x00b5},
131 EntityRefMapEntry{"para", 0x00b6},
132 EntityRefMapEntry{"middot", 0x00b7},
133 EntityRefMapEntry{"cedil", 0x00b8},
134 EntityRefMapEntry{"sup1", 0x00b9},
135 EntityRefMapEntry{"ordm", 0x00ba},
136 EntityRefMapEntry{"raquo", 0x00bb},
137 EntityRefMapEntry{"frac14", 0x00bc},
138 EntityRefMapEntry{"frac12", 0x00bd},
139 EntityRefMapEntry{"frac34", 0x00be},
140 EntityRefMapEntry{"iquest", 0x00bf},
141 EntityRefMapEntry{"Agrave", 0x00c0},
142 EntityRefMapEntry{"Aacute", 0x00c1},
143 EntityRefMapEntry{"Acirc", 0x00c2},
144 EntityRefMapEntry{"Atilde", 0x00c3},
145 EntityRefMapEntry{"Auml", 0x00c4},
146 EntityRefMapEntry{"Aring", 0x00c5},
147 EntityRefMapEntry{"AElig", 0x00c6},
148 EntityRefMapEntry{"Ccedil", 0x00c7},
149 EntityRefMapEntry{"Egrave", 0x00c8},
150 EntityRefMapEntry{"Eacute", 0x00c9},
151 EntityRefMapEntry{"Ecirc", 0x00ca},
152 EntityRefMapEntry{"Euml", 0x00cb},
153 EntityRefMapEntry{"Igrave", 0x00cc},
154 EntityRefMapEntry{"Iacute", 0x00cd},
155 EntityRefMapEntry{"Icirc", 0x00ce},
156 EntityRefMapEntry{"Iuml", 0x00cf},
157 EntityRefMapEntry{"ETH", 0x00d0},
158 EntityRefMapEntry{"Ntilde", 0x00d1},
159 EntityRefMapEntry{"Ograve", 0x00d2},
160 EntityRefMapEntry{"Oacute", 0x00d3},
161 EntityRefMapEntry{"Ocirc", 0x00d4},
162 EntityRefMapEntry{"Otilde", 0x00d5},
163 EntityRefMapEntry{"Ouml", 0x00d6},
164 EntityRefMapEntry{"times", 0x00d7},
165 EntityRefMapEntry{"Oslash", 0x00d8},
166 EntityRefMapEntry{"Ugrave", 0x00d9},
167 EntityRefMapEntry{"Uacute", 0x00da},
168 EntityRefMapEntry{"Ucirc", 0x00db},
169 EntityRefMapEntry{"Uuml", 0x00dc},
170 EntityRefMapEntry{"Yacute", 0x00dd},
171 EntityRefMapEntry{"THORN", 0x00de},
172 EntityRefMapEntry{"szlig", 0x00df},
173 EntityRefMapEntry{"agrave", 0x00e0},
174 EntityRefMapEntry{"aacute", 0x00e1},
175 EntityRefMapEntry{"acirc", 0x00e2},
176 EntityRefMapEntry{"atilde", 0x00e3},
177 EntityRefMapEntry{"auml", 0x00e4},
178 EntityRefMapEntry{"aring", 0x00e5},
179 EntityRefMapEntry{"aelig", 0x00e6},
180 EntityRefMapEntry{"ccedil", 0x00e7},
181 EntityRefMapEntry{"egrave", 0x00e8},
182 EntityRefMapEntry{"eacute", 0x00e9},
183 EntityRefMapEntry{"ecirc", 0x00ea},
184 EntityRefMapEntry{"euml", 0x00eb},
185 EntityRefMapEntry{"igrave", 0x00ec},
186 EntityRefMapEntry{"iacute", 0x00ed},
187 EntityRefMapEntry{"icirc", 0x00ee},
188 EntityRefMapEntry{"iuml", 0x00ef},
189 EntityRefMapEntry{"eth", 0x00f0},
190 EntityRefMapEntry{"ntilde", 0x00f1},
191 EntityRefMapEntry{"ograve", 0x00f2},
192 EntityRefMapEntry{"oacute", 0x00f3},
193 EntityRefMapEntry{"ocirc", 0x00f4},
194 EntityRefMapEntry{"otilde", 0x00f5},
195 EntityRefMapEntry{"ouml", 0x00f6},
196 EntityRefMapEntry{"divide", 0x00f7},
197 EntityRefMapEntry{"oslash", 0x00f8},
198 EntityRefMapEntry{"ugrave", 0x00f9},
199 EntityRefMapEntry{"uacute", 0x00fa},
200 EntityRefMapEntry{"ucirc", 0x00fb},
201 EntityRefMapEntry{"uuml", 0x00fc},
202 EntityRefMapEntry{"yacute", 0x00fd},
203 EntityRefMapEntry{"thorn", 0x00fe},
204 EntityRefMapEntry{"yuml", 0x00ff},
205
206 /*
207 * FROM http://www.w3.org/TR/WD-html40-970708/sgml/entities.html#h-10.5.2
208 */
209 // Latin Extended-B
210 EntityRefMapEntry{"fnof", 0x0192},
211
212 // Greek
213 EntityRefMapEntry{"Alpha", 0x391},
214 EntityRefMapEntry{"Beta", 0x392},
215 EntityRefMapEntry{"Gamma", 0x393},
216 EntityRefMapEntry{"Delta", 0x394},
217 EntityRefMapEntry{"Epsilon", 0x395},
218 EntityRefMapEntry{"Zeta", 0x396},
219 EntityRefMapEntry{"Eta", 0x397},
220 EntityRefMapEntry{"Theta", 0x398},
221 EntityRefMapEntry{"Iota", 0x399},
222 EntityRefMapEntry{"Kappa", 0x39a},
223 EntityRefMapEntry{"Lambda", 0x39b},
224 EntityRefMapEntry{"Mu", 0x39c},
225 EntityRefMapEntry{"Nu", 0x39d},
226 EntityRefMapEntry{"Xi", 0x39e},
227 EntityRefMapEntry{"Omicron", 0x39f},
228 EntityRefMapEntry{"Pi", 0x3a0},
229 EntityRefMapEntry{"Rho", 0x3a1},
230 EntityRefMapEntry{"Sigma", 0x3a3},
231 EntityRefMapEntry{"Tau", 0x3a4},
232 EntityRefMapEntry{"Upsilon", 0x3a5},
233 EntityRefMapEntry{"Phi", 0x3a6},
234 EntityRefMapEntry{"Chi", 0x3a7},
235 EntityRefMapEntry{"Psi", 0x3a8},
236 EntityRefMapEntry{"Omega", 0x3a9},
237 EntityRefMapEntry{"alpha", 0x03b1},
238 EntityRefMapEntry{"beta", 0x03b2},
239 EntityRefMapEntry{"gamma", 0x03b3},
240 EntityRefMapEntry{"delta", 0x03b4},
241 EntityRefMapEntry{"epsilon", 0x03b5},
242 EntityRefMapEntry{"zeta", 0x03b6},
243 EntityRefMapEntry{"eta", 0x03b7},
244 EntityRefMapEntry{"theta", 0x03b8},
245 EntityRefMapEntry{"iota", 0x03b9},
246 EntityRefMapEntry{"kappa", 0x03ba},
247 EntityRefMapEntry{"lambda", 0x03bb},
248 EntityRefMapEntry{"mu", 0x03bc},
249 EntityRefMapEntry{"nu", 0x03bd},
250 EntityRefMapEntry{"xi", 0x03be},
251 EntityRefMapEntry{"omicron", 0x03bf},
252 EntityRefMapEntry{"pi", 0x03c0},
253 EntityRefMapEntry{"rho", 0x03c1},
254 EntityRefMapEntry{"sigmaf", 0x03c2},
255 EntityRefMapEntry{"sigma", 0x03c3},
256 EntityRefMapEntry{"tau", 0x03c4},
257 EntityRefMapEntry{"upsilon", 0x03c5},
258 EntityRefMapEntry{"phi", 0x03c6},
259 EntityRefMapEntry{"chi", 0x03c7},
260 EntityRefMapEntry{"psi", 0x03c8},
261 EntityRefMapEntry{"omega", 0x03c9},
262 EntityRefMapEntry{"thetasym", 0x03d1},
263 EntityRefMapEntry{"upsih", 0x03d2},
264 EntityRefMapEntry{"piv", 0x03d6},
265
266 // General Punctuation
267 EntityRefMapEntry{"bull", 0x2022},
268 EntityRefMapEntry{"hellip", 0x2026},
269 EntityRefMapEntry{"prime", 0x2032},
270 EntityRefMapEntry{"Prime", 0x2033},
271 EntityRefMapEntry{"oline", 0x203e},
272 EntityRefMapEntry{"frasl", 0x2044},
273
274 // Letterlike Symbols
275 EntityRefMapEntry{"weierp", 0x2118},
276 EntityRefMapEntry{"image", 0x2111},
277 EntityRefMapEntry{"real", 0x211c},
278 EntityRefMapEntry{"trade", 0x2122},
279 EntityRefMapEntry{"alefsym", 0x2135},
280
281 // Arrows
282 EntityRefMapEntry{"larr", 0x2190},
283 EntityRefMapEntry{"uarr", 0x2191},
284 EntityRefMapEntry{"rarr", 0x2192},
285 EntityRefMapEntry{"darr", 0x2193},
286 EntityRefMapEntry{"harr", 0x2194},
287 EntityRefMapEntry{"crarr", 0x21b5},
288 EntityRefMapEntry{"lArr", 0x21d0},
289 EntityRefMapEntry{"uArr", 0x21d1},
290 EntityRefMapEntry{"rArr", 0x21d2},
291 EntityRefMapEntry{"dArr", 0x21d3},
292 EntityRefMapEntry{"hArr", 0x21d4},
293
294 // Mathematical Operators
295 //THESE ARE IN DECIMAL - CONVERT TO HEX TO BE CONSISTENT WITH OTHERS!!!!
296 EntityRefMapEntry{"forall", 8704},
297 EntityRefMapEntry{"part", 8706},
298 EntityRefMapEntry{"exist", 8707},
299 EntityRefMapEntry{"empty", 8709},
300 EntityRefMapEntry{"nabla", 8711},
301 EntityRefMapEntry{"isin", 8712},
302 EntityRefMapEntry{"notin", 8713},
303 EntityRefMapEntry{"ni", 8715},
304 EntityRefMapEntry{"prod", 8719},
305 EntityRefMapEntry{"sum", 8721},
306 EntityRefMapEntry{"minus", 8722},
307 EntityRefMapEntry{"lowast", 8727},
308 EntityRefMapEntry{"radic", 8730},
309 EntityRefMapEntry{"prop", 8733},
310 EntityRefMapEntry{"infin", 8734},
311 EntityRefMapEntry{"ang", 8736},
312 EntityRefMapEntry{"and", 8869},
313 EntityRefMapEntry{"or", 8870},
314 EntityRefMapEntry{"cap", 8745},
315 EntityRefMapEntry{"cup", 8746},
316 EntityRefMapEntry{"int", 8747},
317 EntityRefMapEntry{"there4", 8756},
318 EntityRefMapEntry{"sim", 8764},
319 EntityRefMapEntry{"cong", 8773},
320 EntityRefMapEntry{"asymp", 8776},
321 EntityRefMapEntry{"ne", 8800},
322 EntityRefMapEntry{"equiv", 8801},
323 EntityRefMapEntry{"le", 8804},
324 EntityRefMapEntry{"ge", 8805},
325 EntityRefMapEntry{"sub", 8834},
326 EntityRefMapEntry{"sup", 8835},
327 EntityRefMapEntry{"nsub", 8836},
328 EntityRefMapEntry{"sube", 8838},
329 EntityRefMapEntry{"supe", 8839},
330 EntityRefMapEntry{"oplus", 8853},
331 EntityRefMapEntry{"otimes", 8855},
332 EntityRefMapEntry{"perp", 8869},
333 EntityRefMapEntry{"sdot", 8901},
334
335 // Miscellaneous Technical
336 //THESE ARE IN DECIMAL - CONVERT TO HEX TO BE CONSISTENT WITH OTHERS!!!!
337 EntityRefMapEntry{"lceil", 8968},
338 EntityRefMapEntry{"rceil", 8969},
339 EntityRefMapEntry{"lfloor", 8970},
340 EntityRefMapEntry{"rfloor", 8971},
341 EntityRefMapEntry{"lang", 9001},
342 EntityRefMapEntry{"rang", 9002},
343
344 // Geometric Shapes
345 //THESE ARE IN DECIMAL - CONVERT TO HEX TO BE CONSISTENT WITH OTHERS!!!!
346 EntityRefMapEntry{"loz", 9674},
347
348 // Miscellaneous Symbols
349 //THESE ARE IN DECIMAL - CONVERT TO HEX TO BE CONSISTENT WITH OTHERS!!!!
350 EntityRefMapEntry{"spades", 9824},
351 EntityRefMapEntry{"clubs", 9827},
352 EntityRefMapEntry{"hearts", 9829},
353 EntityRefMapEntry{"diams", 9830},
354
355 /*
356 * FROM http://www.w3.org/TR/WD-html40-970708/sgml/entities.html#h-10.5.3
357 */
358
359 // C0 Controls and Basic Latin
360 EntityRefMapEntry{"quot", 0x0022},
361 EntityRefMapEntry{"amp", 0x0026},
362 EntityRefMapEntry{"lt", 0x003c},
363 EntityRefMapEntry{"gt", 0x003e},
364
365 // Latin Extended-A
366 EntityRefMapEntry{"OElig", 0x0152},
367 EntityRefMapEntry{"oelig", 0x0153},
368 EntityRefMapEntry{"Scaron", 0x0160},
369 EntityRefMapEntry{"scaron", 0x0161},
370 EntityRefMapEntry{"Yuml", 0x0178},
371
372 // Spacing Modifier Letters
373 EntityRefMapEntry{"circ", 0x2c6},
374 EntityRefMapEntry{"tilde", 0x2dc},
375
376 // General Punctuation
377 EntityRefMapEntry{"ensp", 8194},
378 EntityRefMapEntry{"emsp", 8195},
379 EntityRefMapEntry{"thinsp", 8201},
380 EntityRefMapEntry{"zwnj", 8204},
381 EntityRefMapEntry{"zwj", 8205},
382 EntityRefMapEntry{"lrm", 8206},
383 EntityRefMapEntry{"rlm", 8207},
384 EntityRefMapEntry{"ndash", 0x2013},
385 EntityRefMapEntry{"mdash", 0x2014},
386 EntityRefMapEntry{"lsquo", 0x2018},
387 EntityRefMapEntry{"rsquo", 0x2019},
388 EntityRefMapEntry{"sbquo", 0x201a},
389 EntityRefMapEntry{"ldquo", 0x201c},
390 EntityRefMapEntry{"rdquo", 0x201d},
391 EntityRefMapEntry{"bdquo", 0x201e},
392 EntityRefMapEntry{"dagger", 0x2020},
393 EntityRefMapEntry{"Dagger", 0x2021},
394 EntityRefMapEntry{"permil", 0x2030},
395 EntityRefMapEntry{"lsaquo", 0x2039},
396 EntityRefMapEntry{"rsaquo", 0x203a},
397};
398const size_t HTMLInfo::kDefaultEntityRefMapTable_Count = Memory::NEltsOf (HTMLInfo::sDefaultEntityRefMapTable);
399
400FontSpecification::FontSize HTMLInfo::HTMLFontSizeToRealFontSize (int size)
401{
402 size = min (size, 7);
403 size = max (size, 1);
404 switch (size) {
405 case 1:
406 return 7;
407 case 2:
408 return 9;
409 case 3:
410 return 10;
411 case 4:
412 return 12;
413 case 5:
414 return 14;
415 case 6:
416 return 18;
417 case 7:
418 return 24;
419 default:
420 Assert (false);
421 }
422 Assert (false);
423 return 12;
424}
425
426int HTMLInfo::RealFontSizeToHTMLFontSize (FontSpecification::FontSize size)
427{
428 if (size <= 7) {
429 return 1;
430 }
431 else if (size <= 9) {
432 return 2;
433 }
434 else if (size <= 10) {
435 return 3;
436 }
437 else if (size <= 12) {
438 return 4;
439 }
440 else if (size <= 14) {
441 return 5;
442 }
443 else if (size <= 18) {
444 return 6;
445 }
446 else {
447 return 7;
448 }
449}
450
451/*
452 ********************************************************************************
453 ******************************* StyledTextIOReader_HTML ************************
454 ********************************************************************************
455 */
456StyledTextIOReader_HTML::StyledTextIOReader_HTML (SrcStream* srcStream, SinkStream* sinkStream, HTMLInfo* saveHTMLInfoInto)
457 : StyledTextIOReader{srcStream, sinkStream}
458 , fSaveHTMLInfoInto{saveHTMLInfoInto}
459{
460 if (fSaveHTMLInfoInto != nullptr) {
461 *fSaveHTMLInfoInto = HTMLInfo (); // reset to default values before reading...
462 }
463 fFontStack.push_back (GetStaticDefaultFont ());
464}
465
466void StyledTextIOReader_HTML::Read ()
467{
468 while (true) {
469 size_t lastWroteBefore = GetSrcStream ().current_offset ();
470 switch (ScanTilNextHTMLThingy ()) {
471 case eEntityRef: {
472 EmitText (MapInputTextToTString (GrabString (lastWroteBefore)));
473 size_t curPos = GetSrcStream ().current_offset ();
474 ScanTilAfterHTMLThingy (eEntityRef);
475 HandleHTMLThingy (eEntityRef, GrabString (curPos));
476 } break;
477
478 case eTag: {
479 EmitText (MapInputTextToTString (GrabString (lastWroteBefore)));
480 size_t curPos = GetSrcStream ().current_offset ();
481 ScanTilAfterHTMLThingy (eTag);
482 HandleHTMLThingy (eTag, GrabString (curPos));
483 } break;
484
485 case eBangComment: {
486 ScanTilAfterHTMLThingy (eBangComment);
487 } break;
488
489 case eEOF: {
490 EmitText (MapInputTextToTString (GrabString (lastWroteBefore)));
491 GetSinkStream ().EndOfBuffer ();
492 return;
493 } break;
494
495 default: {
496 Assert (false);
497 } break;
498 }
499 }
500}
501
502bool StyledTextIOReader_HTML::QuickLookAppearsToBeRightFormat ()
503{
504 SrcStreamSeekSaver savePos{GetSrcStream ()};
505
506Again:
507 if (GetSrcStream ().current_offset () >= 1024) {
508 return false;
509 }
510 switch (ScanTilNextHTMLThingy ()) {
511 case eEntityRef: {
512 ScanTilAfterHTMLThingy (eEntityRef);
513 goto Again;
514 } break;
515
516 case eTag: {
517 // If the first tag appears before the first 1K, and if it is one of a set of common start
518 // tags, then it is probably a valid HTML file...
519 if (GetSrcStream ().current_offset () < 1024) {
520 size_t curPos = GetSrcStream ().current_offset ();
521 ScanTilAfterHTMLThingy (eTag);
522 string tmp = GrabString (curPos);
523 char tagBuf[1024];
524 bool isStartTag = true;
525 ExtractHTMLTagIntoTagNameBuf (tmp.c_str (), tmp.length (), tagBuf, sizeof (tagBuf), &isStartTag);
526 if (isStartTag and (::strcmp (tagBuf, "html") == 0 or ::strcmp (tagBuf, "!doctype") == 0 or
527 ::strcmp (tagBuf, "title") == 0 or ::strcmp (tagBuf, "head") == 0 or ::strcmp (tagBuf, "body") == 0)) {
528 return true;
529 }
530 return false;
531 }
532 else {
533 return false;
534 }
535 } break;
536
537 case eEOF: {
538 return false;
539 } break;
540
541 default: {
542 Assert (false);
543 return false; // Not reached
544 } break;
545 }
546 Assert (false);
547 return false; // Not reached
548}
549
550/*
551@METHOD: StyledTextIOReader_HTML::GetEntityRefMapTable
552@DESCRIPTION: <p>Return a vector of @'StyledTextIOReader_HTML::EntityRefMapEntry' to be used in reading in
553 HTML text. These entity refs will be recognized in the input text, and mapped to the appropriate given character.</p>
554 <p>NB: The return value is a REFERENCE to vector. The lifetime of that vector must be at least til the
555 next call of this function, or til the death of this object. Typically, it is assumed, a static table will be used
556 so the lifetime will be the end of the process.</p>
557*/
558const vector<StyledTextIOReader_HTML::EntityRefMapEntry>& StyledTextIOReader_HTML::GetEntityRefMapTable () const
559{
560 using ELTV = vector<StyledTextIOReader_HTML::EntityRefMapEntry>;
561 static bool sFirstTime = true;
562 static ELTV sResult;
563 if (sFirstTime) {
564 sFirstTime = false;
565 sResult = ELTV{HTMLInfo::sDefaultEntityRefMapTable, HTMLInfo::sDefaultEntityRefMapTable + HTMLInfo::kDefaultEntityRefMapTable_Count};
566 }
567 return sResult;
568}
569
570StyledTextIOReader_HTML::ThingyType StyledTextIOReader_HTML::ScanTilNextHTMLThingy ()
571{
572 try {
573 // Looks like infinite loop but is not - cuz PeekNextChar () at EOF
574 for (char c = PeekNextChar ();; c = PeekNextChar ()) {
575 switch (c) {
576 case '&': {
577 return eEntityRef;
578 } break;
579
580 case '<': {
581 if (LookingAt ("<!--")) {
582 return eBangComment;
583 }
584 else {
585 return eTag;
586 }
587 } break;
588
589 default: {
590 // ignore - plain characters
591 ConsumeNextChar ();
592 } break;
593 }
594 }
595 }
596 catch (ReadEOFException& /*eof*/) {
597 return eEOF;
598 }
599 Assert (false);
600 return (eEOF); // not reached
601}
602
603void StyledTextIOReader_HTML::ScanTilAfterHTMLThingy (ThingyType thingy)
604{
605 Require (thingy != eEOF);
606 switch (thingy) {
607 case eEntityRef: {
608 // should check first char is '&'
609 ConsumeNextChar ();
610 try {
611 // read all successive alpha characters? Right way to parse entityrefs?
612 // LGP 960907
613 char c;
614 while (isalnum (c = PeekNextChar ()) or c == '#') {
615 ConsumeNextChar ();
616 }
617 if (c == ';') {
618 ConsumeNextChar ();
619 }
620 }
621 catch (ReadEOFException& /*eof*/) {
622 return; // OK to have entity ref at EOF
623 }
624 } break;
625
626 case eTag: {
627 // Probably must be more careful about comments and quoted strings???...
628 char c;
629 while ((c = GetNextChar ()) != '>')
630 ;
631 } break;
632
633 case eBangComment: {
634 while (true) {
635 if (LookingAt ("-->")) {
636 ConsumeNextChar ();
637 ConsumeNextChar ();
638 ConsumeNextChar ();
639 break;
640 }
641 else {
642 ConsumeNextChar ();
643 }
644 }
645 } break;
646
647 default: {
648 Assert (false);
649 } break;
650 }
651}
652
653bool StyledTextIOReader_HTML::LookingAt (const char* text) const
654{
655 bool result = true;
656 const char* p = text;
657 for (; *p != '\0';) {
658 if (PeekNextChar () == *p) {
659 ConsumeNextChar ();
660 ++p;
661 }
662 else {
663 result = false;
664 break;
665 }
666 }
667 // unwind consumed chars
668 size_t i = p - text;
669 while (i != 0) {
670 PutBackLastChar ();
671 --i;
672 }
673 return result;
674}
675
676Led_tString StyledTextIOReader_HTML::MapInputTextToTString (const string& text)
677{
678 return CodeCvt<Led_tChar>{WellKnownCodePages::kANSI}.Bytes2String<Led_tString> (as_bytes (span{text}));
679}
680
681void StyledTextIOReader_HTML::EmitText (const Led_tChar* text, size_t nBytes, bool skipNLCheck)
682{
683 if (fComingTextIsTitle) {
684 if (fSaveHTMLInfoInto != nullptr) {
685 fSaveHTMLInfoInto->fTitle += Led_tString{text, nBytes};
686 }
687 return;
688 }
689 if (fHiddenTextMode) {
690 fHiddenTextAccumulation += Led_tString{text, nBytes};
691 return;
692 }
693
694 if (fULNestingCount > 0 and not fLIOpen) {
695 // Drop that text on the floor
696 return;
697 }
698
699 Memory::StackBuffer<Led_tChar> outBuf{Memory::eUninitialized, nBytes};
700 nBytes = Characters::NormalizeTextToNL<Led_tChar> (text, nBytes, outBuf.data (), nBytes);
701
702 if (not skipNLCheck and fNormalizeInputWhitespace) {
703 Assert (fNormalizeInputWhitespace);
704 Assert (not skipNLCheck);
705
706 // Normalize space (including NLs) to one
707 Led_tChar* out = outBuf.data ();
708 for (size_t i = 0; i < nBytes; ++i) {
709 Led_tChar c = outBuf[i];
710 bool thisCharSpace = IsASCIISpace_ (c);
711 if (thisCharSpace) {
712 if (not fLastCharSpace) {
713 *out = ' ';
714 ++out;
715 }
716 }
717 else {
718 *out = c;
719 ++out;
720 }
721 fLastCharSpace = thisCharSpace;
722 }
723 size_t newNBytes = out - static_cast<Led_tChar*> (outBuf);
724 Assert (newNBytes <= nBytes);
725 nBytes = newNBytes;
726 }
727
728 if (nBytes > 0) {
729 fReadingBody = true;
730 GetSinkStream ().AppendText (outBuf.data (), nBytes, &fFontStack.back ());
731 }
732}
733
734void StyledTextIOReader_HTML::HandleHTMLThingy (StyledTextIOReader_HTML::ThingyType thingy, const char* text, size_t nBytes)
735{
736 switch (thingy) {
737 case eEntityRef:
738 HandleHTMLThingy_EntityReference (text, nBytes);
739 break;
740 case eTag:
741 HandleHTMLThingy_Tag (text, nBytes);
742 break;
743 case eEOF:
744 break;
745 default:
746 Assert (false);
747 }
748}
749
750void StyledTextIOReader_HTML::HandleHTMLThingy_EntityReference (const char* text, size_t nBytes)
751{
752 RequireNotNull (text);
753 Require (nBytes >= 1);
754
755 // For understood entity references, emit the character. For others, just emit the original enity ref..
756 string refName = string{&text[1], nBytes - 1};
757 if (refName.length () > 0) {
758 if (refName[refName.length () - 1] == ';') {
759 refName.erase (refName.length () - 1);
760 }
761 }
762 if (refName.length () > 0) {
763 if (refName[0] == '#') {
764 wchar_t result = static_cast<wchar_t> (::atoi (refName.c_str () + 1));
765 EmitText (&result, 1);
766 return;
767 }
768 else {
769 const vector<StyledTextIOReader_HTML::EntityRefMapEntry>& entityRefs = GetEntityRefMapTable ();
770 for (auto i = entityRefs.begin (); i != entityRefs.end (); ++i) {
771 if (refName == (*i).fEntityRefName) {
772 EmitText (&(*i).fCharValue, 1);
773 return;
774 }
775 }
776 }
777 }
778
779 // Even if qThrowAwayMostUnknownHTMLTags, we should still emit unknown entity refs, I think ... LGP 961015
780 EmitText (MapInputTextToTString (string{text, nBytes}));
781}
782
783void StyledTextIOReader_HTML::HandleHTMLThingy_Tag (const char* text, size_t nBytes)
784{
785 RequireNotNull (text);
786
787 char tagBuf[1024];
788 bool isStartTag = true;
789
790 ExtractHTMLTagIntoTagNameBuf (text, nBytes, tagBuf, sizeof (tagBuf), &isStartTag);
791
792 if (0) {
793 }
794 else if (::strcmp (tagBuf, "!doctype") == 0) {
795 HandleHTMLThingyTag_BANG_doctype (isStartTag, text, nBytes);
796 }
797 else if (::strcmp (tagBuf, "a") == 0) {
798 HandleHTMLThingyTag_a (isStartTag, text, nBytes);
799 }
800 else if (::strcmp (tagBuf, "b") == 0) {
801 HandleHTMLThingyTag_b (isStartTag, text, nBytes);
802 }
803 else if (::strcmp (tagBuf, "basefont") == 0) {
804 HandleHTMLThingyTag_basefont (isStartTag, text, nBytes);
805 }
806 else if (::strcmp (tagBuf, "blockquote") == 0) {
807 HandleHTMLThingyTag_blockquote (isStartTag, text, nBytes);
808 }
809 else if (::strcmp (tagBuf, "br") == 0) {
810 HandleHTMLThingyTag_br (isStartTag, text, nBytes);
811 }
812 else if (::strcmp (tagBuf, "body") == 0) {
813 HandleHTMLThingyTag_body (isStartTag, text, nBytes);
814 }
815 else if (::strcmp (tagBuf, "code") == 0) {
816 HandleHTMLThingyTag_code (isStartTag, text, nBytes);
817 }
818 else if (::strcmp (tagBuf, "comment") == 0) {
819 HandleHTMLThingyTag_comment (isStartTag, text, nBytes);
820 }
821 else if (::strcmp (tagBuf, "dir") == 0) {
822 HandleHTMLThingyTag_dir (isStartTag, text, nBytes);
823 }
824 else if (::strcmp (tagBuf, "div") == 0) {
825 HandleHTMLThingyTag_div (isStartTag, text, nBytes);
826 }
827 else if (::strcmp (tagBuf, "em") == 0) {
828 HandleHTMLThingyTag_em (isStartTag, text, nBytes);
829 }
830 else if (::strcmp (tagBuf, "font") == 0) {
831 HandleHTMLThingyTag_font (isStartTag, text, nBytes);
832 }
833 else if (::strcmp (tagBuf, "head") == 0) {
834 HandleHTMLThingyTag_head (isStartTag, text, nBytes);
835 }
836 else if (::strcmp (tagBuf, "html") == 0) {
837 HandleHTMLThingyTag_html (isStartTag, text, nBytes);
838 }
839 else if (::strcmp (tagBuf, "hr") == 0) {
840 HandleHTMLThingyTag_hr (isStartTag, text, nBytes);
841 }
842 else if (::strcmp (tagBuf, "h1") == 0) {
843 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
844 }
845 else if (::strcmp (tagBuf, "h2") == 0) {
846 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
847 }
848 else if (::strcmp (tagBuf, "h3") == 0) {
849 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
850 }
851 else if (::strcmp (tagBuf, "h4") == 0) {
852 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
853 }
854 else if (::strcmp (tagBuf, "h5") == 0) {
855 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
856 }
857 else if (::strcmp (tagBuf, "h6") == 0) {
858 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
859 }
860 else if (::strcmp (tagBuf, "h7") == 0) {
861 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
862 }
863 else if (::strcmp (tagBuf, "h8") == 0) {
864 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
865 }
866 else if (::strcmp (tagBuf, "h9") == 0) {
867 HandleHTMLThingyTag_hN (isStartTag, text, nBytes);
868 }
869 else if (::strcmp (tagBuf, "i") == 0) {
870 HandleHTMLThingyTag_i (isStartTag, text, nBytes);
871 }
872 else if (::strcmp (tagBuf, "img") == 0) {
873 HandleHTMLThingyTag_img (isStartTag, text, nBytes);
874 }
875 else if (::strcmp (tagBuf, "li") == 0) {
876 HandleHTMLThingyTag_li (isStartTag, text, nBytes);
877 }
878 else if (::strcmp (tagBuf, "listing") == 0) {
879 HandleHTMLThingyTag_listing (isStartTag, text, nBytes);
880 }
881 else if (::strcmp (tagBuf, "ol") == 0) {
882 HandleHTMLThingyTag_ol (isStartTag, text, nBytes);
883 }
884 else if (::strcmp (tagBuf, "p") == 0) {
885 HandleHTMLThingyTag_p (isStartTag, text, nBytes);
886 }
887 else if (::strcmp (tagBuf, "plaintext") == 0) {
888 HandleHTMLThingyTag_plaintext (isStartTag, text, nBytes);
889 }
890 else if (::strcmp (tagBuf, "pre") == 0) {
891 HandleHTMLThingyTag_pre (isStartTag, text, nBytes);
892 }
893 else if (::strcmp (tagBuf, "s") == 0) {
894 HandleHTMLThingyTag_s (isStartTag, text, nBytes);
895 }
896 else if (::strcmp (tagBuf, "samp") == 0) {
897 HandleHTMLThingyTag_samp (isStartTag, text, nBytes);
898 }
899 else if (::strcmp (tagBuf, "small") == 0) {
900 HandleHTMLThingyTag_small (isStartTag, text, nBytes);
901 }
902 else if (::strcmp (tagBuf, "span") == 0) {
903 HandleHTMLThingyTag_span (isStartTag, text, nBytes);
904 }
905 else if (::strcmp (tagBuf, "strike") == 0) {
906 HandleHTMLThingyTag_strike (isStartTag, text, nBytes);
907 }
908 else if (::strcmp (tagBuf, "strong") == 0) {
909 HandleHTMLThingyTag_strong (isStartTag, text, nBytes);
910 }
911 else if (::strcmp (tagBuf, "sub") == 0) {
912 HandleHTMLThingyTag_sub (isStartTag, text, nBytes);
913 }
914 else if (::strcmp (tagBuf, "sup") == 0) {
915 HandleHTMLThingyTag_sup (isStartTag, text, nBytes);
916 }
917 else if (::strcmp (tagBuf, "table") == 0) {
918 HandleHTMLThingyTag_table (isStartTag, text, nBytes);
919 }
920 else if (::strcmp (tagBuf, "td") == 0) {
921 HandleHTMLThingyTag_td (isStartTag, text, nBytes);
922 }
923 else if (::strcmp (tagBuf, "th") == 0) {
924 HandleHTMLThingyTag_th (isStartTag, text, nBytes);
925 }
926 else if (::strcmp (tagBuf, "title") == 0) {
927 HandleHTMLThingyTag_title (isStartTag, text, nBytes);
928 }
929 else if (::strcmp (tagBuf, "tr") == 0) {
930 HandleHTMLThingyTag_tr (isStartTag, text, nBytes);
931 }
932 else if (::strcmp (tagBuf, "tt") == 0) {
933 HandleHTMLThingyTag_tt (isStartTag, text, nBytes);
934 }
935 else if (::strcmp (tagBuf, "u") == 0) {
936 HandleHTMLThingyTag_u (isStartTag, text, nBytes);
937 }
938 else if (::strcmp (tagBuf, "ul") == 0) {
939 HandleHTMLThingyTag_ul (isStartTag, text, nBytes);
940 }
941 else if (::strcmp (tagBuf, "var") == 0) {
942 HandleHTMLThingyTag_var (isStartTag, text, nBytes);
943 }
944 else if (::strcmp (tagBuf, "xmp") == 0) {
945 HandleHTMLThingyTag_xmp (isStartTag, text, nBytes);
946 }
947 else {
948 HandleHTMLThingyTagUnknown (isStartTag, text, nBytes);
949 }
950}
951
952void StyledTextIOReader_HTML::ExtractHTMLTagIntoTagNameBuf (const char* text, size_t nBytes, char* tagBuf, size_t tagBufSize, bool* isStartTag)
953{
954 RequireNotNull (text);
955 RequireNotNull (tagBuf);
956 RequireNotNull (isStartTag);
957
958 {
959 const char* in = &text[0];
960 const char* in_end = in + nBytes;
961 char* out = &tagBuf[0];
962 ++in; // skip '<'
963 if (*in == '/') {
964 in++; // skip endTag character, and set flag
965 *isStartTag = false;
966 }
967 for (; in < in_end; ++in) {
968 char c = *in;
969 if (isalnum (c) or (c == '!') or (c == '_') or (c == '-')) {
970 if (isupper (c)) {
971 c = static_cast<char> (tolower (c));
972 }
973 *out = c;
974 ++out;
975 }
976 else {
977 break;
978 }
979
980 if (size_t (out - tagBuf) >= tagBufSize - 2) {
981 break;
982 }
983 }
984 *out = '\0';
985 }
986}
987
988IncrementalFontSpecification StyledTextIOReader_HTML::ExtractFontSpecFromCSSStyleAttribute (const char* text, size_t nBytes)
989{
990 IncrementalFontSpecification f;
991 string fullCSSString (text, text + nBytes);
992 string itemValue;
993 if (ParseCSSTagArgOut (fullCSSString, "font-family", &itemValue)) {
994 // really not right - could be comma separated list - just grab first and ignore the rest, and map
995 // predefined generic family items (serif => Times, etc... -- see http://www.w3.org/TR/1999/REC-CSS1-19990111
996 size_t comma = itemValue.find ('.');
997 if (comma != string::npos) {
998 itemValue = itemValue.substr (0, comma);
999 }
1000 f.SetFontName (String::FromNarrowSDKString (itemValue).AsSDKString ());
1001 }
1002 if (ParseCSSTagArgOut (fullCSSString, "font-size", &itemValue)) {
1003 if (itemValue.length () >= 3 and Led_CasedStringsEqual (itemValue.substr (itemValue.length () - 2, 2), "pt"sv)) {
1004 int sizeVal = 0;
1005 if (ParseStringToInt_ (itemValue.substr (0, itemValue.length () - 2), &sizeVal) and sizeVal >= 2 and sizeVal <= 128) {
1006 f.SetPointSize (static_cast<uint8_t> (sizeVal)); // cast OK cuz sizeVal in range
1007 }
1008 }
1009 }
1010 if (ParseCSSTagArgOut (fullCSSString, "font-style", &itemValue)) {
1011 if (Led_CasedStringsEqual (itemValue, "italic"sv)) {
1012 f.SetStyle_Italic (true);
1013 }
1014 else if (Led_CasedStringsEqual (itemValue, "normal"sv)) {
1015 f.SetStyle_Italic (false);
1016 }
1017 }
1018 if (ParseCSSTagArgOut (fullCSSString, "font-weight", &itemValue)) {
1019 int bv = 0;
1020 if (Led_CasedStringsEqual (itemValue, "bold"sv) or Led_CasedStringsEqual (itemValue, "bolder"sv) or
1021 (itemValue.length () > 0 and isdigit (itemValue[0]) and ParseStringToInt_ (itemValue, &bv) and bv >= 400)) {
1022 f.SetStyle_Bold (true);
1023 }
1024 else {
1025 f.SetStyle_Bold (false);
1026 }
1027 }
1028 if (ParseCSSTagArgOut (fullCSSString, "color", &itemValue)) {
1029 Color clr = Color::kBlack;
1030 if (ParseColorString_ (itemValue, &clr)) {
1031 f.SetTextColor (clr);
1032 }
1033 }
1034 return f;
1035}
1036
1037void StyledTextIOReader_HTML::ApplyCSSStyleAttributeToCurrentFontStack (const char* text, size_t nBytes)
1038{
1039 fFontStack.back ().MergeIn (ExtractFontSpecFromCSSStyleAttribute (text, nBytes));
1040}
1041
1042void StyledTextIOReader_HTML::GrabAndApplyCSSStyleFromTagText (const char* text, size_t nBytes)
1043{
1044 if (nBytes >= 11) { // speed check - smallest tag case would be "<a style=a>"
1045 string tagText (text, nBytes);
1046 string tagValue;
1047 if (ParseHTMLTagArgOut (tagText, "style", &tagValue)) {
1048 ApplyCSSStyleAttributeToCurrentFontStack (tagValue.c_str (), tagValue.length ());
1049 }
1050 }
1051}
1052
1053void StyledTextIOReader_HTML::HandleHTMLThingyTag_BANG_doctype (bool /*start*/, const char* text, size_t nBytes)
1054{
1055 if (fSaveHTMLInfoInto != nullptr) {
1056 fSaveHTMLInfoInto->fDocTypeTag = string{text, nBytes};
1057 }
1058}
1059
1060void StyledTextIOReader_HTML::HandleHTMLThingyTag_a (bool start, const char* text, size_t nBytes)
1061{
1062#if qThrowAwayMostUnknownHTMLTags
1063 if (start) {
1064 string tagText (text, nBytes);
1065
1066 fCurAHRefStart = GetSrcStream ().current_offset ();
1067 fCurAHRefText = tagText;
1068 fHiddenTextMode = true;
1069 fHiddenTextAccumulation = Led_tString{};
1070 }
1071 else {
1072 if (fCurAHRefStart != size_t (-1)) {
1073 string tagValue;
1074 (void)ParseHTMLTagArgOut (fCurAHRefText, "href", &tagValue);
1075#if qStroika_Frameworks_Led_SupportGDI
1076 EmbeddedObjectCreatorRegistry::Assoc assoc;
1077 if (EmbeddedObjectCreatorRegistry::Get ().Lookup (StandardURLStyleMarker::kEmbeddingTag, &assoc)) {
1078 AssertNotNull (assoc.fReadFromMemory);
1079 Led_URLD urld =
1080 Led_URLD{tagValue.c_str (),
1081 CodeCvt<Led_tChar>{WellKnownCodePages::kANSI}.String2Bytes<string> (span{fHiddenTextAccumulation}).c_str ()};
1082 GetSinkStream ().AppendEmbedding (
1083 (assoc.fReadFromMemory) (StandardURLStyleMarker::kEmbeddingTag, urld.PeekAtURLD (), urld.GetURLDLength ()));
1084 }
1085#endif
1086 }
1087 fCurAHRefStart = size_t (-1);
1088 fCurAHRefText = string{};
1089 fHiddenTextMode = false;
1090 fHiddenTextAccumulation = Led_tString{};
1091 fLastCharSpace = false;
1092 }
1093#else
1094 HandleHTMLThingyTagUnknown (start, text, nBytes);
1095#endif
1096}
1097
1098void StyledTextIOReader_HTML::HandleHTMLThingyTag_b (bool start, const char* /*text*/, size_t /*nBytes*/)
1099{
1100 BasicFontStackOperation (start);
1101 if (start) {
1102 FontSpecification fsp = fFontStack.back ();
1103 fsp.SetStyle_Bold (true);
1104 fFontStack.back () = fsp;
1105 }
1106}
1107
1108void StyledTextIOReader_HTML::HandleHTMLThingyTag_basefont (bool start, const char* text, size_t nBytes)
1109{
1110 if (start) {
1111 // we don't bother restoring old value for this on a close. Should we keep a stack??? - LGP 961014
1112 string tagText (text, nBytes);
1113 string tagValue;
1114 if (ParseHTMLTagArgOut (tagText, "size", &tagValue)) {
1115 if (tagValue.length () > 0) {
1116 int tagNum = Led_DigitCharToNumber (tagValue[0]);
1117 if (tagNum >= 1 and tagNum <= 7) {
1118 fHTMLBaseFontSize = tagNum;
1119 }
1120 }
1121 }
1122 }
1123}
1124
1125void StyledTextIOReader_HTML::HandleHTMLThingyTag_body (bool start, const char* text, size_t nBytes)
1126{
1127 if (start and fSaveHTMLInfoInto != nullptr) {
1128 fSaveHTMLInfoInto->fStartBodyTag = string{text, nBytes};
1129 }
1130 fReadingBody = true;
1131 if (start) {
1132 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1133 }
1134}
1135
1136void StyledTextIOReader_HTML::HandleHTMLThingyTag_big (bool start, const char* /*text*/, size_t /*nBytes*/)
1137{
1138 BasicFontStackOperation (start);
1139 if (start) {
1140 SetHTMLFontSize (fHTMLFontSize + 1);
1141 }
1142}
1143
1144void StyledTextIOReader_HTML::HandleHTMLThingyTag_blockquote (bool start, const char* text, size_t nBytes)
1145{
1146 // NB: This ignores the other attributes, like centering, etc...
1147
1148 // Probably should add a routine to only force a break UNLESS we just emitted a NL. This is really sloppy.
1149 EmitForcedLineBreak ();
1150
1151 // by default, handle this same as 'tt' - or 'teletype'
1152 StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (start, text, nBytes);
1153
1154 // ALSO, most handle NEWLINES specially in this context. Note this won't work if tags are nested, which
1155 // I doubt they are very often (and I doubt its legal). But maybe should add context stack?
1156 fNormalizeInputWhitespace = not start;
1157}
1158
1159void StyledTextIOReader_HTML::HandleHTMLThingyTag_br (bool start, const char* /*text*/, size_t /*nBytes*/)
1160{
1161 // This tag should only HAVE a start version (no </br>).
1162 if (start) {
1163 if (fComingTextIsTitle or fHiddenTextMode) {
1164 // IGNORE
1165 }
1166 else {
1167 GetSinkStream ().AppendSoftLineBreak ();
1168 }
1169 }
1170}
1171
1172void StyledTextIOReader_HTML::HandleHTMLThingyTag_code (bool start, const char* text, size_t nBytes)
1173{
1174 EndParaIfOpen ();
1175 // by default, handle this same as 'tt' - or 'teletype'
1176 StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (start, text, nBytes);
1177}
1178
1179void StyledTextIOReader_HTML::HandleHTMLThingyTag_comment (bool start, const char* text, size_t nBytes)
1180{
1181 HandleHTMLThingyTagUnknown (start, text, nBytes);
1182
1183 // ALSO, should handle NEWLINES in this context. Note this won't work if tags are nested, which
1184 // I doubt they are very often (and I doubt its legal). But maybe should add context stack?
1185 fNormalizeInputWhitespace = not start;
1186}
1187
1188void StyledTextIOReader_HTML::HandleHTMLThingyTag_dir (bool start, const char* text, size_t nBytes)
1189{
1190 EndParaIfOpen ();
1191 // According to HTML 4.0 spec, these are now deprecated, but should be treated just as a UL, when encountered.
1192 HandleHTMLThingyTag_ul (start, text, nBytes);
1193}
1194
1195void StyledTextIOReader_HTML::HandleHTMLThingyTag_div (bool start, const char* text, size_t nBytes)
1196{
1197 EndParaIfOpen ();
1198 BasicFontStackOperation (start);
1199 if (start) {
1200 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1201 string tagText (text, nBytes);
1202
1203 // SOON FIX THIS !@!!!
1204 // Figure out what justification to set and turn it on
1205 if (tagText.find ("left") != -1) {
1206 GetSinkStream ().SetJustification (eLeftJustify);
1207 }
1208 else if (tagText.find ("center") != -1) {
1209 GetSinkStream ().SetJustification (eCenterJustify);
1210 }
1211 else if (tagText.find ("right") != -1) {
1212 GetSinkStream ().SetJustification (eRightJustify);
1213 }
1214 }
1215 else {
1216 EmitForcedLineBreak (); // So we have at least ONE LINE above to apply the above justication to - else we can easily end up with NOTHING having the above justification (SPR#0707)
1217 // Set left justification by default. Probably should be maintaining a STACK of justifications -
1218 // and restoring it here. But this will frequently be good enough. And good enuf for now til I rewrite
1219 // the entire HTML reader...
1220 GetSinkStream ().SetJustification (eLeftJustify);
1221 }
1222}
1223
1224void StyledTextIOReader_HTML::HandleHTMLThingyTag_em (bool start, const char* text, size_t nBytes)
1225{
1226 // by default, handle this same as italics
1227 StyledTextIOReader_HTML::HandleHTMLThingyTag_i (start, text, nBytes);
1228}
1229
1230void StyledTextIOReader_HTML::HandleHTMLThingyTag_font (bool start, const char* text, size_t nBytes)
1231{
1232 BasicFontStackOperation (start);
1233 if (start) {
1234 // try to parse out SIZE= field and/or FACE=
1235 string tagText (text, nBytes);
1236 string tagValue;
1237 if (ParseHTMLTagArgOut (tagText, "face", &tagValue)) {
1238 FontSpecification fsp = fFontStack.back ();
1239 if (tagValue.find (',') != string::npos) {
1240 tagValue = tagValue.substr (0, tagValue.find (','));
1241 }
1242 fsp.SetFontName (String::FromNarrowSDKString (tagValue).AsSDKString ());
1243 fFontStack.back () = fsp;
1244 }
1245 if (ParseHTMLTagArgOut (tagText, "color", &tagValue)) {
1246 Color newColor = Color::kBlack;
1247 if (ParseColorString_ (tagValue, &newColor)) {
1248 FontSpecification fsp = fFontStack.back ();
1249 fsp.SetTextColor (newColor);
1250 fFontStack.back () = fsp;
1251 }
1252 }
1253 if (ParseHTMLTagArgOut (tagText, "size", &tagValue)) {
1254 if (tagValue.length () > 0) {
1255 switch (tagValue[0]) {
1256 case '-': {
1257 if (tagValue.length () >= 2) {
1258 int tagNum = tagValue[1] - '0';
1259 if (tagNum >= -7 and tagNum <= 7) {
1260 SetHTMLFontSize (fHTMLBaseFontSize - tagNum);
1261 }
1262 }
1263 } break;
1264 case '+': {
1265 if (tagValue.length () >= 2) {
1266 int tagNum = tagValue[1] - '0';
1267 if (tagNum >= -7 and tagNum <= 7) {
1268 SetHTMLFontSize (fHTMLBaseFontSize + tagNum);
1269 }
1270 }
1271 } break;
1272 default: {
1273 if (isdigit (tagValue[0])) {
1274 int tagNum = Led_DigitCharToNumber (tagValue[0]);
1275 if (tagNum >= 1 and tagNum <= 7) {
1276 SetHTMLFontSize (tagNum);
1277 }
1278 }
1279 } break;
1280 }
1281 }
1282 }
1283 }
1284}
1285
1286void StyledTextIOReader_HTML::HandleHTMLThingyTag_head (bool start, const char* text, size_t nBytes)
1287{
1288 if (start and fSaveHTMLInfoInto != nullptr) {
1289 fSaveHTMLInfoInto->fHeadTag = string{text, nBytes};
1290 }
1291}
1292
1293void StyledTextIOReader_HTML::HandleHTMLThingyTag_html (bool start, const char* text, size_t nBytes)
1294{
1295 if (start and fSaveHTMLInfoInto != nullptr) {
1296 fSaveHTMLInfoInto->fHTMLTag = string{text, nBytes};
1297 }
1298}
1299
1300void StyledTextIOReader_HTML::HandleHTMLThingyTag_hr ([[maybe_unused]] bool start, [[maybe_unused]] const char* text, [[maybe_unused]] size_t nBytes)
1301{
1302 EndParaIfOpen ();
1303// emit it unchanged (since we don't support these), but also emit a line-break as well so
1304// looks more readable
1305#if qThrowAwayMostUnknownHTMLTags
1306 const Led_tChar kSeparator[] = LED_TCHAR_OF ("----------------------------------------------------------------");
1307 EmitText (kSeparator, Led_tStrlen (kSeparator));
1308#else
1309 HandleHTMLThingyTagUnknown (start, text, nBytes);
1310#endif
1311 EmitForcedLineBreak ();
1312}
1313
1314void StyledTextIOReader_HTML::HandleHTMLThingyTag_hN (bool start, const char* text, size_t nBytes)
1315{
1316 EndParaIfOpen ();
1317 if (start) {
1318 EmitForcedLineBreak ();
1319 }
1320#if qThrowAwayMostUnknownHTMLTags
1321 BasicFontStackOperation (start);
1322 if (start) {
1323 FontSpecification fsp = fFontStack.back ();
1324 fsp.SetStyle_Bold (true);
1325 fFontStack.back () = fsp;
1326 if (nBytes > 3 and isdigit (text[2])) {
1327 int headingLevel = Led_DigitCharToNumber (text[2]);
1328 switch (headingLevel) {
1329 case 1:
1330 SetHTMLFontSize (7);
1331 break;
1332 case 2:
1333 SetHTMLFontSize (6);
1334 break;
1335 case 3:
1336 SetHTMLFontSize (5);
1337 break;
1338 case 4:
1339 SetHTMLFontSize (4);
1340 break;
1341 case 5:
1342 SetHTMLFontSize (3);
1343 break;
1344 default: {
1345 // ignored
1346 }
1347 }
1348 }
1349 }
1350#endif
1351 if (start) {
1352 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1353 }
1354 // emit it unchanged (since we don't support these), but also emit a line-break as well so
1355 // looks more readable
1356 HandleHTMLThingyTagUnknown (start, text, nBytes);
1357 if (not start) {
1358 EmitForcedLineBreak ();
1359 }
1360}
1361
1362void StyledTextIOReader_HTML::HandleHTMLThingyTag_i (bool start, const char* /*text*/, size_t /*nBytes*/)
1363{
1364 BasicFontStackOperation (start);
1365 if (start) {
1366 FontSpecification fsp = fFontStack.back ();
1367 fsp.SetStyle_Italic (true);
1368 fFontStack.back () = fsp;
1369 }
1370}
1371
1372void StyledTextIOReader_HTML::HandleHTMLThingyTag_img (bool start, const char* text, size_t nBytes)
1373{
1374 HandleHTMLThingyTagUnknown (start, text, nBytes);
1375}
1376
1377void StyledTextIOReader_HTML::HandleHTMLThingyTag_li (bool start, const char* text, size_t nBytes)
1378{
1379 EndParaIfOpen ();
1380 BasicFontStackOperation (start);
1381 if (start) {
1382 fLastCharSpace = true;
1383 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1384 }
1385 else {
1386 EmitForcedLineBreak ();
1387 }
1388 fLIOpen = start;
1389}
1390
1391void StyledTextIOReader_HTML::HandleHTMLThingyTag_listing (bool start, const char* text, size_t nBytes)
1392{
1393 // by default, handle this same as 'tt' - or 'teletype'
1394 EndParaIfOpen ();
1395 StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (start, text, nBytes);
1396}
1397
1398void StyledTextIOReader_HTML::HandleHTMLThingyTag_ol (bool start, const char* text, size_t nBytes)
1399{
1400 EndParaIfOpen ();
1401 // emit it unchanged (since we don't support these), but also emit a line-break as well so
1402 // looks more readable
1403 HandleHTMLThingyTagUnknown (start, text, nBytes);
1404 EmitForcedLineBreak ();
1405}
1406
1407void StyledTextIOReader_HTML::HandleHTMLThingyTag_p (bool start, const char* text, size_t nBytes)
1408{
1409 if (start) {
1410 StartPara ();
1411 if (start) {
1412 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1413 }
1414 if (nBytes > 5) {
1415 string tagTxt (text, nBytes);
1416 // Figure out what justification to set and turn it on
1417 if (tagTxt.find ("left") != -1) {
1418 GetSinkStream ().SetJustification (eLeftJustify);
1419 }
1420 else if (tagTxt.find ("center") != -1) {
1421 GetSinkStream ().SetJustification (eCenterJustify);
1422 }
1423 else if (tagTxt.find ("right") != -1) {
1424 GetSinkStream ().SetJustification (eRightJustify);
1425 }
1426 else {
1427 // Unclear if I should treat this as inherit from someplace - or just a specification of LEFT? Docs seem to indicate left (HTML DOCS)
1428 GetSinkStream ().SetJustification (eLeftJustify);
1429 }
1430 }
1431 }
1432 else {
1433 EndParaIfOpen ();
1434 }
1435}
1436
1437void StyledTextIOReader_HTML::HandleHTMLThingyTag_plaintext (bool start, const char* text, size_t nBytes)
1438{
1439 EndParaIfOpen ();
1440 // by default, handle this same as 'tt' - or 'teletype'
1441 StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (start, text, nBytes);
1442
1443 // ALSO, most handle NEWLINES specially in this context. Note this won't work if <pre> tags are nested, which
1444 // I doubt they are very often (and I doubt its legal). But maybe should add context stack?
1445 fNormalizeInputWhitespace = not start;
1446
1447 // ALSO should disable other tag processing, but this ISNT implemented here... LGP 961003
1448}
1449
1450void StyledTextIOReader_HTML::HandleHTMLThingyTag_pre (bool start, const char* text, size_t nBytes)
1451{
1452 EndParaIfOpen ();
1453 // by default, handle this same as 'tt' - or 'teletype'
1454 StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (start, text, nBytes);
1455
1456 // ALSO, most handle NEWLINES specially in this context. Note this won't work if <pre> tags are nested, which
1457 // I doubt they are very often (and I doubt its legal). But maybe should add context stack?
1458 fNormalizeInputWhitespace = not start;
1459}
1460
1461void StyledTextIOReader_HTML::HandleHTMLThingyTag_s (bool start, const char* text, size_t nBytes)
1462{
1463 HandleHTMLThingyTag_strike (start, text, nBytes);
1464}
1465
1466void StyledTextIOReader_HTML::HandleHTMLThingyTag_samp (bool start, const char* /*text*/, size_t /*nBytes*/)
1467{
1468 BasicFontStackOperation (start);
1469 if (start) {
1470 SetHTMLFontSize (2);
1471 }
1472}
1473
1474void StyledTextIOReader_HTML::HandleHTMLThingyTag_small (bool start, const char* /*text*/, size_t /*nBytes*/)
1475{
1476 BasicFontStackOperation (start);
1477 if (start) {
1478 SetHTMLFontSize (fHTMLFontSize - 1);
1479 }
1480}
1481
1482void StyledTextIOReader_HTML::HandleHTMLThingyTag_span (bool start, const char* text, size_t nBytes)
1483{
1484 BasicFontStackOperation (start);
1485 if (start) {
1486 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1487 }
1488}
1489
1490void StyledTextIOReader_HTML::HandleHTMLThingyTag_strike ([[maybe_unused]] bool start, [[maybe_unused]] const char* text, [[maybe_unused]] size_t nBytes)
1491{
1492#if qStroika_Foundation_Common_Platform_Windows
1493 BasicFontStackOperation (start);
1494 if (start) {
1495 FontSpecification fsp = fFontStack.back ();
1496 fsp.SetStyle_Strikeout (true);
1497 fFontStack.back () = fsp;
1498 }
1499#endif
1500}
1501
1502void StyledTextIOReader_HTML::HandleHTMLThingyTag_strong (bool start, const char* text, size_t nBytes)
1503{
1504 // by default, handle this same as 'b' - BOLD
1505 StyledTextIOReader_HTML::HandleHTMLThingyTag_b (start, text, nBytes);
1506}
1507
1508void StyledTextIOReader_HTML::HandleHTMLThingyTag_sub (bool start, const char* /*text*/, size_t /*nBytes*/)
1509{
1510 BasicFontStackOperation (start);
1511 if (start) {
1512 FontSpecification fsp = fFontStack.back ();
1513 fsp.SetStyle_SubOrSuperScript (FontSpecification::eSubscript);
1514 fFontStack.back () = fsp;
1515 }
1516}
1517
1518void StyledTextIOReader_HTML::HandleHTMLThingyTag_sup (bool start, const char* /*text*/, size_t /*nBytes*/)
1519{
1520 BasicFontStackOperation (start);
1521 if (start) {
1522 FontSpecification fsp = fFontStack.back ();
1523 fsp.SetStyle_SubOrSuperScript (FontSpecification::eSuperscript);
1524 fFontStack.back () = fsp;
1525 }
1526}
1527
1528void StyledTextIOReader_HTML::HandleHTMLThingyTag_table (bool start, const char* /*text*/, size_t /*nBytes*/)
1529{
1530 GetSinkStream ().Flush ();
1531 fLastCharSpace = true;
1532 /*
1533 * MINIMAL nested table support.
1534 *
1535 * Since in order for tables to be nested, you must start a table while already inside a table cell,
1536 * we can avoid keeping track of some information on a stack (cheap trick).
1537 *
1538 * If we are starting a table, (even if nested) - we set the row/cell flags to FALSE. Then - any time we
1539 * end a table (with count>1), we set them to TRUE (since to have been a nested table, it must have been
1540 * contained in some open table row/cell.
1541 */
1542 if (start) {
1543 GetSinkStream ().StartTable ();
1544 ++fTableOpenCount;
1545 fTableRowOpen = false;
1546 fTableCellOpen = false;
1547 }
1548 else {
1549 if (fTableOpenCount > 0) {
1550 if (fTableCellOpen) {
1551 // must implicitly close previous cell
1552 GetSinkStream ().EndTableCell ();
1553 fTableCellOpen = false;
1554 BasicFontStackOperation (fTableCellOpen);
1555 }
1556 if (fTableRowOpen) {
1557 // must implicitly close previous row
1558 GetSinkStream ().EndTableRow ();
1559 fTableRowOpen = false;
1560 }
1561 GetSinkStream ().EndTable ();
1562 fTableOpenCount--;
1563 // if tableRowCount is now > 0 - we must have ended table inside an existing cell. If == 0, doesn't matter
1564 // as values are meaningless.
1565 fTableRowOpen = true;
1566 fTableCellOpen = true;
1567 }
1568 }
1569}
1570
1571void StyledTextIOReader_HTML::HandleHTMLThingyTag_td (bool start, const char* text, size_t nBytes)
1572{
1573 // Ignore td calls if not in a table or row.
1574 if (fTableOpenCount > 0 and fTableRowOpen) {
1575 GetSinkStream ().Flush ();
1576 if (start) {
1577 if (fTableCellOpen) {
1578 // must implicitly close previous cell
1579 GetSinkStream ().EndTableCell ();
1580 fTableCellOpen = false;
1581 BasicFontStackOperation (fTableCellOpen);
1582 }
1583 GetSinkStream ().StartTableCell (1);
1584 fTableCellOpen = true;
1585 BasicFontStackOperation (fTableCellOpen);
1586 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1587 }
1588 else {
1589 // simply ignore a close cell command if we're not already in a table cell.
1590 if (fTableCellOpen) {
1591 GetSinkStream ().EndTableCell ();
1592 fTableCellOpen = false;
1593 BasicFontStackOperation (fTableCellOpen);
1594 }
1595 }
1596 }
1597}
1598
1599void StyledTextIOReader_HTML::HandleHTMLThingyTag_th (bool start, const char* text, size_t nBytes)
1600{
1601 // Treat a th as identical to a td element (maybe later force text to bold too)
1602 HandleHTMLThingyTag_td (start, text, nBytes);
1603 if (start) {
1604 fFontStack.back ().SetStyle_Bold (true);
1605 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1606 }
1607}
1608
1609void StyledTextIOReader_HTML::HandleHTMLThingyTag_title (bool start, const char* /*text*/, size_t /*nBytes*/)
1610{
1611 fComingTextIsTitle = start;
1612}
1613
1614void StyledTextIOReader_HTML::HandleHTMLThingyTag_tr (bool start, const char* /*text*/, size_t /*nBytes*/)
1615{
1616 // Ignore (largely) tr calls if not in a table.
1617 if (fTableOpenCount > 0) {
1618 GetSinkStream ().Flush ();
1619
1620 if (fTableCellOpen) {
1621 // must implicitly close previous cell
1622 GetSinkStream ().EndTableCell ();
1623 fTableCellOpen = false;
1624 BasicFontStackOperation (fTableCellOpen);
1625 }
1626 if (fTableRowOpen) {
1627 // must implicitly close previous row
1628 GetSinkStream ().EndTableRow ();
1629 fTableRowOpen = false;
1630 }
1631
1632 if (start) {
1633 GetSinkStream ().StartTableRow ();
1634 fTableRowOpen = true;
1635 }
1636 }
1637 else {
1638 // slight hack --- probably best we can do for ill-formed input HTML
1639 EndParaIfOpen ();
1640 }
1641}
1642
1643void StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (bool start, const char* text, size_t nBytes)
1644{
1645 BasicFontStackOperation (start);
1646 if (start) {
1647 FontSpecification fsp = fFontStack.back ();
1648 fsp.SetFontName (Led_SDK_TCHAROF ("courier")); // will this work on mac and PC? - want teletype, monospace font here! - LGP 961003
1649 fFontStack.back () = fsp;
1650 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1651 }
1652}
1653
1654void StyledTextIOReader_HTML::HandleHTMLThingyTag_u (bool start, const char* /*text*/, size_t /*nBytes*/)
1655{
1656 BasicFontStackOperation (start);
1657 if (start) {
1658 FontSpecification fsp = fFontStack.back ();
1659 fsp.SetStyle_Underline (true);
1660 fFontStack.back () = fsp;
1661 }
1662}
1663
1664void StyledTextIOReader_HTML::HandleHTMLThingyTag_ul (bool start, const char* text, size_t nBytes)
1665{
1666 EndParaIfOpen ();
1667 if (start) {
1668 ++fULNestingCount;
1669 EmitForcedLineBreak ();
1670 ListStyle listStyle = eListStyle_Bullet; // should get style from param to UL - as in 'type=disc'
1671 GetSinkStream ().SetListStyle (listStyle);
1672 GrabAndApplyCSSStyleFromTagText (text, nBytes);
1673 }
1674 else {
1675 --fULNestingCount;
1676 if (fULNestingCount == 0) {
1677 EmitForcedLineBreak ();
1678 ListStyle listStyle = eListStyle_None; // should get style from param to saved-stack...
1679 GetSinkStream ().SetListStyle (listStyle);
1680 }
1681 }
1682}
1683
1684void StyledTextIOReader_HTML::HandleHTMLThingyTag_var (bool start, const char* text, size_t nBytes)
1685{
1686 // by default, handle this same as 'tt' - or 'teletype'
1687 StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (start, text, nBytes);
1688}
1689
1690void StyledTextIOReader_HTML::HandleHTMLThingyTag_xmp (bool start, const char* text, size_t nBytes)
1691{
1692 // by default, handle this same as 'tt' - or 'teletype'
1693 StyledTextIOReader_HTML::HandleHTMLThingyTag_tt (start, text, nBytes);
1694}
1695
1696void StyledTextIOReader_HTML::HandleHTMLThingyTagUnknown ([[maybe_unused]] bool start, [[maybe_unused]] const char* text, [[maybe_unused]] size_t nBytes)
1697{
1698#if !qThrowAwayMostUnknownHTMLTags
1699 EmitText (text, nBytes);
1700#endif
1701}
1702
1703void StyledTextIOReader_HTML::EmitForcedLineBreak ()
1704{
1705 EmitText (LED_TCHAR_OF ("\n"), 1, true);
1706}
1707
1708void StyledTextIOReader_HTML::BasicFontStackOperation (bool start)
1709{
1710 if (start) {
1711 fFontStack.push_back (fFontStack.back ());
1712 }
1713 else {
1714 // Assume end-font-setting-tag matches earlier start-font-setting-tag. This is RIGHT for valid HTML files, and
1715 // I'm not that worried about properly dealing with bogus files
1716 if (fFontStack.size () > 1) {
1717 fFontStack.pop_back ();
1718 }
1719 }
1720}
1721
1722bool StyledTextIOReader_HTML::ParseHTMLTagArgOut (const string& tagText, const string& attrName, string* attrValue)
1723{
1724 /*
1725 * Really primitive, half-assed implementation. But good enuf for my current usage.
1726 * --LGP 961014
1727 * Slighyly improved as part of SPR#1504 and SPR#0574 and SPR#1505.
1728 */
1729 size_t len = tagText.length ();
1730 size_t i = 0;
1731 string tagName;
1732
1733 if (i == 0 and tagText[i] == '<') {
1734 ++i;
1735 } // SKIP leading <
1736 while (i < len and isspace (tagText[i])) {
1737 ++i;
1738 } // SKIP WHITESPACE
1739
1740 // grab tag name
1741 {
1742 size_t startOfArgName = i;
1743 while (isalpha (tagText[i])) {
1744 i++;
1745 if (i >= len) {
1746 return false;
1747 }
1748 }
1749 tagName = string{tagText.c_str () + startOfArgName, (i - startOfArgName)};
1750 }
1751
1752 // Check each attr
1753 for (; i < len; i++) {
1754 while (i < len and isspace (tagText[i])) {
1755 ++i;
1756 } // SKIP WHITESPACE
1757
1758 size_t startOfArgName = i;
1759 while (isalpha (tagText[i])) {
1760 i++;
1761 if (i >= len) {
1762 return false;
1763 }
1764 }
1765 string argName (tagText.c_str () + startOfArgName, (i - startOfArgName));
1766
1767 while (i < len and isspace (tagText[i])) {
1768 ++i;
1769 } // SKIP WHITESPACE
1770
1771 // Look for '=' in a=b (or a='b')
1772 if (i < len and tagText[i] == '=') {
1773 i++;
1774 }
1775 else {
1776 return false;
1777 }
1778
1779 while (i < len and isspace (tagText[i])) {
1780 ++i;
1781 } // SKIP WHITESPACE
1782
1783 bool gotStartQuote = false;
1784 char startQuote = '\0';
1785 if (i < len) {
1786 if (tagText[i] == '\'' or tagText[i] == '\"') {
1787 gotStartQuote = true;
1788 startQuote = tagText[i];
1789 ++i;
1790 }
1791 }
1792
1793 size_t startOfArgValue = i;
1794 while ((not isspace (tagText[i]) or gotStartQuote) and tagText[i] != startQuote and tagText[i] != '>') {
1795 i++;
1796 if (i >= len) {
1797 break;
1798 }
1799 }
1800 string argValue (tagText.c_str () + startOfArgValue, (i - startOfArgValue));
1801 if (Led_CasedStringsEqual (attrName, argName)) {
1802 *attrValue = argValue;
1803 return true;
1804 }
1805 }
1806
1807 return false;
1808}
1809
1810bool StyledTextIOReader_HTML::ParseCSSTagArgOut (const string& text, const string& attrName, string* attrValue)
1811{
1812 size_t len = text.length ();
1813
1814 for (size_t i = 0; i < len; i++) {
1815 while (i < len and (isspace (text[i]) or (text[i] == ';'))) {
1816 ++i;
1817 } // SKIP WHITESPACE (or semi-colon - attribute separator)
1818
1819 string argName;
1820 {
1821 size_t startOfArgName = i;
1822 while (isalpha (text[i]) or text[i] == '-') {
1823 i++;
1824 if (i >= len) {
1825 return false;
1826 }
1827 }
1828 argName = string{text.c_str () + startOfArgName, (i - startOfArgName)};
1829 }
1830
1831 while (i < len and isspace (text[i])) {
1832 ++i;
1833 } // SKIP WHITESPACE
1834
1835 // Look for ':' in a:b (or a:'b' etc)
1836 if (i < len and text[i] == ':') {
1837 i++;
1838 }
1839 else {
1840 return false;
1841 }
1842
1843 while (i < len and isspace (text[i])) {
1844 ++i;
1845 } // SKIP WHITESPACE
1846
1847 char startQuote = '\0';
1848 if (i < len) {
1849 if (text[i] == '\'' or text[i] == '\"') {
1850 startQuote = text[i];
1851 ++i;
1852 }
1853 }
1854
1855 {
1856 size_t startOfArgValue = i;
1857 while (text[i] != ';') {
1858 if (text[i] == startQuote) {
1859 break;
1860 }
1861 i++;
1862 if (i >= len) {
1863 break;
1864 }
1865 }
1866 string argValue (text.c_str () + startOfArgValue, (i - startOfArgValue));
1867 if (Led_CasedStringsEqual (attrName, argName)) {
1868 *attrValue = argValue;
1869 return true;
1870 }
1871 }
1872 }
1873 return false;
1874}
1875
1876void StyledTextIOReader_HTML::StartPara ()
1877{
1878 EndParaIfOpen ();
1879 Assert (not fInAPara);
1880 fInAPara = true;
1881}
1882
1883void StyledTextIOReader_HTML::EndParaIfOpen ()
1884{
1885 if (fInAPara) {
1886 EmitForcedLineBreak ();
1887 fInAPara = false;
1888 fLastCharSpace = true;
1889 GetSinkStream ().SetJustification (eLeftJustify); // unclear if this is needed or desirable?
1890 }
1891 Ensure (not fInAPara);
1892}
1893
1894void StyledTextIOReader_HTML::SetHTMLFontSize (int to)
1895{
1896 fHTMLFontSize = min (to, 7);
1897 fHTMLFontSize = max (fHTMLFontSize, 1);
1898 FontSpecification fsp = fFontStack.back ();
1899 fsp.SetPointSize (HTMLInfo::HTMLFontSizeToRealFontSize (fHTMLFontSize));
1900 fFontStack.back () = fsp;
1901}
1902
1903/*
1904 ********************************************************************************
1905 ******************* StyledTextIOWriter_HTML::WriterContext *********************
1906 ********************************************************************************
1907 */
1908
1909size_t StyledTextIOWriter_HTML::WriterContext::GetCurSrcOffset () const
1910{
1911 return GetSrcStream ().current_offset ();
1912}
1913
#if qStroika_Frameworks_Led_SupportGDI
/*
 *  Return the embedding marker found in the source range [offset - 1, offset), where offset is
 *  the current source offset, or nullptr if there is none.
 */
SimpleEmbeddedObjectStyleMarker* StyledTextIOWriter_HTML::WriterContext::GetCurSimpleEmbeddedObjectStyleMarker () const
{
    const size_t                             curOffset = GetCurSrcOffset ();
    vector<SimpleEmbeddedObjectStyleMarker*> found     = GetSrcStream ().CollectAllEmbeddingMarkersInRange (curOffset - 1, curOffset);
    // The range has length one, and there can be only a single embedding in one place. Finding
    // NONE is allowed - the user may have NUL characters in the text for other reasons.
    Assert (found.size () <= 1);
    return found.empty () ? nullptr : found[0];
}
#endif
1930
1931/*
1932 ********************************************************************************
1933 ************************** StyledTextIOWriter_HTML *****************************
1934 ********************************************************************************
1935 */
1936StyledTextIOWriter_HTML::StyledTextIOWriter_HTML (SrcStream* srcStream, SinkStream* sinkStream, const HTMLInfo* getHTMLInfoFrom)
1937 : StyledTextIOWriter (srcStream, sinkStream)
1938 , fGetHTMLInfoFrom (getHTMLInfoFrom)
1939 , fStyleRunSummary ()
1940 , fSoftLineBreakChar (srcStream->GetSoftLineBreakCharacter ())
1941{
1942}
1943
1944StyledTextIOWriter_HTML::~StyledTextIOWriter_HTML ()
1945{
1946}
1947
1948/*
1949@METHOD: StyledTextIOReader_HTML::GetEntityRefMapTable
1950@DESCRIPTION: <p>Return a vector of @'StyledTextIOReader_HTML::EntityRefMapEntry' to be used in reading in
1951 HTML text. These entity refs will be recognized in the input text, and mapped to the appropriate given character.</p>
1952 <p>NB: The return value is a REFERENCE to vector. The lifetime of that vector must be at least til the
1953 next call of this function, or til the death of this object. Typically, it is assumed, a static table will be used
1954 so the lifetime will be the end of the process.</p>
1955*/
1956const vector<StyledTextIOWriter_HTML::EntityRefMapEntry>& StyledTextIOWriter_HTML::GetEntityRefMapTable () const
1957{
1958 static bool sFirstTime = true;
1959 static vector<StyledTextIOReader_HTML::EntityRefMapEntry> sResult;
1960 if (sFirstTime) {
1961 sFirstTime = false;
1962 for (size_t i = 0; i < HTMLInfo::kDefaultEntityRefMapTable_Count; ++i) {
1963#if !qWriteOutMostHTMLEntitiesByName
1964 // Then skip all but a handfull of very important ones...
1965 if (HTMLInfo::sDefaultEntityRefMapTable[i].fEntityRefName != "amp" and HTMLInfo::sDefaultEntityRefMapTable[i].fEntityRefName != "gt" and
1966 HTMLInfo::sDefaultEntityRefMapTable[i].fEntityRefName != "lt" and HTMLInfo::sDefaultEntityRefMapTable[i].fEntityRefName != "quot"
1967
1968 ) {
1969 continue;
1970 }
1971#endif
1972 sResult.push_back (HTMLInfo::sDefaultEntityRefMapTable[i]);
1973 }
1974 }
1975 return sResult;
1976}
1977
1978void StyledTextIOWriter_HTML::Write ()
1979{
1980 WriterContext writerContext (*this);
1981 WriteHeader (writerContext);
1982 WriteBody (writerContext);
1983 WriteCloseTag (writerContext, "html");
1984 write ("\r\n");
1985}
1986
1987void StyledTextIOWriter_HTML::WriteHeader (WriterContext& writerContext)
1988{
1989 if (fGetHTMLInfoFrom != nullptr and fGetHTMLInfoFrom->fDocTypeTag.length () != 0) {
1990 write (fGetHTMLInfoFrom->fDocTypeTag);
1991 }
1992
1993 if (fGetHTMLInfoFrom == nullptr or fGetHTMLInfoFrom->fHTMLTag.length () == 0) {
1994 WriteOpenTag (writerContext, "html");
1995 }
1996 else {
1997 WriteOpenTagSpecial (writerContext, "html", fGetHTMLInfoFrom->fHTMLTag);
1998 }
1999 write ("\r\n");
2000
2001 if (fGetHTMLInfoFrom == nullptr or fGetHTMLInfoFrom->fHeadTag.length () == 0) {
2002 WriteOpenTag (writerContext, "head");
2003 }
2004 else {
2005 WriteOpenTagSpecial (writerContext, "head", fGetHTMLInfoFrom->fHeadTag);
2006 }
2007 write ("\r\n");
2008
2009 if (fGetHTMLInfoFrom != nullptr) {
2010 for (size_t i = 0; i < fGetHTMLInfoFrom->fUnknownHeaderTags.size (); i++) {
2011 write (fGetHTMLInfoFrom->fUnknownHeaderTags[i]);
2012 }
2013 }
2014
2015 if (fGetHTMLInfoFrom != nullptr and fGetHTMLInfoFrom->fTitle.length () != 0) {
2016 WriteOpenTag (writerContext, "title");
2017 write (MapOutputTextFromTString (fGetHTMLInfoFrom->fTitle));
2018 WriteCloseTag (writerContext, "title");
2019 write ("\r\n");
2020 }
2021
2022 WriteCloseTag (writerContext, "head");
2023 write ("\r\n");
2024}
2025
2026void StyledTextIOWriter_HTML::WriteBody (WriterContext& writerContext)
2027{
2028 AssureStyleRunSummaryBuilt (writerContext);
2029
2030 if (fGetHTMLInfoFrom == nullptr or fGetHTMLInfoFrom->fStartBodyTag.length () == 0) {
2031 WriteOpenTag (writerContext, "body");
2032 }
2033 else {
2034 WriteOpenTagSpecial (writerContext, "body", fGetHTMLInfoFrom->fStartBodyTag);
2035 }
2036 WriteInnerBody (writerContext);
2037 write ("\r\n");
2038 WriteCloseTag (writerContext, "body");
2039 write ("\r\n");
2040}
2041
2042void StyledTextIOWriter_HTML::WriteInnerBody (WriterContext& writerContext)
2043{
2044 AssureStyleRunSummaryBuilt (writerContext);
2045
2046 write ("\r\n");
2047
2048 /*
2049 * Walk through the characters, and output them one at a time. Walk
2050 * SIMULTANEOUSLY through the style run information, and output new controlling
2051 * tags on the fly.
2052 */
2053 writerContext.fLastEmittedISR = StyledInfoSummaryRecord (IncrementalFontSpecification (), 0);
2054 writerContext.fLastStyleChangeAt = 0;
2055 writerContext.fIthStyleRun = 0;
2056 Led_tChar c = '\0';
2057 while (writerContext.GetSrcStream ().readNTChars (&c, 1) != 0) {
2058 WriteBodyCharacter (writerContext, c);
2059 }
2060 write ("\r\n");
2061}
2062
/*
 *  Emit the HTML for one character 'c' just read from the source stream of 'writerContext'.
 *
 *  Before the character itself is written this routine (in order):
 *      1. opens/closes "ul"/"li" according to the list style reported by the source stream,
 *      2. opens a "p" (with an align attribute taken from the source justification) if no paragraph,
 *         list item or table cell is currently being emitted,
 *      3. advances the style-run bookkeeping and calls EmitBodyFontInfoChange to close/open font tags.
 *
 *  Then the character is written: '\n' ends the current list item / paragraph (or becomes "br" inside a
 *  table cell); the soft-line-break character becomes "br"; characters listed in GetEntityRefMapTable ()
 *  become "&name;"; everything else goes through MapOutputTextFromWString.
 *
 *  NOTE(review): fStyleRunSummary is indexed by writerContext.fIthStyleRun without a bounds check here -
 *  presumably the style runs always cover the whole text; confirm against SrcStream::GetStyleInfo.
 */
void StyledTextIOWriter_HTML::WriteBodyCharacter (WriterContext& writerContext, Led_tChar c)
{
    // indentLevel is fetched because GetListStyleInfo fills both out-parameters; only listStyle is used below.
    ListStyle     listStyle   = eListStyle_None;
    unsigned char indentLevel = 0;
    writerContext.GetSrcStream ().GetListStyleInfo (&listStyle, &indentLevel);

    bool justOpenedList = false;
    if (listStyle == eListStyle_None) {
        // Not (or no longer) in a list: close any list item and list still open.
        if (writerContext.fEmittingListItem) {
            WriteCloseTag (writerContext, "li");
            write ("\r\n");
            writerContext.fEmittingListItem = false;
        }
        if (writerContext.fEmittingList) {
            WriteCloseTag (writerContext, "ul");
            write ("\r\n");
            writerContext.fEmittingList = false;
        }
    }
    else {
        // In a list: a pending paragraph is closed first, then "ul" / "li" are opened as needed.
        if (writerContext.fEmittedStartOfPara) {
            WriteCloseTag (writerContext, "p");
            write ("\r\n");
            writerContext.fEmittedStartOfPara = false;
        }
        if (not writerContext.fEmittingList) {
            WriteOpenTag (writerContext, "ul");
            write ("\r\n");
            writerContext.fEmittingList = true;
        }
        if (not writerContext.fEmittingListItem) {
            WriteOpenTag (writerContext, "li");
            writerContext.fEmittingListItem = true;
            justOpenedList                  = true; // remembered so a '\n' seen right now does not immediately close this "li"
        }
    }

    if (not writerContext.fEmittedStartOfPara and (not writerContext.fEmittingListItem) and (not writerContext.fInTableCell)) {
        // No matter what - even if we're looking at \n - emit start of para.
        // Check src text justification...
        switch (writerContext.GetSrcStream ().GetJustification ()) {
            case eLeftJustify:
                WriteOpenTag (writerContext, "p", "align=\"left\"");
                break;
            case eCenterJustify:
                WriteOpenTag (writerContext, "p", "align=\"center\"");
                break;
            case eRightJustify:
                WriteOpenTag (writerContext, "p", "align=\"right\"");
                break;
            case eFullyJustify:
                WriteOpenTag (writerContext, "p", "align=\"justify\"");
                break;
            default:
                break; // ignore
        }
        // Set even when the default case wrote no "p" tag at all.
        writerContext.fEmittedStartOfPara = true;
    }

    // -1 for PREV character, NOT 0
    if (writerContext.GetSrcStream ().current_offset () - 1 == 0) {
        // Very first character of this stream: emit the font tags for the initial style run.
        EmitBodyFontInfoChange (writerContext, fStyleRunSummary[writerContext.fIthStyleRun], false);
    }
    if (writerContext.GetSrcStream ().current_offset () - 1 == writerContext.fLastStyleChangeAt + writerContext.fLastEmittedISR.fLength) {
        // The character just read is the first one past the run last emitted: step to the next style run.
        writerContext.fLastStyleChangeAt = writerContext.GetSrcStream ().current_offset () - 1;
        writerContext.fIthStyleRun++;
    }

    // If the next char is a NL (in which case we will terminate the scope anyhow) - just do the close-tags - and skip the open ones...
    EmitBodyFontInfoChange (writerContext, fStyleRunSummary[writerContext.fIthStyleRun], c == '\n');

    switch (c) {
        case '\n': {
            if (writerContext.fEmittingListItem) {
                if (not justOpenedList) {
                    WriteCloseTag (writerContext, "li");
                    writerContext.fEmittingListItem = false;
                    write ("\r\n");
                }
            }
            else if (writerContext.fInTableCell) {
                // Inside a table cell no "p" is opened (see above), so a newline is written as a line break.
                WriteOpenCloseTag (writerContext, "br");
                write ("\r\n");
            }
            else if (writerContext.fEmittedStartOfPara) {
                WriteCloseTag (writerContext, "p");
                write ("\r\n");
                writerContext.fEmittedStartOfPara = false;
            }
        } break;

#if qStroika_Frameworks_Led_SupportGDI
        case kEmbeddingSentinelChar: {
            // The sentinel may stand for a table; if so write it out and we are done with this character.
            unique_ptr<Table> table (writerContext.GetSrcStream ().GetTableAt (writerContext.GetCurSrcOffset () - 1));
            if (table.get () != nullptr) {
                WriteTable (writerContext, table.get ());
                [[maybe_unused]] size_t x = table->GetOffsetEnd ();
                Assert (x == 1);
                break;
            }

            // right now we cannot write any embeddings out in HTML files - so skip the character
            // (the one exception handled below is a URL embedding, written as an "a" element)
            SimpleEmbeddedObjectStyleMarker* embedding = writerContext.GetCurSimpleEmbeddedObjectStyleMarker ();
            if (StandardURLStyleMarker* aHRef = dynamic_cast<StandardURLStyleMarker*> (embedding)) {
                Led_URLD urld = aHRef->GetURLData ();
                // NOTE(review): the URL and the title are written verbatim (no entity/attribute escaping) - confirm that is intended.
                WriteOpenTag (writerContext, "a", "href=\"" + urld.GetURL () + "\"");
                write (urld.GetTitle ().c_str ());
                WriteCloseTag (writerContext, "a");
            }
        } break;
#endif

        default: {
            if (c == fSoftLineBreakChar) {
                WriteOpenCloseTag (writerContext, "br");
                break;
            }
            wchar_t unicodeC = c;
            /*
             *  Check if the char should be written as an entity-ref, and otherwise simply emit it.
             */
            const vector<StyledTextIOReader_HTML::EntityRefMapEntry>& entityRefs = GetEntityRefMapTable ();
            vector<EntityRefMapEntry>::const_iterator                 i          = entityRefs.begin ();
            for (; i != entityRefs.end (); ++i) {
                if (unicodeC == (*i).fCharValue) {
                    write ('&');
                    write ((*i).fEntityRefName);
                    write (';');
                    break;
                }
            }
            // i reaches end () only when no table entry matched, i.e. nothing has been written for this char yet.
            if (i == entityRefs.end ()) {
                write (MapOutputTextFromWString (wstring{&unicodeC, 1}));
            }
        } break;
    }

    // Keep output lines reasonably short: after a whitespace character, break the line if more than
    // 80 source characters have gone by since the last forced line break.
    if (IsASCIISpace_ (c) and writerContext.GetSrcStream ().current_offset () - writerContext.fLastForcedNLAt > 80) {
        write ("\r\n"); // should write a newline every once in a while...
        writerContext.fLastForcedNLAt = writerContext.GetSrcStream ().current_offset ();
    }
}
2205
2206/*
2207@METHOD: StyledTextIOWriter_HTML::WriteTable
2208@DESCRIPTION: <p>Write the contents of a table object. This creates a nested
2209 @'StyledTextIOWriter_HTML::WriterContext' object for each cell, and calls
2210 @'StyledTextIOWriter_HTML::WriteInnerBody' to write each cells contents.
2211 </p>
2212*/
2213void StyledTextIOWriter_HTML::WriteTable (WriterContext& writerContext, Table* table)
2214{
2215 RequireNotNull (table);
2216 write ("\r\n");
2217
2218 using CellInfo = Table::CellInfo;
2219
2220 WriteOpenTag (writerContext, "table");
2221 write ("\r\n");
2222 size_t rows = table->GetRows ();
2223 for (size_t r = 0; r < rows; ++r) {
2224 WriteOpenTag (writerContext, "tr");
2225 vector<CellInfo> cellInfos;
2226 table->GetRowInfo (r, &cellInfos);
2227 size_t columns = cellInfos.size ();
2228 for (size_t c = 0; c < columns; c++) {
2229 WriteOpenTag (writerContext, "td");
2230 unique_ptr<SrcStream> srcStream = unique_ptr<SrcStream> (table->MakeCellSubSrcStream (r, c));
2231 if (srcStream.get () != nullptr) {
2232 WriterContext wc (writerContext, *srcStream.get ());
2233 vector<StyledInfoSummaryRecord> x = fStyleRunSummary;
2234 fStyleRunSummary.clear ();
2235 AssureStyleRunSummaryBuilt (wc);
2236 WriteInnerBody (wc);
2237 {
2238 // close all tags on the tmp tag stack
2239 for (vector<string>::reverse_iterator i = wc.fTagStack.rbegin (); i != wc.fTagStack.rend (); ++i) {
2240 string thisTag = *i;
2241 write ("</" + thisTag + ">");
2242 }
2243 }
2244 fStyleRunSummary = x;
2245 }
2246 WriteCloseTag (writerContext, "td");
2247 }
2248 WriteCloseTag (writerContext, "tr");
2249 write ("\r\n");
2250 }
2251 WriteCloseTag (writerContext, "table");
2252 write ("\r\n");
2253}
2254
2255/*
2256@METHOD: StyledTextIOWriter_HTML::WriteOpenTag
2257@DESCRIPTION: <p>Write the given tag (given by name - eg. "b" for bold) to the output. Save it in the tagstack
2258 so that later closing tags will be implicitly closed.</p>
2259 <p>See also @'StyledTextIOWriter_HTML::WriteCloseTag'.</p>
2260*/
2261void StyledTextIOWriter_HTML::WriteOpenTag (WriterContext& writerContext, const string& tagName, const string& tagExtras)
2262{
2263 Require (not tagName.empty ());
2264 Require (tagName[0] != '<'); // just the name - not the surrounding brackets...
2265 Require (tagName[0] != '/');
2266 writerContext.fTagStack.push_back (tagName);
2267 write ("<" + tagName);
2268 string te = tagExtras;
2269 if (not te.empty () and te[0] != ' ') {
2270 te = " " + te;
2271 }
2272 write (te.c_str ());
2273 write (">");
2274}
2275
/*
@METHOD:        StyledTextIOWriter_HTML::WriteOpenTagSpecial
@DESCRIPTION:   <p>Like @'StyledTextIOWriter_HTML::WriteOpenTag' - but the given argument text 'tagFullText' is
            written out verbatim as the complete tag (nothing is added around it). Helper for the case
            where the exact tag text was saved from the input document.</p>
                <p>'tagName' is still pushed on the tag stack, so the element is closed by name later.</p>
                <p>See also @'StyledTextIOWriter_HTML::WriteOpenTag'.</p>
*/
void StyledTextIOWriter_HTML::WriteOpenTagSpecial (WriterContext& writerContext, const string& tagName, const string& tagFullText)
{
    Require (not tagName.empty ());
    Require (tagName[0] != '<'); // just the name - not the surrounding brackets...
    Require (tagName[0] != '/');
    writerContext.fTagStack.push_back (tagName);
    write (tagFullText);
}
2290
2291/*
2292@METHOD: StyledTextIOWriter_HTML::WriteCloseTag
2293@DESCRIPTION: <p>Closes the given tag and removes it from the tag stack. First removes any implicity closed
2294 tags from the tag stack (and emits their closing tag HTML).</p>
2295 <p>See also @'StyledTextIOWriter_HTML::WriteOpenTag'.</p>
2296*/
2297void StyledTextIOWriter_HTML::WriteCloseTag (WriterContext& writerContext, const string& tagName)
2298{
2299 // Walk up the stack and see how many tags need to be closed
2300 size_t countToPop = 0;
2301 for (vector<string>::reverse_iterator i = writerContext.fTagStack.rbegin (); i != writerContext.fTagStack.rend (); ++i) {
2302 string thisTag = *i;
2303 write ("</" + thisTag + ">");
2304 countToPop++;
2305 if (thisTag == tagName) { // XML - and our writer - are case sensative
2306 break;
2307 }
2308 }
2309 writerContext.fTagStack.erase (writerContext.fTagStack.end () - countToPop, writerContext.fTagStack.end ());
2310}
2311
2312/*
2313@METHOD: StyledTextIOWriter_HTML::WriteOpenCloseTag
2314@DESCRIPTION: <p>Writes an open tag - possibly with arguments - and implicitly closes it. Tags like "br" get
2315 handled that way.</p>
2316 <p>See also @'StyledTextIOWriter_HTML::WriteOpenTag'.</p>
2317*/
2318void StyledTextIOWriter_HTML::WriteOpenCloseTag (WriterContext& /*writerContext*/, [[maybe_unused]] const string& tagName, const string& tagExtras)
2319{
2320 Require (not tagName.empty ());
2321 Require (tagName[0] != '<'); // just the name - not the surrounding brackets...
2322 Require (tagName[0] != '/');
2323 // NB: don't add to the tagstack since this is an OPEN & a CLOSE
2324 write ("<");
2325 string te = tagExtras;
2326 if (not te.empty () and te[0] != ' ') {
2327 te = " " + te;
2328 }
2329 write (te.c_str ());
2330 write ("/>");
2331}
2332
2333/*
2334@METHOD: StyledTextIOWriter_HTML::IsTagOnStack
2335@DESCRIPTION: <p>See if there is an open tag on the stack with the given name.</p>
2336 <p>See also @'StyledTextIOWriter_HTML::WriteOpenTag'.</p>
2337*/
2338bool StyledTextIOWriter_HTML::IsTagOnStack (WriterContext& writerContext, const string& tagName)
2339{
2340 for (auto i = writerContext.fTagStack.begin (); i != writerContext.fTagStack.end (); ++i) {
2341 if (*i == tagName) { // XML - and our writer - are case sensative
2342 return true;
2343 }
2344 }
2345 return false;
2346}
2347
2348static inline string PrintColorString (Color color)
2349{
2350 unsigned short red = color.GetRed () >> 8;
2351 unsigned short greeen = color.GetGreen () >> 8;
2352 unsigned short blue = color.GetBlue () >> 8;
2353 char buf[1024];
2354 buf[0] = '"';
2355 buf[1] = NumToHexChar_ (red >> 4);
2356 buf[2] = NumToHexChar_ (red % 16);
2357 buf[3] = NumToHexChar_ (greeen >> 4);
2358 buf[4] = NumToHexChar_ (greeen % 16);
2359 buf[5] = NumToHexChar_ (blue >> 4);
2360 buf[6] = NumToHexChar_ (blue % 16);
2361 buf[7] = '"';
2362 buf[8] = '\0';
2363 string result = string{buf};
2364 for (size_t i = 0; i < Memory::NEltsOf (kColorNameTable_); ++i) {
2365 if (Led_CasedStringsEqual (buf, string{"\""sv} + string{kColorNameTable_[i].second} + string{"\""sv})) {
2366 //result = "\'" + kColorNameTable_[i].first + "\'";
2367 result = kColorNameTable_[i].first;
2368 break;
2369 }
2370 }
2371 return result;
2372}
2373
2374void StyledTextIOWriter_HTML::EmitBodyFontInfoChange (WriterContext& writerContext, const FontSpecification& newOne, bool skipDoingOpenTags)
2375{
2376 // Close off old
2377 bool fontTagChanged = newOne.GetFontName () != writerContext.fLastEmittedISR.GetFontName () or
2378 newOne.GetPointSize () != writerContext.fLastEmittedISR.GetPointSize () or
2379 newOne.GetTextColor () != writerContext.fLastEmittedISR.GetTextColor ();
2380 if (fontTagChanged and IsTagOnStack (writerContext, "span")) {
2381 WriteCloseTag (writerContext, "span");
2382 }
2383 if (not newOne.GetStyle_Bold () and IsTagOnStack (writerContext, "b")) {
2384 WriteCloseTag (writerContext, "b");
2385 }
2386 if (not newOne.GetStyle_Italic () and IsTagOnStack (writerContext, "i")) {
2387 WriteCloseTag (writerContext, "i");
2388 }
2389 if (not newOne.GetStyle_Underline () and IsTagOnStack (writerContext, "u")) {
2390 WriteCloseTag (writerContext, "u");
2391 }
2392 if (newOne.GetStyle_SubOrSuperScript () != FontSpecification::eSubscript and IsTagOnStack (writerContext, "sub")) {
2393 WriteCloseTag (writerContext, "sub");
2394 }
2395 if (newOne.GetStyle_SubOrSuperScript () != FontSpecification::eSuperscript and IsTagOnStack (writerContext, "sup")) {
2396 WriteCloseTag (writerContext, "sup");
2397 }
2398#if qStroika_Foundation_Common_Platform_Windows
2399 if (not newOne.GetStyle_Strikeout () and IsTagOnStack (writerContext, "strike")) {
2400 WriteCloseTag (writerContext, "strike");
2401 }
2402#endif
2403
2404 // Open new tags
2405 if (skipDoingOpenTags) {
2406 // Set to a BLANK record - cuz we aren't actually emitting any open-tags - so make sure gets done later after the new <p> tag
2407 writerContext.fLastEmittedISR =
2408 StyledInfoSummaryRecord (IncrementalFontSpecification (), fStyleRunSummary[writerContext.fIthStyleRun].fLength);
2409 }
2410 else {
2411 if (not IsTagOnStack (writerContext, "span")) {
2412 char sprintfBuffer[1024];
2413 string cssInfo = "style=\"";
2414 cssInfo += "font-family: '" + String::FromSDKString (newOne.GetFontName ()).AsNarrowSDKString () + "'; ";
2415 cssInfo += (snprintf (sprintfBuffer, Memory::NEltsOf (sprintfBuffer), "font-size: %dpt; ", newOne.GetPointSize ()), sprintfBuffer);
2416 cssInfo += "color: " + PrintColorString (newOne.GetTextColor ());
2417 cssInfo += "\"";
2418 WriteOpenTag (writerContext, "span",
2419#if 1
2420 cssInfo
2421#else
2422 "face=\"" + SDKString2NarrowSDK (newOne.GetFontName ()) + "\" size=\"" +
2423 Led_NumberToDigitChar (HTMLInfo::RealFontSizeToHTMLFontSize (newOne.GetPointSize ())) +
2424 "\" color=" + PrintColorString (newOne.GetTextColor ())
2425#endif
2426 );
2427 }
2428 if (newOne.GetStyle_Bold () and not IsTagOnStack (writerContext, "b")) {
2429 WriteOpenTag (writerContext, "b");
2430 }
2431 if (newOne.GetStyle_Italic () and not IsTagOnStack (writerContext, "i")) {
2432 WriteOpenTag (writerContext, "i");
2433 }
2434 if (newOne.GetStyle_Underline () and not IsTagOnStack (writerContext, "u")) {
2435 WriteOpenTag (writerContext, "u");
2436 }
2437
2438 switch (newOne.GetStyle_SubOrSuperScript ()) {
2439 case FontSpecification::eSubscript: {
2440 if (not IsTagOnStack (writerContext, "sub")) {
2441 WriteOpenTag (writerContext, "sub");
2442 }
2443 } break;
2444 case FontSpecification::eSuperscript: {
2445 if (not IsTagOnStack (writerContext, "sup")) {
2446 WriteOpenTag (writerContext, "sup");
2447 }
2448 } break;
2449 }
2450#if qStroika_Foundation_Common_Platform_Windows
2451 if (newOne.GetStyle_Strikeout () and not IsTagOnStack (writerContext, "strike")) {
2452 WriteOpenTag (writerContext, "strike");
2453 }
2454#endif
2455 writerContext.fLastEmittedISR = fStyleRunSummary[writerContext.fIthStyleRun];
2456 }
2457}
2458
2459void StyledTextIOWriter_HTML::AssureStyleRunSummaryBuilt (WriterContext& writerContext)
2460{
2461 if (fStyleRunSummary.empty ()) {
2462 size_t totalTextLength = writerContext.GetSrcStream ().GetTotalTextLength ();
2463 fStyleRunSummary = vector<StyledInfoSummaryRecord> (writerContext.GetSrcStream ().GetStyleInfo (0, totalTextLength));
2464 }
2465}
2466
2467string StyledTextIOWriter_HTML::MapOutputTextFromWString (const wstring& text)
2468{
2469 // Try to write out non-ascii characters (maybe should only do non- ANSI/ISOLATIN1? chars this way?)
2470 // as entities - which are interpreted as UNICODE chars
2471 string result;
2472 result.reserve (text.length ());
2473 for (auto i = text.begin (); i != text.end (); ++i) {
2474 if (static_cast<unsigned int> (*i) <= 127) {
2475 char c = static_cast<char> (*i);
2476 result.append (&c, 1);
2477 }
2478 else {
2479 char buf[1024];
2480 (void)snprintf (buf, Memory::NEltsOf (buf), "&#%d;", static_cast<unsigned int> (*i));
2481 result += buf;
2482 }
2483 }
2484 return result;
2485}
2486
/*
 *  Convert text in the editor's native string type (Led_tString) to the narrow text written to the HTML
 *  file. Simply forwards to MapOutputTextFromWString, so the same mapping (and the same lack of
 *  HTML-special-character escaping) applies.
 */
string StyledTextIOWriter_HTML::MapOutputTextFromTString (const Led_tString& text)
{
    return MapOutputTextFromWString (text);
}
#define AssertNotNull(p)
Definition Assertions.h:333
#define RequireNotNull(p)
Definition Assertions.h:347
CodeCvt unifies byte <-> unicode conversions, vaguely inspired by (and wraps) std::codecvt,...
Definition CodeCvt.h:118
nonvirtual STRINGISH Bytes2String(span< const byte > from) const
nonvirtual BLOBISH String2Bytes(span< const CHAR_T > from) const
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...