@@ -19,7 +19,6 @@ static std::string nonbreaking_inline = "|a|abbr|acronym|b|bdo|big|cite|code|df
19
19
// Tag-name sets stored as '|'-delimited strings. A tag is tested for membership by
// building a key of the form "|name|" and searching for it with std::string::find.

// Consulted by prettyprint (is_empty_tag) when it determines the closing tag type.
static std::string empty_tags = " |area|base|basefont|bgsound|br|command|col|embed|event-source|frame|hr|image|img|input|keygen|link|menuitem|meta|param|source|spacer|track|wbr|" ;
20
20
// Consulted by prettyprint_contents to compute its keep_whitespace flag.
static std::string preserve_whitespace = " |pre|textarea|script|style|" ;
21
21
// NOTE(review): no use of this set is visible in this part of the file; confirm where it is read.
static std::string special_handling = " |html|body|" ;
22
- static std::string no_entity_sub = " " ; // "|style|";
23
22
// Consulted by prettyprint_contents to compute its is_like_inline flag.
static std::string treat_like_inline = " |p|" ;
24
23
25
24
static std::vector<std::string> html_tags = {" div" ," span" ," a" ," img" ," p" ," h1" ," h2" ," h3" ," h4" ," h5" ," h6" ,
@@ -436,9 +435,25 @@ static void substitute_xml_entities_into_text(std::string& text)
436
435
replace_all (text, " >" , " >" );
437
436
}
438
437
438
// Removes control bytes (0x00..0x1F) from an attribute value, in place.
//
// After the document has been run through Gumbo, attribute values can contain raw
// control bytes such as 0x01. Attribute values do not use control characters, so they
// are simply deleted; otherwise XmlUtils::CXmlLiteReader crashes on them.
// bug#73486
//
// text - attribute value to clean; every other byte keeps its relative order.
static void remove_control_symbols(std::string& text)
{
	// Compare as unsigned char: where plain char is signed, bytes >= 0x80 (the lead and
	// continuation bytes of UTF-8 sequences) are negative, so a plain "<= 0x1F" test
	// would delete all non-ASCII text together with the control symbols.
	const auto is_control_symbol = [](char chValue)
	{
		return static_cast<unsigned char>(chValue) <= 0x1F;
	};

	// Erase-remove idiom: one pass over the string instead of shifting the tail
	// once per removed symbol.
	text.erase(std::remove_if(text.begin(), text.end(), is_control_symbol), text.end());
}
452
+
439
453
// Prepares an attribute value for output, modifying text in place:
//   1. removes control symbols (remove_control_symbols),
//   2. applies the same entity substitution that is used for text nodes,
//   3. rewrites every double-quote character.
// NOTE(review): the replacement for the double quote is presumably its XML entity;
// confirm the literal in the replace_all call below.
440
454
static void substitute_xml_entities_into_attributes (std::string& text)
441
455
{
456
+ remove_control_symbols (text);
442
457
substitute_xml_entities_into_text (text);
443
458
replace_all (text, " \" " , " "" );
444
459
}
@@ -486,7 +501,7 @@ static void build_doctype(GumboNode* node, NSStringUtils::CStringBuilderA& oBuil
486
501
}
487
502
}
488
503
489
- static void build_attributes (const GumboVector* attribs, bool no_entities, NSStringUtils::CStringBuilderA& atts)
504
+ static void build_attributes (const GumboVector* attribs, NSStringUtils::CStringBuilderA& atts)
490
505
{
491
506
std::vector<std::string> arrRepeat;
492
507
for (size_t i = 0 ; i < attribs->length ; ++i)
@@ -532,8 +547,7 @@ static void build_attributes(const GumboVector* attribs, bool no_entities, NSStr
532
547
std::string qs =" \" " ;
533
548
atts.WriteString (" =" );
534
549
atts.WriteString (qs);
535
- if (!no_entities)
536
- substitute_xml_entities_into_attributes (sVal );
550
+ substitute_xml_entities_into_attributes (sVal );
537
551
atts.WriteString (sVal );
538
552
atts.WriteString (qs);
539
553
}
@@ -542,7 +556,6 @@ static void build_attributes(const GumboVector* attribs, bool no_entities, NSStr
542
556
static void prettyprint_contents (GumboNode* node, NSStringUtils::CStringBuilderA& contents, bool bCheckValidNode)
543
557
{
544
558
std::string key = " |" + get_tag_name (node) + " |" ;
545
- bool no_entity_substitution = no_entity_sub.find (key) != std::string::npos;
546
559
bool keep_whitespace = preserve_whitespace.find (key) != std::string::npos;
547
560
bool is_inline = nonbreaking_inline.find (key) != std::string::npos;
548
561
bool is_like_inline = treat_like_inline.find (key) != std::string::npos;
@@ -556,8 +569,7 @@ static void prettyprint_contents(GumboNode* node, NSStringUtils::CStringBuilderA
556
569
if (child->type == GUMBO_NODE_TEXT)
557
570
{
558
571
std::string val (child->v .text .text );
559
- if (!no_entity_substitution)
560
- substitute_xml_entities_into_text (val);
572
+ substitute_xml_entities_into_text (val);
561
573
562
574
// Избавление от FF
563
575
size_t found = val.find_first_of (" \014 " );
@@ -613,7 +625,6 @@ static void prettyprint(GumboNode* node, NSStringUtils::CStringBuilderA& oBuilde
613
625
std::string closeTag = " " ;
614
626
std::string key = " |" + tagname + " |" ;
615
627
bool is_empty_tag = empty_tags.find (key) != std::string::npos;
616
- bool no_entity_substitution = no_entity_sub.find (key) != std::string::npos;
617
628
618
629
// determine closing tag type
619
630
if (is_empty_tag)
@@ -626,7 +637,7 @@ static void prettyprint(GumboNode* node, NSStringUtils::CStringBuilderA& oBuilde
626
637
627
638
// build attr string
628
639
const GumboVector* attribs = &node->v .element .attributes ;
629
- build_attributes (attribs, no_entity_substitution, oBuilder);
640
+ build_attributes (attribs, oBuilder);
630
641
oBuilder.WriteString (close + " >" );
631
642
632
643
// prettyprint your contents
0 commit comments