Skip to content

Commit 670dae5

Browse files
committed
Merge pull request 'Fix html bugs' (#254) from fix/html-bugs into hotfix/v8.3.2
Reviewed-on: https://git.onlyoffice.com/ONLYOFFICE/core/pulls/254
2 parents 95a5a2c + 20a4bcf commit 670dae5

File tree

2 files changed

+22
-9
lines changed

2 files changed

+22
-9
lines changed

Common/3dParty/html/fetch.py

+2
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,5 @@
2525
base.replaceInFileUtf8(base_directory + "/katana-parser/src/tokenizer.c", "static inline bool2 katana_is_html_space(char c);", "static inline bool katana_is_html_space(char c);")
2626
base.replaceInFileUtf8(base_directory + "/katana-parser/src/parser.c", "katanaget_text(parser->scanner)", "/*katanaget_text(parser->scanner)*/\"error\"")
2727
base.replaceInFileUtf8(base_directory + "/katana-parser/src/parser.c", "#define KATANA_PARSER_STRING(literal) (KatanaParserString){", "#define KATANA_PARSER_STRING(literal) {")
28+
# katana may call strlen on a NULL string pointer in some cases (e.g. for an empty string), so add a NULL guard (bug#73485)
29+
base.replaceInFileUtf8(base_directory + "/katana-parser/src/foundation.c", "size_t len = strlen(str);", "if (NULL == str)\n return;\n size_t len = strlen(str);")

Common/3dParty/html/htmltoxhtml.h

+20-9
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ static std::string nonbreaking_inline = "|a|abbr|acronym|b|bdo|big|cite|code|df
1919
static std::string empty_tags = "|area|base|basefont|bgsound|br|command|col|embed|event-source|frame|hr|image|img|input|keygen|link|menuitem|meta|param|source|spacer|track|wbr|";
2020
static std::string preserve_whitespace = "|pre|textarea|script|style|";
2121
static std::string special_handling = "|html|body|";
22-
static std::string no_entity_sub = ""; //"|style|";
2322
static std::string treat_like_inline = "|p|";
2423

2524
static std::vector<std::string> html_tags = {"div","span","a","img","p","h1","h2","h3","h4","h5","h6",
@@ -436,9 +435,25 @@ static void substitute_xml_entities_into_text(std::string& text)
436435
replace_all(text, ">", "&gt;");
437436
}
438437

438+
// Removes control characters (bytes <= 0x1F) from an attribute value, in place.
//
// After running through Gumbo, numeric references such as "&#1;" are replaced
// with the raw byte they denote (0x01). Attribute values have no use for
// control characters, and XmlUtils::CXmlLiteReader crashes on them, so they are
// simply deleted. bug#73486
//
// The byte is examined as unsigned char on purpose: where plain char is signed,
// bytes 0x80..0xFF (i.e. every UTF-8 multi-byte sequence) are negative and
// would compare as <= 0x1F, which would strip all non-ASCII text as well.
static void remove_control_symbols(std::string& text)
{
	// Erase-remove idiom: one linear pass instead of one erase() per match.
	text.erase(std::remove_if(text.begin(), text.end(),
	                          [](unsigned char chValue){ return chValue <= 0x1F; }),
	           text.end());
}
452+
439453
// Prepares an attribute value for output inside double quotes, modifying text in place:
//   1. strips control characters (remove_control_symbols),
//   2. applies the same entity substitution that is used for text nodes
//      (substitute_xml_entities_into_text),
//   3. replaces every double quote with &quot;.
// NOTE(review): the quote replacement presumably has to stay last so that the '&'
// it introduces is not escaped again by step 2 - confirm against
// substitute_xml_entities_into_text.
440454
static void substitute_xml_entities_into_attributes(std::string& text)
441455
{
456+
remove_control_symbols(text);
442457
substitute_xml_entities_into_text(text);
443458
replace_all(text, "\"", "&quot;");
444459
}
@@ -486,7 +501,7 @@ static void build_doctype(GumboNode* node, NSStringUtils::CStringBuilderA& oBuil
486501
}
487502
}
488503

489-
static void build_attributes(const GumboVector* attribs, bool no_entities, NSStringUtils::CStringBuilderA& atts)
504+
static void build_attributes(const GumboVector* attribs, NSStringUtils::CStringBuilderA& atts)
490505
{
491506
std::vector<std::string> arrRepeat;
492507
for (size_t i = 0; i < attribs->length; ++i)
@@ -532,8 +547,7 @@ static void build_attributes(const GumboVector* attribs, bool no_entities, NSStr
532547
std::string qs ="\"";
533548
atts.WriteString("=");
534549
atts.WriteString(qs);
535-
if(!no_entities)
536-
substitute_xml_entities_into_attributes(sVal);
550+
substitute_xml_entities_into_attributes(sVal);
537551
atts.WriteString(sVal);
538552
atts.WriteString(qs);
539553
}
@@ -542,7 +556,6 @@ static void build_attributes(const GumboVector* attribs, bool no_entities, NSStr
542556
static void prettyprint_contents(GumboNode* node, NSStringUtils::CStringBuilderA& contents, bool bCheckValidNode)
543557
{
544558
std::string key = "|" + get_tag_name(node) + "|";
545-
bool no_entity_substitution = no_entity_sub.find(key) != std::string::npos;
546559
bool keep_whitespace = preserve_whitespace.find(key) != std::string::npos;
547560
bool is_inline = nonbreaking_inline.find(key) != std::string::npos;
548561
bool is_like_inline = treat_like_inline.find(key) != std::string::npos;
@@ -556,8 +569,7 @@ static void prettyprint_contents(GumboNode* node, NSStringUtils::CStringBuilderA
556569
if (child->type == GUMBO_NODE_TEXT)
557570
{
558571
std::string val(child->v.text.text);
559-
if(!no_entity_substitution)
560-
substitute_xml_entities_into_text(val);
572+
substitute_xml_entities_into_text(val);
561573

562574
// Избавление от FF
563575
size_t found = val.find_first_of("\014");
@@ -613,7 +625,6 @@ static void prettyprint(GumboNode* node, NSStringUtils::CStringBuilderA& oBuilde
613625
std::string closeTag = "";
614626
std::string key = "|" + tagname + "|";
615627
bool is_empty_tag = empty_tags.find(key) != std::string::npos;
616-
bool no_entity_substitution = no_entity_sub.find(key) != std::string::npos;
617628

618629
// determine closing tag type
619630
if (is_empty_tag)
@@ -626,7 +637,7 @@ static void prettyprint(GumboNode* node, NSStringUtils::CStringBuilderA& oBuilde
626637

627638
// build attr string
628639
const GumboVector* attribs = &node->v.element.attributes;
629-
build_attributes(attribs, no_entity_substitution, oBuilder);
640+
build_attributes(attribs, oBuilder);
630641
oBuilder.WriteString(close + ">");
631642

632643
// prettyprint your contents

0 commit comments

Comments
 (0)