Skip to content

Commit 70eb54c

Browse files
committed
Implement the latest select parsing changes (whatwg/html#10557) and pin to upstream html5lib-tests (html5lib/html5lib-tests#178)
1 parent 9568860 commit 70eb54c

File tree

3 files changed

+37
-207
lines changed

3 files changed

+37
-207
lines changed

gumbo-parser/src/insertion_mode.h

-2
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,6 @@ typedef enum {
2020
GUMBO_INSERTION_MODE_IN_TABLE_BODY,
2121
GUMBO_INSERTION_MODE_IN_ROW,
2222
GUMBO_INSERTION_MODE_IN_CELL,
23-
GUMBO_INSERTION_MODE_IN_SELECT,
24-
GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE,
2523
GUMBO_INSERTION_MODE_IN_TEMPLATE,
2624
GUMBO_INSERTION_MODE_AFTER_BODY,
2725
GUMBO_INSERTION_MODE_IN_FRAMESET,

gumbo-parser/src/parser.c

+36-204
Original file line number | Diff line number | Diff line change
@@ -670,21 +670,6 @@ static GumboInsertionMode get_appropriate_insertion_mode (
670670
}
671671

672672
switch (node->v.element.tag) {
673-
case GUMBO_TAG_SELECT: {
674-
if (is_last) {
675-
return GUMBO_INSERTION_MODE_IN_SELECT;
676-
}
677-
for (int i = index; i > 0; --i) {
678-
const GumboNode* ancestor = open_elements->data[i];
679-
if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
680-
return GUMBO_INSERTION_MODE_IN_SELECT;
681-
}
682-
if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
683-
return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
684-
}
685-
}
686-
return GUMBO_INSERTION_MODE_IN_SELECT;
687-
}
688673
case GUMBO_TAG_TD:
689674
case GUMBO_TAG_TH:
690675
if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
@@ -1610,6 +1595,7 @@ static bool has_open_element(const GumboParser* parser, GumboTag tag) {
16101595
TAG(TH), \
16111596
TAG(MARQUEE), \
16121597
TAG(OBJECT), \
1598+
TAG(SELECT), \
16131599
TAG(TEMPLATE), \
16141600
TAG_MATHML(MI), \
16151601
TAG_MATHML(MO), \
@@ -1694,12 +1680,6 @@ static bool has_an_element_in_table_scope(const GumboParser* parser, GumboTag ta
16941680
return has_an_element_in_specific_scope(parser, 1, &tag, false, &tags);
16951681
}
16961682

1697-
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope
1698-
static bool has_an_element_in_select_scope(const GumboParser* parser, GumboTag tag) {
1699-
static const TagSet tags = {TAG(OPTGROUP), TAG(OPTION)};
1700-
return has_an_element_in_specific_scope(parser, 1, &tag, true, &tags);
1701-
}
1702-
17031683
// https://html.spec.whatwg.org/multipage/parsing.html#generate-implied-end-tags
17041684
// "exception" is the "element to exclude from the process" listed in the spec.
17051685
// Pass GUMBO_TAG_LAST to not exclude any of them.
@@ -1804,18 +1784,6 @@ static void close_current_cell(GumboParser* parser, const GumboToken* token) {
18041784
close_table_cell(parser, token, cell_tag);
18051785
}
18061786

1807-
// This factors out the "act as if an end tag of tag name 'select' had been
1808-
// seen" clause of the spec, since it's referenced in several places. It pops
1809-
// all nodes from the stack until the current <select> has been closed, then
1810-
// resets the insertion mode appropriately.
1811-
static void close_current_select(GumboParser* parser) {
1812-
GumboNode* node = pop_current_node(parser);
1813-
while (!node_html_tag_is(node, GUMBO_TAG_SELECT)) {
1814-
node = pop_current_node(parser);
1815-
}
1816-
reset_insertion_mode_appropriately(parser);
1817-
}
1818-
18191787
// The list of nodes in the "special" category:
18201788
// https://html.spec.whatwg.org/multipage/parsing.html#special
18211789
static bool is_special_node(const GumboNode* node) {
@@ -3310,6 +3278,10 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
33103278
}
33113279
if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
33123280
maybe_implicitly_close_p_tag(parser, token);
3281+
generate_implied_end_tags(parser, GUMBO_TAG_OPTGROUP, NULL);
3282+
if (has_an_element_in_scope(parser, GUMBO_TAG_OPTION)) {
3283+
parser_add_parse_error(parser, token);
3284+
}
33133285
insert_element_from_token(parser, token);
33143286
pop_current_node(parser);
33153287
acknowledge_self_closing_tag(parser);
@@ -3342,30 +3314,45 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
33423314
return;
33433315
}
33443316
if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3317+
if (has_an_element_in_scope(parser, GUMBO_TAG_SELECT)) {
3318+
parser_add_parse_error(parser, token);
3319+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_SELECT))
3320+
;
3321+
}
33453322
reconstruct_active_formatting_elements(parser);
33463323
insert_element_from_token(parser, token);
33473324
set_frameset_not_ok(parser);
3348-
GumboInsertionMode state = parser->_parser_state->_insertion_mode;
3349-
if (
3350-
state == GUMBO_INSERTION_MODE_IN_TABLE
3351-
|| state == GUMBO_INSERTION_MODE_IN_CAPTION
3352-
|| state == GUMBO_INSERTION_MODE_IN_TABLE_BODY
3353-
|| state == GUMBO_INSERTION_MODE_IN_ROW
3354-
|| state == GUMBO_INSERTION_MODE_IN_CELL
3355-
) {
3356-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE);
3325+
return;
3326+
}
3327+
if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
3328+
if (has_an_element_in_scope(parser, GUMBO_TAG_SELECT)) {
3329+
generate_implied_end_tags(parser, GUMBO_TAG_OPTGROUP, NULL);
3330+
if (has_an_element_in_scope(parser, GUMBO_TAG_OPTION)) {
3331+
parser_add_parse_error(parser, token);
3332+
}
33573333
} else {
3358-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
3334+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3335+
pop_current_node(parser);
3336+
}
3337+
reconstruct_active_formatting_elements(parser);
33593338
}
3339+
insert_element_from_token(parser, token);
33603340
return;
33613341
}
3362-
if (
3363-
tag_in(token, kStartTag, &(const TagSet){TAG(OPTGROUP), TAG(OPTION)})
3364-
) {
3365-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3366-
pop_current_node(parser);
3342+
if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
3343+
if (has_an_element_in_scope(parser, GUMBO_TAG_SELECT)) {
3344+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3345+
if (has_an_element_in_scope_with_tagname(parser, 2, (GumboTag[]) {
3346+
GUMBO_TAG_OPTION, GUMBO_TAG_OPTGROUP
3347+
})) {
3348+
parser_add_parse_error(parser, token);
3349+
}
3350+
} else {
3351+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3352+
pop_current_node(parser);
3353+
}
3354+
reconstruct_active_formatting_elements(parser);
33673355
}
3368-
reconstruct_active_formatting_elements(parser);
33693356
insert_element_from_token(parser, token);
33703357
return;
33713358
}
@@ -3944,159 +3931,6 @@ static void handle_in_cell(GumboParser* parser, GumboToken* token) {
39443931
handle_in_body(parser, token);
39453932
}
39463933

3947-
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
3948-
static void handle_in_select(GumboParser* parser, GumboToken* token) {
3949-
if (token->type == GUMBO_TOKEN_NULL) {
3950-
parser_add_parse_error(parser, token);
3951-
ignore_token(parser);
3952-
return;
3953-
}
3954-
if (
3955-
token->type == GUMBO_TOKEN_CHARACTER
3956-
|| token->type == GUMBO_TOKEN_WHITESPACE
3957-
) {
3958-
insert_text_token(parser, token);
3959-
return;
3960-
}
3961-
if (token->type == GUMBO_TOKEN_COMMENT) {
3962-
append_comment_node(parser, get_current_node(parser), token);
3963-
return;
3964-
}
3965-
if (token->type == GUMBO_TOKEN_DOCTYPE) {
3966-
parser_add_parse_error(parser, token);
3967-
ignore_token(parser);
3968-
return;
3969-
}
3970-
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3971-
handle_in_body(parser, token);
3972-
return;
3973-
}
3974-
if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
3975-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3976-
pop_current_node(parser);
3977-
}
3978-
insert_element_from_token(parser, token);
3979-
return;
3980-
}
3981-
if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
3982-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3983-
pop_current_node(parser);
3984-
}
3985-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3986-
pop_current_node(parser);
3987-
}
3988-
insert_element_from_token(parser, token);
3989-
return;
3990-
}
3991-
if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
3992-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3993-
pop_current_node(parser);
3994-
}
3995-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3996-
pop_current_node(parser);
3997-
}
3998-
insert_element_from_token(parser, token);
3999-
pop_current_node(parser);
4000-
acknowledge_self_closing_tag(parser);
4001-
return;
4002-
}
4003-
if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
4004-
GumboVector* open_elements = &parser->_parser_state->_open_elements;
4005-
if (
4006-
node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)
4007-
&& node_html_tag_is (
4008-
open_elements->data[open_elements->length - 2],
4009-
GUMBO_TAG_OPTGROUP
4010-
)
4011-
) {
4012-
pop_current_node(parser);
4013-
}
4014-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
4015-
pop_current_node(parser);
4016-
return;
4017-
}
4018-
parser_add_parse_error(parser, token);
4019-
ignore_token(parser);
4020-
return;
4021-
}
4022-
if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
4023-
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
4024-
pop_current_node(parser);
4025-
return;
4026-
}
4027-
parser_add_parse_error(parser, token);
4028-
ignore_token(parser);
4029-
return;
4030-
}
4031-
if (tag_is(token, kEndTag, GUMBO_TAG_SELECT)) {
4032-
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
4033-
parser_add_parse_error(parser, token);
4034-
ignore_token(parser);
4035-
return;
4036-
}
4037-
close_current_select(parser);
4038-
return;
4039-
}
4040-
if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
4041-
parser_add_parse_error(parser, token);
4042-
ignore_token(parser);
4043-
if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
4044-
close_current_select(parser);
4045-
}
4046-
return;
4047-
}
4048-
if (
4049-
tag_in(token, kStartTag, &(const TagSet) {TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})
4050-
) {
4051-
parser_add_parse_error(parser, token);
4052-
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
4053-
ignore_token(parser);
4054-
} else {
4055-
close_current_select(parser);
4056-
parser->_parser_state->_reprocess_current_token = true;
4057-
}
4058-
return;
4059-
}
4060-
if (
4061-
tag_in(token, kStartTag, &(const TagSet){TAG(SCRIPT), TAG(TEMPLATE)})
4062-
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
4063-
) {
4064-
handle_in_head(parser, token);
4065-
return;
4066-
}
4067-
if (token->type == GUMBO_TOKEN_EOF) {
4068-
handle_in_body(parser, token);
4069-
return;
4070-
}
4071-
parser_add_parse_error(parser, token);
4072-
ignore_token(parser);
4073-
}
4074-
4075-
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselectintable
4076-
static void handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
4077-
static const TagSet tags = {
4078-
TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
4079-
TAG(TR), TAG(TD), TAG(TH)
4080-
};
4081-
if (tag_in(token, kStartTag, &tags)) {
4082-
parser_add_parse_error(parser, token);
4083-
close_current_select(parser);
4084-
parser->_parser_state->_reprocess_current_token = true;
4085-
return;
4086-
}
4087-
if (tag_in(token, kEndTag, &tags)) {
4088-
parser_add_parse_error(parser, token);
4089-
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
4090-
ignore_token(parser);
4091-
return;
4092-
}
4093-
close_current_select(parser);
4094-
parser->_parser_state->_reprocess_current_token = true;
4095-
return;
4096-
}
4097-
handle_in_select(parser, token);
4098-
}
4099-
41003934
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate
41013935
static void handle_in_template(GumboParser* parser, GumboToken* token) {
41023936
GumboParserState* state = parser->_parser_state;
@@ -4395,8 +4229,6 @@ static const TokenHandler kTokenHandlers[] = {
43954229
handle_in_table_body,
43964230
handle_in_row,
43974231
handle_in_cell,
4398-
handle_in_select,
4399-
handle_in_select_in_table,
44004232
handle_in_template,
44014233
handle_after_body,
44024234
handle_in_frameset,

0 commit comments

Comments
 (0)