Skip to content

Commit 5a04f5f

Browse files
committed
'<' is a valid attribute name
1 parent f71c251 commit 5a04f5f

File tree

130 files changed

+318
-134
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+318
-134
lines changed

README.md

+2-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ The **Htmlunit-NekoHtml** Parser is used by Htmlunit.
1414

1515
### Project News
1616

17-
**[Developer Blog][5]**
17+
**[Developer Blog](https://htmlunit.github.io/htmlunit-blog/)**
1818

1919
[HtmlUnit@mastodon][4] | [HtmlUnit@Twitter][3]
2020

@@ -311,5 +311,4 @@ Many thanks to all of you contributing to HtmlUnit/CSSParser/Rhino/NekoHtml in t
311311

312312
[2]: https://jenkins.wetator.org/job/HtmlUnit%20-%20Neko/ "HtmlUnit -Neko CI"
313313
[3]: https://twitter.com/HtmlUnit "https://twitter.com/HtmlUnit"
314-
[4]: https://fosstodon.org/@HtmlUnit
315-
[5]: https://htmlunit.github.io/htmlunit-blog/
314+
[4]: https://fosstodon.org/@HtmlUnit

pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<modelVersion>4.0.0</modelVersion>
66
<groupId>org.htmlunit</groupId>
77
<artifactId>neko-htmlunit</artifactId>
8-
<version>4.7.0</version>
8+
<version>4.8.0-SNAPSHOT</version>
99
<name>HtmlUnit NekoHtml</name>
1010
<organization>
1111
<name>HtmlUnit</name>
@@ -25,7 +25,7 @@
2525

2626
<junit.version>5.11.3</junit.version>
2727

28-
<checkstyle.version>10.20.1</checkstyle.version>
28+
<checkstyle.version>10.20.2</checkstyle.version>
2929
<spotbugs.version>4.8.6</spotbugs.version>
3030
<pmd.version>7.7.0</pmd.version>
3131
<dependencycheck.version>10.0.4</dependencycheck.version>

src/main/java/org/htmlunit/cyberneko/HTMLScanner.java

+10-9
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,6 @@ public void reset(final XMLComponentManager manager) throws XMLConfigurationExce
742742
}
743743
}
744744
}
745-
746745
}
747746

748747
/** Sets a feature. */
@@ -2895,17 +2894,18 @@ protected boolean scanAttribute(final XMLAttributesImpl attributes, final boolea
28952894
if (fReportErrors_) {
28962895
fErrorReporter.reportError("HTML1007", null);
28972896
}
2898-
return false;
2897+
throw new EOFException();
28992898
}
29002899
if (c == '>') {
29012900
return false;
29022901
}
2902+
2903+
// https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-character-in-attribute-name
29032904
if (c == '<') {
2904-
fCurrentEntity.rewind();
29052905
if (fReportErrors_) {
29062906
fErrorReporter.reportError("HTML1016", null);
29072907
}
2908-
return false;
2908+
// report the parse error but process the '<' as part of the attribute name
29092909
}
29102910
}
29112911

@@ -2937,13 +2937,14 @@ protected boolean scanAttribute(final XMLAttributesImpl attributes, final boolea
29372937
}
29382938
throw new EOFException();
29392939
}
2940-
if (c == '/' || c == '>') {
2940+
if (c == '/') {
2941+
qName_.setValues(null, aname, aname, null);
2942+
attributes.addAttribute(qName_, "CDATA", "", true);
2943+
return true;
2944+
}
2945+
if (c == '>') {
29412946
qName_.setValues(null, aname, aname, null);
29422947
attributes.addAttribute(qName_, "CDATA", "", true);
2943-
if (c == '/') {
2944-
fCurrentEntity.rewind();
2945-
empty[0] = skipMarkup(false);
2946-
}
29472948
return false;
29482949
}
29492950
if (c == '=') {

src/main/java/org/htmlunit/cyberneko/parsers/DOMFragmentParser.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,10 @@ public void startElement(final QName element, final XMLAttributes attrs, final A
324324
for (int i = 0; i < count; i++) {
325325
final String aname = attrs.getQName(i);
326326
final String avalue = attrs.getValue(i);
327-
if (XMLChar.isValidName(aname)) {
328-
elementNode.setAttribute(aname, avalue);
329-
}
327+
328+
// don't check the name here - this is done by the HTMLScanner and
329+
// not all valid html attribute names are valid xml names
330+
elementNode.setAttribute(aname, avalue);
330331
}
331332
}
332333
currentNode_.appendChild(elementNode);

src/main/java/org/htmlunit/cyberneko/xerces/dom/CoreDocumentImpl.java

+6-4
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,12 @@ public void setTextContent(final String textContent) throws DOMException {
380380
*/
381381
@Override
382382
public Attr createAttribute(final String name) throws DOMException {
383-
if (errorChecking && !isXMLName(name, xml11Version_)) {
384-
final String msg = DOMMessageFormatter.formatMessage("INVALID_CHARACTER_ERR", null);
385-
throw new DOMException(DOMException.INVALID_CHARACTER_ERR, msg);
386-
}
383+
// don't check the name here - this is done by the HTMLScanner and
384+
// not all valid html attribute names are valid xml names
385+
// if (errorChecking && !isXMLName(name, xml11Version_)) {
386+
// final String msg = DOMMessageFormatter.formatMessage("INVALID_CHARACTER_ERR", null);
387+
// throw new DOMException(DOMException.INVALID_CHARACTER_ERR, msg);
388+
// }
387389
return new AttrImpl(this, name);
388390

389391
}

src/test/java/org/htmlunit/cyberneko/CanonicalDomFragmentTest.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.io.StringReader;
2626
import java.io.StringWriter;
2727
import java.nio.file.Files;
28+
import java.nio.file.Paths;
2829
import java.util.StringTokenizer;
2930

3031
import org.htmlunit.cyberneko.parsers.DOMFragmentParser;
@@ -94,7 +95,10 @@ public void runTest(final File dataFile) throws Exception {
9495
}
9596
}
9697
catch (final AssertionFailedError e) {
97-
final File output = new File(outputDir, dataFile.getName());
98+
String path = dataFile.getAbsolutePath();
99+
path = path.substring(path.indexOf("\\testfiles\\") + 11);
100+
final File output = new File(outputDir, path + ".canonical-frg");
101+
Files.createDirectories(Paths.get(output.getParentFile().getPath()));
98102
try (PrintWriter pw = new PrintWriter(Files.newOutputStream(output.toPath()))) {
99103
pw.print(domDataLines);
100104
}

src/test/java/org/htmlunit/cyberneko/CanonicalDomTest.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.io.StringReader;
2626
import java.io.StringWriter;
2727
import java.nio.file.Files;
28+
import java.nio.file.Paths;
2829
import java.util.StringTokenizer;
2930

3031
import org.htmlunit.cyberneko.parsers.DOMParser;
@@ -86,7 +87,10 @@ public void runTest(final File dataFile) throws Exception {
8687
}
8788
}
8889
catch (final AssertionFailedError e) {
89-
final File output = new File(outputDir, dataFile.getName());
90+
String path = dataFile.getAbsolutePath();
91+
path = path.substring(path.indexOf("\\testfiles\\") + 11);
92+
final File output = new File(outputDir, path + ".canonical-dom");
93+
Files.createDirectories(Paths.get(output.getParentFile().getPath()));
9094
try (PrintWriter pw = new PrintWriter(Files.newOutputStream(output.toPath()))) {
9195
pw.print(domDataLines);
9296
}

src/test/java/org/htmlunit/cyberneko/CanonicalSAXTest.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.io.StringReader;
2626
import java.io.StringWriter;
2727
import java.nio.file.Files;
28+
import java.nio.file.Paths;
2829
import java.util.StringTokenizer;
2930

3031
import org.htmlunit.cyberneko.parsers.SAXParser;
@@ -85,7 +86,10 @@ public void runTest(final File dataFile) throws Exception {
8586
}
8687
}
8788
catch (final AssertionFailedError e) {
88-
final File output = new File(outputDir, dataFile.getName());
89+
String path = dataFile.getAbsolutePath();
90+
path = path.substring(path.indexOf("\\testfiles\\") + 11);
91+
final File output = new File(outputDir, path + ".canonical-sax");
92+
Files.createDirectories(Paths.get(output.getParentFile().getPath()));
8993
try (PrintWriter pw = new PrintWriter(Files.newOutputStream(output.toPath()))) {
9094
pw.print(domDataLines);
9195
}

src/test/java/org/htmlunit/cyberneko/CanonicalTest.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.StringReader;
2727
import java.io.StringWriter;
2828
import java.nio.file.Files;
29+
import java.nio.file.Paths;
2930
import java.util.StringTokenizer;
3031

3132
import org.htmlunit.cyberneko.xerces.xni.parser.XMLDocumentFilter;
@@ -78,7 +79,10 @@ public void runTest(final File dataFile) throws Exception {
7879
}
7980
}
8081
catch (final AssertionFailedError e) {
81-
final File output = new File(outputDir, dataFile.getName());
82+
String path = dataFile.getAbsolutePath();
83+
path = path.substring(path.indexOf("\\testfiles\\") + 11);
84+
final File output = new File(outputDir, path + ".canonical");
85+
Files.createDirectories(Paths.get(output.getParentFile().getPath()));
8286
try (PrintWriter pw = new PrintWriter(Files.newOutputStream(output.toPath()))) {
8387
pw.print(dataLines);
8488
}

src/test/java/org/htmlunit/cyberneko/DOMFragmentParserTest.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,13 @@ public void invalidProcessingInstruction() throws Exception {
5858
*/
5959
@Test
6060
public void invalidAttributeName() throws Exception {
61-
doTest("<html 9='id'></html>", "<html/>");
61+
// doTest("<html 9='id'></html>", "<html/>");
62+
63+
// changed in version 4.8.0 as this is a valid (html) attribute name
64+
// doTest("<html 9='id'></html>", "<html 9=\"id\"/>");
65+
66+
// this fails on jdk8 because the DOMImplementationLS returns null if the dom is not xml
67+
// migrated to test-digit-attr-name
6268
}
6369

6470
private static void doTest(final String html, final String expected) throws Exception {
@@ -74,6 +80,9 @@ private static void doTest(final String html, final String expected) throws Exce
7480

7581
final LSSerializer writer = impl.createLSSerializer();
7682
String str = writer.writeToString(fragment);
83+
if (str == null) {
84+
str = "";
85+
}
7786
str = str.replace("\r", "").replace("\n", "");
7887

7988
final String xmlDecl = "<?xml version=\"1.0\" encoding=\"UTF-16\"?>";
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p :style="abc" 7="c">Content</p>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
(HTML
2+
(head
3+
)head
4+
(BODY
5+
(p
6+
A7 c
7+
A:style abc
8+
"Content
9+
)p
10+
)BODY
11+
)HTML
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(p
2+
A7 c
3+
A:style abc
4+
"Content
5+
)p
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<HTML><head></head><BODY><p :style="abc" 7="c">Content</p></BODY></HTML>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
startDocument [(1,1,0) (1,1,0) false]
2+
startElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
3+
startElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
4+
endElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
5+
startElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
6+
startElement (localpart="p",rawname="p") [(1,1,0) (1,23,22) false]
7+
characters 'Content'[(1,23,22) (1,30,29) false]
8+
endElement (localpart="p",rawname="p") [(1,30,29) (1,34,33) false]
9+
characters '
10+
'[(1,34,33) (2,1,34) false]
11+
endElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
12+
endElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
13+
endDocument [(2,1,34) (2,1,34) false]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p :style="abc" <="c">Content</p>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
(HTML
2+
(head
3+
)head
4+
(BODY
5+
(p
6+
A:style abc
7+
A< c
8+
"Content
9+
)p
10+
)BODY
11+
)HTML
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(p
2+
A:style abc
3+
A< c
4+
"Content
5+
)p
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<HTML><head></head><BODY><p :style="abc" <="c">Content</p></BODY></HTML>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
startDocument [(1,1,0) (1,1,0) false]
2+
startElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
3+
startElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
4+
endElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
5+
startElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
6+
startElement (localpart="p",rawname="p") [(1,1,0) (1,23,22) false]
7+
characters 'Content'[(1,23,22) (1,30,29) false]
8+
endElement (localpart="p",rawname="p") [(1,30,29) (1,34,33) false]
9+
characters '
10+
'[(1,34,33) (2,1,34) false]
11+
endElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
12+
endElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
13+
endDocument [(2,1,34) (2,1,34) false]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p :style="abc" a<b="c">Content</p>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
(HTML
2+
(head
3+
)head
4+
(BODY
5+
(p
6+
A:style abc
7+
Aa<b c
8+
"Content
9+
)p
10+
)BODY
11+
)HTML
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(p
2+
A:style abc
3+
Aa<b c
4+
"Content
5+
)p
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<HTML><head></head><BODY><p :style="abc" a<b="c">Content</p></BODY></HTML>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
startDocument [(1,1,0) (1,1,0) false]
2+
startElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
3+
startElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
4+
endElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
5+
startElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
6+
startElement (localpart="p",rawname="p") [(1,1,0) (1,25,24) false]
7+
characters 'Content'[(1,25,24) (1,32,31) false]
8+
endElement (localpart="p",rawname="p") [(1,32,31) (1,36,35) false]
9+
characters '
10+
'[(1,36,35) (2,1,36) false]
11+
endElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
12+
endElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
13+
endDocument [(2,1,36) (2,1,36) false]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p :style="abc" <ab="c">Content</p>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
(HTML
2+
(head
3+
)head
4+
(BODY
5+
(p
6+
A:style abc
7+
A<ab c
8+
"Content
9+
)p
10+
)BODY
11+
)HTML
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(p
2+
A:style abc
3+
A<ab c
4+
"Content
5+
)p
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<HTML><head></head><BODY><p :style="abc" <ab="c">Content</p></BODY></HTML>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
startDocument [(1,1,0) (1,1,0) false]
2+
startElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
3+
startElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
4+
endElement (localpart="head",rawname="head",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
5+
startElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
6+
startElement (localpart="p",rawname="p") [(1,1,0) (1,25,24) false]
7+
characters 'Content'[(1,25,24) (1,32,31) false]
8+
endElement (localpart="p",rawname="p") [(1,32,31) (1,36,35) false]
9+
characters '
10+
'[(1,36,35) (2,1,36) false]
11+
endElement (localpart="BODY",rawname="BODY",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
12+
endElement (localpart="HTML",rawname="HTML",uri="http://www.w3.org/1999/xhtml") [(-1,-1,-1) (-1,-1,-1) true]
13+
endDocument [(2,1,36) (2,1,36) false]

src/test/resources/org/htmlunit/cyberneko/testfiles/attrs/test-not-quoted-attr.html.canonical-frg

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ Aa &Uumlabc
4949
"\n
5050
(div
5151
Aa Ü
52+
Aabc'
5253
)div
5354
"\n
5455
(div
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p style="abc" a/b="c">Content</p>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
(HTML
2+
(head
3+
)head
4+
(BODY
5+
(p
6+
Aa
7+
Ab c
8+
Astyle abc
9+
"Content
10+
)p
11+
)BODY
12+
)HTML

0 commit comments

Comments
 (0)