Skip to content

Commit 805258b

Browse files
committed
use DOMParser instead of a hidden iframe, added benchmarks
fixes #15 fixes #8
1 parent 05ace8a commit 805258b

File tree

4 files changed

+66
-22
lines changed

4 files changed

+66
-22
lines changed

HtmlSanitizer.js

+6-18
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ var HtmlSanitizer = new (function () {
2222

2323
var uriAttributes_ = { 'href': true, 'action': true };
2424

25+
var _parser = new DOMParser();
26+
2527
this.SanitizeHtml = function (input, extraTags, extraAttributes) {
2628
extraTags = (extraTags && extraTags instanceof Array) ? extraTags : [];
2729
extraAttributes = (extraAttributes && extraAttributes instanceof Array) ? extraAttributes : [];
@@ -32,23 +34,9 @@ var HtmlSanitizer = new (function () {
3234
//firefox "bogus node" workaround
3335
if (input == "<br>") return "";
3436

35-
var iframe = document.createElement('iframe');
36-
if (iframe['sandbox'] === undefined) {
37-
alert('Your browser does not support sandboxed iframes. Please upgrade to a modern browser.');
38-
return '';
39-
}
40-
iframe['sandbox'] = 'allow-same-origin';
41-
iframe.style.display = 'none';
42-
document.body.appendChild(iframe); // necessary so the iframe contains a document
43-
var iframedoc = iframe.contentDocument || iframe.contentWindow.document;
44-
if (iframedoc.body == null) iframedoc.write("<body></body>"); // null in IE
45-
iframedoc.body.innerHTML = input;
46-
47-
//DOM clobbering check
48-
if (iframedoc.body.tagName !== 'BODY')
49-
iframedoc.body.remove();
50-
if (typeof iframedoc.createElement !== 'function')
51-
iframedoc.createElement.remove();
37+
if (input.indexOf("<body")==-1) input = "<body>" + input + "</body>"; //add "body" otherwise some tags are skipped, like <style>
38+
39+
var iframedoc = _parser.parseFromString(input, "text/html");
5240

5341
function makeSanitizedCopy(node) {
5442
if (node.nodeType == Node.TEXT_NODE) {
@@ -96,7 +84,7 @@ var HtmlSanitizer = new (function () {
9684
};
9785

9886
var resultElement = makeSanitizedCopy(iframedoc.body);
99-
document.body.removeChild(iframe);
87+
10088
return resultElement.innerHTML
10189
.replace(/<br[^>]*>(\S)/g, "<br>\n$1")
10290
.replace(/div><div/g, "div>\n<div"); //replace is just for cleaner code

README.md

+11-2
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,21 @@ npm install @jitbit/htmlsanitizer
3636

3737
The sanitizer uses a [whitelisting](https://en.wikipedia.org/wiki/Whitelisting) approach (as opposed to "blacklisting") to strip out everything that is not explicitly allowed.
3838

39-
## Speed
39+
## Speed & Benchmarks
4040

41-
It uses browser/DOM to parse the html by creating an invisible "sandboxed" iframe (hence the browser "front-end only" requirement) which makes it **much faster** than "pure JavaScript" sanitizers.
41+
It relies on the browser's DOM to parse the HTML, via the `DOMParser` object (hence the "front-end only" browser requirement), which makes it **much faster** than "pure JavaScript" sanitizers.
4242

4343
Tested on the `https://www.bbc.co.uk` homepage: the page is sanitized **~370 times per second** on a Core i5 CPU in Firefox Quantum (tested via `benchmark.js`).
4444

45+
Benchmark comparing HtmlSanitizer with DOMPurify:
46+
47+
```
48+
starting benchmark...
49+
HtmlSanitizer x 7,092 ops/sec ±4.52% (55 runs sampled)
50+
DOMPurify x 4,479 ops/sec ±4.48% (58 runs sampled)
51+
Fastest is HtmlSanitizer
52+
```
53+
4554
## Tags allowed by default
4655

4756
`a, abbr, b, blockquote, body, br, center, code, div, em, font, h1, h2, h3, h4, h5, h6, hr, i, img, label, li, ol, p, pre, small, source, span, strong, table, tbody, tr, td, th, thead, ul, u, video`

benchmark.html

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<script src="https://cdn.jsdelivr.net/gh/cure53/DOMPurify/dist/purify.js"></script>
5+
<script src="HtmlSanitizer.js"></script>
6+
7+
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.21/lodash.min.js"></script>
8+
<script src="https://cdnjs.cloudflare.com/ajax/libs/benchmark/2.1.4/benchmark.min.js"></script>
9+
</head>
10+
<body>
11+
<pre id="mylog"></pre>
12+
13+
<script>
14+
var testcontent = `<form id="action_online_form" method="post" action="./../adm/index.php?i=acp_main&amp;sid=534a8e9c03d8251f56daeb0d857cc062&amp;mode=main" data-ajax="true">
15+
<dl>
16+
<dt><label for="action_online">Vynulovat rekord uživatelů online</label><br><span class="responsive-hide">&nbsp;</span></dt>
17+
<dd><input type="hidden" name="action" value="online"><input class="button2" type="submit" id="action_online" name="action_online" value="Spustit nyní"></dd>
18+
</dl>
19+
</form>`;
20+
21+
mylog = (...args) => {
22+
args.map(arg => document.querySelector("#mylog").innerHTML += arg + '<br>')
23+
}
24+
25+
mylog("starting...");
26+
27+
var suite = new Benchmark.Suite;
28+
29+
// add tests
30+
suite.add('HtmlSanitizer', function () {
31+
var str = HtmlSanitizer.SanitizeHtml(testcontent);
32+
})
33+
.add('DOMPurify', function () {
34+
DOMPurify.sanitize(testcontent, { ALLOWED_TAGS: ['b'] });
35+
})
36+
// add listeners
37+
.on('cycle', function (event) {
38+
mylog(String(event.target));
39+
})
40+
.on('complete', function () {
41+
mylog('Fastest is ' + this.filter('fastest').map('name'));
42+
})
43+
// run async
44+
.run({ 'async': true });
45+
</script>
46+
</body>
47+
</html>

tests.html

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515
<script>
1616
QUnit.test("Html Sanitizer", function( assert ) {
17-
assert.equal(HtmlSanitizer.SanitizeHtml("<p>Hello world!</p><img name=createElement>"), "<p>Hello world!</p>");
18-
assert.equal(HtmlSanitizer.SanitizeHtml("<p>Hello world!</p><img name=body>"), "<p>Hello world!</p>");
17+
assert.equal(HtmlSanitizer.SanitizeHtml("<p>Hello world!</p><img name=createElement>"), "<p>Hello world!</p><img>");
18+
assert.equal(HtmlSanitizer.SanitizeHtml("<p>Hello world!</p><img name=body>"), "<p>Hello world!</p><img>");
1919
assert.equal(HtmlSanitizer.SanitizeHtml("<div> <script> Alert('xss!'); </scr" + "ipt> </div>"), "<div> </div>");
2020
assert.equal(HtmlSanitizer.SanitizeHtml("<p class='MsoNormal' style='margin-bottom:10.0pt;line-height:115%'><b>Official - SBU&nbsp;</b><o:p></o:p></p>"), "<p><b>Official - SBU&nbsp;</b></p>");
2121
assert.equal(HtmlSanitizer.SanitizeHtml("<span style='color: rgb(102, 102, 102); font-size: 8px;'><i>NOTE</i></span>"), "<span style=\"color: rgb(102, 102, 102); font-size: 8px;\"><i>NOTE</i></span>");

0 commit comments

Comments
 (0)