Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend sanitize #171

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion sanitize_html/lib/sanitize_html.dart
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

library sanitize_html;

import 'src/sane_html_validator.dart' show SaneHtmlValidator;
import 'src/sane_html_validator.dart'
show SaneHtmlValidator, AllowTagCB, AllowAttributeCB;
export 'src/sane_html_validator.dart'
show AllowTagCB, AllowAttributeCB, AllowAttributeResponse;

/// Sanitize [htmlString] to prevent XSS exploits and limit interference with
/// other markup on the page.
Expand Down Expand Up @@ -76,10 +79,14 @@ String sanitizeHtml(
bool Function(String)? allowElementId,
bool Function(String)? allowClassName,
Iterable<String>? Function(String)? addLinkRel,
AllowTagCB? allowTag,
AllowAttributeCB? allowAttribute,
}) {
return SaneHtmlValidator(
allowElementId: allowElementId,
allowClassName: allowClassName,
addLinkRel: addLinkRel,
allowTag: allowTag,
allowAttribute: allowAttribute,
).sanitize(htmlString);
}
105 changes: 94 additions & 11 deletions sanitize_html/lib/src/sane_html_validator.dart
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,47 @@ final _elementAttributeValidators =
'Q': _citeAttributeValidator,
};

/// Callback function used to check wich tags are allowed
/// The function will return true if the tag is allowed of false if not.
typedef AllowTagCB = bool Function(String tag);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't need a typedef... also why give a callback... why not just make it a list of string?


/// Callback function used to check wich attributes are allowed
/// The function will return an [AllowAttributeResponse] object
/// with the result of the check.
typedef AllowAttributeCB = AllowAttributeResponse Function(
String tag, String attribute, String value);

/// Possible outcome actions of [AllowAttributeResponse]
enum ResponseAction {
unchanged,
edit,
remove,
}

/// Response object returned by [AllowAttributeCB]
/// The [action] attribute determine the action to take
/// and can be [unchanged], [edit] or [remove].
/// In case of [edit], the [value] field can be used to
/// change the content of the attribute.
class AllowAttributeResponse {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What examples are there of attributes where this makes sense?

ResponseAction action;
String value;

AllowAttributeResponse(this.action, [this.value = '']);

/// Factory function for unchaged action
factory AllowAttributeResponse.unchanged() =>
AllowAttributeResponse(ResponseAction.unchanged);

/// Factory function for edit action
factory AllowAttributeResponse.edit(String value) =>
AllowAttributeResponse(ResponseAction.edit, value);

/// Factory function for remove action
factory AllowAttributeResponse.remove() =>
AllowAttributeResponse(ResponseAction.remove);
}

/// An implementation of [html.NodeValidator] that only allows sane HTML tags
/// and attributes protecting against XSS.
///
Expand All @@ -213,38 +254,80 @@ class SaneHtmlValidator {
final bool Function(String)? allowClassName;
final Iterable<String>? Function(String)? addLinkRel;

/// Callback function to check for allowed tags
final AllowTagCB? allowTag;

/// Callback function to check for allowed attributes
final AllowAttributeCB? allowAttribute;

late AllowTagCB _allowTagFn;
late AllowAttributeCB _allowAttributeFn;

SaneHtmlValidator({
required this.allowElementId,
required this.allowClassName,
required this.addLinkRel,
});
required this.allowTag,
required this.allowAttribute,
}) {
_allowTagFn = allowTag ?? _defaultAllowedElements;
_allowAttributeFn = allowAttribute ?? _defaultAllowedAttributes;
}

String sanitize(String htmlString) {
final root = html_parser.parseFragment(htmlString);
_sanitize(root);
return root.outerHtml;
}

bool _defaultAllowedElements(String tagName) =>
_allowedElements.contains(tagName);

AllowAttributeResponse _defaultAllowedAttributes(
String tagName, String attrName, String attrValue) {
if (attrName == 'id') {
if (allowElementId == null) return AllowAttributeResponse.remove();
return allowElementId!(attrValue)
? AllowAttributeResponse.unchanged()
: AllowAttributeResponse.remove();
}
if (attrName == 'class') {
if (allowClassName == null) return AllowAttributeResponse.remove();
final klasses = attrValue.split(' ');
klasses.removeWhere((cn) => !allowClassName!(cn));
return klasses.isEmpty
? AllowAttributeResponse.remove()
: AllowAttributeResponse.edit(klasses.join(' '));
}
return _isAttributeAllowed(tagName, attrName, attrValue)
? AllowAttributeResponse.unchanged()
: AllowAttributeResponse.remove();
}

void _sanitize(Node node) {
if (node is Element) {
final tagName = node.localName!.toUpperCase();
if (!_allowedElements.contains(tagName)) {
if (!_allowTagFn(tagName)) {
node.remove();
return;
}
node.attributes.removeWhere((k, v) {
final attrName = k.toString();
if (attrName == 'id') {
return allowElementId == null || !allowElementId!(v);
}
if (attrName == 'class') {
if (allowClassName == null) return true;
node.classes.removeWhere((cn) => !allowClassName!(cn));
return node.classes.isEmpty;
final rc = _allowAttributeFn(tagName, attrName, v);
if (rc.action == ResponseAction.remove) return true;
if (rc.action == ResponseAction.edit) {
if (attrName == 'class') {
final klasses = rc.value.split(' ');
node.classes.removeWhere((klass) => !klasses.contains(klass));
} else {
node.attributes[k] = rc.value;
}
return false;
}
return !_isAttributeAllowed(tagName, attrName, v);
return false;
});
if (tagName == 'A') {
// When use a custom tag list, the default rule for anchor/rel will not work
if ((allowTag == null) && (tagName == 'A')) {
final href = node.attributes['href'];
if (href != null && addLinkRel != null) {
final rels = addLinkRel!(href);
Expand Down
111 changes: 103 additions & 8 deletions sanitize_html/test/sanitize_html_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -13,45 +13,91 @@
// limitations under the License.

import 'package:test/test.dart';
import 'package:sanitize_html/sanitize_html.dart' show sanitizeHtml;
import 'package:sanitize_html/sanitize_html.dart'
show sanitizeHtml, AllowTagCB, AllowAttributeCB, AllowAttributeResponse;

final otherTagList = <String>{'CSTM', 'A'};
bool customTagList(String t) => otherTagList.contains(t);
AllowAttributeResponse customAttrList(
String tag, String attrName, String attrValue) {
if (attrName.toUpperCase() == 'CLASS') {
final k = attrValue.split(' ');
k.removeWhere((v) => !v.endsWith('allowed'));
return AllowAttributeResponse.edit(k.join(' '));
}
if (tag.toUpperCase() == 'DIV') {
if (attrName.toUpperCase() == 'ALT') {
return AllowAttributeResponse.unchanged();
}
return AllowAttributeResponse.remove();
}
if (attrName.toUpperCase() == 'DATA-KEY') {
return AllowAttributeResponse.edit('new-value');
}
return AllowAttributeResponse.remove();
}

void main() {
// Calls sanitizeHtml with two different configurations.
// * When `withOptionalConfiguration` is `true`: `allowElementId`, `allowClassName`
// and `addLinkRel` overrides are passed to the sanitizeHtml call of `template`.
// (This is the default behavior for the [testContains]/[testNotContains] methods.)
// * When `withOptionalConfiguration` is false: only `template` is passed.
String doSanitizeHtml(String template,
{required bool withOptionalConfiguration}) {
String doSanitizeHtml(
String template, {
required bool withOptionalConfiguration,
AllowTagCB? withCustomTagList,
AllowAttributeCB? withCustomAttrList,
}) {
if (!withOptionalConfiguration) {
return sanitizeHtml(template);
return sanitizeHtml(
template,
allowTag: withCustomTagList,
allowAttribute: withCustomAttrList,
);
}

return sanitizeHtml(
template,
allowElementId: (id) => id == 'only-allowed-id',
allowClassName: (className) => className == 'only-allowed-class',
addLinkRel: (href) => href == 'bad-link' ? ['ugc', 'nofollow'] : null,
allowTag: withCustomTagList,
allowAttribute: withCustomAttrList,
);
}

void testContains(String template, String needle,
{bool withOptionalConfiguration = true}) {
void testContains(
String template,
String needle, {
bool withOptionalConfiguration = true,
AllowTagCB? withCustomTagList,
AllowAttributeCB? withCustomAttrList,
}) {
test('"$template" does contain "$needle"', () {
final sanitizedHtml = doSanitizeHtml(
template,
withOptionalConfiguration: withOptionalConfiguration,
withCustomTagList: withCustomTagList,
withCustomAttrList: withCustomAttrList,
);
expect(sanitizedHtml, contains(needle));
});
}

void testNotContains(String template, String needle,
{bool withOptionalConfiguration = true}) {
void testNotContains(
String template,
String needle, {
bool withOptionalConfiguration = true,
AllowTagCB? withCustomTagList,
AllowAttributeCB? withCustomAttrList,
}) {
test('"$template" does not contain "$needle"', () {
final sanitizedHtml = doSanitizeHtml(
template,
withOptionalConfiguration: withOptionalConfiguration,
withCustomTagList: withCustomTagList,
withCustomAttrList: withCustomAttrList,
);
expect(sanitizedHtml, isNot(contains(needle)));
});
Expand Down Expand Up @@ -157,4 +203,53 @@ void main() {
testNotContains('<span class="any-class">hello</span>', 'class=',
withOptionalConfiguration: false);
});

group('Custom Tag List', () {
testContains('<cstm>hello', '<cstm>hello',
withCustomTagList: customTagList);
testNotContains('<div>hello', 'hello', withCustomTagList: customTagList);

// custom tags with support for attributes
testContains('<cstm id="only-allowed-id">hello</cstm>', 'id',
withCustomTagList: customTagList);
testContains('<cstm id="only-allowed-id">hello</cstm>', 'only-allowed-id',
withCustomTagList: customTagList);
testNotContains('<cstm id="disallowed-id">hello</cstm>', 'id',
withCustomTagList: customTagList);
testNotContains('<cstm id="disallowed-id">hello</cstm>', 'only-allowed-id',
withCustomTagList: customTagList);
// default rules for A attribute do not works when use customTagList
testNotContains('<a href="bad-link">hello', 'rel="ugc nofollow"',
withCustomTagList: customTagList);

testContains('<a href="any-href">hey', 'href=',
withOptionalConfiguration: false, withCustomTagList: customTagList);
testNotContains('<a href="any-href">hey', 'rel=',
withOptionalConfiguration: false, withCustomTagList: customTagList);
testNotContains('<cstm id="any-id">hello</cstm>', 'id=',
withOptionalConfiguration: false, withCustomTagList: customTagList);
testNotContains('<cstm class="any-class">hello</cstm>', 'class=',
withOptionalConfiguration: false, withCustomTagList: customTagList);

// custom tag list handle default allowed attributes if AllowAttributes is missing
testContains('<cstm alt="bar">hello', 'alt',
withCustomTagList: customTagList);
});

group('Custom Attribute List', () {
testNotContains('<span alt="foo">hello', 'alt',
withCustomAttrList: customAttrList);
testContains('<div alt="foo">hello', 'alt',
withCustomAttrList: customAttrList);
testNotContains('<div bar="foo">hello', 'bar',
withCustomAttrList: customAttrList);
testNotContains('<span class="foo-allowed other-attr">hello', 'other-attr',
withCustomAttrList: customAttrList);
testContains('<span class="foo-allowed other-attr">hello', 'foo-allowed',
withCustomAttrList: customAttrList);
testContains('<span data-key="value-allowed">hello', 'new-value',
withCustomAttrList: customAttrList);
testNotContains('<span data-key="value-allowed">hello', 'value-allowed',
withCustomAttrList: customAttrList);
});
}