From 0fda3d872b0ab09784b78312fd03ca26dd03969e Mon Sep 17 00:00:00 2001 From: dab246 Date: Wed, 23 Oct 2024 13:44:10 +0700 Subject: [PATCH 1/4] Custom allow attributes & tags --- sanitize_html/lib/sanitize_html.dart | 4 ++++ sanitize_html/lib/src/sane_html_validator.dart | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sanitize_html/lib/sanitize_html.dart b/sanitize_html/lib/sanitize_html.dart index dc298d48..ba3b159d 100644 --- a/sanitize_html/lib/sanitize_html.dart +++ b/sanitize_html/lib/sanitize_html.dart @@ -76,10 +76,14 @@ String sanitizeHtml( bool Function(String)? allowElementId, bool Function(String)? allowClassName, Iterable? Function(String)? addLinkRel, + final List? allowAttributes, + final List? allowTags, }) { return SaneHtmlValidator( allowElementId: allowElementId, allowClassName: allowClassName, addLinkRel: addLinkRel, + allowAttributes: allowAttributes, + allowTags: allowTags, ).sanitize(htmlString); } diff --git a/sanitize_html/lib/src/sane_html_validator.dart b/sanitize_html/lib/src/sane_html_validator.dart index 32e39a81..516011b5 100644 --- a/sanitize_html/lib/src/sane_html_validator.dart +++ b/sanitize_html/lib/src/sane_html_validator.dart @@ -212,11 +212,15 @@ class SaneHtmlValidator { final bool Function(String)? allowElementId; final bool Function(String)? allowClassName; final Iterable? Function(String)? addLinkRel; + final List? allowAttributes; + final List? allowTags; SaneHtmlValidator({ required this.allowElementId, required this.allowClassName, required this.addLinkRel, + required this.allowAttributes, + required this.allowTags, }); String sanitize(String htmlString) { @@ -228,7 +232,8 @@ class SaneHtmlValidator { void _sanitize(Node node) { if (node is Element) { final tagName = node.localName!.toUpperCase(); - if (!_allowedElements.contains(tagName)) { + if (!_allowedElements.contains(tagName) + && !(allowTags?.contains(tagName.toLowerCase()) ?? false)) { node.remove(); return; } @@ -269,6 +274,8 @@ class SaneHtmlValidator { } bool _isAttributeAllowed(String tagName, String attrName, String value) { + if (allowAttributes?.contains(attrName.toLowerCase()) == true) return true; + if (_alwaysAllowedAttributes.contains(attrName)) return true; // Special validators for special attributes on special tags (href/src/cite) From c663ad93a659fce3c0d209a048fa93c6465ebedc Mon Sep 17 00:00:00 2001 From: dab246 Date: Wed, 23 Oct 2024 14:29:01 +0700 Subject: [PATCH 2/4] Add validate base64 image tag --- sanitize_html/lib/sanitize_html.dart | 4 +- .../lib/src/sane_html_validator.dart | 13 +++++- .../test/validate_base64_image_test.dart | 46 +++++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 sanitize_html/test/validate_base64_image_test.dart diff --git a/sanitize_html/lib/sanitize_html.dart b/sanitize_html/lib/sanitize_html.dart index ba3b159d..fb4a6134 100644 --- a/sanitize_html/lib/sanitize_html.dart +++ b/sanitize_html/lib/sanitize_html.dart @@ -76,8 +76,8 @@ String sanitizeHtml( bool Function(String)? allowElementId, bool Function(String)? allowClassName, Iterable? Function(String)? addLinkRel, - final List? allowAttributes, - final List? allowTags, + List? allowAttributes, + List? allowTags, }) { return SaneHtmlValidator( allowElementId: allowElementId, diff --git a/sanitize_html/lib/src/sane_html_validator.dart b/sanitize_html/lib/src/sane_html_validator.dart index 516011b5..b4554728 100644 --- a/sanitize_html/lib/src/sane_html_validator.dart +++ b/sanitize_html/lib/src/sane_html_validator.dart @@ -177,6 +177,15 @@ bool _validUrl(String url) { } } +bool validateBase64Image(String base64String) { + try { + final regex = RegExp(r'^data:image\/(png|jpeg|jpg|gif|bmp|svg\+xml);base64,[A-Za-z0-9+/]+={0,2}$'); + return regex.hasMatch(base64String); + } catch (e) { + return false; + } +} + final _citeAttributeValidator = { 'cite': _validUrl, }; @@ -187,8 +196,8 @@ final _elementAttributeValidators = 'href': _validLink, }, 'IMG': { - 'src': _validUrl, - 'longdesc': _validUrl, + 'src': (url) => _validUrl(url) || validateBase64Image(url), + 'longdesc': (url) => _validUrl(url) || validateBase64Image(url), }, 'DIV': { 'itemscope': _alwaysAllowed, diff --git a/sanitize_html/test/validate_base64_image_test.dart b/sanitize_html/test/validate_base64_image_test.dart new file mode 100644 index 00000000..33d34271 --- /dev/null +++ b/sanitize_html/test/validate_base64_image_test.dart @@ -0,0 +1,46 @@ +import 'package:sanitize_html/src/sane_html_validator.dart'; +import 'package:test/test.dart'; + +void main() { + group('validateBase64Image', () { + test('Valid Base64 PNG image string', () { + String validBase64PNG = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA'; + expect(validateBase64Image(validBase64PNG), isTrue); + }); + + test('Valid Base64 JPEG image string', () { + String validBase64JPEG = 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAAAAAA'; + expect(validateBase64Image(validBase64JPEG), isTrue); + }); + + test('Invalid Base64 image string (missing data:image/)', () { + String invalidBase64 = 'base64,iVBORw0KGgoAAAANSUhEUgAAAAUA'; + expect(validateBase64Image(invalidBase64), isFalse); + }); + + test('Invalid Base64 image string (not base64 encoded)', () { + String invalidBase64 = 'data:image/png;notabase64string'; + expect(validateBase64Image(invalidBase64), isFalse); + }); + + test('Valid Base64 SVG image string', () { + String validBase64SVG = 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDov'; + expect(validateBase64Image(validBase64SVG), isTrue); + }); + + test('Invalid Base64 image string (wrong image type)', () { + String invalidBase64Type = 'data:image/tiff;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA'; + expect(validateBase64Image(invalidBase64Type), isFalse); + }); + + test('Empty string', () { + String emptyString = ''; + expect(validateBase64Image(emptyString), isFalse); + }); + + test('Non-image Base64 string', () { + String nonImageBase64 = 'data:text/plain;base64,dGVzdA=='; // Plain text Base64-encoded + expect(validateBase64Image(nonImageBase64), isFalse); + }); + }); +} From 01c37cde93640add2291655ef36d4949aff9dade Mon Sep 17 00:00:00 2001 From: dab246 Date: Wed, 23 Oct 2024 18:58:22 +0700 Subject: [PATCH 3/4] Add validate CID source for image tag --- .../lib/src/sane_html_validator.dart | 25 ++++++++++++++++--- .../test/validate_cid_image_test.dart | 18 +++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 sanitize_html/test/validate_cid_image_test.dart diff --git a/sanitize_html/lib/src/sane_html_validator.dart b/sanitize_html/lib/src/sane_html_validator.dart index b4554728..a97f3265 100644 --- a/sanitize_html/lib/src/sane_html_validator.dart +++ b/sanitize_html/lib/src/sane_html_validator.dart @@ -14,6 +14,7 @@ import 'package:html/dom.dart'; import 'package:html/parser.dart' as html_parser; +import 'package:meta/meta.dart'; final _allowedElements = { 'H1', @@ -177,7 +178,7 @@ bool _validUrl(String url) { } } -bool validateBase64Image(String base64String) { +bool _validBase64Image(String base64String) { try { final regex = RegExp(r'^data:image\/(png|jpeg|jpg|gif|bmp|svg\+xml);base64,[A-Za-z0-9+/]+={0,2}$'); return regex.hasMatch(base64String); @@ -186,6 +187,24 @@ bool validateBase64Image(String base64String) { } } +bool _validCIDImage(String cidString) { + try { + return cidString.startsWith('cid:'); + } catch (e) { + return false; + } +} + +@visibleForTesting +bool validateBase64Image(String base64String) => _validBase64Image(base64String); + +@visibleForTesting +bool validateCIDImage(String cidString) => _validCIDImage(cidString); + +bool _validImageSource(String url) { + return _validUrl(url) || _validBase64Image(url) || validateCIDImage(url); +} + final _citeAttributeValidator = { 'cite': _validUrl, }; @@ -196,8 +215,8 @@ final _elementAttributeValidators = 'href': _validLink, }, 'IMG': { - 'src': (url) => _validUrl(url) || validateBase64Image(url), - 'longdesc': (url) => _validUrl(url) || validateBase64Image(url), + 'src': _validImageSource, + 'longdesc': _validImageSource, }, 'DIV': { 'itemscope': _alwaysAllowed, diff --git a/sanitize_html/test/validate_cid_image_test.dart b/sanitize_html/test/validate_cid_image_test.dart new file mode 100644 index 00000000..8cc0879a --- /dev/null +++ b/sanitize_html/test/validate_cid_image_test.dart @@ -0,0 +1,18 @@ +import 'package:sanitize_html/src/sane_html_validator.dart'; +import 'package:test/test.dart'; + +void main() { + group('validateCIDImage', () { + test('returns true for valid cid string', () { + expect(validateCIDImage('cid:12345'), true); + }); + + test('returns false for string without cid', () { + expect(validateCIDImage('https://example.com/image.png'), false); + }); + + test('returns false for empty string', () { + expect(validateCIDImage(''), false); + }); + }); +} From fda32cde4d4baadaa988477f498ab6622ee79987 Mon Sep 17 00:00:00 2001 From: dab246 Date: Thu, 24 Oct 2024 10:37:59 +0700 Subject: [PATCH 4/4] Allow `id` & `class` attribute --- sanitize_html/lib/src/sane_html_validator.dart | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sanitize_html/lib/src/sane_html_validator.dart b/sanitize_html/lib/src/sane_html_validator.dart index a97f3265..caae2821 100644 --- a/sanitize_html/lib/src/sane_html_validator.dart +++ b/sanitize_html/lib/src/sane_html_validator.dart @@ -268,9 +268,11 @@ class SaneHtmlValidator { node.attributes.removeWhere((k, v) { final attrName = k.toString(); if (attrName == 'id') { - return allowElementId == null || !allowElementId!(v); + return allowAttributes?.contains('id') != true && + (allowElementId == null || !allowElementId!(v)); } if (attrName == 'class') { + if (allowAttributes?.contains('class') == true) return false; if (allowClassName == null) return true; node.classes.removeWhere((cn) => !allowClassName!(cn)); return node.classes.isEmpty;