diff --git a/bears/general/URLBear.py b/bears/general/URLBear.py index 4f98fe8238..13f5e1fde3 100644 --- a/bears/general/URLBear.py +++ b/bears/general/URLBear.py @@ -108,12 +108,16 @@ def extract_links_from_file(file, link_ignore_regex, link_ignore_list): [^.:%\s_/?#[\]@\\]+ # Initial part of domain \. # A required dot `.` ( - (?:[^\s()%\'"`<>|\\\[\]]+) # Path name - # This part does not allow - # any parenthesis: balanced or - # unbalanced. - | # OR - \([^\s()%\'"`<>|\\\[\]]*\) # Path name contained within () + ((?:%[A-Fa-f0-9][A-Fa-f0-9])*[^\s()%\'"`<>|\\\[\]]+) + # Path name + # This part allows percentage + # encoding like %3F + # and does not allow + # any parenthesis: balanced or + # unbalanced. + | # OR + \((?:%[A-Fa-f0-9][A-Fa-f0-9])*[^\s()%\'"`<>|\\\[\]]*\) + # Path name contained within () # This part allows path names that # are explicitly enclosed within one # set of parenthesis. diff --git a/tests/general/URLBearTest.py b/tests/general/URLBearTest.py index 45eebbcf34..f5a2ff1076 100644 --- a/tests/general/URLBearTest.py +++ b/tests/general/URLBearTest.py @@ -60,6 +60,23 @@ def test_detect_url_result(self): [3, 'http://www.google.com/404', 404, LINK_CONTEXT.no_context]) + def test_precentage_encoded_url(self): + valid_file = """ + # A url with a precentage-encoded character in path + https://img.shields.io/badge/Maintained%3F-yes-green.svg/200 + """.splitlines() + + with requests_mock.Mocker() as m: + m.add_matcher(custom_matcher) + + result = get_results(self.uut, valid_file) + self.assertEqual(result[0].contents, + [3, + ('https://img.shields.io/badge/Maintained%3F-' + 'yes-green.svg/200'), + 200, + LINK_CONTEXT.no_context]) + class URLResultTest(unittest.TestCase):