Skip to content

Commit

Permalink
Disable the UTF-8 validity check on the regex pattern when the regexArbitraryBytes flag is set (#136)
Browse files Browse the repository at this point in the history
  • Loading branch information
nitely committed Jan 6, 2024
1 parent 4ecbeb5 commit c0ea531
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/regex.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,7 @@ when isMainModule:
block:
let flags = {regexArbitraryBytes}
doAssert match("\xff", re2(r"\xff", flags))
doAssert match("\xff", re2("\xff", flags))
doAssert replace("\xff", re2(r"\xff", flags), "abc") == "abc"
doAssert match("\xff\xff", re2(r"\xff\xff", flags))
doAssert replace("\xff\xff", re2(r"\xff\xff", flags), "abc") == "abc"
Expand Down
2 changes: 1 addition & 1 deletion src/regex/compiler.nim
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ when defined(regexDotDir):
import ./dotgraph

func reImpl*(s: string, flags: RegexFlags = {}): Regex {.inline.} =
- if verifyUtf8(s) != -1:
+ if regexArbitraryBytes notin flags and verifyUtf8(s) != -1:
raise newException(RegexError, "Invalid utf-8 regex")
var groups: GroupsCapture
let rpn = s
Expand Down
8 changes: 8 additions & 0 deletions tests/tests2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3073,11 +3073,19 @@ when not defined(js) or NimMajor >= 2:
check match("a", re2(r"a", flags))
check(not match("b", re2(r"a", flags)))
check match("\xff", re2(r"\xff", flags))
check match("\xff", re2("\xff", flags))
check match("\xf8\xa1\xa1\xa1\xa1", re2(r"\xf8\xa1\xa1\xa1\xa1", flags))
check match("\xf8\xa1\xa1\xa1\xa1", re2("\xf8\xa1\xa1\xa1\xa1", flags))
check replace("\xff", re2(r"\xff", flags), "abc") == "abc"
check replace("\xff", re2("\xff", flags), "abc") == "abc"
check match("\xff\xff", re2(r"\xff\xff", flags))
check match("\xff\xff", re2("\xff\xff", flags))
check replace("\xff\xff", re2(r"\xff\xff", flags), "abc") == "abc"
check replace("\xff\xff", re2("\xff\xff", flags), "abc") == "abc"
check match("\xff\xff", re2(r"\xff+", flags))
check match("\xff\xff", re2("\xff+", flags))
check replace("\xff\xff", re2(r"\xff", flags), "abc") == "abcabc"
check replace("\xff\xff", re2("\xff", flags), "abc") == "abcabc"
check(not match("\xf0", re2(r"\xff", flags)))
check replace("\xf0", re2(r"\xff", flags), "abc") == "\xf0"
check match("弢", re2(r"弢", flags))
Expand Down

0 comments on commit c0ea531

Please sign in to comment.