Skip to content

Commit a4bed6d

Browse files
authored
escape more characters in fileURLWithFileSystemPath (#81)
* escape more characters in fileURLWithFileSystemPath * use a lookup table
1 parent e6cb36c commit a4bed6d

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

Source/WTF/wtf/URL.cpp

+16-2
Original file line numberDiff line numberDiff line change
@@ -814,10 +814,24 @@ void URL::setQuery(StringView newQuery)
814814
maybeTrimTrailingSpacesFromOpaquePath();
815815
}
816816

817+
// To match Node.js pathToFileURL, the following chars are escaped: \0, \t, \n, \r, space, " # % ? [ \ ] ^ | ~
818+
// https://github.com/nodejs/node/blob/532fff6b27be6b0d833d06b4a9fe46d6fb7f0f6c/src/node_url.cc#L82-L121
819+
// RFC1738 defines the following chars as "unsafe" for URLs
820+
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
821+
static constexpr uint64_t escapeTable[] = {
822+
// 0-63: Only specific control chars (\0, \t, \n, \r), space, ", #, %, ?
823+
(1ULL << 0) | (1ULL << '\t') | (1ULL << '\n') | (1ULL << '\r') |
824+
(1ULL << ' ') | (1ULL << '"') | (1ULL << '#') | (1ULL << '%') | (1ULL << '?'),
825+
826+
// 64-127: [, \, ], ^, |, ~
827+
(1ULL << ('[' - 64)) | (1ULL << ('\\' - 64)) | (1ULL << (']' - 64)) |
828+
(1ULL << ('^' - 64)) | (1ULL << ('|' - 64)) | (1ULL << ('~' - 64))
829+
};
830+
817831
static String escapePathWithoutCopying(StringView path)
818832
{
819-
auto questionMarkOrNumberSignOrNonASCII = [] (UChar character) {
820-
return character == '?' || character == '#' || !isASCII(character);
833+
auto questionMarkOrNumberSignOrNonASCII = [](UChar character) {
834+
return character >= 128 || ((escapeTable[character >> 6] >> (character & 63)) & 1);
821835
};
822836
return percentEncodeCharacters(path, questionMarkOrNumberSignOrNonASCII);
823837
}

0 commit comments

Comments
 (0)