Skip to content

Commit a621ff0

Browse files
authored
Merge pull request #1950 from DaveLak/fix-fuzz-submodules-filename-exception
Fix Several Bugs in the `fuzz_submodule` Causing a lot of False Alarms in the OSS-Fuzz Bug Tracker
2 parents 855befa + 2ed3334 commit a621ff0

File tree

5 files changed

+118
-33
lines changed

5 files changed

+118
-33
lines changed

fuzzing/fuzz-targets/fuzz_submodule.py

+16-31
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,16 @@
33
import os
44
import tempfile
55
from configparser import ParsingError
6-
from utils import is_expected_exception_message, get_max_filename_length
7-
8-
if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
9-
path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git"))
10-
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary
11-
6+
from utils import (
7+
setup_git_environment,
8+
handle_exception,
9+
get_max_filename_length,
10+
)
11+
12+
# Setup the git environment
13+
setup_git_environment()
1214
from git import Repo, GitCommandError, InvalidGitRepositoryError
1315

14-
if not sys.warnoptions: # pragma: no cover
15-
# The warnings filter below can be overridden by passing the -W option
16-
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
17-
import warnings
18-
import logging
19-
20-
# Fuzzing data causes some modules to generate a large number of warnings
21-
# which are not usually interesting and make the test output hard to read, so we ignore them.
22-
warnings.simplefilter("ignore")
23-
logging.getLogger().setLevel(logging.ERROR)
24-
2516

2617
def TestOneInput(data):
2718
fdp = atheris.FuzzedDataProvider(data)
@@ -35,12 +26,13 @@ def TestOneInput(data):
3526
sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool())
3627
sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512)))
3728

38-
submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}"
29+
submodule_name = fdp.ConsumeUnicodeNoSurrogates(
30+
fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir)))
31+
)
3932
submodule_path = os.path.join(repo.working_tree_dir, submodule_name)
40-
submodule_url = sub_repo.git_dir
4133

42-
submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url)
43-
repo.index.commit(f"Added submodule {submodule_name}")
34+
submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir)
35+
repo.index.commit("Added submodule")
4436

4537
with submodule.config_writer() as writer:
4638
key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes()))
@@ -88,18 +80,11 @@ def TestOneInput(data):
8880
BrokenPipeError,
8981
):
9082
return -1
91-
except ValueError as e:
92-
expected_messages = [
93-
"SHA is empty",
94-
"Reference at",
95-
"embedded null byte",
96-
"This submodule instance does not exist anymore",
97-
"cmd stdin was empty",
98-
]
99-
if is_expected_exception_message(e, expected_messages):
83+
except Exception as e:
84+
if isinstance(e, ValueError) and "embedded null byte" in str(e):
10085
return -1
10186
else:
102-
raise e
87+
return handle_exception(e)
10388

10489

10590
def main():

fuzzing/fuzz-targets/utils.py

+86-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import atheris # pragma: no cover
22
import os # pragma: no cover
3-
from typing import List # pragma: no cover
3+
import re # pragma: no cover
4+
import traceback # pragma: no cover
5+
import sys # pragma: no cover
6+
from typing import Set, Tuple, List # pragma: no cover
47

58

69
@atheris.instrument_func
@@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover
3538
int: The maximum filename length.
3639
"""
3740
return os.pathconf(path, "PC_NAME_MAX")
41+
42+
43+
@atheris.instrument_func
44+
def read_lines_from_file(file_path: str) -> list:
45+
"""Read lines from a file and return them as a list."""
46+
try:
47+
with open(file_path, "r") as f:
48+
return [line.strip() for line in f if line.strip()]
49+
except FileNotFoundError:
50+
print(f"File not found: {file_path}")
51+
return []
52+
except IOError as e:
53+
print(f"Error reading file {file_path}: {e}")
54+
return []
55+
56+
57+
@atheris.instrument_func
58+
def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]:
59+
"""Load and parse the exception list from a default or specified file."""
60+
try:
61+
bundle_dir = os.path.dirname(os.path.abspath(__file__))
62+
full_path = os.path.join(bundle_dir, file_path)
63+
lines = read_lines_from_file(full_path)
64+
exception_list: Set[Tuple[str, str]] = set()
65+
for line in lines:
66+
match = re.match(r"(.+):(\d+):", line)
67+
if match:
68+
file_path: str = match.group(1).strip()
69+
line_number: str = str(match.group(2).strip())
70+
exception_list.add((file_path, line_number))
71+
return exception_list
72+
except Exception as e:
73+
print(f"Error loading exception list: {e}")
74+
return set()
75+
76+
77+
@atheris.instrument_func
78+
def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool:
79+
"""Match exception traceback with the entries in the exception list."""
80+
for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
81+
for file_pattern, line_pattern in exception_list:
82+
# Ensure filename and line_number are strings for regex matching
83+
if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
84+
return True
85+
return False
86+
87+
88+
@atheris.instrument_func
89+
def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool:
90+
"""Check if the exception traceback matches any entry in the exception list."""
91+
exception_list = load_exception_list(exception_file)
92+
return match_exception_with_traceback(exception_list, exc_traceback)
93+
94+
95+
@atheris.instrument_func
96+
def handle_exception(e: Exception) -> int:
97+
"""Encapsulate exception handling logic for reusability."""
98+
exc_traceback = e.__traceback__
99+
if check_exception_against_list(exc_traceback):
100+
return -1
101+
else:
102+
raise e
103+
104+
105+
@atheris.instrument_func
106+
def setup_git_environment() -> None:
107+
"""Set up the environment variables for Git."""
108+
bundle_dir = os.path.dirname(os.path.abspath(__file__))
109+
if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
110+
bundled_git_binary_path = os.path.join(bundle_dir, "git")
111+
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path
112+
113+
if not sys.warnoptions: # pragma: no cover
114+
# The warnings filter below can be overridden by passing the -W option
115+
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
116+
import warnings
117+
import logging
118+
119+
# Fuzzing data causes some modules to generate a large number of warnings
120+
# which are not usually interesting and make the test output hard to read, so we ignore them.
121+
warnings.simplefilter("ignore")
122+
logging.getLogger().setLevel(logging.ERROR)

fuzzing/oss-fuzz-scripts/build.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \
1515

1616
# Build fuzzers in $OUT.
1717
find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do
18-
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):."
18+
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:."
1919
done

fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh

+11
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora"
9191

9292
prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing"
9393

94+
pushd "$SRC/gitpython/"
95+
# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the
96+
# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'.
97+
# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise
98+
# anticipated exceptions to reduce false positive test failures.
99+
100+
git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt"
101+
102+
popd
103+
104+
94105
# The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below.
95106
python3 -m pip install --upgrade pip
96107
# Upgrade to the latest versions known to work at the time the below changes were introduced:

pyproject.toml

+4
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ lint.unfixable = [
7878
"test/**" = [
7979
"B018", # useless-expression
8080
]
81+
"fuzzing/fuzz-targets/**" = [
82+
"E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file
83+
]
84+
8185

8286
[tool.codespell]
8387
ignore-words-list="gud,doesnt"

0 commit comments

Comments
 (0)