Skip to content

Commit

Permalink
annotate.py: Add and test HaPy-Bug auto annotations as line callback
Browse files Browse the repository at this point in the history
This commit takes the code that was used for creating the initial automatic
annotations in the HaPy-Bug dataset, and tries to translate it into
the current shape of the line callback function, putting this code into
the data/experiments/HaPy-Bug/hapybug_line_callback_func.py file.

The added tests/test_cli.py::test_annotate_patch_with_line_callback_hapybug
test checks that it works as intended.  Experiments with different
patches have shown, however, that the current implementation doesn't fully
do what the original HaPy-Bug code did.  For that, we will need to
change the signature of the line callback function, again...

Includes lots of commented-out debug-print lines.
  • Loading branch information
jnareb committed Nov 29, 2024
1 parent 609b2e7 commit bb64b3e
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 0 deletions.
27 changes: 27 additions & 0 deletions data/experiments/HaPy-Bug/hapybug_line_callback_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
def line_callback(file_purpose, tokens):
    """Pick the annotation type for a single changed line, HaPy-Bug style.

    Based on the code used to generate initial annotations for HaPy-Bug:
    https://github.com/ncusi/python_cve_dataset/blob/main/annotation/annotate.py#L80

    The original code keyed on the file *type* (the "type" field in
    'languages.yml'); only the file *purpose* is available here.  The two
    are often the same, but not always (!) -- see
    Languages._path2purpose(path: str, filetype: str) -> str.

    :param file_purpose: purpose of the file the line belongs to, for
        example "programming", "documentation" or "test"
    :param tokens: tokens making up the line; only passed on to
        line_is_comment(), and only for "programming" files
    :return: "documentation" or "test" when that is the file purpose;
        for "programming" files "documentation" if the line is a comment
        and "bug(fix)" otherwise; "bug(fix)" for any other purpose
    """
    # NOTE: function definition *must* currently be first line of the file

    # Lines in documentation and test files simply inherit the file purpose.
    if file_purpose in ("documentation", "test"):
        return file_purpose

    # Any other non-programming purpose counts as part of the bug fix.
    if file_purpose != "programming":
        return "bug(fix)"

    # For programming languages: comments are documentation, code is the fix.
    # The former `elif file_purpose == "test"` check was dropped from this
    # branch: it could never be true, because file_purpose is "programming".
    # NOTE(review): line_is_comment() is not defined in this file; presumably
    # it is provided by the code that executes this callback -- confirm.
    if line_is_comment(tokens):
        return "documentation"
    return "bug(fix)"
7 changes: 7 additions & 0 deletions src/diffannotator/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ def make_line_callback(code_str: str) -> OptionalLineCallback:
:param code_str: text of the function body code
:return: callback function or None
"""
#print(f"RUNNING make_line_callback(code_str='{code_str[:6]}[...]')")
if not code_str:
return None

Expand All @@ -573,6 +574,8 @@ def make_line_callback(code_str: str) -> OptionalLineCallback:
if match:
# or .info(), if it were not provided extra debugging data
logger.debug("Found function definition in callback code string:", match.groupdict())
#print(f" Found function definition in callback code string:")
#print(f" {match.groupdict()}")

callback_name = match.group('func_name')
callback_code_str = code_str
Expand Down Expand Up @@ -1973,13 +1976,15 @@ def filename_to_language_callback(ctx: typer.Context, param: typer.CallbackParam


def parse_line_callback(code_str: Optional[str]) -> Optional[LineCallback]:
#print(f"RUNNING parse_line_callback({code_str=})")
if code_str is None:
return None

# code_str might be the name of the file with the code
maybe_path: Optional[Path] = Path(code_str)
try:
if maybe_path.is_file():
#print(f" reading code from {maybe_path!r} file")
code_str = maybe_path.read_text(encoding='utf-8')
else:
maybe_path = None
Expand All @@ -1989,6 +1994,8 @@ def parse_line_callback(code_str: Optional[str]) -> Optional[LineCallback]:

# code_str now contains the code as a string
# maybe_path is not None only if code_str was retrieved from file
#print(f" {maybe_path=}")
#print(code_str)

# sanity check
if 'return ' not in code_str:
Expand Down
36 changes: 36 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,42 @@ def test_annotate_patch_with_line_callback(tmp_path: Path):
"app runs 'patch' subcommand with line callback from file defining function without errors"


def test_annotate_patch_with_line_callback_hapybug(tmp_path: Path, caplog: pytest.LogCaptureFixture):
    """Run the 'patch' subcommand with the HaPy-Bug line callback file.

    Checks that the command exits cleanly and that the first added line
    of 'tqdm/contrib/__init__.py' is annotated as 'bug(fix)'.
    """
    patch_file = Path('tests/test_dataset/tqdm-1/c0dcf39b046d1b4ff6de14ac99ad9a1b10487512.diff')
    # other patches that were tried with this callback:
    #patch_file = Path('tests/test_dataset_structured/scrapy-11/patches/9de6f1ca757b7f200d15e94840c9d431cf202276.diff')
    #patch_file = Path('tests/test_dataset_structured/keras-10/patches/c1c4afe60b1355a6c0e83577791a0423f37a3324.diff')
    output_file = tmp_path.joinpath(patch_file).with_suffix('.v2.json')

    # the callback is given as a path to a file holding a full function definition
    callback_file = Path('data/experiments/HaPy-Bug/hapybug_line_callback_func.py')
    cli_args = [
        "--line-callback", str(callback_file),  # file with line callback
        "patch", str(patch_file), str(output_file),
    ]
    result = runner.invoke(annotate_app, cli_args)

    # on failure, dump everything that helps to diagnose it before asserting
    if result.exit_code != 0:
        print(f"Exit code: {result.exit_code}")
        print(result.stdout)
        if caplog.text:
            print("Captured logs:")
            print(caplog.text)
        if result.exception:
            print(f"Exception: {result.exception}")
            print("Traceback:")
            # `result.exc_info[2]` would work as well as `result.exception.__traceback__`
            traceback.print_tb(result.exception.__traceback__)

    assert result.exit_code == 0, \
        "app runs 'patch' subcommand with line callback from file defining function without errors"

    annotation_data: dict = json.loads(output_file.read_text())
    first_added_line = annotation_data['changes']['tqdm/contrib/__init__.py']['+'][0]
    assert first_added_line['type'] == 'bug(fix)', \
        "the callback was run, and it did provide 'bug(fix)' as line type for code changes"


def test_annotate_patch_with_purpose_to_annotation(tmp_path: Path):
file_path = Path('tests/test_dataset/tqdm-1/c0dcf39b046d1b4ff6de14ac99ad9a1b10487512.diff')
save_path = tmp_path.joinpath(file_path).with_suffix('.json')
Expand Down

0 comments on commit bb64b3e

Please sign in to comment.