Skip to content

Commit

Permalink
annotate.py: Change file_purpose to file_data in line callback
Browse files Browse the repository at this point in the history
The LineCallback type now uses `dict[str, str]` instead of `str` as the
type of its first parameter; the line callback is now passed file_data,
not just file_purpose, as its first (positional) argument.

This change makes it possible to modify the code of hapybug_line_callback_func.py
so that it matches what the code that creates the initial automatic annotations
for the annotation tasks in the HaPy-Bug dataset does.

Renames `file_purpose` to `file_data`, and uses `file_data["purpose"]`
in the places where `file_purpose` was used previously.

The test passes; manual checking with the help of the test and debug-print
statements suggests that it should work as intended.
  • Loading branch information
jnareb committed Nov 29, 2024
1 parent bb64b3e commit c8115f0
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 16 deletions.
17 changes: 7 additions & 10 deletions data/experiments/HaPy-Bug/hapybug_line_callback_func.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
def line_callback(file_purpose, tokens):
def line_callback(file_data, tokens):
# NOTE: function definition *must* currently be first line of the file

# based on the code used to generate initial annotations for HaPy-Bug
# https://github.com/ncusi/python_cve_dataset/blob/main/annotation/annotate.py#L80

#print(f"RUNNING line_callback({file_purpose!r}, ...) -> {''.join([t[2] for t in tokens]).rstrip()}")
line_type = file_purpose
#print(f"RUNNING line_callback({file_data!r}, ...) -> {''.join([t[2] for t in tokens]).rstrip()}")
line_type = file_data['purpose']

# the original code uses file _type_ here (the "type" field in 'languages.yml');
# file purpose is here often the same as file type, but not always (!)
# see Languages._path2purpose(path: str, filetype: str) -> str
if file_purpose != "programming":
if file_purpose not in ["documentation", "test"]:
if file_data['type'] != "programming":
if file_data['purpose'] not in ["documentation", "test"]:
line_type = "bug(fix)"
else:
# For programming languages
if line_is_comment(tokens):
line_type = "documentation"
#print(f" line is comment, {file_purpose=}, {line_type=}")
elif file_purpose == "test":
#print(f" line is comment, {file_data=}, {line_type=}")
elif file_data['purpose'] == "test":
line_type = "test"
else:
line_type = "bug(fix)"
Expand Down
14 changes: 10 additions & 4 deletions src/diffannotator/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def annotate(path: str) -> dict:

T = TypeVar('T')
PathLike = TypeVar("PathLike", str, bytes, Path, os.PathLike)
LineCallback = Callable[[str, Iterable[tuple]], str]
LineCallback = Callable[[dict[str, str], Iterable[tuple]], str]
OptionalLineCallback = Optional[LineCallback]

PURPOSE_TO_ANNOTATION = {"documentation": "documentation"}
Expand Down Expand Up @@ -582,9 +582,12 @@ def make_line_callback(code_str: str) -> OptionalLineCallback:
else:
# or .info(), if it were not provided full text of the callback body
logger.debug("Using provided code string as body of callback function", code_str)
#print(f" Using provided code string as body (first 50 characters):")
#print(f" {code_str[:50]}")
#print(f" {match=}")

callback_name = "_line_callback"
callback_code_str = (f"def {callback_name}(file_purpose, tokens):\n" +
callback_code_str = (f"def {callback_name}(file_data, tokens):\n" +
" " + "\n ".join(code_str.splitlines()) + "\n")
# TODO?: wrap with try: ... except SyntaxError: ...
exec(callback_code_str, globals())
Expand Down Expand Up @@ -1316,9 +1319,12 @@ def process(self):
line_annotation: Optional[str] = None
if AnnotatedPatchedFile.line_callback is not None:
try:
line_annotation = AnnotatedPatchedFile.line_callback(file_purpose, line_tokens)
except:
file_data = self.patched_file.patch_data[file_path]
#print(f"CALLING line_callback({file_data=}, {len(line_tokens)=})")
line_annotation = AnnotatedPatchedFile.line_callback(file_data, line_tokens)
except Exception as ex:
# TODO: log problems with line callback
#print(f"EXCEPTION {ex}")
pass
if line_annotation is None:
line_annotation = 'documentation' \
Expand Down
2 changes: 1 addition & 1 deletion tests/test_code_fragments/example_line_callback.py.body
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# def _line_callback(file_purpose, tokens):
# def _line_callback(file_data, tokens):
# this could be written using ternary conditional operator
if len(tokens) == 1 and tokens[0][2] == '\\n':
return 'empty'
Expand Down
2 changes: 1 addition & 1 deletion tests/test_code_fragments/example_line_callback_func.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
def detect_all_whitespace_line(_file_purpose, tokens):
def detect_all_whitespace_line(_file_data, tokens):
if len(tokens) == 1 and tokens[0][2] == "\n":
return "empty"
elif all([token_type in Token.Text.Whitespace or
Expand Down

0 comments on commit c8115f0

Please sign in to comment.