Skip to content

Commit

Permalink
test_annotate.py: Extract count_pm_lines() and move it to conftest.py
Browse files Browse the repository at this point in the history
Extracting this function reduces code duplication (one of the copies was
slightly wrong, in a way that didn't make the test it was in fail:
total_p and total_m were switched around in that copy of the code).

Moving it to conftest.py means that it can be used by other tests,
like the new test in test_cli.py that will be added in the next commit.
  • Loading branch information
jnareb committed Oct 28, 2024
1 parent 9ca979a commit bf39557
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 18 deletions.
26 changes: 26 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
default_branch = 'main'


## ----------------------------------------------------------------------
## fixtures


@pytest.fixture(scope="module") # like unittest.setUpClass()
def example_repo(tmp_path_factory: pytest.TempPathFactory) -> GitRepo:
"""Prepare Git repository for testing `utils.git` module
Expand Down Expand Up @@ -78,3 +82,25 @@ def example_repo(tmp_path_factory: pytest.TempPathFactory) -> GitRepo:
subprocess.run(['git', '-C', repo_path, 'tag', 'v2'])

return GitRepo(repo_path)


## ----------------------------------------------------------------------
## helper functions


def count_pm_lines(changes_data: dict) -> tuple[int, int]:
    """Count number of '-' and '+' lines in changes part of annotation data

    :param changes_data: information about changes extracted from annotation data;
        in the v2 data format this data is available at the 'changes' key
    :return: (total number of '-' lines, total number of '+' lines)
    """
    total_m = total_p = 0
    # file names (the keys) are irrelevant here; only the per-file data matters
    for file_data in changes_data.values():
        # a missing '-' or '+' key contributes zero lines to its total
        total_m += len(file_data.get('-', ()))
        total_p += len(file_data.get('+', ()))

    return total_m, total_p
21 changes: 3 additions & 18 deletions tests/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
clean_text, line_is_comment, annotate_single_diff,
Bug, BugDataset, AnnotatedPatchedFile, AnnotatedHunk, AnnotatedPatchSet)
from diffannotator.utils.git import GitRepo, DiffSide, ChangeSet
from .conftest import count_pm_lines

# Example code to be tokenized
example_C_code = r'''
Expand Down Expand Up @@ -391,17 +392,7 @@ def test_misc_patchsets_sizes_and_spreads():
assert len(changes_data) == diff_metadata['n_files'] + diff_metadata['n_file_renames'], \
f"number of files matches between 'changes' and 'diff_metadata' for {file_path}"

# TODO: extract this common-ish code
total_m = total_p = 0
for file_name, file_data in changes_data.items():
for data_key, data_value in file_data.items():
if data_key == '-':
total_m += len(data_value)
elif data_key == '+':
total_p += len(data_value)

## DEBUG
#print(f"{file_name!r}: {total_m=}, {total_p=}")
total_m, total_p = count_pm_lines(changes_data)

## DEBUG
#print(f"TOTAL: {total_m=}, {total_p=}, {total_p+total_m=}")
Expand Down Expand Up @@ -814,13 +805,7 @@ def test_BugDataset_from_repo(tmp_path: Path):
assert len(bug_patches['changes']) == diff_metadata['n_files'] + diff_metadata['n_file_renames'], \
f"number of files matches between 'changes' and 'diff_metadata' for patchset № {i}"

total_p = total_m = 0
for file_data in bug_patches['changes'].values():
for data_key, data_value in file_data.items():
if data_key == '-':
total_p += len(data_value)
elif data_key == '+':
total_m += len(data_value)
total_m, total_p = count_pm_lines(bug_patches['changes'])

## DEBUG
#print(f"{i}: {annotated_patch_data.patches.keys()}")
Expand Down

0 comments on commit bf39557

Please sign in to comment.