test_annotate.py: Extract count_pm_lines() and move it to conftest.py

Extracting this function reduces code duplication. (One of the duplicated copies was slightly wrong, though not in a way that made the test it was in fail: total_p and total_m were swapped in that copy of the code.) Moving it to conftest.py means that it can be used by other tests, such as the new test in test_cli.py that will be added in the next commit.
ncusi · Oct 28, 2024 · bf39557 · bf39557
1 parent 9ca979a
commit bf39557
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 18 deletions.
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -19,6 +19,10 @@
 default_branch = 'main'
 
 
+## ----------------------------------------------------------------------
+## fixtures
+
+
 @pytest.fixture(scope="module")  # like unittest.setUpClass()
 def example_repo(tmp_path_factory: pytest.TempPathFactory) -> GitRepo:
     """Prepare Git repository for testing `utils.git` module
@@ -78,3 +82,25 @@ def example_repo(tmp_path_factory: pytest.TempPathFactory) -> GitRepo:
     subprocess.run(['git', '-C', repo_path, 'tag', 'v2'])
 
     return GitRepo(repo_path)
+
+
+## ----------------------------------------------------------------------
+## helper functions
+
+
+def count_pm_lines(changes_data: dict) -> tuple[int, int]:
+    """Count number of '-' and '+' lines in changes part of annotation data
+
+    :param changes_data: information about changes extracted from annotation data;
+        in the v2 data format this data is available at the 'changes' key
+    :return: (total number of '-' lines, total number of '+' lines)
+    """
+    total_p = total_m = 0
+    for file_name, file_data in changes_data.items():  # we are not interested in file names here
+        for data_key, data_value in file_data.items():
+            if data_key == '-':
+                total_m += len(data_value)
+            elif data_key == '+':
+                total_p += len(data_value)
+
+    return total_m, total_p
diff --git a/tests/test_annotate.py b/tests/test_annotate.py
@@ -15,6 +15,7 @@
                                     clean_text, line_is_comment, annotate_single_diff,
                                     Bug, BugDataset, AnnotatedPatchedFile, AnnotatedHunk, AnnotatedPatchSet)
 from diffannotator.utils.git import GitRepo, DiffSide, ChangeSet
+from .conftest import count_pm_lines
 
 # Example code to be tokenized
 example_C_code = r'''
@@ -391,17 +392,7 @@ def test_misc_patchsets_sizes_and_spreads():
     assert len(changes_data) == diff_metadata['n_files'] + diff_metadata['n_file_renames'], \
         f"number of files matches between 'changes' and 'diff_metadata' for {file_path}"
 
-    # TODO: extract this common-ish code
-    total_m = total_p = 0
-    for file_name, file_data in changes_data.items():
-        for data_key, data_value in file_data.items():
-            if data_key == '-':
-                total_m += len(data_value)
-            elif data_key == '+':
-                total_p += len(data_value)
-
-        ## DEBUG
-        #print(f"{file_name!r}: {total_m=}, {total_p=}")
+    total_m, total_p = count_pm_lines(changes_data)
 
     ## DEBUG
     #print(f"TOTAL: {total_m=}, {total_p=}, {total_p+total_m=}")
@@ -814,13 +805,7 @@ def test_BugDataset_from_repo(tmp_path: Path):
         assert len(bug_patches['changes']) == diff_metadata['n_files'] + diff_metadata['n_file_renames'], \
             f"number of files matches between 'changes' and 'diff_metadata' for patchset № {i}"
 
-        total_p = total_m = 0
-        for file_data in bug_patches['changes'].values():
-            for data_key, data_value in file_data.items():
-                if data_key == '-':
-                    total_p += len(data_value)
-                elif data_key == '+':
-                    total_m += len(data_value)
+        total_m, total_p = count_pm_lines(bug_patches['changes'])
 
         ## DEBUG
         #print(f"{i}: {annotated_patch_data.patches.keys()}")