Skip to content

Commit

Permalink
annotate.py: Add fan_out parameter to Bug.from_dataset()
Browse files Browse the repository at this point in the history
Like the previous commit, this feature would be useful in implementing
support for the '--use-fanout' parameter, but this time for a different
subcommand, namely 'diff-annotate dataset'.

The file used for the new test is a copy of a test file from a different test,
just placed in a different directory, and under a different pathname and name.
  • Loading branch information
jnareb committed Sep 13, 2024
1 parent c22f875 commit 6df523a
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 4 deletions.
51 changes: 47 additions & 4 deletions src/diffannotator/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,8 @@ def __init__(self, patches_data: dict, *,
@classmethod
def from_dataset(cls, dataset_dir: PathLike, bug_id: str, *,
patches_dir: str = DEFAULT_PATCHES_DIR,
annotations_dir: str = DEFAULT_ANNOTATIONS_DIR) -> 'Bug':
annotations_dir: str = DEFAULT_ANNOTATIONS_DIR,
fan_out: bool = False) -> 'Bug':
"""Create Bug object from patch files for given bug in given dataset
Assumes that patch files have '*.diff' extension, and that they are
Expand All @@ -625,6 +626,9 @@ def from_dataset(cls, dataset_dir: PathLike, bug_id: str, *,
in case the `save()` method is invoked without providing `annotate_path`
parameter, the data is saved in dataset_dir / bug_id / annotations_dir
subdirectory; use empty string ("") to not use subdirectory
:param fan_out: the dataset stores patches in fan-out subdirectories,
like the ones generated by 'diff-generate --use-fanout', that is patches
are assumed to be in dataset_dir / bug_id / patches_dir / fanout_subdir
:return: Bug object instance
"""
read_dir = Path(dataset_dir).joinpath(bug_id, patches_dir)
Expand All @@ -639,7 +643,10 @@ def from_dataset(cls, dataset_dir: PathLike, bug_id: str, *,
print(f"Error during Bug constructor: '{read_dir}' is not a directory")

obj = Bug({}, read_dir=read_dir, save_dir=save_dir)
obj.patches = obj._get_patches_from_dir(patches_dir=read_dir)
if fan_out:
obj.patches = obj._get_patches_from_dir_with_fanout(patches_dir=read_dir)
else:
obj.patches = obj._get_patches_from_dir(patches_dir=read_dir)
obj.relative_save_dir = Path(bug_id).joinpath(annotations_dir) # for .save()

return obj
Expand Down Expand Up @@ -689,21 +696,57 @@ def _get_patch(self, patch_file: PathLike) -> dict:

return annotate_single_diff(patch_path)

def _get_patches_from_dir(self, patches_dir: PathLike,
                          fan_out: bool = False) -> dict[str, dict]:
    """Get and annotate set of patches from given directory

    Only files directly inside `patches_dir` whose names match '*.diff'
    are considered.  They are processed in sorted order, so the order
    of the returned mapping does not depend on the filesystem.

    :param patches_dir: directory with patches (path-like or string)
    :param fan_out: the dataset stores patches in fan-out subdirectories,
        like the ones generated by 'diff-generate --use-fanout', that is patches
        are assumed to be in dataset_dir / bug_id / patches_dir / fanout_subdir;
        when true, each patch is handed to `_get_patch()` as
        'fanout_subdir/filename' (the last two path components)
        instead of just the bare filename
    :return: mapping from patch filename (patch source)
        to annotated patch data
    """
    # `PathLike` callers may pass a plain string; `.glob()` needs a Path
    patches_dir = Path(patches_dir)
    patches_data = {}

    for patch_file in sorted(patches_dir.glob('*.diff')):
        if fan_out:
            # keep the fan-out subdirectory in the name given to _get_patch()
            patch_source = '/'.join(patch_file.parts[-2:])
        else:
            patch_source = patch_file.name
        # the result is keyed by bare filename in both modes
        patches_data[patch_file.name] = self._get_patch(patch_source)

    return patches_data

def _get_patches_from_dir_with_fanout(self, patches_dir: PathLike) -> dict[str, dict]:
    """Get and annotate set of patches from given directory, with fan-out

    Fan-out means that individual patches (diffs), instead of being
    stored directly in the `patches_dir` directory, are instead
    stored in subdirectories of said directory, 1 level deeper.

    Entries of `patches_dir` that are not directories are ignored.
    Subdirectories are visited in sorted order, so the order of the
    returned mapping does not depend on the filesystem.

    :param patches_dir: directory with patches (path-like or string)
    :return: mapping from patch filename (patch source),
        relative to `patches_dir` (as string), to annotated patch data
    """
    # `PathLike` callers may pass a plain string; `.iterdir()` needs a Path
    patches_dir = Path(patches_dir)
    patches_data = {}

    for subdir in sorted(patches_dir.iterdir()):
        if not subdir.is_dir():
            continue

        subdir_data = self._get_patches_from_dir(subdir, fan_out=True)
        # per-subdirectory results are keyed by bare filename;
        # prefix them with the fan-out directory name
        patches_data.update(
            (f"{subdir.name}/{filename}", data)
            for filename, data in subdir_data.items()
        )

    return patches_data

def save(self, annotate_dir: Optional[PathLike] = None, fan_out: bool = False):
"""Save annotated patches in JSON format
Expand Down
16 changes: 16 additions & 0 deletions tests/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,22 @@ def test_Bug_from_dataset():
"there is expected changed file in a bug patch"


def test_Bug_from_dataset_with_fanout():
    """Bug.from_dataset(..., fan_out=True) picks up a patch stored one level deeper"""
    # code patch
    patch_path = Path('tests/test_dataset_fanout/tqdm-1/c0/dcf39b046d1b4ff6de14ac99ad9a1b10487512.diff')

    # patches are expected under the key 'fanout_subdir/filename'
    patch_key = '/'.join(patch_path.parts[-2:])
    fanout_bug = Bug.from_dataset(
        'tests/test_dataset_fanout',
        'tqdm-1',
        patches_dir="",
        annotations_dir="",
        fan_out=True,
    )
    found_patches = fanout_bug.patches

    assert patch_key in found_patches, \
        "retrieved annotations for the single *.diff file"
    assert len(found_patches) == 1, \
        "there was only 1 patch file for a bug"
    assert "tqdm/contrib/__init__.py" in found_patches[patch_key], \
        "there is expected changed file in a bug patch"


def test_Bug_from_patchset():
file_path = 'tests/test_dataset/tqdm-1/c0dcf39b046d1b4ff6de14ac99ad9a1b10487512.diff'
patch = unidiff.PatchSet.from_filename(file_path, encoding='utf-8')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/tqdm/contrib/__init__.py b/tqdm/contrib/__init__.py
index 1dddacf..935ab63 100644
--- a/tqdm/contrib/__init__.py
+++ b/tqdm/contrib/__init__.py
@@ -38,7 +38,7 @@ def tenumerate(iterable, start=0, total=None, tqdm_class=tqdm_auto,
if isinstance(iterable, np.ndarray):
return tqdm_class(np.ndenumerate(iterable),
total=total or len(iterable), **tqdm_kwargs)
- return enumerate(tqdm_class(iterable, start, **tqdm_kwargs))
+ return enumerate(tqdm_class(iterable, **tqdm_kwargs), start)


def _tzip(iter1, *iter2plus, **tqdm_kwargs):

0 comments on commit 6df523a

Please sign in to comment.