-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_utils_git.py
366 lines (293 loc) · 14.2 KB
/
test_utils_git.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# -*- coding: utf-8 -*-
"""Test cases for 'src/diffannotator/utils/git.py' module"""
import pytest
from unidiff import PatchSet
from diffannotator.utils.git import decode_c_quoted_str, GitRepo, DiffSide, AuthorStat, parse_shortlog_count, ChangeSet
from tests.conftest import default_branch, example_repo
def test_decode_c_quoted_str():
    """Check decode_c_quoted_str() on well-formed input and on malformed escapes"""
    # each entry: (expected decoded text, input text, assertion message)
    # NOTE: the TAB entry does not use a raw string for the expected value;
    # the escape sequence there produces a literal TAB character, while
    # the other entries use raw strings for easier reading
    well_formed = [
        (r'simple text',
         r'simple text',
         'non-encoded text passthrough'),
        (r'some text\with slash and "quote"',
         r'"some text\\with slash and \"quote\""',
         'c-quoted quotation marks and backslashes'),
        ('some text with \t tab',
         r'"some text with \t tab"',
         'c-quoted tab character'),
        (r'zażółć',
         r'"za\305\274\303\263\305\202\304\207"',
         'c-quoted utf8'),
    ]
    for decoded, quoted, message in well_formed:
        assert decoded == decode_c_quoted_str(quoted), message

    # every one of these inputs is expected to be rejected with ValueError
    malformed = [
        r'"unknown escape \x"',
        r'"interrupted octal escape \30z"',
        r'"unfinished escape \"',
        r'"unfinished octal escape \30"',
        r'"\305\477"',
    ]
    for quoted in malformed:
        with pytest.raises(ValueError):
            decode_c_quoted_str(quoted)
def test_list_files(example_repo: GitRepo):
    """Check the files reported by GitRepo.list_files() for 'v1' and for the default commit"""
    # listing order is not part of the contract checked here, hence sorted()
    files_in_v1 = example_repo.list_files('v1')
    assert sorted(['example_file', 'subdir/subfile']) == sorted(files_in_v1), \
        "list of files in v1"

    # no argument: the assertion message below treats the default as HEAD
    files_in_head = example_repo.list_files()
    assert sorted(['renamed_file', 'subdir/subfile', 'new_file']) == sorted(files_in_head), \
        "list of files in HEAD"
def test_list_changed_files(example_repo: GitRepo):
    """Test that GitRepo.list_changed_files returns correct list of files

    The changed files of 'v2' are checked twice: with the default `side`
    (file names as they are after the change), and with `side=DiffSide.PRE`
    (file names as they were before the change).
    """
    expected = [
        'new_file',
        'subdir/subfile',
        'renamed_file',
    ]
    actual = example_repo.list_changed_files('v2')
    assert sorted(expected) == sorted(actual), "list of changed files in v2 (post)"

    expected = [
        # no 'new_file'
        'subdir/subfile',
        'example_file',  # before rename
    ]
    actual = example_repo.list_changed_files('v2', side=DiffSide.PRE)
    # the message names the pre-image side, so a failure points at the right check
    assert sorted(expected) == sorted(actual), "list of changed files in v2 (pre)"
def test_diff_file_status(example_repo):
    """Check the per-file status letters returned by GitRepo.diff_file_status for 'v2'"""
    status_by_names = example_repo.diff_file_status('v2')

    # keys are (name before, name after) pairs
    assert {
        # file added in v2, so there is no name before
        (None, 'new_file'): 'A',
        # file renamed in v2
        ('example_file', 'renamed_file'): 'R',
        # file modified in v2 without name change
        ('subdir/subfile', 'subdir/subfile'): 'M',
    } == status_by_names, "status of changed files in v2"
def test_unidiff(example_repo):
    """Check file names, diffstat and changed lines extracted from GitRepo.unidiff"""
    changeset = example_repo.unidiff()

    changed_paths = [patched_file.path for patched_file in changeset]
    assert sorted(changed_paths) == sorted([
        'new_file',  # file added in v2
        'renamed_file',  # file renamed in v2 from 'example_file'
        'subdir/subfile',  # file modified in v2 without name change
    ]), "extracted changed files match"

    # path -> (number of removed lines, number of added lines)
    removed_added = {}
    for patched_file in changeset:
        removed_added[patched_file.path] = (patched_file.removed, patched_file.added)

    assert removed_added['new_file'][0] == 0, "new file has no deletions"
    assert removed_added['renamed_file'] == (1, 1), "rename with changes"
    # before: 'subfile\n', after: 'subfile\nsubfile\n'
    assert removed_added['subdir/subfile'] == (0, 1), "changed file stats matches"

    # the last file in the patch is expected to be 'subdir/subfile',
    # and there is only one hunk in its changes
    only_hunk = changeset[-1][0]

    # nothing is removed: line 1 ('subfile') is left unchanged
    removed_lines = {
        line.source_line_no: line.value.strip()
        for line in only_hunk
        if line.is_removed
    }
    assert removed_lines == {}, "pre-image on last file matches"

    # only line 2 is added: line 1 ('subfile') is left unchanged
    added_lines = {
        line.target_line_no: line.value.strip()
        for line in only_hunk
        if line.is_added
    }
    assert added_lines == {2: 'subfile'}, "post-image on last file matches"
def test_unidiff_wrap(example_repo):
    """Check the type returned by GitRepo.unidiff for each value of `wrap`"""
    by_default = example_repo.unidiff()
    assert isinstance(by_default, PatchSet), "return PatchSet by default"

    # wrap=True is requested twice, once per type check, with a separate call each time
    wrapped = example_repo.unidiff(wrap=True)
    assert isinstance(wrapped, PatchSet), "with wrap=True return unidiff.PatchSet"
    wrapped = example_repo.unidiff(wrap=True)
    assert isinstance(wrapped, ChangeSet), "with wrap=True return utils.git.ChangeSet"

    unwrapped = example_repo.unidiff(wrap=False)
    assert isinstance(unwrapped, str), "with wrap=False return str"
def test_changed_lines_extents(example_repo):
    """Check the per-file line extents from GitRepo.changed_lines_extents

    Only the first element of the returned pair is examined, once for
    the default commit and once for 'v1'.
    """
    # TODO?: use pytest-subtest plugin

    # for HEAD (last commit)
    head_extents, _ = example_repo.changed_lines_extents()
    assert {
        'new_file': [(1,10)],  # whole file added in v2
        'renamed_file': [(4,4)],  # file renamed in v2 from 'example_file', changed line 4
        'subdir/subfile': [(2,2)],  # file modified in v2 without name change
    } == head_extents, "changed lines for post-image for changed files match (HEAD)"

    # for v1 (first commit, root)
    v1_extents, _ = example_repo.changed_lines_extents('v1')
    assert {
        'example_file': [(1,5)],  # whole file added in v1 with 5 lines
        'subdir/subfile': [(1,1)],  # whole file added in v1 with a single line
    } == v1_extents, "changed lines for post-image for changed files match (v1)"
def test_file_contents(example_repo):
    """Check that GitRepo.file_contents gives the text of a file at a given revision"""
    contents_at_v1 = example_repo.file_contents('v1', 'example_file')
    assert 'example\n2\n3\n4\n5\n' == contents_at_v1, \
        "contents of 'example_file' at v1"

    # same file under its new name, with line 4 changed
    contents_at_v2 = example_repo.file_contents('v2', 'renamed_file')
    assert 'example\n2\n3\n4b\n5\n' == contents_at_v2, \
        "contents of 'renamed_file' at v2"
def test_list_tags(example_repo):
    """Check that GitRepo.list_tags returns every tag of the example repository"""
    tags = example_repo.list_tags()
    # compared without sorting, so the order of the returned tags matters as well
    assert ['v1', 'v1.5', 'v2'] == tags, "list of tags matches"
def test_get_commit_metadata(example_repo):
    """Check selected fields of GitRepo.get_commit_metadata for 'v2'"""
    metadata = example_repo.get_commit_metadata('v2')

    tree_oid = metadata['tree']
    assert tree_oid == '417e98fd5c1f9ddfbdee64c98256998958d901ce', \
        "'tree' field did not change"

    commit_message = metadata['message']
    assert commit_message == 'Change some files\n\n* one renamed file\n* one new file\n', \
        "commit message matches"

    # NOTE(review): the '[email protected]' values look like e-mail obfuscation
    # applied to this copy of the file -- confirm against the repository
    author_info = metadata['author']
    assert author_info == {
        'author': 'Joe Random <[email protected]>',
        'email': '[email protected]',
        'name': 'Joe Random',
        'timestamp': 1693605193,
        'tz_info': '-0600'
    }, "author info matches"

    committer_ident = metadata['committer']['committer']
    assert committer_ident == 'A U Thor <[email protected]>', \
        "committer matches repository setup"
def test_is_valid_commit(example_repo):
    """Check the answers given by GitRepo.is_valid_commit

    Only references and the <rev>^ / <rev>~<n> notation are exercised;
    raw SHA-1 identifiers are not, as the test repository is not created
    in such a way that SHA-1 identifiers are stable.
    """
    # each entry: (revision, whether it should be valid, assertion message)
    checks = [
        # all are valid references that resolve to commit
        ("HEAD", True, "HEAD is valid"),
        ("v1", True, "tag v1 is valid"),
        ("v2", True, "tag v2 is valid"),
        # not an existing reference
        ("non_existent", False, "no 'non_existent' reference"),
        # <rev>^ notation within existing commit history
        ("HEAD^", True, "HEAD^ is valid"),
        # <rev>^ notation leading outside existing commit history
        ("HEAD^3", False, "HEAD^3 is invalid"),
        ("HEAD~20", False, "HEAD~20 is invalid"),
    ]
    for revision, should_be_valid, message in checks:
        # compare truthiness, as a bare `assert` / `assert not` would
        assert bool(example_repo.is_valid_commit(revision)) is should_be_valid, message
def test_get_current_branch(example_repo):
    """Check that GitRepo.get_current_branch names the default branch"""
    current_branch = example_repo.get_current_branch()
    assert current_branch == default_branch, \
        f"current branch is default branch: '{default_branch}'"
def test_resolve_symbolic_ref(example_repo):
    """Check GitRepo.resolve_symbolic_ref for a symbolic ref and for a tag"""
    head_target = example_repo.resolve_symbolic_ref("HEAD")
    assert head_target == f'refs/heads/{default_branch}', \
        f"'HEAD' resolves to 'refs/heads/{default_branch}'"

    # a tag is not a symbolic ref, so there is nothing to resolve
    tag_target = example_repo.resolve_symbolic_ref("v2")
    assert tag_target is None, "'v2' is not a symbolic ref"
def test_check_merged_into(example_repo):
    """Check GitRepo.check_merged_into with default, list-valued and single `into`"""
    # default `into`
    merged_by_default = example_repo.check_merged_into('v1')
    assert len(merged_by_default) > 0, "'v1' is merged [into HEAD]"

    # `into` given as a list of ref prefixes
    refs_with_v1 = example_repo.check_merged_into('v1', ['refs/heads/', 'refs/tags/'])
    assert sorted([
        f'refs/heads/{default_branch}',
        'refs/tags/v1',
        'refs/tags/v1.5',
        'refs/tags/v2',
    ]) == sorted(refs_with_v1), "'v1' is merged into HEAD, v1, v1.5, v2"

    # `into` given as a single ref, which is expected not to contain 'v2'
    refs_with_v2 = example_repo.check_merged_into('v2', 'refs/tags/v1')
    assert not refs_with_v2, "'v2' is not merged into v1"
def test_count_commits(example_repo):
    """Check GitRepo.count_commits() with and without an explicit start_from"""
    # each entry: (positional arguments, assertion message)
    variants = (
        ((), "number of commits in repository matches (default param)"),
        (('HEAD',), "number of commits in repository matches (start_from='HEAD')"),
    )
    for call_args, message in variants:
        commit_count = example_repo.count_commits(*call_args)
        # 3 commits: v1, v1.5, v2
        assert 3 == commit_count, message
def test_list_authors(example_repo):
    """Check GitRepo.list_authors_shortlog() and parsing of its output"""
    shortlog = example_repo.list_authors_shortlog()

    # raw shortlog entries, with surrounding whitespace stripped
    stripped_entries = [entry.strip() for entry in shortlog]
    assert sorted(stripped_entries) == sorted([
        '2\tA U Thor',  # author of v1, v1.5
        '1\tJoe Random',  # author of v2
    ]), "list of authors matches"

    # the same shortlog, parsed into AuthorStat records
    parsed_stats = parse_shortlog_count(shortlog)
    assert sorted([
        AuthorStat(author='A U Thor', count=2),
        AuthorStat(author='Joe Random', count=1)
    ]) == sorted(parsed_stats), "parsed authors counts matches"
def test_find_roots(example_repo):
    """Check that GitRepo.find_roots() reports 'v1' as the only root commit"""
    roots = example_repo.find_roots()
    assert len(roots) == 1, "has a single root commit"

    # roots are compared by object id, so resolve the tag first
    expected_root = example_repo.to_oid("v1")
    assert roots[0] == expected_root, "root commit is v1"
def test_get_config(example_repo):
    """Check GitRepo.get_config() for an existing and for a missing variable"""
    # NOTE(review): presumably set by the example repository setup -- confirm in the fixture
    user_name = example_repo.get_config('user.name')
    assert 'A U Thor' == user_name, "got expected value for 'user.name'"

    missing_value = example_repo.get_config('not-exists')
    assert missing_value is None, "returns `None` for invalid variable name"
def test_metadata_extraction_in_ChangeSet(example_repo):
    """Compare commit metadata on results of .unidiff() and of .log_p()"""
    rev = "v2"
    rev_oid = example_repo.to_oid(rev)

    # result of .unidiff(): knows the previous revision, but has no metadata
    from_unidiff = example_repo.unidiff(rev)
    assert from_unidiff.prev == f"{rev}^", \
        ".unidiff() sets .prev field to expected value"
    assert from_unidiff.commit_metadata is None, \
        ".unidiff() does not provide commit info to extract metadata"

    # single commit changeset, i.e. the first element from a single element generator
    changesets = example_repo.log_p(revision_range=('-1', rev), wrap=True)
    from_log = next(changesets)
    reference_metadata = example_repo.get_commit_metadata(rev)

    assert from_log.prev is None, \
        ".log_p() does not set .prev field"
    assert from_log.commit_id == rev_oid, \
        ".log_p() returns expected commit, and sets .commit_id to its oid"
    assert from_log.commit_metadata is not None, \
        "extracted commit metadata from .log_p() result"
    assert from_log.commit_metadata == reference_metadata, \
        "correctly extracted expected metadata from .log_p() result"
def test_ChangeSet_from_filename():
    """Check ChangeSet.from_filename on a diff-only file and on a diff with raw commit data"""
    commit_id = 'c0dcf39b046d1b4ff6de14ac99ad9a1b10487512'
    # both test files are named after the commit, and differ only in extension
    path_without_ext = f'tests/test_dataset/tqdm-1/{commit_id}'

    # file with diff only
    diff_only = ChangeSet.from_filename(path_without_ext + '.diff')
    assert isinstance(diff_only, ChangeSet), \
        "ChangeSet.from_filename returned ChangeSet or derived class"
    assert isinstance(diff_only, PatchSet), \
        "ChangeSet.from_filename returned PatchSet or derived class"
    assert diff_only.commit_id == commit_id, \
        "Extracted commit_id from file name"
    assert diff_only.commit_metadata is None, \
        "For file with diff only there is no way to get commit metadata from it"

    # file with raw commit data followed by patch
    diff_full = ChangeSet.from_filename(path_without_ext + '.diff_with_raw')
    assert diff_full.commit_id == commit_id, \
        "Extracted commit_id from metadata matches with from file name"
    full_metadata = diff_full.commit_metadata
    assert full_metadata is not None, \
        "Successful extraction of commit metadata from raw with patch format"
    assert full_metadata['id'] == commit_id, \
        "Commit id from metadata matches expectations"
    # NOTE: this depends on the test file used!
    assert full_metadata['message'].count('\n') == 1, \
        "The commit message has exactly one line, ending in '\\n'"
def test_ChangeSet_from_patch_file_with_cr():
    """Check that ChangeSet.from_filename reads the qtile test patch without raising"""
    commit_id = '4424a39ba5d6374cc18b98297f6de8a82c37ab6a'
    # the test passes if no exception is raised; the result itself is not examined
    ChangeSet.from_filename(f'tests/test_dataset/qtile/{commit_id}.diff')