-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
annotate.py: Fix bug for 'Binary files ... differ' patches
When there is a change to a binary file, Git shows this using the "Binary files a/foo and b/foo differ" message in place of the diff; therefore, for unidiff, there are 0 hunks. Make the compute_sizes_and_spreads() method of the AnnotatedPatchedFile class handle this case correctly. Additionally, you can now find the number of changed "binary" files with the 'n_binary_files' field in the compute_sizes_and_spreads() method output.
Without this fix, you would get the following error (caught and printed):
  File "python-diff-annotator/src/diffannotator/annotate.py", line 786, in compute_sizes_and_spreads
    (self.patched_file[-1].source_start + self.patched_file[-1].source_length - 1
     ~~~~~~~~~~~~~~~~~^^^^
  IndexError: list index out of range
Add a test for this specific issue.
- Loading branch information
Showing
3 changed files
with
82 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
...s/test_dataset_structured/scrapy-11/patches/9de6f1ca757b7f200d15e94840c9d431cf202276.diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
diff --git a/scrapy/utils/gz.py b/scrapy/utils/gz.py | ||
index afc7ed128..73c2eb73b 100644 | ||
--- a/scrapy/utils/gz.py | ||
+++ b/scrapy/utils/gz.py | ||
@@ -43,7 +43,7 @@ def gunzip(data): | ||
# contains the whole page content | ||
if output or getattr(f, 'extrabuf', None): | ||
try: | ||
- output += f.extrabuf | ||
+ output += f.extrabuf[-f.extrasize:] | ||
finally: | ||
break | ||
else: | ||
diff --git a/tests/sample_data/compressed/unexpected-eof-output.txt b/tests/sample_data/compressed/unexpected-eof-output.txt | ||
new file mode 100644 | ||
index 000000000..3b201255f | ||
Binary files /dev/null and b/tests/sample_data/compressed/unexpected-eof-output.txt differ | ||
diff --git a/tests/sample_data/compressed/unexpected-eof.gz b/tests/sample_data/compressed/unexpected-eof.gz | ||
new file mode 100644 | ||
index 000000000..96211e432 | ||
Binary files /dev/null and b/tests/sample_data/compressed/unexpected-eof.gz differ | ||
diff --git a/tests/test_utils_gz.py b/tests/test_utils_gz.py | ||
index 2b47bf8da..7148185f4 100644 | ||
--- a/tests/test_utils_gz.py | ||
+++ b/tests/test_utils_gz.py | ||
@@ -1,6 +1,8 @@ | ||
import unittest | ||
from os.path import join | ||
|
||
+from w3lib.encoding import html_to_unicode | ||
+ | ||
from scrapy.utils.gz import gunzip, is_gzipped | ||
from scrapy.http import Response, Headers | ||
from tests import tests_datadir | ||
@@ -66,3 +68,11 @@ class GunzipTest(unittest.TestCase): | ||
hdrs = Headers({"Content-Type": "application/x-gzip;charset=utf-8"}) | ||
r1 = Response("http://www.example.com", headers=hdrs) | ||
self.assertTrue(is_gzipped(r1)) | ||
+ | ||
+ def test_gunzip_illegal_eof(self): | ||
+ with open(join(SAMPLEDIR, 'unexpected-eof.gz'), 'rb') as f: | ||
+ text = html_to_unicode('charset=cp1252', gunzip(f.read()))[1] | ||
+ with open(join(SAMPLEDIR, 'unexpected-eof-output.txt'), 'rb') as o: | ||
+ expected_text = o.read().decode("utf-8") | ||
+ self.assertEqual(len(text), len(expected_text)) | ||
+ self.assertEqual(text, expected_text) |