-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathpdfdiff.py
93 lines (80 loc) · 2.37 KB
/
pdfdiff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# file: pdfdiff.py
# vim:fileencoding=utf-8:ft=python
#
# Copyright © 2019 R.F. Smith <[email protected]>
# SPDX-License-Identifier: MIT
# Created: 2019-07-11T00:22:30+0200
# Last modified: 2020-04-23T19:03:32+0200
"""
Script to try and show a diff between two PDF files.
Requires pdftotext from the poppler utilities,
and a diff that supports the -w option to ignore whitespace.
"""
import binascii
import os
import subprocess as sp
import sys
import tempfile
from types import SimpleNamespace
# ANSI escape sequences that switch the terminal foreground color.
# "reset" switches all attributes back to the terminal default.
fgcolor = SimpleNamespace()
fgcolor.brightred = "\033[31;1m"
fgcolor.brightgreen = "\033[32;1m"
fgcolor.brightyellow = "\033[33;1m"
fgcolor.brightmagenta = "\033[35;1m"
fgcolor.reset = "\033[0m"
def main(argv):
    """
    Entry point for pdfdiff.py.

    Converts both PDF files to text, runs ``diff -d -u -w`` on the two text
    renderings and prints the resulting unified diff in color. Nothing is
    printed when the renderings do not differ.

    Arguments:
        argv: command line arguments; the paths of the two PDF files to compare

    Raises:
        SystemExit: with status 1 when not exactly two arguments are given.
        subprocess.CalledProcessError: when pdftotext fails, or when diff
            exits with a status other than 0 (no differences) or
            1 (differences found).
    """
    if len(argv) != 2:
        print("Usage: pdfdiff.py first second")
        sys.exit(1)
    # Keep the text renderings in a private temporary directory, so they are
    # removed again even when pdftotext or diff fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpnam = [
            os.path.join(tmpdir, "first.txt"),
            os.path.join(tmpdir, "second.txt"),
        ]
        for tmp, pdf in zip(tmpnam, argv):
            with open(tmp, "w") as f:
                f.write(pdftotext(pdf))
        diffargs = ["diff", "-d", "-u", "-w"] + tmpnam
        # No check=True here: diff exits with status 1 when the files differ,
        # which is the normal case for this script and not an error.
        result = sp.run(diffargs, stdout=sp.PIPE, stderr=sp.DEVNULL, text=True)
    if result.returncode not in (0, 1):
        # Anything else means that diff itself ran into trouble.
        result.check_returncode()
    lines = result.stdout.splitlines()
    # Show the names of the PDF files instead of those of the temporary files
    # in the "---" and "+++" header lines. There are no lines at all when the
    # renderings are identical.
    for j in range(min(len(lines), 2)):
        lines[j] = lines[j].replace(tmpnam[j], argv[j])
    try:
        colordiff(lines)
    except BrokenPipeError:
        # Deliberately ignored: the reader of our output (e.g. a pager or
        # "head") closed the pipe before everything was written.
        pass
def pdftotext(path):
    """
    Render a PDF file as text with the external ``pdftotext`` program.

    The program is run with the ``-layout`` option and writes to its standard
    output, which is captured; its error output is discarded.

    Arguments:
        path: path of the PDF file to convert

    Returns:
        The captured text rendering as a single string.

    Raises:
        subprocess.CalledProcessError: when pdftotext exits with a non-zero
            status.
    """
    completed = sp.run(
        ["pdftotext", "-layout", path, "-"],
        check=True,
        text=True,
        stdout=sp.PIPE,
        stderr=sp.DEVNULL,
    )
    return completed.stdout
def colordiff(txt):
    """
    Print a colored diff.

    File header lines ("--- " and "+++ ") are printed in bright yellow, added
    lines ("+") in bright green, removed lines ("-") in bright red and hunk
    headers ("@@") in bright magenta. All other lines are printed in the
    default color. Trailing whitespace is stripped from every line.

    Arguments:
        txt: diff list or generator to print
    """
    # Checked in order: the file headers have to come before the generic "+"
    # and "-" prefixes, which would match them as well.
    prefixes = (
        (("+++ ", "--- "), fgcolor.brightyellow),
        ("+", fgcolor.brightgreen),
        ("-", fgcolor.brightred),
        ("@@", fgcolor.brightmagenta),
    )
    for line in txt:
        line = line.rstrip()
        color = fgcolor.reset
        for prefix, candidate in prefixes:
            if line.startswith(prefix):
                color = candidate
                break
        # Concatenate instead of passing separate arguments to print(), which
        # would put a space in front of every diff line. End each line with a
        # reset so the color of the last line does not leak into the terminal.
        print(color + line + fgcolor.reset)
# Run main() only when this file is executed as a script, not when it is
# imported. The program name (sys.argv[0]) is not passed on.
if __name__ == "__main__":
    main(sys.argv[1:])