Skip to content

Commit 8238648

Browse files
committed
Fix all the files outside of html5lib to flake8 cleanly
1 parent 2c3b64b commit 8238648

File tree

5 files changed

+80
-58
lines changed

5 files changed

+80
-58
lines changed

Diff for: flake8-run.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,5 @@ if [[ ! -x $(which flake8) ]]; then
55
exit 1
66
fi
77

8-
flake8 html5lib
8+
flake8 `dirname $0`
99
exit $?

Diff for: parse.py

+19 -12
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
"""
66

77
import sys
8-
import os
98
import traceback
109
from optparse import OptionParser
1110

@@ -15,17 +14,21 @@
1514
from html5lib import constants
1615
from html5lib import utils
1716

17+
1818
def parse():
1919
optParser = getOptParser()
20-
opts,args = optParser.parse_args()
20+
opts, args = optParser.parse_args()
2121
encoding = "utf8"
2222

2323
try:
2424
f = args[-1]
2525
# Try opening from the internet
2626
if f.startswith('http://'):
2727
try:
28-
import urllib.request, urllib.parse, urllib.error, cgi
28+
import urllib.request
29+
import urllib.parse
30+
import urllib.error
31+
import cgi
2932
f = urllib.request.urlopen(f)
3033
contentType = f.headers.get('content-type')
3134
if contentType:
@@ -41,7 +44,7 @@ def parse():
4144
try:
4245
# Try opening from file system
4346
f = open(f, "rb")
44-
except IOError as e:
47+
except IOError as e:
4548
sys.stderr.write("Unable to open file: %s\n" % e)
4649
sys.exit(1)
4750
except IndexError:
@@ -82,14 +85,15 @@ def parse():
8285
if document:
8386
printOutput(p, document, opts)
8487
t2 = time.time()
85-
sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1))
88+
sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)" % (t1 - t0, t2 - t1))
8689
else:
87-
sys.stderr.write("\n\nRun took: %fs"%(t1-t0))
90+
sys.stderr.write("\n\nRun took: %fs" % (t1 - t0))
8891
else:
8992
document = run(parseMethod, f, encoding, opts.scripting)
9093
if document:
9194
printOutput(p, document, opts)
9295

96+
9397
def run(parseMethod, f, encoding, scripting):
9498
try:
9599
document = parseMethod(f, encoding=encoding, scripting=scripting)
@@ -98,6 +102,7 @@ def run(parseMethod, f, encoding, scripting):
98102
traceback.print_exc()
99103
return document
100104

105+
101106
def printOutput(parser, document, opts):
102107
if opts.encoding:
103108
print("Encoding:", parser.tokenizer.stream.charEncoding)
@@ -116,7 +121,7 @@ def printOutput(parser, document, opts):
116121
elif tb == "etree":
117122
sys.stdout.write(utils.default_etree.tostring(document))
118123
elif opts.tree:
119-
if not hasattr(document,'__getitem__'):
124+
if not hasattr(document, '__getitem__'):
120125
document = [document]
121126
for fragment in document:
122127
print(parser.tree.testSerializer(fragment))
@@ -126,7 +131,7 @@ def printOutput(parser, document, opts):
126131
kwargs = {}
127132
for opt in serializer.HTMLSerializer.options:
128133
try:
129-
kwargs[opt] = getattr(opts,opt)
134+
kwargs[opt] = getattr(opts, opt)
130135
except:
131136
pass
132137
if not kwargs['quote_char']:
@@ -142,12 +147,14 @@ def printOutput(parser, document, opts):
142147
encoding = "utf-8"
143148
for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
144149
sys.stdout.write(text)
145-
if not text.endswith('\n'): sys.stdout.write('\n')
150+
if not text.endswith('\n'):
151+
sys.stdout.write('\n')
146152
if opts.error:
147-
errList=[]
153+
errList = []
148154
for pos, errorcode, datavars in parser.errors:
149-
errList.append("Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
150-
sys.stdout.write("\nParse errors:\n" + "\n".join(errList)+"\n")
155+
errList.append("Line %i Col %i" % pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
156+
sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n")
157+
151158

152159
def getOptParser():
153160
parser = OptionParser(usage=__doc__)

Diff for: setup.py

+6 -6
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from setuptools import setup
66

77

8-
classifiers=[
8+
classifiers = [
99
'Development Status :: 5 - Production/Stable',
1010
'Intended Audience :: Developers',
1111
'License :: OSI Approved :: MIT License',
@@ -20,9 +20,9 @@
2020
'Programming Language :: Python :: 3.5',
2121
'Topic :: Software Development :: Libraries :: Python Modules',
2222
'Topic :: Text Processing :: Markup :: HTML'
23-
]
23+
]
2424

25-
packages = ['html5lib'] + ['html5lib.'+name
25+
packages = ['html5lib'] + ['html5lib.' + name
2626
for name in os.listdir(os.path.join('html5lib'))
2727
if os.path.isdir(os.path.join('html5lib', name)) and
2828
not name.startswith('.') and name != 'tests']
@@ -39,9 +39,9 @@
3939
assignments = filter(lambda x: isinstance(x, ast.Assign), t.body)
4040
for a in assignments:
4141
if (len(a.targets) == 1 and
42-
isinstance(a.targets[0], ast.Name) and
43-
a.targets[0].id == "__version__" and
44-
isinstance(a.value, ast.Str)):
42+
isinstance(a.targets[0], ast.Name) and
43+
a.targets[0].id == "__version__" and
44+
isinstance(a.value, ast.Str)):
4545
version = a.value.s
4646

4747
setup(name='html5lib',

Diff for: utils/entities.py

+31 -19
Original file line number | Diff line number | Diff line change
@@ -2,57 +2,67 @@
22

33
import html5lib
44

5+
56
def parse(path="html5ents.xml"):
67
return html5lib.parse(open(path), treebuilder="lxml")
78

9+
810
def entity_table(tree):
911
return dict((entity_name("".join(tr[0].xpath(".//text()"))),
1012
entity_characters(tr[1].text))
1113
for tr in tree.xpath("//h:tbody/h:tr",
12-
namespaces={"h":"http://www.w3.org/1999/xhtml"}))
14+
namespaces={"h": "http://www.w3.org/1999/xhtml"}))
15+
1316

1417
def entity_name(inp):
1518
return inp.strip()
1619

20+
1721
def entity_characters(inp):
1822
return "".join(codepoint_to_character(item)
19-
for item in inp.split()
20-
if item)
23+
for item in inp.split()
24+
if item)
25+
2126

2227
def codepoint_to_character(inp):
23-
return ("\U000"+inp[2:]).decode("unicode-escape")
28+
return ("\\U000" + inp[2:]).decode("unicode-escape")
29+
2430

2531
def make_tests_json(entities):
2632
test_list = make_test_list(entities)
2733
tests_json = {"tests":
28-
[make_test(*item) for item in test_list]
34+
[make_test(*item) for item in test_list]
2935
}
3036
return tests_json
3137

38+
3239
def make_test(name, characters, good):
3340
return {
34-
"description":test_description(name, good),
35-
"input":"&%s"%name,
36-
"output":test_expected(name, characters, good)
37-
}
41+
"description": test_description(name, good),
42+
"input": "&%s" % name,
43+
"output": test_expected(name, characters, good)
44+
}
45+
3846

3947
def test_description(name, good):
4048
with_semicolon = name.endswith(";")
41-
semicolon_text = {True:"with a semi-colon",
42-
False:"without a semi-colon"}[with_semicolon]
49+
semicolon_text = {True: "with a semi-colon",
50+
False: "without a semi-colon"}[with_semicolon]
4351
if good:
44-
text = "Named entity: %s %s"%(name, semicolon_text)
52+
text = "Named entity: %s %s" % (name, semicolon_text)
4553
else:
46-
text = "Bad named entity: %s %s"%(name, semicolon_text)
54+
text = "Bad named entity: %s %s" % (name, semicolon_text)
4755
return text
4856

57+
4958
def test_expected(name, characters, good):
5059
rv = []
5160
if not good or not name.endswith(";"):
5261
rv.append("ParseError")
5362
rv.append(["Character", characters])
5463
return rv
5564

65+
5666
def make_test_list(entities):
5767
tests = []
5868
for entity_name, characters in entities.items():
@@ -61,20 +71,23 @@ def make_test_list(entities):
6171
tests.append((entity_name, characters, True))
6272
return sorted(tests)
6373

74+
6475
def subentity_exists(entity_name, entities):
6576
for i in range(1, len(entity_name)):
6677
if entity_name[:-i] in entities:
6778
return True
6879
return False
6980

81+
7082
def make_entities_code(entities):
71-
entities_text = "\n".join(" \"%s\": u\"%s\","%(
72-
name, entities[name].encode(
73-
"unicode-escape").replace("\"", "\\\""))
74-
for name in sorted(entities.keys()))
83+
entities_text = "\n".join(" \"%s\": u\"%s\"," % (
84+
name, entities[name].encode(
85+
"unicode-escape").replace("\"", "\\\""))
86+
for name in sorted(entities.keys()))
7587
return """entities = {
7688
%s
77-
}"""%entities_text
89+
}""" % entities_text
90+
7891

7992
def main():
8093
entities = entity_table(parse())
@@ -85,4 +98,3 @@ def main():
8598

8699
if __name__ == "__main__":
87100
main()
88-

Diff for: utils/spider.py

+23 -20
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,9 @@
77
s.spider("http://www.google.com", maxURLs=100)
88
"""
99

10-
import urllib.request, urllib.error, urllib.parse
10+
import urllib.request
11+
import urllib.error
12+
import urllib.parse
1113
import urllib.robotparser
1214
import md5
1315

@@ -16,11 +18,13 @@
1618
import html5lib
1719
from html5lib.treebuilders import etree
1820

21+
1922
class Spider(object):
23+
2024
def __init__(self):
2125
self.unvisitedURLs = set()
2226
self.visitedURLs = set()
23-
self.buggyURLs=set()
27+
self.buggyURLs = set()
2428
self.robotParser = urllib.robotparser.RobotFileParser()
2529
self.contentDigest = {}
2630
self.http = httplib2.Http(".cache")
@@ -70,18 +74,18 @@ def updateURLs(self, tree):
7074
update the list of visited and unvisited URLs according to whether we
7175
have seen them before or not"""
7276
urls = set()
73-
#Remove all links we have already visited
77+
# Remove all links we have already visited
7478
for link in tree.findall(".//a"):
75-
try:
76-
url = urllib.parse.urldefrag(link.attrib['href'])[0]
77-
if (url and url not in self.unvisitedURLs and url
79+
try:
80+
url = urllib.parse.urldefrag(link.attrib['href'])[0]
81+
if (url and url not in self.unvisitedURLs and url
7882
not in self.visitedURLs):
79-
urls.add(url)
80-
except KeyError:
81-
pass
83+
urls.add(url)
84+
except KeyError:
85+
pass
8286

83-
#Remove all non-http URLs and add a suitable base URL where that is
84-
#missing
87+
# Remove all non-http URLs and add a suitable base URL where that is
88+
# missing
8589
newUrls = set()
8690
for url in urls:
8791
splitURL = list(urllib.parse.urlsplit(url))
@@ -93,23 +97,22 @@ def updateURLs(self, tree):
9397
urls = newUrls
9498

9599
responseHeaders = {}
96-
#Now we want to find the content types of the links we haven't visited
100+
# Now we want to find the content types of the links we haven't visited
97101
for url in urls:
98102
try:
99103
resp, content = self.http.request(url, "HEAD")
100104
responseHeaders[url] = resp
101-
except AttributeError as KeyError:
102-
#Don't know why this happens
105+
except AttributeError:
106+
# Don't know why this happens
103107
pass
104108

105-
106-
#Remove links not of content-type html or pages not found
107-
#XXX - need to deal with other status codes?
109+
# Remove links not of content-type html or pages not found
110+
# XXX - need to deal with other status codes?
108111
toVisit = set([url for url in urls if url in responseHeaders and
109-
"html" in responseHeaders[url]['content-type'] and
110-
responseHeaders[url]['status'] == "200"])
112+
"html" in responseHeaders[url]['content-type'] and
113+
responseHeaders[url]['status'] == "200"])
111114

112-
#Now check we are allowed to spider the page
115+
# Now check we are allowed to spider the page
113116
for url in toVisit:
114117
robotURL = list(urllib.parse.urlsplit(url)[:2])
115118
robotURL.extend(["robots.txt", "", ""])

0 commit comments

Comments
 (0)