Fix all the files outside of html5lib to flake8 cleanly

gsnedders · gsnedders · commit 823864882ee9 · 2016-05-21T00:29:35.000+01:00
diff --git a/flake8-run.sh b/flake8-run.sh
@@ -5,5 +5,5 @@ if [[ ! -x $(which flake8) ]]; then
   exit 1
 fi
 
-flake8 html5lib
+flake8 `dirname $0`
 exit $?
diff --git a/parse.py b/parse.py
@@ -5,7 +5,6 @@
 """
 
 import sys
-import os
 import traceback
 from optparse import OptionParser
 
@@ -15,17 +14,21 @@
 from html5lib import constants
 from html5lib import utils
 
+
 def parse():
     optParser = getOptParser()
-    opts,args = optParser.parse_args()
+    opts, args = optParser.parse_args()
     encoding = "utf8"
 
     try:
         f = args[-1]
         # Try opening from the internet
         if f.startswith('http://'):
             try:
-                import urllib.request, urllib.parse, urllib.error, cgi
+                import urllib.request
+                import urllib.parse
+                import urllib.error
+                import cgi
                 f = urllib.request.urlopen(f)
                 contentType = f.headers.get('content-type')
                 if contentType:
@@ -41,7 +44,7 @@ def parse():
             try:
                 # Try opening from file system
                 f = open(f, "rb")
-            except IOError as e:                
+            except IOError as e:
                 sys.stderr.write("Unable to open file: %s\n" % e)
                 sys.exit(1)
     except IndexError:
@@ -82,14 +85,15 @@ def parse():
         if document:
             printOutput(p, document, opts)
             t2 = time.time()
-            sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1))
+            sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)" % (t1 - t0, t2 - t1))
         else:
-            sys.stderr.write("\n\nRun took: %fs"%(t1-t0))
+            sys.stderr.write("\n\nRun took: %fs" % (t1 - t0))
     else:
         document = run(parseMethod, f, encoding, opts.scripting)
         if document:
             printOutput(p, document, opts)
 
+
 def run(parseMethod, f, encoding, scripting):
     try:
         document = parseMethod(f, encoding=encoding, scripting=scripting)
@@ -98,6 +102,7 @@ def run(parseMethod, f, encoding, scripting):
         traceback.print_exc()
     return document
 
+
 def printOutput(parser, document, opts):
     if opts.encoding:
         print("Encoding:", parser.tokenizer.stream.charEncoding)
@@ -116,7 +121,7 @@ def printOutput(parser, document, opts):
             elif tb == "etree":
                 sys.stdout.write(utils.default_etree.tostring(document))
         elif opts.tree:
-            if not hasattr(document,'__getitem__'):
+            if not hasattr(document, '__getitem__'):
                 document = [document]
             for fragment in document:
                 print(parser.tree.testSerializer(fragment))
@@ -126,7 +131,7 @@ def printOutput(parser, document, opts):
             kwargs = {}
             for opt in serializer.HTMLSerializer.options:
                 try:
-                    kwargs[opt] = getattr(opts,opt)
+                    kwargs[opt] = getattr(opts, opt)
                 except:
                     pass
             if not kwargs['quote_char']:
@@ -142,12 +147,14 @@ def printOutput(parser, document, opts):
                 encoding = "utf-8"
             for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
                 sys.stdout.write(text)
-            if not text.endswith('\n'): sys.stdout.write('\n')
+            if not text.endswith('\n'):
+                sys.stdout.write('\n')
     if opts.error:
-        errList=[]
+        errList = []
         for pos, errorcode, datavars in parser.errors:
-            errList.append("Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
-        sys.stdout.write("\nParse errors:\n" + "\n".join(errList)+"\n")
+            errList.append("Line %i Col %i" % pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
+        sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n")
+
 
 def getOptParser():
     parser = OptionParser(usage=__doc__)
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 from setuptools import setup
 
 
-classifiers=[
+classifiers = [
     'Development Status :: 5 - Production/Stable',
     'Intended Audience :: Developers',
     'License :: OSI Approved :: MIT License',
@@ -20,9 +20,9 @@
     'Programming Language :: Python :: 3.5',
     'Topic :: Software Development :: Libraries :: Python Modules',
     'Topic :: Text Processing :: Markup :: HTML'
-    ]
+]
 
-packages = ['html5lib'] + ['html5lib.'+name
+packages = ['html5lib'] + ['html5lib.' + name
                            for name in os.listdir(os.path.join('html5lib'))
                            if os.path.isdir(os.path.join('html5lib', name)) and
                            not name.startswith('.') and name != 'tests']
@@ -39,9 +39,9 @@
     assignments = filter(lambda x: isinstance(x, ast.Assign), t.body)
     for a in assignments:
         if (len(a.targets) == 1 and
-              isinstance(a.targets[0], ast.Name) and
-              a.targets[0].id == "__version__" and
-              isinstance(a.value, ast.Str)):
+                isinstance(a.targets[0], ast.Name) and
+                a.targets[0].id == "__version__" and
+                isinstance(a.value, ast.Str)):
             version = a.value.s
 
 setup(name='html5lib',
diff --git a/utils/entities.py b/utils/entities.py
@@ -2,57 +2,67 @@
 
 import html5lib
 
+
 def parse(path="html5ents.xml"):
     return html5lib.parse(open(path), treebuilder="lxml")
 
+
 def entity_table(tree):
     return dict((entity_name("".join(tr[0].xpath(".//text()"))),
                  entity_characters(tr[1].text))
                 for tr in tree.xpath("//h:tbody/h:tr",
-                                     namespaces={"h":"http://www.w3.org/1999/xhtml"}))
+                                     namespaces={"h": "http://www.w3.org/1999/xhtml"}))
+
 
 def entity_name(inp):
     return inp.strip()
 
+
 def entity_characters(inp):
     return "".join(codepoint_to_character(item)
-                    for item in inp.split()
-                    if item)
+                   for item in inp.split()
+                   if item)
+
 
 def codepoint_to_character(inp):
-    return ("\U000"+inp[2:]).decode("unicode-escape")
+    return ("\\U000" + inp[2:]).decode("unicode-escape")
+
 
 def make_tests_json(entities):
     test_list = make_test_list(entities)
     tests_json = {"tests":
-                      [make_test(*item) for item in test_list]
+                  [make_test(*item) for item in test_list]
                   }
     return tests_json
 
+
 def make_test(name, characters, good):
     return {
-        "description":test_description(name, good),
-        "input":"&%s"%name,
-        "output":test_expected(name, characters, good)
-        }
+        "description": test_description(name, good),
+        "input": "&%s" % name,
+        "output": test_expected(name, characters, good)
+    }
+
 
 def test_description(name, good):
     with_semicolon = name.endswith(";")
-    semicolon_text = {True:"with a semi-colon",
-                      False:"without a semi-colon"}[with_semicolon]
+    semicolon_text = {True: "with a semi-colon",
+                      False: "without a semi-colon"}[with_semicolon]
     if good:
-        text = "Named entity: %s %s"%(name, semicolon_text)
+        text = "Named entity: %s %s" % (name, semicolon_text)
     else:
-        text = "Bad named entity: %s %s"%(name, semicolon_text)
+        text = "Bad named entity: %s %s" % (name, semicolon_text)
     return text
 
+
 def test_expected(name, characters, good):
     rv = []
     if not good or not name.endswith(";"):
         rv.append("ParseError")
     rv.append(["Character", characters])
     return rv
 
+
 def make_test_list(entities):
     tests = []
     for entity_name, characters in entities.items():
@@ -61,20 +71,23 @@ def make_test_list(entities):
         tests.append((entity_name, characters, True))
     return sorted(tests)
 
+
 def subentity_exists(entity_name, entities):
     for i in range(1, len(entity_name)):
         if entity_name[:-i] in entities:
             return True
     return False
 
+
 def make_entities_code(entities):
-    entities_text = "\n".join("    \"%s\": u\"%s\","%(
-            name, entities[name].encode(
-                "unicode-escape").replace("\"", "\\\""))
-                              for name in sorted(entities.keys()))
+    entities_text = "\n".join("    \"%s\": u\"%s\"," % (
+        name, entities[name].encode(
+            "unicode-escape").replace("\"", "\\\""))
+        for name in sorted(entities.keys()))
     return """entities = {
 %s
-}"""%entities_text
+}""" % entities_text
+
 
 def main():
     entities = entity_table(parse())
@@ -85,4 +98,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/utils/spider.py b/utils/spider.py
@@ -7,7 +7,9 @@
 s.spider("http://www.google.com", maxURLs=100)
 """
 
-import urllib.request, urllib.error, urllib.parse
+import urllib.request
+import urllib.error
+import urllib.parse
 import urllib.robotparser
 import md5
 
@@ -16,11 +18,13 @@
 import html5lib
 from html5lib.treebuilders import etree
 
+
 class Spider(object):
+
     def __init__(self):
         self.unvisitedURLs = set()
         self.visitedURLs = set()
-        self.buggyURLs=set()
+        self.buggyURLs = set()
         self.robotParser = urllib.robotparser.RobotFileParser()
         self.contentDigest = {}
         self.http = httplib2.Http(".cache")
@@ -70,18 +74,18 @@ def updateURLs(self, tree):
         update the list of visited and unvisited URLs according to whether we
         have seen them before or not"""
         urls = set()
-        #Remove all links we have already visited
+        # Remove all links we have already visited
         for link in tree.findall(".//a"):
-                try:
-                    url = urllib.parse.urldefrag(link.attrib['href'])[0]
-                    if (url and url not in self.unvisitedURLs and url
+            try:
+                url = urllib.parse.urldefrag(link.attrib['href'])[0]
+                if (url and url not in self.unvisitedURLs and url
                         not in self.visitedURLs):
-                        urls.add(url)
-                except KeyError:
-                    pass
+                    urls.add(url)
+            except KeyError:
+                pass
 
-        #Remove all non-http URLs and add a suitable base URL where that is
-        #missing
+        # Remove all non-http URLs and add a suitable base URL where that is
+        # missing
         newUrls = set()
         for url in urls:
             splitURL = list(urllib.parse.urlsplit(url))
@@ -93,23 +97,22 @@ def updateURLs(self, tree):
         urls = newUrls
 
         responseHeaders = {}
-        #Now we want to find the content types of the links we haven't visited
+        # Now we want to find the content types of the links we haven't visited
         for url in urls:
             try:
                 resp, content = self.http.request(url, "HEAD")
                 responseHeaders[url] = resp
-            except AttributeError as KeyError:
-                #Don't know why this happens
+            except AttributeError:
+                # Don't know why this happens
                 pass
 
-
-        #Remove links not of content-type html or pages not found
-        #XXX - need to deal with other status codes?
+        # Remove links not of content-type html or pages not found
+        # XXX - need to deal with other status codes?
         toVisit = set([url for url in urls if url in responseHeaders and
-                      "html" in responseHeaders[url]['content-type'] and
-                      responseHeaders[url]['status'] == "200"])
+                       "html" in responseHeaders[url]['content-type'] and
+                       responseHeaders[url]['status'] == "200"])
 
-        #Now check we are allowed to spider the page
+        # Now check we are allowed to spider the page
         for url in toVisit:
             robotURL = list(urllib.parse.urlsplit(url)[:2])
             robotURL.extend(["robots.txt", "", ""])