lists, headings, todos + tests working

numeralz · numeralz · commit 938d81b06419 · 2021-05-07T10:51:48.000-04:00
diff --git a/.eggs/README.txt b/.eggs/README.txt
@@ -0,0 +1,6 @@
+This directory contains eggs that were downloaded by setuptools to build, test, and run plug-ins.
+
+This directory caches those eggs to prevent repeated downloads.
+
+However, it is safe to delete this directory.
+
diff --git a/htmlslacker/htmlslacker.py b/htmlslacker/htmlslacker.py
@@ -4,6 +4,7 @@
 except ImportError:
     from HTMLParser import HTMLParser
     from htmlentitydefs import name2codepoint
+import re
 
 LINEBR = "::LINEBR::"
 
@@ -23,6 +24,9 @@ def __init__(self, html, *args, **kwargs):
         except TypeError:
             HTMLParser.__init__(self, *args, **kwargs)
         self.skip = False
+        self.isProcessingList = False
+        self.isProcessingOrderedList = False
+        self.orderedNumber = 0
 
         # slackified string
         self.output = ''
@@ -43,9 +47,11 @@ def handle_starttag(self, tag, attrs):
         if tag == 'br' or tag == 'p':
             self.output += LINEBR
         if tag == 'b' or tag == 'strong':
-            self.output += '*'
+            self.output += ' *'
+        if re.match("h[1-6]{1}", tag):
+            self.output += ' *'
         if tag == 'i' or tag == 'em':
-            self.output += '_'
+            self.output += ' _'
         if tag == 'code':
             self.output += '`'
         if tag == 'a':
@@ -55,6 +61,16 @@ def handle_starttag(self, tag, attrs):
                     self.output += attr[1] + '|'
         if tag == 'style' or tag == 'script':
             self.skip = True
+        if tag == 'ul':
+            self.isProcessingList = True
+        if tag == 'li' and self.isProcessingList:
+            self.output += '• '
+        if tag == 'ol':
+            self.orderedNumber = 1
+            self.isProcessingOrderedList = True
+        if tag == 'li' and self.isProcessingOrderedList:
+            self.output += '{}. '.format(self.orderedNumber)
+            self.orderedNumber = self.orderedNumber + 1
 
     def handle_endtag(self, tag):
         """
@@ -63,15 +79,25 @@ def handle_endtag(self, tag):
         :return:
         """
         if tag == 'b' or tag == 'strong':
-            self.output += '*'
+            self.output += '* '
+        if re.match("h[1-6]{1}", tag):
+            self.output += '* '+LINEBR
         if tag == 'i' or tag == 'em':
-            self.output += '_'
+            self.output += '_ '
         if tag == 'a':
             self.output += '>'
         if tag == 'code':
             self.output += '`'
         if tag == 'style' or tag == 'script':
             self.skip = False
+        if tag == 'ul':
+            self.isProcessingList = False
+        if tag == 'li' and self.isProcessingList:
+            self.output += LINEBR
+        if tag == 'ol':
+            self.isProcessingOrderedList = False
+        if tag == 'li' and self.isProcessingOrderedList:
+            self.output += LINEBR
 
     def handle_data(self, data):
         """
@@ -105,4 +131,12 @@ def get_output(self):
         link: https://stackoverflow.com/questions/2077897/substitute-multiple-whitespace-with-single-whitespace-in-python
         :return:
         """
-        return ' '.join(self.output.split()).replace(LINEBR, "\n")
+        output = self.output
+        output = re.sub(r'\*(\s\*)+', '*', output)
+        output = re.sub(r'_( _)+', '_', output)
+        output = output.replace('[] ', '☐ ').replace('[x] ', '☑︎ ')
+        output = ' '.join(output.split())
+        output = output.replace(LINEBR, "\n")
+        output = re.sub(r' *\n *', '\n', output)
+        output = output.strip()
+        return output
diff --git a/test_general.py b/test_general.py
@@ -11,7 +11,7 @@ def test_example_1():
     link in a paragraph!</a>
     </p>
     """
-    expected = "*Hello*\n There is _something_ interesting about `this doc` \n And <http://example.com/|here is a link in a paragraph!>"
+    expected = "*Hello*\nThere is _something_ interesting about `this doc`\nAnd <http://example.com/|here is a link in a paragraph!>"
     output = HTMLSlacker(html).get_output()
     assert(output == expected)
 
@@ -35,3 +35,37 @@ def test_link_with_target():
     expected = "Please click <http://xxx.com/t.html|here>"
     output = HTMLSlacker(html).get_output()
     assert(output == expected)
+
+def test_unordered_list():
+    """Items of a <ul> are rendered with a leading bullet and separated by newlines."""
+    markup = 'Here is my cool list <ul><li>The Shining</li><li>Memento</li><li>Blade Runner</li></ul>'
+    wanted = 'Here is my cool list • The Shining\n• Memento\n• Blade Runner'
+    assert HTMLSlacker(markup).get_output() == wanted
+
+def test_ordered_list():
+    """Items of an <ol> are rendered with '1. ', '2. ', ... prefixes and separated by newlines."""
+    markup = 'Here is my cool list <ol><li>The Shining</li><li>Memento</li><li>Blade Runner</li></ol>'
+    wanted = 'Here is my cool list 1. The Shining\n2. Memento\n3. Blade Runner'
+    assert HTMLSlacker(markup).get_output() == wanted
+
+def test_unordered_list_with_text_modifications():  # <b> inside an <li> must keep its bold markers
+    html = 'Here is my cool list <ul><li>The Shining</li><li>Memento</li><li>Blade <b>Runner</b></li></ul>'
+    assert HTMLSlacker(html).get_output() == 'Here is my cool list • The Shining\n• Memento\n• Blade *Runner*'
+
+def test_headers_rendered():
+    """<h2> is emitted as bold text followed by a line break; the unknown <h7> tag adds no markup."""
+    markup = '''<h2>Hello</h2> <h7>new</h7> <h2><b>world</b></h2>'''
+    wanted = "*Hello*\nnew *world*"
+    assert HTMLSlacker(markup).get_output() == wanted
+
+def test_headers_rendered_no_spaces():
+    """Same expectation as the spaced variant, with no whitespace between the tags in the input."""
+    markup = '''<h2>Hello</h2><h7>new</h7><h2><b>world</b></h2>'''
+    wanted = "*Hello*\nnew *world*"
+    assert HTMLSlacker(markup).get_output() == wanted
+
+def test_task_list_rendered():
+    """'[] ' and '[x] ' markers are replaced by unchecked/checked box glyphs."""
+    markup = '''[] Grocery<br>[x] Laundary'''
+    wanted = "☐ Grocery\n☑︎ Laundary"
+    assert HTMLSlacker(markup).get_output() == wanted