Skip to content

Commit 938d81b

Browse files
committed
lists, headings, todos + tests working
1 parent 17a5edc commit 938d81b

File tree

3 files changed

+80
-6
lines changed

3 files changed

+80
-6
lines changed

.eggs/README.txt

+6
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
This directory contains eggs that were downloaded by setuptools to build, test, and run plug-ins.
2+
3+
This directory caches those eggs to prevent repeated downloads.
4+
5+
However, it is safe to delete this directory.
6+

htmlslacker/htmlslacker.py

+39-5
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
except ImportError:
55
from HTMLParser import HTMLParser
66
from htmlentitydefs import name2codepoint
7+
import re
78

89
LINEBR = "::LINEBR::"
910

@@ -23,6 +24,9 @@ def __init__(self, html, *args, **kwargs):
2324
except TypeError:
2425
HTMLParser.__init__(self, *args, **kwargs)
2526
self.skip = False
27+
self.isProcessingList = False
28+
self.isProcessingOrderedList = False
29+
self.orderedNumber = 0
2630

2731
# slackified string
2832
self.output = ''
@@ -43,9 +47,11 @@ def handle_starttag(self, tag, attrs):
4347
if tag == 'br' or tag == 'p':
4448
self.output += LINEBR
4549
if tag == 'b' or tag == 'strong':
46-
self.output += '*'
50+
self.output += ' *'
51+
if re.match("h[1-6]{1}", tag):
52+
self.output += ' *'
4753
if tag == 'i' or tag == 'em':
48-
self.output += '_'
54+
self.output += ' _'
4955
if tag == 'code':
5056
self.output += '`'
5157
if tag == 'a':
@@ -55,6 +61,16 @@ def handle_starttag(self, tag, attrs):
5561
self.output += attr[1] + '|'
5662
if tag == 'style' or tag == 'script':
5763
self.skip = True
64+
if tag == 'ul':
65+
self.isProcessingList = True
66+
if tag == 'li' and self.isProcessingList:
67+
self.output += '• '
68+
if tag == 'ol':
69+
self.orderedNumber = 1
70+
self.isProcessingOrderedList = True
71+
if tag == 'li' and self.isProcessingOrderedList:
72+
self.output += '{}. '.format(self.orderedNumber)
73+
self.orderedNumber = self.orderedNumber + 1
5874

5975
def handle_endtag(self, tag):
6076
"""
@@ -63,15 +79,25 @@ def handle_endtag(self, tag):
6379
:return:
6480
"""
6581
if tag == 'b' or tag == 'strong':
66-
self.output += '*'
82+
self.output += '* '
83+
if re.match("h[1-6]{1}", tag):
84+
self.output += '* '+LINEBR
6785
if tag == 'i' or tag == 'em':
68-
self.output += '_'
86+
self.output += '_ '
6987
if tag == 'a':
7088
self.output += '>'
7189
if tag == 'code':
7290
self.output += '`'
7391
if tag == 'style' or tag == 'script':
7492
self.skip = False
93+
if tag == 'ul':
94+
self.isProcessingList = False
95+
if tag == 'li' and self.isProcessingList:
96+
self.output += LINEBR
97+
if tag == 'ol':
98+
self.isProcessingOrderedList = False
99+
if tag == 'li' and self.isProcessingOrderedList:
100+
self.output += LINEBR
75101

76102
def handle_data(self, data):
77103
"""
@@ -105,4 +131,12 @@ def get_output(self):
105131
link: https://stackoverflow.com/questions/2077897/substitute-multiple-whitespace-with-single-whitespace-in-python
106132
:return:
107133
"""
108-
return ' '.join(self.output.split()).replace(LINEBR, "\n")
134+
output = self.output
135+
output = re.sub(r'\*(\s\*)+', '*', output)
136+
output = re.sub(r'_( _)+', '_', output)
137+
output = output.replace('[] ', '☐ ').replace('[x] ', '☑︎ ')
138+
output = ' '.join(output.split())
139+
output = output.replace(LINEBR, "\n")
140+
output = re.sub(r' *\n *', '\n', output)
141+
output = output.strip()
142+
return output

test_general.py

+35-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ def test_example_1():
1111
link in a paragraph!</a>
1212
</p>
1313
"""
14-
expected = "*Hello*\n There is _something_ interesting about `this doc` \n And <http://example.com/|here is a link in a paragraph!>"
14+
expected = "*Hello*\nThere is _something_ interesting about `this doc`\nAnd <http://example.com/|here is a link in a paragraph!>"
1515
output = HTMLSlacker(html).get_output()
1616
assert(output == expected)
1717

@@ -35,3 +35,37 @@ def test_link_with_target():
3535
expected = "Please click <http://xxx.com/t.html|here>"
3636
output = HTMLSlacker(html).get_output()
3737
assert(output == expected)
38+
39+
def test_unordered_list():
    """A <ul> renders each <li> as a bullet-prefixed line."""
    source_html = 'Here is my cool list <ul><li>The Shining</li><li>Memento</li><li>Blade Runner</li></ul>'
    rendered = HTMLSlacker(source_html).get_output()
    # Items are separated by newlines; there is no trailing newline.
    assert rendered == 'Here is my cool list • The Shining\n• Memento\n• Blade Runner'
44+
45+
def test_ordered_list():
    """An <ol> renders each <li> with an incrementing "N. " prefix."""
    source_html = 'Here is my cool list <ol><li>The Shining</li><li>Memento</li><li>Blade Runner</li></ol>'
    rendered = HTMLSlacker(source_html).get_output()
    # Numbering starts at 1 and items are separated by newlines.
    assert rendered == 'Here is my cool list 1. The Shining\n2. Memento\n3. Blade Runner'
50+
51+
def test_unordered_list_with_text_modifications():
    """Inline bold markup inside a list item is rendered within the bullet line."""
    html = 'Here is my cool list <ul><li>The Shining</li><li>Memento</li><li>Blade <b>Runner</b></li></ul>'
    expected = 'Here is my cool list • The Shining\n• Memento\n• Blade *Runner*'
    # The original test stopped after building `expected`, so it passed
    # without checking anything; run the converter and compare.
    output = HTMLSlacker(html).get_output()
    assert output == expected
54+
55+
def test_headers_rendered():
    """Headings render as bold text followed by a line break.

    The input mixes a plain <h2>, a non-existent <h7> tag (only its text
    is expected in the output), and an <h2> wrapping a <b>, whose doubled
    bold markers are expected to collapse into a single pair.
    """
    source_html = '''<h2>Hello</h2> <h7>new</h7> <h2><b>world</b></h2>'''
    rendered = HTMLSlacker(source_html).get_output()
    assert rendered == "*Hello*\nnew *world*"
60+
61+
def test_headers_rendered_no_spaces():
    """Heading rendering is the same when the tags are not separated by spaces."""
    source_html = '''<h2>Hello</h2><h7>new</h7><h2><b>world</b></h2>'''
    rendered = HTMLSlacker(source_html).get_output()
    # Same expected text as the space-separated variant of this test.
    assert rendered == "*Hello*\nnew *world*"
66+
67+
def test_task_list_rendered():
    """Textual checkboxes "[] " and "[x] " become ballot-box characters."""
    source_html = '''[] Grocery<br>[x] Laundary'''
    rendered = HTMLSlacker(source_html).get_output()
    # The <br> between the two tasks is expected to become a newline.
    assert rendered == "☐ Grocery\n☑︎ Laundary"

0 commit comments

Comments
 (0)