Skip to content

Commit d271051

Browse files
committed
Checking consolidated links
1 parent 5e89d32 commit d271051

File tree

2 files changed

+79
-11
lines changed

2 files changed

+79
-11
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ ${RMD_DST} : ${RMD_SRC}
8989

9090
## lesson-check : validate lesson Markdown.
9191
lesson-check :
92-
@bin/lesson_check.py -s . -p ${PARSER}
92+
@bin/lesson_check.py -s . -p ${PARSER} -r _includes/links.md
9393

9494
## lesson-check-all : validate lesson Markdown, checking line lengths and trailing whitespace.
9595
lesson-check-all :

bin/lesson_check.py

+78-10
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX
1616

17-
__version__ = '0.2'
17+
__version__ = '0.3'
1818

1919
# Where to look for source Markdown files.
2020
SOURCE_DIRS = ['', '_episodes', '_extras']
@@ -48,7 +48,10 @@
4848
P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
4949

5050
# Pattern to match internally-defined Markdown links.
51-
P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]')
51+
P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]')
52+
53+
# Pattern to match reference links (to resolve internally-defined references).
54+
P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)')
5255

5356
# What kinds of blockquotes are allowed?
5457
KNOWN_BLOCKQUOTES = {
@@ -103,13 +106,16 @@ def main():
103106
args = parse_args()
104107
args.reporter = Reporter()
105108
check_config(args.reporter, args.source_dir)
109+
args.references = read_references(args.reporter, args.reference_path)
110+
106111
docs = read_all_markdown(args.source_dir, args.parser)
107112
check_fileset(args.source_dir, args.reporter, docs.keys())
108113
check_unwanted_files(args.source_dir, args.reporter)
109114
for filename in docs.keys():
110115
checker = create_checker(args, filename, docs[filename])
111116
checker.check()
112117
check_figures(args.source_dir, args.reporter)
118+
113119
args.reporter.report()
114120

115121

@@ -126,6 +132,10 @@ def parse_args():
126132
default=None,
127133
dest='parser',
128134
help='path to Markdown parser')
135+
parser.add_option('-r', '--references',
136+
default=None,
137+
dest='reference_path',
138+
help='path to Markdown file of external references')
129139
parser.add_option('-s', '--source',
130140
default=os.curdir,
131141
dest='source_dir',
@@ -160,6 +170,37 @@ def check_config(reporter, source_dir):
160170
'"root" not set to ".." in configuration')
161171

162172

173+
def read_references(reporter, ref_path):
    """Read shared file of reference links, returning dictionary of valid references
    {symbolic_name : URL}

    Every non-blank line of the file must be a Markdown link definition
    of the form ``[name]: url`` (see P_INTERNAL_LINK_DEF).  A line that
    does not match, or that defines an empty name or URL, is rejected
    through require().  Duplicate names and duplicate URLs are recorded
    on `reporter` but do not stop processing; for a duplicated name the
    last definition wins.

    reporter -- object whose check(condition, location, fmt, *args)
                method records a problem when `condition` is false.
    ref_path -- path to the links file; if falsy, no file is read and
                an empty dictionary is returned.
    """

    result = {}
    urls_seen = set()
    if not ref_path:
        return result

    with open(ref_path, 'r') as reader:
        for (line_num, line) in enumerate(reader, 1):
            # A whitespace-only line defines nothing, so skip it instead
            # of rejecting the whole file.
            if not line.strip():
                continue

            m = P_INTERNAL_LINK_DEF.search(line)
            require(m,
                    '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip()))
            name = m.group(1)
            # '.+' also captures trailing blanks; strip them so the same
            # URL written with different trailing whitespace is still
            # recognised as a duplicate.
            url = m.group(2).strip()
            require(name and url,
                    'Empty reference at {0}:{1}'.format(ref_path, line_num))
            reporter.check(name not in result,
                           ref_path,
                           'Duplicate reference {0} at line {1}',
                           name, line_num)
            reporter.check(url not in urls_seen,
                           ref_path,
                           'Duplicate definition of URL {0} at line {1}',
                           url, line_num)
            result[name] = url
            urls_seen.add(url)
    return result
202+
203+
163204
def read_all_markdown(source_dir, parser):
164205
"""Read source files, returning
165206
{path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}}
@@ -274,7 +315,7 @@ def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
274315

275316

276317
def check(self):
277-
"""Run tests on metadata."""
318+
"""Run tests."""
278319

279320
self.check_metadata()
280321
self.check_line_lengths()
@@ -342,17 +383,16 @@ def check_codeblock_classes(self):
342383
def check_defined_link_references(self):
343384
"""Check that defined links resolve in the file.
344385
345-
Internally-defined links match the pattern [text][label]. If
346-
the label contains '{{...}}', it is hopefully a references to
347-
a configuration value - we should check that, but don't right
348-
now.
386+
Internally-defined links match the pattern [text][label].
349387
"""
350388

351389
result = set()
352390
for node in self.find_all(self.doc, {'type' : 'text'}):
353-
for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']):
354-
if '{{' not in match:
355-
result.add(match)
391+
for match in P_INTERNAL_LINK_REF.findall(node['value']):
392+
text = match[0]
393+
link = match[1]
394+
if link not in self.args.references:
395+
result.add('"{0}"=>"{1}"'.format(text, link))
356396
self.reporter.check(not result,
357397
self.filename,
358398
'Internally-defined links may be missing definitions: {0}',
@@ -441,6 +481,14 @@ class CheckEpisode(CheckBase):
441481
def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
    """Set up an episode checker by forwarding every argument to the parent class."""
    parent = super(CheckEpisode, self)
    parent.__init__(args, filename, metadata, metadata_len, text, lines, doc)
443483

484+
485+
def check(self):
    """Run the inherited checks, then the episode-only reference-inclusion check."""

    inherited = super(CheckEpisode, self)
    inherited.check()
    self.check_reference_inclusion()
490+
491+
444492
def check_metadata(self):
445493
super(CheckEpisode, self).check_metadata()
446494
if self.metadata:
@@ -467,6 +515,26 @@ def check_metadata_fields(self, expected):
467515
name, type(self.metadata[name]), type_)
468516

469517

518+
def check_reference_inclusion(self):
    """Check that links file has been included.

    Does nothing when no reference file was given on the command line
    (self.args.reference_path is falsy).  Otherwise the last non-empty
    line of the episode must contain the base name of the reference
    file; if it does not, a problem is added to self.reporter.  A file
    with no non-empty lines is rejected through require().
    """

    if not self.args.reference_path:
        return

    # self.lines holds (index, line, length) triples; only the text is
    # needed here.  The '' default means a file with no lines at all
    # reaches the require() below instead of raising UnboundLocalError
    # on an unassigned loop variable.
    last_line = next(
        (line for (_, line, _) in reversed(self.lines) if line),
        '')

    require(last_line,
            'No non-empty lines in {0}'.format(self.filename))

    include_filename = os.path.split(self.args.reference_path)[-1]
    if include_filename not in last_line:
        self.reporter.add(self.filename,
                          'episode does not include "{0}"',
                          include_filename)
536+
537+
470538
class CheckReference(CheckBase):
471539
"""Check the reference page."""
472540

0 commit comments

Comments
 (0)