|
1 | 1 | import argparse
|
2 | 2 | import re
|
3 | 3 | import json
|
| 4 | +from sys import stdout, stderr |
4 | 5 | from pathlib import Path
|
5 |
| -from collections import defaultdict, OrderedDict |
| 6 | +from collections import defaultdict |
6 | 7 | from concurrent.futures import ProcessPoolExecutor
|
7 |
| -from sys import stdout, stderr |
| 8 | + |
8 | 9 | from textwrap import TextWrapper
|
9 | 10 | from PyPDF2 import PdfReader
|
10 | 11 |
|
@@ -83,7 +84,7 @@ def make_index(file_pages, keep_roadmap=False, keep_toc=False, keep_continuation
|
83 | 84 | index = defaultdict(dict)
|
84 | 85 | for filename, pages in file_pages.items():
|
85 | 86 | for page_num, (header, text, references) in pages.items():
|
86 |
| - if not keep_roadmap and header in ["Course Roadmap", "Course Outline"]: |
| 87 | + if not keep_roadmap and header.startswith(("Course Roadmap", "Course Outline")): |
87 | 88 | continue
|
88 | 89 | if not keep_toc and header == "TABLE OF CONTENTS":
|
89 | 90 | continue
|
@@ -122,7 +123,7 @@ def print_index_by_alpha_order(index, stream=None, maxwidth=80):
|
122 | 123 |
|
123 | 124 | def sort_fn(x): return x[0].replace(
|
124 | 125 | 'The ', '', 1).replace('A ', '', 1).lower()
|
125 |
| - alpha_index = OrderedDict(sorted(alpha_index.items(), key=sort_fn)) |
| 126 | + alpha_index = dict(sorted(alpha_index.items(), key=sort_fn)) |
126 | 127 | max_pagestr_len = max(len(": " + ','.join(page_nums))
|
127 | 128 | for page_nums in alpha_index.values())
|
128 | 129 |
|
|
0 commit comments