-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspacy_ner.py
57 lines (37 loc) · 1.34 KB
/
spacy_ner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import argparse
import queue
from thread_worker import ThreadWorker
from pathlib import Path
from validate_directory import ValidateDirectory
# Worker threads created by _initialize_threads(); main() joins each of them.
thread_pool = []
# Shared queue of input files. Stays None until main() assigns the queue
# returned by _create_work_queue(); its `get` method is passed to each worker.
work_queue = None
def main():
    """Run the script: parse arguments, queue the input files, run the workers.

    Steps, in order:
      1. Parse the command line with the parser from ``_create_arg_parser()``.
      2. Rebind the module-level ``work_queue`` to the queue built from
         ``args.source`` by ``_create_work_queue()``.
      3. Start the worker threads via ``_initialize_threads()``.
      4. Call ``join()`` on every entry of ``thread_pool``.
    """
    global work_queue

    cli_args = _create_arg_parser().parse_args()
    work_queue = _create_work_queue(cli_args.source)
    _initialize_threads(cli_args)

    # Wait for each started worker before returning.
    for worker in thread_pool:
        worker.join()
def _create_arg_parser():
    """Build the command-line parser for this script.

    Registered options, in this order:
      --source   required; handled by the ``ValidateDirectory`` action.
      --workers  optional; parsed as ``int``, defaults to 8.
      --target   required; handled by the ``ValidateDirectory`` action.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # Ordered table of (flag, add_argument keyword options); the order is kept
    # because it determines the order of options in the generated help text.
    option_table = (
        (
            "--source",
            {
                "required": True,
                "help": "directory, which contains the files",
                "action": ValidateDirectory,
            },
        ),
        (
            "--workers",
            {
                "required": False,
                "help": "specify the number of threads",
                "default": 8,
                "type": int,
            },
        ),
        (
            "--target",
            {
                "required": True,
                "help": "directory, which will contain the output files",
                "action": ValidateDirectory,
            },
        ),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, options in option_table:
        arg_parser.add_argument(flag, **options)
    return arg_parser
def _initialize_threads(args):
    """Create, start and register ``args.workers`` worker threads.

    Each ``ThreadWorker`` is constructed with its zero-based index,
    ``args.target`` and the bound ``get`` method of the module-level
    ``work_queue``. It is then started and appended to ``thread_pool``.

    Args:
        args: Parsed command-line namespace; ``workers`` and ``target`` are read.

    NOTE(review): ``queue.Queue.get`` blocks by default when the queue is
    empty. How ThreadWorker detects that no work is left is not visible
    here; confirm workers terminate once the queue drains.
    """
    for worker_index in range(args.workers):
        worker = ThreadWorker(worker_index, args.target, work_queue.get)
        worker.start()
        # Registered only after a successful start so main() can join it.
        thread_pool.append(worker)
def _create_work_queue(source_dir):
_work_queue = queue.Queue()
source_dir = Path(source_dir)
for file in source_dir.iterdir():
if not file.is_file():
continue
_work_queue.put(file)
return _work_queue
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()