-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtsv2amr.py
70 lines (49 loc) · 1.87 KB
/
tsv2amr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
This script converts a .tsv file with the following columns:
1) 'sent1'
2) 'sent2'
...
n) ...
into two AMR files: '<output prefix>_a.amr' with all of the first sentences and '<output prefix>_b.amr' with all of the second sentences.
Usage example:
python3 tsv2amr.py -i data/SICK2014.tsv -o data/amr/SICK2014_corpus
"""
import amrlib
import argparse
from pathlib import Path
def save_amr(amrs, filepath):
    """
    Save AMR graphs to the given path, one graph per blank-line-separated block.

    Args:
        amrs: iterable of AMR graphs; each item is written via its ``str()`` form
        filepath: path of the file to (over)write
    """
    # Explicit UTF-8: the sentence text embedded in the AMR metadata may contain
    # non-ASCII characters, and the platform default encoding is not guaranteed
    # to be able to represent them.
    with open(filepath, 'w', encoding='utf-8') as f:
        for amr in amrs:
            # Each graph is followed by an empty line, which separates entries.
            f.write(f'{amr}\n\n')
def convert_corpus_to_amr(corpus_path, save_path_prefix='processed/corpus'):
    """
    Convert a tab-separated sentence-pair corpus to two AMR files.

    The first two tab-separated columns of every non-blank line are taken as a
    sentence pair. All first sentences are parsed and written to
    ``<save_path_prefix>_a.amr``; all second sentences are written to
    ``<save_path_prefix>_b.amr``. Missing parent directories of the output
    prefix are created.

    Args:
        corpus_path: Path to text corpus to be converted. Each line in corpus
            contains one tab separated sentence pair (extra columns are ignored)
        save_path_prefix: Path prefix to save the converted corpus to

    Raises:
        ValueError: if a non-blank line has fewer than two tab-separated columns
    """
    sents_a, sents_b = [], []
    # Read and validate the whole corpus before loading the model, so malformed
    # input fails fast instead of after an expensive model load.
    with open(corpus_path, encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                # Blank lines (e.g. a trailing empty line) carry no sentence pair.
                continue
            # Strip only the line terminator before splitting: stripping all
            # whitespace would remove a leading tab and misalign the columns.
            columns = line.rstrip('\r\n').split('\t')
            if len(columns) < 2:
                raise ValueError(
                    f'{corpus_path}, line {line_no}: expected at least 2 '
                    f'tab-separated columns, found {len(columns)}'
                )
            sents_a.append(columns[0].strip())
            sents_b.append(columns[1].strip())

    stog = amrlib.load_stog_model()
    print('Stog model loaded sucessfully!')

    sents_a_amr = stog.parse_sents(sents_a, add_metadata=True)
    sents_b_amr = stog.parse_sents(sents_b, add_metadata=True)

    # Only the directory part of the prefix has to exist; the last path
    # component is the file name stem.
    Path(save_path_prefix).parent.mkdir(parents=True, exist_ok=True)
    save_amr(sents_a_amr, f'{save_path_prefix}_a.amr')
    save_amr(sents_b_amr, f'{save_path_prefix}_b.amr')
if __name__ == '__main__':
    # Command-line entry point: parse the input/output paths and run the conversion.
    parser = argparse.ArgumentParser()
    # The input file is mandatory; without it there is nothing to convert.
    parser.add_argument('-i', '--input', required=True,
                        help='filename to be converted to AMR')
    # Default mirrors convert_corpus_to_amr's own default so that omitting -o
    # does not pass None as the output prefix.
    parser.add_argument('-o', '--output', default='processed/corpus',
                        help='output filename prefix')
    args = parser.parse_args()
    convert_corpus_to_amr(args.input, args.output)