-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathprocess-one.sh
executable file
·93 lines (81 loc) · 2.57 KB
/
process-one.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/bin/bash
#
# process-one.sh <mode> <lang> <filename> <cogserver-host> <cogserver-port>
#
# Support script for batch parsing of plain-text files.
# Sentence-split one file, submit it, via perl script, to the parser.
# When done, move the file over to a 'finished' directory.
#
# Example usage:
# ./process-one.sh mst en Barbara localhost 17001
#
# Set up assorted constants needed to run.
# Positional arguments ($1, the processing mode, is consumed further below).
lang="$2"     # language code, handed to the sentence splitter via -l
filename="$3" # path of the article to process
coghost="$4"  # cogserver host
cogport="$5"  # cogserver port
splitter=./split-sentences.pl # sentence-splitter command
splitdir=split-articles       # receives the split copy of the article
parsesdir=mst-parses          # receives the parse (.ull) files

# Default parameter values
cnt_mode="clique-dist"
cnt_reach=6
mst_dist=(1)        # array; a plain $mst_dist expands to its first element
exp_parses="EXPORT" # any value other than "NONE" requests a parses file
split_sents="#t"    # "#t" means: run the sentence splitter

# Overrides for the defaults above live in an optional config file. Source it
# only when it exists, so a missing file does not print a "No such file or
# directory" error on every invocation.
if [[ -f ./config/params.txt ]]; then
  source ./config/params.txt
fi
# Split the filename into two parts: its first '/'-separated component (base)
# and components 2 through 6 (rest).
#
# path_fields <path> <field-list>
#   Prints the '/'-separated fields of <path> selected by <field-list>
#   (cut -f syntax). The path is emitted with a quoted printf, so runs of
#   whitespace, glob characters and a leading "-n"/"-e" reach cut unchanged;
#   an unquoted `echo $filename` would mangle all of these.
#   Note: cut prints a line that contains no '/' in full, whatever the field
#   list, so for a bare file name both base and rest equal that name.
path_fields() {
  printf '%s\n' "$1" | cut -d / -f "$2"
}
base=$(path_fields "$filename" 1)
rest=$(path_fields "$filename" 2-6)
# Gets processing mode for the cogserver.
# Each mode selects the done-queue directory (subdir), the observe command
# handed to submit-one.pl, and the space-separated parameter string that
# follows that command.
case "$1" in
  pairs)
    subdir=submitted-articles
    observe="observe-text-mode"
    params="$cnt_mode $cnt_reach"
    ;;
  mst)
    subdir=mst-articles
    observe="observe-mst-mode"
    if [[ "$exp_parses" != "NONE" ]]; then
      # create parses directory if missing
      mkdir -p "$(dirname "$parsesdir/$rest")"
      params="$cnt_mode $mst_dist ${rest}.ull" # pass parses filename
    else
      params="$cnt_mode $mst_dist $exp_parses" # don't print parses
    fi
    ;;
  *)
    # Fail early on an unknown or missing mode. Falling through would leave
    # subdir, observe and params empty: the article would be submitted with no
    # observe command, moved to "/$rest", and the input file removed anyway.
    echo "$0: unknown mode '$1' (expected 'pairs' or 'mst')" >&2
    exit 1
    ;;
esac
# Punt if the cogserver has crashed: use netcat to ping it.
# Only nc's exit status matters here; whatever it prints on stdout is
# discarded. Say why we are bailing out instead of exiting silently.
if ! echo foo | nc -N "$coghost" "$cogport" > /dev/null; then
  echo "$0: cogserver at $coghost:$cogport is not responding" >&2
  exit 1
fi
echo "Processing file >>>$rest<<<"
# Create directories if missing.
# The command substitutions are quoted so that a directory name containing
# whitespace or glob characters is passed to mkdir as a single argument.
mkdir -p "$(dirname "$splitdir/$rest")"
mkdir -p "$(dirname "$subdir/$rest")"
# Write the working copy of the article into the split directory: either
# sentence-split it, or (when splitting is switched off) copy it with
# backslashes and double quotes escaped.
case "$split_sents" in
  "#t")
    # Sentence splitting requested: run the splitter for the given language.
    cat "$filename" | $splitter -l $lang > "$splitdir/$rest"
    ;;
  *)
    # No splitting: double every backslash first, then put a backslash in
    # front of each double quote.
    cat "$filename" | sed -e 's/\\/\\\\/g' -e 's/\"/\\\"/g' > "$splitdir/$rest"
    ;;
esac
# Submit the split article.
# The split text is fed to submit-one.pl on stdin; the cogserver host and
# port, the observe command and its parameters follow as arguments.
# NOTE: $params is a single space-separated string built in the mode selection
# above, so it relies on unquoted word splitting to become separate arguments.
cat "$splitdir/$rest" | ./submit-one.pl $coghost $cogport $observe $params
# Punt if the cogserver has crashed (second test, before doing the mv and rm below)
# Only nc's exit status matters here; whatever it prints on stdout is
# discarded. Report the failure so it is clear why the article was left
# unmoved.
if ! echo foo | nc -N "$coghost" "$cogport" > /dev/null; then
  echo "$0: cogserver at $coghost:$cogport stopped responding after submission" >&2
  exit 1
fi
# If a parses file named <rest>.ull exists (path relative to the current
# directory), file it under the parses directory.
if [[ -f "${rest}.ull" ]]; then
  # Disabled alternative, kept for reference:
  # Sort parses and remove index, convert to ull format
  # sort -g "${rest}.ull" | cut -f2- | tr '\t' '\n' > "${rest}.ull_ordered";
  # Organize parse file
  # mv "${rest}.ull_ordered" "$parsesdir/${rest}.ull";
  # rm "${rest}.ull"
  # The source path is quoted so names with spaces or glob characters move
  # as a single file.
  mv "${rest}.ull" "$parsesdir/${rest}.ull"
fi
# Move article to the done-queue, then delete the original input file.
# The rm is chained on the mv so the input is removed only once the split
# copy has actually reached the done-queue; if the move fails the original
# stays in place and the script ends with mv's non-zero status.
mv "$splitdir/$rest" "$subdir/$rest" && rm "$base/$rest"