-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathanalyze_groundtruth
executable file
·141 lines (120 loc) · 3.84 KB
/
analyze_groundtruth
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python2.7
import argparse
import glob
import json
import csv
import sys
import os
EXTRA_PATH = "/data/bff-results/bff/results/crashers/%s/%s"
"""
Something this code needs to do:
The ground truth data will include potentially multiple seps per bug. What
we should do is keep a map from the sets of fixes to (potentially) fresh
integers, and "flatten" the data out into a first order map from one crasher
to one identifier. Then our comparison algorithms will work normally.
We might have some different knobs to turn to make those flattening decisions
differently in the future.
"""


def ed_data(args):
    """Flatten the "ed" ground-truth CSV into one integer label per crasher.

    Reads ``args.groundtruth`` as CSV. Only rows whose column 1 equals
    ``"NORMAL"`` are used. Column 8 of such a row is split on whitespace and
    each entry is cut at its first ``":"``; the resulting set of fix
    identifiers is mapped to an integer label, handed out from 0 in order of
    first appearance, so rows with the same fix set share a label.

    Column 3 (the crasher path) has ``EXTRA_PATH % (args.name, args.name)``
    removed and ``args.prefixdir`` prepended. The result is written to
    ``args.outputfile`` as JSON: ``{"name": ..., "labels": [[path, label]]}``.

    Args:
        args: namespace providing ``name``, ``groundtruth``, ``outputfile``,
            ``prefixdir`` and ``merge``.

    Returns:
        0 once the output file has been written.

    Raises:
        IndexError: if a non-empty row has fewer than nine columns.
    """
    truths = {"name": args.name, "labels": []}
    labels = {}
    stale_prefix = EXTRA_PATH % (args.name, args.name)

    with open(args.groundtruth, 'r') as handle:
        for row in csv.reader(handle):
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            if not row:
                continue
            status, path, vfixes = row[1], row[3], row[8]
            if status != "NORMAL":
                continue

            # split() rather than split(" "): repeated or trailing spaces
            # must not add an empty fix id and thereby change the label.
            fixes_s = frozenset(fix.split(":")[0] for fix in vfixes.split())

            if args.merge:
                # NOTE(review): merging of fix sets was never implemented in
                # this script; the original code was a stub whose only
                # observable effect was labelling every crasher 0. That
                # result is kept as-is until the intended merge rule is
                # specified.
                fixes_label = 0
            else:
                if fixes_s not in labels:
                    # The next fresh label is the number of sets seen so far.
                    labels[fixes_s] = len(labels)
                fixes_label = labels[fixes_s]

            realpath = args.prefixdir + path.replace(stale_prefix, "")
            truths["labels"].append((realpath, fixes_label))

    with open(args.outputfile, 'w') as output:
        json.dump(truths, output)
    return 0
# In this case, the data formatting is a little different.
# We need to take a base dir for where the crashers are, and a
# base dir for where the ground truth data is.
def benji_data(args):
    """Build labels from a directory holding one ``.txt`` file per cluster.

    Every ``*.txt`` file directly under ``args.groundtruth`` except
    ``crash_list.txt`` is treated as one cluster; each non-empty line in it
    is a crasher name, which is appended to ``args.prefixdir`` by plain
    string concatenation. Cluster ids start at 0 and follow the sorted order
    of the file paths, so repeated runs produce the same ids.

    The result is written to ``args.outputfile`` as JSON:
    ``{"name": ..., "labels": [[path, cluster_id]]}``.

    Args:
        args: namespace providing ``name``, ``groundtruth``, ``outputfile``
            and ``prefixdir``.

    Returns:
        0 once the output file has been written.
    """
    # Base dir where the ground truth .txt files are.
    groundtruth_base_dir = args.groundtruth
    # Base dir where all the crashing files are.
    crashers_dir = args.prefixdir

    truths = {"name": args.name, "labels": []}
    cluster_id = 0
    # glob.glob returns entries in arbitrary order; sort so that the id
    # assigned to each cluster file is stable between runs and machines.
    for cluster_file in sorted(glob.glob(groundtruth_base_dir + "/*.txt")):
        # Ignore the crash_list entry.
        if os.path.basename(cluster_file) == "crash_list.txt":
            continue
        with open(cluster_file, 'r') as handle:
            # Strip the line terminator (including a CR from CRLF files) so
            # it does not end up inside the crasher path.
            names = [line.rstrip("\r\n") for line in handle]
        for crasher in names:
            if crasher:
                truths["labels"].append((crashers_dir + crasher, cluster_id))
        # An empty cluster file still consumes an id.
        cluster_id = cluster_id + 1

    with open(args.outputfile, 'w') as output:
        json.dump(truths, output)
    return 0
def andrew_data(args):
    """Convert a two-column ``inputfile,classname`` CSV into cluster labels.

    The first row of ``args.groundtruth`` is discarded (presumably a header
    row). Every following row must have exactly two columns. Rows whose
    class name is in the skip set are dropped; every other class name is
    mapped to an integer id, handed out from 0 in order of first appearance.

    The result is written to ``args.outputfile`` as JSON:
    ``{"name": ..., "labels": [[inputfile, cluster_id]]}``.

    Args:
        args: namespace providing ``name``, ``groundtruth`` and
            ``outputfile``.

    Returns:
        0 once the output file has been written.

    Raises:
        ValueError: if a non-empty data row does not have exactly two columns.
    """
    truths = {"name": args.name, "labels": []}
    cluster_map = {}
    # NOTE(review): "unfixed" rows carry no usable class; the timestamp-like
    # entry is presumably a stray value in the class column -- confirm
    # against the data set before changing this list.
    skip = set(["2015-11-17 06:37:14", "unfixed"])

    with open(args.groundtruth, 'r') as handle:
        rows = csv.reader(handle)
        # Drop the first row; the default keeps an empty file from raising
        # StopIteration.
        next(rows, None)
        for row in rows:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            if not row:
                continue
            inputfile, classname = row
            if classname in skip:
                continue
            if classname not in cluster_map:
                # The next fresh id is the number of classes seen so far.
                cluster_map[classname] = len(cluster_map)
            truths["labels"].append((inputfile, cluster_map[classname]))

    with open(args.outputfile, 'w') as output:
        json.dump(truths, output)
    return 0
def main(args):
    """Run the converter selected by ``args.data_source``.

    Returns the selected converter's return value, or 1 when
    ``args.data_source`` is none of 'ed', 'benji' or 'andrew'.
    """
    source = args.data_source
    if source == 'ed':
        return ed_data(args)
    if source == 'benji':
        return benji_data(args)
    if source == 'andrew':
        return andrew_data(args)
    # Unknown source: report failure through the exit status.
    return 1
if __name__ == '__main__':
    # Command-line entry point: four required positionals followed by the
    # optional switches, then exit with whatever status main() reports.
    parser = argparse.ArgumentParser("csvtool")
    for positional in ("name", "groundtruth", "outputfile", "prefixdir"):
        parser.add_argument(positional)
    parser.add_argument("--merge", action="store_true", default=False)
    parser.add_argument(
        '--data-source',
        type=str,
        choices=['ed', 'benji', 'andrew'],
        default='ed',
    )
    args = parser.parse_args()
    sys.exit(main(args))