Skip to content

Commit 5e848bc

Browse files
committed
add option to list out percentage
1 parent 7a761b4 commit 5e848bc

File tree

3 files changed

+27
-9
lines changed

3 files changed

+27
-9
lines changed

.gitignore

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
1-
build
1+
build
2+
dist

data_hacks/bar_chart.py

+16 -7
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,17 @@ def load_stream(input_stream):
3838
yield clean_line
3939

4040
def run(input_stream, options):
41-
data = defaultdict(lambda:0)
41+
data = defaultdict(int)
42+
total = 0
4243
for row in input_stream:
4344
if options.agg_values:
4445
kv = row.replace('\t', ' ').split(' ',2);
45-
data[kv[0]]+= int(kv[1])
46+
value = int(kv[1])
47+
data[kv[0]] += value
48+
total += value
4649
else:
47-
data[row]+=1
50+
data[row] += 1
51+
total += 1
4852

4953
if not data:
5054
print "Error: no data"
@@ -57,7 +61,7 @@ def run(input_stream, options):
5761
scale = int(math.ceil(float(max_value) / value_characters))
5862
scale = max(1, scale)
5963

60-
print "# each ∎ represents a count of %d" % scale
64+
print "# each ∎ represents a count of %d. total %d" % (scale, total)
6165

6266
if options.sort_values:
6367
data = [[value, key] for key, value in data.items()]
@@ -71,9 +75,12 @@ def run(input_stream, options):
7175
else:
7276
data.sort(key=lambda x: x[1], reverse=options.reverse_sort)
7377

74-
format = "%" + str(max_length) + "s [%6d] %s"
75-
for value,key in data:
76-
print format % (key[:max_length], value, (value / scale) * "∎")
78+
str_format = "%" + str(max_length) + "s [%6d] %s%s"
79+
percentage = ""
80+
for value, key in data:
81+
if options.percentage:
82+
percentage = " (%0.2f%%)" % (100 * Decimal(value) / Decimal(total))
83+
print str_format % (key[:max_length], value, (value / scale) * "∎", percentage)
7784

7885
if __name__ == "__main__":
7986
parser = OptionParser()
@@ -88,6 +95,8 @@ def run(input_stream, options):
8895
help="reverse the sort")
8996
parser.add_option("-n", "--numeric-sort", dest="numeric_sort", default=False, action="store_true",
9097
help="sort keys by numeric sequencing")
98+
parser.add_option("-p", "--percentage", dest="percentage", default=False, action="store_true",
99+
help="List percentage for each bar")
91100

92101
(options, args) = parser.parse_args()
93102

data_hacks/histogram.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -202,14 +202,18 @@ def histogram(stream, options):
202202
print "# each ∎ represents a count of %d" % bucket_scale
203203
bucket_min = min_v
204204
bucket_max = min_v
205+
percentage = ""
206+
format_string = options.format + ' - ' + options.format + ' [%6d]: %s%s'
205207
for bucket in range(buckets):
206208
bucket_min = bucket_max
207209
bucket_max = boundaries[bucket]
208210
bucket_count = bucket_counts[bucket]
209211
star_count = 0
210212
if bucket_count:
211213
star_count = bucket_count / bucket_scale
212-
print '%10.4f - %10.4f [%6d]: %s' % (bucket_min, bucket_max, bucket_count, '∎' * star_count)
214+
if options.percentage:
215+
percentage = " (%0.2f%%)" % (100 * Decimal(bucket_count) / Decimal(samples))
216+
print format_string % (bucket_min, bucket_max, bucket_count, '∎' * star_count, percentage)
213217

214218

215219
if __name__ == "__main__":
@@ -227,6 +231,10 @@ def histogram(stream, options):
227231
help="Comma seperated list of bucket edges for the histogram")
228232
parser.add_option("--no-mvsd", dest="mvsd", action="store_false", default=True,
229233
help="Disable the calculation of Mean, Variance and SD (improves performance)")
234+
parser.add_option("-f", "--bucket-format", dest="format", default="%10.4f",
235+
help="format for bucket numbers")
236+
parser.add_option("-p", "--percentage", dest="percentage", default=False, action="store_true",
237+
help="List percentage for each bar")
230238

231239
(options, args) = parser.parse_args()
232240
if sys.stdin.isatty():

0 commit comments

Comments
 (0)