@@ -121,27 +121,29 @@ def test_median():
121
121
assert "4.50" == "%.2f" % median ([4.0 , 5 , 2 , 1 , 9 , 10 ])
122
122
123
123
124
- def histogram (stream , options ):
124
+ def _histogram (stream , minimum = None , maximum = None , num_buckets = None , logscale = False ,
125
+ custbuckets = None , calc_mvsd = True ,
126
+ bucket_format = '%10.4f' , calc_percentage = False , dot = '∎' ):
125
127
"""
126
128
Loop over the stream and add each entry to the dataset, printing out at the
127
129
end.
128
130
129
131
stream yields Decimal()
130
132
"""
131
- if not options . min or not options . max :
133
+ if not minimum or not maximum :
132
134
# glob the iterator here so we can do min/max on it
133
135
data = list (stream )
134
136
else :
135
137
data = stream
136
138
bucket_scale = 1
137
139
138
- if options . min :
139
- min_v = Decimal (options . min )
140
+ if minimum :
141
+ min_v = Decimal (minimum )
140
142
else :
141
143
min_v = min (data , key = lambda x : x .value )
142
144
min_v = min_v .value
143
- if options . max :
144
- max_v = Decimal (options . max )
145
+ if maximum :
146
+ max_v = Decimal (maximum )
145
147
else :
146
148
max_v = max (data , key = lambda x : x .value )
147
149
max_v = max_v .value
@@ -151,11 +153,9 @@ def histogram(stream, options):
151
153
diff = max_v - min_v
152
154
153
155
boundaries = []
154
- bucket_counts = []
155
- buckets = 0
156
156
157
- if options . custbuckets :
158
- bound = options . custbuckets .split (',' )
157
+ if custbuckets :
158
+ bound = custbuckets .split (',' )
159
159
bound_sort = sorted (map (Decimal , bound ))
160
160
161
161
# if the last value is smaller than the maximum, replace it
@@ -174,8 +174,8 @@ def histogram(stream, options):
174
174
# so no need to do a -1!
175
175
bucket_counts = [0 for x in range (len (boundaries ))]
176
176
buckets = len (boundaries )
177
- elif options . logscale :
178
- buckets = options . buckets and int (options . buckets ) or 10
177
+ elif logscale :
178
+ buckets = num_buckets and int (num_buckets ) or 10
179
179
if buckets <= 0 :
180
180
raise ValueError ('# of buckets must be > 0' )
181
181
@@ -202,7 +202,7 @@ def log_steps(k, n):
202
202
for step in log_steps (buckets , diff ):
203
203
boundaries .append (min_v + step )
204
204
else :
205
- buckets = options . buckets and int (options . buckets ) or 10
205
+ buckets = num_buckets and int (num_buckets ) or 10
206
206
if buckets <= 0 :
207
207
raise ValueError ('# of buckets must be > 0' )
208
208
step = diff / buckets
@@ -216,7 +216,7 @@ def log_steps(k, n):
216
216
accepted_data = []
217
217
for record in data :
218
218
samples += record .count
219
- if options . mvsd :
219
+ if calc_mvsd :
220
220
mvsd .add (record .value , record .count )
221
221
accepted_data .append (record )
222
222
# find the bucket this goes in
@@ -237,29 +237,39 @@ def log_steps(k, n):
237
237
if skipped :
238
238
print ("# %d value%s outside of min/max" %
239
239
(skipped , skipped > 1 and 's' or '' ))
240
- if options . mvsd :
240
+ if calc_mvsd :
241
241
print ("# Mean = %f; Variance = %f; SD = %f; Median %f" %
242
242
(mvsd .mean (), mvsd .var (), mvsd .sd (),
243
243
median (accepted_data , key = lambda x : x .value )))
244
- print "# each " + options .dot + " represents a count of %d" % bucket_scale
245
- bucket_min = min_v
244
+ print "# each " + dot + " represents a count of %d" % bucket_scale
246
245
bucket_max = min_v
247
246
percentage = ""
248
- format_string = options . format + ' - ' + options . format + ' [%6d]: %s%s'
247
+ format_string = bucket_format + ' - ' + bucket_format + ' [%6d]: %s%s'
249
248
for bucket in range (buckets ):
250
249
bucket_min = bucket_max
251
250
bucket_max = boundaries [bucket ]
252
251
bucket_count = bucket_counts [bucket ]
253
252
star_count = 0
254
253
if bucket_count :
255
254
star_count = bucket_count / bucket_scale
256
- if options . percentage :
255
+ if calc_percentage :
257
256
percentage = " (%0.2f%%)" % (100 * Decimal (bucket_count ) /
258
257
Decimal (samples ))
259
- print format_string % (bucket_min , bucket_max , bucket_count , options . dot *
258
+ print format_string % (bucket_min , bucket_max , bucket_count , dot *
260
259
star_count , percentage )
261
260
262
261
262
def histogram(stream, options):
    """
    Draw a histogram of ``stream`` using the settings carried by ``options``.

    Thin adapter around ``_histogram``: ``options`` is any object exposing
    the attributes ``min``, ``max``, ``buckets``, ``logscale``,
    ``custbuckets``, ``mvsd``, ``format``, ``percentage`` and ``dot``; each
    one is forwarded to the matching ``_histogram`` parameter.
    """
    _histogram(
        stream,
        minimum=options.min,
        maximum=options.max,
        num_buckets=options.buckets,
        logscale=options.logscale,
        custbuckets=options.custbuckets,
        calc_mvsd=options.mvsd,
        bucket_format=options.format,
        calc_percentage=options.percentage,
        dot=options.dot,
    )
266
+
267
+
268
def print_histogram(samples, **kwargs):
    """
    Print a histogram for an iterable of sample values.

    Every sample is converted with ``str`` and the resulting list is run
    through ``load_stream`` (both of its extra arguments passed as False);
    the records it yields are handed to ``_histogram``. Any keyword
    arguments are forwarded to ``_histogram`` unchanged.
    """
    as_text = list(map(str, samples))
    records = load_stream(as_text, False, False)
    _histogram(records, **kwargs)
271
+
272
+
263
273
if __name__ == "__main__" :
264
274
parser = OptionParser ()
265
275
parser .usage = "cat data | %prog [options]"
0 commit comments