diff --git a/README.rst b/README.rst index 78eeddf..7f0cbdb 100644 --- a/README.rst +++ b/README.rst @@ -5,16 +5,16 @@ Collection of benchmarks comparing various python-based machine learning packages. This is meant to work with the development version of the libraries -scikits.learn, mlpy, pybrain, pymvpa, mdp and shogun. It might be hard -to get all packages working on the same machine, but benchmarks are -designed so that if something fail it will just print the exception -and go to the next one. +scikits.learn, mlpy, pybrain, pymvpa, mdp, shogun, milk and orange. It +might be hard to get all packages working on the same machine, but +benchmarks are designed so that if something fails it will just print +the exception and go to the next one. To execute a benchmark, just type from the prompt:: $ python benchmarks/bench_$name.py -and you will se as output the mean and std deviation for the timing of +and you will see as output the mean and standard deviation for the timing of running the benchmark 10 times with its extreme values removed. Results @@ -43,6 +43,7 @@ References - Shogun: http://www.shogun-toolbox.org/ - PyBrain : http://pybrain.org/ - Milk : http://luispedro.org/software/milk + - Orange : http://orange.biolab.si/ Misc diff --git a/benchmarks/bench_elasticnet.py b/benchmarks/bench_elasticnet.py index 5e40b05..9317275 100644 --- a/benchmarks/bench_elasticnet.py +++ b/benchmarks/bench_elasticnet.py @@ -69,17 +69,17 @@ def bench_pymvpa(X, y, T, valid): print 'Loading data ...' 
data = misc.load_data(dataset) + print 'Done, %s samples with %s features loaded into ' \ - 'memory' % data[0].shape + 'memory\n' % data[0].shape + + score, res = misc.bench(bench_skl, data) + misc.print_result("elasticnet", dataset, "scikits.learn", score, res) - score, res_skl = misc.bench(bench_skl, data) - print 'scikits.learn: mean %s, std %s' % (res_skl.mean(), res_skl.std()) - print 'MSE ', score + score, res = misc.bench(bench_mlpy, data) + misc.print_result("elasticnet", dataset, "MLPy", score, res) - score, res_mlpy = misc.bench(bench_mlpy, data) - print 'MLPy: mean %s, std %s' % (res_mlpy.mean(), res_mlpy.std()) - print 'MSE ', score + score, res = misc.bench(bench_pymvpa, data) + misc.print_result("elasticnet", dataset, "PyMVPA", score, res) - score, res_pymvpa = misc.bench(bench_pymvpa, data) - print 'PyMVPA: mean %s, std %s' % (res_pymvpa.mean(), res_pymvpa.std()) - print 'MSE ', score + misc.save_results() diff --git a/benchmarks/bench_kmeans.py b/benchmarks/bench_kmeans.py index 3792505..9be0ffb 100644 --- a/benchmarks/bench_kmeans.py +++ b/benchmarks/bench_kmeans.py @@ -119,34 +119,24 @@ def bench_milk(X, y, T, valid): data = misc.load_data(dataset) print 'Done, %s samples with %s features loaded into ' \ - 'memory' % data[0].shape - - score, res_shogun = misc.bench(bench_shogun, data) - print 'Shogun: mean %.2f, std %.2f' % ( - np.mean(res_shogun), np.std(res_shogun)) - print 'Score: %2f\n' % score - - score, res_mdp = misc.bench(bench_mdp, data) - print 'MDP: mean %.2f, std %.2f' % ( - np.mean(res_mdp), np.std(res_mdp)) - print 'Score: %2f\n' % score - - score, res_skl = misc.bench(bench_skl, data) - print 'scikits.learn: mean %.2f, std %.2f' % ( - np.mean(res_skl), np.std(res_skl)) - print 'Score: %2f\n' % score - - score, res_mlpy = misc.bench(bench_mlpy, data) - print 'MLPy: mean %.2f, std %.2f' % ( - np.mean(res_mlpy), np.std(res_mlpy)) - print 'Score: %2f\n' % score - - score, res_pybrain = misc.bench(bench_pybrain, data) - print 'Pybrain: 
mean %.2f, std %.2f' % ( - np.mean(res_pybrain), np.std(res_pybrain)) - print 'Score: %2f\n' % score - - score, res_milk = misc.bench(bench_milk, data) - print 'milk: mean %.2f, std %.2f' % ( - np.mean(res_milk), np.std(res_milk)) - print 'Score: %2f\n' % score + 'memory\n' % data[0].shape + + score, res = misc.bench(bench_shogun, data) + misc.print_result("kmeans", dataset, "Shogun", score, res) + + score, res = misc.bench(bench_mdp, data) + misc.print_result("kmeans", dataset, "MDP", score, res) + + score, res = misc.bench(bench_skl, data) + misc.print_result("kmeans", dataset, "scikits.learn", score, res) + + score, res = misc.bench(bench_mlpy, data) + misc.print_result("kmeans", dataset, "MLPy", score, res) + + score, res = misc.bench(bench_pybrain, data) + misc.print_result("kmeans", dataset, "Pybrain", score, res) + + score, res = misc.bench(bench_milk, data) + misc.print_result("kmeans", dataset, "milk", score, res) + + misc.save_results() diff --git a/benchmarks/bench_knn.py b/benchmarks/bench_knn.py index 2fdc42c..21bd817 100644 --- a/benchmarks/bench_knn.py +++ b/benchmarks/bench_knn.py @@ -107,28 +107,24 @@ def bench_milk(X, y, T, valid): data = misc.load_data(dataset) print 'Done, %s samples with %s features loaded into ' \ - 'memory' % data[0].shape + 'memory\n' % data[0].shape - score, res_shogun = misc.bench(bench_shogun, data) - print 'Shogun: mean %.2f, std %.2f\n' % (res_shogun.mean(), res_shogun.std()) - print 'Score: %.2f' % score + score, res = misc.bench(bench_shogun, data) + misc.print_result("knn", dataset, "Shogun", score, res) - score, res_mdp = misc.bench(bench_mdp, data) - print 'MDP: mean %.2f, std %.2f\n' % (res_mdp.mean(), res_mdp.std()) - print 'Score: %.2f' % score + score, res = misc.bench(bench_mdp, data) + misc.print_result("knn", dataset, "MDP", score, res) - score, res_skl = misc.bench(bench_skl, data) - print 'scikits.learn: mean %.2f, std %.2f\n' % (res_skl.mean(), res_skl.std()) - print 'Score: %.2f' % score + score, res = 
misc.bench(bench_skl, data) + misc.print_result("knn", dataset, "scikits.learn", score, res) - score, res_mlpy = misc.bench(bench_mlpy, data) - print 'MLPy: mean %.2f, std %.2f\n' % (res_mlpy.mean(), res_mlpy.std()) - print 'Score: %.2f' % score + score, res = misc.bench(bench_mlpy, data) + misc.print_result("knn", dataset, "MLPy", score, res) - score, res_milk = misc.bench(bench_milk, data) - print 'milk: mean %.2f, std %.2f\n' % (res_milk.mean(), res_milk.std()) - print 'Score: %.2f' % score + score, res = misc.bench(bench_pymvpa, data) + misc.print_result("knn", dataset, "PyMVPA", score, res) - score, res_pymvpa = misc.bench(bench_pymvpa, data) - print 'PyMVPA: mean %.2f, std %.2f\n' % (res_pymvpa.mean(), res_pymvpa.std()) - print 'Score: %.2f' % score + score, res = misc.bench(bench_milk, data) + misc.print_result("knn", dataset, "milk", score, res) + + misc.save_results() diff --git a/benchmarks/bench_lassolars.py b/benchmarks/bench_lassolars.py index b234d69..61e1174 100644 --- a/benchmarks/bench_lassolars.py +++ b/benchmarks/bench_lassolars.py @@ -66,19 +66,15 @@ def bench_pymvpa(X, y, T, valid): data = misc.load_data(dataset) print 'Done, %s samples with %s features loaded into ' \ - 'memory' % data[0].shape - - score, res_skl = misc.bench(bench_skl, data) - print 'scikits.learn: mean %.2f, std %.2f' % ( - np.mean(res_skl), np.std(res_skl)) - print 'MSE: %s\n' % score - - score, res_mlpy = misc.bench(bench_mlpy, data) - print 'MLPy: mean %.2f, std %.2f' % ( - np.mean(res_mlpy), np.std(res_mlpy)) - print 'MSE: %s\n' % score - - score, res_pymvpa = misc.bench(bench_pymvpa, data) - print 'PyMVPA: mean %.2f, std %.2f' % ( - np.mean(res_pymvpa), np.std(res_pymvpa)) - print 'MSE: %s\n' % score + 'memory\n' % data[0].shape + + score, res = misc.bench(bench_skl, data) + misc.print_result("lassolars", dataset, "scikits.learn", score, res) + + score, res = misc.bench(bench_mlpy, data) + misc.print_result("lassolars", dataset, "MLPy", score, res) + + score, res = 
misc.bench(bench_pymvpa, data) + misc.print_result("lassolars", dataset, "PyMVPA", score, res) + + misc.save_results() diff --git a/benchmarks/bench_logistic.py b/benchmarks/bench_logistic.py index 9d00878..33067d7 100644 --- a/benchmarks/bench_logistic.py +++ b/benchmarks/bench_logistic.py @@ -36,8 +36,9 @@ def bench_skl(X, y, T, valid): data = misc.load_data(dataset) print 'Done, %s samples with %s features loaded into ' \ - 'memory' % data[0].shape + 'memory\n' % data[0].shape - res_skl = misc.bench(bench_skl, data) - print 'MLPy: mean %.2f, std %.2f\n' % ( - np.mean(res_skl), np.std(res_skl)) + score, res = misc.bench(bench_skl, data) + misc.print_result("logistic", dataset, "scikits.learn", score, res) + + misc.save_results() diff --git a/benchmarks/bench_pca.py b/benchmarks/bench_pca.py index 6a241ef..df9f2dc 100644 --- a/benchmarks/bench_pca.py +++ b/benchmarks/bench_pca.py @@ -107,28 +107,19 @@ def bench_milk(X, y, T, valid): print 'Done, %s samples with %s features loaded into ' \ 'memory' % data[0].shape - score, res_mdp = misc.bench(bench_mdp, data) - print 'MDP: mean %s, std %s' % ( - np.mean(res_mdp), np.std(res_mdp)) - print 'Explained variance: %s\n'% score - - score, res_skl = misc.bench(bench_skl, data) - print 'scikits.learn: mean %.2f, std %.2f' % ( - np.mean(res_skl), np.std(res_skl)) - print 'Explained variance: %s\n'% score - - score, res_pybrain = misc.bench(bench_pybrain, data) - print 'Pybrain: mean %s, std %s' % ( - np.mean(res_pybrain), np.std(res_pybrain)) - print 'Explained variance: %s\n'% score - - score, res_milk = misc.bench(bench_milk, data) - print 'milk: mean %s, std %s' % ( - np.mean(res_milk), np.std(res_milk)) - print 'Explained variance: %s\n'% score - - score, res_pymvpa = misc.bench(bench_pymvpa, data) - print 'PyMVPA: mean %s, std %s' % ( - np.mean(res_pymvpa), np.std(res_pymvpa)) - print 'Explained variance: %s\n'% score + score, res = misc.bench(bench_mdp, data) + misc.print_result("pca", dataset, "MDP", score, res) + 
score, res = misc.bench(bench_skl, data) + misc.print_result("pca", dataset, "scikits.learn", score, res) + + score, res = misc.bench(bench_pymvpa, data) + misc.print_result("pca", dataset, "PyMVPA", score, res) + + score, res = misc.bench(bench_pybrain, data) + misc.print_result("pca", dataset, "Pybrain", score, res) + + score, res = misc.bench(bench_milk, data) + misc.print_result("pca", dataset, "milk", score, res) + + misc.save_results() diff --git a/benchmarks/bench_svm.py b/benchmarks/bench_svm.py index f8b1cfc..a2091fd 100644 --- a/benchmarks/bench_svm.py +++ b/benchmarks/bench_svm.py @@ -172,44 +172,30 @@ def bench_orange(X, y, T, valid): sigma = np.median(pdist(data[0])) print 'Done, %s samples with %s features loaded into ' \ - 'memory' % data[0].shape - - score, res_shogun = misc.bench(bench_shogun, data) - print 'Shogun: mean %.2f, std %.2f' % ( - np.mean(res_shogun), np.std(res_shogun)) - print 'Score: %.2f\n' % score - - score, res_mdp = misc.bench(bench_mdp, data) - print 'MDP: mean %.2f, std %.2f' % ( - np.mean(res_mdp), np.std(res_mdp)) - print 'Score: %.2f\n' % score - - score, res_skl = misc.bench(bench_skl, data) - print 'scikits.learn: mean %.2f, std %.2f' % ( - np.mean(res_skl), np.std(res_skl)) - print 'Score: %.2f\n' % score - - score, res_mlpy = misc.bench(bench_mlpy, data) - print 'MLPy: mean %.2f, std %.2f' % ( - np.mean(res_mlpy), np.std(res_mlpy)) - print 'Score: %.2f\n' % score - - score, res_pymvpa = misc.bench(bench_pymvpa, data) - print 'PyMVPA: mean %.2f, std %.2f' % ( - np.mean(res_pymvpa), np.std(res_pymvpa)) - print 'Score: %.2f\n' % score - - score, res_pybrain = misc.bench(bench_pybrain, data) - print 'Pybrain: mean %.2f, std %.2f' % ( - np.mean(res_pybrain), np.std(res_pybrain)) - print 'Score: %.2f\n' % score - - score, res_milk = misc.bench(bench_milk, data) - print 'milk: mean %.2f, std %.2f' % ( - np.mean(res_milk), np.std(res_milk)) - print 'Score: %.2f\n' % score - - score, res_orange = misc.bench(bench_orange, data) - 
print 'Orange: mean %.2f, std %.2f' % ( - np.mean(res_orange), np.std(res_orange)) - print 'Score: %.2f\n' % score + 'memory\n' % data[0].shape + + score, res = misc.bench(bench_shogun, data) + misc.print_result("svm", dataset, "Shogun", score, res) + + score, res = misc.bench(bench_mdp, data) + misc.print_result("svm", dataset, "MDP", score, res) + + score, res = misc.bench(bench_skl, data) + misc.print_result("svm", dataset, "scikits.learn", score, res) + + score, res = misc.bench(bench_mlpy, data) + misc.print_result("svm", dataset, "MLPy", score, res) + + score, res = misc.bench(bench_pymvpa, data) + misc.print_result("svm", dataset, "PyMVPA", score, res) + + score, res = misc.bench(bench_pybrain, data) + misc.print_result("svm", dataset, "Pybrain", score, res) + + score, res = misc.bench(bench_milk, data) + misc.print_result("svm", dataset, "milk", score, res) + + score, res = misc.bench(bench_orange, data) + misc.print_result("svm", dataset, "Orange", score, res) + + misc.save_results() diff --git a/benchmarks/misc.py b/benchmarks/misc.py index 311dca1..6464b8d 100644 --- a/benchmarks/misc.py +++ b/benchmarks/misc.py @@ -2,6 +2,7 @@ import numpy as np import os + def load_data(dataset): f = open(os.path.dirname(__file__) + '/data/%s_train.data' % dataset) @@ -33,6 +34,7 @@ def load_data(dataset): def dtime_to_seconds(dtime): return dtime.seconds + (dtime.microseconds * 1e-6) + def bench(func, data, n=10): """ Benchmark a given function. The function is executed n times and @@ -44,7 +46,8 @@ def bench(func, data, n=10): ---------- func: function to benchmark - data: tuple (X, y, T, valid) containing training (X, y) and validation (T, valid) data. + data: tuple (X, y, T, valid) containing training (X, y) + and validation (T, valid) data. 
Returns ------- @@ -65,7 +68,46 @@ def bench(func, data, n=10): time = [] return score, np.array(time) +task_string = "" +dataset_string = "" +packages = [] +scores = [] +means = [] +stds = [] + + +def print_result(task, dataset, package, score, timing_results): + global task_string + global dataset_string + global packages + global scores + global means + global stds + + print '%s on dataset %s' % (task, dataset) + mean = np.mean(timing_results) + std = np.std(timing_results) + print '%s: mean %.2f, std %.2f' % (package, mean, std) + print 'Score: %.2f\n' % score + + task_string = task + dataset_string = dataset + packages.append(package) + scores.append(score) + means.append(mean) + stds.append(std) + + +def save_results(): + global task_string + global dataset_string + with open('%s_%s.results' % (task_string, dataset_string), 'w') as f: + import pickle + pickle.dump([task_string, dataset_string, packages, + scores, means, stds], f) + + USAGE = """usage: python %s dataset where dataset is one of {madelon, arcene} -""" \ No newline at end of file +""" diff --git a/benchmarks/plot.py b/benchmarks/plot.py new file mode 100644 index 0000000..b83065d --- /dev/null +++ b/benchmarks/plot.py @@ -0,0 +1,198 @@ + +import numpy as np + + +def plot_results_for_task(task, datasets, packages, scores, means, stds): + """Plot the results for this task, grouping by package + + task : string + The name of the task + datasets : list of strings, shape = [n_datasets] + The names of the datasets + packages : list of strings, shape = [n_packages] + scores : array-like, shape = [n_datasets, n_packages] + The scores of the tests + means : array-like, shape = [n_datasets, n_packages] + The means of the timings + std : array-like, shape = [n_datasets, n_packages] + The standard deviations of the timings + """ + + import matplotlib.pyplot as plt + import itertools + c = itertools.cycle('bgcmykbgrcmyk') + + n_datasets = len(datasets) + n_packages = len(packages) + + scores = 
np.atleast_2d(scores) + m, n = scores.shape + assert m == n_datasets, ValueError("scores must be shape %d,%d" % + (n_datasets, n_packages)) + + means = np.atleast_2d(means) + m, n = means.shape + assert m == n_datasets, ValueError("means must be shape %d,%d" % + (n_datasets, n_packages)) + + stds = np.atleast_2d(stds) + m, n = stds.shape + assert m == n_datasets, ValueError("stds must be shape %d,%d" % + (n_datasets, n_packages)) + + fig = plt.figure() + ax = fig.add_subplot(111) + + ind = np.arange(n_packages) # the x locations for the groups + width = 0.35 # the width of the bars + + for i in range(n_datasets): + rect = ax.bar(ind + i * width, means[i, :], width, + color=c.next(), yerr=stds[i, :], + ecolor='k', label=datasets[i]) + + ax.set_title('Time needed to perform train + predict (smaller is better)') + ax.set_ylabel('Seconds') + ax.set_xticks(ind + width) + ax.set_xticklabels(tuple(packages)) + ax.legend() + + plt.show() + plt.savefig("bench_%s.png" % (task)) + + +def hcat(left, right, spaces=""): + res = [] + for l, r in zip(left,right): + res.append(l + spaces + r) + return res + + +def frame(top, bottom): + # calculate the max length of all the strings + max_len = len(top) + for b in bottom: + if len(b) > max_len: + max_len = len(b) + + f = ["="*max_len] + spaces = " "*(max_len - len(top)) + f.append(spaces + top) + f.append("="*max_len) + for b in bottom: + spaces = " "*(max_len - len(b)) + f.append(spaces + b) + f.append("="*max_len) + + return f + + +def rst_table(task, datasets, packages, values, use_min=True): + """Print the results in a table like this one: + + ============ ======= ====== ====== ======= ======== ============= ======== + Dataset PyMVPA Shogun MDP Pybrain MLPy scikit-learn Milk + ============ ======= ====== ====== ======= ======== ============= ======== + Madelon 11.52 5.63 40.48 17.5 9.47 **5.20** 5.76 + Arcene 1.30 0.39 4.87 -- 1.61 0.38 **0.33** + ============ ======= ====== ====== ======= ======== ============= ======== + """ + 
import math + + a = "Dataset" + b = datasets + output = frame(a, b) + + value_strings = [] + for v in values: + # turn the values into strings + vs_temp = [] + m = np.inf + for v2 in v: + x = float(v2) + if not math.isnan(x) and x < m: + m = x + for v2 in v: + x = float(v2) + if math.isnan(x) or math.isinf(x): + vs_temp.append("--") + elif use_min and v2 == m: + vs_temp.append("**%.02f**" % v2) + else: + vs_temp.append("%.02f" % v2) + value_strings.append(vs_temp) + + # transpose the value strings list of lists so that we can work on + # columns + values = [list(v) for v in zip(*value_strings)] + + for a, b in zip(packages, values): + o = frame(a, b) + output = hcat(output, o, " "*4) + + return output + + +def prepare_results(task): + + import glob + result_files = glob.glob("%s*.results" % (task)) + + datasets = [] + packages = [] + + scores = [] + means = [] + stds = [] + + for i, result_file in enumerate(result_files): + + with open(result_file, 'r') as f: + import pickle + result = pickle.load(f) + + datasets.append(result[1]) + + if packages == []: + packages = result[2] + + scores.append(result[3]) + means.append(result[4]) + stds.append(result[5]) + + plot_results_for_task(task, datasets, packages, + scores, means, stds) + rst = rst_table(task, datasets, packages, means, use_min=True) + print "Timing for ", task + for l in rst: + print l + + print + rst = rst_table(task, datasets, packages, scores, use_min=False) + print "Scores for ", task + for l in rst: + print l + print + + +USAGE = """usage: python plot.py package + +where package is one of {elasticnet, kmeans, ...} +""" + +if __name__ == "__main__": + import sys + + # don't bother me with warnings + import warnings + warnings.simplefilter('ignore') + np.seterr(all='ignore') + + #print __doc__ + '\n' + if not len(sys.argv) == 2: + print USAGE + sys.exit(-1) + else: + task = sys.argv[1] + + prepare_results(task)