Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

One step #7

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ Collection of benchmarks comparing various python-based machine
learning packages.

This is meant to work with the development version of the libraries
scikits.learn, mlpy, pybrain, pymvpa, mdp and shogun. It might be hard
to get all packages working on the same machine, but benchmarks are
designed so that if something fail it will just print the exception
and go to the next one.
scikits.learn, mlpy, pybrain, pymvpa, mdp, shogun, milk and orange. It
might be hard to get all packages working on the same machine, but
benchmarks are designed so that if something fails it will just print
the exception and go to the next one.

To execute a benchmark, just type from the prompt::

$ python benchmarks/bench_$name.py

and you will se as output the mean and std deviation for the timing of
and you will see as output the mean and standard deviation for the timing of
running the benchmark 10 times with its extreme values removed.

Results
Expand Down Expand Up @@ -43,6 +43,7 @@ References
- Shogun: http://www.shogun-toolbox.org/
- PyBrain : http://pybrain.org/
- Milk : http://luispedro.org/software/milk
- Orange : http://orange.biolab.si/


Misc
Expand Down
20 changes: 10 additions & 10 deletions benchmarks/bench_elasticnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,17 @@ def bench_pymvpa(X, y, T, valid):

print 'Loading data ...'
data = misc.load_data(dataset)

print 'Done, %s samples with %s features loaded into ' \
'memory' % data[0].shape
'memory\n' % data[0].shape

score, res = misc.bench(bench_skl, data)
misc.print_result("elasticnet", dataset, "scikits.learn", score, res)

score, res_skl = misc.bench(bench_skl, data)
print 'scikits.learn: mean %s, std %s' % (res_skl.mean(), res_skl.std())
print 'MSE ', score
score, res = misc.bench(bench_mlpy, data)
misc.print_result("elasticnet", dataset, "MLPy", score, res)

score, res_mlpy = misc.bench(bench_mlpy, data)
print 'MLPy: mean %s, std %s' % (res_mlpy.mean(), res_mlpy.std())
print 'MSE ', score
score, res = misc.bench(bench_pymvpa, data)
misc.print_result("elasticnet", dataset, "PyMVPA", score, res)

score, res_pymvpa = misc.bench(bench_pymvpa, data)
print 'PyMVPA: mean %s, std %s' % (res_pymvpa.mean(), res_pymvpa.std())
print 'MSE ', score
misc.save_results()
52 changes: 21 additions & 31 deletions benchmarks/bench_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,34 +119,24 @@ def bench_milk(X, y, T, valid):
data = misc.load_data(dataset)

print 'Done, %s samples with %s features loaded into ' \
'memory' % data[0].shape

score, res_shogun = misc.bench(bench_shogun, data)
print 'Shogun: mean %.2f, std %.2f' % (
np.mean(res_shogun), np.std(res_shogun))
print 'Score: %2f\n' % score

score, res_mdp = misc.bench(bench_mdp, data)
print 'MDP: mean %.2f, std %.2f' % (
np.mean(res_mdp), np.std(res_mdp))
print 'Score: %2f\n' % score

score, res_skl = misc.bench(bench_skl, data)
print 'scikits.learn: mean %.2f, std %.2f' % (
np.mean(res_skl), np.std(res_skl))
print 'Score: %2f\n' % score

score, res_mlpy = misc.bench(bench_mlpy, data)
print 'MLPy: mean %.2f, std %.2f' % (
np.mean(res_mlpy), np.std(res_mlpy))
print 'Score: %2f\n' % score

score, res_pybrain = misc.bench(bench_pybrain, data)
print 'Pybrain: mean %.2f, std %.2f' % (
np.mean(res_pybrain), np.std(res_pybrain))
print 'Score: %2f\n' % score

score, res_milk = misc.bench(bench_milk, data)
print 'milk: mean %.2f, std %.2f' % (
np.mean(res_milk), np.std(res_milk))
print 'Score: %2f\n' % score
'memory\n' % data[0].shape

score, res = misc.bench(bench_shogun, data)
misc.print_result("kmeans", dataset, "Shogun", score, res)

score, res = misc.bench(bench_mdp, data)
misc.print_result("kmeans", dataset, "MDP", score, res)

score, res = misc.bench(bench_skl, data)
misc.print_result("kmeans", dataset, "scikits.learn", score, res)

score, res = misc.bench(bench_mlpy, data)
misc.print_result("kmeans", dataset, "MLPy", score, res)

score, res = misc.bench(bench_pybrain, data)
misc.print_result("kmeans", dataset, "Pybrain", score, res)

score, res = misc.bench(bench_milk, data)
misc.print_result("kmeans", dataset, "milk", score, res)

misc.save_results()
34 changes: 15 additions & 19 deletions benchmarks/bench_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,28 +107,24 @@ def bench_milk(X, y, T, valid):
data = misc.load_data(dataset)

print 'Done, %s samples with %s features loaded into ' \
'memory' % data[0].shape
'memory\n' % data[0].shape

score, res_shogun = misc.bench(bench_shogun, data)
print 'Shogun: mean %.2f, std %.2f\n' % (res_shogun.mean(), res_shogun.std())
print 'Score: %.2f' % score
score, res = misc.bench(bench_shogun, data)
misc.print_result("knn", dataset, "Shogun", score, res)

score, res_mdp = misc.bench(bench_mdp, data)
print 'MDP: mean %.2f, std %.2f\n' % (res_mdp.mean(), res_mdp.std())
print 'Score: %.2f' % score
score, res = misc.bench(bench_mdp, data)
misc.print_result("knn", dataset, "MDP", score, res)

score, res_skl = misc.bench(bench_skl, data)
print 'scikits.learn: mean %.2f, std %.2f\n' % (res_skl.mean(), res_skl.std())
print 'Score: %.2f' % score
score, res = misc.bench(bench_skl, data)
misc.print_result("knn", dataset, "scikits.learn", score, res)

score, res_mlpy = misc.bench(bench_mlpy, data)
print 'MLPy: mean %.2f, std %.2f\n' % (res_mlpy.mean(), res_mlpy.std())
print 'Score: %.2f' % score
score, res = misc.bench(bench_mlpy, data)
misc.print_result("knn", dataset, "MLPy", score, res)

score, res_milk = misc.bench(bench_milk, data)
print 'milk: mean %.2f, std %.2f\n' % (res_milk.mean(), res_milk.std())
print 'Score: %.2f' % score
score, res = misc.bench(bench_pymvpa, data)
misc.print_result("knn", dataset, "PyMVPA", score, res)

score, res_pymvpa = misc.bench(bench_pymvpa, data)
print 'PyMVPA: mean %.2f, std %.2f\n' % (res_pymvpa.mean(), res_pymvpa.std())
print 'Score: %.2f' % score
score, res = misc.bench(bench_milk, data)
misc.print_result("knn", dataset, "milk", score, res)

misc.save_results()
28 changes: 12 additions & 16 deletions benchmarks/bench_lassolars.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,15 @@ def bench_pymvpa(X, y, T, valid):
data = misc.load_data(dataset)

print 'Done, %s samples with %s features loaded into ' \
'memory' % data[0].shape

score, res_skl = misc.bench(bench_skl, data)
print 'scikits.learn: mean %.2f, std %.2f' % (
np.mean(res_skl), np.std(res_skl))
print 'MSE: %s\n' % score

score, res_mlpy = misc.bench(bench_mlpy, data)
print 'MLPy: mean %.2f, std %.2f' % (
np.mean(res_mlpy), np.std(res_mlpy))
print 'MSE: %s\n' % score

score, res_pymvpa = misc.bench(bench_pymvpa, data)
print 'PyMVPA: mean %.2f, std %.2f' % (
np.mean(res_pymvpa), np.std(res_pymvpa))
print 'MSE: %s\n' % score
'memory\n' % data[0].shape

score, res = misc.bench(bench_skl, data)
misc.print_result("lassolars", dataset, "scikits.learn", score, res)

score, res = misc.bench(bench_mlpy, data)
misc.print_result("lassolars", dataset, "MLPy", score, res)

score, res = misc.bench(bench_pymvpa, data)
misc.print_result("lassolars", dataset, "PyMVPA", score, res)

misc.save_results()
9 changes: 5 additions & 4 deletions benchmarks/bench_logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ def bench_skl(X, y, T, valid):
data = misc.load_data(dataset)

print 'Done, %s samples with %s features loaded into ' \
'memory' % data[0].shape
'memory\n' % data[0].shape

res_skl = misc.bench(bench_skl, data)
print 'MLPy: mean %.2f, std %.2f\n' % (
np.mean(res_skl), np.std(res_skl))
score, res = misc.bench(bench_skl, data)
misc.print_result("logistic", dataset, "scikits.learn", score, res)

misc.save_results()
39 changes: 15 additions & 24 deletions benchmarks/bench_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,28 +107,19 @@ def bench_milk(X, y, T, valid):
print 'Done, %s samples with %s features loaded into ' \
'memory' % data[0].shape

score, res_mdp = misc.bench(bench_mdp, data)
print 'MDP: mean %s, std %s' % (
np.mean(res_mdp), np.std(res_mdp))
print 'Explained variance: %s\n'% score

score, res_skl = misc.bench(bench_skl, data)
print 'scikits.learn: mean %.2f, std %.2f' % (
np.mean(res_skl), np.std(res_skl))
print 'Explained variance: %s\n'% score

score, res_pybrain = misc.bench(bench_pybrain, data)
print 'Pybrain: mean %s, std %s' % (
np.mean(res_pybrain), np.std(res_pybrain))
print 'Explained variance: %s\n'% score

score, res_milk = misc.bench(bench_milk, data)
print 'milk: mean %s, std %s' % (
np.mean(res_milk), np.std(res_milk))
print 'Explained variance: %s\n'% score

score, res_pymvpa = misc.bench(bench_pymvpa, data)
print 'PyMVPA: mean %s, std %s' % (
np.mean(res_pymvpa), np.std(res_pymvpa))
print 'Explained variance: %s\n'% score
score, res = misc.bench(bench_mdp, data)
misc.print_result("pca", dataset, "MDP", score, res)

score, res = misc.bench(bench_skl, data)
misc.print_result("pca", dataset, "scikits.learn", score, res)

score, res = misc.bench(bench_pymvpa, data)
misc.print_result("pca", dataset, "PyMVPA", score, res)

score, res = misc.bench(bench_pybrain, data)
misc.print_result("pca", dataset, "Pybrain", score, res)

score, res = misc.bench(bench_milk, data)
misc.print_result("pca", dataset, "milk", score, res)

misc.save_results()
68 changes: 27 additions & 41 deletions benchmarks/bench_svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,44 +172,30 @@ def bench_orange(X, y, T, valid):
sigma = np.median(pdist(data[0]))

print 'Done, %s samples with %s features loaded into ' \
'memory' % data[0].shape

score, res_shogun = misc.bench(bench_shogun, data)
print 'Shogun: mean %.2f, std %.2f' % (
np.mean(res_shogun), np.std(res_shogun))
print 'Score: %.2f\n' % score

score, res_mdp = misc.bench(bench_mdp, data)
print 'MDP: mean %.2f, std %.2f' % (
np.mean(res_mdp), np.std(res_mdp))
print 'Score: %.2f\n' % score

score, res_skl = misc.bench(bench_skl, data)
print 'scikits.learn: mean %.2f, std %.2f' % (
np.mean(res_skl), np.std(res_skl))
print 'Score: %.2f\n' % score

score, res_mlpy = misc.bench(bench_mlpy, data)
print 'MLPy: mean %.2f, std %.2f' % (
np.mean(res_mlpy), np.std(res_mlpy))
print 'Score: %.2f\n' % score

score, res_pymvpa = misc.bench(bench_pymvpa, data)
print 'PyMVPA: mean %.2f, std %.2f' % (
np.mean(res_pymvpa), np.std(res_pymvpa))
print 'Score: %.2f\n' % score

score, res_pybrain = misc.bench(bench_pybrain, data)
print 'Pybrain: mean %.2f, std %.2f' % (
np.mean(res_pybrain), np.std(res_pybrain))
print 'Score: %.2f\n' % score

score, res_milk = misc.bench(bench_milk, data)
print 'milk: mean %.2f, std %.2f' % (
np.mean(res_milk), np.std(res_milk))
print 'Score: %.2f\n' % score

score, res_orange = misc.bench(bench_orange, data)
print 'Orange: mean %.2f, std %.2f' % (
np.mean(res_orange), np.std(res_orange))
print 'Score: %.2f\n' % score
'memory\n' % data[0].shape

score, res = misc.bench(bench_shogun, data)
misc.print_result("svm", dataset, "Shogun", score, res)

score, res = misc.bench(bench_mdp, data)
misc.print_result("svm", dataset, "MDP", score, res)

score, res = misc.bench(bench_skl, data)
misc.print_result("svm", dataset, "scikits.learn", score, res)

score, res = misc.bench(bench_mlpy, data)
misc.print_result("svm", dataset, "MLPy", score, res)

score, res = misc.bench(bench_pymvpa, data)
misc.print_result("svm", dataset, "PyMVPA", score, res)

score, res = misc.bench(bench_pybrain, data)
misc.print_result("svm", dataset, "Pybrain", score, res)

score, res = misc.bench(bench_milk, data)
misc.print_result("svm", dataset, "milk", score, res)

score, res = misc.bench(bench_orange, data)
misc.print_result("svm", dataset, "Orange", score, res)

misc.save_results()
46 changes: 44 additions & 2 deletions benchmarks/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import os


def load_data(dataset):

f = open(os.path.dirname(__file__) + '/data/%s_train.data' % dataset)
Expand Down Expand Up @@ -33,6 +34,7 @@ def load_data(dataset):
def dtime_to_seconds(dtime):
    """
    Convert a time difference into a floating-point number of seconds.

    Parameters
    ----------
    dtime: object exposing ``days``, ``seconds`` and ``microseconds``
        attributes (presumably a datetime.timedelta -- confirm against
        the callers).

    Returns
    -------
    float: total length of the interval, in seconds.
    """
    # The days component has to be counted as well: leaving it out silently
    # truncates intervals of 24 hours or more, and yields a large positive
    # value for negative intervals (which are stored with negative days).
    whole_seconds = dtime.days * 86400 + dtime.seconds
    return whole_seconds + (dtime.microseconds * 1e-6)


def bench(func, data, n=10):
"""
Benchmark a given function. The function is executed n times and
Expand All @@ -44,7 +46,8 @@ def bench(func, data, n=10):
----------
func: function to benchmark

data: tuple (X, y, T, valid) containing training (X, y) and validation (T, valid) data.
data: tuple (X, y, T, valid) containing training (X, y)
and validation (T, valid) data.

Returns
-------
Expand All @@ -65,7 +68,46 @@ def bench(func, data, n=10):
time = []
return score, np.array(time)

# Module-level accumulators: print_result() records one entry per reported
# package in them, and save_results() pickles them all to disk.
task_string = ""  # task name given to the most recent print_result() call
dataset_string = ""  # dataset name given to the most recent print_result() call
packages = []  # package names, in the order they were reported
scores = []  # score reported for each package
means = []  # mean of the timing results for each package
stds = []  # standard deviation of the timing results for each package


def print_result(task, dataset, package, score, timing_results):
    """
    Print the outcome of one benchmark run and record it for save_results().

    Parameters
    ----------
    task: name of the benchmarked task; printed and stored in the
        module-level ``task_string``.

    dataset: name of the dataset; printed and stored in the module-level
        ``dataset_string``.

    package: name of the package that was benchmarked.

    score: score obtained by the package, printed with two decimals.

    timing_results: timings of the individual runs; their mean and
        standard deviation are printed and recorded.
    """
    # Only the two strings are rebound here; the result lists are mutated
    # in place and therefore need no ``global`` declaration.
    global task_string, dataset_string

    # A single parenthesised argument prints the same text whether ``print``
    # is a statement or a function.
    print('%s on dataset %s' % (task, dataset))
    timing_mean = np.mean(timing_results)
    timing_std = np.std(timing_results)
    print('%s: mean %.2f, std %.2f' % (package, timing_mean, timing_std))
    print('Score: %.2f\n' % score)

    task_string, dataset_string = task, dataset
    recorded = (
        (packages, package),
        (scores, score),
        (means, timing_mean),
        (stds, timing_std),
    )
    for store, value in recorded:
        store.append(value)


def save_results():
    """
    Pickle the results recorded by print_result() to a file.

    The file is named ``<task>_<dataset>.results`` (from the module-level
    ``task_string`` and ``dataset_string``) and is written relative to the
    current working directory. It holds a single pickled list:
    ``[task_string, dataset_string, packages, scores, means, stds]``.
    """
    import pickle

    filename = '%s_%s.results' % (task_string, dataset_string)
    # Pickle output is a byte stream, so the file must be opened in binary
    # mode: text mode can corrupt the data through newline translation and
    # is rejected outright by pickle.dump on Python 3.
    with open(filename, 'wb') as f:
        pickle.dump([task_string, dataset_string, packages,
                     scores, means, stds], f)


USAGE = """usage: python %s dataset

where dataset is one of {madelon, arcene}
"""
"""
Loading