Add Pprof Profiling to Existing Benchmarks (rust-ml#273)

oojo12 · web-flow · commit 44b244cc1d12 · 2022-11-11T04:16:46.000Z
* add pprof profiling

* add missing config

* target spec dev-dep

* cfg targets

* add missing criterion_main!

* add space for readability

* update Benchmarking Section

* add profiling to all existing benchmarks
diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
@@ -156,6 +156,7 @@ let sol = decomp
 
 ## Benchmarking
 
+### Building Benchmarks
 It is important to the project that we have benchmarks in place to evaluate the benefit of performance related changes. To make that process easier we provide some guidelines for writing benchmarks.
 
 1. Test for a variety of sample sizes for most algorithms [1_000, 10_000, 20_000] will be sufficient. For algorithms where it's not too slow, use 100k instead of 20k.
@@ -169,3 +170,31 @@ It is important to the project that we have benchmarks in place to evaluate the
 6. When benchmarking multi-target the target count should be within the following range: [2, 4].
 7. In `BenchmarkId` include the values used to parametrize the benchmark. For example if we're doing Pls then we may have something like `Canonical-Nipals-5feats-1_000samples`
 8. Pass data as an argument to the function being benched. This will prevent Criterion from including data creation time as part of the benchmark.
+9. Add a profiler; see [here](https://github.com/tikv/pprof-rs#integrate-with-criterion) for an example of how to do so with pprof, Criterion, and Flamegraph.
+
+### Running Benchmarks
+When running benchmarks, you will sometimes want to profile the code execution. Assuming you have followed step 9 to add a pprof profiling hook for the linfa-ica package, you can run the following to get your profiling results as a flamegraph.
+
+`cargo bench -p linfa-ica --bench fast_ica -q -- --profile-time 30`
+
+If you are interested in running a regular Criterion bench for linfa-ica, you can run the following:
+
+`cargo bench -p linfa-ica`
+
+### Reporting Benchmark Metrics
+It is important that we have a consistent methodology for reporting benchmarks. Below is a template that should aid reviewers.
+
+```
+### Context
+In a bullet list describe the following:
+1. Run on battery charge or while plugged in
+2. Power saving mode
+3. Whether the computer was idle during the benchmark
+4. Whether the computer was overheating
+5. Hardware specs
+
+### Bench Command Run
+bench results (code format)
+
+[Attached Flamegraphs if profile runs were also done]
+```
diff --git a/algorithms/linfa-clustering/Cargo.toml b/algorithms/linfa-clustering/Cargo.toml
@@ -50,6 +50,9 @@ serde_json = "1"
 approx = "0.4"
 lax = "0.15.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "k_means"
 harness = false
diff --git a/algorithms/linfa-clustering/benches/appx_dbscan.rs b/algorithms/linfa-clustering/benches/appx_dbscan.rs
@@ -9,6 +9,8 @@ use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn appx_dbscan_bench(c: &mut Criterion) {
@@ -48,9 +50,13 @@ fn appx_dbscan_bench(c: &mut Criterion) {
     benchmark.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = appx_dbscan_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, appx_dbscan_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-clustering/benches/dbscan.rs b/algorithms/linfa-clustering/benches/dbscan.rs
@@ -9,6 +9,8 @@ use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn dbscan_bench(c: &mut Criterion) {
@@ -44,9 +46,13 @@ fn dbscan_bench(c: &mut Criterion) {
     benchmark.finish()
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = dbscan_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, dbscan_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-clustering/benches/gaussian_mixture.rs b/algorithms/linfa-clustering/benches/gaussian_mixture.rs
@@ -10,6 +10,8 @@ use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn gaussian_mixture_bench(c: &mut Criterion) {
@@ -46,9 +48,13 @@ fn gaussian_mixture_bench(c: &mut Criterion) {
     benchmark.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
   name = benches;
-  config = Criterion::default();
+  config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
   targets = gaussian_mixture_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, gaussian_mixture_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-clustering/benches/k_means.rs b/algorithms/linfa-clustering/benches/k_means.rs
@@ -9,6 +9,8 @@ use linfa_datasets::generate;
 use ndarray::Array2;
 use ndarray_rand::RandomExt;
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 #[derive(Default)]
@@ -155,9 +157,18 @@ fn k_means_init_bench(c: &mut Criterion) {
     }
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = k_means_bench, k_means_init_bench, k_means_incr_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(
+    benches,
+    k_means_bench,
+    k_means_init_bench,
+    k_means_incr_bench
+);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-ftrl/Cargo.toml b/algorithms/linfa-ftrl/Cargo.toml
@@ -29,6 +29,9 @@ criterion = "0.4.0"
 approx = "0.4"
 linfa-datasets = { version = "0.6.0", path = "../../datasets", features = ["winequality"] }
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "ftrl"
 harness = false
diff --git a/algorithms/linfa-ftrl/benches/ftrl.rs b/algorithms/linfa-ftrl/benches/ftrl.rs
@@ -7,6 +7,8 @@ use ndarray::{Array1, Array2};
 use ndarray_rand::{
     rand::distributions::Uniform, rand::rngs::SmallRng, rand::SeedableRng, RandomExt,
 };
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 
 fn fit_without_prior_model(c: &mut Criterion) {
     let mut rng = SmallRng::seed_from_u64(42);
@@ -86,9 +88,18 @@ fn get_dataset(
     Dataset::new(features, target)
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = fit_without_prior_model, fit_with_prior_model, predict
 }
+#[cfg(target_os = "windows")]
+criterion_group!(
+    benches,
+    fit_without_prior_model,
+    fit_with_prior_model,
+    predict
+);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-ica/Cargo.toml b/algorithms/linfa-ica/Cargo.toml
@@ -41,6 +41,9 @@ ndarray-npy = { version = "0.8", default-features = false }
 paste = "1.0"
 criterion = "0.4.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "fast_ica"
 harness = false
diff --git a/algorithms/linfa-ica/benches/fast_ica.rs b/algorithms/linfa-ica/benches/fast_ica.rs
@@ -4,14 +4,16 @@ use linfa_ica::fast_ica::{FastIca, GFunc};
 use ndarray::{array, concatenate};
 use ndarray::{Array, Array2, Axis};
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform, RandomExt};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn perform_ica(size: usize, gfunc: GFunc) {
     let sources_mixed = create_data(size);
 
     let ica = FastIca::params().gfunc(gfunc).random_state(10);
 
-    let ica = ica.fit(&DatasetBase::from(sources_mixed.view()));
+    ica.fit(&DatasetBase::from(sources_mixed.view())).unwrap();
 }
 
 fn create_data(nsamples: usize) -> Array2<f64> {
@@ -64,5 +66,13 @@ fn bench(c: &mut Criterion) {
     }
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-linear/Cargo.toml b/algorithms/linfa-linear/Cargo.toml
@@ -36,6 +36,9 @@ approx = "0.4"
 criterion = "0.4.0" 
 statrs = "0.16.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "ols_bench"
 harness = false
diff --git a/algorithms/linfa-linear/benches/ols_bench.rs b/algorithms/linfa-linear/benches/ols_bench.rs
@@ -4,6 +4,8 @@ use linfa::Dataset;
 use linfa_datasets::generate::make_dataset;
 use linfa_linear::{LinearRegression, TweedieRegressor};
 use ndarray::Ix1;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use statrs::distribution::{DiscreteUniform, Laplace};
 
 #[allow(unused_must_use)]
@@ -57,5 +59,13 @@ fn bench(c: &mut Criterion) {
     group.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-nn/Cargo.toml b/algorithms/linfa-nn/Cargo.toml
@@ -41,6 +41,9 @@ criterion = "0.4.0"
 rand_xoshiro = "0.6"
 ndarray-rand = "0.14"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "nn"
 harness = false
diff --git a/algorithms/linfa-nn/benches/nn.rs b/algorithms/linfa-nn/benches/nn.rs
@@ -2,6 +2,8 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 use linfa_nn::{distance::*, CommonNearestNeighbour, NearestNeighbour};
 use ndarray::{Array1, Array2};
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform, RandomExt};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn nn_build_bench(c: &mut Criterion) {
@@ -96,9 +98,13 @@ fn within_range_bench(c: &mut Criterion) {
     }
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = nn_build_bench, k_nearest_bench, within_range_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, nn_build_bench, k_nearest_bench, within_range_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-pls/Cargo.toml b/algorithms/linfa-pls/Cargo.toml
@@ -42,6 +42,9 @@ rand_xoshiro = "0.6"
 criterion = "0.4.0"
 statrs = "0.16.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "pls"
 harness = false
diff --git a/algorithms/linfa-pls/benches/pls.rs b/algorithms/linfa-pls/benches/pls.rs
@@ -4,6 +4,8 @@ use linfa::Dataset;
 use linfa_datasets::generate::make_dataset;
 use linfa_pls::Algorithm;
 use linfa_pls::{PlsCanonical, PlsCca, PlsRegression};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use statrs::distribution::{DiscreteUniform, Laplace};
 
 #[allow(unused_must_use)]
@@ -86,5 +88,13 @@ fn bench(c: &mut Criterion) {
     group.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-trees/Cargo.toml b/algorithms/linfa-trees/Cargo.toml
@@ -33,9 +33,11 @@ linfa = { version = "0.6.0", path = "../.." }
 rand = { version = "0.8", features = ["small_rng"] }
 criterion = "0.4.0"
 approx = "0.4"
-
 linfa-datasets = { version = "0.6.0", path = "../../datasets/", features = ["iris"] }
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "decision_tree"
 harness = false
diff --git a/algorithms/linfa-trees/benches/decision_tree.rs b/algorithms/linfa-trees/benches/decision_tree.rs
@@ -5,6 +5,8 @@ use ndarray::{concatenate, Array, Array1, Array2, Axis};
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::{StandardNormal, Uniform};
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand::rngs::SmallRng;
 
 fn generate_blobs(means: &Array2<f64>, samples: usize, mut rng: &mut SmallRng) -> Array2<f64> {
@@ -52,5 +54,13 @@ fn decision_tree_bench(c: &mut Criterion) {
     group.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = decision_tree_bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, decision_tree_bench);
+
 criterion_main!(benches);