From e28a445224fcd188e1a9f572853bf7136e980145 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Tue, 18 Mar 2025 08:53:51 -0400 Subject: [PATCH 1/2] update batch size in session config current: 1000000 update to 20000 (DataFusion's default is 8192) this helps in query performance as larger batch size consumes more memory and slows down performance with this change, parseable's clickbench numbers are [181,68,68] --- src/query/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/mod.rs b/src/query/mod.rs index df230ef07..baca477ed 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -111,7 +111,7 @@ impl Query { let mut config = SessionConfig::default() .with_parquet_pruning(true) .with_prefer_existing_sort(true) - .with_batch_size(1000000); + .with_batch_size(20000); // Pushdown filters allows DF to push the filters as far down in the plan as possible // and thus, reducing the number of rows decoded From 0c726f711af6697c9987fa9786c33c1e65269564 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Wed, 19 Mar 2025 06:19:32 -0400 Subject: [PATCH 2/2] batch size made configurable via env var --- src/cli.rs | 8 ++++++++ src/query/mod.rs | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index ffb3a80eb..9a0e1bca2 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -270,6 +270,14 @@ pub struct Options { )] pub row_group_size: usize, + #[arg( + long, + env = "P_EXECUTION_BATCH_SIZE", + default_value = "20000", + help = "batch size for query execution" + )] + pub execution_batch_size: usize, + #[arg( long = "compression-algo", env = "P_PARQUET_COMPRESSION_ALGO", diff --git a/src/query/mod.rs b/src/query/mod.rs index baca477ed..6486b4836 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -111,7 +111,9 @@ impl Query { let mut config = SessionConfig::default() .with_parquet_pruning(true) .with_prefer_existing_sort(true) - .with_batch_size(20000); + //batch size has been made configurable via environment variable + 
//default value is 20000 + .with_batch_size(PARSEABLE.options.execution_batch_size); // Pushdown filters allows DF to push the filters as far down in the plan as possible // and thus, reducing the number of rows decoded