Commit d694a70

deepsource fix, coderabbitai suggestions
1 parent: 4261679

File tree

1 file changed: +53 -12 lines changed


src/handlers/http/clickbench.rs (+53 -12)
@@ -16,7 +16,7 @@
  *
  */

-use std::{collections::HashMap, env, fs, time::Instant};
+use std::{collections::HashMap, env, fs, process::Command, time::Instant};

 use actix_web::{web::Json, Responder};
 use datafusion::{
@@ -28,15 +28,34 @@ use datafusion::{
     sql::{parser::DFParser, sqlparser::dialect::dialect_from_str},
 };
 use serde_json::{json, Value};
+use tracing::warn;
+static PARQUET_FILE: &str = "PARQUET_FILE";
+static QUERIES_FILE: &str = "QUERIES_FILE";

 pub async fn clickbench_benchmark() -> Result<impl Responder, actix_web::Error> {
+    drop_system_caches()
+        .await
+        .map_err(actix_web::error::ErrorInternalServerError)?;
     let results = tokio::task::spawn_blocking(run_benchmark)
         .await
         .map_err(actix_web::error::ErrorInternalServerError)?
         .map_err(actix_web::error::ErrorInternalServerError)?;
     Ok(results)
 }

+pub async fn drop_system_caches() -> Result<(), anyhow::Error> {
+    // Sync to flush file system buffers
+    Command::new("sync")
+        .status()
+        .expect("Failed to execute sync command");
+    let _ = Command::new("sudo")
+        .args(["sh", "-c", "echo 3 > /proc/sys/vm/drop_caches"])
+        .output()
+        .map_err(|e| anyhow::Error::msg(e.to_string()))?;
+
+    Ok(())
+}
+
 #[tokio::main(flavor = "multi_thread")]
 pub async fn run_benchmark() -> Result<Json<Value>, anyhow::Error> {
     let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
@@ -61,10 +80,13 @@ pub async fn run_benchmark() -> Result<Json<Value>, anyhow::Error> {
     let mut table_options = HashMap::new();
     table_options.insert("binary_as_string", "true");

-    let parquet_file = env::var("PARQUET_FILE")?;
+    let parquet_file = env::var(PARQUET_FILE)
+        .map_err(|_| anyhow::anyhow!("PARQUET_FILE environment variable not set. Please set it to the path of the hits.parquet file."))?;
     register_hits(&ctx, &parquet_file).await?;
+    println!("hits registered");
     let mut query_list = Vec::new();
-    let queries_file = env::var("QUERIES_FILE")?;
+    let queries_file = env::var(QUERIES_FILE)
+        .map_err(|_| anyhow::anyhow!("QUERIES_FILE environment variable not set. Please set it to the path of the queries file."))?;
     let queries = fs::read_to_string(queries_file)?;
     for query in queries.lines() {
         query_list.push(query.to_string());
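The two env::var call sites above now differ only in the key and the hint text. A tiny helper could capture the pattern; this is a sketch, not part of the commit, and require_env is a hypothetical name.

// Sketch: read a required environment variable, attaching a descriptive
// error message when it is unset.
fn require_env(key: &str, hint: &str) -> Result<String, anyhow::Error> {
    std::env::var(key).map_err(|_| anyhow::anyhow!("{key} environment variable not set. {hint}"))
}

// Usage, mirroring the call sites above:
// let parquet_file = require_env(PARQUET_FILE, "Please set it to the path of the hits.parquet file.")?;
// let queries_file = require_env(QUERIES_FILE, "Please set it to the path of the queries file.")?;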
@@ -73,7 +95,7 @@ pub async fn run_benchmark() -> Result<Json<Value>, anyhow::Error> {
 }

 async fn register_hits(ctx: &SessionContext, parquet_file: &str) -> Result<(), anyhow::Error> {
-    let options: ParquetReadOptions<'_> = Default::default();
+    let options: ParquetReadOptions<'_> = ParquetReadOptions::default();
     ctx.register_parquet("hits", parquet_file, options)
         .await
         .map_err(|e| {
@@ -87,34 +109,53 @@ pub async fn execute_queries(
     query_list: Vec<String>,
 ) -> Result<Json<Value>, anyhow::Error> {
     const TRIES: usize = 3;
-    let mut results = Vec::new();
+    let mut results = Vec::with_capacity(query_list.len());
+    let mut query_count = 1;
+    let mut total_elapsed_per_iteration = [0.0; TRIES];

-    for sql in query_list.iter() {
-        let mut elapsed_times = Vec::new();
-        for _iteration in 1..=TRIES {
+    for (query_index, sql) in query_list.iter().enumerate() {
+        let mut elapsed_times = Vec::with_capacity(TRIES);
+        for iteration in 1..=TRIES {
             let start = Instant::now();
             let task_ctx = ctx.task_ctx();
             let dialect = &task_ctx.session_config().options().sql_parser.dialect;
             let dialect = dialect_from_str(dialect).ok_or_else(|| {
                 plan_datafusion_err!(
                     "Unsupported SQL dialect: {dialect}. Available dialects: \
-                    Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \
-                    MsSQL, ClickHouse, BigQuery, Ansi."
+                     Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \
+                     MsSQL, ClickHouse, BigQuery, Ansi."
                 )
             })?;

             let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
-            let statement = statements.front().unwrap();
+            let statement = statements
+                .front()
+                .ok_or_else(|| anyhow::anyhow!("No SQL statement found in query: {}", sql))?;
             let plan = ctx.state().statement_to_plan(statement.clone()).await?;

             let df = ctx.execute_logical_plan(plan).await?;
             let physical_plan = df.create_physical_plan().await?;

             let _ = collect(physical_plan, task_ctx.clone()).await?;
             let elapsed = start.elapsed().as_secs_f64();
+            total_elapsed_per_iteration[iteration - 1] += elapsed;
+
+            warn!("query {query_count} iteration {iteration} completed in {elapsed} secs");
             elapsed_times.push(elapsed);
         }
-        results.push(elapsed_times);
+        query_count += 1;
+        results.push(json!({
+            "query_index": query_index,
+            "query": sql,
+            "elapsed_times": elapsed_times
+        }));
+    }
+    for (iteration, total_elapsed) in total_elapsed_per_iteration.iter().enumerate() {
+        warn!(
+            "Total time for iteration {}: {} seconds",
+            iteration + 1,
+            total_elapsed
+        );
     }

     let result_json = json!(results);
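With this change each entry in results is a tagged JSON object rather than a bare array of timings. Illustratively (values made up), one element of the response now has this shape:

// Illustrative shape of one element of `results` after this commit:
let example = serde_json::json!({
    "query_index": 0,
    "query": "SELECT COUNT(*) FROM hits;",
    "elapsed_times": [1.234, 0.987, 0.951]
});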
