|
| 1 | +/* |
| 2 | + * Parseable Server (C) 2022 - 2024 Parseable, Inc. |
| 3 | + * |
| 4 | + * This program is free software: you can redistribute it and/or modify |
| 5 | + * it under the terms of the GNU Affero General Public License as |
| 6 | + * published by the Free Software Foundation, either version 3 of the |
| 7 | + * License, or (at your option) any later version. |
| 8 | + * |
| 9 | + * This program is distributed in the hope that it will be useful, |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | + * GNU Affero General Public License for more details. |
| 13 | + * |
| 14 | + * You should have received a copy of the GNU Affero General Public License |
| 15 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | + * |
| 17 | + */ |
| 18 | + |
| 19 | +use std::{collections::HashMap, env, fs, time::Instant}; |
| 20 | + |
| 21 | +use actix_web::{web::Json, Responder}; |
| 22 | +use datafusion::{ |
| 23 | + common::plan_datafusion_err, |
| 24 | + error::DataFusionError, |
| 25 | + execution::{runtime_env::RuntimeEnvBuilder, SessionStateBuilder}, |
| 26 | + physical_plan::collect, |
| 27 | + prelude::{ParquetReadOptions, SessionConfig, SessionContext}, |
| 28 | + sql::{parser::DFParser, sqlparser::dialect::dialect_from_str}, |
| 29 | +}; |
| 30 | +use serde_json::{json, Value}; |
| 31 | + |
| 32 | +pub async fn clickbench_benchmark() -> Result<impl Responder, actix_web::Error> { |
| 33 | + let results = tokio::task::spawn_blocking(run_benchmark) |
| 34 | + .await |
| 35 | + .map_err(actix_web::error::ErrorInternalServerError)? |
| 36 | + .map_err(actix_web::error::ErrorInternalServerError)?; |
| 37 | + Ok(results) |
| 38 | +} |
| 39 | + |
| 40 | +#[tokio::main(flavor = "multi_thread")] |
| 41 | +pub async fn run_benchmark() -> Result<Json<Value>, anyhow::Error> { |
| 42 | + let mut session_config = SessionConfig::from_env()?.with_information_schema(true); |
| 43 | + |
| 44 | + session_config = session_config.with_batch_size(8192); |
| 45 | + |
| 46 | + let rt_builder = RuntimeEnvBuilder::new(); |
| 47 | + // set memory pool size |
| 48 | + let runtime_env = rt_builder.build_arc()?; |
| 49 | + let state = SessionStateBuilder::new() |
| 50 | + .with_default_features() |
| 51 | + .with_config(session_config) |
| 52 | + .with_runtime_env(runtime_env) |
| 53 | + .build(); |
| 54 | + state |
| 55 | + .catalog_list() |
| 56 | + .catalog(&state.config_options().catalog.default_catalog) |
| 57 | + .expect("default catalog is provided by datafusion"); |
| 58 | + |
| 59 | + let ctx = SessionContext::new_with_state(state); |
| 60 | + |
| 61 | + let mut table_options = HashMap::new(); |
| 62 | + table_options.insert("binary_as_string", "true"); |
| 63 | + |
| 64 | + let parquet_file = env::var("PARQUET_FILE")?; |
| 65 | + register_hits(&ctx, &parquet_file).await?; |
| 66 | + let mut query_list = Vec::new(); |
| 67 | + let queries_file = env::var("QUERIES_FILE")?; |
| 68 | + let queries = fs::read_to_string(queries_file)?; |
| 69 | + for query in queries.lines() { |
| 70 | + query_list.push(query.to_string()); |
| 71 | + } |
| 72 | + execute_queries(&ctx, query_list).await |
| 73 | +} |
| 74 | + |
| 75 | +async fn register_hits(ctx: &SessionContext, parquet_file: &str) -> Result<(), anyhow::Error> { |
| 76 | + let options: ParquetReadOptions<'_> = Default::default(); |
| 77 | + ctx.register_parquet("hits", parquet_file, options) |
| 78 | + .await |
| 79 | + .map_err(|e| { |
| 80 | + DataFusionError::Context(format!("Registering 'hits' as {parquet_file}"), Box::new(e)) |
| 81 | + })?; |
| 82 | + Ok(()) |
| 83 | +} |
| 84 | + |
| 85 | +pub async fn execute_queries( |
| 86 | + ctx: &SessionContext, |
| 87 | + query_list: Vec<String>, |
| 88 | +) -> Result<Json<Value>, anyhow::Error> { |
| 89 | + const TRIES: usize = 3; |
| 90 | + let mut results = Vec::new(); |
| 91 | + |
| 92 | + for sql in query_list.iter() { |
| 93 | + let mut elapsed_times = Vec::new(); |
| 94 | + for _iteration in 1..=TRIES { |
| 95 | + let start = Instant::now(); |
| 96 | + let task_ctx = ctx.task_ctx(); |
| 97 | + let dialect = &task_ctx.session_config().options().sql_parser.dialect; |
| 98 | + let dialect = dialect_from_str(dialect).ok_or_else(|| { |
| 99 | + plan_datafusion_err!( |
| 100 | + "Unsupported SQL dialect: {dialect}. Available dialects: \ |
| 101 | + Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \ |
| 102 | + MsSQL, ClickHouse, BigQuery, Ansi." |
| 103 | + ) |
| 104 | + })?; |
| 105 | + |
| 106 | + let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?; |
| 107 | + let statement = statements.front().unwrap(); |
| 108 | + let plan = ctx.state().statement_to_plan(statement.clone()).await?; |
| 109 | + |
| 110 | + let df = ctx.execute_logical_plan(plan).await?; |
| 111 | + let physical_plan = df.create_physical_plan().await?; |
| 112 | + |
| 113 | + let _ = collect(physical_plan, task_ctx.clone()).await?; |
| 114 | + let elapsed = start.elapsed().as_secs_f64(); |
| 115 | + elapsed_times.push(elapsed); |
| 116 | + } |
| 117 | + results.push(elapsed_times); |
| 118 | + } |
| 119 | + |
| 120 | + let result_json = json!(results); |
| 121 | + |
| 122 | + Ok(Json(result_json)) |
| 123 | +} |
0 commit comments