Commit e718c1a

minor: fix typos in comments / structure names (#13879)
* minor: fix typo error in datafusion
* fix: fix rebase error
* fix: format HashJoinExec doc
* doc: recover thiserror/preemptively
* fix: other typo error fixed
* fix: directories to dir_entries in catalog example
1 parent 63ba5b6 commit e718c1a

File tree: 138 files changed, +277 −277 lines


Cargo.toml (+1 −1)

@@ -73,7 +73,7 @@ version = "43.0.0"
 # selectively turn them on if needed, since we can override default-features = true (from false)
 # for the inherited dependency but cannot do the reverse (override from true to false).
 #
-# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
+# See for more details: https://github.com/rust-lang/cargo/issues/11329
 ahash = { version = "0.8", default-features = false, features = [
     "runtime-rng",
 ] }

datafusion-cli/src/functions.rs (+1 −1)

@@ -360,7 +360,7 @@ impl TableFunctionImpl for ParquetMetadataFunc {
             Field::new("total_uncompressed_size", DataType::Int64, true),
         ]));

-        // construct recordbatch from metadata
+        // construct record batch from metadata
         let mut filename_arr = vec![];
         let mut row_group_id_arr = vec![];
         let mut row_group_num_rows_arr = vec![];

datafusion-examples/README.md (+1 −1)

@@ -22,7 +22,7 @@
 This crate includes end to end, highly commented examples of how to use
 various DataFusion APIs to help you get started.

-## Prerequisites:
+## Prerequisites

 Run `git submodule update --init` to init test files.

datafusion-examples/examples/advanced_parquet_index.rs (+2 −2)

@@ -82,7 +82,7 @@ use url::Url;
 /// Specifically, this example illustrates how to:
 /// 1. Use [`ParquetFileReaderFactory`] to avoid re-reading parquet metadata on each query
 /// 2. Use [`PruningPredicate`] for predicate analysis
-/// 3. Pass a row group selection to [`ParuetExec`]
+/// 3. Pass a row group selection to [`ParquetExec`]
 /// 4. Pass a row selection (within a row group) to [`ParquetExec`]
 ///
 /// Note this is a *VERY* low level example for people who want to build their
@@ -211,7 +211,7 @@ async fn main() -> Result<()> {
     //
     // Note: in order to prune pages, the Page Index must be loaded and the
     // ParquetExec will load it on demand if not present. To avoid a second IO
-    // during query, this example loaded the Page Index pre-emptively by setting
+    // during query, this example loaded the Page Index preemptively by setting
     // `ArrowReader::with_page_index` in `IndexedFile::try_new`
     provider.set_use_row_selection(true);
     println!("** Select data, predicate `id = 950`");
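As context for the hunk above, here is a minimal, self-contained sketch of preloading the Page Index via the parquet crate's `ArrowReaderOptions` (the mechanism the comment refers to); `data.parquet` is a placeholder path, not a file from this repo:

use std::fs::File;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let file = File::open("data.parquet")?;
    // Load the Page Index up front so later row-group / page pruning
    // does not require a second metadata read.
    let options = ArrowReaderOptions::new().with_page_index(true);
    let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)?;
    let mut reader = builder.build()?;
    if let Some(batch) = reader.next() {
        println!("first batch: {} rows", batch?.num_rows());
    }
    Ok(())
}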

datafusion-examples/examples/analyzer_rule.rs (+2 −2)

@@ -138,7 +138,7 @@ impl AnalyzerRule for RowLevelAccessControl {
     fn analyze(&self, plan: LogicalPlan, _config: &ConfigOptions) -> Result<LogicalPlan> {
         // use the TreeNode API to recursively walk the LogicalPlan tree
         // and all of its children (inputs)
-        let transfomed_plan = plan.transform(|plan| {
+        let transformed_plan = plan.transform(|plan| {
             // This closure is called for each LogicalPlan node
             // if it is a Scan node, add a filter to remove all managers
             if is_employee_table_scan(&plan) {
@@ -166,7 +166,7 @@ impl AnalyzerRule for RowLevelAccessControl {
         //
         // This example does not need the value of either flag, so simply
         // extract the LogicalPlan "data"
-        Ok(transfomed_plan.data)
+        Ok(transformed_plan.data)
     }

     fn name(&self) -> &str {
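For readers unfamiliar with the `transform` call renamed in this hunk, here is a minimal sketch of the same rewrite pattern, assuming DataFusion's `TreeNode` trait as used in the example; the `position` column and the filter predicate are hypothetical:

use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::Result;
use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};

fn add_filter_to_scans(plan: LogicalPlan) -> Result<LogicalPlan> {
    // `transform` walks the plan and calls the closure on every node
    let transformed_plan = plan.transform(|plan| {
        if matches!(&plan, LogicalPlan::TableScan(_)) {
            // hypothetical rule: hide managers from every table scan
            let filtered = LogicalPlanBuilder::from(plan)
                .filter(col("position").not_eq(lit("Manager")))?
                .build()?;
            Ok(Transformed::yes(filtered))
        } else {
            Ok(Transformed::no(plan))
        }
    })?;
    // `data` carries the rewritten plan; the `transformed` flag records
    // whether any node actually changed
    Ok(transformed_plan.data)
}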

datafusion-examples/examples/catalog.rs (+5 −5)

@@ -46,11 +46,11 @@ async fn main() -> Result<()> {

     let ctx = SessionContext::new();
     let state = ctx.state();
-    let cataloglist = Arc::new(CustomCatalogProviderList::new());
+    let catalog_list = Arc::new(CustomCatalogProviderList::new());

     // use our custom catalog list for context. each context has a single catalog list.
     // context will by default have [`MemoryCatalogProviderList`]
-    ctx.register_catalog_list(cataloglist.clone());
+    ctx.register_catalog_list(catalog_list.clone());

     // initialize our catalog and schemas
     let catalog = DirCatalog::new();
@@ -81,7 +81,7 @@ async fn main() -> Result<()> {
     ctx.register_catalog("dircat", Arc::new(catalog));
     {
         // catalog was passed down into our custom catalog list since we override the ctx's default
-        let catalogs = cataloglist.catalogs.read().unwrap();
+        let catalogs = catalog_list.catalogs.read().unwrap();
         assert!(catalogs.contains_key("dircat"));
     };

@@ -144,8 +144,8 @@ impl DirSchema {
     async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result<Arc<Self>> {
         let DirSchemaOpts { ext, dir, format } = opts;
         let mut tables = HashMap::new();
-        let direntries = std::fs::read_dir(dir).unwrap();
-        for res in direntries {
+        let dir_entries = std::fs::read_dir(dir).unwrap();
+        for res in dir_entries {
             let entry = res.unwrap();
             let filename = entry.file_name().to_str().unwrap().to_string();
             if !filename.ends_with(ext) {
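The renamed loop above scans a directory with `unwrap`s; as a side note, a self-contained sketch of the same scan using only std and propagating errors instead (`files_with_ext` is a hypothetical helper, not part of the example):

use std::io;
use std::path::{Path, PathBuf};

fn files_with_ext(dir: &Path, ext: &str) -> io::Result<Vec<PathBuf>> {
    let mut files = Vec::new();
    for res in std::fs::read_dir(dir)? {
        let entry = res?;
        // file names may not be valid UTF-8, so compare lossily
        if entry.file_name().to_string_lossy().ends_with(ext) {
            files.push(entry.path());
        }
    }
    Ok(files)
}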

datafusion-examples/examples/expr_api.rs (+2 −2)

@@ -53,7 +53,7 @@ use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter;
 /// 4. Simplify expressions: [`simplify_demo`]
 /// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`]
 /// 6. Get the types of the expressions: [`expression_type_demo`]
-/// 7. Apply type cocercion to expressions: [`type_coercion_demo`]
+/// 7. Apply type coercion to expressions: [`type_coercion_demo`]
 #[tokio::main]
 async fn main() -> Result<()> {
     // The easiest way to do create expressions is to use the
@@ -392,7 +392,7 @@ fn type_coercion_demo() -> Result<()> {
     )?;
     assert!(physical_expr.evaluate(&batch).is_ok());

-    // 4. Apply explict type coercion by manually rewriting the expression
+    // 4. Apply explicit type coercion by manually rewriting the expression
     let coerced_expr = expr
         .transform(|e| {
             // Only type coerces binary expressions.

datafusion-examples/examples/function_factory.rs (+1 −1)

@@ -36,7 +36,7 @@ use datafusion_expr::{
 ///
 /// Apart from [FunctionFactory], this example covers
 /// [ScalarUDFImpl::simplify()] which is often used at the same time, to replace
-/// a function call with another expression at rutime.
+/// a function call with another expression at runtime.
 ///
 /// This example is rather simple and does not cover all cases required for a
 /// real implementation.

datafusion-examples/examples/memtable.rs (+1 −1)

@@ -25,7 +25,7 @@ use std::sync::Arc;
 use std::time::Duration;
 use tokio::time::timeout;

-/// This example demonstrates executing a simple query against a Memtable
+/// This example demonstrates executing a simple query against a [`MemTable`]
 #[tokio::main]
 async fn main() -> Result<()> {
     let mem_table = create_memtable()?;
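For reference, a minimal sketch of the pattern this example demonstrates, assuming DataFusion's public `MemTable` API; the table name `t` and the single-column schema are made up for illustration:

use std::sync::Arc;
use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;
    // one partition containing one batch
    let mem_table = MemTable::try_new(schema, vec![vec![batch]])?;
    let ctx = SessionContext::new();
    ctx.register_table("t", Arc::new(mem_table))?;
    ctx.sql("SELECT id FROM t").await?.show().await?;
    Ok(())
}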

datafusion-examples/examples/optimizer_rule.rs (+1 −1)

@@ -146,7 +146,7 @@ impl MyOptimizerRule {
             // Closure called for each sub tree
             match expr {
                 Expr::BinaryExpr(binary_expr) if is_binary_eq(&binary_expr) => {
-                    // destruture the expression
+                    // destructure the expression
                     let BinaryExpr { left, op: _, right } = binary_expr;
                     // rewrite to `my_eq(left, right)`
                     let udf = ScalarUDF::new_from_impl(MyEq::new());

datafusion-examples/examples/plan_to_sql.rs (+1 −1)

@@ -65,7 +65,7 @@ fn simple_expr_to_sql_demo() -> Result<()> {
     Ok(())
 }

-/// DataFusioon can remove parentheses when converting an expression to SQL.
+/// DataFusion can remove parentheses when converting an expression to SQL.
 /// Note that output is intended for humans, not for other SQL engines,
 /// as difference in precedence rules can cause expressions to be parsed differently.
 fn simple_expr_to_pretty_sql_demo() -> Result<()> {

datafusion-examples/examples/simple_udtf.rs (+3 −3)

@@ -140,7 +140,7 @@ impl TableFunctionImpl for LocalCsvTableFunc {
         let limit = exprs
             .get(1)
             .map(|expr| {
-                // try to simpify the expression, so 1+2 becomes 3, for example
+                // try to simplify the expression, so 1+2 becomes 3, for example
                 let execution_props = ExecutionProps::new();
                 let info = SimplifyContext::new(&execution_props);
                 let expr = ExprSimplifier::new(info).simplify(expr.clone())?;
@@ -173,8 +173,8 @@ fn read_csv_batches(csv_path: impl AsRef<Path>) -> Result<(SchemaRef, Vec<RecordBatch>)> {
         .with_header(true)
         .build(file)?;
     let mut batches = vec![];
-    for bacth in reader {
-        batches.push(bacth?);
+    for batch in reader {
+        batches.push(batch?);
     }
     let schema = Arc::new(schema);
     Ok((schema, batches))
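The simplification step whose comment was fixed above can be exercised on its own; a small sketch assuming the `ExprSimplifier` API shown in the hunk (import paths follow recent DataFusion versions and may differ):

use datafusion_common::Result;
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::{lit, Expr};
use datafusion_optimizer::simplify_expressions::ExprSimplifier;

fn fold_constant() -> Result<Expr> {
    let execution_props = ExecutionProps::new();
    let info = SimplifyContext::new(&execution_props);
    // constant-folds `1 + 2` into the literal `3`
    ExprSimplifier::new(info).simplify(lit(1i64) + lit(2i64))
}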

datafusion/common/src/column.rs (+1 −1)

@@ -230,7 +230,7 @@ impl Column {
             .collect::<Vec<_>>();
         for using_col in using_columns {
             let all_matched = columns.iter().all(|c| using_col.contains(c));
-            // All matched fields belong to the same using column set, in orther words
+            // All matched fields belong to the same using column set, in other words
             // the same join clause. We simply pick the qualifier from the first match.
             if all_matched {
                 return Ok(columns[0].clone());

datafusion/common/src/config.rs (+2 −2)

@@ -904,12 +904,12 @@ pub trait ConfigExtension: ExtensionOptions {
 pub trait ExtensionOptions: Send + Sync + fmt::Debug + 'static {
     /// Return `self` as [`Any`]
     ///
-    /// This is needed until trait upcasting is stabilised
+    /// This is needed until trait upcasting is stabilized
     fn as_any(&self) -> &dyn Any;

     /// Return `self` as [`Any`]
     ///
-    /// This is needed until trait upcasting is stabilised
+    /// This is needed until trait upcasting is stabilized
     fn as_any_mut(&mut self) -> &mut dyn Any;

     /// Return a deep clone of this [`ExtensionOptions`]
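The workaround named in these doc comments is easy to see in miniature; a sketch assuming only std (`Options` and `MyOptions` are invented stand-ins, not DataFusion types):

use std::any::Any;

trait Options {
    // without stabilized trait upcasting, `&dyn Options` cannot be cast
    // to `&dyn Any` directly, so each impl hands itself out as `Any`
    fn as_any(&self) -> &dyn Any;
}

struct MyOptions {
    verbose: bool,
}

impl Options for MyOptions {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn main() {
    let opts: Box<dyn Options> = Box::new(MyOptions { verbose: true });
    // recover the concrete type through the `Any` view
    let concrete = opts.as_any().downcast_ref::<MyOptions>().unwrap();
    assert!(concrete.verbose);
}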

datafusion/common/src/cse.rs (+1 −1)

@@ -60,7 +60,7 @@ pub trait Normalizeable {
 }

 /// The `NormalizeEq` trait extends `Eq` and `Normalizeable` to provide a method for comparing
-/// normlized nodes in optimizations like Common Subexpression Elimination (CSE).
+/// normalized nodes in optimizations like Common Subexpression Elimination (CSE).
 ///
 /// The `normalize_eq` method ensures that two nodes that are semantically equivalent (after normalization)
 /// are considered equal in CSE optimization, even if their original forms differ.

datafusion/common/src/dfschema.rs (+1 −1)

@@ -955,7 +955,7 @@ pub trait ExprSchema: std::fmt::Debug {
     /// Returns the column's optional metadata.
     fn metadata(&self, col: &Column) -> Result<&HashMap<String, String>>;

-    /// Return the coulmn's datatype and nullability
+    /// Return the column's datatype and nullability
     fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)>;
 }

datafusion/common/src/error.rs (+1 −1)

@@ -115,7 +115,7 @@ pub enum DataFusionError {
     Execution(String),
     /// [`JoinError`] during execution of the query.
     ///
-    /// This error can unoccur for unjoined tasks, such as execution shutdown.
+    /// This error can't occur for unjoined tasks, such as execution shutdown.
     ExecutionJoin(JoinError),
     /// Error when resources (such as memory of scratch disk space) are exhausted.
     ///

datafusion/common/src/scalar/mod.rs (+5 −5)

@@ -2216,7 +2216,7 @@ impl ScalarValue {
     ///
     /// Errors if `self` is
     /// - a decimal that fails be converted to a decimal array of size
-    /// - a `Fixedsizelist` that fails to be concatenated into an array of size
+    /// - a `FixedsizeList` that fails to be concatenated into an array of size
     /// - a `List` that fails to be concatenated into an array of size
     /// - a `Dictionary` that fails be converted to a dictionary array of size
     pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
@@ -2925,7 +2925,7 @@ impl ScalarValue {
     /// preferred over this function if at all possible as they can be
     /// vectorized and are generally much faster.
     ///
-    /// This function has a few narrow usescases such as hash table key
+    /// This function has a few narrow use cases such as hash table key
     /// comparisons where comparing a single row at a time is necessary.
     ///
     /// # Errors
@@ -4465,7 +4465,7 @@ mod tests {
         Ok(())
     }

-    // Verifies that ScalarValue has the same behavior with compute kernal when it overflows.
+    // Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
     fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
     where
         T: ArrowNumericType,
@@ -6150,9 +6150,9 @@ mod tests {
             &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
         );

-        let newscalar = ScalarValue::try_from_array(&array, 0).unwrap();
+        let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
         assert_eq!(
-            newscalar.data_type(),
+            new_scalar.data_type(),
             DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
         );
     }
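The renamed test above round-trips a value through `ScalarValue::try_from_array`; a simpler sketch of the same round trip, assuming `datafusion_common`'s public API and an `Int32` array instead of a timestamp:

use std::sync::Arc;
use arrow::array::{ArrayRef, Int32Array};
use datafusion_common::{Result, ScalarValue};

fn round_trip() -> Result<()> {
    let array: ArrayRef = Arc::new(Int32Array::from(vec![10, 20, 30]));
    // pull row 0 out of the array as a ScalarValue
    let scalar = ScalarValue::try_from_array(&array, 0)?;
    assert_eq!(scalar, ScalarValue::Int32(Some(10)));
    // broadcast the single value back out to a 3-element array
    let expanded = scalar.to_array_of_size(3)?;
    assert_eq!(expanded.len(), 3);
    Ok(())
}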

datafusion/common/src/tree_node.rs (+2 −2)

@@ -995,11 +995,11 @@ impl<
 /// construct a temporary container to be able to call `apply_ref_elements` on a
 /// collection of tree node references. But in that case the container's temporary
 /// lifetime is different to the lifetime of tree nodes that we put into it.
-/// Please find an example usecase in `Expr::apply_children` with the `Expr::Case` case.
+/// Please find an example use case in `Expr::apply_children` with the `Expr::Case` case.
 ///
 /// Most of the cases we don't need to create a temporary container with
 /// `TreeNodeRefContainer`, but we can just call `TreeNodeContainer::apply_elements`.
-/// Please find an example usecase in `Expr::apply_children` with the `Expr::GroupingSet`
+/// Please find an example use case in `Expr::apply_children` with the `Expr::GroupingSet`
 /// case.
 pub trait TreeNodeRefContainer<'a, T: 'a>: Sized {
     /// Applies `f` to all elements of the container.

datafusion/common/src/utils/memory.rs (+2 −2)

@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.

-//! This module provides a function to estimate the memory size of a HashTable prior to alloaction
+//! This module provides a function to estimate the memory size of a HashTable prior to allocation

 use crate::{DataFusionError, Result};
 use std::mem::size_of;
@@ -79,7 +79,7 @@ pub fn estimate_memory_size<T>(num_elements: usize, fixed_size: usize) -> Result<usize> {
     // For the majority of cases hashbrown overestimates the bucket quantity
     // to keep ~1/8 of them empty. We take this factor into account by
     // multiplying the number of elements with a fixed ratio of 8/7 (~1.14).
-    // This formula leads to overallocation for small tables (< 8 elements)
+    // This formula leads to over-allocation for small tables (< 8 elements)
     // but should be fine overall.
     num_elements
         .checked_mul(8)
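The 8/7 heuristic in this comment is concrete enough to check by hand; a sketch of just the bucket-count arithmetic, assuming the hashbrown behavior described above (`estimated_buckets` is a hypothetical helper, not the real `estimate_memory_size`):

fn estimated_buckets(num_elements: usize) -> Option<usize> {
    // scale by 8/7 (~1.14), then round up to the next power of two,
    // mirroring how hashbrown sizes its bucket array
    num_elements
        .checked_mul(8)?
        .checked_div(7)?
        .checked_next_power_of_two()
}

fn main() {
    // 3 elements -> 24 / 7 = 3 -> next power of two = 4 buckets
    assert_eq!(estimated_buckets(3), Some(4));
    // 100 elements -> 800 / 7 = 114 -> 128 buckets
    assert_eq!(estimated_buckets(100), Some(128));
}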

datafusion/common/src/utils/proxy.rs (+3 −3)

@@ -92,12 +92,12 @@ impl<T> VecAllocExt for Vec<T> {
     type T = T;

     fn push_accounted(&mut self, x: Self::T, accounting: &mut usize) {
-        let prev_capacty = self.capacity();
+        let prev_capacity = self.capacity();
         self.push(x);
         let new_capacity = self.capacity();
-        if new_capacity > prev_capacty {
+        if new_capacity > prev_capacity {
             // capacity changed, so we allocated more
-            let bump_size = (new_capacity - prev_capacty) * size_of::<T>();
+            let bump_size = (new_capacity - prev_capacity) * size_of::<T>();
             // Note multiplication should never overflow because `push` would
             // have panic'd first, but the checked_add could potentially
             // overflow since accounting could be tracking additional values, and
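The renamed variables above implement capacity-delta accounting; a free-standing sketch of the same idea, assuming only std (the real trait also guards the final add with `checked_add`):

use std::mem::size_of;

fn push_accounted<T>(vec: &mut Vec<T>, x: T, accounting: &mut usize) {
    let prev_capacity = vec.capacity();
    vec.push(x);
    let new_capacity = vec.capacity();
    if new_capacity > prev_capacity {
        // only newly reserved elements count as fresh allocation
        *accounting += (new_capacity - prev_capacity) * size_of::<T>();
    }
}

fn main() {
    let mut v: Vec<u64> = Vec::new();
    let mut allocated = 0usize;
    for i in 0..100 {
        push_accounted(&mut v, i, &mut allocated);
    }
    // every byte of capacity was accounted exactly once
    assert_eq!(allocated, v.capacity() * size_of::<u64>());
}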

datafusion/core/benches/physical_plan.rs (+1 −1)

@@ -38,7 +38,7 @@ use datafusion::physical_plan::{
 use datafusion::prelude::SessionContext;
 use datafusion_physical_expr_common::sort_expr::LexOrdering;

-// Initialise the operator using the provided record batches and the sort key
+// Initialize the operator using the provided record batches and the sort key
 // as inputs. All record batches must have the same schema.
 fn sort_preserving_merge_operator(
     session_ctx: Arc<SessionContext>,

datafusion/core/src/dataframe/mod.rs (+2 −2)

@@ -3279,7 +3279,7 @@ mod tests {
         &df_results
     );

-    // check that col with the same name ovwewritten
+    // check that col with the same name overwritten
     let df_results_overwrite = df
         .clone()
         .with_column("c1", col("c2") + col("c3"))?
@@ -3302,7 +3302,7 @@ mod tests {
         &df_results_overwrite
     );

-    // check that col with the same name ovwewritten using same name as reference
+    // check that col with the same name overwritten using same name as reference
     let df_results_overwrite_self = df
         .clone()
         .with_column("c2", col("c2") + lit(1))?

datafusion/core/src/datasource/default_table_source.rs (+1 −1)

@@ -67,7 +67,7 @@ impl TableSource for DefaultTableSource {
     }

     /// Tests whether the table provider can make use of any or all filter expressions
-    /// to optimise data retrieval.
+    /// to optimize data retrieval.
     fn supports_filters_pushdown(
         &self,
         filter: &[&Expr],

datafusion/core/src/datasource/physical_plan/file_groups.rs (+1 −1)

@@ -781,7 +781,7 @@ mod test {
         assert_partitioned_files(expected, actual);
     }

-    /// Asserts that the two groups of `ParititonedFile` are the same
+    /// Asserts that the two groups of [`PartitionedFile`] are the same
     /// (PartitionedFile doesn't implement PartialEq)
     fn assert_partitioned_files(
         expected: Option<Vec<Vec<PartitionedFile>>>,

datafusion/core/src/datasource/physical_plan/json.rs (+1 −1)

@@ -870,7 +870,7 @@ mod tests {
     )]
     #[cfg(feature = "compression")]
     #[tokio::test]
-    async fn test_json_with_repartitioing(
+    async fn test_json_with_repartitioning(
         file_compression_type: FileCompressionType,
     ) -> Result<()> {
         let config = SessionConfig::new()

datafusion/core/src/datasource/physical_plan/parquet/mod.rs (+2 −2)

@@ -333,7 +333,7 @@ impl ParquetExecBuilder {

     /// Set the filter predicate when reading.
     ///
-    /// See the "Predicate Pushdown" section of the [`ParquetExec`] documenation
+    /// See the "Predicate Pushdown" section of the [`ParquetExec`] documentation
     /// for more details.
     pub fn with_predicate(mut self, predicate: Arc<dyn PhysicalExpr>) -> Self {
         self.predicate = Some(predicate);
@@ -611,7 +611,7 @@ impl ParquetExec {
     }

     /// If enabled, the reader will read the page index
-    /// This is used to optimise filter pushdown
+    /// This is used to optimize filter pushdown
     /// via `RowSelector` and `RowFilter` by
     /// eliminating unnecessary IO and decoding
     pub fn with_enable_page_index(mut self, enable_page_index: bool) -> Self {

datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs (+2 −2)

@@ -422,7 +422,7 @@ fn would_column_prevent_pushdown(
     checker.prevents_pushdown()
 }

-/// Recurses through expr as a trea, finds all `column`s, and checks if any of them would prevent
+/// Recurses through expr as a tree, finds all `column`s, and checks if any of them would prevent
 /// this expression from being predicate pushed down. If any of them would, this returns false.
 /// Otherwise, true.
 pub fn can_expr_be_pushed_down_with_schemas(
@@ -692,7 +692,7 @@ mod test {

     let mut parquet_reader = parquet_reader_builder.build().expect("building reader");

-    // Parquet file is small, we only need 1 recordbatch
+    // Parquet file is small, we only need 1 record batch
     let first_rb = parquet_reader
         .next()
         .expect("expected record batch")
