Skip to content

Commit dd09060

Browse files
feat: StructDType from_iter (#2003)
Added a `FromIterator` implementation for `StructDType` (`StructDType::from_iter`) and used it at existing call sites. This should remove some intermediate `Vec`s.
1 parent f8caed0 commit dd09060

File tree

6 files changed

+56
-58
lines changed

6 files changed

+56
-58
lines changed

vortex-array/src/array/struct_/mod.rs

+7-5
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,19 @@ impl StructArray {
9090
}
9191

9292
pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayData)]) -> VortexResult<Self> {
93-
let names: Vec<FieldName> = items
94-
.iter()
95-
.map(|(name, _)| FieldName::from(name.as_ref()))
96-
.collect();
93+
let names = items.iter().map(|(name, _)| FieldName::from(name.as_ref()));
9794
let fields: Vec<ArrayData> = items.iter().map(|(_, array)| array.clone()).collect();
9895
let len = fields
9996
.first()
10097
.map(|f| f.len())
10198
.ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
10299

103-
Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable)
100+
Self::try_new(
101+
FieldNames::from_iter(names),
102+
fields,
103+
len,
104+
Validity::NonNullable,
105+
)
104106
}
105107

106108
// TODO(aduffy): Add equivalent function to support field masks for nested column access.

vortex-array/src/arrow/dtype.rs

+18-25
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@
1313
use std::sync::Arc;
1414

1515
use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaBuilder, SchemaRef};
16-
use itertools::Itertools;
1716
use vortex_datetime_dtype::arrow::{make_arrow_temporal_dtype, make_temporal_ext_dtype};
1817
use vortex_datetime_dtype::is_temporal_ext_type;
19-
use vortex_dtype::{DType, Nullability, PType, StructDType};
18+
use vortex_dtype::{DType, FieldName, Nullability, PType, StructDType};
2019
use vortex_error::{vortex_bail, vortex_err, VortexResult};
2120

2221
use crate::arrow::{FromArrowType, TryFromArrowType};
@@ -46,23 +45,23 @@ impl TryFromArrowType<&DataType> for PType {
4645
impl FromArrowType<SchemaRef> for DType {
4746
fn from_arrow(value: SchemaRef) -> Self {
4847
Self::Struct(
49-
StructDType::new(
50-
value
51-
.fields()
52-
.iter()
53-
.map(|f| f.name().as_str().into())
54-
.collect(),
55-
value
56-
.fields()
57-
.iter()
58-
.map(|f| Self::from_arrow(f.as_ref()))
59-
.collect_vec(),
60-
),
48+
StructDType::from_arrow(value.fields()),
6149
Nullability::NonNullable, // Must match From<RecordBatch> for Array
6250
)
6351
}
6452
}
6553

54+
impl FromArrowType<&Fields> for StructDType {
55+
fn from_arrow(value: &Fields) -> Self {
56+
StructDType::from_iter(value.into_iter().map(|f| {
57+
(
58+
FieldName::from(f.name().as_str()),
59+
DType::from_arrow(f.as_ref()),
60+
)
61+
}))
62+
}
63+
}
64+
6665
impl FromArrowType<&Field> for DType {
6766
fn from_arrow(field: &Field) -> Self {
6867
use vortex_dtype::DType::*;
@@ -88,13 +87,7 @@ impl FromArrowType<&Field> for DType {
8887
DataType::List(e) | DataType::LargeList(e) => {
8988
List(Arc::new(Self::from_arrow(e.as_ref())), nullability)
9089
}
91-
DataType::Struct(f) => Struct(
92-
StructDType::new(
93-
f.iter().map(|f| f.name().as_str().into()).collect(),
94-
f.iter().map(|f| Self::from_arrow(f.as_ref())).collect_vec(),
95-
),
96-
nullability,
97-
),
90+
DataType::Struct(f) => Struct(StructDType::from_arrow(f), nullability),
9891
_ => unimplemented!("Arrow data type not yet supported: {:?}", field.data_type()),
9992
}
10093
}
@@ -207,10 +200,10 @@ mod test {
207200

208201
assert_eq!(
209202
infer_data_type(&DType::Struct(
210-
StructDType::new(
211-
FieldNames::from(vec![FieldName::from("field_a"), FieldName::from("field_b")]),
212-
vec![DType::Bool(false.into()), DType::Utf8(true.into())],
213-
),
203+
StructDType::from_iter([
204+
("field_a", DType::Bool(false.into())),
205+
("field_b", DType::Utf8(true.into()))
206+
]),
214207
Nullability::NonNullable,
215208
))
216209
.unwrap(),

vortex-dtype/src/serde/flatbuffers/project.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,14 @@ pub fn project_and_deserialize(
4949
.ok_or_else(|| vortex_err!("The top-level type should be a struct"))?;
5050
let nullability = fb_struct.nullable().into();
5151

52-
let (names, dtypes): (Vec<Arc<str>>, Vec<DType>) = projection
52+
let struct_dtype = projection
5353
.iter()
5454
.map(|f| resolve_field(fb_struct, f))
5555
.map(|idx| idx.and_then(|i| read_field(fb_struct, i, buffer)))
56-
.collect::<VortexResult<Vec<_>>>()?
57-
.into_iter()
58-
.unzip();
56+
.collect::<VortexResult<Vec<_>>>()?;
5957

6058
Ok(DType::Struct(
61-
StructDType::new(names.into(), dtypes),
59+
StructDType::from_iter(struct_dtype),
6260
nullability,
6361
))
6462
}

vortex-dtype/src/struct_.rs

+16-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use vortex_error::{
66
};
77

88
use crate::flatbuffers::ViewedDType;
9-
use crate::{DType, Field, FieldNames};
9+
use crate::{DType, Field, FieldName, FieldNames};
1010

1111
/// DType of a struct's field, either owned or a pointer to an underlying flatbuffer.
1212
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
@@ -290,6 +290,20 @@ impl StructDType {
290290
}
291291
}
292292

293+
impl<T, V> FromIterator<(T, V)> for StructDType
294+
where
295+
T: Into<FieldName>,
296+
V: Into<FieldDType>,
297+
{
298+
fn from_iter<I: IntoIterator<Item = (T, V)>>(iter: I) -> Self {
299+
let (names, dtypes): (Vec<_>, Vec<_>) = iter
300+
.into_iter()
301+
.map(|(name, dtype)| (name.into(), dtype.into()))
302+
.unzip();
303+
StructDType::from_fields(names.into(), dtypes.into_iter().map(Into::into).collect())
304+
}
305+
}
306+
293307
#[cfg(test)]
294308
mod test {
295309
use crate::dtype::DType;
@@ -316,10 +330,7 @@ mod test {
316330
let b_type = DType::Bool(Nullability::NonNullable);
317331

318332
let dtype = DType::Struct(
319-
StructDType::new(
320-
vec!["A".into(), "B".into()].into(),
321-
vec![a_type.clone(), b_type.clone()],
322-
),
333+
StructDType::from_iter([("A", a_type.clone()), ("B", b_type.clone())]),
323334
Nullability::Nullable,
324335
);
325336
assert!(dtype.is_nullable());

vortex-expr/src/transform/partition.rs

+8-11
Original file line numberDiff line numberDiff line change
@@ -358,20 +358,17 @@ mod tests {
358358

359359
fn dtype() -> DType {
360360
DType::Struct(
361-
StructDType::new(
362-
vec!["a".into(), "b".into(), "c".into()].into(),
363-
vec![
361+
StructDType::from_iter([
362+
(
363+
"a",
364364
DType::Struct(
365-
StructDType::new(
366-
vec!["a".into(), "b".into()].into(),
367-
vec![I32.into(), I32.into()],
368-
),
365+
StructDType::from_iter([("a", I32.into()), ("b", DType::from(I32))]),
369366
NonNullable,
370367
),
371-
I32.into(),
372-
I32.into(),
373-
],
374-
),
368+
),
369+
("b", I32.into()),
370+
("c", I32.into()),
371+
]),
375372
NonNullable,
376373
)
377374
}

vortex-layout/src/layouts/chunked/stats_table.rs

+4-7
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,11 @@ impl StatsTable {
4242

4343
/// Returns the DType of the statistics table given a set of statistics and column [`DType`].
4444
pub fn dtype_for_stats_table(column_dtype: &DType, present_stats: &[Stat]) -> DType {
45-
let dtypes = present_stats
46-
.iter()
47-
.map(|s| s.dtype(column_dtype).as_nullable())
48-
.collect();
4945
DType::Struct(
50-
StructDType::new(
51-
present_stats.iter().map(|s| s.name().into()).collect(),
52-
dtypes,
46+
StructDType::from_iter(
47+
present_stats
48+
.iter()
49+
.map(|stat| (stat.name(), stat.dtype(column_dtype).as_nullable())),
5350
),
5451
Nullability::NonNullable,
5552
)

0 commit comments

Comments
 (0)