Skip to content

Commit d543556

Browse files
committed
reformat code
1 parent 2cfa42b commit d543556

File tree

2 files changed

+33
-20
lines changed

2 files changed

+33
-20
lines changed

Cargo.toml

+4-5
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,7 @@ env_logger = "0.11.5"
2424
log = "0.4.22"
2525
emojis = "0.6.3"
2626

27-
[profile.release]
28-
debug = true
29-
#lto = true
30-
#opt-level = 3
31-
#codegen-units = 1
27+
# https://doc.rust-lang.org/cargo/reference/profiles.html#release
28+
[profile.profiling]
29+
inherits = "release"
30+
debug = true

sequila/sequila-core/src/physical_planner/joins/interval_join.rs

+29-15
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ use crate::physical_planner::joins::utils::{
66
use crate::session_context::Algorithm;
77
use ahash::RandomState;
88
use bio::data_structures::interval_tree as rust_bio;
9-
use coitrees::{COITree, Interval};
109
use datafusion::arrow::array::{Array, AsArray, PrimitiveArray, PrimitiveBuilder, RecordBatch};
1110
use datafusion::arrow::compute;
1211
use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef, UInt32Type};
@@ -40,6 +39,9 @@ use std::mem::size_of;
4039
use std::sync::Arc;
4140
use std::task::Poll;
4241

42+
// Max number of records on the left side is 18,446,744,073,709,551,615 (usize::MAX on 64-bit platforms).
43+
// We could switch to u32 instead (u32::MAX is 4,294,967,295),
44+
// which consumes ~30% less memory when building COITrees but limits the number of elements.
4345
type Position = usize;
4446

4547
#[derive(Debug)]
@@ -610,7 +612,7 @@ async fn collect_left_input(
610612
acc.0.push(batch);
611613
Ok(acc)
612614
})
613-
.await?; // 11.5mb 3250 al 768 tal
615+
.await?;
614616

615617
// Estimation of memory size, required for hashtable, prior to allocation.
616618
// Final result can be verified using `RawTable.allocation_info()`
@@ -641,7 +643,7 @@ async fn collect_left_input(
641643
// build a left hash map
642644
hashes_buffer.clear();
643645
hashes_buffer.resize(batch.num_rows(), 0);
644-
update_hashmap( // 760 al, 370 tal -> 462 al, 130 al
646+
update_hashmap(
645647
&on_left,
646648
&left_interval,
647649
batch,
@@ -653,9 +655,9 @@ async fn collect_left_input(
653655
offset += batch.num_rows();
654656
}
655657

656-
let hashmap = IntervalJoinAlgorithm::new(&algorithm, hashmap); // 14mb, 145 al
658+
let hashmap = IntervalJoinAlgorithm::new(&algorithm, hashmap);
657659

658-
let single_batch = compute::concat_batches(&schema, &batches)?; // 10.7 mb, 356 al
660+
let single_batch = compute::concat_batches(&schema, &batches)?;
659661
let data = JoinLeftData::new(hashmap, single_batch, reservation);
660662

661663
Ok(data)
@@ -703,7 +705,15 @@ enum IntervalJoinAlgorithm {
703705
ArrayIntervalTree(FnvHashMap<u64, rust_bio::ArrayBackedIntervalTree<i32, Position>>),
704706
AIList(FnvHashMap<u64, scailist::ScAIList<Position>>),
705707
Lapper(FnvHashMap<u64, rust_lapper::Lapper<u32, Position>>),
706-
CoitresNearest(FnvHashMap<u64, (COITree<Position, u32>, Vec<Interval<Position>>)>),
708+
CoitresNearest(
709+
FnvHashMap<
710+
u64,
711+
(
712+
coitrees::COITree<Position, u32>,
713+
Vec<coitrees::Interval<Position>>,
714+
),
715+
>,
716+
),
707717
}
708718

709719
impl Debug for IntervalJoinAlgorithm {
@@ -868,7 +878,12 @@ impl IntervalJoinAlgorithm {
868878
*node.metadata
869879
}
870880

871-
fn nearest(&self, start: i32, end: i32, ranges2: &[Interval<Position>]) -> Option<Position> {
881+
fn nearest(
882+
&self,
883+
start: i32,
884+
end: i32,
885+
ranges2: &[coitrees::Interval<Position>],
886+
) -> Option<Position> {
872887
if ranges2.is_empty() {
873888
return None;
874889
}
@@ -995,14 +1010,13 @@ fn update_hashmap(
9951010
let start = evaluate_as_i32(left_interval.start(), batch)?;
9961011
let end = evaluate_as_i32(left_interval.end(), batch)?;
9971012

998-
hash_values
999-
.iter()
1000-
.enumerate()
1001-
.for_each(|(i, hash_val)| {
1002-
let position = i + offset;
1003-
let intervals: &mut Vec<SequilaInterval> = hash_map.entry(*hash_val).or_insert_with(|| Vec::with_capacity(4096));
1004-
intervals.push(SequilaInterval::new(start.value(i), end.value(i), position))
1005-
});
1013+
hash_values.iter().enumerate().for_each(|(i, hash_val)| {
1014+
let position: Position = i + offset;
1015+
let intervals: &mut Vec<SequilaInterval> = hash_map
1016+
.entry(*hash_val)
1017+
.or_insert_with(|| Vec::with_capacity(4096));
1018+
intervals.push(SequilaInterval::new(start.value(i), end.value(i), position))
1019+
});
10061020

10071021
Ok(())
10081022
}

0 commit comments

Comments
 (0)