|
18 | 18 | //! Defines physical expressions that can be evaluated at runtime during query execution
|
19 | 19 |
|
20 | 20 | use crate::hyperloglog::HyperLogLog;
|
21 |
| -use arrow::array::BinaryArray; |
| 21 | +use arrow::array::{BinaryArray, StringViewArray}; |
22 | 22 | use arrow::array::{
|
23 | 23 | GenericBinaryArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
|
24 | 24 | };
|
@@ -126,6 +126,27 @@ where
|
126 | 126 | }
|
127 | 127 | }
|
128 | 128 |
|
/// Accumulator state selected for `DataType::Utf8View` inputs: a `HyperLogLog`
/// sketch over owned `String` values.
///
/// `T` is carried only through `PhantomData`; no field stores a `T`.
/// NOTE(review): the only visible instantiation is `::<i32>` and the visible
/// `update_batch` never uses `T` -- presumably the parameter is kept to mirror
/// the other string/binary accumulators; confirm whether it is needed.
| 129 | +#[derive(Debug)] |
| 130 | +struct StringViewHLLAccumulator<T> |
| 131 | +where |
| 132 | + T: OffsetSizeTrait, |
| 133 | +{ |
// Sketch that receives every non-null string seen by `update_batch`.
| 134 | + hll: HyperLogLog<String>, |
// Zero-sized marker so the otherwise-unused type parameter `T` is accepted.
| 135 | + phantom_data: PhantomData<T>, |
| 136 | +} |
| 137 | + |
/// Inherent constructor for `StringViewHLLAccumulator`.
| 138 | +impl<T> StringViewHLLAccumulator<T> |
| 139 | +where |
| 140 | + T: OffsetSizeTrait, |
| 141 | +{ |
/// Creates an accumulator whose sketch is a newly constructed
/// `HyperLogLog::new()`; `phantom_data` is just the `PhantomData` marker.
| 142 | + pub fn new() -> Self { |
| 143 | + Self { |
| 144 | + hll: HyperLogLog::new(), |
| 145 | + phantom_data: PhantomData, |
| 146 | + } |
| 147 | + } |
| 148 | +} |
| 149 | + |
129 | 150 | #[derive(Debug)]
|
130 | 151 | struct BinaryHLLAccumulator<T>
|
131 | 152 | where
|
@@ -197,6 +218,21 @@ where
|
197 | 218 | default_accumulator_impl!();
|
198 | 219 | }
|
199 | 220 |
|
/// `Accumulator` implementation for `StringViewHLLAccumulator`.
///
/// `update_batch` reads only `values[0]`, downcasts it to `StringViewArray`,
/// and feeds every non-null string into the HyperLogLog sketch.
| 221 | +impl<T> Accumulator for StringViewHLLAccumulator<T> |
| 222 | +where |
| 223 | + T: OffsetSizeTrait, |
| 224 | +{ |
/// Adds the non-null values of the first input array to the sketch and
/// returns `Ok(())`.
| 225 | + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { |
// Only the first input column is consulted. `downcast_value!` is defined
// outside this view -- presumably it yields an error when the array is not a
// `StringViewArray` (confirm against the macro definition).
| 226 | + let array: &StringViewArray = downcast_value!(values[0], StringViewArray); |
| 227 | + // flatten because we would skip nulls |
// Each `&str` is copied into an owned `String`, matching the sketch's
// `HyperLogLog<String>` element type.
| 228 | + self.hll |
| 229 | + .extend(array.iter().flatten().map(|s| s.to_string())); |
| 230 | + Ok(()) |
| 231 | + } |
| 232 | + |
// Macro defined outside this view and also invoked by another accumulator
// impl in this file; presumably supplies the remaining `Accumulator`
// methods -- confirm against its definition.
| 233 | + default_accumulator_impl!(); |
| 234 | +} |
| 235 | + |
200 | 236 | impl<T> Accumulator for StringHLLAccumulator<T>
|
201 | 237 | where
|
202 | 238 | T: OffsetSizeTrait,
|
@@ -311,6 +347,7 @@ impl AggregateUDFImpl for ApproxDistinct {
|
311 | 347 | DataType::Int64 => Box::new(NumericHLLAccumulator::<Int64Type>::new()),
|
312 | 348 | DataType::Utf8 => Box::new(StringHLLAccumulator::<i32>::new()),
|
313 | 349 | DataType::LargeUtf8 => Box::new(StringHLLAccumulator::<i64>::new()),
|
| 350 | + DataType::Utf8View => Box::new(StringViewHLLAccumulator::<i32>::new()), |
314 | 351 | DataType::Binary => Box::new(BinaryHLLAccumulator::<i32>::new()),
|
315 | 352 | DataType::LargeBinary => Box::new(BinaryHLLAccumulator::<i64>::new()),
|
316 | 353 | other => {
|
|
0 commit comments