Skip to content

Commit 8acfa3f

Browse files
umartin authored and ion-elgreco committed
fix. check for all known valid delta files in is_deltatable
Signed-off-by: Martin Andersson <[email protected]>
1 parent 3831462 commit 8acfa3f

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

crates/core/src/kernel/snapshot/log_segment.rs

+24
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,12 @@ static CHECKPOINT_FILE_PATTERN: LazyLock<Regex> =
2828
static DELTA_FILE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\d+\.json$").unwrap());
2929
static CRC_FILE_PATTERN: LazyLock<Regex> =
3030
LazyLock::new(|| Regex::new(r"^(\.\d+(\.crc|\.json)|\d+)\.crc$").unwrap());
31+
static LAST_CHECKPOINT_FILE_PATTERN: LazyLock<Regex> =
32+
LazyLock::new(|| Regex::new(r"^_last_checkpoint$").unwrap());
33+
static LAST_VACUUM_INFO_FILE_PATTERN: LazyLock<Regex> =
34+
LazyLock::new(|| Regex::new(r"^_last_vacuum_info$").unwrap());
35+
static DELETION_VECTOR_FILE_PATTERN: LazyLock<Regex> =
36+
LazyLock::new(|| Regex::new(r".*\.bin$").unwrap());
3137
pub(super) static TOMBSTONE_SCHEMA: LazyLock<StructType> =
3238
LazyLock::new(|| StructType::new(vec![ActionType::Remove.schema_field().clone()]));
3339

@@ -66,6 +72,24 @@ pub(crate) trait PathExt {
6672
.map(|name| CRC_FILE_PATTERN.captures(name).is_some())
6773
.unwrap()
6874
}
75+
76+
fn is_last_checkpoint_file(&self) -> bool {
77+
self.filename()
78+
.map(|name| LAST_CHECKPOINT_FILE_PATTERN.captures(name).is_some())
79+
.unwrap_or(false)
80+
}
81+
82+
fn is_last_vacuum_info_file(&self) -> bool {
83+
self.filename()
84+
.map(|name| LAST_VACUUM_INFO_FILE_PATTERN.captures(name).is_some())
85+
.unwrap_or(false)
86+
}
87+
88+
fn is_deletion_vector_file(&self) -> bool {
89+
self.filename()
90+
.map(|name| DELETION_VECTOR_FILE_PATTERN.captures(name).is_some())
91+
.unwrap_or(false)
92+
}
6993
}
7094

7195
impl PathExt for Path {

crates/core/src/logstore/mod.rs

+15 -3
Original file line number | Diff line number | Diff line change
@@ -267,11 +267,23 @@ pub trait LogStore: Send + Sync + AsAny {
267267
while let Some(res) = stream.next().await {
268268
match res {
269269
Ok(meta) => {
270-
// crc files are valid files according to the protocol
271-
if meta.location.is_crc_file() {
270+
// Valid but optional files.
271+
if meta.location.is_crc_file()
272+
|| meta.location.is_last_checkpoint_file()
273+
|| meta.location.is_last_vacuum_info_file()
274+
|| meta.location.is_deletion_vector_file()
275+
{
272276
continue;
273277
}
274-
return Ok(meta.location.is_commit_file() || meta.location.is_checkpoint_file());
278+
let is_valid =
279+
meta.location.is_commit_file() || meta.location.is_checkpoint_file();
280+
if !is_valid {
281+
warn!(
282+
"Expected a valid delta file. Found {}",
283+
meta.location.filename().unwrap_or("<empty>")
284+
)
285+
}
286+
return Ok(is_valid);
275287
}
276288
Err(ObjectStoreError::NotFound { .. }) => return Ok(false),
277289
Err(err) => return Err(err.into()),

0 commit comments

Comments
 (0)