Skip to content

Commit 00a6829

Browse files
authored Feb 6, 2025
[Spark][Refactor] Create helpers for DeletionVectorDescriptor (#4123)
#### Which Delta project/connector is this regarding?

- [x] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [ ] Other (fill in here)

## Description

In this PR, we extract helpers for building the file name and for extracting the DV prefix and UUID in DeletionVectorDescriptor, to clean up the code a little and make testing easier in the future.

## How was this patch tested?

Simple refactors. Existing tests pass.

## Does this PR introduce _any_ user-facing changes?

No.
1 parent 1d8f6ac commit 00a6829

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed
 

‎spark/src/main/scala/org/apache/spark/sql/delta/actions/DeletionVectorDescriptor.scala

+22-6
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,7 @@ case class DeletionVectorDescriptor(
112112
require(isOnDisk, "Can't get a path for an inline deletion vector")
113113
storageType match {
114114
case UUID_DV_MARKER =>
115-
// If the file was written with a random prefix, we have to extract that,
116-
// before decoding the UUID.
117-
val randomPrefixLength = pathOrInlineDv.length - Codec.Base85Codec.ENCODED_UUID_LENGTH
118-
val (randomPrefix, encodedUuid) = pathOrInlineDv.splitAt(randomPrefixLength)
119-
val uuid = Codec.Base85Codec.decodeUUID(encodedUuid)
115+
val (randomPrefix, uuid) = getRandomPrefixAndUuid.get
120116
assembleDeletionVectorPath(tableLocation, uuid, randomPrefix)
121117
case PATH_DV_MARKER =>
122118
// Since there is no need for legacy support for relative paths for DVs,
@@ -132,6 +128,20 @@ case class DeletionVectorDescriptor(
132128
// Calls `absolutePath(tablePath)` and returns the resulting path URL-encoded via `SparkPath`.
def urlEncodedPath(tablePath: Path): String =
133129
SparkPath.fromPath(absolutePath(tablePath)).urlEncoded
134130

131+
/**
132+
* Parse the prefix and UUID of a relative DV. Returns None if the DV is not relative.
*
* When `storageType` is `UUID_DV_MARKER`, `pathOrInlineDv` is split so that its trailing
* `Codec.Base85Codec.ENCODED_UUID_LENGTH` characters are decoded as the UUID and everything
* before them is returned as the random prefix. The prefix is the empty string when nothing
* precedes the encoded UUID.
*
* @return `Some((randomPrefix, uuid))` for `UUID_DV_MARKER`, `None` for any other storage type.
133+
*/
134+
@JsonIgnore
135+
def getRandomPrefixAndUuid: Option[(String, UUID)] = storageType match {
136+
case UUID_DV_MARKER =>
137+
// If the file was written with a random prefix, we have to extract that,
138+
// before decoding the UUID.
// NOTE(review): if `pathOrInlineDv` is shorter than ENCODED_UUID_LENGTH this length is
// negative, `splitAt` then yields an empty prefix and hands the whole string to
// `decodeUUID` -- confirm that `decodeUUID` rejects such input.
139+
val randomPrefixLength = pathOrInlineDv.length - Codec.Base85Codec.ENCODED_UUID_LENGTH
140+
val (randomPrefix, encodedUuid) = pathOrInlineDv.splitAt(randomPrefixLength)
141+
Some((randomPrefix, Codec.Base85Codec.decodeUUID(encodedUuid)))
142+
case _ =>
143+
// Any other storage type carries no prefix/UUID pair to extract.
None
144+
}
135145
/**
136146
* Produce a copy of this DV, but using an absolute path.
137147
*
@@ -318,14 +328,20 @@ object DeletionVectorDescriptor {
318328
* Optionally, prepend a `prefix` to the name.
319329
*/
320330
def assembleDeletionVectorPath(targetParentPath: Path, id: UUID, prefix: String = ""): Path = {
321-
val fileName = s"${DELETION_VECTOR_FILE_NAME_CORE}_${id}.bin"
331+
val fileName = assembleDeletionVectorFileName(id)
322332
// A non-empty prefix becomes an extra directory level between the parent path and the file;
// with the default empty prefix the file is placed directly under the parent path.
if (prefix.nonEmpty) {
323333
new Path(new Path(targetParentPath, prefix), fileName)
324334
} else {
325335
new Path(targetParentPath, fileName)
326336
}
327337
}
328338

339+
/**
340+
* Return the unique file name for a deletion vector based on `id`.
*
* The name has the form `<DELETION_VECTOR_FILE_NAME_CORE>_<id>.bin`, where `id` is rendered
* through string interpolation (its `toString`). Only the file name is produced; no parent
* directory or prefix is included.
*
* @param id UUID identifying the deletion vector file.
* @return the file name, including the `.bin` extension.
341+
*/
342+
def assembleDeletionVectorFileName(id: UUID): String =
343+
s"${DELETION_VECTOR_FILE_NAME_CORE}_${id}.bin"
344+
329345
/** Descriptor for an empty stored bitmap. */
330346
val EMPTY: DeletionVectorDescriptor = DeletionVectorDescriptor(
331347
storageType = INLINE_DV_MARKER,

0 commit comments

Comments
 (0)