Skip to content

Commit 3b1a3ec

Browse files
authored
Fix performance of building row-level results (#577)
* Generate row-level results with `withColumns`.
  Iteratively using `withColumn` (singular) causes performance issues when iterating over a large sequence of columns.
* Add back `UNIQUENESS_ID`.
1 parent d495234 commit 3b1a3ec

File tree

1 file changed

+1
-3
lines changed

1 file changed

+1
-3
lines changed

src/main/scala/com/amazon/deequ/VerificationResult.scala

+1-3
Original file line number | Diff line number | Diff line change
@@ -98,9 +98,7 @@ object VerificationResult {
9898
val columnNamesToMetrics: Map[String, Column] = verificationResultToColumn(verificationResult)
9999

100100
val dataWithID = data.withColumn(UNIQUENESS_ID, monotonically_increasing_id())
101-
columnNamesToMetrics.foldLeft(dataWithID)(
102-
(dataWithID, newColumn: (String, Column)) =>
103-
dataWithID.withColumn(newColumn._1, newColumn._2)).drop(UNIQUENESS_ID)
101+
dataWithID.withColumns(columnNamesToMetrics).drop(UNIQUENESS_ID)
104102
}
105103

106104
def checkResultsAsJson(verificationResult: VerificationResult,

0 commit comments

Comments
 (0)