Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert codebase to scala 2.13 #499

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Convert codebase to scala 2.13
  • Loading branch information
samidalouche committed Aug 9, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit f569a328ca893d7c1dfc65b258061a83c879e7fe
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -13,8 +13,8 @@
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>

<scala.major.version>2.12</scala.major.version>
<scala.version>${scala.major.version}.10</scala.version>
<scala.major.version>2.13</scala.major.version>
<scala.version>${scala.major.version}.11</scala.version>
<artifact.scala.version>${scala.major.version}</artifact.scala.version>
<scala-maven-plugin.version>4.8.1</scala-maven-plugin.version>

@@ -103,7 +103,7 @@
<dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_${scala.major.version}</artifactId>
<version>0.13.2</version>
<version>1.2</version>
</dependency>

<dependency>
10 changes: 9 additions & 1 deletion src/main/scala/com/amazon/deequ/VerificationResult.scala
Original file line number Diff line number Diff line change
@@ -186,6 +186,14 @@ object VerificationResult {
}
}

private[this] case class SimpleCheckResultOutput(checkDescription: String, checkLevel: String,
// remove private as it breaks something with Scala 2.13
// java.lang.RuntimeException: Error while encoding: java.util.concurrent.ExecutionException:
// org.codehaus.commons.compiler.CompileException:
// File 'generated.java', Line 105, Column 25: failed to compile:
// org.codehaus.commons.compiler.CompileException: File 'generated.java',
// Line 105, Column 25: No applicable constructor/method found for zero actual parameters; candidates are:
// "public java.lang.String com.amazon.deequ.VerificationResult$SimpleCheckResultOutput.constraintStatus()"
// private[this]
case class SimpleCheckResultOutput(checkDescription: String, checkLevel: String,
checkStatus: String, constraint: String, constraintStatus: String, constraintMessage: String)
}
2 changes: 1 addition & 1 deletion src/main/scala/com/amazon/deequ/analyzers/Analyzer.scala
Original file line number Diff line number Diff line change
@@ -453,7 +453,7 @@ private[deequ] object Analyzers {
if (nullInResult) {
None
} else {
Option(func(Unit))
Option(func(()))
}
}

Original file line number Diff line number Diff line change
@@ -52,7 +52,7 @@ class QuantileNonSample[T](
this.shrinkingFactor = shrinkingFactor
compactors = ArrayBuffer.fill(data.length)(new NonSampleCompactor[T])
for (i <- data.indices) {
compactors(i).buffer = data(i).to[ArrayBuffer]
compactors(i).buffer = ArrayBuffer.from(data(i)) // data(i).to[ArrayBuffer]
}
curNumOfCompactors = data.length
compactorActualSize = getCompactorItemsCount
5 changes: 3 additions & 2 deletions src/main/scala/com/amazon/deequ/analyzers/StateProvider.scala
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions.aggregate.{ApproximatePercentile, DeequHyperLogLogPlusPlusUtils}
import org.apache.spark.sql.SaveMode

import scala.collection.JavaConversions._
import scala.jdk.CollectionConverters._
import scala.util.hashing.MurmurHash3

private object StateInformation {
@@ -58,7 +58,8 @@ case class InMemoryStateProvider() extends StateLoader with StatePersister {

override def toString: String = {
val buffer = new StringBuilder()
statesByAnalyzer.foreach { case (analyzer, state) =>

statesByAnalyzer.asScala.foreach { case (analyzer, state) =>
buffer.append(analyzer.toString)
buffer.append(" => ")
buffer.append(state.toString)
Original file line number Diff line number Diff line change
@@ -124,13 +124,19 @@ object AnalyzerContext {
}
}

private[this] case class SimpleMetricOutput(
// when using private, leads to
// No applicable constructor/method
// private[this]
case class SimpleMetricOutput(
entity: String,
instance: String,
name: String,
value: Double)

private[this] object SimpleMetricOutput {
// when using private, leads to
// No applicable constructor/method
// private[this]
object SimpleMetricOutput {

def apply(doubleMetric: DoubleMetric): SimpleMetricOutput = {
SimpleMetricOutput(
Original file line number Diff line number Diff line change
@@ -116,12 +116,12 @@ case class OnlineNormalStrategy(
ret += OnlineCalculationResult(currentMean, stdDev, isAnomaly = true)
}
}
ret
ret.toSeq
}


/**
* Search for anomalies in a series of data points.
* Search for anomalies in a series of data points.
*
* @param dataSeries The data contained in a Vector of Doubles
* @param searchInterval The indices between which anomalies should be detected. [a, b).
Original file line number Diff line number Diff line change
@@ -132,7 +132,7 @@ class HoltWinters(
}
val forecasted = Y.drop(series.size)

ModelResults(forecasted, level, trend, seasonality, residuals)
ModelResults(forecasted.toSeq, level.toSeq, trend.toSeq, seasonality.toSeq, residuals.toSeq)
}

private def modelSelectionFor(
Original file line number Diff line number Diff line change
@@ -347,7 +347,7 @@ object ColumnProfiler {
Histogram(histogram.column).equals(histogram)
case _ => false
}
analyzerContextExistingValues = AnalyzerContext(relevantEntries)
analyzerContextExistingValues = AnalyzerContext(relevantEntries.toMap)
}
}
}
Original file line number Diff line number Diff line change
@@ -27,14 +27,13 @@ import com.google.gson._
import com.google.gson.reflect.TypeToken

import scala.collection._
import scala.collection.JavaConverters._
import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap}
import JsonSerializationConstants._
import com.amazon.deequ.analyzers.Histogram.{AggregateFunction, Count => HistogramCount, Sum => HistogramSum}
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.expr

import scala.collection.JavaConversions._
import scala.jdk.CollectionConverters._

private[repository] object JsonSerializationConstants {

@@ -70,6 +69,7 @@ private[deequ] object SimpleResultSerde {
.asInstanceOf[JArrayList[JMap[String, String]]]
.asScala
.map(map => immutable.Map(map.asScala.toList: _*))
.toSeq
}
}

@@ -422,22 +422,22 @@ private[deequ] object AnalyzerDeserializer
getOptionalWhereParam(json))

case "CountDistinct" =>
CountDistinct(getColumnsAsSeq(context, json))
CountDistinct(getColumnsAsSeq(context, json).toSeq)

case "Distinctness" =>
Distinctness(getColumnsAsSeq(context, json))
Distinctness(getColumnsAsSeq(context, json).toSeq)

case "Entropy" =>
Entropy(json.get(COLUMN_FIELD).getAsString)

case "MutualInformation" =>
MutualInformation(getColumnsAsSeq(context, json))
MutualInformation(getColumnsAsSeq(context, json).toSeq)

case "UniqueValueRatio" =>
UniqueValueRatio(getColumnsAsSeq(context, json))
UniqueValueRatio(getColumnsAsSeq(context, json).toSeq)

case "Uniqueness" =>
Uniqueness(getColumnsAsSeq(context, json))
Uniqueness(getColumnsAsSeq(context, json).toSeq)

case "Histogram" =>
Histogram(
@@ -598,7 +598,7 @@ private[deequ] object MetricDeserializer extends JsonDeserializer[Metric[_]] {
val instance = jsonObject.get("instance").getAsString
if (jsonObject.has("value")) {
val entries = jsonObject.get("value").getAsJsonObject
val values = entries.entrySet().map { entry =>
val values = entries.entrySet().asScala.map { entry =>
entry.getKey -> entry.getValue.getAsDouble
}
.toMap
Original file line number Diff line number Diff line change
@@ -99,8 +99,8 @@ private[repository] object MetricsRepositoryMultipleResultsLoader {

def jsonUnion(jsonOne: String, jsonTwo: String): String = {

val objectOne: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonOne)
val objectTwo: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonTwo)
val objectOne: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonOne).toSeq
val objectTwo: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonTwo).toSeq

val columnsTotal = objectOne.headOption.getOrElse(Map.empty).keySet ++
objectTwo.headOption.getOrElse(Map.empty).keySet
Original file line number Diff line number Diff line change
@@ -153,10 +153,11 @@ class FileSystemMetricsRepositoryMultipleResultsLoader(
.metricMap
.filterKeys(analyzer => forAnalyzers.isEmpty || forAnalyzers.get.contains(analyzer))

val requestedAnalyzerContext = AnalyzerContext(requestedMetrics)
val requestedAnalyzerContext = AnalyzerContext(requestedMetrics.toMap)

AnalysisResult(analysisResult.resultKey, requestedAnalyzerContext)
}
.toSeq
}
}

Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ import com.amazon.deequ.metrics.Metric
import com.amazon.deequ.repository._
import com.amazon.deequ.analyzers.runners.AnalyzerContext

import scala.collection.JavaConversions._
import scala.jdk.CollectionConverters._
import java.util.concurrent.ConcurrentHashMap

/** A simple Repository implementation backed by a concurrent hash map */
@@ -118,6 +118,7 @@ class LimitedInMemoryMetricsRepositoryMultipleResultsLoader(
/** Get the AnalysisResult */
def get(): Seq[AnalysisResult] = {
resultsRepository
.asScala
.filterKeys(key => after.isEmpty || after.get <= key.dataSetDate)
.filterKeys(key => before.isEmpty || key.dataSetDate <= before.get)
.filterKeys(key => tagValues.isEmpty || tagValues.get.toSet.subsetOf(key.tags.toSet))
@@ -129,7 +130,7 @@ class LimitedInMemoryMetricsRepositoryMultipleResultsLoader(
.metricMap
.filterKeys(analyzer => forAnalyzers.isEmpty || forAnalyzers.get.contains(analyzer))

AnalysisResult(analysisResult.resultKey, AnalyzerContext(requestedMetrics))
AnalysisResult(analysisResult.resultKey, AnalyzerContext(requestedMetrics.toMap))
}
.toSeq
}
Original file line number Diff line number Diff line change
@@ -142,7 +142,7 @@ class ConstraintSuggestionRunner {
groupedSuggestionsWithColumnNames.map { case (_, suggestion) => suggestion } }

ConstraintSuggestionResult(columnProfiles.profiles, columnProfiles.numRecords,
columnsWithSuggestions, verificationResult)
columnsWithSuggestions.toMap, verificationResult)
}

private[this] def splitTrainTestSets(
6 changes: 3 additions & 3 deletions src/test/scala/com/amazon/deequ/KLL/KLLDistanceTest.scala
Original file line number Diff line number Diff line change
@@ -88,7 +88,7 @@ class KLLDistanceTest extends WordSpec with SparkContextSpec
val sample2 = scala.collection.mutable.Map(
"a" -> 22L, "b" -> 20L, "c" -> 25L, "d" -> 12L, "e" -> 13L, "f" -> 15L)
val distance = Distance.categoricalDistance(sample1, sample2, method = LInfinityMethod(alpha = Some(0.003)))
assert(distance == 0.2726338046550349)
assert(distance == 0.27263380465503484)
}

"Categorial distance should compute correct linf_robust with different alpha value .1" in {
@@ -137,7 +137,7 @@ class KLLDistanceTest extends WordSpec with SparkContextSpec
"a" -> 100L, "b" -> 22L, "c" -> 25L, "d" -> 5L, "e" -> 13L, "f" -> 2L)
val distance = Distance.categoricalDistance(
sample1, sample2, correctForLowNumberOfSamples = true, method = ChisquareMethod())
assert(distance == 8.789790456457125)
assert(distance == 8.789790456457123)
}

"Categorical distance should compute correct chisquare distance (low samples) with regrouping (yates)" in {
@@ -170,7 +170,7 @@ class KLLDistanceTest extends WordSpec with SparkContextSpec
"a" -> 100L, "b" -> 4L, "c" -> 3L, "d" -> 27L, "e" -> 20L, "f" -> 20L, "g" -> 20L, "h" -> 20L)
val distance = Distance.categoricalDistance(
sample, baseline, correctForLowNumberOfSamples = true, method = ChisquareMethod())
assert(distance == 6.827423492761593)
assert(distance == 6.827423492761592)
}

"Categorical distance should compute correct chisquare distance (low samples) " +
18 changes: 10 additions & 8 deletions src/test/scala/com/amazon/deequ/VerificationResultTest.scala
Original file line number Diff line number Diff line change
@@ -79,12 +79,13 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe
val successMetricsResultsJson = VerificationResult.successMetricsAsJson(results)

val expectedJson =
"""[{"entity":"Column","instance":"item","name":"Distinctness","value":1.0},
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scala 2.13 collections break the previous ordering of elements

"""[
|{"entity":"Multicolumn","instance":"att1,att2","name":"Uniqueness","value":0.25},
{"entity":"Column","instance":"item","name":"Distinctness","value":1.0},
|{"entity": "Column", "instance":"att2","name":"Completeness","value":1.0},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25},
|{"entity":"Dataset","instance":"*","name":"Size","value":4.0}]"""
|{"entity":"Dataset","instance":"*","name":"Size","value":4.0},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0}
|]"""
.stripMargin.replaceAll("\n", "")

assertSameResultsJson(successMetricsResultsJson, expectedJson)
@@ -102,9 +103,10 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe
VerificationResult.successMetricsAsJson(results, metricsForAnalyzers)

val expectedJson =
"""[{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25}]"""
"""[
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scala 2.13 collections break the previous ordering of elements

|{"entity":"Multicolumn","instance":"att1,att2","name":"Uniqueness","value":0.25},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0}
|]"""
.stripMargin.replaceAll("\n", "")

assertSameResultsJson(successMetricsResultsJson, expectedJson)
Loading