diff --git a/src/main/scala/com/microsoft/hyperspace/index/Config.scala b/src/main/scala/com/microsoft/hyperspace/index/Config.scala
new file mode 100644
index 000000000..3ff5ec807
--- /dev/null
+++ b/src/main/scala/com/microsoft/hyperspace/index/Config.scala
@@ -0,0 +1,158 @@
+/*
+ * Copyright (2020) The Hyperspace Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.microsoft.hyperspace.index
+
+import java.util.Locale
+
+/**
+ * IndexConfig specifies the configuration of an index.
+ * Associated Builder [[com.microsoft.hyperspace.index.configs.covering.IndexConfig.builder()]]
+ *
+ * Column names are compared case-insensitively; equality ignores the order of included columns.
+ *
+ * @param indexName Index name.
+ * @param indexedColumns Columns from which an index is created.
+ * @param includedColumns Columns to be included in the index.
+ * @throws IllegalArgumentException if the index name or the indexed columns are empty, or if a
+ *         column name is duplicated (ignoring case) within or across the two column lists.
+ */
+case class IndexConfig(
+    indexName: String,
+    indexedColumns: Seq[String],
+    includedColumns: Seq[String] = Seq())
+    extends CoveringIndexConfigBase {
+  if (indexName.isEmpty || indexedColumns.isEmpty) {
+    throw new IllegalArgumentException("Empty index name or indexed columns are not allowed.")
+  }
+
+  val lowerCaseIndexedColumns: Seq[String] = toLowerCase(indexedColumns)
+  val lowerCaseIncludedColumns: Seq[String] = toLowerCase(includedColumns)
+  val lowerCaseIncludedColumnsSet: Set[String] = lowerCaseIncludedColumns.toSet
+
+  if (lowerCaseIndexedColumns.toSet.size < lowerCaseIndexedColumns.size) {
+    throw new IllegalArgumentException("Duplicate indexed column names are not allowed.")
+  }
+
+  if (lowerCaseIncludedColumnsSet.size < lowerCaseIncludedColumns.size) {
+    throw new IllegalArgumentException("Duplicate included column names are not allowed.")
+  }
+
+  for (indexedColumn <- lowerCaseIndexedColumns) {
+    if (lowerCaseIncludedColumns.contains(indexedColumn)) {
+      throw new IllegalArgumentException(
+        "Duplicate column names in indexed/included columns are not allowed.")
+    }
+  }
+
+  override def equals(that: Any): Boolean = {
+    that match {
+      case IndexConfig(thatIndexName, thatIndexedColumns, thatIncludedColumns) =>
+        indexName.equalsIgnoreCase(thatIndexName) &&
+          lowerCaseIndexedColumns.equals(toLowerCase(thatIndexedColumns)) &&
+          lowerCaseIncludedColumnsSet.equals(toLowerCase(thatIncludedColumns).toSet)
+      case _ => false
+    }
+  }
+
+  override def hashCode(): Int = {
+    lowerCaseIndexedColumns.hashCode + lowerCaseIncludedColumnsSet.hashCode
+  }
+
+  override def toString: String = {
+    val indexedColumnNames = lowerCaseIndexedColumns.mkString(", ")
+    val includedColumnNames = lowerCaseIncludedColumns.mkString(", ")
+    s"[indexName: $indexName; indexedColumns: $indexedColumnNames; " +
+      s"includedColumns: $includedColumnNames]"
+  }
+
+  private def toLowerCase(seq: Seq[String]): Seq[String] = seq.map(_.toLowerCase(Locale.ROOT))
+}
+
+/**
+ * BloomFilterIndexConfig specifies the configuration of a bloom filter index on one column.
+ * Associated Builder
+ * [[com.microsoft.hyperspace.index.configs.noncovering.BloomFilterIndexConfig.builder()]]
+ *
+ * @param indexName Index name; compared case-insensitively.
+ * @param indexedColumn Column from which the index is created; compared case-insensitively.
+ * @param expectedNumItems Expected number of items; must be at least 1.
+ * @param fpp False positive probability; defaults to -1 (presumably "not specified").
+ * @param numBits Number of bits for the bloom filter; defaults to -1 (presumably "not specified").
+ * @throws IllegalArgumentException if the index name or the indexed column is empty, if
+ *         expectedNumItems is less than 1, or if fpp or numBits is negative (other than the
+ *         -1 default).
+ */
+case class BloomFilterIndexConfig(
+    indexName: String,
+    indexedColumn: String,
+    expectedNumItems: Long,
+    fpp: Double = -1,
+    numBits: Long = -1)
+    extends NonCoveringIndexConfigBase {
+  if (indexName.isEmpty || indexedColumn.isEmpty || expectedNumItems < 1) {
+    throw new IllegalArgumentException(
+      "Empty index name or indexed column or expected items less than 1 are not allowed.")
+  }
+
+  // -1 is the default of fpp and numBits, so it is exempt from the negative-value checks below.
+  if (fpp != -1 && fpp < 0) {
+    throw new IllegalArgumentException("False positive probability cannot be negative.")
+  }
+
+  if (numBits != -1 && numBits < 0) {
+    throw new IllegalArgumentException("Number of bits for the bloom filter cannot be negative.")
+  }
+
+  val lowerCaseIndexedColumn: String = indexedColumn.toLowerCase(Locale.ROOT)
+
+  override def equals(that: Any): Boolean = {
+    that match {
+      case BloomFilterIndexConfig(
+          thatIndexName,
+          thatIndexedColumn,
+          thatExpectedItems,
+          thatFpp,
+          thatBits) =>
+        indexName.equalsIgnoreCase(thatIndexName) &&
+          lowerCaseIndexedColumn.equals(thatIndexedColumn.toLowerCase(Locale.ROOT)) &&
+          expectedNumItems == thatExpectedItems &&
+          fpp == thatFpp &&
+          numBits == thatBits
+      case _ => false
+    }
+  }
+
+  override def hashCode(): Int = {
+    // Objects that are equal must hash alike: use the lower-cased column that equals() compares,
+    // and leave indexName out (as IndexConfig.hashCode does) because equals() ignores its case
+    // while String.hashCode does not.
+    (lowerCaseIndexedColumn, expectedNumItems, fpp, numBits).##
+  }
+
+  override def toString: String = {
+    s"[indexName: $indexName; indexedColumn: $indexedColumn; " +
+      s"ExpectedItems: $expectedNumItems; FPP: $fpp; NumBitsUsed: $numBits;]"
+  }
+}
+
+object Config {
+
+  // TODO - prints info table about all types of index supported by hyperspace
+  def printAllIndexConfigInfo(): String = {
+    s""
+  }
+}
diff --git a/src/main/scala/com/microsoft/hyperspace/index/ConfigBase.scala b/src/main/scala/com/microsoft/hyperspace/index/ConfigBase.scala
new file mode 100644
index 000000000..4e1e8018a
--- /dev/null
+++ b/src/main/scala/com/microsoft/hyperspace/index/ConfigBase.scala
@@ -0,0 +1,45 @@
+/*
+ * Copyright (2020) The Hyperspace Project
Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.microsoft.hyperspace.index
+
+/**
+ * Every index supported in Hyperspace that exposes a user-facing config must define that
+ * config as an extension of the [[ConfigBase]] trait.
+ */
+sealed trait ConfigBase {
+  val indexName: String
+
+  def equals(obj: Any): Boolean
+
+  def hashCode(): Int
+
+  def toString: String
+}
+
+trait CoveringIndexConfigBase extends ConfigBase {
+  /**
+   * Columns from which the index is created.
+   */
+  val indexedColumns: Seq[String]
+
+  /**
+   * Columns to be included along with the indexed columns.
+   */
+  val includedColumns: Seq[String]
+}
+
+trait NonCoveringIndexConfigBase extends ConfigBase {}
diff --git a/src/main/scala/com/microsoft/hyperspace/index/IndexConfig.scala b/src/main/scala/com/microsoft/hyperspace/index/configs/covering/IndexConfig.scala
similarity index 57%
rename from src/main/scala/com/microsoft/hyperspace/index/IndexConfig.scala
rename to src/main/scala/com/microsoft/hyperspace/index/configs/covering/IndexConfig.scala
index 212606e66..838966562 100644
--- a/src/main/scala/com/microsoft/hyperspace/index/IndexConfig.scala
+++ b/src/main/scala/com/microsoft/hyperspace/index/configs/covering/IndexConfig.scala
@@ -14,67 +14,9 @@
  * limitations under the License.
  */
 
-package com.microsoft.hyperspace.index
+package com.microsoft.hyperspace.index.configs.covering
 
-import java.util.Locale
-
-/**
- * IndexConfig specifies the configuration of an index.
- * - * @param indexName Index name. - * @param indexedColumns Columns from which an index is created. - * @param includedColumns Columns to be included in the index. - */ -case class IndexConfig( - indexName: String, - indexedColumns: Seq[String], - includedColumns: Seq[String] = Seq()) { - if (indexName.isEmpty || indexedColumns.isEmpty) { - throw new IllegalArgumentException("Empty index name or indexed columns are not allowed.") - } - - val lowerCaseIndexedColumns = toLowerCase(indexedColumns) - val lowerCaseIncludedColumns = toLowerCase(includedColumns) - val lowerCaseIncludedColumnsSet = lowerCaseIncludedColumns.toSet - - if (lowerCaseIndexedColumns.toSet.size < lowerCaseIndexedColumns.size) { - throw new IllegalArgumentException("Duplicate indexed column names are not allowed.") - } - - if (lowerCaseIncludedColumnsSet.size < lowerCaseIncludedColumns.size) { - throw new IllegalArgumentException("Duplicate included column names are not allowed.") - } - - for (indexedColumn <- lowerCaseIndexedColumns) { - if (lowerCaseIncludedColumns.contains(indexedColumn)) { - throw new IllegalArgumentException( - "Duplicate column names in indexed/included columns are not allowed.") - } - } - - override def equals(that: Any): Boolean = { - that match { - case IndexConfig(thatIndexName, thatIndexedColumns, thatIncludedColumns) => - indexName.equalsIgnoreCase(thatIndexName) && - lowerCaseIndexedColumns.equals(toLowerCase(thatIndexedColumns)) && - lowerCaseIncludedColumnsSet.equals(toLowerCase(thatIncludedColumns).toSet) - case _ => false - } - } - - override def hashCode(): Int = { - lowerCaseIndexedColumns.hashCode + lowerCaseIncludedColumnsSet.hashCode - } - - override def toString: String = { - val indexedColumnNames = lowerCaseIndexedColumns.mkString(", ") - val includedColumnNames = lowerCaseIncludedColumns.mkString(", ") - s"[indexName: $indexName; indexedColumns: $indexedColumnNames; " + - s"includedColumns: $includedColumnNames]" - } - - private def toLowerCase(seq: 
Seq[String]): Seq[String] = seq.map(_.toLowerCase(Locale.ROOT)) -} +import com.microsoft.hyperspace.index.IndexConfig /** * Defines [[IndexConfig.Builder]] and relevant helper methods for enabling builder pattern for @@ -98,7 +40,7 @@ object IndexConfig { * @return an [[IndexConfig.Builder]] object with updated index name. */ def indexName(indexName: String): Builder = { - if (!this.indexName.isEmpty) { + if (this.indexName.nonEmpty) { throw new UnsupportedOperationException("Index name is already set.") } @@ -152,8 +94,8 @@ object IndexConfig { * * @return an [[IndexConfig]] object. */ - def create(): IndexConfig = { - IndexConfig(indexName, indexedColumns, includedColumns) + def build(): IndexConfig = { + new IndexConfig(indexName, indexedColumns, includedColumns) } } diff --git a/src/main/scala/com/microsoft/hyperspace/index/configs/noncovering/BloomFilterIndexConfig.scala b/src/main/scala/com/microsoft/hyperspace/index/configs/noncovering/BloomFilterIndexConfig.scala new file mode 100644 index 000000000..8ab24a9ed --- /dev/null +++ b/src/main/scala/com/microsoft/hyperspace/index/configs/noncovering/BloomFilterIndexConfig.scala @@ -0,0 +1,115 @@ +/* + * Copyright (2020) The Hyperspace Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.microsoft.hyperspace.index.configs.noncovering
+
+import com.microsoft.hyperspace.index.BloomFilterIndexConfig
+import com.microsoft.hyperspace.index.configs.covering.IndexConfig
+
+/**
+ * TODO Defines [[BloomFilterIndexConfig.Builder]] and relevant helper methods for enabling
+ * builder pattern for [[BloomFilterIndexConfig]].
+ */
+object BloomFilterIndexConfig {
+
+  /**
+   * Builder for [[BloomFilterIndexConfig]].
+   */
+  private[index] class Builder {
+
+    private[this] var indexedColumn: String = ""
+    private[this] var indexName: String = ""
+    private[this] var fpp: Double = -1
+    private[this] var expectedItems: Long = -1
+    private[this] var numBits: Long = -1
+
+    /**
+     * Sets the index name and the indexed column; this may be done only once per builder.
+     * @param indexName index name for the [[BloomFilterIndexConfig]]; must be non-empty.
+     * @param indexedColumn indexed column for the [[BloomFilterIndexConfig]]; must be non-empty.
+     * @return this [[BloomFilterIndexConfig.Builder]] with the index name and indexed column set.
+     */
+    def init(indexName: String, indexedColumn: String): Builder = {
+      if (this.indexName.nonEmpty || this.indexedColumn.nonEmpty) {
+        // TODO: Prevent creating index config if index already exists.
+        throw new UnsupportedOperationException(
+          "Bloom Filter index metadata already set can't override, " +
+            "maybe try creating a new config.")
+      }
+
+      if (indexName.isEmpty || indexedColumn.isEmpty) {
+        throw new IllegalArgumentException("Empty metadata names is not allowed.")
+      }
+
+      this.indexName = indexName
+      this.indexedColumn = indexedColumn
+      this
+    }
+
+    /**
+     * Sets the expected number of items for the [[BloomFilterIndexConfig]].
+     * @param items expected number of items; must be at least 1.
+     * @return this [[BloomFilterIndexConfig.Builder]] with the expected number of items set.
+     */
+    def expectedNumItems(items: Long): Builder = {
+      if (items < 1) {
+        throw new IllegalArgumentException("Can't support the items value provided.")
+      }
+
+      this.expectedItems = items
+      this
+    }
+
+    /**
+     * Sets the false positive probability (fpp) for the [[BloomFilterIndexConfig]].
+     * @param fpp false positive probability; must be greater than 0.
+     * @return this [[BloomFilterIndexConfig.Builder]] with the fpp set.
+     */
+    def fppToSupport(fpp: Double): Builder = {
+      if (fpp <= 0) {
+        throw new IllegalArgumentException("Can't support the fpp value.")
+      }
+
+      this.fpp = fpp
+      this
+    }
+
+    /**
+     * Sets the number of bits for the [[BloomFilterIndexConfig]].
+     * @param bits number of bits; must be at least 1.
+     * @return this [[BloomFilterIndexConfig.Builder]] with the number of bits set.
+     */
+    def numBitsToDefineBloomFilter(bits: Long): Builder = {
+      if (bits < 1) {
+        throw new IllegalArgumentException("Can't allow bits for storage be less than 1")
+      }
+
+      this.numBits = bits
+      this
+    }
+
+    def build(): BloomFilterIndexConfig = {
+      new BloomFilterIndexConfig(indexName, indexedColumn, expectedItems, fpp, numBits)
+    }
+  }
+
+  /**
+   * Creates new [[BloomFilterIndexConfig.Builder]] for constructing an [[BloomFilterIndexConfig]].
+   *
+   * @return an [[BloomFilterIndexConfig.Builder]] object.
+   */
+  def builder(): Builder = new Builder
+}
diff --git a/src/test/scala/com/microsoft/hyperspace/index/IndexConfigTest.scala b/src/test/scala/com/microsoft/hyperspace/index/IndexConfigTest.scala
index cd848b86e..5cf769f81 100644
--- a/src/test/scala/com/microsoft/hyperspace/index/IndexConfigTest.scala
+++ b/src/test/scala/com/microsoft/hyperspace/index/IndexConfigTest.scala
@@ -18,29 +18,45 @@ package com.microsoft.hyperspace.index
 
 import org.apache.spark.SparkFunSuite
 
+import com.microsoft.hyperspace.index.configs.covering
+
 class IndexConfigTest extends SparkFunSuite {
+  val CoveringIndexConfigBuilder: covering.IndexConfig.type = covering.IndexConfig
+
   test("Empty index name is not allowed.") {
+
     intercept[IllegalArgumentException](IndexConfig("", Seq("c1"), Seq("c2")))
-    intercept[IllegalArgumentException](IndexConfig.builder.indexBy("c1").include("c2").create)
-    intercept[IllegalArgumentException](IndexConfig.builder.indexName(""))
+    intercept[IllegalArgumentException](
+      CoveringIndexConfigBuilder.builder().indexBy("c1").include("c2").build())
+    intercept[IllegalArgumentException](CoveringIndexConfigBuilder.builder().indexName(""))
   }
 
   test("Empty indexed columns are not allowed.") {
     intercept[IllegalArgumentException](IndexConfig("name", Seq(), Seq("c1")))
     intercept[IllegalArgumentException](
-      IndexConfig.builder.indexName("name").include("c1").create)
+      CoveringIndexConfigBuilder.builder().indexName("name").include("c1").build())
   }
 
   test("Same indexed column names (case-insensitive) are not allowed.") {
     intercept[IllegalArgumentException](IndexConfig("name", Seq("c1", "C1"), Seq("c2")))
     intercept[IllegalArgumentException](
-      IndexConfig.builder.indexName("name").indexBy("c1", "C1").include("c2").create)
+      CoveringIndexConfigBuilder
+        .builder()
+        .indexName("name")
+        .indexBy("c1", "C1")
+        .include("c2")
+        .build())
   }
 
   test("Same column names (case-insensitive) in indexed/included columns are not allowed.") {
intercept[IllegalArgumentException](IndexConfig("name", Seq("c1"), Seq("C1", "c2"))) intercept[IllegalArgumentException]( - IndexConfig.builder.indexName("name").indexBy("c1").include("C1", "c2").create) + CoveringIndexConfigBuilder + .builder() + .indexName("name") + .indexBy("c1") + .include("C1", "c2") + .build()) } test("Test equals() function.") { @@ -98,11 +114,12 @@ class IndexConfigTest extends SparkFunSuite { val indexedColumns = Seq("C1", "c2", "C3") val includedColumns = Seq("C4", "c5", "C6") - val indexConfig = IndexConfig.builder + val indexConfig = CoveringIndexConfigBuilder + .builder() .indexName(indexName) .indexBy(indexedColumns.head, indexedColumns.tail: _*) .include(includedColumns.head, includedColumns.tail: _*) - .create + .build() assert(indexConfig.indexName.equals(indexName)) assert(indexConfig.indexedColumns.equals(indexedColumns)) @@ -111,25 +128,28 @@ class IndexConfigTest extends SparkFunSuite { test("Test exception on multiple indexBy, include and index name on IndexConfig builder.") { intercept[UnsupportedOperationException]( - IndexConfig.builder + CoveringIndexConfigBuilder + .builder() .indexName("name1") .indexName("name2") .indexBy("c1", "c2") .include("c3", "c4") - .create) + .build()) intercept[UnsupportedOperationException]( - IndexConfig.builder + CoveringIndexConfigBuilder + .builder() .indexName("name") .indexBy("c1") .indexBy("c2") .include("c3", "c4") - .create) + .build()) intercept[UnsupportedOperationException]( - IndexConfig.builder + CoveringIndexConfigBuilder + .builder() .indexName("name") .indexBy("c1") .include("c2") .include("c3") - .create) + .build()) } }