Skip to content
This repository has been archived by the owner on Jun 14, 2024. It is now read-only.

[WIP] Refactor Hyperspace Index Configs to allow support for addition of new index #357

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions src/main/scala/com/microsoft/hyperspace/index/Config.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright (2020) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.microsoft.hyperspace.index

import java.util.Locale

/**
 * Configuration of a covering index: its name, the columns the index is created from, and
 * the additional columns included in it.
 * Associated Builder [[com.microsoft.hyperspace.index.configs.covering.IndexConfig.builder()]]
 *
 * Column names are lower-cased with `Locale.ROOT` before any comparison. Construction fails
 * with an `IllegalArgumentException` when the index name or the indexed columns are empty,
 * when either column list contains the same name twice, or when a name appears in both lists.
 *
 * Two configs are equal when their index names match ignoring case, their lower-cased indexed
 * columns match in order, and their lower-cased included columns match as sets.
 *
 * @param indexName Index name.
 * @param indexedColumns Columns from which an index is created.
 * @param includedColumns Columns to be included in the index.
 */
case class IndexConfig(
    indexName: String,
    indexedColumns: Seq[String],
    includedColumns: Seq[String] = Seq())
    extends CoveringIndexConfigBase {
  // This check runs before any column name is lower-cased.
  if (indexName.isEmpty || indexedColumns.isEmpty) {
    throw new IllegalArgumentException("Empty index name or indexed columns are not allowed.")
  }

  val lowerCaseIndexedColumns: Seq[String] = toLowerCase(indexedColumns)
  val lowerCaseIncludedColumns: Seq[String] = toLowerCase(includedColumns)
  val lowerCaseIncludedColumnsSet: Set[String] = lowerCaseIncludedColumns.toSet

  // A list that shrinks when de-duplicated contained a repeated (case-insensitive) name.
  if (lowerCaseIndexedColumns.distinct.size != lowerCaseIndexedColumns.size) {
    throw new IllegalArgumentException("Duplicate indexed column names are not allowed.")
  }

  if (lowerCaseIncludedColumns.size > lowerCaseIncludedColumnsSet.size) {
    throw new IllegalArgumentException("Duplicate included column names are not allowed.")
  }

  // A column may be indexed or included, never both.
  if (lowerCaseIndexedColumns.exists(name => lowerCaseIncludedColumns.contains(name))) {
    throw new IllegalArgumentException(
      "Duplicate column names in indexed/included columns are not allowed.")
  }

  override def equals(that: Any): Boolean = that match {
    case other: IndexConfig =>
      val sameName = indexName.equalsIgnoreCase(other.indexName)
      sameName &&
      lowerCaseIndexedColumns == other.lowerCaseIndexedColumns &&
      lowerCaseIncludedColumnsSet == other.lowerCaseIncludedColumnsSet
    case _ => false
  }

  override def hashCode(): Int = {
    val indexedHash = lowerCaseIndexedColumns.hashCode
    val includedHash = lowerCaseIncludedColumnsSet.hashCode
    indexedHash + includedHash
  }

  override def toString: String = {
    val indexed = lowerCaseIndexedColumns.mkString(", ")
    val included = lowerCaseIncludedColumns.mkString(", ")
    s"[indexName: $indexName; indexedColumns: $indexed; " +
      s"includedColumns: $included]"
  }

  // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
  private def toLowerCase(seq: Seq[String]): Seq[String] =
    for (name <- seq) yield name.toLowerCase(Locale.ROOT)
}

/**
 * User-facing configuration of a bloom filter index over a single column.
 *
 * `fpp` and `numBits` default to -1, which this class treats as "not specified": the value -1
 * is exempt from the range checks below.
 *
 * Two configs are equal when their index names and indexed columns match ignoring case and
 * `expectedNumItems`, `fpp` and `numBits` are equal.
 *
 * @param indexName Index name; must not be empty.
 * @param indexedColumn Name of the indexed column; must not be empty.
 * @param expectedNumItems Expected number of items; must be at least 1.
 * @param fpp False positive probability, or -1 when not specified. Must not be negative, NaN
 *            or greater than 1.
 * @param numBits Number of bits, or -1 when not specified. Must not be negative.
 * @throws IllegalArgumentException if any of the constraints above is violated.
 */
case class BloomFilterIndexConfig(
    indexName: String,
    indexedColumn: String,
    expectedNumItems: Long,
    fpp: Double = -1,
    numBits: Long = -1)
    extends NonCoveringIndexConfigBase {
  if (indexName.isEmpty || indexedColumn.isEmpty || expectedNumItems < 1) {
    throw new IllegalArgumentException(
      "Empty index name or indexed column or expected items less than 1 are not allowed.")
  }

  if (fpp != -1 && fpp < 0) {
    throw new IllegalArgumentException("False positive probability cannot be negative.")
  }

  // A probability above 1 is meaningless, and NaN would make equals non-reflexive
  // (NaN == NaN is false), so both are rejected up front.
  if (java.lang.Double.isNaN(fpp) || fpp > 1) {
    throw new IllegalArgumentException(
      "False positive probability cannot be NaN or greater than 1.")
  }

  if (numBits != -1 && numBits < 0) {
    throw new IllegalArgumentException("Number of bits cannot be negative.")
  }

  val lowerCaseIndexedColumn: String = indexedColumn.toLowerCase(Locale.ROOT)

  override def equals(that: Any): Boolean = {
    that match {
      case BloomFilterIndexConfig(
            thatIndexName,
            thatIndexedColumn,
            thatExpectedItems,
            thatFpp,
            thatBits) =>
        indexName.equalsIgnoreCase(thatIndexName) &&
          lowerCaseIndexedColumn.equals(thatIndexedColumn.toLowerCase(Locale.ROOT)) &&
          expectedNumItems == thatExpectedItems &&
          fpp == thatFpp &&
          numBits == thatBits
      case _ => false
    }
  }

  override def hashCode(): Int = {
    // Only fields whose equality in equals() guarantees an equal hash are used here.
    // indexName is compared with equalsIgnoreCase, which does not always agree with
    // lower-casing the whole string, and 0.0 == -0.0 although their hash codes differ, so
    // indexName and fpp are left out. The column is hashed in its lower-cased form because
    // that is the form equals() compares.
    (lowerCaseIndexedColumn, expectedNumItems, numBits).hashCode
  }

  override def toString: String = {
    s"[indexName: $indexName; indexedColumn: $indexedColumn; " +
      s"ExpectedItems: $expectedNumItems; FPP: $fpp; NumBitsUsed: $numBits;]"
  }
}

/**
 * Holder for helpers that describe the index config types.
 */
object Config {

  // TODO - prints info table about all types of index supported by hyperspace
  /**
   * Placeholder for the index config info table.
   *
   * @return currently always the empty string.
   */
  def printAllIndexConfigInfo(): String = ""
}
45 changes: 45 additions & 0 deletions src/main/scala/com/microsoft/hyperspace/index/ConfigBase.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (2020) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.microsoft.hyperspace.index

/**
 * Base trait for user-facing index configs in Hyperspace. Every index type whose user-facing
 * config needs to be defined has to extend [[ConfigBase]], either directly or through one of
 * the sub-traits declared in this file.
 *
 * The trait is sealed, so it can only be extended directly from within this source file.
 */
sealed trait ConfigBase {
// Name of the index the config describes.
val indexName: String

// equals, hashCode and toString are declared as abstract members here; IndexConfig and
// BloomFilterIndexConfig each provide their own overrides.
def equals(obj: Any): Boolean

def hashCode(): Int

def toString: String
}

/**
 * Config contract for covering indexes: adds the indexed and included column lists to the
 * members declared by [[ConfigBase]].
 */
trait CoveringIndexConfigBase extends ConfigBase {
/**
 * Columns from which the index is created.
 */
val indexedColumns: Seq[String]

/**
 * Columns to be included with the indexed columns.
 */
val includedColumns: Seq[String]
}

/**
 * Marker trait for configs of non-covering indexes. It declares no members of its own beyond
 * those inherited from [[ConfigBase]].
 */
trait NonCoveringIndexConfigBase extends ConfigBase {}
Original file line number Diff line number Diff line change
Expand Up @@ -14,67 +14,9 @@
* limitations under the License.
*/

package com.microsoft.hyperspace.index
package com.microsoft.hyperspace.index.configs.covering

import java.util.Locale

/**
* IndexConfig specifies the configuration of an index.
*
* @param indexName Index name.
* @param indexedColumns Columns from which an index is created.
* @param includedColumns Columns to be included in the index.
*/
case class IndexConfig(
indexName: String,
indexedColumns: Seq[String],
includedColumns: Seq[String] = Seq()) {
if (indexName.isEmpty || indexedColumns.isEmpty) {
throw new IllegalArgumentException("Empty index name or indexed columns are not allowed.")
}

val lowerCaseIndexedColumns = toLowerCase(indexedColumns)
val lowerCaseIncludedColumns = toLowerCase(includedColumns)
val lowerCaseIncludedColumnsSet = lowerCaseIncludedColumns.toSet

if (lowerCaseIndexedColumns.toSet.size < lowerCaseIndexedColumns.size) {
throw new IllegalArgumentException("Duplicate indexed column names are not allowed.")
}

if (lowerCaseIncludedColumnsSet.size < lowerCaseIncludedColumns.size) {
throw new IllegalArgumentException("Duplicate included column names are not allowed.")
}

for (indexedColumn <- lowerCaseIndexedColumns) {
if (lowerCaseIncludedColumns.contains(indexedColumn)) {
throw new IllegalArgumentException(
"Duplicate column names in indexed/included columns are not allowed.")
}
}

override def equals(that: Any): Boolean = {
that match {
case IndexConfig(thatIndexName, thatIndexedColumns, thatIncludedColumns) =>
indexName.equalsIgnoreCase(thatIndexName) &&
lowerCaseIndexedColumns.equals(toLowerCase(thatIndexedColumns)) &&
lowerCaseIncludedColumnsSet.equals(toLowerCase(thatIncludedColumns).toSet)
case _ => false
}
}

override def hashCode(): Int = {
lowerCaseIndexedColumns.hashCode + lowerCaseIncludedColumnsSet.hashCode
}

override def toString: String = {
val indexedColumnNames = lowerCaseIndexedColumns.mkString(", ")
val includedColumnNames = lowerCaseIncludedColumns.mkString(", ")
s"[indexName: $indexName; indexedColumns: $indexedColumnNames; " +
s"includedColumns: $includedColumnNames]"
}

private def toLowerCase(seq: Seq[String]): Seq[String] = seq.map(_.toLowerCase(Locale.ROOT))
}
import com.microsoft.hyperspace.index.IndexConfig

/**
* Defines [[IndexConfig.Builder]] and relevant helper methods for enabling builder pattern for
Expand All @@ -98,7 +40,7 @@ object IndexConfig {
* @return an [[IndexConfig.Builder]] object with updated index name.
*/
def indexName(indexName: String): Builder = {
if (!this.indexName.isEmpty) {
if (this.indexName.nonEmpty) {
throw new UnsupportedOperationException("Index name is already set.")
}

Expand Down Expand Up @@ -152,8 +94,8 @@ object IndexConfig {
*
* @return an [[IndexConfig]] object.
*/
def create(): IndexConfig = {
IndexConfig(indexName, indexedColumns, includedColumns)
def build(): IndexConfig = {
new IndexConfig(indexName, indexedColumns, includedColumns)
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (2020) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.microsoft.hyperspace.index.configs.noncovering

import com.microsoft.hyperspace.index.BloomFilterIndexConfig
import com.microsoft.hyperspace.index.configs.covering.IndexConfig

/**
 * Defines [[BloomFilterIndexConfig.Builder]] and the `builder()` factory that enable the
 * builder pattern for [[BloomFilterIndexConfig]].
 *
 * TODO: the original comment marks this object as unfinished without saying what is missing.
 */
object BloomFilterIndexConfig {

  /**
   * Builder for [[BloomFilterIndexConfig]].
   *
   * Settings that are never supplied keep their initial value (empty string or -1) and are
   * handed unchanged to the [[BloomFilterIndexConfig]] constructor by `build()`.
   */
  private[index] class Builder {

    private[this] var indexedColumn: String = ""
    private[this] var indexName: String = ""
    private[this] var fpp: Double = -1
    private[this] var expectedItems: Long = -1
    private[this] var numBits: Long = -1

    /**
     * Sets the index name and the indexed column. Both can be set only once per builder.
     *
     * @param indexName index name for the [[BloomFilterIndexConfig]]; must not be empty.
     * @param indexedColumn indexed column for the [[BloomFilterIndexConfig]]; must not be
     *                      empty.
     * @return this [[BloomFilterIndexConfig.Builder]] with the name and column set.
     * @throws UnsupportedOperationException if the name or the column has already been set.
     * @throws IllegalArgumentException if either argument is empty.
     */
    def init(indexName: String, indexedColumn: String): Builder = {
      if (this.indexName.nonEmpty || this.indexedColumn.nonEmpty) {
        // TODO: Prevent creating index config if index already exists.
        throw new UnsupportedOperationException(
          "Bloom Filter index metadata already set can't override, " +
            "maybe try creating a new config.")
      }

      if (indexName.isEmpty || indexedColumn.isEmpty) {
        throw new IllegalArgumentException("Empty metadata names is not allowed.")
      }

      this.indexName = indexName
      this.indexedColumn = indexedColumn
      this
    }

    /**
     * Sets the expected number of items for the [[BloomFilterIndexConfig]].
     *
     * @param items expected number of items; must be at least 1.
     * @return this [[BloomFilterIndexConfig.Builder]] with the expected item count set.
     * @throws IllegalArgumentException if `items` is less than 1.
     */
    def expectedNumItems(items: Long): Builder = {
      if (items < 1) {
        throw new IllegalArgumentException("Can't support the items value provided.")
      }

      this.expectedItems = items
      this
    }

    /**
     * Sets the false positive probability for the [[BloomFilterIndexConfig]].
     *
     * @param fpp false positive probability; must lie strictly between 0 and 1.
     * @return this [[BloomFilterIndexConfig.Builder]] with the probability set.
     * @throws IllegalArgumentException if `fpp` is NaN or outside the open interval (0, 1).
     */
    def fppToSupport(fpp: Double): Builder = {
      // Written as a negated range check so that NaN, for which every comparison is false,
      // is rejected together with values <= 0 and >= 1.
      if (!(fpp > 0 && fpp < 1)) {
        throw new IllegalArgumentException("Can't support the fpp value.")
      }

      this.fpp = fpp
      this
    }

    /**
     * Sets the number of bits for the [[BloomFilterIndexConfig]].
     *
     * @param bits number of bits; must be at least 1.
     * @return this [[BloomFilterIndexConfig.Builder]] with the bit count set.
     * @throws IllegalArgumentException if `bits` is less than 1.
     */
    def numBitsToDefineBloomFilter(bits: Long): Builder = {
      if (bits < 1) {
        throw new IllegalArgumentException("Can't allow bits for storage be less than 1")
      }

      this.numBits = bits
      this
    }

    /**
     * Creates the [[BloomFilterIndexConfig]] from the values collected so far. No additional
     * checks are made here; the values are passed to the [[BloomFilterIndexConfig]]
     * constructor as they are.
     *
     * @return a [[BloomFilterIndexConfig]] object.
     */
    def build(): BloomFilterIndexConfig = {
      new BloomFilterIndexConfig(indexName, indexedColumn, expectedItems, fpp, numBits)
    }
  }

  /**
   * Creates new [[BloomFilterIndexConfig.Builder]] for constructing a [[BloomFilterIndexConfig]].
   *
   * @return a [[BloomFilterIndexConfig.Builder]] object.
   */
  def builder(): Builder = new Builder
}
Loading