diff --git a/.gitignore b/.gitignore
index 1025392fd7..1f909369da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,4 @@ test/samples/
# Sphinx and Doxygen Doc-Site
doc/_build/*
doc/en/docs/model_zoo/
+cmake-build-debug/*
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 50e67f970f..5687b8e7a8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,10 +29,10 @@ LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
#string(REGEX REPLACE "^[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" VERSION_PATCH "${VERSION}")
-SET(PACKAGE_VERSION 4.1.0) # ${VERSION})
-SET(VERSION 4.1.0)
+SET(PACKAGE_VERSION 4.2.0) # ${VERSION})
+SET(VERSION 4.2.0)
SET(SINGA_MAJOR_VERSION 4)
-SET(SINGA_MINOR_VERSION 1)
+SET(SINGA_MINOR_VERSION 2)
SET(SINGA_PATCH_VERSION 0)
#SET(SINGA_MAJOR_VERSION ${VERSION_MAJOR}) # 0 -
#SET(SINGA_MINOR_VERSION ${VERSION_MINOR}) # 0 - 9
diff --git a/NOTICE b/NOTICE
index a94d16e654..1870a1f157 100644
--- a/NOTICE
+++ b/NOTICE
@@ -30,16 +30,4 @@ developers of Apache SINGA under Apache License, Version 2.0.
./doc/_static/images/sgd.png
./doc/_static/images/singa.png
./doc/_static/images/singav1-sw.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png
-./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png
\ No newline at end of file
+./examples/model_selection/Trails/documents/ai_db.001.jpeg
\ No newline at end of file
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 7a1f0eab43..b475aed551 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,3 +1,33 @@
+Release Notes - SINGA - Version singa-4.2.0
+
+SINGA is a distributed deep learning library.
+
+This release includes the following changes:
+
+ * Add support for deep learning models running on top of PolarDB
+   * Implement efficient model selection for a given dataset stored in the database.
+   * Add support for dynamic model creation.
+   * Add support for flexible setting of model training configurations.
+   * Optimize the in-database analytics modules for scalability, efficiency and memory consumption.
+
+ * New example
+   * Add a horizontal federated learning example using the Bank dataset.
+
+ * Enhance examples
+   * Add sample training data for testing the model selection application.
+
+ * Update the website
+   * Update the star button on the main page.
+   * Refine the display of star statistics.
+
+ * Update the python versions for wheel files
+
+ * Fix bugs
+   * Fix the rat check files.
+   * Update the license files.
+
+----------------------------------------------------------------------------------------------
+
Release Notes - SINGA - Version singa-4.1.0
SINGA is a distributed deep learning library.
diff --git a/examples/hfl/README.md b/examples/hfl/README.md
new file mode 100644
index 0000000000..cf20e64cd8
--- /dev/null
+++ b/examples/hfl/README.md
@@ -0,0 +1,63 @@
+
+
+
+# Horizontal Federated Learning Example
+
+This is an example of federated learning (FL) using the SINGA framework. In FL, there is a server and a set of clients, and each client holds a local dataset.
+In each round, every client trains the model on its local dataset and uploads its local model update to the server; the server aggregates the updates into a
+global model using the Federated Averaging (FedAvg) algorithm and sends the global model back to all clients for the next round of training.
+This example uses the Bank dataset and an MLP model in FL.
+
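+As a rough illustration of the aggregation step (a minimal sketch, not the actual implementation in `src/server.py`, which exchanges serialized SINGA tensors over sockets), FedAvg simply averages the clients' parameters element-wise:
+
+```python
+# Minimal FedAvg sketch, assuming each client update is a dict mapping
+# parameter names to numpy arrays of identical shapes.
+def fed_avg(client_updates):
+    n = len(client_updates)
+    return {k: sum(u[k] for u in client_updates) / n for k in client_updates[0]}
+```
+
+`Server.aggregate` in `src/server.py` does the same with SINGA tensors: it divides the element-wise sum of the collected weights by the number of clients.
+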
+## Preparation
+
+Activate the Conda environment that contains the SINGA library, and run:
+
+```bash
+pip install -r requirements.txt
+```
+
+Download the Bank dataset and split it into 3 partitions:
+
+```bash
+# 1. download the data from https://archive.ics.uci.edu/ml/datasets/bank+marketing
+# 2. put it under the /data folder
+# 3. run the following command, which:
+#    (1) splits the dataset into N subsets (here N=3)
+#    (2) splits each subset into a train set and a test set (8:2)
+python -m src.bank N
+```
+
+## Run the example
+
+Run the server first (`-m 3` sets the number of epochs to 3):
+
+```bash
+python -m src.server -m 3 --num_clients 3
+```
+
+Then, start 3 clients in different terminals:
+
+```bash
+python -m src.client --model mlp --data bank -m 3 -i 0 -d non-iid
+python -m src.client --model mlp --data bank -m 3 -i 1 -d non-iid
+python -m src.client --model mlp --data bank -m 3 -i 2 -d non-iid
+```
+
+Finally, after the specified number of epochs, the server and the clients finish the FL training.
\ No newline at end of file
diff --git a/examples/hfl/config/.gitignore b/examples/hfl/config/.gitignore
new file mode 100644
index 0000000000..73f69e0958
--- /dev/null
+++ b/examples/hfl/config/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/examples/hfl/config/Singa-HFL.iml b/examples/hfl/config/Singa-HFL.iml
new file mode 100644
index 0000000000..b956fbec9d
--- /dev/null
+++ b/examples/hfl/config/Singa-HFL.iml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/hfl/data/.gitkeep b/examples/hfl/data/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/examples/hfl/requirements.txt b/examples/hfl/requirements.txt
new file mode 100644
index 0000000000..ed89f5a724
--- /dev/null
+++ b/examples/hfl/requirements.txt
@@ -0,0 +1,3 @@
+pandas
+scikit-learn
+protobuf
\ No newline at end of file
diff --git a/examples/hfl/src/__init__.py b/examples/hfl/src/__init__.py
new file mode 100644
index 0000000000..d8a500d9d8
--- /dev/null
+++ b/examples/hfl/src/__init__.py
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
diff --git a/examples/hfl/src/bank.py b/examples/hfl/src/bank.py
new file mode 100644
index 0000000000..8cfa3d68cb
--- /dev/null
+++ b/examples/hfl/src/bank.py
@@ -0,0 +1,97 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# https://github.com/zhengzangw/Fed-SINGA/blob/main/src/client/data/bank.py
+
+import pandas as pd
+import numpy as np
+import sys
+from pandas.api.types import is_numeric_dtype
+from sklearn.model_selection import train_test_split
+from sklearn.utils import shuffle
+
+
+def encode(df):
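+    # One-hot encode categorical (non-numeric) columns; numeric columns pass through unchanged.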
+ res = pd.DataFrame()
+ for col in df.columns.values:
+ if not is_numeric_dtype(df[col]):
+ tmp = pd.get_dummies(df[col], prefix=col)
+ else:
+ tmp = df[col]
+ res = pd.concat([res, tmp], axis=1)
+ return res
+
+
+def load(device_id):
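+    # Load this client's train/test partition, as produced by split().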
+ fn_train = "data/bank_train_" + str(device_id) + ".csv"
+ fn_test = "data/bank_test_" + str(device_id) + ".csv"
+
+ train = pd.read_csv(fn_train, sep=',')
+ test = pd.read_csv(fn_test, sep=',')
+
+ train_x = train.drop(['y'], axis=1)
+ train_y = train['y']
+ val_x = test.drop(['y'], axis=1)
+ val_y = test['y']
+
+    train_x = np.array(train_x, dtype=np.float32)
+    val_x = np.array(val_x, dtype=np.float32)
+    train_y = np.array(train_y, dtype=np.int32)
+    val_y = np.array(val_y, dtype=np.int32)
+
+ train_x, val_x = normalize(train_x, val_x)
+ num_classes = 2
+
+ return train_x, train_y, val_x, val_y, num_classes
+
+
+def normalize(X_train, X_test):
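+    # Scale features to [0, 1]; the scaler is fit on the training set only to avoid test-set leakage.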
+ from sklearn.preprocessing import MinMaxScaler
+ scaler = MinMaxScaler()
+ X_train_scaled = scaler.fit_transform(X_train)
+ X_test_scaled = scaler.transform(X_test)
+ return X_train_scaled, X_test_scaled
+
+
+def split(num):
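+    # Shuffle and encode the full Bank dataset, then write one train/test partition per client.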
+ filepath = "../data/bank-additional-full.csv"
+ df = pd.read_csv(filepath, sep=';')
+ df['y'] = (df['y'] == 'yes').astype(int)
+ data = encode(df)
+ data = shuffle(data)
+ train, test = train_test_split(data, test_size=0.2)
+
+ train.to_csv("data/bank_train_.csv", index=False)
+ test.to_csv("data/bank_test_.csv", index=False)
+
+ train_per_client = len(train) // num
+ test_per_client = len(test) // num
+
+ print("train_per_client:", train_per_client)
+ print("test_per_client:", test_per_client)
+ for i in range(num):
+ sub_train = train[i * train_per_client:(i + 1) * train_per_client]
+ sub_test = test[i * test_per_client:(i + 1) * test_per_client]
+ sub_train.to_csv("data/bank_train_" + str(i) + ".csv", index=False)
+ sub_test.to_csv("data/bank_test_" + str(i) + ".csv", index=False)
+
+
+if __name__ == "__main__":
+ split(int(sys.argv[1]))
+
diff --git a/examples/hfl/src/client.py b/examples/hfl/src/client.py
new file mode 100644
index 0000000000..80ab11f3ad
--- /dev/null
+++ b/examples/hfl/src/client.py
@@ -0,0 +1,374 @@
+#!/usr/bin/env python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# modified from https://github.com/apache/singa/blob/master/examples/cnn/train_cnn.py
+# modified from https://github.com/zhengzangw/Fed-SINGA/blob/main/src/client/app.py
+# modified from https://github.com/zhengzangw/Fed-SINGA/blob/main/src/client/main.py
+
+import socket
+
+from .proto import interface_pb2 as proto
+from .proto import utils
+from .proto.utils import parseargs
+
+import time
+import numpy as np
+from PIL import Image
+from singa import device, opt, tensor
+from tqdm import tqdm
+
+from . import bank
+from . import mlp
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+class Client:
+ """Client sends and receives protobuf messages.
+
+ Create and start the server, then use pull and push to communicate with the server.
+
+ Attributes:
+ global_rank (int): The rank in training process.
+ host (str): Host address of the server.
+ port (str): Port of the server.
+ sock (socket.socket): Socket of the client.
+ weights (Dict[Any]): Weights stored locally.
+ """
+
+ def __init__(
+ self,
+ global_rank: int = 0,
+ host: str = "127.0.0.1",
+        port: int = 1234,
+ ) -> None:
+ """Class init method
+
+ Args:
+ global_rank (int, optional): The rank in training process. Defaults to 0.
+ host (str, optional): Host ip address. Defaults to '127.0.0.1'.
+            port (int, optional): Port. Defaults to 1234.
+ """
+ self.host = host
+ self.port = port
+ self.global_rank = global_rank
+
+ self.sock = socket.socket()
+
+ self.weights = {}
+
+ def __start_connection(self) -> None:
+ """Start the network connection to server."""
+ self.sock.connect((self.host, self.port))
+
+ def __start_rank_pairing(self) -> None:
+ """Sending global rank to server"""
+ utils.send_int(self.sock, self.global_rank)
+
+ def start(self) -> None:
+ """Start the client.
+
+        This method will first connect to the server. Then the global rank is sent to the server.
+ """
+ self.__start_connection()
+ self.__start_rank_pairing()
+
+ print(f"[Client {self.global_rank}] Connect to {self.host}:{self.port}")
+
+ def close(self) -> None:
+ """Close the server."""
+ self.sock.close()
+
+ def pull(self) -> None:
+ """Client pull weights from server.
+
+ Namely server push weights from clients.
+ """
+ message = proto.WeightsExchange()
+ message = utils.receive_message(self.sock, message)
+ for k, v in message.weights.items():
+ self.weights[k] = utils.deserialize_tensor(v)
+
+ def push(self) -> None:
+ """Client push weights to server.
+
+ Namely server pull weights from clients.
+ """
+ message = proto.WeightsExchange()
+ message.op_type = proto.GATHER
+ for k, v in self.weights.items():
+ message.weights[k] = utils.serialize_tensor(v)
+ utils.send_message(self.sock, message)
+
+
+# Data augmentation
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], "symmetric")
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[
+ data_num, :, offset[0]: offset[0] + x.shape[2], offset[1]: offset[1] + x.shape[2]
+ ]
+ if_flip = np.random.randint(2)
+ if if_flip:
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+ # y is network output to be compared with ground truth (int)
+ y = np.argmax(pred, axis=1)
+ a = y == target
+ correct = np.array(a, "int").sum()
+ return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+ # Partition training data
+ data_per_rank = train_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ train_x = train_x[idx_start:idx_end]
+ train_y = train_y[idx_start:idx_end]
+
+ # Partition evaluation data
+ data_per_rank = val_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ val_x = val_x[idx_start:idx_end]
+ val_y = val_y[idx_start:idx_end]
+ return train_x, train_y, val_x, val_y
+
+
+# All-reduce a numpy accuracy/loss value across multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+def resize_dataset(x, image_size):
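+    # Bilinearly resize each channel of each image to (image_size, image_size).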
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, image_size, image_size), dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(
+ Image.fromarray(x[n, d, :, :]).resize((image_size, image_size), Image.BILINEAR),
+ dtype=np.float32,
+ )
+ return X
+
+
+def get_data(data, data_dist="iid", device_id=None):
+ if data == "bank":
+ train_x, train_y, val_x, val_y, num_classes = bank.load(device_id)
+ else:
+ raise NotImplementedError
+ return train_x, train_y, val_x, val_y, num_classes
+
+
+def get_model(model, num_channels=None, num_classes=None, data_size=None):
+ if model == "mlp":
+ model = mlp.create_model(data_size=data_size, num_classes=num_classes)
+ else:
+ raise NotImplementedError
+ return model
+
+
+def run(
+ global_rank,
+ world_size,
+ device_id,
+ max_epoch,
+ batch_size,
+ model,
+ data,
+ data_dist,
+ sgd,
+ graph,
+ verbosity,
+ dist_option="plain",
+ spars=None,
+ precision="float32",
+):
+ # Connect to server
+ client = Client(global_rank=device_id)
+ client.start()
+
+ dev = device.get_default_device()
+ dev.SetRandSeed(0)
+ np.random.seed(0)
+
+ # Prepare dataset
+ train_x, train_y, val_x, val_y, num_classes = get_data(data, data_dist, device_id)
+
+ num_channels = train_x.shape[1]
+ data_size = np.prod(train_x.shape[1: train_x.ndim]).item()
+
+ # Prepare model
+ model = get_model(
+ model, num_channels=num_channels, num_classes=num_classes, data_size=data_size
+ )
+
+ if model.dimension == 4:
+ image_size = train_x.shape[2]
+
+ # For distributed training, sequential has better performance
+ if hasattr(sgd, "communicator"):
+ DIST = True
+ sequential = True
+ else:
+ DIST = False
+ sequential = False
+
+ if DIST:
+ train_x, train_y, val_x, val_y = partition(
+ global_rank, world_size, train_x, train_y, val_x, val_y
+ )
+
+ if model.dimension == 4:
+ tx = tensor.Tensor(
+ (batch_size, num_channels, model.input_size, model.input_size),
+ dev,
+ singa_dtype[precision],
+ )
+ elif model.dimension == 2:
+ tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision])
+        train_x = np.reshape(train_x, (train_x.shape[0], -1))
+        val_x = np.reshape(val_x, (val_x.shape[0], -1))
+
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_val_batch = val_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ # Attach model to graph
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+ dev.SetVerbosity(verbosity)
+
+ # Training and evaluation loop
+ for epoch in range(max_epoch):
+ if epoch > 0:
+ client.pull()
+ model.set_states(client.weights)
+
+ if global_rank == 0:
+ print("Starting Epoch %d:" % (epoch))
+
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+ # Training phase
+ max_inner_epoch = 1
+ for inner_epoch in range(max_inner_epoch):
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+
+ model.train()
+ for b in tqdm(range(num_train_batch)):
+            # Generate the batch data for this iteration
+ x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
+ if model.dimension == 4:
+ x = augmentation(x, batch_size)
+ if image_size != model.input_size:
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
+
+            # Copy the batch data into the input tensors
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+
+ # Train the model
+ out, loss = model(tx, ty, dist_option, spars)
+ train_correct += accuracy(tensor.to_numpy(out), y)
+ train_loss += tensor.to_numpy(loss)[0]
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, sgd, reducer)
+ train_loss = reduce_variable(train_loss, sgd, reducer)
+
+ if global_rank == 0:
+ train_acc = train_correct / (num_train_batch * batch_size * world_size)
+ print(
+ "[inner epoch %d] Training loss = %f, training accuracy = %f"
+ % (inner_epoch, train_loss, train_acc),
+ flush=True
+ )
+
+ # Evaluation phase
+ model.eval()
+ for b in range(num_val_batch):
+ x = val_x[b * batch_size:(b + 1) * batch_size]
+ if model.dimension == 4:
+                if image_size != model.input_size:
+ x = resize_dataset(x, model.input_size)
+ x = x.astype(np_dtype[precision])
+ y = val_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+ if DIST:
+ # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, sgd, reducer)
+
+ # Output the evaluation accuracy
+ if global_rank == 0:
+ print('[inner epoch %d] Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (inner_epoch, test_correct / (num_val_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+ client.weights = model.get_states()
+ client.push()
+
+ dev.PrintTimeProfiling()
+
+ client.close()
+
+
+if __name__ == "__main__":
+ args = parseargs()
+ sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+ run(
+ 0,
+ 1,
+ args.device_id,
+ args.max_epoch,
+ args.batch_size,
+ args.model,
+ args.data,
+ args.data_dist,
+ sgd,
+ args.graph,
+ args.verbosity,
+ precision=args.precision,
+ )
diff --git a/examples/hfl/src/mlp.py b/examples/hfl/src/mlp.py
new file mode 100644
index 0000000000..294db6c607
--- /dev/null
+++ b/examples/hfl/src/mlp.py
@@ -0,0 +1,134 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import argparse
+
+import numpy as np
+from singa import device, layer, model, opt, tensor
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+
+class MLP(model.Model):
+ def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
+ super(MLP, self).__init__()
+ self.num_classes = num_classes
+ self.dimension = 2
+
+ self.relu = layer.ReLU()
+ self.linear1 = layer.Linear(perceptron_size)
+ self.linear2 = layer.Linear(num_classes)
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+
+ def forward(self, inputs):
+ y = self.linear1(inputs)
+ y = self.relu(y)
+ y = self.linear2(y)
+ return y
+
+ def train_one_batch(self, x, y, dist_option, spars):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+
+ if dist_option == "plain":
+ self.optimizer(loss)
+ elif dist_option == "half":
+ self.optimizer.backward_and_update_half(loss)
+ elif dist_option == "partialUpdate":
+ self.optimizer.backward_and_partial_update(loss)
+ elif dist_option == "sparseTopK":
+ self.optimizer.backward_and_sparse_update(loss, topK=True, spars=spars)
+ elif dist_option == "sparseThreshold":
+ self.optimizer.backward_and_sparse_update(loss, topK=False, spars=spars)
+ return out, loss
+
+ def set_optimizer(self, optimizer):
+ self.optimizer = optimizer
+
+
+def create_model(pretrained=False, **kwargs):
+ """Constructs a CNN model.
+ Args:
+ pretrained (bool): If True, returns a pre-trained model.
+
+ Returns:
+ The created CNN model.
+ """
+ model = MLP(**kwargs)
+
+ return model
+
+
+__all__ = ["MLP", "create_model"]
+
+if __name__ == "__main__":
+ np.random.seed(0)
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-p", choices=["float32", "float16"], default="float32", dest="precision")
+ parser.add_argument(
+ "-g",
+ "--disable-graph",
+ default="True",
+ action="store_false",
+ help="disable graph",
+ dest="graph",
+ )
+ parser.add_argument(
+ "-m", "--max-epoch", default=1001, type=int, help="maximum epochs", dest="max_epoch"
+ )
+ args = parser.parse_args()
+
+ # generate the boundary
+ f = lambda x: (5 * x + 1)
+ bd_x = np.linspace(-1.0, 1, 200)
+ bd_y = f(bd_x)
+
+ # generate the training data
+ x = np.random.uniform(-1, 1, 400)
+ y = f(x) + 2 * np.random.randn(len(x))
+
+ # choose one precision
+ precision = singa_dtype[args.precision]
+ np_precision = np_dtype[args.precision]
+
+ # convert training data to 2d space
+ label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32)
+ data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision)
+
+ dev = device.create_cuda_gpu_on(0)
+ sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision])
+ tx = tensor.Tensor((400, 2), dev, precision)
+ ty = tensor.Tensor((400,), dev, tensor.int32)
+ model = MLP(data_size=2, perceptron_size=3, num_classes=2)
+
+ # attach model to graph
+ model.set_optimizer(sgd)
+ model.compile([tx], is_train=True, use_graph=args.graph, sequential=True)
+ model.train()
+
+ for i in range(args.max_epoch):
+ tx.copy_from_numpy(data)
+ ty.copy_from_numpy(label)
+        out, loss = model(tx, ty, "plain", spars=None)
+
+ if i % 100 == 0:
+ print("training loss = ", tensor.to_numpy(loss)[0])
diff --git a/examples/hfl/src/proto/__init__.py b/examples/hfl/src/proto/__init__.py
new file mode 100644
index 0000000000..d8a500d9d8
--- /dev/null
+++ b/examples/hfl/src/proto/__init__.py
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
diff --git a/examples/hfl/src/proto/interface.proto b/examples/hfl/src/proto/interface.proto
new file mode 100644
index 0000000000..dde5093e98
--- /dev/null
+++ b/examples/hfl/src/proto/interface.proto
@@ -0,0 +1,13 @@
+syntax = "proto3";
+package interface;
+
+enum Op {
+ DEFAULT = 0;
+ SCATTER = 1;
+ GATHER = 2;
+}
+
+message WeightsExchange {
+ Op op_type = 1;
+ map<string, bytes> weights = 2;
+}
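+
+// Note: interface_pb2.py in this directory appears to be pre-generated. If this
+// message is changed, the bindings would presumably be regenerated from the
+// example root with something like: protoc --python_out=. src/proto/interface.proto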
diff --git a/examples/hfl/src/proto/interface_pb2.py b/examples/hfl/src/proto/interface_pb2.py
new file mode 100644
index 0000000000..08e5b1cd15
--- /dev/null
+++ b/examples/hfl/src/proto/interface_pb2.py
@@ -0,0 +1,217 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import sys
+
+_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1"))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import enum_type_wrapper
+
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name="src/proto/interface.proto",
+ package="interface",
+ syntax="proto3",
+ serialized_options=None,
+ serialized_pb=_b(
+ "\n\x19src/proto/interface.proto\x12\tinterface\"\x9b\x01\n\x0fWeightsExchange\x12\x1e\n\x07op_type\x18\x01 \x01(\x0e\x32\r.interface.Op\x12\x38\n\x07weights\x18\x02 \x03(\x0b\x32'.interface.WeightsExchange.WeightsEntry\x1a.\n\x0cWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01**\n\x02Op\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\x0b\n\x07SCATTER\x10\x01\x12\n\n\x06GATHER\x10\x02\x62\x06proto3"
+ ),
+)
+
+_OP = _descriptor.EnumDescriptor(
+ name="Op",
+ full_name="interface.Op",
+ filename=None,
+ file=DESCRIPTOR,
+ values=[
+ _descriptor.EnumValueDescriptor(
+ name="DEFAULT", index=0, number=0, serialized_options=None, type=None
+ ),
+ _descriptor.EnumValueDescriptor(
+ name="SCATTER", index=1, number=1, serialized_options=None, type=None
+ ),
+ _descriptor.EnumValueDescriptor(
+ name="GATHER", index=2, number=2, serialized_options=None, type=None
+ ),
+ ],
+ containing_type=None,
+ serialized_options=None,
+ serialized_start=198,
+ serialized_end=240,
+)
+_sym_db.RegisterEnumDescriptor(_OP)
+
+Op = enum_type_wrapper.EnumTypeWrapper(_OP)
+DEFAULT = 0
+SCATTER = 1
+GATHER = 2
+
+
+_WEIGHTSEXCHANGE_WEIGHTSENTRY = _descriptor.Descriptor(
+ name="WeightsEntry",
+ full_name="interface.WeightsExchange.WeightsEntry",
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name="key",
+ full_name="interface.WeightsExchange.WeightsEntry.key",
+ index=0,
+ number=1,
+ type=9,
+ cpp_type=9,
+ label=1,
+ has_default_value=False,
+ default_value=_b("").decode("utf-8"),
+ message_type=None,
+ enum_type=None,
+ containing_type=None,
+ is_extension=False,
+ extension_scope=None,
+ serialized_options=None,
+ file=DESCRIPTOR,
+ ),
+ _descriptor.FieldDescriptor(
+ name="value",
+ full_name="interface.WeightsExchange.WeightsEntry.value",
+ index=1,
+ number=2,
+ type=12,
+ cpp_type=9,
+ label=1,
+ has_default_value=False,
+ default_value=_b(""),
+ message_type=None,
+ enum_type=None,
+ containing_type=None,
+ is_extension=False,
+ extension_scope=None,
+ serialized_options=None,
+ file=DESCRIPTOR,
+ ),
+ ],
+ extensions=[],
+ nested_types=[],
+ enum_types=[],
+ serialized_options=_b("8\001"),
+ is_extendable=False,
+ syntax="proto3",
+ extension_ranges=[],
+ oneofs=[],
+ serialized_start=150,
+ serialized_end=196,
+)
+
+_WEIGHTSEXCHANGE = _descriptor.Descriptor(
+ name="WeightsExchange",
+ full_name="interface.WeightsExchange",
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name="op_type",
+ full_name="interface.WeightsExchange.op_type",
+ index=0,
+ number=1,
+ type=14,
+ cpp_type=8,
+ label=1,
+ has_default_value=False,
+ default_value=0,
+ message_type=None,
+ enum_type=None,
+ containing_type=None,
+ is_extension=False,
+ extension_scope=None,
+ serialized_options=None,
+ file=DESCRIPTOR,
+ ),
+ _descriptor.FieldDescriptor(
+ name="weights",
+ full_name="interface.WeightsExchange.weights",
+ index=1,
+ number=2,
+ type=11,
+ cpp_type=10,
+ label=3,
+ has_default_value=False,
+ default_value=[],
+ message_type=None,
+ enum_type=None,
+ containing_type=None,
+ is_extension=False,
+ extension_scope=None,
+ serialized_options=None,
+ file=DESCRIPTOR,
+ ),
+ ],
+ extensions=[],
+ nested_types=[
+ _WEIGHTSEXCHANGE_WEIGHTSENTRY,
+ ],
+ enum_types=[],
+ serialized_options=None,
+ is_extendable=False,
+ syntax="proto3",
+ extension_ranges=[],
+ oneofs=[],
+ serialized_start=41,
+ serialized_end=196,
+)
+
+_WEIGHTSEXCHANGE_WEIGHTSENTRY.containing_type = _WEIGHTSEXCHANGE
+_WEIGHTSEXCHANGE.fields_by_name["op_type"].enum_type = _OP
+_WEIGHTSEXCHANGE.fields_by_name["weights"].message_type = _WEIGHTSEXCHANGE_WEIGHTSENTRY
+DESCRIPTOR.message_types_by_name["WeightsExchange"] = _WEIGHTSEXCHANGE
+DESCRIPTOR.enum_types_by_name["Op"] = _OP
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+WeightsExchange = _reflection.GeneratedProtocolMessageType(
+ "WeightsExchange",
+ (_message.Message,),
+ {
+ "WeightsEntry": _reflection.GeneratedProtocolMessageType(
+ "WeightsEntry",
+ (_message.Message,),
+ {
+ "DESCRIPTOR": _WEIGHTSEXCHANGE_WEIGHTSENTRY,
+ "__module__": "src.proto.interface_pb2"
+ # @@protoc_insertion_point(class_scope:interface.WeightsExchange.WeightsEntry)
+ },
+ ),
+ "DESCRIPTOR": _WEIGHTSEXCHANGE,
+ "__module__": "src.proto.interface_pb2"
+ # @@protoc_insertion_point(class_scope:interface.WeightsExchange)
+ },
+)
+_sym_db.RegisterMessage(WeightsExchange)
+_sym_db.RegisterMessage(WeightsExchange.WeightsEntry)
+
+
+_WEIGHTSEXCHANGE_WEIGHTSENTRY._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/examples/hfl/src/proto/utils.py b/examples/hfl/src/proto/utils.py
new file mode 100644
index 0000000000..12f044ab50
--- /dev/null
+++ b/examples/hfl/src/proto/utils.py
@@ -0,0 +1,182 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import argparse
+import pickle
+import socket
+import struct
+
+from google.protobuf.message import Message
+from singa import tensor
+
+
+def receive_all(conn: socket.socket, size: int) -> bytes:
+ """Receive a given length of bytes from socket.
+
+ Args:
+ conn (socket.socket): Socket connection.
+ size (int): Length of bytes to receive.
+
+ Raises:
+ RuntimeError: If connection closed before chunk was read, it will raise an error.
+
+ Returns:
+ bytes: Received bytes.
+ """
+ buffer = b""
+ while size > 0:
+ chunk = conn.recv(size)
+ if not chunk:
+ raise RuntimeError("connection closed before chunk was read")
+ buffer += chunk
+ size -= len(chunk)
+ return buffer
+
+
+def send_int(conn: socket.socket, i: int, pack_format: str = "Q") -> None:
+ """Send an integer from socket.
+
+ Args:
+ conn (socket.socket): Socket connection.
+ i (int): Integer to send.
+ pack_format (str, optional): Pack format. Defaults to "Q", which means unsigned long long.
+ """
+ data = struct.pack(f"!{pack_format}", i)
+ conn.sendall(data)
+
+
+def receive_int(conn: socket.socket, pack_format: str = "Q") -> int:
+ """Receive an integer from socket.
+
+ Args:
+ conn (socket.socket): Socket connection.
+ pack_format (str, optional): Pack format. Defaults to "Q", which means unsigned long long.
+
+ Returns:
+ int: Received integer.
+ """
+ buffer_size = struct.Struct(pack_format).size
+ data = receive_all(conn, buffer_size)
+ (data,) = struct.unpack(f"!{pack_format}", data)
+ return data
+
+
+def send_message(conn: socket.socket, data: Message, pack_format: str = "Q") -> None:
+ """Send protobuf message from socket. First the length of protobuf message will be sent. Then the message is sent.
+
+ Args:
+ conn (socket.socket): Socket connection.
+ data (Message): Protobuf message to send.
+ pack_format (str, optional): Length of protobuf message pack format. Defaults to "Q", which means unsigned long long.
+ """
+ send_int(conn, data.ByteSize(), pack_format)
+ conn.sendall(data.SerializePartialToString())
+
+
+def receive_message(conn: socket.socket, data: Message, pack_format: str = "Q") -> Message:
+ """Receive protobuf message from socket
+
+ Args:
+ conn (socket.socket): Socket connection.
+        data (Message): Placeholder for the protobuf message.
+ pack_format (str, optional): Length of protobuf message pack format. Defaults to "Q", which means unsigned long long.
+
+ Returns:
+ Message: The protobuf message.
+ """
+ data_len = receive_int(conn, pack_format)
+ data.ParseFromString(receive_all(conn, data_len))
+ return data
+
+
+def serialize_tensor(t: tensor.Tensor) -> bytes:
+ """Serialize a singa tensor to bytes.
+
+ Args:
+ t (tensor.Tensor): The singa tensor.
+
+ Returns:
+ bytes: The serialized tensor.
+ """
+ return pickle.dumps(tensor.to_numpy(t), protocol=0)
+
+
+def deserialize_tensor(t: bytes) -> tensor.Tensor:
+ """Recover singa tensor from bytes.
+
+ Args:
+ t (bytes): The serialized tensor.
+
+ Returns:
+ tensor.Tensor: The singa tensor.
+ """
+ return tensor.from_numpy(pickle.loads(t))
+
+
+def parseargs(arg=None) -> argparse.Namespace:
+ """Parse command line arguments
+
+ Returns:
+ argparse.Namespace: parsed arguments
+ """
+
+ parser = argparse.ArgumentParser(description="Training using the autograd and graph.")
+ parser.add_argument(
+ "--model", choices=["cnn", "resnet", "xceptionnet", "mlp", "alexnet"], default="mlp"
+ )
+ parser.add_argument("--data", choices=["mnist", "cifar10", "cifar100", "bank"], default="mnist")
+ parser.add_argument("-p", choices=["float32", "float16"], default="float32", dest="precision")
+ parser.add_argument(
+ "-m", "--max-epoch", default=10, type=int, help="maximum epochs", dest="max_epoch"
+ )
+ parser.add_argument(
+ "-b", "--batch-size", default=64, type=int, help="batch size", dest="batch_size"
+ )
+ parser.add_argument(
+ "-l", "--learning-rate", default=0.005, type=float, help="initial learning rate", dest="lr"
+ )
+ # Determine which gpu to use
+ parser.add_argument(
+ "-i", "--device-id", default=0, type=int, help="which GPU to use", dest="device_id"
+ )
+ parser.add_argument(
+ "-g",
+ "--disable-graph",
+ default="True",
+ action="store_false",
+ help="disable graph",
+ dest="graph",
+ )
+ parser.add_argument(
+ "-v", "--log-verbosity", default=0, type=int, help="logging verbosity", dest="verbosity"
+ )
+ parser.add_argument(
+ "-d",
+ "--data-distribution",
+ choices=["iid", "non-iid"],
+ default="iid",
+ help="data distribution",
+ dest="data_dist",
+ )
+ parser.add_argument("--num_clients", default=10, type=int)
+ parser.add_argument("--host", default="127.0.0.1")
+ parser.add_argument("--port", default=1234)
+
+ args = parser.parse_args(arg)
+ return args
diff --git a/examples/hfl/src/server.py b/examples/hfl/src/server.py
new file mode 100644
index 0000000000..7450cc1cfc
--- /dev/null
+++ b/examples/hfl/src/server.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# modified from https://github.com/zhengzangw/Fed-SINGA/blob/main/src/server/app.py
+
+import socket
+from collections import defaultdict
+from typing import Dict, List
+
+from singa import tensor
+
+from .proto import interface_pb2 as proto
+from .proto.utils import parseargs
+from .proto import utils
+
+
+class Server:
+ """Server sends and receives protobuf messages.
+
+ Create and start the server, then use pull and push to communicate with clients.
+
+ Attributes:
+ num_clients (int): Number of clients.
+ host (str): Host address of the server.
+ port (str): Port of the server.
+ sock (socket.socket): Socket of the server.
+ conns (List[socket.socket]): List of num_clients sockets.
+ addrs (List[str]): List of socket address.
+ weights (Dict[Any]): Weights stored on server.
+ """
+
+ def __init__(
+ self,
+ num_clients=1,
+ host: str = "127.0.0.1",
+        port: int = 1234,
+ ) -> None:
+ """Class init method
+
+ Args:
+ num_clients (int, optional): Number of clients in training.
+ host (str, optional): Host ip address. Defaults to '127.0.0.1'.
+            port (int, optional): Port. Defaults to 1234.
+ """
+ self.num_clients = num_clients
+ self.host = host
+ self.port = port
+
+ self.sock = socket.socket()
+ self.conns = [None] * num_clients
+ self.addrs = [None] * num_clients
+
+ self.weights = {}
+
+ def __start_connection(self) -> None:
+ """Start the network connection of server."""
+ self.sock.bind((self.host, self.port))
+ self.sock.listen()
+ print("Server started.")
+
+ def __start_rank_pairing(self) -> None:
+ """Start pair each client to a global rank"""
+ for _ in range(self.num_clients):
+ conn, addr = self.sock.accept()
+ rank = utils.receive_int(conn)
+ self.conns[rank] = conn
+ self.addrs[rank] = addr
+ print(f"[Server] Connected by {addr} [global_rank {rank}]")
+
+ assert None not in self.conns
+
+ def start(self) -> None:
+ """Start the server.
+
+ This method will first bind and listen on the designated host and port.
+ Then it will connect to num_clients clients and maintain the socket.
+ In this process, each client shall provide their rank number.
+ """
+ self.__start_connection()
+ self.__start_rank_pairing()
+
+ def close(self) -> None:
+ """Close the server."""
+ self.sock.close()
+
+ def aggregate(self, weights: Dict[str, List[tensor.Tensor]]) -> Dict[str, tensor.Tensor]:
+ """Aggregate collected weights to update server weight.
+
+ Args:
+ weights (Dict[str, List[tensor.Tensor]]): The collected weights.
+
+ Returns:
+ Dict[str, tensor.Tensor]: Updated weight stored in server.
+ """
+ for k, v in weights.items():
+ self.weights[k] = sum(v) / self.num_clients
+ return self.weights
+
+ def pull(self) -> None:
+ """Server pull weights from clients.
+
+ Namely clients push weights to the server. It is the gather process.
+ """
+ # open space to collect weights from clients
+ datas = [proto.WeightsExchange() for _ in range(self.num_clients)]
+ weights = defaultdict(list)
+ # receive weights sequentially
+ for i in range(self.num_clients):
+ datas[i] = utils.receive_message(self.conns[i], datas[i])
+ for k, v in datas[i].weights.items():
+ weights[k].append(utils.deserialize_tensor(v))
+ # aggregation
+ self.aggregate(weights)
+
+ def push(self) -> None:
+ """Server push weights to clients.
+
+ Namely clients pull weights from server. It is the scatter process.
+ """
+ message = proto.WeightsExchange()
+ message.op_type = proto.SCATTER
+ for k, v in self.weights.items():
+ message.weights[k] = utils.serialize_tensor(v)
+
+ for conn in self.conns:
+ utils.send_message(conn, message)
+
+
+if __name__ == "__main__":
+ args = parseargs()
+
+ server = Server(num_clients=args.num_clients, host=args.host, port=args.port)
+ server.start()
+
+ for i in range(args.max_epoch):
+ print(f"On epoch {i}:")
+ if i > 0:
+ # Push to Clients
+ server.push()
+
+        # Collect from clients
+ server.pull()
+
+ server.close()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/README.md
deleted file mode 100644
index 5455000101..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/README.md
+++ /dev/null
@@ -1,174 +0,0 @@
-
-
-# Database-Native Model Selection
-
- -- based on SINGA
-
-
-
-
-
-## Build Docker Image
-
-```bash
-git clone https://github.com/apache/singa.git
-cd singa/examples/model_selection/TRAILS-Database-Native-Model-Selection/
-docker build -t trails-singa .
-```
-
-Inside the docker image, the PostgreSQL and its extensions are installed according to https://github.com/pgcentralfoundation/pgrx
-
-
-## Run Docker Image
-Download exp_data.zip from https://www.dropbox.com/scl/fi/xz4teosklwmfc5j4x2ug6/exp_data.zip?rlkey=5fk2ttib0zt49suyppcjhsrn2&dl=0
-and unzip the exp_data/ folder to a specific directory (path_to_exp_data_folder)
-```bash
-docker run -d --name trails-singa \
- --network="host" \
- -v path_to_exp_data_folder:/project/exp_data \
- trails-singa
-```
-
-## Start PostgreSQL Instance
-
-```bash
-# 1. Run docker container
-docker exec -it trails-singa bash
-# 2. Clone the code
-cd ~
-git clone https://github.com/apache/singa.git
-cd singa/
-git fetch origin singa-ms:singa-ms
-git checkout singa-ms
-cd examples/model_selection/TRAILS-Database-Native-Model-Selection/
-# 3. Export PYTHONPATH
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-# 4. Start the RDBMS and then exit
-cd internal/pg_extension
-cargo pgrx run
-exit
-cd ../..
-# 5. Load data into RDBMS
-bash internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/frappe frappe
-# 6. Run database server
-cd internal/pg_extension
-cargo pgrx run
-
-```
-
-
-## Register Stored Procedure
-
-```sql
-CREATE OR REPLACE
-PROCEDURE model_selection_sp(
- dataset TEXT, --dataset name
- selected_columns TEXT[], --used columns
- N INTEGER, --number of models to evaluate
- batch_size INTEGER, --batch size, for profiling, filtering
- config_file TEXT --config file path
-)
-LANGUAGE plpgsql
-AS $$
-DECLARE
- -- global inputs/outputs
- result_status TEXT;
- column_list TEXT;
-BEGIN
- -- combine the columns into a string
- column_list := array_to_string(selected_columns, ', ');
-
- -- 4. Run filtering phase to get top K models.
- EXECUTE format('
- WITH batch_rows AS (
- SELECT %s
- FROM %I
- ORDER BY RANDOM()
- LIMIT %s OFFSET 0
- )
- SELECT filtering_phase(
- json_agg(row_to_json(t))::text, %s, %s, %L
- )
- FROM batch_rows AS t', column_list, dataset, batch_size, N, 1, config_file) INTO result_status;
- RAISE NOTICE '4. run filtering phase, k models = %', result_status;
-
-END; $$;
-```
-
-# Compile the UDF
-
-```bash
-# Try compile the UDF
-DROP EXTENSION IF EXISTS pg_extension;
-CREATE EXTENSION pg_extension;
-```
-
-If the above fails, open another terminal and go into the docker via docker exec -it trails-singa bash
-
-Then run the following
-```bash
-rm /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
-vi /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
-# Copy the following to the /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
--- src/lib.rs:66
--- pg_extension::filtering_phase
-CREATE FUNCTION "filtering_phase"(
- "mini_batch" TEXT, /* alloc::string::String */
- "n" INT, /* i32 */
- "k" INT, /* i32 */
- "config_file" TEXT /* alloc::string::String */
-) RETURNS TEXT /* alloc::string::String */
- IMMUTABLE STRICT PARALLEL SAFE
-LANGUAGE c /* Rust */
-AS 'MODULE_PATHNAME', 'filtering_phase_wrapper';
-```
-
-Go back to the first terminal and run the following in the database server again
-```bash
-# Try compile the UDF
-DROP EXTENSION IF EXISTS pg_extension;
-CREATE EXTENSION pg_extension;
-```
-
-## Run Model Selection
-
-```sql
--- Template for calling 'model_selection_sp' stored procedure
-CALL model_selection_sp(
- , -- The name of the table or dataset from which data should be retrieved.
- , -- An array of column names to be considered in the model selection process.
- , -- Number of models to explore
- , -- Batch size
- -- The file path to a configuration file needed for the process.
-);
-
-
-# For example
-CALL model_selection_sp(
- 'frappe_train',
- ARRAY['col1', 'col2', 'col3', 'label'],
- 10,
- 32,
- '/home/postgres/singa/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini');
-```
-
-# Example Result
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/dev_guide.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/dev_guide.md
deleted file mode 100644
index 3b0927a837..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/dev_guide.md
+++ /dev/null
@@ -1,251 +0,0 @@
-
-
-# Change the permission
-
-```bash
-chmod -R 777 internal/pg_extension
-chmod -R 777 TRAILS
-```
-
-# PSQL CMD
-
-```sql
-psql -h localhost -p 28814 -U postgres
-\c frappe
-\dt
-\d frappe_train
-DROP TABLE frappe_train;
-SELECT * FROM frappe_train LIMIT 10;
-SELECT * FROM frappe_test LIMIT 10;
-SELECT * FROM frappe_valid LIMIT 10;
-DROP DATABASE frappe;
-psql -U postgres
-```
-
-# Build and run the container
-
-```bash
-docker build -t trails .
-
-docker run -d --name trails \
- --network="host" \
- -v $(pwd)/TRAILS:/project/TRAILS \
- -v /hdd1/xingnaili/exp_data/:/project/exp_data \
- trails
-
-docker exec -it trails bash
-```
-
-# This is in docker image already
-
-```bash
-# if those are already on docker, skip them.
-cargo install --locked cargo-pgrx
-# run after package update
-cargo pgrx init
-cargo pgrx new my_extension
-# just run this after code updates.
-cargo pgrx run
-```
-
-# Develop
-
-## Load data into database.
-
-```bash
-bash /project/TRAILS/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/frappe frappe
-bash /project/TRAILS/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/uci_diabetes uci_diabetes
-bash /project/TRAILS/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/exp_data/data/structure_data/criteo_full criteo
-```
-
-## 1. Compile
-
-In shell
-
-```bash
-cd ./internal/pg_extension/
-cargo clean
-rm -r /home/postgres/.pgrx/14.9/pgrx-install/lib/pg_extension.so
-cargo pgrx run
-rm /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
-vi /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
-paste the latest sqls
-# generate schema
-cargo pgrx schema >> /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
-```
-
-In SQL
-
-```sql
-DROP EXTENSION IF EXISTS pg_extension;
-CREATE EXTENSION pg_extension;
-```
-
-## 2. Edit the config file
-
-Update the `nfield` in the `config.ini` file, it is == number of columns used. E.g, `ARRAY['col1', 'col2', 'col3', 'label']` => `nfield` = 3
-
-## 3. Run it
-
-```sql
-CREATE EXTENSION pg_extension;
-
-# Test if the UDF is there or not
-SELECT * FROM pg_proc WHERE proname = 'model_selection_workloads';
-
-# micro
-select benchmark_filtering_phase_latency(4, '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-select benchmark_filtering_latency_in_db(5000, 'frappe', '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-select benchmark_filtering_latency_in_db(5000, 'uci_diabetes', '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-select benchmark_filtering_latency_in_db(4, 'criteo', '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-# Test coordinator
-SELECT coordinator('0.08244', '168.830156', '800', false, '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-# this is database name, columns used, time budget, batch size, and config file
-CALL model_selection_sp('dummy', ARRAY['col1', 'col2', 'col3', 'label'], '30', 32, '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-# end2end model selection
-CALL model_selection_end2end('dummy', ARRAY['col1', 'col2', 'col3', 'label'], '15', '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-# filtering & refinement with workloads
-CALL model_selection_workloads('dummy', ARRAY['col1', 'col2', 'col3', 'label'], 300, 3, '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-response = requests.post(args.refinement_url, json=data).json()
-
-```
-
-# Test the pg-extension works using pipython
-
-```sql
-# switch to a postgres
-su postgres
-
-CREATE EXTENSION plpython3u;
-
-CREATE FUNCTION py_version() RETURNS text AS $$
-import sys
-return sys.version
-$$ LANGUAGE plpython3u;
-
-SELECT py_version();
-
-CREATE OR REPLACE FUNCTION test_numpy()
- RETURNS text
-LANGUAGE plpython3u
-AS $$
-import numpy
-import torch
-import sklearn
-import torchvision
-import tqdm
-print("asdf")
-return str(numpy.__version__) + " torch: " + str(torch.__version__)
-$$;
-
-SELECT test_numpy();
-
-CREATE EXTENSION my_extension;
-SELECT hello_my_extension();
-```
-
-# Container log
-
-Each line in your output represents a different process that is currently running on your PostgreSQL server. Here's what each one is doing:
-
-1. `/bin/sh -c service postgresql start && tail -F /var/log/postgresql/postgresq` : This is the command that was used to start your PostgreSQL server. It also includes a command to continuously display new entries from the PostgreSQL log file.
-
-
-2. `/usr/lib/postgresql/14/bin/postgres -D /var/lib/postgresql/14/main -c config` : This is the main PostgreSQL process. All other PostgreSQL processes are children of this process.
-
-
-3. `postgres: 14/main: checkpointer` : The checkpointer process is responsible for making sure data changes get saved to disk regularly. This is important for database recovery in case of a crash.
-
-
-4. `postgres: 14/main: background writer` : The background writer process is responsible for writing buffers to disk when they become dirty. This reduces the amount of work that needs to be done when a buffer is reused.
-
-
-5. `postgres: 14/main: walwriter` : The walwriter process writes transaction logs (Write-Ahead Logs or WAL) to disk. This is also important for database recovery and replication.
-
-
-6. `postgres: 14/main: autovacuum launcher` : The autovacuum launcher process starts autovacuum worker processes as needed. These processes automatically clean up and optimize the database.
-
-
-7. `postgres: 14/main: stats collector` : The stats collector process collects statistics about the server's activity. This information can be viewed using the `pg_stat` family of system views.
-
-
-8. `postgres: 14/main: logical replication launcher` : The logical replication launcher manages the worker processes that perform logical replication, copying data changes to other databases.
-
-
-9. `tail -F /var/log/postgresql/postgresql-14-main.log` : This process is displaying the end of the PostgreSQL log file and updating as more entries are added.
-
-
-10. `bash` : These are shell sessions, likely interactive ones you've started.
-
-
-11. `/usr/lib/postgresql/14/bin/psql -h localhost -p 28814 pg_extension` : These are instances of the psql command line interface, connected to your database.
-
-
-12. `postgres: postgres pg_extension 127.0.0.1(52236) CALL` : This is your currently running stored procedure.
-
-
-13. `ps aux` : This is the command you ran to display the list of processes.
-
-Each process is part of the PostgreSQL database system and helps it to run efficiently and robustly.
-
-# MAC locally
-
-```bash
-conda activate firmest38
-export PYTHON_SYS_EXECUTABLE=/Users/kevin/opt/anaconda3/envs/firmest38/bin/python
-export DYLD_LIBRARY_PATH=/Users/kevin/opt/anaconda3/envs/firmest38/lib/:$DYLD_LIBRARY_PATH
-cargo run --features python
-```
-
-# What cargo run do?
-
-Before:
-
-```
-postgres 1 0.1 0.0 2612 588 ? Ss 14:30 0:00 /bin/sh -c service postgresql start && tail -F /var/log/postgresql/postgresql-14-main.log
-postgres 20 0.1 0.0 214688 29332 ? Ss 14:30 0:00 /usr/lib/postgresql/14/bin/postgres -D /var/lib/postgresql/14/main -c config_file=/etc/postgresql/14/main/postgresql.conf
-postgres 22 0.0 0.0 214688 6120 ? Ss 14:30 0:00 postgres: 14/main: checkpointer
-postgres 23 0.0 0.0 214688 6084 ? Ss 14:30 0:00 postgres: 14/main: background writer
-postgres 24 0.0 0.0 214688 10352 ? Ss 14:30 0:00 postgres: 14/main: walwriter
-postgres 25 0.0 0.0 215224 8864 ? Ss 14:30 0:00 postgres: 14/main: autovacuum launcher
-postgres 26 0.0 0.0 69280 5184 ? Ss 14:30 0:00 postgres: 14/main: stats collector
-postgres 27 0.0 0.0 215236 6972 ? Ss 14:30 0:00 postgres: 14/main: logical replication launcher
-postgres 38 0.0 0.0 2548 512 ? S 14:30 0:00 tail -F /var/log/postgresql/postgresql-14-main.log
-postgres 39 0.1 0.0 4112 3424 pts/0 Ss+ 14:30 0:00 bash
-postgres 48 0.1 0.0 4112 3424 pts/1 Ss 14:30 0:00 bash
-postgres 59 0.0 0.0 5896 2860 pts/1 R+ 14:30 0:00 ps aux
-```
-
-After:
-
-
-
-
-
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png
deleted file mode 100644
index 9e73b270d9..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png
deleted file mode 100644
index d7b686d2ba..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/README.md b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/README.md
deleted file mode 100644
index 3025139f17..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/README.md
+++ /dev/null
@@ -1,290 +0,0 @@
-
-
-# TRAILS: A Database Native Model Selection System
-
-
-
-[TOC]
-
-# Config Environments
-
-```bash
-# Create virtual env
-conda config --set ssl_verify false
-conda create -n "trails" python=3.8.10
-conda activate trails
-pip install -r requirement.txt
-
-cd TRAILS
-
-# make a dir to store all results.
-mkdir ../exp_data
-```
-
-# Reproduce the results
-
-## NAS-Bench-Tabular
-
-NAS-Bench-Tabular can either be **downloaded** or built from scratch.
-
-### Download NAS-Bench-Tabular
-
-1. **Download** the dataset from the following link and extract it to `exp_data`:
-
-```bash
-https://drive.google.com/file/d/1TGii9ymbmX81c9-GKWXbe_4Z64R8Btz1/view?usp=sharing
-```
-
-### Build NAS-Bench-Tabular
-
-2. Build the **NAS-Bench-Tabular** from scratch
-
-```bash
-# Construct NAS-Bench-Tabular:
-## 1. Training all models.
-bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
-bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh
-bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh
-
-## 2. Scoring all models using all TFMEMs.
-bash internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh
-bash internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh
-bash internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh
-```
-
-3. Build the **NAS-Bench-Img** from scratch
-
-   To speed up the experiments and queries (the NASBENCH API is slow):
-
-   1. We retrieve all results from the NASBENCH API and store them in a JSON file.
-   2. We score all models in NB201 and 28K models in NB101.
-   3. We search with EA + score and record the search process in terms of
-      `run_id, current_explored_model, top_400 highest scored model, time_usage`
-      in SQLite.
-
-```bash
-# 1. Record NASBENCH API data into a JSON file
-## This requires nats_bench: pip install nats_bench
-bash ./internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh
-
-# 2. Score all models using all TFMEMs.
-nohup bash ./internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh &
-
-# 3. Explore with EA and the score results, and store the exploration process in SQLite
-bash ./internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh
-
-# 4. Generate the baseline.
-bash ./internal/ml/model_selection/scripts/baseline_system_img.sh
-```
-
-The following experiments can then query the filtering-phase results by `run_id`.
-
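-As a rough illustration of the recording step (the table schema and values below are assumptions for exposition, not the project's actual schema), one exploration record could be written to SQLite as follows:
-
-```python
-import sqlite3
-
-conn = sqlite3.connect("ea_exploration.db")
-# Columns mirror the fields listed above: run_id, currently explored model,
-# top-400 highest scored models, and time usage.
-conn.execute("""CREATE TABLE IF NOT EXISTS explore_log (
-    run_id TEXT, current_explored_model TEXT,
-    top_400_models TEXT, time_usage REAL)""")
-conn.execute("INSERT INTO explore_log VALUES (?, ?, ?, ?)",
-             ("run_0", "arch_12345", "[...]", 0.42))
-conn.commit()
-conn.close()
-```
-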
-## SLO-Aware 2Phase-MS
-
-With the above **NAS-Bench-Tabular**, we can run various experiments.
-
-```bash
-# 1. Generate the results for drawing the figure
-## tabular data: training-based-ms
-bash internal/ml/model_selection/scripts/baseline_system_tab.sh
-## tabular data: training-free-ms, 2phase-ms
-nohup bash internal/ml/model_selection/scripts/anytime_tab.sh &
-## image data: training-based-ms, training-free-ms, 2phase-ms
-nohup bash internal/ml/model_selection/scripts/anytime_img_w_baseline.sh &
-
-# 2. Draw figure
-python internal/ml/model_selection/exps/macro/anytime_tab_draw.py
-python internal/ml/model_selection/exps/macro/anytime_img_draw.py
-```
-
-
-
-## Micro: Benchmark TFMEMs
-
-```bash
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-python ./internal/ml/model_selection/exps/micro/benchmark_correlation.py
-```
-
-
-
-## Micro: Benchmark Budget-Aware Algorithm
-
-```bash
-bash internal/ml/model_selection/scripts/micro_budget_aware_alg.sh
-```
-
-
-
-## Micro: Benchmark N, K, U
-
-After ranking the models by their TFMEM score in the filtering phase, we aim to determine:
-
-1. Is it easier to find a good model by examining more models (**K**), each trained for fewer epochs (**U**),
-   or by examining fewer models but training each for more epochs?
-2. How many models should we explore (**N**) and how many should we keep (**K**)?
-
-```bash
-bash internal/ml/model_selection/scripts/micro_nku_tradeoff.sh
-```
-
-These experiments are conducted on the UCI Diabetes dataset.
-Clearly, exploring more models in the refinement phase (large **K**) helps to find a better model.
-Although increasing **U** identifies a better model more reliably, it runs more training epochs and thus incurs a higher training cost.
-
-
-
-We therefore fix **U=1** for cost efficiency and tune the N/K ratio for higher search effectiveness.
-Since N/K = 100 yields better scheduling results on both the image and tabular datasets, we set **N/K=100** in the coordinator.
-
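-As a minimal sketch of this trade-off (assuming the successive-halving-style schedule implied by the K * U * log_eta(K) budget used in the plotting scripts; the function name and the choice of eta are illustrative), the refinement cost of a K/U setting can be estimated as:
-
-```python
-import math
-
-def refinement_budget(k: int, u: int, eta: int = 3) -> float:
-    """Estimated epochs spent refining k models, u epochs per round,
-    shrinking the candidate set by a factor of eta each round."""
-    return k * u * math.log(k, eta)
-
-# With U fixed to 1, a larger K buys more exploration for a given budget.
-for k in (10, 100, 1000):
-    print(k, round(refinement_budget(k, u=1), 1))
-```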
-
-
-
-
-## Micro: Device Placement & Embedding Cache
-
-1. To measure the time usage of the filtering phase on various hardware, run the following:
-
- ```bash
- # Without embedding cache at the filtering phase
- nohup bash internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh &
- # With embedding cache at the filtering phase (faster)
- nohup bash internal/ml/model_selection/scripts/latency_embedding_cache.sh &
- # Draw graph
- python ./internal/ml/model_selection/exps/micro/draw_filtering_latency.py
- python ./internal/ml/model_selection/exps/micro/draw_filtering_memory_bar.py
- python ./internal/ml/model_selection/exps/micro/draw_filtering_memory_line.py
- python ./internal/ml/model_selection/exps/micro/draw_filtering_memory_cache_CPU.py
- ```
-
-2. We further measure the end-to-end latency under the CPU, GPU, and hybrid settings.
-
- ```bash
- nohup bash internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh &
- ```
-
-## Micro: In-DB vs Out-of-DB Filtering Phase
-
-```bash
-# run out-of-DB; read data via psycopg2
-bash ./internal/ml/model_selection/scripts/latency_phase1_in_db.sh
-
-# run the in-DB query; read data via SPI
-select benchmark_filtering_latency_in_db(5000, 'frappe', '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-select benchmark_filtering_latency_in_db(5000, 'uci_diabetes', '/project/TRAILS/internal/ml/model_selection/config.ini');
-
-select benchmark_filtering_latency_in_db(5000, 'criteo', '/project/TRAILS/internal/ml/model_selection/config.ini');
-```
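-
-For reference, a minimal out-of-DB read via psycopg2 might look like the sketch below (the table name and connection parameters are illustrative placeholders, not the benchmark's actual query):
-
-```python
-import psycopg2
-
-conn = psycopg2.connect(host="localhost", port=28814, dbname="pg_extension")
-with conn.cursor() as cur:
-    # Placeholder table name; the benchmark reads the dataset tables instead.
-    cur.execute("SELECT * FROM frappe_train LIMIT 5000;")
-    rows = cur.fetchall()
-print(f"read {len(rows)} rows out of the database")
-conn.close()
-```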
-
-## Micro: On-the-Fly Data Transmission, Refinement
-
-```bash
-# start cache service
-python ./internal/cache-service/cache_service.py
-python ./internal/cache-service/trigger_cache_svc.py
-# consume from the cache-svc (see the consumer sketch below)
-```
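-
-A minimal consumer sketch follows; the endpoint URL and the response layout are assumptions for exposition, and the actual protocol is defined in `internal/cache-service/cache_service.py`:
-
-```python
-import requests  # assumed dependency
-
-# Hypothetical endpoint; check cache_service.py for the real address and route.
-resp = requests.get("http://localhost:8093/")
-batch = resp.json()
-print(f"fetched a batch with {len(batch)} entries from the cache service")
-```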
-
-## Reproduce Figure 7
-
-```bash
-python exps/main_v2/analysis/2.\ cost_draw.py
-python exps/main_v2/analysis/3.\ cost_train_based.py
-```
-
-
-
-## Reproduce Figure 8
-
-```bash
-# draw figure 8(a)
-python exps/main_v2/analysis/5.draw_IDMS_var_workloads.py
-# draw figure 8(b)
-python exps/main_v2/analysis/6.draw_IDMS_dataloading.py
-```
-
-
-# Baselines
-
-We compare with Training-Based MS, TabNAS, training-free MS, etc.
-
-For image data, the baselines are already generated in the NAS-Bench-Img part; see above.
-
-# Appendix
-
-All experiments here are on the Frappe dataset.
-
-1. Computational Costs
-
- ```bash
- bash ./internal/ml/model_selection/exps/micro/resp/benchmark_cost.sh
- ```
-
-2. Search cost for multiple training-free or training-based combinations (warm-up / model proposal)
-
- ```bash
- # get RL, RE, RS + training-based model evaluation
- bash ./internal/ml/model_selection/scripts/micro_search_strategy.sh
-   # this reads the previous file, runs warm-up/model proposal, and draws everything together
- bash ./internal/ml/model_selection/exps/micro/resp/benchmark_search_cost.sh
- ```
-
-3. How does K influence the result?
-
- ```bash
- python ./internal/ml/model_selection/exps/micro/resp/benchmark_k_fix_time.py
- ```
-
-4. Noise in selecting the top K models
-
- ```bash
- python ./internal/ml/model_selection/exps/micro/resp/benchmark_noisy_influence.py
- ```
-
-5. Weight-sharing result
-
- ```bash
- nohup bash internal/ml/model_selection/scripts/benchmark_weight_sharing.sh &
- ```
-
-
-
-
-
-
-
-# Run end-to-end model selection
-
-Download the dataset and put it in `exp_data/data/structure_data`:
-
-```bash
-python main.py --budget=100 --dataset=frappe
-```
-
-Check the logs under `logs_default`.
-
-
-
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png
deleted file mode 100644
index 06a86f9537..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png
deleted file mode 100644
index dde2a761b3..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png
deleted file mode 100644
index c94d59cfdf..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png
deleted file mode 100644
index 387f3ce492..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png
deleted file mode 100644
index c63e58598f..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png
deleted file mode 100644
index 0422e108b7..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png
deleted file mode 100644
index c33bee1859..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png
deleted file mode 100644
index 527d1eb848..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png
deleted file mode 100644
index bc1ae3af5d..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png
deleted file mode 100644
index 8637aaee7f..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png
deleted file mode 100644
index 2fb081d4e2..0000000000
Binary files a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png and /dev/null differ
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_img_lib.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_img_lib.py
deleted file mode 100644
index 7d4acb3e2e..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_img_lib.py
+++ /dev/null
@@ -1,724 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import os
-
-from matplotlib import pyplot as plt
-import seaborn as sns
-import numpy as np
-import palettable
-from matplotlib.ticker import MaxNLocator
-import numpy
-from src.common.constant import Config
-import matplotlib
-
-# lines' mark size
-set_marker_size = 15
-# points' mark size
-set_marker_point = 14
-# points' mark size
-set_font_size = 40
-set_lgend_size = 15
-set_tick_size = 20
-
-frontinsidebox = 23
-
-# update tick size
-matplotlib.rc('xtick', labelsize=set_tick_size)
-matplotlib.rc('ytick', labelsize=set_tick_size)
-
-plt.rcParams['axes.labelsize'] = set_tick_size
-
-mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
-mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
- set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
-line_shape_list = ['-.', '--', '-', ':']
-
-
-# this is for drawing figure 3 only
-def get_plot_compare_with_base_line_cfg(search_space, dataset, if_with_phase1=False):
- if search_space == Config.NB201:
- run_range_ = range(0, 100, 1)
- if if_with_phase1:
- draw_graph = draw_anytime_result_with_p1
- else:
- draw_graph = draw_anytime_result
- # min, this is for plot only
- if dataset == Config.c10:
- # C10 array
- budget_array = [0.017, 0.083] + list(range(1, 350, 4))
- sub_graph_y1 = [91, 94.5]
- sub_graph_y2 = [53.5, 55]
- sub_graph_split = 60
- elif dataset == Config.c100:
-            # C100 array
- budget_array = [0.017, 0.083] + list(range(1, 350, 4))
-
- sub_graph_y1 = [64, 73.5]
- sub_graph_y2 = [15, 16]
- sub_graph_split = 20
- else:
- # ImgNet X array
- budget_array = [0.017, 0.083] + list(range(1, 350, 4))
- sub_graph_y1 = [33, 48]
- sub_graph_y2 = [15.5, 17]
- sub_graph_split = 34
- else:
-        # this is NB101 + C10, because only NB101 has 20 runs; the others have 100 runs.
- run_range_ = range(0, 20, 1)
- if if_with_phase1:
- draw_graph = draw_anytime_result_one_graph_with_p1
- # budget_array = list(range(1, 16, 1))
- budget_array = numpy.arange(0.02, 15, 0.02).tolist()
- else:
- draw_graph = draw_anytime_result_one_graph
- budget_array = [0.017, 0.083] + list(range(1, 2000, 8))
-
- if dataset == Config.c10:
- # C10 array
- # budget_array = list(range(0, 2000, 1))
- sub_graph_y1 = [90, 94.5]
- sub_graph_y2 = [52, 55]
- sub_graph_split = 60
- else:
- raise Exception
-
- return run_range_, budget_array, sub_graph_y1, sub_graph_y2, sub_graph_split, draw_graph
-
-
-def draw_anytime_result(result_dir, y_acc_list_arr, x_T_list,
- x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
- annotations, lv,
- name_img, dataset,
- x1_lim=[], x2_lim=[],
- ):
- fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, dpi=100, gridspec_kw={'height_ratios': [4, 1]})
- exp = np.array(y_acc_list_arr)
- sys_acc_h = np.quantile(exp, .75, axis=0)
- sys_acc_m = np.quantile(exp, .5, axis=0)
- sys_acc_l = np.quantile(exp, .25, axis=0)
-
- # plot simulate result of system
- ax1.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=0.1)
- ax1.plot(x_T_list, sys_acc_m, mark_list[-1], label="TRAILS")
- ax2.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=0.1)
-
- # plot simulate result of train-based line
- ax1.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
- ax1.plot(x_acc_train, y_acc_train_m, mark_list[-2], label="Training-based MS")
- ax2.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
-
- for i in range(len(annotations)):
- ele = annotations[i]
- if ele[1] < lv:
- # convert to mins
-            # plot() takes markersize, not fontsize
-            ax2.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_size)
-        else:
-            ax1.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_size)
- # ax2.scatter(ele[2]/60, ele[1]* 0.01, s=100, color="red")
- # ax2.annotate(ele[0], (ele[2]/60, ele[1] * 0.01))
-
- if len(x1_lim) > 0 and len(x2_lim) > 0:
-        ax1.set_ylim(x1_lim[0], x1_lim[1])  # set subplot 1's y-axis range to show only part of the figure
-        ax2.set_ylim(x2_lim[0], x2_lim[1])  # set subplot 2's y-axis range to show only part of the figure
-
-    ax1.spines['bottom'].set_visible(False)  # hide the bottom spine of subplot 1
-    ax2.spines['top'].set_visible(False)  # hide the top spine of subplot 2
- ax2.set_xticks(range(0, 31, 1))
-
-    d = .85  # slant of the axis-break markers
-    # draw the markers at the axis break
- kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
- linestyle='none', color='r', mec='r', mew=1, clip_on=False)
- ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
- ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
-
- plt.tight_layout()
- plt.xscale("symlog")
- ax1.grid()
- ax2.grid()
- plt.xlabel("Time Budget given by user (min)", fontsize=set_font_size)
- ax1.set_ylabel(f"Test accuracy on {dataset}", fontsize=set_font_size)
- ax1.legend(ncol=1, fontsize=set_lgend_size)
- ax2.legend(fontsize=set_lgend_size)
- # plt.show()
- plt.savefig(f"{result_dir}/any_time_{name_img}.pdf", bbox_inches='tight')
-
-
-def draw_anytime_result_one_graph(y_acc_list_arr, x_T_list,
- x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
- annotations, lv,
- name_img, dataset,
- x1_lim=[], x2_lim=[],
- ):
- # fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, dpi=100, gridspec_kw={'height_ratios': [5, 1]})
- exp = np.array(y_acc_list_arr) * 100
- sys_acc_h = np.quantile(exp, .75, axis=0)
- sys_acc_m = np.quantile(exp, .5, axis=0)
- sys_acc_l = np.quantile(exp, .25, axis=0)
-
- # exp_time = np.array(real_time_used_arr)
- # time_mean = np.quantile(exp_time, .5, axis=0)
- time_mean = x_T_list
-
- # plot simulate result of system
- plt.fill_between(time_mean, sys_acc_l, sys_acc_h, alpha=0.1)
- plt.plot(time_mean, sys_acc_m, "o-", label="TRAILS")
- # plt.plot(time_mean, sys_acc_m, label="TRAILS")
-
- # plot simulate result of train-based line
- plt.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
- plt.plot(x_acc_train, y_acc_train_m, "o-", label="Training-based MS")
- # plt.plot(x_acc_train, y_acc_train_m, label="Training-based MS")
-
- if len(x1_lim) > 0:
-        plt.ylim(x1_lim[0], x1_lim[1])  # set the y-axis range to show only part of the figure
-
-    d = .85  # slant of the axis-break markers
-    # draw the markers at the axis break
- kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
- linestyle='none', color='r', mec='r', mew=1, clip_on=False)
- # plt.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
- # plt.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
-
- plt.tight_layout()
- # plt.xscale("symlog")
- plt.grid()
- plt.xlabel("Time Budget given by user (min)", fontsize=set_font_size)
- plt.ylabel(f"Test accuracy on {dataset}", fontsize=set_font_size)
- plt.legend(ncol=1, fontsize=set_lgend_size)
- plt.show()
- # plt.savefig(f"amy_time_{name_img}.pdf", bbox_inches='tight')
-
-
-# those two function will plot phase 1 and phase 2
-def draw_anytime_result_with_p1(result_dir, y_acc_list_arr, x_T_list, y_acc_list_arr_p1, x_T_list_p1,
- x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
- annotations, lv,
- name_img, dataset, max_value,
- x1_lim=[], x2_lim=[],
- ):
- fig, (ax1, ax2) = plt.subplots(
- 2, 1,
- sharex=True,
- dpi=100,
- gridspec_kw={'height_ratios': [6, 1]})
-
- shade_degree = 0.2
-
- # plot simulate result of train-based line
- ax1.plot(x_acc_train, y_acc_train_m, mark_list[-3] + line_shape_list[0], label="Training-Based MS",
- markersize=mark_size_list[-3])
- ax1.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=shade_degree)
- ax2.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=shade_degree)
-
- # plot simulate result of system
- exp = np.array(y_acc_list_arr_p1)
- sys_acc_p1_h = np.quantile(exp, .75, axis=0)
- sys_acc_p1_m = np.quantile(exp, .5, axis=0)
- sys_acc_p1_l = np.quantile(exp, .25, axis=0)
- ax1.plot(x_T_list_p1, sys_acc_p1_m, mark_list[-2] + line_shape_list[1], label="Training-Free MS",
- markersize=mark_size_list[-2])
- ax1.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=shade_degree)
- ax2.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=shade_degree)
-
- # plot simulate result of system
- exp = np.array(y_acc_list_arr)
- sys_acc_h = np.quantile(exp, .75, axis=0)
- sys_acc_m = np.quantile(exp, .5, axis=0)
- sys_acc_l = np.quantile(exp, .25, axis=0)
- ax1.plot(x_T_list, sys_acc_m, mark_list[-1] + line_shape_list[2], label="2Phase-MS", markersize=mark_size_list[-1])
- ax1.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=shade_degree)
- ax2.fill_between(x_T_list, sys_acc_l, sys_acc_h, alpha=shade_degree)
-
- print(f"speed-up on {dataset} = {x_acc_train[-1] / x_T_list[-2]}, "
- f"t_train = {x_acc_train[-1]}, t_f = {x_T_list[-2]}")
-
- for i in range(len(annotations)):
- ele = annotations[i]
- if ele[1] < lv:
- # convert to mins
- ax2.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_point)
- else:
- ax1.plot(ele[2] / 60, ele[1], mark_list[i], label=ele[0], markersize=set_marker_point)
- # ax2.scatter(ele[2]/60, ele[1]* 0.01, s=100, color="red")
- # ax2.annotate(ele[0], (ele[2]/60, ele[1] * 0.01))
-
- if len(x1_lim) > 0 and len(x2_lim) > 0:
-        ax1.set_ylim(x1_lim[0], x1_lim[1])  # set subplot 1's y-axis range to show only part of the figure
-        ax2.set_ylim(x2_lim[0], x2_lim[1])  # set subplot 2's y-axis range to show only part of the figure
-
-    ax1.spines['bottom'].set_visible(False)  # hide the bottom spine of subplot 1
-    ax2.spines['top'].set_visible(False)  # hide the top spine of subplot 2
- ax2.set_xticks(range(0, 31, 1))
-
-    d = .85  # slant of the axis-break markers
-    # draw the markers at the axis break
- kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
- linestyle='none', color='r', mec='r', mew=1, clip_on=False)
- ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
- ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
-
- plt.xscale("log")
- ax1.grid()
- ax2.grid()
- plt.xlabel(r"Response Time Threshold $T_{max}$ (min)", fontsize=set_font_size)
- ax1.set_ylabel(f"Test Acc on {'In-16'}", fontsize=set_font_size)
- # ax1.legend(ncol=1, fontsize=set_lgend_size)
- # ax2.legend(fontsize=set_lgend_size)
-
- ax1.xaxis.label.set_size(set_tick_size)
- ax1.yaxis.label.set_size(set_tick_size)
- # ax1.set_xticks([])
-
- ax2.xaxis.label.set_size(set_tick_size)
- ax2.yaxis.label.set_size(set_tick_size)
-
- ax1.yaxis.set_major_locator(MaxNLocator(nbins=4, integer=True))
-
- ax1.axhline(max_value, color='r', linestyle='-', label='Global Best Accuracy')
-
- tick_values = [0.01, 0.1, 1, 10, 100, 1000]
- ax2.set_xticks(tick_values)
- ax2.set_xticklabels([f'$10^{{{int(np.log10(val))}}}$' for val in tick_values])
-
- # this is for unique hash
- export_legend(
- fig,
- colnum=3,
- unique_labels=['TE-NAS (Training-Free)', 'ENAS (Weight sharing)',
- 'KNAS (Training-Free)', 'DARTS-V1 (Weight sharing)', 'DARTS-V2 (Weight sharing)',
- 'Training-Based MS', 'Training-Free MS', '2Phase-MS', 'Global Best Accuracy'])
- plt.tight_layout()
- fig.savefig(f"{result_dir}/any_time_{name_img}_p1_from_0.1_sec.pdf", bbox_inches='tight')
-
-
-def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=None):
-    if unique_labels is None:
-        unique_labels = []
-    fig2 = plt.figure(figsize=(5, 0.3))
- lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
- lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
- # grab unique labels
- if len(unique_labels) == 0:
- unique_labels = set(labels)
- # assign labels and legends in dict
- legend_dict = dict(zip(labels, lines))
- # query dict based on unique labels
- unique_lines = [legend_dict[x] for x in unique_labels]
- fig2.legend(unique_lines, unique_labels, loc='center',
- ncol=colnum,
- fancybox=True,
- shadow=True, scatterpoints=1, fontsize=set_lgend_size)
- fig2.tight_layout()
- fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
-
-
-def draw_anytime_result_one_graph_with_p1(y_acc_list_arr, x_T_list, y_acc_list_arr_p1, x_T_list_p1,
- x_acc_train, y_acc_train_l, y_acc_train_m, y_acc_train_h,
- annotations, lv,
- name_img, dataset,
- x1_lim=[], x2_lim=[],
- ):
- # fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, dpi=100, gridspec_kw={'height_ratios': [5, 1]})
-
- # plot simulate result of system
- exp = np.array(y_acc_list_arr_p1) * 100
- sys_acc_p1_h = np.quantile(exp, .75, axis=0)
- sys_acc_p1_m = np.quantile(exp, .5, axis=0)
- sys_acc_p1_l = np.quantile(exp, .25, axis=0)
-
- plt.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=0.1)
- plt.plot(x_T_list_p1, sys_acc_p1_m, "o-", label="TRAILS-P1")
- # plt.fill_between(x_T_list_p1, sys_acc_p1_l, sys_acc_p1_h, alpha=0.1)
-
- exp = np.array(y_acc_list_arr) * 100
- sys_acc_h = np.quantile(exp, .75, axis=0)
- sys_acc_m = np.quantile(exp, .5, axis=0)
- sys_acc_l = np.quantile(exp, .25, axis=0)
-
- # exp_time = np.array(real_time_used_arr)
- # time_mean = np.quantile(exp_time, .5, axis=0)
- time_mean = x_T_list
-
- # plot simulate result of system
- plt.fill_between(time_mean, sys_acc_l, sys_acc_h, alpha=0.1)
- plt.plot(time_mean, sys_acc_m, "o-", label="TRAILS")
- # plt.plot(time_mean, sys_acc_m, label="TRAILS")
-
- # plot simulate result of train-based line
- plt.fill_between(x_acc_train, y_acc_train_l, y_acc_train_h, alpha=0.3)
- plt.plot(x_acc_train, y_acc_train_m, "o-", label="Training-based MS")
- # plt.plot(x_acc_train, y_acc_train_m, label="Training-based MS")
-
- if len(x1_lim) > 0:
-        plt.ylim(x1_lim[0], x1_lim[1])  # set the y-axis range to show only part of the figure
-
-    d = .85  # slant of the axis-break markers
-    # draw the markers at the axis break
- kwargs = dict(marker=[(-1, -d), (1, d)], markersize=set_marker_size,
- linestyle='none', color='r', mec='r', mew=1, clip_on=False)
- # plt.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
- # plt.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)
-
- plt.tight_layout()
- plt.xscale("symlog")
- plt.grid()
- plt.xlabel("Time Budget given by user (min)", fontsize=set_font_size)
- plt.ylabel(f"Test accuracy on {dataset}", fontsize=set_font_size)
- plt.legend(ncol=1, fontsize=set_lgend_size)
- # plt.show()
- plt.savefig(f"amy_time_{name_img}.pdf", bbox_inches='tight')
-
-
-# for K, U N trade-off
-def draw_grid_graph_with_budget(
- acc, bt, b1, b2,
- img_name: str, y_array: list, x_array: list):
- """
- :param acc: Two array list
- :param bt: Two array list
- :param img_name: img name string
- :return:
- """
-
- acc_new = np.array(acc)
- acc = acc_new.tolist()
-
- mask = np.array(acc)
- mask[mask > 0] = 0
- mask[mask < 0] = 1
-
- bt = np.round(np.array(bt), 2).tolist()
- mask2 = np.array(bt)
- mask2[mask2 > 0] = 0
- mask2[mask2 < 0] = 1
-
- mask3 = np.array(b1)
- mask3[mask3 > 0] = 0
- mask3[mask3 < 0] = 1
-
- mask4 = np.array(b2)
- mask4[mask4 > 0] = 0
- mask4[mask4 < 0] = 1
-
- fig, ax = plt.subplots(2, 2, figsize=(15, 14))
-
- linewidths = 0.5
- sns.set(font_scale=3)
- sns.heatmap(
- data=acc,
- vmax=99,
- vmin=93,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".2f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
- mask=mask,
-        square=True, linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .5},
- ax=ax[0, 0]
- )
-
- sns.heatmap(
- data=bt,
- # vmax=,
- vmin=-9,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".2f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
- mask=mask2,
-        square=True, linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .5},
- ax=ax[0, 1]
- )
-
- sns.heatmap(
- data=b1,
- vmax=17000,
- vmin=15000,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".0f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
-        mask=mask3,  # mask3 corresponds to b1 (mask4 belongs to b2)
-        square=True, linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .5},
- ax=ax[1, 0]
- )
-
- sns.heatmap(
- data=b2,
- # vmax=,
- # vmin=-9,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".0f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
- mask=mask4,
-        square=True, linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .5},
- ax=ax[1, 1]
- )
-
- plt.tight_layout()
- plt.xlabel("U (epoch)", fontsize=set_font_size)
- plt.ylabel("K (# models)", fontsize=set_font_size)
-
- for i in [0, 1]:
- for j in [0, 1]:
- ax[i, j].set_xticklabels(x_array, fontsize=set_font_size)
- ax[i, j].set_yticklabels(y_array, fontsize=set_font_size)
- ax[i, j].set_xlabel("U (# epoch)", fontsize=set_font_size)
- ax[i, j].set_ylabel("K (# models)", fontsize=set_font_size)
-
- ax[0, 0].set_title('Test Accuracy (%)', fontsize=set_font_size)
- ax[0, 1].set_title(r'Time Budget $T$ (min)', fontsize=set_font_size)
- ax[1, 0].set_title(r'$N$', fontsize=set_font_size)
- ax[1, 1].set_title(r"$K \cdot U \cdot \log_{\eta}K$", fontsize=set_font_size)
-
- plt.tight_layout()
- fig.subplots_adjust(wspace=0.001, hspace=0.3)
-
- # plt.show()
- base_dr = os.getcwd()
- path_gra = os.path.join(base_dr, f"{img_name}.pdf")
- fig.savefig(path_gra, bbox_inches='tight')
-
-
-def draw_grid_graph_with_budget_only_Acc_and_T(
- acc, bt, b1, b2,
- img_name: str, y_array: list, x_array: list):
- """
- :param acc: Two array list
- :param bt: Two array list
- :param img_name: img name string
- :return:
- """
-
- acc_new = np.array(acc)
- acc = acc_new.tolist()
-
- mask = np.array(acc)
- mask[mask > 0] = 0
- mask[mask < 0] = 1
-
- bt = np.round(np.array(bt), 2).tolist()
- mask2 = np.array(bt)
- mask2[mask2 > 0] = 0
- mask2[mask2 < 0] = 1
-
- mask3 = np.array(b1)
- mask3[mask3 > 0] = 0
- mask3[mask3 < 0] = 1
-
- mask4 = np.array(b2)
- mask4[mask4 > 0] = 0
- mask4[mask4 < 0] = 1
-
- fig, ax = plt.subplots(1, 2, figsize=(15, 14))
-
- linewidths = 0.5
- sns.set(font_scale=2)
- sns.heatmap(
- data=acc,
- vmax=99,
- vmin=93,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".2f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
- mask=mask,
- square=True,
-        linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .4},
- ax=ax[0]
- )
-
- sns.heatmap(
- data=bt,
- vmax=600,
- # vmin=-9,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".2f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
- mask=mask2,
- square=True,
-        linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .4},
- ax=ax[1]
- )
-
- plt.tight_layout()
- plt.xlabel("U (epoch)", fontsize=set_font_size)
- plt.ylabel("K (# models)", fontsize=set_font_size)
-
- for j in [0, 1]:
- ax[j].set_xticklabels(x_array, fontsize=set_font_size)
- ax[j].set_yticklabels(y_array, fontsize=set_font_size)
- ax[j].set_xlabel("U (# epoch)", fontsize=set_font_size)
- ax[j].set_ylabel("K (# models)", fontsize=set_font_size)
-
- ax[0].set_title('Test Accuracy (%)', fontsize=set_font_size)
- ax[1].set_title(r'Time Budget $T$ (min)', fontsize=set_font_size)
-
- plt.tight_layout()
- fig.subplots_adjust(wspace=0.3, hspace=0.3)
-
- # plt.show()
- base_dr = os.getcwd()
- path_gra = os.path.join(base_dr, f"{img_name}.pdf")
- fig.savefig(path_gra, bbox_inches='tight')
-
-
-def draw_grid_graph_with_budget_only_Acc(
- acc, bt, b1, b2,
- img_name: str, y_array: list, x_array: list):
- """
- :param acc: Two array list
- :param bt: Two array list
- :param img_name: img name string
- :return:
- """
-
- acc_new = np.array(acc)
- acc = acc_new.tolist()
-
- mask = np.array(acc)
- mask[mask > 0] = 0
- mask[mask < 0] = 1
-
-    fig, ax = plt.subplots(figsize=(7, 14))
-
- linewidths = 0.5
- sns.set(font_scale=2)
- sns.heatmap(
- data=acc,
- vmax=99,
- vmin=93,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".2f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
- mask=mask,
- square=True,
-        linewidths=linewidths,  # show a border around each cell with this width
-        cbar_kws={"shrink": .4},
-        ax=ax  # heatmap needs an Axes, not a Figure
- )
-
- plt.tight_layout()
- plt.xlabel("U (epoch)", fontsize=set_font_size)
- plt.ylabel("K (# models)", fontsize=set_font_size)
-
- plt.xticks(x_array, fontsize=set_font_size)
- plt.yticks(y_array, fontsize=set_font_size)
-
- plt.title('Test Accuracy (%)', fontsize=set_font_size)
- plt.tight_layout()
- # fig.subplots_adjust(wspace=0.3, hspace=0.3)
- # plt.show()
- base_dr = os.getcwd()
- path_gra = os.path.join(base_dr, f"{img_name}.pdf")
- fig.savefig(path_gra, bbox_inches='tight')
-
-
-def draw_grid_graph_with_budget_only_T(
- acc, bt, b1, b2,
- img_name: str, y_array: list, x_array: list):
- """
- :param acc: Two array list
- :param bt: Two array list
- :param img_name: img name string
- :return:
- """
-
- acc_new = np.array(acc)
- acc = acc_new.tolist()
-
- mask = np.array(acc)
- mask[mask > 0] = 0
- mask[mask < 0] = 1
-
- bt = np.round(np.array(bt), 2).tolist()
- mask2 = np.array(bt)
- mask2[mask2 > 0] = 0
- mask2[mask2 < 0] = 1
-
- mask3 = np.array(b1)
- mask3[mask3 > 0] = 0
- mask3[mask3 < 0] = 1
-
- mask4 = np.array(b2)
- mask4[mask4 > 0] = 0
- mask4[mask4 < 0] = 1
-
- fig, ax = plt.subplots(1, 2, figsize=(15, 14))
-
- linewidths = 0.5
- sns.set(font_scale=2)
- sns.heatmap(
- data=acc,
- vmax=99,
- vmin=93,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".2f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'bottom'},
- mask=mask,
- square=True,
-        linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .4},
- ax=ax[0]
- )
-
- sns.heatmap(
- data=bt,
- vmax=600,
- # vmin=-9,
- cmap=palettable.cmocean.diverging.Curl_10.mpl_colors,
- annot=True,
- fmt=".2f",
- annot_kws={'size': frontinsidebox, 'weight': 'normal', 'color': 'w', 'va': 'top'},
- mask=mask2,
- square=True,
-        linewidths=linewidths,  # show a border around each cell with this width
- cbar_kws={"shrink": .4},
- ax=ax[1]
- )
-
- plt.tight_layout()
- plt.xlabel("U (epoch)", fontsize=set_font_size)
- plt.ylabel("K (# models)", fontsize=set_font_size)
-
- for j in [0, 1]:
- ax[j].set_xticklabels(x_array, fontsize=set_font_size)
- ax[j].set_yticklabels(y_array, fontsize=set_font_size)
- ax[j].set_xlabel("U (# epoch)", fontsize=set_font_size)
- ax[j].set_ylabel("K (# models)", fontsize=set_font_size)
-
- ax[0].set_title('Test Accuracy (%)', fontsize=set_font_size)
- ax[1].set_title(r'Time Budget $T$ (min)', fontsize=set_font_size)
-
- plt.tight_layout()
- fig.subplots_adjust(wspace=0.3, hspace=0.3)
-
- # plt.show()
- base_dr = os.getcwd()
- path_gra = os.path.join(base_dr, f"{img_name}.pdf")
- fig.savefig(path_gra, bbox_inches='tight')
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_tab_lib.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_tab_lib.py
deleted file mode 100644
index 6c30cc06b4..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/draw_tab_lib.py
+++ /dev/null
@@ -1,215 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import List
-
-import numpy as np
-from matplotlib import pyplot as plt
-from matplotlib.ticker import MaxNLocator
-import warnings
-import matplotlib.cbook
-
-warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
-
-# lines' mark size
-set_marker_size = 1
-# points' mark size
-set_marker_point = 14
-# points' mark size
-set_font_size = 20
-set_lgend_size = 15
-set_tick_size = 20
-
-frontinsidebox = 23
-
-# update tick size
-matplotlib.rc('xtick', labelsize=set_tick_size)
-matplotlib.rc('ytick', labelsize=set_tick_size)
-
-plt.rcParams['axes.labelsize'] = set_tick_size
-
-mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
-mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
- set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
-line_shape_list = ['-.', '--', '-', ':']
-shade_degree = 0.2
-
-
-def Add_one_line(x_time_array: list, y_twod_budget: List[List], namespace: str, index, ax):
- # training-based
- x_ = x_time_array
- y_ = y_twod_budget
-
- if all(isinstance(item, list) for item in x_):
- expx = np.array(x_)
- x_m = np.quantile(expx, .5, axis=0)
- else:
- x_m = x_
-
- exp = np.array(y_)
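-    # values <= 10 are assumed to be AUC fractions in [0, 1]; scale them to percentages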
- exp = np.where(exp > 10, exp, exp * 100)
-
- y_h = np.quantile(exp, .75, axis=0)
- y_m = np.quantile(exp, .5, axis=0)
- y_l = np.quantile(exp, .25, axis=0)
-
- ax.plot(x_m, y_m,
- mark_list[int(index % len(mark_list))] + line_shape_list[int(index % len(line_shape_list))],
- label=namespace,
- markersize=mark_size_list[int(index % len(mark_list))],
- linewidth=3
- )
-
- ax.fill_between(x_m, y_l, y_h, alpha=shade_degree)
- return x_m
-
-
-def draw_structure_data_anytime(
- all_lines: List,
- dataset: str, name_img: str, max_value,
- figure_size=(6.4, 4.5),
- annotations=[],
- x_ticks=None, y_ticks=None, unique_labels=None):
- fig, ax = plt.subplots(figsize=figure_size)
-
- # draw all lines
- time_usage = []
- for i, each_line_info in enumerate(all_lines):
- _x_array = each_line_info[0]
- _y_2d_array = each_line_info[1]
- _name_space = each_line_info[2]
- time_arr = Add_one_line(_x_array, _y_2d_array, _name_space, i, ax)
- time_usage.append(time_arr)
-
- # print(f"speed-up on {dataset} = {time_usage[0][-1] / time_usage[2][-2]}, "
- # f"t_train = {time_usage[0][-1]}, t_f = {time_usage[2][-2]}")
-
- # plt.xscale("log")
- # plt.grid()
- # plt.xlabel(r"Time Budget $T$ (min)", fontsize=set_font_size)
- # plt.ylabel(f"AUC on {dataset.upper()}", fontsize=set_font_size)
-
- plt.xscale("log")
- ax.grid()
- ax.set_xlabel(r"Response Time Threshold $T_{max}$ (min)", fontsize=set_font_size)
- ax.set_ylabel(f"AUC on {dataset.upper()}", fontsize=set_font_size)
- # ax.set_xscale("log")
- # ax.set_xlim(0.001, 10e4)
- # ax.set_ylim(x1_lim[0], x1_lim[1])
-
- if y_ticks is not None:
- if y_ticks[0] is not None:
- ax.set_ylim(bottom=y_ticks[0])
- if y_ticks[1] is not None:
- ax.set_ylim(top=y_ticks[1])
- # ax.set_ylim(y_ticks[0], y_ticks[1])
- # ax.set_yticks(y_ticks)
- # ax.set_yticklabels(y_ticks)
- if x_ticks is not None:
- if x_ticks[0] is not None:
- ax.set_xlim(left=x_ticks[0])
- if x_ticks[1] is not None:
- ax.set_xlim(right=x_ticks[1])
-
- ax.yaxis.set_major_locator(MaxNLocator(nbins=6, integer=False))
-
- if max_value > 0:
- plt.axhline(max_value, color='r', linestyle='-', label='Global Best AUC')
-
- for i in range(len(annotations)):
- ele = annotations[i]
- ax.plot(ele[2], ele[1], mark_list[i], label=ele[0], markersize=set_marker_point)
-
- # export_legend(fig, filename="any_time_legend", unique_labels=["Training-Based MS", "Training-Free MS", "2Phase-MS", 'Global Best AUC'])
- export_legend(ori_fig=fig, colnum=5, unique_labels=unique_labels)
- plt.tight_layout()
-
- fig.savefig(f"{name_img}.pdf", bbox_inches='tight')
-
-
-def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=None):
- if unique_labels is None:
- unique_labels = []
- fig2 = plt.figure(figsize=(5, 0.3))
- lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
- lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
- # grab unique labels
- if len(unique_labels) == 0:
- unique_labels = set(labels)
- # assign labels and legends in dict
- legend_dict = dict(zip(labels, lines))
- # query dict based on unique labels
- unique_lines = [legend_dict[x] for x in unique_labels]
- fig2.legend(unique_lines, unique_labels, loc='center',
- ncol=colnum,
- fancybox=True,
- shadow=True, scatterpoints=1, fontsize=set_lgend_size)
- fig2.tight_layout()
- fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
-
-
-import seaborn as sns  # used by plot_heatmap below; pyplot is already imported above
-
-
-def plot_heatmap(data: List, fontsize: int,
- x_array_name: str, y_array_name: str,
- title: str, output_file: str,
- decimal_places: int,
- u_ticks, k_ticks,
- ):
- labelsize = fontsize
- # Convert the data to a NumPy array
- data_array = np.array(data)
-
- # Custom annotation function
- def custom_annot(val):
- return "{:.{}f}".format(val, decimal_places) if val > 0 else ""
-
- # Convert the custom annotations to a 2D array
- annot_array = np.vectorize(custom_annot)(data_array)
-
- # Create a masked array to hide the cells with values less than or equal to 0
- masked_data = np.ma.masked_array(data_array, data_array <= 0)
-
- # Set the figure size (width, height) in inches
- fig, ax = plt.subplots(figsize=(8, 4))
-
- # Use the "viridis" colormap
- cmap = "viridis"
-
- # Create a heatmap
- sns.heatmap(masked_data, annot=annot_array, fmt='', cmap=cmap, mask=masked_data.mask, ax=ax,
- annot_kws={"size": fontsize, "ha": "center", "va": "center"},
- xticklabels=u_ticks, yticklabels=k_ticks)
-
- # Set axis labels
- ax.set_xlabel(x_array_name, fontsize=fontsize)
- ax.set_ylabel(y_array_name, fontsize=fontsize)
-
- # Set x/y-axis tick size
- ax.tick_params(axis='both', which='major', labelsize=labelsize)
-
- # Set the title
- # ax.set_title(title, fontsize=fontsize)
-
- # Set tight layout
- plt.tight_layout()
-
- # Save the plot to a PDF file
- plt.savefig(output_file)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_dist_online.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_dist_online.py
deleted file mode 100644
index e515647ecb..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_dist_online.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import argparse
-import calendar
-import json
-import logging
-import os
-import time
-
-from exps.shared_args import parse_arguments
-
-
-def partition_list_by_worker_id(lst, num_workers=15):
- partitions = []
- for i in range(num_workers):
- partitions.append([])
- for idx, item in enumerate(lst):
- worker_id = idx % num_workers
- partitions[worker_id].append(item)
- return partitions
-
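-# Example: partition_list_by_worker_id(list(range(5)), num_workers=2)
-# -> [[0, 2, 4], [1, 3]]  (items are assigned round-robin by index)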
-
-def start_one_worker(queue, args, worker_id, my_partition, search_space_ins, res):
- from src.tools.io_tools import write_json, read_json
- gmt = time.gmtime()
- ts = calendar.timegm(gmt)
-
- os.environ.setdefault("log_file_name", f"{args.log_name}_{args.dataset}_wkid_{worker_id}_{ts}.log")
- # import logging
- logger = logging.getLogger(f"{args.dataset}_wkid_{worker_id}_{ts}")
- if not os.path.exists(f"./{args.log_folder}"):
- os.makedirs(f"./{args.log_folder}")
- handler = logging.FileHandler(f"./{args.log_folder}/{args.log_name}_{args.dataset}_wkid_{worker_id}_{ts}.log")
- formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
- handler.setFormatter(formatter)
- logger.addHandler(handler)
- from src.eva_engine.phase2.algo.trainer import ModelTrainer
-
-    if args.total_models_per_worker is None:
-        logger.info(
-            f" ---- begin exploring; current worker has "
-            f"{len(my_partition)} models and explores all of them ")
-    else:
-        logger.info(f" ---- begin exploring; current worker has "
-                    f"{len(my_partition)} models but explores only {args.total_models_per_worker} ")
-
- train_loader, val_loader, test_loader = queue.get()
-
- checkpoint_file_name = f"./base_line_res_{args.dataset}/train_baseline_{args.dataset}_wkid_{worker_id}.json"
- visited = read_json(checkpoint_file_name)
- if visited == {}:
- visited = {args.dataset: {}}
- logger.info(f" ---- initialize checkpointing with {visited} . ")
- else:
- logger.info(f" ---- recovery from checkpointing with {len(visited[args.dataset])} model. ")
-
- explored_arch_num = 0
- for arch_index in my_partition:
- print(f"begin to train the {arch_index}")
- model = search_space_ins.new_architecture(res[arch_index]).to(args.device)
- valid_auc, total_run_time, train_log = ModelTrainer.fully_train_arch(
- model=model,
- use_test_acc=False,
- epoch_num=args.epoch,
- train_loader=train_loader,
- val_loader=val_loader,
- test_loader=test_loader,
- args=args, logger=logger)
-
- logger.info(f' ----- model id: {res[arch_index]}, Val_AUC : {valid_auc} Total running time: '
- f'{total_run_time}-----')
-
- # update the shared model eval res
- logger.info(f" ---- exploring {explored_arch_num} model. ")
- logger.info(f" ---- info: {json.dumps({res[arch_index]: train_log})}")
- visited[args.dataset][res[arch_index]] = train_log
- explored_arch_num += 1
-
- if args.total_models_per_worker is not None and explored_arch_num > args.total_models_per_worker:
- break
-
- logger.info(f" Saving result to: {checkpoint_file_name}")
- write_json(checkpoint_file_name, visited)
-
-
-if __name__ == "__main__":
- mp.set_start_method('spawn', force=True)
- args = parse_arguments()
-
- # set the log name
- gmt = time.gmtime()
- ts = calendar.timegm(gmt)
-
- os.environ.setdefault("log_file_name", f"{args.log_name}_{args.dataset}_main_{ts}.log")
- os.environ.setdefault("base_dir", args.base_dir)
-
- from src.search_space.init_search_space import init_search_space
- from src.dataset_utils.structure_data_loader import libsvm_dataloader
- from src.tools.io_tools import write_json, read_json
-
- search_space_ins = init_search_space(args)
- search_space_ins.load()
-
- # 1. main process partition data and group results,
- res = read_json(args.pre_partitioned_file)
-
- total_workers = args.worker_each_gpu * args.gpu_num
- all_partition = partition_list_by_worker_id(list(res.keys()), total_workers)
-
- train_loader, val_loader, test_loader = libsvm_dataloader(
- args=args,
- data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
- nfield=args.nfield,
- batch_size=args.batch_size)
-
- # 2. put the shared dataloader into the queue,
- queue = mp.Queue()
-
- # 3. Create a list of processes to train the models
- processes = []
- worker_id = 0
- for gpu_id in range(args.gpu_num):
- for _ in range(args.worker_each_gpu):
- if args.device != "cpu":
- args.device = f"cuda:{gpu_id}"
- print(f"running process {[args.device, worker_id, len(all_partition[worker_id])]}")
- p = mp.Process(
- target=start_one_worker,
- args=(queue, args, worker_id, all_partition[worker_id], search_space_ins, res,
- )
- )
- p.start()
- processes.append(p)
- worker_id += 1
-
- # 4. send to the queue
- for gpu_id in range(args.gpu_num):
- for _ in range(args.worker_each_gpu):
- print("putting to queue ....")
- queue.put((train_loader, val_loader, test_loader))
-
- print("All processing are running, waiting all to finish....")
- for p in processes:
- p.join()
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py
deleted file mode 100644
index cc1b44481a..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py
+++ /dev/null
@@ -1,118 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import calendar
-import json
-import os
-import time
-
-from exps.shared_args import parse_arguments
-
-
-def partition_list_by_worker_id(lst, num_workers=15):
- partitions = []
- for i in range(num_workers):
- partitions.append([])
- for idx, item in enumerate(lst):
- worker_id = idx % num_workers
- partitions[worker_id].append(item)
- return partitions
-
-
-if __name__ == "__main__":
-
- args = parse_arguments()
-
- # set the log name
- gmt = time.gmtime()
- ts = calendar.timegm(gmt)
-
- os.environ.setdefault("log_logger_folder_name", f"{args.log_folder}")
- os.environ.setdefault("log_file_name", f"{args.log_name}_{args.dataset}_wkid_{args.worker_id}_{ts}.log")
- os.environ.setdefault("base_dir", args.base_dir)
-
- from src.logger import logger
- from src.eva_engine.phase2.algo.trainer import ModelTrainer
- from src.search_space.init_search_space import init_search_space
- from src.dataset_utils.structure_data_loader import libsvm_dataloader
- from src.tools.io_tools import write_json, read_json
-
- search_space_ins = init_search_space(args)
- search_space_ins.load()
-
- # 1. data loader
- logger.info(f" Loading data....")
- train_loader, val_loader, test_loader = libsvm_dataloader(
- args=args,
- data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
- nfield=args.nfield,
- batch_size=args.batch_size)
-
- res = read_json(args.pre_partitioned_file)
-
- all_partition = partition_list_by_worker_id(list(res.keys()), args.total_workers)
-
-    if args.total_models_per_worker == -1:
-        logger.info(
-            f" ---- begin exploring; current worker has "
-            f"{len(all_partition[args.worker_id])} models and explores all of them ")
-    else:
-        logger.info(f" ---- begin exploring; current worker has "
-                    f"{len(all_partition[args.worker_id])} models but explores only {args.total_models_per_worker} ")
-
- # read the checkpoint
- checkpoint_file_name = f"{args.result_dir}/train_baseline_{args.dataset}_wkid_{args.worker_id}.json"
- visited = read_json(checkpoint_file_name)
- if visited == {}:
- visited = {args.dataset: {}}
- logger.info(f" ---- initialize checkpointing with {visited} . ")
- else:
- logger.info(f" ---- recovery from checkpointing with {len(visited[args.dataset])} model. ")
-
- explored_arch_num = 0
- for arch_index in all_partition[args.worker_id]:
- print(f"begin to train the {arch_index}")
- if res[arch_index] in visited[args.dataset]:
- logger.info(f" ---- model {res[arch_index]} already visited")
- continue
- model = search_space_ins.new_architecture(res[arch_index])
- model.init_embedding(requires_grad=True)
- model.to(args.device)
- valid_auc, total_run_time, train_log = ModelTrainer.fully_train_arch(
- model=model,
- use_test_acc=False,
- epoch_num=args.epoch,
- train_loader=train_loader,
- val_loader=val_loader,
- test_loader=test_loader,
- args=args)
-
- logger.info(f' ----- model id: {res[arch_index]}, Val_AUC : {valid_auc} Total running time: '
- f'{total_run_time}-----')
-
- # update the shared model eval res
- logger.info(f" ---- exploring {explored_arch_num} model. ")
- logger.info(f" ---- info: {json.dumps({res[arch_index]: train_log})}")
- visited[args.dataset][res[arch_index]] = train_log
- explored_arch_num += 1
-
- if args.total_models_per_worker != -1 and explored_arch_num > args.total_models_per_worker:
- break
-
- logger.info(f" Saving result to: {checkpoint_file_name}")
- write_json(checkpoint_file_name, visited)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_ecdf.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_ecdf.py
deleted file mode 100644
index 645f72f437..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_ecdf.py
+++ /dev/null
@@ -1,136 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import numpy as np
-import matplotlib
-import matplotlib.pyplot as plt
-import os
-from src.tools.io_tools import read_json
-
-# line markers' size
-set_marker_size = 15
-# point markers' size
-set_marker_point = 14
-# font size
-set_font_size = 25
-set_legend_size = 15
-set_tick_size = 20
-
-frontinsidebox = 23
-
-# update tick size
-matplotlib.rc('xtick', labelsize=set_tick_size)
-matplotlib.rc('ytick', labelsize=set_tick_size)
-
-plt.rcParams['axes.labelsize'] = set_tick_size
-
-mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
-mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
- set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
-line_shape_list = ['-.', '--', '-', ':']
-shade_degree = 0.2
-base_dir = "../exp_data/"
-
-
-def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=None):
- if unique_labels is None:
- unique_labels = []
- fig2 = plt.figure(figsize=(5, 0.3))
- lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
- lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
- # grab unique labels
- if len(unique_labels) == 0:
- unique_labels = set(labels)
- # assign labels and legends in dict
- legend_dict = dict(zip(labels, lines))
- # query dict based on unique labels
- unique_lines = [legend_dict[x] for x in unique_labels]
- fig2.legend(unique_lines, unique_labels, loc='center',
- ncol=colnum,
- fancybox=True,
-                shadow=True, scatterpoints=1, fontsize=set_legend_size)
- fig2.tight_layout()
- fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
-
-
-def draw_ecdf():
- # extract train_auc and valid_auc into separate lists
- for dataset, architectures in data_dict.items():
-
- fig, ax = plt.subplots(figsize=(6.4, 3.5))
- print(dataset)
- train_auc = []
- valid_auc = []
- for architecture, epochs in architectures.items():
- for epoch, metrics in epochs.items():
- if str(epoch_sampled[dataset]) == epoch:
- train_auc.append(metrics["train_auc"])
- valid_auc.append(metrics["valid_auc"])
- break
-
- # calculate and plot ECDF for train_auc
- sorted_train_auc = np.sort(train_auc)
- y_train = np.arange(1, len(sorted_train_auc) + 1) / len(sorted_train_auc)
- plt.plot(sorted_train_auc, y_train, label='Training AUC', linewidth=3, linestyle='--')
-
- # calculate and plot ECDF for valid_auc
- sorted_valid_auc = np.sort(valid_auc)
- y_valid = np.arange(1, len(sorted_valid_auc) + 1) / len(sorted_valid_auc)
- plt.plot(sorted_valid_auc, y_valid, label='Validation AUC', linewidth=3, linestyle='-')
-
-            y_m = np.quantile(sorted_valid_auc, .5)
-            print("median", y_m, "best", max(sorted_valid_auc))
- # plt.xlim(left=0.45)
-
- plt.grid()
-            plt.xlabel('AUC')
- plt.ylabel('ECDF')
-            # plt.legend(loc='upper left', fontsize=set_legend_size)
- plt.tight_layout()
- export_legend(ori_fig=fig, colnum=5)
- fig.savefig(f"space_{dataset}.pdf", bbox_inches='tight')
-
-
-# dataset_used = "frappe"
-dataset_used = "uci_diabetes"
-# dataset_used = "criteo"
-
-
-epoch_sampled = {"frappe": 19, "uci_diabetes": 35, "criteo": 9}
-
-if dataset_used == "frappe":
- mlp_train_frappe = os.path.join(
- base_dir,
- "tab_data/frappe/all_train_baseline_frappe.json")
- data_dict = read_json(mlp_train_frappe)
-elif dataset_used == "uci_diabetes":
- mlp_train_uci_diabetes = os.path.join(
- base_dir,
- "tab_data/uci_diabetes/all_train_baseline_uci_160k_40epoch.json")
-
- data_dict = read_json(mlp_train_uci_diabetes)
-elif dataset_used == "criteo":
- mlp_train_criteo = os.path.join(
- base_dir,
- "tab_data/criteo/all_train_baseline_criteo.json")
-
- data_dict = read_json(mlp_train_criteo)
-else:
-    raise ValueError(f"unsupported dataset: {dataset_used}")
-
-draw_ecdf()
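The ECDF construction in draw_ecdf() — sort the sample, assign step heights 1/n .. 1 — can be checked in isolation. A small sketch with synthetic AUC values (no plotting):

import numpy as np

auc = np.array([0.71, 0.68, 0.74, 0.70, 0.69])
x = np.sort(auc)
y = np.arange(1, len(x) + 1) / len(x)
# y[i] is the fraction of models whose AUC is <= x[i].
print(list(zip(x, y)))
print("median:", np.quantile(x, 0.5))  # 0.70 for this sample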
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_param_auc.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_param_auc.py
deleted file mode 100644
index 0ff6748231..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/measure_param_auc.py
+++ /dev/null
@@ -1,144 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import numpy as np
-import matplotlib
-import matplotlib.pyplot as plt
-import matplotlib.ticker as ticker
-import os
-from src.tools.io_tools import read_json
-
-# line markers' size
-set_marker_size = 15
-# point markers' size
-set_marker_point = 14
-# font size
-set_font_size = 25
-set_legend_size = 15
-set_tick_size = 20
-
-frontinsidebox = 23
-
-# update tick size
-matplotlib.rc('xtick', labelsize=set_tick_size)
-matplotlib.rc('ytick', labelsize=set_tick_size)
-
-plt.rcParams['axes.labelsize'] = set_tick_size
-
-mark_list = ["o", "*", "<", "^", "s", "d", "D", ">", "h"]
-mark_size_list = [set_marker_size, set_marker_size + 1, set_marker_size + 1, set_marker_size,
- set_marker_size, set_marker_size, set_marker_size, set_marker_size + 1, set_marker_size + 2]
-line_shape_list = ['-.', '--', '-', ':']
-shade_degree = 0.2
-base_dir = "../exp_data/"
-
-
-def export_legend(ori_fig, filename="any_time_legend", colnum=9, unique_labels=None):
- if unique_labels is None:
- unique_labels = []
- fig2 = plt.figure(figsize=(5, 0.3))
- lines_labels = [ax.get_legend_handles_labels() for ax in ori_fig.axes]
- lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
- # grab unique labels
- if len(unique_labels) == 0:
- unique_labels = set(labels)
- # assign labels and legends in dict
- legend_dict = dict(zip(labels, lines))
- # query dict based on unique labels
- unique_lines = [legend_dict[x] for x in unique_labels]
- fig2.legend(unique_lines, unique_labels, loc='center',
- ncol=colnum,
- fancybox=True,
-                shadow=True, scatterpoints=1, fontsize=set_legend_size)
- fig2.tight_layout()
- fig2.savefig(f"{filename}.pdf", bbox_inches='tight')
-
-
-# Function to compute number of parameters for an architecture
-def compute_params(architecture):
- layers = [int(layer) for layer in architecture.split('-')]
- params = 0
- for i in range(len(layers) - 1):
- params += layers[i] * layers[i + 1]
- # Add bias terms
- params += sum(layers[1:])
- return params
-
-
-# Formatter: convert a large tick value into a string with 'M' for millions
-def func(x, pos):  # formatter takes the tick value and tick position
-    if x == 0:
-        return "0"
-    return f'{x / 1000000}M'
-
-
-def draw_parameter_performance():
- # extract train_auc and valid_auc into separate lists
- for dataset, architectures in data_dict.items():
- fig, ax = plt.subplots(figsize=(6.4, 4))
- print(dataset)
- param_sizes = []
- valid_auc = []
- for architecture, epochs in architectures.items():
- for epoch, metrics in epochs.items():
- if str(epoch_sampled[dataset]) == epoch:
- param_sizes.append(compute_params(architecture))
- valid_auc.append(metrics["valid_auc"])
- break
-
- plt.scatter(param_sizes, valid_auc)
- y_format = ticker.FuncFormatter(func)
- ax.xaxis.set_major_formatter(y_format)
- plt.grid()
- plt.xlabel('Parameter Size')
- plt.ylabel('Validation AUC')
-    # plt.legend(loc='upper left', fontsize=set_legend_size)
- plt.tight_layout()
- export_legend(ori_fig=fig, colnum=5)
- fig.savefig(f"para_{dataset}.jpg", bbox_inches='tight')
-
-
-dataset_used = "frappe"
-# dataset_used = "uci_diabetes"
-# dataset_used = "criteo"
-
-epoch_sampled = {"frappe": 19, "uci_diabetes": 35, "criteo": 9}
-
-if dataset_used == "frappe":
- mlp_train_frappe = os.path.join(
- base_dir,
- "tab_data/frappe/all_train_baseline_frappe.json")
- data_dict = read_json(mlp_train_frappe)
-elif dataset_used == "uci_diabetes":
- mlp_train_uci_diabetes = os.path.join(
- base_dir,
- "tab_data/uci_diabetes/all_train_baseline_uci_160k_40epoch.json")
-
- data_dict = read_json(mlp_train_uci_diabetes)
-elif dataset_used == "criteo":
- mlp_train_criteo = os.path.join(
- base_dir,
- "tab_data/criteo/all_train_baseline_criteo.json")
-
- data_dict = read_json(mlp_train_criteo)
-else:
-    raise ValueError(f"unsupported dataset: {dataset_used}")
-
-draw_parameter_performance()
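A worked check of compute_params() above, which counts dense-layer weights (in * out per consecutive pair of layer sizes) plus one bias per unit in every non-input layer:

# For the architecture string "16-32-8":
#   weights: 16*32 + 32*8 = 768
#   biases:  32 + 8       = 40
#   total:   808
def compute_params(architecture):
    layers = [int(layer) for layer in architecture.split('-')]
    params = sum(layers[i] * layers[i + 1] for i in range(len(layers) - 1))
    params += sum(layers[1:])  # bias terms
    return params

assert compute_params("16-32-8") == 808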
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/init_env b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/init_env
deleted file mode 100644
index b3204ea062..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/init_env
+++ /dev/null
@@ -1,12 +0,0 @@
-# absolute project path (e.g. when the repo is mounted at /project/TRAILS)
-export PYTHONPATH=$PYTHONPATH:/project/TRAILS/internal/ml/model_selection
-conda activate trails
-
-# relative path, when invoked from the repository root
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_img_w_baseline.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_img_w_baseline.sh
deleted file mode 100644
index aef3810537..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_img_w_baseline.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-############## c10 dataset ##############
-# run both 2phase-MS and training-free MS
-python internal/ml/model_selection/exps/macro/anytime_img.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar10 \
- --num_labels 10 \
- --base_dir ../exp_data/ \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## c100 dataset ##############
-python internal/ml/model_selection/exps/macro/anytime_img.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar100 \
- --num_labels 100 \
- --base_dir ../exp_data/ \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## imageNet dataset ##############
-python internal/ml/model_selection/exps/macro/anytime_img.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset ImageNet16-120 \
- --num_labels 120 \
- --base_dir ../exp_data/ \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_tab.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_tab.sh
deleted file mode 100644
index 3bfb947d5e..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/anytime_tab.sh
+++ /dev/null
@@ -1,142 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-
-############## frappe dataset ##############
-
-# run the 2phase-MS
-python internal/ml/model_selection/exps/macro/anytime_simulate.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --batch_size 128 \
- --nfeat 5500 \
- --nfield 10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset frappe \
- --num_labels 2 \
- --only_phase1 False \
- --is_simulate True \
- --device cpu \
- --log_folder any_time_frappe \
- --result_dir ./internal/ml/model_selection/exp_result/ \
- --num_points 5
-
-
-# run the training-free MS
-python internal/ml/model_selection/exps/macro/anytime_simulate.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --batch_size 128 \
- --nfeat 5500 \
- --nfield 10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset frappe \
- --num_labels 2 \
- --only_phase1 True \
- --is_simulate True \
- --device cpu \
- --log_folder any_time_frappe \
- --result_dir ./internal/ml/model_selection/exp_result/ \
- --num_points 5
-
-
-############## uci dataset ##############
-
-# run the 2phase-MS
-python internal/ml/model_selection/exps/macro/anytime_simulate.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --batch_size 128 \
- --nfeat 369 \
- --nfield 43 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset uci_diabetes \
- --num_labels 2 \
- --only_phase1 False \
- --is_simulate True \
- --device cpu \
- --log_folder any_time_uci_diabetes \
- --result_dir ./internal/ml/model_selection/exp_result/ \
- --num_points 5
-
-
-# run the training-free MS
-python internal/ml/model_selection/exps/macro/anytime_simulate.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --batch_size 128 \
- --nfeat 369 \
- --nfield 43 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset uci_diabetes \
- --num_labels 2 \
- --only_phase1 True \
- --is_simulate True \
- --device cpu \
- --log_folder any_time_uci_diabetes \
- --result_dir ./internal/ml/model_selection/exp_result/ \
- --num_points 5
-
-
-############## criteo dataset ##############
-
-# run the 2phase-MS
-python internal/ml/model_selection/exps/macro/anytime_simulate.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 10 \
- --batch_size 128 \
- --nfeat 2100000 \
- --nfield 39 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset criteo \
- --num_labels 2 \
- --only_phase1 False \
- --is_simulate True \
- --device cpu \
- --log_folder any_time_criteo \
- --result_dir ./internal/ml/model_selection/exp_result/ \
- --num_points 5
-
-
-# run the training-free MS
-python internal/ml/model_selection/exps/macro/anytime_simulate.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 10 \
- --batch_size 128 \
- --nfeat 2100000 \
- --nfield 39 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset criteo \
- --num_labels 2 \
- --only_phase1 True \
- --is_simulate True \
- --device cpu \
- --log_folder any_time_criteo \
- --result_dir ./internal/ml/model_selection/exp_result/ \
- --num_points 5
-
-
-
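The six invocations above differ only in the dataset-specific flags (nfeat, nfield, hidden_choice_len, log_folder) and in --only_phase1. A hypothetical Python driver that generates the same sweep (illustrative only, not part of the repo):

import subprocess

DATASETS = {
    "frappe":       dict(nfeat=5500,    nfield=10, hidden=20, log="any_time_frappe"),
    "uci_diabetes": dict(nfeat=369,     nfield=43, hidden=20, log="any_time_uci_diabetes"),
    "criteo":       dict(nfeat=2100000, nfield=39, hidden=10, log="any_time_criteo"),
}

for dataset, cfg in DATASETS.items():
    for only_phase1 in ("False", "True"):  # 2phase-MS first, then training-free MS
        subprocess.run(
            ["python", "internal/ml/model_selection/exps/macro/anytime_simulate.py",
             "--search_space", "mlp_sp", "--num_layers", "4",
             "--hidden_choice_len", str(cfg["hidden"]), "--batch_size", "128",
             "--nfeat", str(cfg["nfeat"]), "--nfield", str(cfg["nfield"]),
             "--base_dir=/hdd1/xingnaili/exp_data/",
             "--dataset", dataset, "--num_labels", "2",
             "--only_phase1", only_phase1, "--is_simulate", "True",
             "--device", "cpu", "--log_folder", cfg["log"],
             "--result_dir", "./internal/ml/model_selection/exp_result/",
             "--num_points", "5"],
            check=True)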
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_img.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_img.sh
deleted file mode 100644
index 20d6679177..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_img.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-# run both training-based MS
-############## c10 dataset ##############
-python internal/ml/model_selection/exps/baseline/train_with_ea.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar10 \
- --num_labels 10 \
- --base_dir ../exp_data/ \
- --log_folder log_baseline_c10 \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## c100 dataset ##############
-python internal/ml/model_selection/exps/baseline/train_with_ea.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar100 \
- --num_labels 100 \
- --base_dir ../exp_data/ \
- --log_folder log_baseline_c100 \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## imageNet dataset ##############
-python internal/ml/model_selection/exps/baseline/train_with_ea.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset ImageNet16-120 \
- --num_labels 120 \
- --base_dir ../exp_data/ \
- --log_folder log_baseline_imgnet \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_tab.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_tab.sh
deleted file mode 100644
index 41a2d00560..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/baseline_system_tab.sh
+++ /dev/null
@@ -1,83 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-# run both training-based MS
-############## frappe dataset ##############
-python internal/ml/model_selection/exps/baseline/train_with_ea.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --epoch 19 \
- --batch_size=512 \
- --lr=0.001 \
- --iter_per_epoch=200 \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --base_dir ../exp_data/ \
- --dataset frappe \
- --num_labels 2 \
- --device=cpu \
- --log_folder baseline_frappe \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## uci dataset ##############
-python internal/ml/model_selection/exps/baseline/train_with_ea.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --epoch 0 \
- --batch_size=1024 \
- --lr=0.001 \
- --iter_per_epoch=200 \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --base_dir ../exp_data/ \
- --dataset uci_diabetes \
- --num_labels 2 \
- --device=cpu \
- --log_folder baseline_uci_diabetes \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## criteo dataset ##############
-python internal/ml/model_selection/exps/baseline/train_with_ea.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 10 \
- --epoch 9 \
- --batch_size=1024 \
- --lr=0.001 \
- --iter_per_epoch=2000 \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --base_dir ../exp_data/ \
- --dataset criteo \
- --num_labels 2 \
- --device=cpu \
- --log_folder baseline_criteo \
- --result_dir ./internal/ml/model_selection/exp_result/
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/benchmark_weight_sharing.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/benchmark_weight_sharing.sh
deleted file mode 100644
index d1c2db5b2a..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/benchmark_weight_sharing.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-python ./internal/ml/model_selection/exps/micro/resp/benchmark_weight_sharing.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=512 \
- --lr=0.001 \
- --epoch=20 \
- --iter_per_epoch=200 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --log_folder=log_frappe \
- --total_models_per_worker=-1 \
- --result_dir=./internal/ml/model_selection/exp_result/
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache.sh
deleted file mode 100644
index e9068a2bdd..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache.sh
+++ /dev/null
@@ -1,138 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-#criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-########################## CPU ##############################
-# the same runs as above on CPU: only --device is changed to cpu
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-#criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-
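Conceptually, --embedding_cache_filtering=True exploits the fact that the candidate MLPs share the same embedding layer, so the embedded mini-batch can be computed once and reused across all candidates being scored. A generic sketch of that idea (illustrative; not the repo's implementation):

import torch

class EmbeddingCache:
    """Compute the shared embedding of a mini-batch once; reuse it
    for every candidate architecture scored on that batch."""
    def __init__(self, embedding: torch.nn.Embedding):
        self.embedding = embedding
        self._cache = {}

    def get(self, batch_id, ids: torch.Tensor) -> torch.Tensor:
        if batch_id not in self._cache:
            with torch.no_grad():
                self._cache[batch_id] = self.embedding(ids)
        return self._cache[batch_id]

# usage sketch with nfeat=5500, nfield=10, nemb=10 as in the frappe runs above
emb = torch.nn.Embedding(num_embeddings=5500, embedding_dim=10)
cache = EmbeddingCache(emb)
ids = torch.randint(0, 5500, (32, 10))   # one batch of field ids
x = cache.get("batch0", ids)             # computed once
x_again = cache.get("batch0", ids)       # served from the cache
assert x is x_again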
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache_concurrent.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache_concurrent.sh
deleted file mode 100644
index f7eb18954e..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_embedding_cache_concurrent.sh
+++ /dev/null
@@ -1,155 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-########################## CPU ##############################
-# all runs in this script are on CPU (--device=cpu)
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
- --concurrency=8 \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_current_filter_cache/ \
- --log_folder=log_score_time_frappe_cache
-
-#criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_current_filter_cache/ \
-  --log_folder=log_score_time_criteo_cache
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_current_filter_cache/ \
-  --log_folder=log_score_time_uci_cache
-
-
-# concurrent runs without the embedding cache
-#######################################################################################
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
- --tfmem=express_flow \
- --models_explore=5000 \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_current_filter_no_cache/ \
- --log_folder=log_score_time_frappe_cache
-
-#criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
- --tfmem=express_flow \
- --models_explore=5000 \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_current_filter_no_cache/ \
-  --log_folder=log_score_time_criteo_cache
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_concurrent.py \
- --tfmem=express_flow \
- --models_explore=5000 \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
-  --result_dir=./internal/ml/model_selection/exp_current_filter_no_cache/ \
-  --log_folder=log_score_time_uci_cache
-
-
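The --concurrency flag above measures filtering throughput with several scorer processes active at once. A generic sketch of that pattern (not the repo's implementation; the scorer body is a stand-in):

from multiprocessing import Pool
import random, time

def score_model(arch_id):
    # Stand-in for a training-free scorer (e.g. express_flow);
    # real scoring runs one mini-batch through the candidate model.
    time.sleep(0.001)
    return arch_id, random.random()

if __name__ == "__main__":
    arch_ids = list(range(5000))          # matches --models_explore=5000
    start = time.time()
    with Pool(processes=8) as pool:       # matches --concurrency=8
        scores = dict(pool.map(score_model, arch_ids))
    print(f"scored {len(scores)} models in {time.time() - start:.2f}s")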
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh
deleted file mode 100644
index 65aabd3c85..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_cpu_gpu.sh
+++ /dev/null
@@ -1,227 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-#criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-
-# cifar 10
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=10 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=cifar10 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-
-# cifar 100
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=100 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=cifar100 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-
-# imageNet
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=120 \
- --device=cuda:0 \
- --batch_size=32 \
- --dataset=ImageNet16-120 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-########################## CPU ##############################
-# the same runs as above on CPU: only --device is changed to cpu
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-# criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-
-# cifar 10
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=10 \
- --device=cpu \
- --batch_size=32 \
- --dataset=cifar10 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-
-# cifar 100
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=100 \
- --device=cpu \
- --batch_size=32 \
- --dataset=cifar100 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
-
-
-# imageNet
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency.py \
- --embedding_cache_filtering=False \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=120 \
- --device=cpu \
- --batch_size=32 \
- --dataset=ImageNet16-120 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_wo_cache/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_in_db.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_in_db.sh
deleted file mode 100644
index 84406263d0..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase1_in_db.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
-
-#criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase2.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase2.sh
deleted file mode 100644
index 84406263d0..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/latency_phase2.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-
-# frappe
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
-
-#criteo
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
-
-# uci
-python3 ./internal/ml/model_selection/exps/micro/benchmark_filtering_latency_sql.py \
- --embedding_cache_filtering=True \
- --tfmem=express_flow \
- --models_explore=5000 \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result_sever_cache_sql/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_budget_aware_alg.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_budget_aware_alg.sh
deleted file mode 100644
index f91ae3ce1f..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_budget_aware_alg.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-############## c10 dataset ##############
-# run both 2phase-MS and training-free MS
-python internal/ml/model_selection/exps/micro/benchmark_budget_aware_alg.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --dataset cifar10 \
- --epoch 200 \
- --base_dir ../exp_data/ \
- --log_name logs_default \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## c100 dataset ##############
-python internal/ml/model_selection/exps/micro/benchmark_budget_aware_alg.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --dataset cifar100 \
- --epoch 200 \
- --base_dir ../exp_data/ \
- --log_name logs_default \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## imageNet dataset ##############
-python internal/ml/model_selection/exps/micro/benchmark_budget_aware_alg.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --dataset ImageNet16-120 \
- --epoch 200 \
- --base_dir ../exp_data/ \
- --log_name logs_default \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-
-############## draw graphs ##############
-python internal/ml/model_selection/exps/micro/draw_budget_aware_alg.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_nku_tradeoff.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_nku_tradeoff.sh
deleted file mode 100644
index 6aec7c1959..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_nku_tradeoff.sh
+++ /dev/null
@@ -1,179 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-
-# ====================================
-# ====================================
-# determine the K and U tradeoff
-# ====================================
-# ====================================
-# frappe
-python internal/ml/model_selection/exps/micro/benchmark_ku.py \
- --search_space mlp_sp \
- --epoch 20 \
- --hidden_choice_len 20 \
- --dataset frappe \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# uci
-python internal/ml/model_selection/exps/micro/benchmark_ku.py \
- --search_space mlp_sp \
- --hidden_choice_len 20 \
- --epoch 5 \
- --dataset uci_diabetes \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-# criteo
-python internal/ml/model_selection/exps/micro/benchmark_ku.py \
- --search_space mlp_sp \
- --hidden_choice_len 10 \
- --epoch 10 \
- --dataset criteo \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# c10
-python internal/ml/model_selection/exps/micro/benchmark_ku.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar10 \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# c100
-python internal/ml/model_selection/exps/micro/benchmark_ku.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar100 \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# imageNet
-python internal/ml/model_selection/exps/micro/benchmark_ku.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset ImageNet16-120 \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-
-# ====================================
-# ====================================
-# determine the N and K tradeoff
-# ====================================
-# ====================================
-
-
-# frappe
-python internal/ml/model_selection/exps/micro/benchmark_nk.py \
- --search_space mlp_sp \
- --epoch 20 \
- --hidden_choice_len 20 \
- --dataset frappe \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# uci
-python internal/ml/model_selection/exps/micro/benchmark_nk.py \
- --search_space mlp_sp \
- --hidden_choice_len 20 \
- --epoch 5 \
- --dataset uci_diabetes \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# criteo
-python internal/ml/model_selection/exps/micro/benchmark_nk.py \
- --search_space mlp_sp \
- --hidden_choice_len 10 \
- --epoch 10 \
- --dataset criteo \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-
-# c10
-python internal/ml/model_selection/exps/micro/benchmark_nk.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar10 \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# c100
-python internal/ml/model_selection/exps/micro/benchmark_nk.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset cifar100 \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
-
-# imageNet
-python internal/ml/model_selection/exps/micro/benchmark_nk.py \
- --search_space nasbench201 \
- --api_loc NAS-Bench-201-v1_1-096897.pth \
- --epoch 200 \
- --dataset ImageNet16-120 \
- --base_dir ../exp_data/ \
- --only_phase1 True \
- --is_simulate True \
- --log_folder log_ku_tradeoff
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_score_metrics_relation.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_score_metrics_relation.sh
deleted file mode 100644
index 3e55e9e3d8..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_score_metrics_relation.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-
-############## frappe dataset ##############
-# benchmark the training-free score metrics
-python ./internal/ml/model_selection/exps/micro/benchmark_score_metrics.py \
- --tfmem=express_flow \
- --search_space mlp_sp \
- --dataset frappe \
- --base_dir ../exp_data/ \
- --log_name logs_default \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## criteo dataset ##############
-python ./internal/ml/model_selection/exps/micro/benchmark_score_metrics.py \
- --tfmem=express_flow \
- --search_space mlp_sp \
- --dataset criteo \
- --base_dir ../exp_data/ \
- --log_name logs_default \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## UCI dataset ##############
-python ./internal/ml/model_selection/exps/micro/benchmark_score_metrics.py \
- --tfmem=express_flow \
- --search_space=mlp_sp \
- --dataset uci_diabetes \
- --base_dir ../exp_data/ \
- --log_name logs_default \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-############## draw graphs ##############
-python ./internal/ml/model_selection/exps/micro/draw_score_metric_relation.py
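The three benchmark_score_metrics.py runs above score architectures with the express_flow training-free metric on frappe, criteo, and uci_diabetes, and draw_score_metric_relation.py then plots how the metric relates to ground-truth accuracy. That relation is conventionally summarized by rank correlation; a minimal sketch, assuming parallel per-architecture lists as input:

    # Illustrative only: Spearman rank correlation between training-free
    # scores and final test accuracies, the relation these runs visualize.
    from scipy.stats import spearmanr

    def score_accuracy_correlation(scores, accuracies):
        rho, pvalue = spearmanr(scores, accuracies)  # rank-based, scale-free
        return rho, pvalue
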
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_search_strategy.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_search_strategy.sh
deleted file mode 100644
index 4b9f375666..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/micro_search_strategy.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-# rs (random search)
-python internal/ml/model_selection/exps/baseline/train_with_random.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --epoch 19 \
- --batch_size=512 \
- --lr=0.001 \
- --iter_per_epoch=200 \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --base_dir ../exp_data/ \
- --dataset frappe \
- --num_labels 2 \
- --device=cpu \
- --log_folder baseline_frappe \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-
-# rl (reinforcement learning)
-python internal/ml/model_selection/exps/baseline/train_with_rl.py
-
-
-# re (regularized evolution)
-python internal/ml/model_selection/exps/baseline/train_with_ea.py \
- --search_space mlp_sp \
- --num_layers 4 \
- --hidden_choice_len 20 \
- --epoch 19 \
- --batch_size=512 \
- --lr=0.001 \
- --iter_per_epoch=200 \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --base_dir ../exp_data/ \
- --dataset frappe \
- --num_labels 2 \
- --device=cpu \
- --log_folder baseline_frappe \
- --result_dir ./internal/ml/model_selection/exp_result/
-
-# bohb (Bayesian optimization with Hyperband)
-python internal/ml/model_selection/exps/baseline/train_bohb.py
-
-############## draw the graph ##############
-python internal/ml/model_selection/exps/baseline/draw_benchmark_train_based.py --dataset frappe
-
-
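The four baselines above are training-based search strategies: random search, reinforcement learning, regularized evolution, and BOHB. For reference, regularized (aging) evolution, the strategy behind train_with_ea.py, follows the loop sketched below; sample_arch, mutate, and fitness are assumed helpers, not functions from this repository:

    import collections
    import random

    # Hedged sketch of regularized (aging) evolution: tournament selection
    # plus aging, i.e. the oldest member dies regardless of fitness.
    def regularized_evolution(sample_arch, mutate, fitness,
                              population_size=20, cycles=200, sample_size=5):
        population = collections.deque()
        history = []
        for _ in range(population_size):                 # seed the population
            arch = sample_arch()
            population.append((arch, fitness(arch)))
            history.append(population[-1])
        for _ in range(cycles):
            tournament = random.sample(list(population), sample_size)
            parent = max(tournament, key=lambda af: af[1])
            child = mutate(parent[0])
            population.append((child, fitness(child)))
            history.append(population[-1])
            population.popleft()                         # age out the oldest
        return max(history, key=lambda af: af[1])
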
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh
deleted file mode 100644
index 8d71ff283b..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/convert_api_2_json.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-# pip install nats_bench
-
-python internal/ml/model_selection/exps/nas_bench_img/0_characterize_gt.py
-python internal/ml/model_selection/exps/nas_bench_img/0_parse_testacc_101.py
-python internal/ml/model_selection/exps/nas_bench_img/0_parse_testacc_201.py
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh
deleted file mode 100644
index aea5ff9e79..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/explore_all_models.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-# cifar10 + nb101
-python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
- --search_space=nasbench101 \
- --api_loc=nasbench_only108.pkl \
- --base_dir=../exp_data/ \
- --dataset=cifar10 \
- --num_labels=10 \
- --device=cpu \
- --log_folder=log_img_explore_ea \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-
-# cifar10 + nb201
-python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=../exp_data/ \
- --dataset=cifar10 \
- --init_channels=16 \
- --num_stacks=3 \
- --num_modules_per_stack=3 \
- --num_labels=10 \
- --device=cpu \
- --log_folder=log_img_explore_ea \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-
-# cifar100 + nb201
-python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=../exp_data/ \
- --dataset=cifar100 \
- --init_channels=16 \
- --num_stacks=3 \
- --num_modules_per_stack=3 \
- --num_labels=100 \
- --device=cpu \
- --log_folder=log_img_explore_ea \
- --result_dir=./internal/ml/model_selection/exp_result/
-
-
-# imgnet + nb201
-python ./internal/ml/model_selection/exps/nas_bench_img/1_explore_models_100_run.py \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=../exp_data/ \
- --dataset=ImageNet16-120 \
- --init_channels=16 \
- --num_stacks=3 \
- --num_modules_per_stack=3 \
- --num_labels=120 \
- --device=cpu \
- --log_folder=log_img_explore_ea \
- --result_dir=./internal/ml/model_selection/exp_result/
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh
deleted file mode 100644
index 1e7e9bf3b1..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-img/score_all_models.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-
-for i in {1..4}
-do
- # cifar10 + nb101
-# /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
-# --models_explore=1200 \
-# --search_space=nasbench101 \
-# --api_loc=nasbench_only108.pkl \
-# --base_dir=/hdd1/xingnaili/exp_data/ \
-# --dataset=cifar10 \
-# --batch_size=32 \
-# --num_labels=10 \
-# --device=cuda:0 \
-# --log_folder=log_score_all_img10_101 \
-# --result_dir=./internal/ml/model_selection/exp_result/
-
- # cifar10 + nb201
- /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
- --models_explore=1200 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset=cifar10 \
- --batch_size=32 \
- --num_labels=10 \
- --device=cpu \
- --log_folder=log_score_all_img10 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
- # cifar100 + nb201
- /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
- --models_explore=1200 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset=cifar100 \
- --batch_size=32 \
- --num_labels=100 \
- --device=cpu \
- --log_folder=log_score_all_img100 \
- --result_dir=./internal/ml/model_selection/exp_result/
-
- # imgnet + nb201
- /home/xingnaili/miniconda3/envs/trails/bin/python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
- --models_explore=1200 \
- --search_space=nasbench201 \
- --api_loc=NAS-Bench-201-v1_1-096897.pth \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --dataset=ImageNet16-120 \
- --batch_size=32 \
- --num_labels=120 \
- --device=cpu \
- --log_folder=log_score_all_img_imgnet \
- --result_dir=./internal/ml/model_selection/exp_result/
-done
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh
deleted file mode 100644
index 004ecb1a6e..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_criteo.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-
-nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
- --embedding_cache_filtering=True \
- --models_explore=9999 \
- --tfmem=express_flow \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_score_time_criteo > outputCriScorAll.log&
-
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh
deleted file mode 100644
index 81d4ff12a1..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_frappe.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
- --embedding_cache_filtering=True \
- --models_explore=159999 \
- --tfmem=express_flow \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_score_time_frappe > output.log&
-
-
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh
deleted file mode 100644
index 3d11671c8d..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh
+++ /dev/null
@@ -1,63 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-worker_id=0
-GPU_NUM=9
-worker_each_gpu=6
-total_workers=$((worker_each_gpu*GPU_NUM))
-
-for((gpu_id=0; gpu_id < GPU_NUM; ++gpu_id)); do
-# echo "GPU id is $gpu_id"
- for((i=0; i < worker_each_gpu; ++i)); do
- echo "Assign task to worker id is $worker_id"
- echo "nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:$gpu_id \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=10 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --worker_id=$worker_id \
- --total_workers=$total_workers \
- --workers=0 \
- --log_folder=log_train_criteo \
- --total_models_per_worker=-1 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --pre_partitioned_file=./internal/ml/model_selection/exps/nas_bench_tabular/sampled_models_10000_models.json & ">> train_all_models_criteo_seq.sh
-
-# sleep 1
- worker_id=$((worker_id+1))
- done
-done
-
-
-# pkill -9 -f 2.seq_train_online.py
-# run with bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo.sh >criteobash &
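Note that the loop above does not launch training directly: each iteration appends one nohup command to train_all_models_criteo_seq.sh, which is then run separately. The generated commands lay worker ids out GPU-major, so the device a worker lands on is fixed by integer division; a sketch of that arithmetic:

    # Sketch of the worker-to-GPU layout the generator loop encodes:
    # with worker_each_gpu=6, workers 0-5 go to cuda:0, 6-11 to cuda:1, ...
    def device_for_worker(worker_id: int, worker_each_gpu: int) -> str:
        return f"cuda:{worker_id // worker_each_gpu}"
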
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo_distirbuted.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo_distirbuted.sh
deleted file mode 100644
index 39e7701240..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_criteo_distirbuted.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# frappe
-python exps/main_v2/ground_truth/2.seq_train_dist_online.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=../exp_data/ \
- --num_labels=1 \
- --device=gpu \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=10 \
- --iter_per_epoch=100 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --total_models_per_worker=10 \
- --workers=0 \
- --worker_each_gpu=1 \
- --gpu_num=8 \
- --log_folder=LogFrappee \
- --pre_partitioned_file=./exps/main_v2/ground_truth/sampled_models_10000_models.json &
-
-# criteo
-python exps/main_v2/ground_truth/2.seq_train_dist_online.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=../exp_data/ \
- --num_labels=1 \
- --device=gpu \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=10 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --worker_each_gpu=9 \
- --gpu_num=8 \
- --log_folder=LogCriteo \
- --pre_partitioned_file=./exps/main_v2/ground_truth/sampled_models_10000_models.json &
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh
deleted file mode 100644
index 397836405e..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh
+++ /dev/null
@@ -1,63 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-worker_id=0
-GPU_NUM=8
-worker_each_gpu=4
-total_workers=$((worker_each_gpu*GPU_NUM))
-
-for((gpu_id=0; gpu_id < GPU_NUM; ++gpu_id)); do
- for((i=0; i < worker_each_gpu; ++i)); do
-
- echo "nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:$gpu_id \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=40 \
- --iter_per_epoch=200 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --worker_id=$worker_id \
- --total_workers=$total_workers \
- --workers=0 \
- --log_folder=log_train_uci \
- --total_models_per_worker=-1 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --pre_partitioned_file=./internal/ml/model_selection/exps/nas_bench_tabular/uci_left_8k_models.json > outputuci.log& ">> train_all_models_diabetes_seq.sh
-
- worker_id=$((worker_id+1))
- done
-done
-
-
-# pkill -9 -f ./internal/ml/model_selection/exps/nas_bench_tabular//2.seq_train_online.py
-# pkill -9 -f /home/naili/miniconda3/envs/firmest_torch11/bin/python
-
-# run with bash internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_diabetes.sh >ucibash &
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
deleted file mode 100644
index 8d4af9eac6..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-worker_id=0
-GPU_NUM=8
-worker_each_gpu=16
-total_workers=$((worker_each_gpu*GPU_NUM))
-
-for((gpu_id=0; gpu_id < GPU_NUM; ++gpu_id)); do
-# echo "GPU id is $gpu_id"
- for((i=0; i < worker_each_gpu; ++i)); do
- echo "nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/2.seq_train_online.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/home/shaofeng/naili/firmest_data/ \
- --num_labels=2 \
- --device=cuda:$gpu_id \
- --batch_size=512 \
- --lr=0.001 \
- --epoch=20 \
- --iter_per_epoch=200 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --worker_id=$worker_id \
- --total_workers=$total_workers \
- --workers=0 \
- --log_folder=log_frappe \
- --total_models_per_worker=-1 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --pre_partitioned_file=./internal/ml/model_selection/exps/nas_bench_tabular/sampled_models_all.json & ">> train_all_models_frappe_seq.sh
-
- sleep 1
- worker_id=$((worker_id+1))
- done
-done
-
-
-# pkill -9 -f internal/ml/model_selection/scripts/nas-bench-tabular/train_all_models_frappe.sh
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_one_model_dev.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_one_model_dev.sh
deleted file mode 100644
index 86e36c2f51..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_one_model_dev.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=512 \
- --lr=0.001 \
- --epoch=20 \
- --iter_per_epoch=200 \
- --dataset=frappe \
- --nfeat=5500 \
- --nfield=10 \
- --nemb=10 \
- --worker_id=0 \
- --total_workers=1 \
- --workers=1 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_frappe
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_criteo.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_criteo.sh
deleted file mode 100644
index a3ea087907..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_criteo.sh
+++ /dev/null
@@ -1,162 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-# default setting.
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=5 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_criteo_train_tune >criteo_5.log &
-
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=10 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_criteo_train_tune >criteo_10.log &
-
-
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:1 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=20 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_criteo_train_tune >criteo_20.log &
-
-
-
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:2 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=40 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_criteo_train_tune >criteo_40.log &
-
-
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:3 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=60 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_criteo_train_tune >criteo_60.log &
-
-
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:4 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=80 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_criteo_train_tune >criteo_80.log &
-
-
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:5 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=100 \
- --iter_per_epoch=2000 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_criteo_train_tune >criteo_100.log &
-
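The seven launches above are identical except for --epoch (5 through 100) and the CUDA device; an equivalent generator, sketched here with the unchanged flags elided, removes the duplication:

    # Hedged sketch: emit the epoch-tuning commands above from one loop.
    # Flags not shown (--nfeat, --nfield, ...) stay exactly as in the script.
    RUNS = [(5, "cuda:0"), (10, "cuda:0"), (20, "cuda:1"), (40, "cuda:2"),
            (60, "cuda:3"), (80, "cuda:4"), (100, "cuda:5")]

    for epoch, device in RUNS:
        print("python ./internal/ml/model_selection/exps/nas_bench_tabular/"
              f"0.train_one_model.py --dataset=criteo --epoch={epoch} "
              f"--device={device} ... >criteo_{epoch}.log &")
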
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_diabetes.sh b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_diabetes.sh
deleted file mode 100644
index 697816e241..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/train_params_tune_diabetes.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:0 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=3 \
- --iter_per_epoch=200 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_uci_train_tune >uci_3.log &
-
-
-
-nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:1 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=5 \
- --iter_per_epoch=200 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_uci_train_tune >uci_5.log &
-
-
-# default setting.
-nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py \
- --log_name=baseline_train_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=../exp_data/ \
- --num_labels=2 \
- --device=cuda:2 \
- --batch_size=1024 \
- --lr=0.001 \
- --epoch=7 \
- --iter_per_epoch=200 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_uci_train_tune >uci_7.log &
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/__init__.py
deleted file mode 100644
index 4e04c2b3b1..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/vote.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/vote.py
deleted file mode 100644
index 1f1ee1f39d..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/vote.py
+++ /dev/null
@@ -1,133 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from src.eva_engine.phase1.algo.alg_base import Evaluator
-from .utils.autograd_hacks import *
-from src.common.constant import Config
-
-class VoteEvaluator(Evaluator):
-
- def __init__(self):
- super().__init__()
-
- def evaluate(self, arch: nn.Module, device, batch_data: object, batch_labels: torch.Tensor, space_name: str) -> float:
- """
-        This simply sums over all weights' norms to estimate model performance
- :param arch:
- :param device: CPU or GPU
- :param batch_data:
- :param batch_labels:
- :return:
- """
-
- pass
-
-
-def vote_between_two_arch(arch1_info: dict, arch2_info: dict, metric: list, space: str):
- """
-    Return which architecture is better.
- :param arch1_info:
- :param arch2_info:
- :param metric:
- :param space:
- :return:
- """
- left_vote = 0
- right_vote = 0
- for m_name in metric:
-        # if this metric votes for the left architecture
- if vote_to_left[space](m_name,
- float(arch1_info["scores"][m_name]["score"]),
- float(arch2_info["scores"][m_name]["score"])):
- left_vote += 1
- else:
- right_vote += 1
-
- if left_vote > right_vote:
- return arch1_info["architecture_id"]
- else:
- return arch2_info["architecture_id"]
-
-
-def compare_score_201(m_name: str, s1: float, s2: float) -> bool:
- """
-    Return whether s1 is better than s2.
- :param m_name:
- :param s1:
- :param s2:
-    :return: True if s1 is better than s2
- """
- if m_name == "grad_norm":
- return s1 > s2
- if m_name == "grad_plain":
- return s1 < s2
- if m_name == "ntk_cond_num":
- return s1 < s2
- if m_name == "ntk_trace":
- return s1 > s2
- if m_name == "ntk_trace_approx":
- return s1 > s2
- if m_name == "fisher":
- return s1 > s2
- if m_name == "grasp":
- return s1 > s2
- if m_name == "snip":
- return s1 > s2
- if m_name == "synflow":
- return s1 > s2
- if m_name == "weight_norm":
- return s1 > s2
- if m_name == "nas_wot":
- return s1 > s2
-
-
-def compare_score_101(m_name: str, s1: float, s2: float) -> bool:
- """
-    Return whether s1 is better than s2.
- :param m_name:
- :param s1:
- :param s2:
-    :return: True if s1 is better than s2
- """
- if m_name == "grad_norm":
- return s1 < s2
- if m_name == "grad_plain":
- return s1 < s2
- if m_name == "ntk_cond_num":
- return s1 < s2
- if m_name == "ntk_trace":
- return s1 < s2
- if m_name == "ntk_trace_approx":
- return s1 < s2
- if m_name == "fisher":
- return s1 < s2
- if m_name == "grasp":
- return s1 > s2
- if m_name == "snip":
- return s1 < s2
- if m_name == "synflow":
- return s1 > s2
- if m_name == "weight_norm":
- return s1 > s2
- if m_name == "nas_wot":
- return s1 > s2
-
-
-vote_to_left = {}
-vote_to_left["101"] = compare_score_101
-vote_to_left["201"] = compare_score_201
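vote_between_two_arch above tallies one vote per metric, using the space-specific comparators, and returns the id of the architecture with more votes (ties go to the second one). A worked call with invented score values:

    # Invented values, purely to show the calling convention of the deleted
    # vote.py above. In space "201", grad_norm favors arch 1 (3.2 > 2.9)
    # and nas_wot favors arch 2 (1.5 < 1.8); on the 1-1 tie the function
    # returns arch 2, since left_vote > right_vote is False.
    arch1_info = {"architecture_id": 1,
                  "scores": {"grad_norm": {"score": 3.2},
                             "nas_wot": {"score": 1.5}}}
    arch2_info = {"architecture_id": 2,
                  "scores": {"grad_norm": {"score": 2.9},
                             "nas_wot": {"score": 1.8}}}
    winner = vote_between_two_arch(arch1_info, arch2_info,
                                   metric=["grad_norm", "nas_wot"], space="201")
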
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/__init__.py
deleted file mode 100644
index 3df60b02f7..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/__init__.py
deleted file mode 100644
index 4e04c2b3b1..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/__init__.py
deleted file mode 100644
index 3df60b02f7..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/__init__.py
deleted file mode 100644
index 3df60b02f7..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/__init__.py
deleted file mode 100644
index 3df60b02f7..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/genotypes.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/genotypes.py
deleted file mode 100644
index fa94001867..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/genotypes.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from collections import namedtuple
-
-
-Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
-
-PRIMITIVES = [
- 'none',
- 'max_pool_3x3',
- 'avg_pool_3x3',
- 'skip_connect',
- 'sep_conv_3x3',
- 'sep_conv_5x5',
- 'dil_conv_3x3',
- 'dil_conv_5x5'
-]
-
-NUM_VERTICES = 4
-NUM_OPS = 7
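A Genotype pairs each intermediate node of a cell with two (operation, input-index) choices, and the *_concat ranges pick which node outputs are concatenated. A hypothetical two-node cell, shown only to illustrate the encoding:

    # Hypothetical example, not an architecture from this repository:
    # two intermediate nodes, each fed by two (op, input-index) pairs,
    # with both node outputs (states 2 and 3) concatenated.
    example = Genotype(
        normal=[('sep_conv_3x3', 0), ('skip_connect', 1),
                ('sep_conv_3x3', 1), ('max_pool_3x3', 0)],
        normal_concat=range(2, 4),
        reduce=[('max_pool_3x3', 0), ('skip_connect', 1),
                ('max_pool_3x3', 1), ('sep_conv_3x3', 0)],
        reduce_concat=range(2, 4),
    )
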
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/model.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/model.py
deleted file mode 100644
index f8be9a9dd2..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/model.py
+++ /dev/null
@@ -1,308 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from .operations import *
-from .utils import drop_path
-
-
-class Cell(nn.Module):
-
- def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
- super(Cell, self).__init__()
- # print(C_prev_prev, C_prev, C)
-
- if reduction_prev:
- self.preprocess0 = FactorizedReduce(C_prev_prev, C)
- else:
- self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
- self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
-
- if reduction:
- op_names, indices = zip(*genotype.reduce)
- concat = genotype.reduce_concat
- else:
- op_names, indices = zip(*genotype.normal)
- concat = genotype.normal_concat
- self._compile(C, op_names, indices, concat, reduction)
-
- def _compile(self, C, op_names, indices, concat, reduction):
- assert len(op_names) == len(indices)
- self._steps = len(op_names) // 2
- self._concat = concat
- self.multiplier = len(concat)
-
- self._ops = nn.ModuleList()
- for name, index in zip(op_names, indices):
- stride = 2 if reduction and index < 2 else 1
- op = OPS[name](C, stride, True)
- self._ops += [op]
- self._indices = indices
-
- def forward(self, s0, s1, drop_prob):
- s0 = self.preprocess0(s0)
- s1 = self.preprocess1(s1)
-
- states = [s0, s1]
- for i in range(self._steps):
- h1 = states[self._indices[2 * i]]
- h2 = states[self._indices[2 * i + 1]]
- op1 = self._ops[2 * i]
- op2 = self._ops[2 * i + 1]
- h1 = op1(h1)
- h2 = op2(h2)
- if self.training and drop_prob > 0.:
- if not isinstance(op1, Identity):
- h1 = drop_path(h1, drop_prob)
- if not isinstance(op2, Identity):
- h2 = drop_path(h2, drop_prob)
- s = h1 + h2
- states += [s]
- return torch.cat([states[i] for i in self._concat], dim=1)
-
-
-class AuxiliaryHeadCIFAR(nn.Module):
-
- def __init__(self, C, num_classes):
- """assuming input size 8x8"""
- super(AuxiliaryHeadCIFAR, self).__init__()
- self.features = nn.Sequential(
- nn.ReLU(inplace=True),
- # image size = 2 x 2
- nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
- nn.Conv2d(C, 128, 1, bias=False),
- nn.BatchNorm2d(128),
- nn.ReLU(inplace=True),
- nn.Conv2d(128, 768, 2, bias=False),
- nn.BatchNorm2d(768),
- nn.ReLU(inplace=True)
- )
- self.classifier = nn.Linear(768, num_classes)
-
- def forward(self, x):
- x = self.features(x)
- x = self.classifier(x.view(x.size(0), -1))
- return x
-
-
-class AuxiliaryHeadTinyImageNet(nn.Module):
-
- def __init__(self, C, num_classes):
- """assuming input size 8x8"""
- super(AuxiliaryHeadTinyImageNet, self).__init__()
- self.features = nn.Sequential(
- nn.ReLU(inplace=False),
- # image size = 2 x 2
- nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
- nn.Conv2d(C, 128, 1, bias=False),
- nn.BatchNorm2d(128),
- nn.ReLU(inplace=False),
- nn.Conv2d(128, 768, 2, bias=False),
- nn.BatchNorm2d(768),
- nn.ReLU(inplace=False)
- )
- self.classifier = nn.Linear(768, num_classes)
-
- def forward(self, x):
- x = self.features(x)
- x = self.classifier(x.view(x.size(0), -1))
- return x
-
-
-class AuxiliaryHeadImageNet(nn.Module):
-
- def __init__(self, C, num_classes):
- """assuming input size 14x14"""
- super(AuxiliaryHeadImageNet, self).__init__()
- self.features = nn.Sequential(
- nn.ReLU(inplace=True),
- nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
- nn.Conv2d(C, 128, 1, bias=False),
- nn.BatchNorm2d(128),
- nn.ReLU(inplace=True),
- nn.Conv2d(128, 768, 2, bias=False),
- # NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
- # Commenting it out for consistency with the experiments in the paper.
- # nn.BatchNorm2d(768),
- nn.ReLU(inplace=True)
- )
- self.classifier = nn.Linear(768, num_classes)
-
- def forward(self, x):
- x = self.features(x)
- x = self.classifier(x.view(x.size(0), -1))
- return x
-
-
-class NetworkCIFAR(nn.Module):
-
- def __init__(self, C, num_classes, layers, auxiliary, genotype):
- super(NetworkCIFAR, self).__init__()
- self._layers = layers
- self._auxiliary = auxiliary
-
- stem_multiplier = 3
- C_curr = stem_multiplier * C
- self.stem = nn.Sequential(
- nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
- nn.BatchNorm2d(C_curr)
- )
-
- C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
- self.cells = nn.ModuleList()
- reduction_prev = False
- for i in range(layers):
- if i in [layers // 3, 2 * layers // 3]:
- C_curr *= 2
- reduction = True
- else:
- reduction = False
- cell = Cell(genotype, C_prev_prev, C_prev,
- C_curr, reduction, reduction_prev)
- reduction_prev = reduction
- self.cells += [cell]
- C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
- if i == 2 * layers // 3:
- C_to_auxiliary = C_prev
-
- if auxiliary:
- self.auxiliary_head = AuxiliaryHeadCIFAR(
- C_to_auxiliary, num_classes)
- self.global_pooling = nn.AdaptiveAvgPool2d(1)
- self.classifier = nn.Linear(C_prev, num_classes)
-
- def forward(self, input):
- logits_aux = None
- s0 = s1 = self.stem(input)
- for i, cell in enumerate(self.cells):
- s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
- if i == 2 * self._layers // 3:
- if self._auxiliary and self.training:
- logits_aux = self.auxiliary_head(s1)
- out = self.global_pooling(s1)
- logits = self.classifier(out.view(out.size(0), -1))
- return logits, logits_aux
-
-
-class NetworkTinyImageNet(nn.Module):
-
- def __init__(self, C, num_classes, layers, auxiliary, genotype):
- super(NetworkTinyImageNet, self).__init__()
- self._layers = layers
- self._auxiliary = auxiliary
-
- stem_multiplier = 3
- C_curr = stem_multiplier * C
- self.stem = nn.Sequential(
- nn.Conv2d(3, C_curr, 3, stride=2, padding=1, bias=False),
- nn.BatchNorm2d(C_curr)
- )
-
- C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
- self.cells = nn.ModuleList()
- reduction_prev = False
- for i in range(layers):
- if i in [layers // 3, 2 * layers // 3]:
- C_curr *= 2
- reduction = True
- else:
- reduction = False
- cell = Cell(genotype, C_prev_prev, C_prev,
- C_curr, reduction, reduction_prev)
- reduction_prev = reduction
- self.cells += [cell]
- C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
- if i == 2 * layers // 3:
- C_to_auxiliary = C_prev
-
- if auxiliary:
- self.auxiliary_head = AuxiliaryHeadCIFAR(
- C_to_auxiliary, num_classes)
- self.global_pooling = nn.AdaptiveAvgPool2d(1)
- self.classifier = nn.Linear(C_prev, num_classes)
-
- def forward(self, input):
- logits_aux = None
- s0 = s1 = self.stem(input)
- for i, cell in enumerate(self.cells):
- s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
- if i == 2 * self._layers // 3:
- if self._auxiliary and self.training:
- logits_aux = self.auxiliary_head(s1)
- out = self.global_pooling(s1)
- logits = self.classifier(out.view(out.size(0), -1))
- return logits, logits_aux
-
-
-class NetworkImageNet(nn.Module):
-
- def __init__(self, C, num_classes, layers, auxiliary, genotype):
- super(NetworkImageNet, self).__init__()
- self._layers = layers
- self._auxiliary = auxiliary
-
- self.stem0 = nn.Sequential(
- nn.Conv2d(3, C // 2, kernel_size=3,
- stride=2, padding=1, bias=False),
- nn.BatchNorm2d(C // 2),
- nn.ReLU(inplace=True),
- nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
- nn.BatchNorm2d(C),
- )
-
- self.stem1 = nn.Sequential(
- nn.ReLU(inplace=True),
- nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
- nn.BatchNorm2d(C),
- )
-
- C_prev_prev, C_prev, C_curr = C, C, C
-
- self.cells = nn.ModuleList()
- reduction_prev = True
- for i in range(layers):
- if i in [layers // 3, 2 * layers // 3]:
- C_curr *= 2
- reduction = True
- else:
- reduction = False
- cell = Cell(genotype, C_prev_prev, C_prev,
- C_curr, reduction, reduction_prev)
- reduction_prev = reduction
- self.cells += [cell]
- C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
- if i == 2 * layers // 3:
- C_to_auxiliary = C_prev
-
- if auxiliary:
- self.auxiliary_head = AuxiliaryHeadImageNet(
- C_to_auxiliary, num_classes)
- self.global_pooling = nn.AvgPool2d(7)
- self.classifier = nn.Linear(C_prev, num_classes)
-
- def forward(self, input):
- logits_aux = None
- s0 = self.stem0(input)
- s1 = self.stem1(s0)
- for i, cell in enumerate(self.cells):
- s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
- if i == 2 * self._layers // 3:
- if self._auxiliary and self.training:
- logits_aux = self.auxiliary_head(s1)
- out = self.global_pooling(s1)
- logits = self.classifier(out.view(out.size(0), -1))
- return logits, logits_aux
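
The three `Network*` classes above read `self.drop_path_prob` inside `forward` although `__init__` never assigns it; the training script is expected to set it (typically ramped per epoch) before the first forward pass. A minimal sketch of that contract, assuming the classes are importable from a `model` module and that a DARTS-style genotype such as `genotypes.DARTS` exists; both names are assumptions, not confirmed by this diff:

```python
# Hypothetical driver for NetworkCIFAR; `model` and `genotypes` are assumed
# module names used here only for illustration.
import torch
from model import NetworkCIFAR
from genotypes import DARTS

net = NetworkCIFAR(C=36, num_classes=10, layers=20, auxiliary=True, genotype=DARTS)
# forward() dereferences self.drop_path_prob, so it must be set externally;
# DARTS ramps it linearly from 0 toward ~0.2 over training.
net.drop_path_prob = 0.0
logits, logits_aux = net(torch.randn(4, 3, 32, 32))  # logits_aux is set in train mode
```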
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/util_convert.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/util_convert.py
deleted file mode 100644
index fee5905301..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/darts_lib/util_convert.py
+++ /dev/null
@@ -1,126 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from scipy.special import softmax
-from .genotypes import *
-
-
-def genotype(weights, steps=4, multiplier=4):
- def _parse(weights):
- gene = []
- n = 2
- start = 0
- for i in range(steps):
- end = start + n
- W = weights[start:end].copy()
- edges = sorted(range(i + 2), key=lambda x: -max(
- W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
- for j in edges:
- k_best = None
- for k in range(len(W[j])):
- if k != PRIMITIVES.index('none'):
- if k_best is None or W[j][k] > W[j][k_best]:
- k_best = k
- gene.append((PRIMITIVES[k_best], j))
- start = end
- n += 1
- return gene
-
- gene_normal = _parse(softmax(weights[0], axis=-1))
- gene_reduce = _parse(softmax(weights[1], axis=-1))
-
- concat = range(2 + steps - multiplier, steps + 2)
- genotype = Genotype(
- normal=gene_normal, normal_concat=concat,
- reduce=gene_reduce, reduce_concat=concat
- )
- return genotype
-
-
-# from naslib
-def convert_genotype_to_compact(genotype):
- """Converts Genotype to the compact representation"""
- OPS = [
- "max_pool_3x3",
- "avg_pool_3x3",
- "skip_connect",
- "sep_conv_3x3",
- "sep_conv_5x5",
- "dil_conv_3x3",
- "dil_conv_5x5",
- ]
- compact = []
-
- for i, cell_type in enumerate(["normal", "reduce"]):
-        cell = getattr(genotype, cell_type)  # attribute lookup; avoids eval
- compact.append([])
-
- for j in range(8):
- compact[i].append((cell[j][1], OPS.index(cell[j][0])))
-
- compact_tuple = (tuple(compact[0]), tuple(compact[1]))
- return compact_tuple
-
-
-# from naslib
-def convert_compact_to_genotype(compact):
- """Converts the compact representation to a Genotype"""
- OPS = [
- "max_pool_3x3",
- "avg_pool_3x3",
- "skip_connect",
- "sep_conv_3x3",
- "sep_conv_5x5",
- "dil_conv_3x3",
- "dil_conv_5x5",
- ]
- genotype = []
-
- for i in range(2):
- cell = compact[i]
- genotype.append([])
-
- for j in range(8):
- genotype[i].append((OPS[cell[j][1]], cell[j][0]))
-
- return Genotype(
- normal=genotype[0],
- normal_concat=[2, 3, 4, 5],
- reduce=genotype[1],
- reduce_concat=[2, 3, 4, 5],
- )
- # TODO: need to check with Colin and/or Arber
- # return Genotype(
- # normal = genotype[0],
- # normal_concat = [2, 3, 4, 5, 6],
- # reduce = genotype[1],
- # reduce_concat = [4, 5, 6]
- # )
-
-
-# from naslib
-def make_compact_mutable(compact):
- # convert tuple to list so that it is mutable
- arch_list = []
- for cell in compact:
- arch_list.append([])
- for pair in cell:
- arch_list[-1].append([])
- for num in pair:
- arch_list[-1][-1].append(num)
- return arch_list
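
`convert_genotype_to_compact` and `convert_compact_to_genotype` above are inverses on the 8-edge normal/reduce encoding. A small round-trip sketch, assuming this file is importable as a flat `util_convert` module (an assumption; the real package path is longer):

```python
# Round trip between the compact tuple encoding and the Genotype namedtuple.
from util_convert import convert_compact_to_genotype, convert_genotype_to_compact

compact = (
    # normal cell: 8 (input_node, op_index) pairs; op 3 == sep_conv_3x3
    ((0, 3), (1, 3), (0, 3), (1, 3), (0, 3), (1, 3), (2, 3), (3, 3)),
    # reduce cell: op 0 == max_pool_3x3
    ((0, 0), (1, 0), (0, 0), (1, 0), (0, 0), (1, 0), (2, 0), (3, 0)),
)
genotype = convert_compact_to_genotype(compact)
assert convert_genotype_to_compact(genotype) == compact  # lossless round trip
print(genotype.normal[:2])  # [('sep_conv_3x3', 0), ('sep_conv_3x3', 1)]
```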
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/__init__.py
deleted file mode 100644
index 3df60b02f7..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/graph_util.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/graph_util.py
deleted file mode 100644
index b3e8194f0c..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/graph_util.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Copyright 2019 The Google Research Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Utility functions used by generate_graph.py."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import hashlib
-import itertools
-
-import numpy as np
-
-
-def gen_is_edge_fn(bits):
- """Generate a boolean function for the edge connectivity.
-
- Given a bitstring FEDCBA and a 4x4 matrix, the generated matrix is
- [[0, A, B, D],
- [0, 0, C, E],
- [0, 0, 0, F],
- [0, 0, 0, 0]]
-
-  Note that this function is agnostic to the actual matrix dimension due to the
- order in which elements are filled out (column-major, starting from least
- significant bit). For example, the same FEDCBA bitstring (0-padded) on a 5x5
- matrix is
- [[0, A, B, D, 0],
- [0, 0, C, E, 0],
- [0, 0, 0, F, 0],
- [0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0]]
-
- Args:
- bits: integer which will be interpreted as a bit mask.
-
- Returns:
- vectorized function that returns True when an edge is present.
- """
-
- def is_edge(x, y):
- """Is there an edge from x to y (0-indexed)?"""
- if x >= y:
- return 0
- # Map x, y to index into bit string
- index = x + (y * (y - 1) // 2)
- return (bits >> index) % 2 == 1
-
- return np.vectorize(is_edge)
-
-
-def is_full_dag(matrix):
- """Full DAG == all vertices on a path from vert 0 to (V-1).
-
- i.e. no disconnected or "hanging" vertices.
-
- It is sufficient to check for:
- 1) no rows of 0 except for row V-1 (only output vertex has no out-edges)
- 2) no cols of 0 except for col 0 (only input vertex has no in-edges)
-
- Args:
- matrix: V x V upper-triangular adjacency matrix
-
- Returns:
-    True if there are no dangling vertices.
- """
- shape = np.shape(matrix)
-
- rows = matrix[:shape[0] - 1, :] == 0
- rows = np.all(rows, axis=1) # Any row with all 0 will be True
- rows_bad = np.any(rows)
-
- cols = matrix[:, 1:] == 0
- cols = np.all(cols, axis=0) # Any col with all 0 will be True
- cols_bad = np.any(cols)
-
- return (not rows_bad) and (not cols_bad)
-
-
-def num_edges(matrix):
- """Computes number of edges in adjacency matrix."""
- return np.sum(matrix)
-
-
-def hash_module(matrix, labeling):
- """Computes a graph-invariance MD5 hash of the matrix and label pair.
-
- Args:
- matrix: np.ndarray square upper-triangular adjacency matrix.
- labeling: list of int labels of length equal to both dimensions of
- matrix.
-
- Returns:
- MD5 hash of the matrix and labeling.
- """
- vertices = np.shape(matrix)[0]
- in_edges = np.sum(matrix, axis=0).tolist()
- out_edges = np.sum(matrix, axis=1).tolist()
-
- assert len(in_edges) == len(out_edges) == len(labeling)
- hashes = list(zip(out_edges, in_edges, labeling))
- hashes = [hashlib.md5(str(h).encode('utf-8')).hexdigest() for h in hashes]
- # Computing this up to the diameter is probably sufficient but since the
- # operation is fast, it is okay to repeat more times.
- for _ in range(vertices):
- new_hashes = []
- for v in range(vertices):
- in_neighbors = [hashes[w] for w in range(vertices) if matrix[w, v]]
- out_neighbors = [hashes[w] for w in range(vertices) if matrix[v, w]]
- new_hashes.append(hashlib.md5(
- (''.join(sorted(in_neighbors)) + '|' +
- ''.join(sorted(out_neighbors)) + '|' +
- hashes[v]).encode('utf-8')).hexdigest())
- hashes = new_hashes
- fingerprint = hashlib.md5(str(sorted(hashes)).encode('utf-8')).hexdigest()
-
- return fingerprint
-
-
-def permute_graph(graph, label, permutation):
- """Permutes the graph and labels based on permutation.
-
- Args:
- graph: np.ndarray adjacency matrix.
- label: list of labels of same length as graph dimensions.
- permutation: a permutation list of ints of same length as graph dimensions.
-
- Returns:
-    tuple (new_matrix, new_label) where vertex permutation[v] in the new
-    graph is vertex v from the original graph.
- """
- # vertex permutation[v] in new graph is vertex v in the old graph
- forward_perm = zip(permutation, list(range(len(permutation))))
- inverse_perm = [x[1] for x in sorted(forward_perm)]
- edge_fn = lambda x, y: graph[inverse_perm[x], inverse_perm[y]] == 1
- new_matrix = np.fromfunction(np.vectorize(edge_fn),
- (len(label), len(label)),
- dtype=np.int8)
- new_label = [label[inverse_perm[i]] for i in range(len(label))]
- return new_matrix, new_label
-
-
-def is_isomorphic(graph1, graph2):
- """Exhaustively checks if 2 graphs are isomorphic."""
- matrix1, label1 = np.array(graph1[0]), graph1[1]
- matrix2, label2 = np.array(graph2[0]), graph2[1]
- assert np.shape(matrix1) == np.shape(matrix2)
- assert len(label1) == len(label2)
-
- vertices = np.shape(matrix1)[0]
- # Note: input and output in our constrained graphs always map to themselves
- # but this script does not enforce that.
- for perm in itertools.permutations(range(0, vertices)):
- pmatrix1, plabel1 = permute_graph(matrix1, label1, perm)
- if np.array_equal(pmatrix1, matrix2) and plabel1 == label2:
- return True
-
- return False
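
`gen_is_edge_fn` maps bit `k` of the mask to upper-triangle slot `x + y*(y-1)/2`, so the low bits fill edges column by column. A worked check of that layout, assuming the file is importable as a flat `graph_util` module (an assumption):

```python
# Bits A (edge 0->1) and B (edge 0->2) set, matching the FEDCBA layout
# described in the docstring above.
import numpy as np
from graph_util import gen_is_edge_fn, is_full_dag, num_edges

is_edge = gen_is_edge_fn(0b000011)
matrix = np.fromfunction(is_edge, (4, 4), dtype=np.int8).astype(np.int8)
print(matrix)
# [[0 1 1 0]
#  [0 0 0 0]
#  [0 0 0 0]
#  [0 0 0 0]]
print(num_edges(matrix))    # 2
print(is_full_dag(matrix))  # False: vertices 1 and 2 never reach vertex 3
```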
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/model_spec.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/model_spec.py
deleted file mode 100644
index 5d5992119a..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/model_spec.py
+++ /dev/null
@@ -1,343 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import copy
-import hashlib
-import itertools
-
-import numpy as np
-
-# Graphviz is optional and only required for visualization.
-try:
- import graphviz # pylint: disable=g-import-not-at-top
-except ImportError:
- pass
-
-INPUT = "input"
-OUTPUT = "output"
-CONV3X3 = "conv3x3-bn-relu"
-CONV1X1 = "conv1x1-bn-relu"
-MAXPOOL3X3 = "maxpool3x3"
-OPS = [CONV3X3, CONV1X1, MAXPOOL3X3]
-
-NUM_VERTICES = 7
-OP_SPOTS = NUM_VERTICES - 2
-MAX_EDGES = 9
-
-
-class NASBench101ModelSpec(object):
- """Model specification given adjacency matrix and labeling."""
-
- def __init__(self, matrix, ops, data_format='channels_last'):
- """Initialize the module spec.
-
- Args:
- matrix: ndarray or nested list with shape [V, V] for the adjacency matrix.
- ops: V-length list of labels for the base ops used. The first and last
- elements are ignored because they are the input and output vertices
- which have no operations. The elements are retained to keep consistent
- indexing.
- data_format: channels_last or channels_first.
-
- Raises:
- ValueError: invalid matrix or ops
- """
- if not isinstance(matrix, np.ndarray):
- matrix = np.array(matrix)
- shape = np.shape(matrix)
- if len(shape) != 2 or shape[0] != shape[1]:
- raise ValueError('matrix must be square')
- if shape[0] != len(ops):
- raise ValueError('length of ops must match matrix dimensions')
- if not is_upper_triangular(matrix):
- raise ValueError('matrix must be upper triangular')
-
- # Both the original and pruned matrices are deep copies of the matrix and
- # ops so any changes to those after initialization are not recognized by the
- # spec.
- self.original_matrix = copy.deepcopy(matrix)
- self.original_ops = copy.deepcopy(ops)
-
- self.matrix = copy.deepcopy(matrix)
- self.ops = copy.deepcopy(ops)
- self.valid_spec = True
- self._prune()
-
- self.data_format = data_format
-
- def _prune(self):
- """Prune the extraneous parts of the graph.
-
- General procedure:
- 1) Remove parts of graph not connected to input.
- 2) Remove parts of graph not connected to output.
- 3) Reorder the vertices so that they are consecutive after steps 1 and 2.
-
- These 3 steps can be combined by deleting the rows and columns of the
- vertices that are not reachable from both the input and output (in reverse).
- """
- num_vertices = np.shape(self.original_matrix)[0]
-
- # DFS forward from input
- visited_from_input = set([0])
- frontier = [0]
- while frontier:
- top = frontier.pop()
- for v in range(top + 1, num_vertices):
- if self.original_matrix[top, v] and v not in visited_from_input:
- visited_from_input.add(v)
- frontier.append(v)
-
- # DFS backward from output
- visited_from_output = set([num_vertices - 1])
- frontier = [num_vertices - 1]
- while frontier:
- top = frontier.pop()
- for v in range(0, top):
- if self.original_matrix[v, top] and v not in visited_from_output:
- visited_from_output.add(v)
- frontier.append(v)
-
- # Any vertex that isn't connected to both input and output is extraneous to
- # the computation graph.
- extraneous = set(range(num_vertices)).difference(
- visited_from_input.intersection(visited_from_output))
-
-        # If the non-extraneous graph has fewer than 2 vertices, the input is not
- # connected to the output and the spec is invalid.
- if len(extraneous) > num_vertices - 2:
- self.matrix = None
- self.ops = None
- self.valid_spec = False
- return
-
- self.matrix = np.delete(self.matrix, list(extraneous), axis=0)
- self.matrix = np.delete(self.matrix, list(extraneous), axis=1)
- for index in sorted(extraneous, reverse=True):
- del self.ops[index]
-
- def hash_spec(self, canonical_ops):
- """Computes the isomorphism-invariant graph hash of this spec.
-
- Args:
- canonical_ops: list of operations in the canonical ordering which they
- were assigned (i.e. the order provided in the config['available_ops']).
-
- Returns:
- MD5 hash of this spec which can be used to query the dataset.
- """
- # Invert the operations back to integer label indices used in graph gen.
- labeling = [-1] + [canonical_ops.index(op) for op in self.ops[1:-1]] + [-2]
- return hash_module(self.matrix, labeling)
-
- def visualize(self):
- """Creates a dot graph. Can be visualized in colab directly."""
- num_vertices = np.shape(self.matrix)[0]
- g = graphviz.Digraph()
- g.node(str(0), 'input')
- for v in range(1, num_vertices - 1):
- g.node(str(v), self.ops[v])
- g.node(str(num_vertices - 1), 'output')
-
- for src in range(num_vertices - 1):
- for dst in range(src + 1, num_vertices):
- if self.matrix[src, dst]:
- g.edge(str(src), str(dst))
-
- return g
-
- @classmethod
- def random_sample_one_architecture(cls, dataset_api: dict, min_size=7):
- """
-        Sample a random architecture and update the edges in the
-        naslib object accordingly.
-        From the NASBench repository: draw a one-hot adjacency matrix,
-        i.e. draw 0 or 1 for each slot in the upper triangle.
- """
- while True:
- matrix = np.random.choice([0, 1], size=(NUM_VERTICES, NUM_VERTICES))
- matrix = np.triu(matrix, 1)
- ops = np.random.choice(OPS, size=min_size).tolist()
- ops[0] = INPUT
- ops[-1] = OUTPUT
- spec = dataset_api["api"].ModelSpec(matrix=matrix, ops=ops)
- if not dataset_api["nb101_data"].is_valid(spec):
- continue
-
- spec = NASBench101ModelSpec(matrix, ops)
-            # only sample models with exactly min_size (7) nodes.
- if len(spec.matrix) == min_size:
- break
-
- return spec
-
-
-def is_upper_triangular(matrix):
- """True if matrix is 0 on diagonal and below."""
- for src in range(np.shape(matrix)[0]):
- for dst in range(0, src + 1):
- if matrix[src, dst] != 0:
- return False
-
- return True
-
-
-def gen_is_edge_fn(bits):
- """Generate a boolean function for the edge connectivity.
-
- Given a bitstring FEDCBA and a 4x4 matrix, the generated matrix is
- [[0, A, B, D],
- [0, 0, C, E],
- [0, 0, 0, F],
- [0, 0, 0, 0]]
-
-    Note that this function is agnostic to the actual matrix dimension due to the
- order in which elements are filled out (column-major, starting from least
- significant bit). For example, the same FEDCBA bitstring (0-padded) on a 5x5
- matrix is
- [[0, A, B, D, 0],
- [0, 0, C, E, 0],
- [0, 0, 0, F, 0],
- [0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0]]
-
- Args:
- bits: integer which will be interpreted as a bit mask.
-
- Returns:
- vectorized function that returns True when an edge is present.
- """
-
- def is_edge(x, y):
- """Is there an edge from x to y (0-indexed)?"""
- if x >= y:
- return 0
- # Map x, y to index into bit string
- index = x + (y * (y - 1) // 2)
- return (bits >> index) % 2 == 1
-
- return np.vectorize(is_edge)
-
-
-def is_full_dag(matrix):
- """Full DAG == all vertices on a path from vert 0 to (V-1).
-
- i.e. no disconnected or "hanging" vertices.
-
- It is sufficient to check for:
- 1) no rows of 0 except for row V-1 (only output vertex has no out-edges)
- 2) no cols of 0 except for col 0 (only input vertex has no in-edges)
-
- Args:
- matrix: V x V upper-triangular adjacency matrix
-
- Returns:
-        True if there are no dangling vertices.
- """
- shape = np.shape(matrix)
-
- rows = matrix[:shape[0] - 1, :] == 0
- rows = np.all(rows, axis=1) # Any row with all 0 will be True
- rows_bad = np.any(rows)
-
- cols = matrix[:, 1:] == 0
- cols = np.all(cols, axis=0) # Any col with all 0 will be True
- cols_bad = np.any(cols)
-
- return (not rows_bad) and (not cols_bad)
-
-
-def num_edges(matrix):
- """Computes number of edges in adjacency matrix."""
- return np.sum(matrix)
-
-
-def hash_module(matrix, labeling):
- """Computes a graph-invariance MD5 hash of the matrix and label pair.
-
- Args:
- matrix: np.ndarray square upper-triangular adjacency matrix.
- labeling: list of int labels of length equal to both dimensions of
- matrix.
-
- Returns:
- MD5 hash of the matrix and labeling.
- """
- vertices = np.shape(matrix)[0]
- in_edges = np.sum(matrix, axis=0).tolist()
- out_edges = np.sum(matrix, axis=1).tolist()
-
- assert len(in_edges) == len(out_edges) == len(labeling)
- hashes = list(zip(out_edges, in_edges, labeling))
- hashes = [hashlib.md5(str(h).encode('utf-8')).hexdigest() for h in hashes]
- # Computing this up to the diameter is probably sufficient but since the
- # operation is fast, it is okay to repeat more times.
- for _ in range(vertices):
- new_hashes = []
- for v in range(vertices):
- in_neighbors = [hashes[w] for w in range(vertices) if matrix[w, v]]
- out_neighbors = [hashes[w] for w in range(vertices) if matrix[v, w]]
- new_hashes.append(hashlib.md5(
- (''.join(sorted(in_neighbors)) + '|' +
- ''.join(sorted(out_neighbors)) + '|' +
- hashes[v]).encode('utf-8')).hexdigest())
- hashes = new_hashes
- fingerprint = hashlib.md5(str(sorted(hashes)).encode('utf-8')).hexdigest()
-
- return fingerprint
-
-
-def permute_graph(graph, label, permutation):
- """Permutes the graph and labels based on permutation.
-
- Args:
- graph: np.ndarray adjacency matrix.
- label: list of labels of same length as graph dimensions.
- permutation: a permutation list of ints of same length as graph dimensions.
-
- Returns:
-        tuple (new_matrix, new_label) where vertex permutation[v] in the new
-        graph is vertex v from the original graph.
- """
- # vertex permutation[v] in new graph is vertex v in the old graph
- forward_perm = zip(permutation, list(range(len(permutation))))
- inverse_perm = [x[1] for x in sorted(forward_perm)]
- edge_fn = lambda x, y: graph[inverse_perm[x], inverse_perm[y]] == 1
- new_matrix = np.fromfunction(np.vectorize(edge_fn),
- (len(label), len(label)),
- dtype=np.int8)
- new_label = [label[inverse_perm[i]] for i in range(len(label))]
- return new_matrix, new_label
-
-
-def is_isomorphic(graph1, graph2):
- """Exhaustively checks if 2 graphs are isomorphic."""
- matrix1, label1 = np.array(graph1[0]), graph1[1]
- matrix2, label2 = np.array(graph2[0]), graph2[1]
- assert np.shape(matrix1) == np.shape(matrix2)
- assert len(label1) == len(label2)
-
- vertices = np.shape(matrix1)[0]
- # Note: input and output in our constrained graphs always map to themselves
- # but this script does not enforce that.
- for perm in itertools.permutations(range(0, vertices)):
- pmatrix1, plabel1 = permute_graph(matrix1, label1, perm)
- if np.array_equal(pmatrix1, matrix2) and plabel1 == label2:
- return True
-
- return False
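
`NASBench101ModelSpec._prune` keeps only vertices reachable from the input and co-reachable from the output. A small sketch, assuming this file is importable as a flat `model_spec` module (an assumption), in which one hanging vertex is dropped:

```python
# Vertex 3 is fed by the input but never reaches the output, so _prune()
# removes it from the 5-vertex graph.
from model_spec import CONV1X1, CONV3X3, INPUT, OUTPUT, NASBench101ModelSpec

matrix = [[0, 1, 0, 1, 0],   # input -> conv3x3, input -> dangling vertex
          [0, 0, 1, 0, 0],   # conv3x3 -> conv1x1
          [0, 0, 0, 0, 1],   # conv1x1 -> output
          [0, 0, 0, 0, 0],   # dangling vertex: no out-edges
          [0, 0, 0, 0, 0]]   # output
ops = [INPUT, CONV3X3, CONV1X1, CONV3X3, OUTPUT]
spec = NASBench101ModelSpec(matrix, ops)
print(spec.valid_spec)    # True
print(spec.matrix.shape)  # (4, 4): the dangling vertex was pruned
print(spec.ops)           # ['input', 'conv3x3-bn-relu', 'conv1x1-bn-relu', 'output']
```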
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/nb101_api.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/nb101_api.py
deleted file mode 100644
index 0990d13cec..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp101_lib/nb101_api.py
+++ /dev/null
@@ -1,481 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-"""This is a NAS-Bench-101 version.
-
-Before using this API, download the data files from the links in the README.
-
-Usage:
- # Load the data from file (this will take some time)
- nasbench = api.NASBench('/path/to/pickle/or/shelve')
-
- # Create an Inception-like module (5x5 convolution replaced with two 3x3
- # convolutions).
- model_spec = api.ModelSpec(
- # Adjacency matrix of the module
- matrix=[[0, 1, 1, 1, 0, 1, 0], # input layer
- [0, 0, 0, 0, 0, 0, 1], # 1x1 conv
- [0, 0, 0, 0, 0, 0, 1], # 3x3 conv
- [0, 0, 0, 0, 1, 0, 0], # 5x5 conv (replaced by two 3x3's)
- [0, 0, 0, 0, 0, 0, 1], # 5x5 conv (replaced by two 3x3's)
- [0, 0, 0, 0, 0, 0, 1], # 3x3 max-pool
- [0, 0, 0, 0, 0, 0, 0]], # output layer
- # Operations at the vertices of the module, matches order of matrix
- ops=[INPUT, CONV1X1, CONV3X3, CONV3X3, CONV3X3, MAXPOOL3X3, OUTPUT])
-
-
- # Query this model from dataset
- data = nasbench.query(model_spec)
-
-Adjacency matrices are expected to be upper-triangular 0-1 matrices within the
-defined search space (7 vertices, 9 edges, 3 allowed ops). The first and last
-operations must be 'input' and 'output'. The other operations should be from
-config['available_ops']. Currently, the available operations are:
- CONV3X3 = "conv3x3-bn-relu"
- CONV1X1 = "conv1x1-bn-relu"
- MAXPOOL3X3 = "maxpool3x3"
-
-When querying a spec, the spec will first be automatically pruned (removing
-unused vertices and edges along with ops). If the pruned spec is still out of
-the search space, an OutOfDomainError will be raised, otherwise the data is
-returned.
-
-The returned data object is a dictionary with the following keys:
- - module_adjacency: numpy array for the adjacency matrix
- - module_operations: list of operation labels
- - trainable_parameters: number of trainable parameters in the model
- - training_time: the total training time in seconds up to this point
- - train_accuracy: training accuracy
-  - validation_accuracy: validation accuracy
- - test_accuracy: testing accuracy
-
-Instead of querying the dataset for a single run of a model, it is also possible
-to retrieve all metrics for a given spec, using:
-
- fixed_stats, computed_stats = nasbench.get_metrics_from_spec(model_spec)
-
-The fixed_stats is a dictionary with the keys:
- - module_adjacency
- - module_operations
- - trainable_parameters
-
-The computed_stats is a dictionary from epoch count to a list of metric
-dicts. For example, computed_stats[108][0] contains the metrics for the first
-repeat of the provided model trained to 108 epochs. The available keys are:
- - halfway_training_time
- - halfway_train_accuracy
- - halfway_validation_accuracy
- - halfway_test_accuracy
- - final_training_time
- - final_train_accuracy
- - final_validation_accuracy
- - final_test_accuracy
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import copy
-import random
-import time
-import shelve
-import hashlib
-import _pickle as pickle
-import numpy as np
-
-
-class OutOfDomainError(Exception):
- """Indicates that the requested graph is outside of the search domain."""
-
-
-class NASBench(object):
- """User-facing API for accessing the NASBench dataset."""
-
- def __init__(self, dataset_file, seed=None, data_format='pickle'):
- """Initialize dataset, this should only be done once per experiment.
-
- Args:
-      dataset_file: path to the pickle or shelve file containing the dataset.
-      seed: random seed used for sampling queried models. Two NASBench objects
-        created with the same seed will return the same data points when queried
-        with the same models in the same order. By default, the seed is randomly
-        generated.
-      data_format: 'pickle' or 'shelve', matching the format of dataset_file.
- """
- self.config = {
- 'module_vertices': 7,
- 'max_edges': 9,
- 'num_repeats': 3,
- 'available_ops': ['conv3x3-bn-relu', 'conv1x1-bn-relu', 'maxpool3x3'],
- }
- random.seed(seed)
-
- print('Loading dataset from file... This may take a few minutes...')
- start = time.time()
-
- # Stores the fixed statistics that are independent of evaluation (i.e.,
- # adjacency matrix, operations, and number of parameters).
- # hash --> metric name --> scalar
- self.fixed_statistics = {}
-
- # Stores the statistics that are computed via training and evaluating the
- # model on CIFAR-10. Statistics are computed for multiple repeats of each
- # model at each max epoch length.
- # hash --> epochs --> repeat index --> metric name --> scalar
- self.computed_statistics = {}
-
- # Valid queriable epoch lengths. {4, 12, 36, 108} for the full dataset or
- # {108} for the smaller dataset with only the 108 epochs.
- self.valid_epochs = set()
-
- # open the database
- if data_format == 'shelve':
- with shelve.open(dataset_file, 'r') as shelf:
- for module_hash in shelf:
- # Parse the data from the data file.
- fixed_statistics, computed_statistics = shelf[module_hash]
-
- self.fixed_statistics[module_hash] = fixed_statistics
- self.computed_statistics[module_hash] = computed_statistics
-
- self.valid_epochs.update(set(computed_statistics.keys()))
- elif data_format == 'pickle':
- with open(dataset_file, 'rb') as f:
- data = pickle.load(f)
- for module_hash, stats in data.items():
- self.fixed_statistics[module_hash] = stats[0]
- self.computed_statistics[module_hash] = stats[1]
-
- self.valid_epochs.update(set(stats[1].keys()))
- else:
-      raise ValueError('Unsupported data format: %s' % data_format)
-
- elapsed = time.time() - start
- print('Loaded dataset in %d seconds' % elapsed)
-
- self.history = {}
- self.training_time_spent = 0.0
- self.total_epochs_spent = 0
-
- def query(self, model_spec, epochs=108, stop_halfway=False):
- """Fetch one of the evaluations for this model spec.
-
- Each call will sample one of the config['num_repeats'] evaluations of the
- model. This means that repeated queries of the same model (or isomorphic
- models) may return identical metrics.
-
- This function will increment the budget counters for benchmarking purposes.
- See self.training_time_spent, and self.total_epochs_spent.
-
- This function also allows querying the evaluation metrics at the halfway
- point of training using stop_halfway. Using this option will increment the
- budget counters only up to the halfway point.
-
- Args:
- model_spec: ModelSpec object.
- epochs: number of epochs trained. Must be one of the evaluated number of
- epochs, [4, 12, 36, 108] for the full dataset.
- stop_halfway: if True, returned dict will only contain the training time
- and accuracies at the halfway point of training (num_epochs/2).
- Otherwise, returns the time and accuracies at the end of training
- (num_epochs).
-
- Returns:
- dict containing the evaluated data for this object.
-
- Raises:
- OutOfDomainError: if model_spec or num_epochs is outside the search space.
- """
- if epochs not in self.valid_epochs:
- raise OutOfDomainError('invalid number of epochs, must be one of %s'
- % self.valid_epochs)
-
- fixed_stat, computed_stat = self.get_metrics_from_spec(model_spec)
- sampled_index = random.randint(0, self.config['num_repeats'] - 1)
- computed_stat = computed_stat[epochs][sampled_index]
-
- data = {}
- data['module_adjacency'] = fixed_stat['module_adjacency']
- data['module_operations'] = fixed_stat['module_operations']
- data['trainable_parameters'] = fixed_stat['trainable_parameters']
-
- if stop_halfway:
- data['training_time'] = computed_stat['halfway_training_time']
- data['train_accuracy'] = computed_stat['halfway_train_accuracy']
- data['validation_accuracy'] = computed_stat['halfway_validation_accuracy']
- data['test_accuracy'] = computed_stat['halfway_test_accuracy']
- else:
- data['training_time'] = computed_stat['final_training_time']
- data['train_accuracy'] = computed_stat['final_train_accuracy']
- data['validation_accuracy'] = computed_stat['final_validation_accuracy']
- data['test_accuracy'] = computed_stat['final_test_accuracy']
-
- self.training_time_spent += data['training_time']
- if stop_halfway:
- self.total_epochs_spent += epochs // 2
- else:
- self.total_epochs_spent += epochs
-
- return data
-
- def is_valid(self, model_spec):
- """Checks the validity of the model_spec.
-
- For the purposes of benchmarking, this does not increment the budget
- counters.
-
- Args:
- model_spec: ModelSpec object.
-
- Returns:
- True if model is within space.
- """
- try:
- self._check_spec(model_spec)
- except OutOfDomainError:
- return False
-
- return True
-
- def get_budget_counters(self):
- """Returns the time and budget counters."""
- return self.training_time_spent, self.total_epochs_spent
-
- def reset_budget_counters(self):
- """Reset the time and epoch budget counters."""
- self.training_time_spent = 0.0
- self.total_epochs_spent = 0
-
- def hash_iterator(self):
- """Returns iterator over all unique model hashes."""
- return self.fixed_statistics.keys()
-
- def get_metrics_from_hash(self, module_hash):
- """Returns the metrics for all epochs and all repeats of a hash.
-
- This method is for dataset analysis and should not be used for benchmarking.
- As such, it does not increment any of the budget counters.
-
- Args:
- module_hash: MD5 hash, i.e., the values yielded by hash_iterator().
-
- Returns:
- fixed stats and computed stats of the model spec provided.
- """
- fixed_stat = copy.deepcopy(self.fixed_statistics[module_hash])
- computed_stat = copy.deepcopy(self.computed_statistics[module_hash])
- return fixed_stat, computed_stat
-
- def get_metrics_from_spec(self, model_spec):
- """Returns the metrics for all epochs and all repeats of a model.
-
- This method is for dataset analysis and should not be used for benchmarking.
- As such, it does not increment any of the budget counters.
-
- Args:
- model_spec: ModelSpec object.
-
- Returns:
- fixed stats and computed stats of the model spec provided.
- """
- self._check_spec(model_spec)
- module_hash = self._hash_spec(model_spec)
- return self.get_metrics_from_hash(module_hash)
-
- def _check_spec(self, model_spec):
- """Checks that the model spec is within the dataset."""
- if not model_spec.valid_spec:
- raise OutOfDomainError('invalid spec, provided graph is disconnected.')
-
- num_vertices = len(model_spec.ops)
- num_edges = np.sum(model_spec.matrix)
-
- if num_vertices > self.config['module_vertices']:
- raise OutOfDomainError('too many vertices, got %d (max vertices = %d)'
-                             % (num_vertices, self.config['module_vertices']))
-
- if num_edges > self.config['max_edges']:
- raise OutOfDomainError('too many edges, got %d (max edges = %d)'
- % (num_edges, self.config['max_edges']))
-
- if model_spec.ops[0] != 'input':
- raise OutOfDomainError('first operation should be \'input\'')
- if model_spec.ops[-1] != 'output':
- raise OutOfDomainError('last operation should be \'output\'')
- for op in model_spec.ops[1:-1]:
- if op not in self.config['available_ops']:
- raise OutOfDomainError('unsupported op %s (available ops = %s)'
- % (op, self.config['available_ops']))
-
- def _hash_spec(self, model_spec):
- """Returns the MD5 hash for a provided model_spec."""
- return model_spec.hash_spec(self.config['available_ops'])
-
-
-class ModelSpec(object):
- """Model specification given adjacency matrix and labeling."""
-
- def __init__(self, matrix, ops, data_format='channels_last'):
- """Initialize the module spec.
-
- Args:
- matrix: ndarray or nested list with shape [V, V] for the adjacency matrix.
- ops: V-length list of labels for the base ops used. The first and last
- elements are ignored because they are the input and output vertices
- which have no operations. The elements are retained to keep consistent
- indexing.
- data_format: channels_last or channels_first.
-
- Raises:
- ValueError: invalid matrix or ops
- """
- if not isinstance(matrix, np.ndarray):
- matrix = np.array(matrix)
- shape = np.shape(matrix)
- if len(shape) != 2 or shape[0] != shape[1]:
- raise ValueError('matrix must be square')
- if shape[0] != len(ops):
- raise ValueError('length of ops must match matrix dimensions')
- if not is_upper_triangular(matrix):
- raise ValueError('matrix must be upper triangular')
-
- # Both the original and pruned matrices are deep copies of the matrix and
- # ops so any changes to those after initialization are not recognized by the
- # spec.
- self.original_matrix = copy.deepcopy(matrix)
- self.original_ops = copy.deepcopy(ops)
-
- self.matrix = copy.deepcopy(matrix)
- self.ops = copy.deepcopy(ops)
- self.valid_spec = True
- self._prune()
-
- self.data_format = data_format
-
- def _prune(self):
- """Prune the extraneous parts of the graph.
-
- General procedure:
- 1) Remove parts of graph not connected to input.
- 2) Remove parts of graph not connected to output.
- 3) Reorder the vertices so that they are consecutive after steps 1 and 2.
-
- These 3 steps can be combined by deleting the rows and columns of the
- vertices that are not reachable from both the input and output (in reverse).
- """
- num_vertices = np.shape(self.original_matrix)[0]
-
- # DFS forward from input
- visited_from_input = set([0])
- frontier = [0]
- while frontier:
- top = frontier.pop()
- for v in range(top + 1, num_vertices):
- if self.original_matrix[top, v] and v not in visited_from_input:
- visited_from_input.add(v)
- frontier.append(v)
-
- # DFS backward from output
- visited_from_output = set([num_vertices - 1])
- frontier = [num_vertices - 1]
- while frontier:
- top = frontier.pop()
- for v in range(0, top):
- if self.original_matrix[v, top] and v not in visited_from_output:
- visited_from_output.add(v)
- frontier.append(v)
-
- # Any vertex that isn't connected to both input and output is extraneous to
- # the computation graph.
- extraneous = set(range(num_vertices)).difference(
- visited_from_input.intersection(visited_from_output))
-
-    # If the non-extraneous graph has fewer than 2 vertices, the input is not
- # connected to the output and the spec is invalid.
- if len(extraneous) > num_vertices - 2:
- self.matrix = None
- self.ops = None
- self.valid_spec = False
- return
-
- self.matrix = np.delete(self.matrix, list(extraneous), axis=0)
- self.matrix = np.delete(self.matrix, list(extraneous), axis=1)
- for index in sorted(extraneous, reverse=True):
- del self.ops[index]
-
- def hash_spec(self, canonical_ops):
- """Computes the isomorphism-invariant graph hash of this spec.
-
- Args:
- canonical_ops: list of operations in the canonical ordering which they
- were assigned (i.e. the order provided in the config['available_ops']).
-
- Returns:
- MD5 hash of this spec which can be used to query the dataset.
- """
- # Invert the operations back to integer label indices used in graph gen.
- labeling = [-1] + [canonical_ops.index(op) for op in self.ops[1:-1]] + [-2]
- return hash_module(self.matrix, labeling)
-
-
-def is_upper_triangular(matrix):
- """True if matrix is 0 on diagonal and below."""
- for src in range(np.shape(matrix)[0]):
- for dst in range(0, src + 1):
- if matrix[src, dst] != 0:
- return False
-
- return True
-
-
-def hash_module(matrix, labeling):
- """Computes a graph-invariance MD5 hash of the matrix and label pair.
-
- Args:
- matrix: np.ndarray square upper-triangular adjacency matrix.
- labeling: list of int labels of length equal to both dimensions of
- matrix.
-
- Returns:
- MD5 hash of the matrix and labeling.
- """
- vertices = np.shape(matrix)[0]
- in_edges = np.sum(matrix, axis=0).tolist()
- out_edges = np.sum(matrix, axis=1).tolist()
-
- assert len(in_edges) == len(out_edges) == len(labeling)
- hashes = list(zip(out_edges, in_edges, labeling))
- hashes = [hashlib.md5(str(h).encode('utf-8')).hexdigest() for h in hashes]
- # Computing this up to the diameter is probably sufficient but since the
- # operation is fast, it is okay to repeat more times.
- for _ in range(vertices):
- new_hashes = []
- for v in range(vertices):
- in_neighbors = [hashes[w] for w in range(vertices) if matrix[w, v]]
- out_neighbors = [hashes[w] for w in range(vertices) if matrix[v, w]]
- new_hashes.append(hashlib.md5(
- (''.join(sorted(in_neighbors)) + '|' +
- ''.join(sorted(out_neighbors)) + '|' +
- hashes[v]).encode('utf-8')).hexdigest())
- hashes = new_hashes
- fingerprint = hashlib.md5(str(sorted(hashes)).encode('utf-8')).hexdigest()
-
- return fingerprint
-
-
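
Putting the API above together, a hedged end-to-end query; the dataset path is a placeholder and `nb101_api` as a flat module name is an assumption:

```python
# Load a pickled NAS-Bench-101 dump (placeholder path) and query one spec.
from nb101_api import ModelSpec, NASBench

nasbench = NASBench('/path/to/nasbench_only108.pkl', seed=0, data_format='pickle')
spec = ModelSpec(
    matrix=[[0, 1, 0],
            [0, 0, 1],
            [0, 0, 0]],
    ops=['input', 'conv3x3-bn-relu', 'output'])
if nasbench.is_valid(spec):
    data = nasbench.query(spec, epochs=108)   # increments the budget counters
    print(data['test_accuracy'], nasbench.get_budget_counters())
```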
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2.py
deleted file mode 100644
index e845b6442f..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Copyright 2021 Samsung Electronics Co., Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-from .nasbench2_ops import *
-
-
-def gen_searchcell_mask_from_arch_str(arch_str):
- nodes = arch_str.split('+')
- nodes = [node[1:-1].split('|') for node in nodes]
- nodes = [[op_and_input.split('~') for op_and_input in node] for node in nodes]
-
- keep_mask = []
- for curr_node_idx in range(len(nodes)):
- for prev_node_idx in range(curr_node_idx+1):
- _op = [edge[0] for edge in nodes[curr_node_idx] if int(edge[1]) == prev_node_idx]
- assert len(_op) == 1, 'The arch string does not follow the assumption of 1 connection between two nodes.'
- for _op_name in OPS.keys():
- keep_mask.append(_op[0] == _op_name)
- return keep_mask
-
-
-def get_model_from_arch_str(arch_str, num_classes, bn=True, init_channels=16):
- keep_mask = gen_searchcell_mask_from_arch_str(arch_str)
- net = NAS201Model(arch_str=arch_str, num_classes=num_classes, use_bn=bn, keep_mask=keep_mask, stem_ch=init_channels)
- return net
-
-
-def get_super_model(num_classes, use_bn=True):
-    # keep_mask=None keeps every candidate op, i.e. the full supernet,
-    # so no concrete arch string is needed.
-    net = NAS201Model(arch_str=None, num_classes=num_classes, use_bn=use_bn)
- return net
-
-
-class NAS201Model(nn.Module):
-
- def __init__(self, arch_str, num_classes, use_bn=True, keep_mask=None, stem_ch=16):
- super(NAS201Model, self).__init__()
- self.arch_str=arch_str
- self.num_classes=num_classes
- self.use_bn= use_bn
- self.stem_ch = stem_ch
-
- self.stem = stem(out_channels=stem_ch, use_bn=use_bn)
- self.stack_cell1 = nn.Sequential(*[SearchCell(in_channels=stem_ch, out_channels=stem_ch, stride=1, affine=False, track_running_stats=False, use_bn=use_bn, keep_mask=keep_mask) for i in range(5)])
- self.reduction1 = reduction(in_channels=stem_ch, out_channels=stem_ch*2)
- self.stack_cell2 = nn.Sequential(*[SearchCell(in_channels=stem_ch*2, out_channels=stem_ch*2, stride=1, affine=False, track_running_stats=False, use_bn=use_bn, keep_mask=keep_mask) for i in range(5)])
- self.reduction2 = reduction(in_channels=stem_ch*2, out_channels=stem_ch*4)
- self.stack_cell3 = nn.Sequential(*[SearchCell(in_channels=stem_ch*4, out_channels=stem_ch*4, stride=1, affine=False, track_running_stats=False, use_bn=use_bn, keep_mask=keep_mask) for i in range(5)])
- self.top = top(in_dims=stem_ch*4, num_classes=num_classes, use_bn=use_bn)
-
- def forward(self, x):
- x = self.stem(x)
-
- x = self.stack_cell1(x)
- x = self.reduction1(x)
-
- x = self.stack_cell2(x)
- x = self.reduction2(x)
-
- x = self.stack_cell3(x)
-
- x = self.top(x)
- return x
-
- def get_prunable_copy(self, bn=False):
- model_new = get_model_from_arch_str(self.arch_str, self.num_classes, use_bn=bn, init_channels=self.stem_ch)
-
-        # TODO: this is quite brittle and doesn't work with nn.Sequential when bn differs;
-        # it is only required to maintain initialization -- maybe init after get_prunable_copy?
- model_new.load_state_dict(self.state_dict(), strict=False)
- model_new.train()
-
- return model_new
-
-
-def get_arch_str_from_model(net):
- search_cell = net.stack_cell1[0].options
- keep_mask = net.stack_cell1[0].keep_mask
- num_nodes = net.stack_cell1[0].num_nodes
-
- nodes = []
- idx = 0
- for curr_node in range(num_nodes -1):
- edges = []
- for prev_node in range(curr_node+1): # n-1 prev nodes
- for _op_name in OPS.keys():
- if keep_mask[idx]:
- edges.append(f'{_op_name}~{prev_node}')
- idx += 1
- node_str = '|'.join(edges)
- node_str = f'|{node_str}|'
- nodes.append(node_str)
- arch_str = '+'.join(nodes)
- return arch_str
-
-
-if __name__ == "__main__":
- arch_str = '|nor_conv_3x3~0|+|none~0|none~1|+|avg_pool_3x3~0|nor_conv_3x3~1|nor_conv_3x3~2|'
-
- n = get_model_from_arch_str(arch_str=arch_str, num_classes=10)
- print(n.stack_cell1[0])
-
- arch_str2 = get_arch_str_from_model(n)
- print(arch_str)
- print(arch_str2)
- print(f'Are the two arch strings same? {arch_str == arch_str2}')
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2_ops.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2_ops.py
deleted file mode 100644
index efcdba3224..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/sp201_lib/nasbench2_ops.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright 2021 Samsung Electronics Co., Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-import torch.nn as nn
-
-class ReLUConvBN(nn.Module):
-
- def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, affine, track_running_stats=True, use_bn=True, name='ReLUConvBN'):
- super(ReLUConvBN, self).__init__()
- self.name = name
- if use_bn:
- self.op = nn.Sequential(
- nn.ReLU(inplace=False),
- nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=not affine),
- nn.BatchNorm2d(out_channels, affine=affine, track_running_stats=track_running_stats)
- )
- else:
- self.op = nn.Sequential(
- nn.ReLU(inplace=False),
- nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=not affine)
- )
-
- def forward(self, x):
- return self.op(x)
-
-class Identity(nn.Module):
- def __init__(self, name='Identity'):
- self.name = name
- super(Identity, self).__init__()
-
- def forward(self, x):
- return x
-
-class Zero(nn.Module):
-
- def __init__(self, stride, name='Zero'):
- self.name = name
- super(Zero, self).__init__()
- self.stride = stride
-
- def forward(self, x):
- if self.stride == 1:
- return x.mul(0.)
- return x[:,:,::self.stride,::self.stride].mul(0.)
-
-class POOLING(nn.Module):
- def __init__(self, kernel_size, stride, padding, name='POOLING'):
- super(POOLING, self).__init__()
- self.name = name
-        self.avgpool = nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding, count_include_pad=False)
-
- def forward(self, x):
- return self.avgpool(x)
-
-
-class reduction(nn.Module):
- def __init__(self, in_channels, out_channels):
- super(reduction, self).__init__()
- self.residual = nn.Sequential(
- nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
- nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0, bias=False))
-
- self.conv_a = ReLUConvBN(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=2, padding=1, dilation=1, affine=True, track_running_stats=True)
- self.conv_b = ReLUConvBN(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1, dilation=1, affine=True, track_running_stats=True)
-
- def forward(self, x):
- basicblock = self.conv_a(x)
- basicblock = self.conv_b(basicblock)
- residual = self.residual(x)
- return residual + basicblock
-
-class stem(nn.Module):
- def __init__(self, out_channels, use_bn=True):
- super(stem, self).__init__()
- if use_bn:
- self.net = nn.Sequential(
- nn.Conv2d(in_channels=3, out_channels=out_channels, kernel_size=3, padding=1, bias=False),
- nn.BatchNorm2d(out_channels))
- else:
- self.net = nn.Sequential(
- nn.Conv2d(in_channels=3, out_channels=out_channels, kernel_size=3, padding=1, bias=False)
- )
-
- def forward(self, x):
- return self.net(x)
-
-class top(nn.Module):
- def __init__(self, in_dims, num_classes, use_bn=True):
- super(top, self).__init__()
- if use_bn:
- self.lastact = nn.Sequential(nn.BatchNorm2d(in_dims), nn.ReLU(inplace=True))
- else:
- self.lastact = nn.ReLU(inplace=True)
- self.global_pooling = nn.AdaptiveAvgPool2d(1)
- self.classifier = nn.Linear(in_dims, num_classes)
-
- def forward(self, x):
- x = self.lastact(x)
- x = self.global_pooling(x)
- x = x.view(x.size(0), -1)
- logits = self.classifier(x)
- return logits
-
-
-class SearchCell(nn.Module):
-
- def __init__(self, in_channels, out_channels, stride, affine, track_running_stats, use_bn=True, num_nodes=4, keep_mask=None):
- super(SearchCell, self).__init__()
- self.num_nodes = num_nodes
- self.options = nn.ModuleList()
- for curr_node in range(self.num_nodes-1):
- for prev_node in range(curr_node+1):
- for _op_name in OPS.keys():
- op = OPS[_op_name](in_channels, out_channels, stride, affine, track_running_stats, use_bn)
- self.options.append(op)
-
- if keep_mask is not None:
- self.keep_mask = keep_mask
- else:
- self.keep_mask = [True]*len(self.options)
-
- def forward(self, x):
- outs = [x]
-
- idx = 0
- for curr_node in range(self.num_nodes-1):
- edges_in = []
- for prev_node in range(curr_node+1): # n-1 prev nodes
- for op_idx in range(len(OPS.keys())):
- if self.keep_mask[idx]:
- edges_in.append(self.options[idx](outs[prev_node]))
- idx += 1
- node_output = sum(edges_in)
- outs.append(node_output)
-
- return outs[-1]
-
-
-
-OPS = {
- 'none' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: Zero(stride, name='none'),
- 'avg_pool_3x3' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: POOLING(3, 1, 1, name='avg_pool_3x3'),
- 'nor_conv_3x3' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: ReLUConvBN(in_channels, out_channels, 3, 1, 1, 1, affine, track_running_stats, use_bn, name='nor_conv_3x3'),
- 'nor_conv_1x1' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: ReLUConvBN(in_channels, out_channels, 1, 1, 0, 1, affine, track_running_stats, use_bn, name='nor_conv_1x1'),
- 'skip_connect' : lambda in_channels, out_channels, stride, affine, track_running_stats, use_bn: Identity(name='skip_connect'),
-}
-
-
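
`SearchCell` above enumerates `curr_node + 1` incoming edges for each of its `num_nodes - 1` internal nodes and instantiates all `len(OPS)` candidates per edge, so the default 4-node cell holds (1 + 2 + 3) x 5 = 30 options gated by `keep_mask`. A sketch that keeps exactly one op per edge, assuming the file is importable as a flat `nasbench2_ops` module (an assumption):

```python
# All-skip cell: skip_connect kept on every edge, giving a 30-entry mask.
import torch
from nasbench2_ops import OPS, SearchCell

num_nodes, num_ops = 4, len(OPS)             # 5 candidate ops per edge
num_edges = sum(range(1, num_nodes))         # 1 + 2 + 3 = 6 edges
skip = list(OPS).index('skip_connect')
keep_mask = [op == skip for _ in range(num_edges) for op in range(num_ops)]
cell = SearchCell(in_channels=16, out_channels=16, stride=1, affine=False,
                  track_running_stats=False, keep_mask=keep_mask)
out = cell(torch.randn(2, 16, 8, 8))
print(out.shape)  # torch.Size([2, 16, 8, 8]); skip edges preserve the shape
```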
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/__init__.py b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/__init__.py
deleted file mode 100644
index 3df60b02f7..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/filter_phase.sql b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/filter_phase.sql
deleted file mode 100644
index 0fbfc05aef..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/filter_phase.sql
+++ /dev/null
@@ -1,54 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-CREATE OR REPLACE
-PROCEDURE model_selection_sp(
- dataset TEXT, --dataset name
- selected_columns TEXT[], --used columns
- N INTEGER, --number of models to evaluate
- batch_size INTEGER, --batch size, for profiling, filtering
- config_file TEXT --config file path
-)
-LANGUAGE plpgsql
-AS $$
-DECLARE
- -- global inputs/outputs
- result_status TEXT;
- column_list TEXT;
-BEGIN
- -- combine the columns into a string
- column_list := array_to_string(selected_columns, ', ');
-
- -- 4. Run filtering phase to get top K models.
- EXECUTE format('
- WITH batch_rows AS (
- SELECT %s
- FROM %I
- ORDER BY RANDOM()
- LIMIT %s OFFSET 0
- )
- SELECT filtering_phase(
- json_agg(row_to_json(t))::text, %s, %s, %L
- )
- FROM batch_rows AS t', column_list, dataset, batch_size, N, 1, config_file) INTO result_status;
- RAISE NOTICE '4. run filtering phase, k models = %', result_status;
-
-END; $$;
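
The procedure takes the dataset table name, the columns to feed the filtering UDF, the number of models `N`, a batch size, and a config path. A hedged sketch of invoking it from Python with `psycopg2`; the connection string, table, and column names are placeholders:

```python
# psycopg2 adapts the Python list to a PostgreSQL TEXT[] automatically.
import psycopg2

conn = psycopg2.connect("dbname=pg_extension user=postgres")
with conn, conn.cursor() as cur:
    cur.execute(
        "CALL model_selection_sp(%s, %s, %s, %s, %s)",
        ("frappe_train",               # dataset table (placeholder)
         ["col1", "col2", "label"],    # selected columns (placeholder)
         10,                           # N: number of models to evaluate
         32,                           # batch size for the filtering phase
         "/path/to/config.ini"),       # config file path (placeholder)
    )
```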
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/model.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/model.rs
deleted file mode 100644
index 61268fea48..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/model.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-use serde::{Serialize, Deserialize};
-
-
-#[derive(Debug, Serialize, Deserialize)]
-pub(crate) struct Frappe {
- pub(crate) id: i32,
- pub(crate) label: i32,
- pub(crate) col1: String,
- pub(crate) col2: String,
- pub(crate) col3: String,
- pub(crate) col4: String,
- pub(crate) col5: String,
- pub(crate) col6: String,
- pub(crate) col7: String,
- pub(crate) col8: String,
- pub(crate) col9: String,
- pub(crate) col10: String,
-}
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ms.rs b/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ms.rs
deleted file mode 100644
index ca946aa091..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ms.rs
+++ /dev/null
@@ -1,228 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-use serde_json::json;
-use std::collections::HashMap;
-use pgrx::prelude::*;
-use crate::bindings::ml_register::PY_MODULE;
-use crate::bindings::ml_register::run_python_function;
-use std::time::{Instant, Duration};
-
-
-pub fn profiling_filtering_phase(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "profiling_filtering_phase")
-}
-
-
-pub fn profiling_refinement_phase(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "profiling_refinement_phase")
-}
-
-
-pub fn coordinator(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "coordinator")
-}
-
-
-pub fn filtering_phase(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "filtering_phase_dataLoader")
-}
-
-
-pub fn refinement_phase() -> serde_json::Value {
- let task = "refinement_phase".to_string();
- run_python_function(&PY_MODULE, &task, "refinement_phase")
-}
-
-
-// this two are filtering + refinement in UDF runtime
-pub fn model_selection(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "model_selection")
-}
-
-
-pub fn model_selection_workloads(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "model_selection_workloads")
-}
-
-
-// this two are filtering + refinement in GPU server
-pub fn model_selection_trails(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "model_selection_trails")
-}
-
-
-pub fn model_selection_trails_workloads(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "model_selection_trails_workloads")
-}
-
-// micro benchmarks
-
-pub fn benchmark_filtering_phase_latency(
- task: &String
-) -> serde_json::Value {
- run_python_function(&PY_MODULE, task, "benchmark_filtering_phase_latency")
-}
-
-pub fn benchmark_filtering_latency_in_db(
- explore_models: i32, dataset: &String, config_file: &String) -> serde_json::Value {
-
- let overall_start_time = Instant::now();
-
- let database_name = "pg_extension";
- let mut last_id = 0;
- let mut eva_results = serde_json::Value::Null; // Initializing the eva_results
-
- for i in 1..explore_models {
-
- // Step 1: Initialize State in Python
- let mut task_map = HashMap::new();
- task_map.insert("config_file", config_file.clone());
- task_map.insert("dataset", dataset.clone());
- task_map.insert("eva_results", eva_results.to_string());
- let task_json = json!(task_map).to_string();
-
- // here it cache a state
- let sample_result = run_python_function(
- &PY_MODULE,
- &task_json,
- "in_db_filtering_state_init");
-
- // 2. query data via SPI
- let start_time = Instant::now();
- let results: Result<Vec<Vec<String>>, String> = Spi::connect(|client| {
- let query = format!("SELECT * FROM {}_train WHERE id > {} ORDER BY id ASC LIMIT 32", dataset, last_id);
- let mut cursor = client.open_cursor(&query, None);
- let table = match cursor.fetch(32) {
- Ok(table) => table,
- Err(e) => return Err(e.to_string()), // Convert the error to a string and return
- };
-
- let mut mini_batch = Vec::new();
-
- for row in table.into_iter() {
- let mut each_row = Vec::new();
- // add primary key
- let col0 = match row.get::<i32>(1) {
- Ok(Some(val)) => {
- // Update last_id with the retrieved value
- if val > 100000{
- last_id = 0;
- }else{
- last_id = val
- }
- val.to_string()
- }
- Ok(None) => "".to_string(), // Handle the case when there's no valid value
- Err(e) => e.to_string(),
- };
- each_row.push(col0);
- // add label
- let col1 = match row.get::<i32>(2) {
- Ok(val) => val.map(|i| i.to_string()).unwrap_or_default(),
- Err(e) => e.to_string(),
- };
- each_row.push(col1);
- // add fields
- let texts: Vec<String> = (3..row.columns()+1)
- .filter_map(|i| {
- match row.get::<&str>(i) {
- Ok(Some(s)) => Some(s.to_string()),
- Ok(None) => None,
- Err(e) => Some(e.to_string()), // Convert error to string
- }
- }).collect();
- each_row.extend(texts);
- mini_batch.push(each_row)
- }
- // return
- Ok(mini_batch)
- });
- // serialize the mini-batch data
- let tup_table = match results {
- Ok(data) => {
- serde_json::json!({
- "status": "success",
- "data": data
- })
- }
- Err(e) => {
- serde_json::json!({
- "status": "error",
- "message": format!("Error while connecting: {}", e)
- })
- }
- };
-
- let end_time = Instant::now();
- let elapsed_time = end_time.duration_since(start_time);
- let elapsed_seconds = elapsed_time.as_secs_f64();
-
- // Step 3: model evaluate in Python
- let mut eva_task_map = HashMap::new();
- eva_task_map.insert("config_file", config_file.clone());
- eva_task_map.insert("sample_result", sample_result.to_string());
- let mini_batch_json = tup_table.to_string();
- eva_task_map.insert("mini_batch", mini_batch_json);
- eva_task_map.insert("spi_seconds", elapsed_seconds.to_string());
- eva_task_map.insert("model_index", i.to_string());
-
- let eva_task_json = json!(eva_task_map).to_string(); // Corrected this line
-
- eva_results = run_python_function(
- &PY_MODULE,
- &eva_task_json,
- "in_db_filtering_evaluate");
- }
-
- let mut record_task_map = HashMap::new();
- record_task_map.insert("config_file", config_file.clone());
- record_task_map.insert("dataset", dataset.clone());
- let record_task_json = json!(record_task_map).to_string();
- run_python_function(
- &PY_MODULE,
- &record_task_json,
- "records_results");
-
- let overall_end_time = Instant::now();
- let overall_elapsed_time = overall_end_time.duration_since(overall_start_time);
- let overall_elapsed_seconds = overall_elapsed_time.as_secs_f64();
-
- // Step 4: Return to PostgresSQL
- return serde_json::json!(overall_elapsed_seconds.to_string());
-}
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/requirement.txt b/examples/model_selection/TRAILS-Database-Native-Model-Selection/requirement.txt
deleted file mode 100644
index ab233d87b5..0000000000
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/requirement.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-ConfigSpace==0.7.1
-contourpy==1.1.0
-cycler==0.11.0
-fonttools==4.41.0
-importlib-resources==6.0.0
-joblib==1.3.1
-kiwisolver==1.4.4
-matplotlib==3.7.2
-more-itertools==9.1.0
-numpy==1.24.4
-orjson==3.9.2
-packaging==23.1
-palettable==3.3.3
-pandas==2.0.3
-Pillow==10.0.0
-pyparsing==3.0.9
-python-dateutil==2.8.2
-pytz==2023.3
-scikit-learn==1.3.0
-scipy==1.10.1
-seaborn==0.12.2
-six==1.16.0
-sklearn==0.0
-threadpoolctl==3.1.0
-torch==1.8.1
-torchaudio==0.8.1
-torchvision==0.9.1
-tqdm==4.47.0
-typing_extensions==4.7.1
-tzdata==2023.3
-zipp==3.16.2
-requests==2.31.0
diff --git a/examples/model_selection/Trails/.gitignore b/examples/model_selection/Trails/.gitignore
new file mode 100644
index 0000000000..7eace33453
--- /dev/null
+++ b/examples/model_selection/Trails/.gitignore
@@ -0,0 +1,20 @@
+
+
+.idea/*
+.DS_Store
+*/__pycache__/*
+**/__pycache__/
+
+*.pdf
+logs_*
+*.log
+
+z_dev/*
+internal/pg_extension/target
+result_base/*
+log_*/
+exp_result/*.json
+exp_result/*.pdf
+
+
+/internal/ml/third_party/*
diff --git a/examples/model_selection/Trails/.gitmodules b/examples/model_selection/Trails/.gitmodules
new file mode 100644
index 0000000000..0f6e011590
--- /dev/null
+++ b/examples/model_selection/Trails/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "internal/ml/model_slicing/algorithm"]
+ path = internal/ml/model_slicing/algorithm
+ url = https://github.com/Zrealshadow/SAMS.git
diff --git a/examples/model_selection/Trails/README.md b/examples/model_selection/Trails/README.md
new file mode 100644
index 0000000000..39bd012602
--- /dev/null
+++ b/examples/model_selection/Trails/README.md
@@ -0,0 +1,99 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+
+# TRAILS: A Database Native Model Selection System
+
+
+![TRAILS overview](documents/ai_db.001.jpeg)
+
+# Build & Run examples
+
+## Singa + PostgreSQL
+
+Inside the Docker image, PostgreSQL and its extensions are installed following https://github.com/pgcentralfoundation/pgrx
+
+```bash
+# Remove the existing container, if any
+docker rm -f singa_trails
+# Create project folder.
+mkdir project && cd project
+# Download the Dockerfile.
+wget -O Dockerfile https://raw.githubusercontent.com/apache/singa/dev-postgresql/examples/model_selection/Trails/singa.psql.Dockerfile
+
+# Build the image and run the container.
+docker build -t singa_trails .
+docker run -d --name singa_trails singa_trails
+# Wait about 5 minutes; monitor the logs until they show "Done!", then exit the monitor
+docker logs -f singa_trails
+```
+Execute a model selection query
+```bash
+# Connect to the pg server and use the pg_extension database.
+docker exec -it singa_trails bash
+psql -h localhost -p 28814 -U postgres
+\c pg_extension
+
+# Test coordinator
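+# The five arguments below are, as assumed here: per-model scoring time (s),
+# per-epoch training time (s), total time budget (s), whether to run only the
+# filtering phase, and the config file path.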
+SELECT coordinator('0.08244', '168.830156', '800', false, '/project/Trails/internal/ml/model_selection/config.ini');
+# Run an example; wait about one minute while it runs filtering + refinement + training of the selected model.
+CALL model_selection_end2end('frappe_train', ARRAY['col1', 'col2', 'col3', 'col4','col5','col6','col7','col8','col9','col10', 'label'], '10', '/project/Trails/internal/ml/model_selection/config.ini');
+
+# In another terminal, monitor the running process
+docker exec -it singa_trails bash
+tail -f /home/postgres/.pgrx/data-14/trails_log_folder/
+```
+
+
+
+## Singa + PolarDB
+
+Inside the Docker image, PolarDB and its extensions are installed following https://github.com/ApsaraDB/PolarDB-for-PostgreSQL
+```bash
+# Remove existing one if there is
+docker rm -f singa_trails_polardb
+# Create project folder.
+mkdir project_polardb_singa && cd project_polardb_singa
+# Download the Dockerfile.
+wget -O Dockerfile https://raw.githubusercontent.com/apache/singa/dev-postgresql/examples/model_selection/Trails/singa.polarDB.Dockerfile
+
+# Build the image and run the container.
+docker build -t singa_trails_polardb .
+docker run -d --name singa_trails_polardb singa_trails_polardb
+# Monitor the logs until the setup step is done.
+docker logs -f singa_trails_polardb
+# Run a setup script
+docker exec singa_trails_polardb /bin/bash -c "/home/postgres/Trails/init_polardb.sh"
+```
+Execute a model selection query
+```bash
+# Connect to the primary pg server and use the pg_extension database.
+docker exec -it singa_trails_polardb bash
+psql -h localhost -p 5432 -U postgres
+\c pg_extension
+
+# Test coordinator
+SELECT coordinator('0.08244', '168.830156', '800', false, '/home/postgres/Trails/internal/ml/model_selection/config.ini');
+# Run an example; wait about one minute while it runs filtering + refinement + training of the selected model.
+CALL model_selection_end2end('frappe_train', ARRAY['col1', 'col2', 'col3', 'col4','col5','col6','col7','col8','col9','col10', 'label'], '10', '/home/postgres/Trails/internal/ml/model_selection/config.ini');
+
+# In another terminal, monitor the running process
+docker exec -it singa_trails_polardb bash
+tail -f /var/polardb/primary_datadir/trails_log_folder/
+```
diff --git a/examples/model_selection/Trails/dataset/frappe/decoded_train_feat_id.pt b/examples/model_selection/Trails/dataset/frappe/decoded_train_feat_id.pt
new file mode 100644
index 0000000000..cf0c55922c
Binary files /dev/null and b/examples/model_selection/Trails/dataset/frappe/decoded_train_feat_id.pt differ
diff --git a/examples/model_selection/Trails/dataset/frappe/decoded_train_feat_value.pt b/examples/model_selection/Trails/dataset/frappe/decoded_train_feat_value.pt
new file mode 100644
index 0000000000..d07449ebf6
Binary files /dev/null and b/examples/model_selection/Trails/dataset/frappe/decoded_train_feat_value.pt differ
diff --git a/examples/model_selection/Trails/dataset/frappe/decoded_train_y.pt b/examples/model_selection/Trails/dataset/frappe/decoded_train_y.pt
new file mode 100644
index 0000000000..869597e9a8
Binary files /dev/null and b/examples/model_selection/Trails/dataset/frappe/decoded_train_y.pt differ
diff --git a/examples/model_selection/Trails/dataset/frappe/decoded_valid_feat_id.pt b/examples/model_selection/Trails/dataset/frappe/decoded_valid_feat_id.pt
new file mode 100644
index 0000000000..86c14c9347
Binary files /dev/null and b/examples/model_selection/Trails/dataset/frappe/decoded_valid_feat_id.pt differ
diff --git a/examples/model_selection/Trails/dataset/frappe/decoded_valid_feat_value.pt b/examples/model_selection/Trails/dataset/frappe/decoded_valid_feat_value.pt
new file mode 100644
index 0000000000..8858b5a6f7
Binary files /dev/null and b/examples/model_selection/Trails/dataset/frappe/decoded_valid_feat_value.pt differ
diff --git a/examples/model_selection/Trails/dataset/frappe/decoded_valid_y.pt b/examples/model_selection/Trails/dataset/frappe/decoded_valid_y.pt
new file mode 100644
index 0000000000..80ece76d0d
Binary files /dev/null and b/examples/model_selection/Trails/dataset/frappe/decoded_valid_y.pt differ
diff --git a/examples/model_selection/Trails/dataset/frappe/test.libsvm b/examples/model_selection/Trails/dataset/frappe/test.libsvm
new file mode 100644
index 0000000000..657c411736
--- /dev/null
+++ b/examples/model_selection/Trails/dataset/frappe/test.libsvm
@@ -0,0 +1,50 @@
+0 204:1 4798:1 5041:1 5046:1 5053:1 5055:1 5058:1 5060:1 5073:1 5183:1
+1 42:1 1572:1 5042:1 5047:1 5053:1 5055:1 5058:1 5060:1 5070:1 5150:1
+1 282:1 2552:1 5044:1 5052:1 5054:1 5055:1 5058:1 5060:1 5072:1 5244:1
+0 215:1 1402:1 5039:1 5051:1 5054:1 5055:1 5058:1 5063:1 5069:1 5149:1
+0 346:1 2423:1 5043:1 5051:1 5054:1 5055:1 5058:1 5063:1 5088:1 5149:1
+0 391:1 2081:1 5039:1 5050:1 5054:1 5055:1 5058:1 5060:1 5088:1 5268:1
+0 164:1 3515:1 5042:1 5052:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 4:1 1177:1 5044:1 5049:1 5054:1 5057:1 5058:1 5060:1 5071:1 5152:1
+0 177:1 1444:1 5042:1 5049:1 5054:1 5055:1 5058:1 5062:1 5075:1 5149:1
+0 7:1 2412:1 5039:1 5049:1 5054:1 5055:1 5058:1 5061:1 5072:1 5153:1
+1 176:1 2582:1 5041:1 5046:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 23:1 2027:1 5040:1 5050:1 5054:1 5055:1 5058:1 5061:1 5075:1 5149:1
+0 37:1 3457:1 5040:1 5046:1 5053:1 5055:1 5058:1 5062:1 5073:1 5161:1
+0 117:1 2676:1 5040:1 5049:1 5054:1 5055:1 5058:1 5060:1 5100:1 5192:1
+1 243:1 1255:1 5042:1 5051:1 5054:1 5055:1 5058:1 5064:1 5075:1 5238:1
+1 89:1 978:1 5039:1 5047:1 5053:1 5055:1 5058:1 5060:1 5073:1 5157:1
+0 85:1 4988:1 5041:1 5051:1 5054:1 5055:1 5059:1 5061:1 5075:1 5159:1
+1 377:1 1187:1 5040:1 5046:1 5053:1 5055:1 5058:1 5061:1 5077:1 5155:1
+0 201:1 3693:1 5041:1 5047:1 5053:1 5055:1 5058:1 5061:1 5070:1 5150:1
+0 276:1 1824:1 5041:1 5052:1 5053:1 5055:1 5058:1 5061:1 5070:1 5149:1
+1 125:1 1050:1 5040:1 5048:1 5054:1 5056:1 5058:1 5061:1 5075:1 5159:1
+1 184:1 973:1 5041:1 5046:1 5053:1 5056:1 5058:1 5060:1 5069:1 5151:1
+1 331:1 1289:1 5040:1 5047:1 5053:1 5055:1 5059:1 5061:1 5069:1 5149:1
+0 22:1 3422:1 5039:1 5050:1 5054:1 5057:1 5058:1 5061:1 5075:1 5159:1
+0 194:1 1199:1 5039:1 5046:1 5053:1 5055:1 5058:1 5061:1 5106:1 5212:1
+1 414:1 962:1 5043:1 5047:1 5053:1 5055:1 5058:1 5062:1 5094:1 5272:1
+1 265:1 982:1 5044:1 5051:1 5054:1 5055:1 5058:1 5061:1 5079:1 5149:1
+0 70:1 4522:1 5040:1 5051:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 71:1 1404:1 5040:1 5048:1 5054:1 5056:1 5058:1 5061:1 5069:1 5172:1
+0 74:1 1002:1 5045:1 5047:1 5053:1 5057:1 5058:1 5061:1 5073:1 5175:1
+0 16:1 1970:1 5043:1 5047:1 5053:1 5055:1 5059:1 5063:1 5073:1 5156:1
+0 70:1 2142:1 5043:1 5047:1 5053:1 5055:1 5058:1 5064:1 5070:1 5154:1
+0 272:1 1873:1 5041:1 5047:1 5053:1 5055:1 5058:1 5065:1 5069:1 5246:1
+0 178:1 3919:1 5040:1 5052:1 5054:1 5056:1 5058:1 5061:1 5075:1 5149:1
+1 7:1 1268:1 5039:1 5046:1 5053:1 5055:1 5058:1 5061:1 5072:1 5153:1
+0 50:1 3943:1 5043:1 5048:1 5054:1 5055:1 5058:1 5061:1 5077:1 5155:1
+1 273:1 974:1 5041:1 5051:1 5054:1 5055:1 5058:1 5063:1 5069:1 5149:1
+0 127:1 3184:1 5040:1 5050:1 5054:1 5057:1 5058:1 5061:1 5088:1 5149:1
+0 377:1 4716:1 5041:1 5051:1 5054:1 5055:1 5058:1 5063:1 5077:1 5155:1
+0 80:1 1884:1 5043:1 5047:1 5053:1 5056:1 5058:1 5061:1 5070:1 5149:1
+0 401:1 3692:1 5039:1 5049:1 5054:1 5056:1 5058:1 5061:1 5075:1 5149:1
+1 271:1 978:1 5041:1 5052:1 5053:1 5055:1 5058:1 5060:1 5069:1 5149:1
+1 180:1 1276:1 5041:1 5052:1 5053:1 5055:1 5058:1 5062:1 5070:1 5150:1
+1 254:1 1179:1 5040:1 5052:1 5054:1 5055:1 5058:1 5061:1 5069:1 5180:1
+0 47:1 2866:1 5041:1 5049:1 5054:1 5055:1 5059:1 5063:1 5077:1 5155:1
+0 117:1 4625:1 5041:1 5046:1 5053:1 5055:1 5058:1 5061:1 5100:1 5192:1
+1 270:1 966:1 5045:1 5049:1 5054:1 5055:1 5058:1 5061:1 5070:1 5150:1
+1 271:1 3431:1 5039:1 5048:1 5054:1 5055:1 5058:1 5060:1 5069:1 5149:1
+0 158:1 4210:1 5041:1 5048:1 5054:1 5056:1 5058:1 5061:1 5070:1 5149:1
+1 130:1 1194:1 5040:1 5049:1 5054:1 5055:1 5058:1 5060:1 5078:1 5165:1
\ No newline at end of file
diff --git a/examples/model_selection/Trails/dataset/frappe/train.libsvm b/examples/model_selection/Trails/dataset/frappe/train.libsvm
new file mode 100644
index 0000000000..0c6e9e8736
--- /dev/null
+++ b/examples/model_selection/Trails/dataset/frappe/train.libsvm
@@ -0,0 +1,100 @@
+0 451:1 4149:1 5041:1 5046:1 5053:1 5055:1 5058:1 5060:1 5069:1 5149:1
+0 91:1 3503:1 5041:1 5047:1 5053:1 5056:1 5058:1 5065:1 5095:1 5149:1
+1 168:1 983:1 5040:1 5050:1 5054:1 5055:1 5058:1 5060:1 5069:1 5207:1
+0 620:1 1743:1 5045:1 5051:1 5054:1 5055:1 5058:1 5061:1 5073:1 5149:1
+0 46:1 2692:1 5040:1 5049:1 5054:1 5055:1 5058:1 5060:1 5086:1 5211:1
+0 576:1 4933:1 5041:1 5049:1 5054:1 5056:1 5058:1 5061:1 5075:1 5149:1
+1 71:1 966:1 5043:1 5049:1 5054:1 5055:1 5058:1 5061:1 5069:1 5172:1
+1 43:1 974:1 5040:1 5048:1 5054:1 5055:1 5058:1 5060:1 5069:1 5252:1
+0 168:1 2928:1 5040:1 5051:1 5054:1 5055:1 5058:1 5062:1 5069:1 5149:1
+0 14:1 2396:1 5039:1 5047:1 5053:1 5055:1 5058:1 5061:1 5076:1 5149:1
+0 107:1 4380:1 5040:1 5046:1 5053:1 5055:1 5058:1 5061:1 5069:1 5149:1
+0 80:1 2662:1 5041:1 5047:1 5053:1 5055:1 5058:1 5061:1 5070:1 5243:1
+1 190:1 1093:1 5039:1 5052:1 5054:1 5055:1 5058:1 5061:1 5105:1 5149:1
+1 131:1 1432:1 5043:1 5050:1 5054:1 5055:1 5058:1 5061:1 5099:1 5215:1
+0 116:1 986:1 5039:1 5051:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 92:1 4253:1 5041:1 5050:1 5054:1 5056:1 5058:1 5062:1 5069:1 5149:1
+0 16:1 1016:1 5039:1 5046:1 5053:1 5056:1 5059:1 5063:1 5073:1 5156:1
+0 38:1 2047:1 5040:1 5046:1 5053:1 5055:1 5058:1 5060:1 5069:1 5149:1
+1 432:1 1060:1 5040:1 5051:1 5054:1 5055:1 5058:1 5061:1 5073:1 5156:1
+1 488:1 957:1 5040:1 5052:1 5054:1 5055:1 5058:1 5064:1 5110:1 5149:1
+0 87:1 3379:1 5043:1 5050:1 5054:1 5055:1 5058:1 5061:1 5069:1 5162:1
+0 516:1 3857:1 5040:1 5051:1 5054:1 5055:1 5059:1 5062:1 5074:1 5149:1
+0 91:1 4554:1 5039:1 5047:1 5053:1 5055:1 5058:1 5066:1 5095:1 5149:1
+0 142:1 3839:1 5043:1 5051:1 5054:1 5056:1 5058:1 5060:1 5102:1 5149:1
+0 302:1 2769:1 5045:1 5047:1 5053:1 5056:1 5058:1 5064:1 5069:1 5235:1
+0 38:1 1263:1 5039:1 5047:1 5053:1 5057:1 5058:1 5064:1 5069:1 5162:1
+0 85:1 4745:1 5041:1 5048:1 5054:1 5055:1 5058:1 5061:1 5075:1 5159:1
+0 213:1 4030:1 5039:1 5046:1 5053:1 5055:1 5059:1 5061:1 5087:1 5222:1
+0 60:1 4426:1 5043:1 5047:1 5053:1 5056:1 5058:1 5062:1 5090:1 5149:1
+0 12:1 2879:1 5040:1 5050:1 5054:1 5055:1 5058:1 5063:1 5069:1 5149:1
+0 408:1 2947:1 5039:1 5050:1 5054:1 5055:1 5058:1 5065:1 5090:1 5149:1
+0 16:1 4514:1 5044:1 5047:1 5053:1 5055:1 5059:1 5064:1 5073:1 5156:1
+0 80:1 4951:1 5044:1 5047:1 5053:1 5055:1 5058:1 5063:1 5070:1 5243:1
+1 213:1 1286:1 5041:1 5047:1 5053:1 5055:1 5058:1 5061:1 5087:1 5149:1
+0 510:1 3696:1 5041:1 5048:1 5054:1 5055:1 5059:1 5060:1 5070:1 5173:1
+0 90:1 4749:1 5043:1 5052:1 5054:1 5057:1 5058:1 5060:1 5069:1 5179:1
+1 363:1 1083:1 5041:1 5046:1 5053:1 5056:1 5058:1 5061:1 5075:1 5149:1
+0 291:1 3801:1 5043:1 5050:1 5054:1 5055:1 5058:1 5061:1 5069:1 5149:1
+0 117:1 4513:1 5041:1 5046:1 5053:1 5055:1 5058:1 5060:1 5100:1 5192:1
+1 60:1 1112:1 5039:1 5047:1 5053:1 5056:1 5058:1 5062:1 5090:1 5149:1
+0 405:1 3783:1 5041:1 5048:1 5054:1 5055:1 5058:1 5061:1 5069:1 5185:1
+0 137:1 2632:1 5040:1 5046:1 5053:1 5055:1 5058:1 5063:1 5077:1 5155:1
+0 49:1 3930:1 5042:1 5050:1 5054:1 5056:1 5058:1 5060:1 5087:1 5149:1
+0 768:1 4254:1 5040:1 5046:1 5053:1 5055:1 5058:1 5060:1 5069:1 5149:1
+0 257:1 1830:1 5040:1 5049:1 5054:1 5055:1 5058:1 5065:1 5069:1 5149:1
+0 102:1 4626:1 5039:1 5047:1 5053:1 5056:1 5058:1 5061:1 5098:1 5186:1
+1 25:1 1063:1 5045:1 5049:1 5054:1 5055:1 5058:1 5061:1 5079:1 5149:1
+0 5:1 3878:1 5039:1 5048:1 5054:1 5056:1 5058:1 5061:1 5070:1 5150:1
+0 408:1 3691:1 5041:1 5048:1 5054:1 5055:1 5058:1 5060:1 5090:1 5149:1
+0 55:1 2171:1 5039:1 5046:1 5053:1 5055:1 5059:1 5061:1 5069:1 5149:1
+0 634:1 4027:1 5040:1 5050:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 153:1 3312:1 5042:1 5048:1 5054:1 5055:1 5058:1 5061:1 5085:1 5202:1
+1 359:1 982:1 5043:1 5049:1 5054:1 5055:1 5058:1 5063:1 5077:1 5155:1
+1 245:1 978:1 5039:1 5049:1 5054:1 5056:1 5058:1 5061:1 5069:1 5149:1
+1 169:1 1710:1 5043:1 5046:1 5053:1 5055:1 5058:1 5061:1 5069:1 5206:1
+1 242:1 978:1 5039:1 5049:1 5054:1 5055:1 5058:1 5061:1 5087:1 5237:1
+1 844:1 1393:1 5039:1 5052:1 5054:1 5055:1 5058:1 5061:1 5075:1 5159:1
+0 454:1 3148:1 5044:1 5049:1 5054:1 5055:1 5058:1 5060:1 5069:1 5149:1
+0 223:1 4714:1 5043:1 5048:1 5054:1 5055:1 5059:1 5061:1 5085:1 5230:1
+0 130:1 4195:1 5040:1 5049:1 5054:1 5055:1 5058:1 5060:1 5078:1 5165:1
+0 13:1 4094:1 5041:1 5046:1 5053:1 5056:1 5058:1 5061:1 5075:1 5149:1
+0 570:1 1151:1 5044:1 5047:1 5053:1 5055:1 5058:1 5061:1 5075:1 5149:1
+0 108:1 4491:1 5039:1 5049:1 5054:1 5056:1 5058:1 5061:1 5088:1 5190:1
+1 366:1 4417:1 5039:1 5048:1 5054:1 5055:1 5058:1 5061:1 5120:1 5280:1
+0 142:1 1160:1 5041:1 5051:1 5054:1 5055:1 5058:1 5060:1 5102:1 5149:1
+0 396:1 1244:1 5040:1 5049:1 5054:1 5055:1 5059:1 5062:1 5074:1 5149:1
+0 50:1 2418:1 5043:1 5048:1 5054:1 5055:1 5059:1 5061:1 5077:1 5155:1
+0 78:1 3034:1 5041:1 5051:1 5054:1 5055:1 5058:1 5061:1 5093:1 5177:1
+0 270:1 2814:1 5041:1 5052:1 5053:1 5055:1 5058:1 5061:1 5070:1 5149:1
+0 149:1 2749:1 5041:1 5052:1 5053:1 5055:1 5058:1 5060:1 5087:1 5200:1
+0 153:1 2517:1 5040:1 5046:1 5053:1 5055:1 5058:1 5060:1 5085:1 5220:1
+0 131:1 4076:1 5040:1 5050:1 5054:1 5055:1 5058:1 5062:1 5099:1 5215:1
+0 168:1 1092:1 5039:1 5052:1 5054:1 5055:1 5058:1 5065:1 5069:1 5207:1
+1 125:1 1035:1 5043:1 5049:1 5054:1 5056:1 5058:1 5061:1 5075:1 5159:1
+1 167:1 1031:1 5040:1 5051:1 5054:1 5055:1 5058:1 5063:1 5077:1 5155:1
+1 26:1 1010:1 5039:1 5049:1 5054:1 5055:1 5058:1 5060:1 5080:1 5174:1
+0 60:1 4712:1 5044:1 5048:1 5054:1 5056:1 5058:1 5060:1 5090:1 5149:1
+0 61:1 4696:1 5040:1 5046:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 86:1 4039:1 5041:1 5048:1 5054:1 5055:1 5058:1 5064:1 5075:1 5159:1
+0 55:1 5004:1 5040:1 5052:1 5054:1 5055:1 5058:1 5064:1 5069:1 5149:1
+1 622:1 2152:1 5039:1 5048:1 5054:1 5055:1 5058:1 5061:1 5069:1 5149:1
+1 332:1 2008:1 5045:1 5049:1 5054:1 5055:1 5058:1 5061:1 5069:1 5149:1
+1 499:1 1022:1 5040:1 5052:1 5054:1 5055:1 5058:1 5062:1 5072:1 5294:1
+1 61:1 1203:1 5039:1 5047:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+1 97:1 1071:1 5042:1 5049:1 5054:1 5055:1 5058:1 5060:1 5073:1 5183:1
+0 459:1 4877:1 5042:1 5052:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 271:1 4410:1 5039:1 5052:1 5054:1 5055:1 5058:1 5060:1 5069:1 5149:1
+0 77:1 3073:1 5040:1 5049:1 5054:1 5055:1 5058:1 5061:1 5069:1 5149:1
+0 51:1 3128:1 5039:1 5051:1 5054:1 5055:1 5058:1 5061:1 5070:1 5150:1
+1 498:1 2474:1 5040:1 5046:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 45:1 1802:1 5040:1 5048:1 5054:1 5055:1 5058:1 5061:1 5070:1 5166:1
+0 273:1 2244:1 5045:1 5049:1 5054:1 5055:1 5058:1 5063:1 5069:1 5149:1
+0 6:1 3828:1 5039:1 5050:1 5054:1 5056:1 5058:1 5061:1 5070:1 5150:1
+0 38:1 3778:1 5039:1 5048:1 5054:1 5057:1 5058:1 5061:1 5069:1 5162:1
+1 563:1 1159:1 5042:1 5049:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 89:1 4483:1 5042:1 5051:1 5054:1 5056:1 5058:1 5065:1 5073:1 5157:1
+1 545:1 997:1 5041:1 5047:1 5053:1 5056:1 5058:1 5060:1 5087:1 5222:1
+0 378:1 2541:1 5040:1 5046:1 5053:1 5055:1 5058:1 5061:1 5069:1 5149:1
+0 160:1 2368:1 5040:1 5047:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+1 273:1 974:1 5041:1 5052:1 5053:1 5055:1 5058:1 5061:1 5069:1 5149:1
\ No newline at end of file
diff --git a/examples/model_selection/Trails/dataset/frappe/valid.libsvm b/examples/model_selection/Trails/dataset/frappe/valid.libsvm
new file mode 100644
index 0000000000..3dc8285723
--- /dev/null
+++ b/examples/model_selection/Trails/dataset/frappe/valid.libsvm
@@ -0,0 +1,50 @@
+1 266:1 1244:1 5042:1 5049:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 138:1 1296:1 5041:1 5052:1 5053:1 5055:1 5058:1 5061:1 5085:1 5201:1
+0 317:1 1694:1 5041:1 5047:1 5053:1 5055:1 5058:1 5061:1 5070:1 5150:1
+0 14:1 2659:1 5043:1 5052:1 5054:1 5055:1 5058:1 5064:1 5076:1 5205:1
+1 123:1 4068:1 5043:1 5050:1 5054:1 5056:1 5058:1 5060:1 5069:1 5149:1
+1 222:1 978:1 5039:1 5051:1 5054:1 5055:1 5058:1 5060:1 5113:1 5229:1
+0 249:1 4666:1 5044:1 5047:1 5053:1 5057:1 5058:1 5061:1 5075:1 5149:1
+0 47:1 1313:1 5041:1 5049:1 5054:1 5055:1 5058:1 5061:1 5077:1 5155:1
+0 181:1 2123:1 5041:1 5048:1 5054:1 5055:1 5058:1 5061:1 5080:1 5174:1
+0 340:1 4732:1 5040:1 5046:1 5053:1 5055:1 5059:1 5060:1 5077:1 5256:1
+1 120:1 1396:1 5043:1 5046:1 5053:1 5055:1 5058:1 5062:1 5069:1 5149:1
+1 168:1 982:1 5040:1 5051:1 5054:1 5055:1 5058:1 5062:1 5069:1 5149:1
+1 78:1 973:1 5041:1 5051:1 5054:1 5055:1 5058:1 5060:1 5093:1 5177:1
+0 361:1 1636:1 5040:1 5046:1 5053:1 5055:1 5058:1 5060:1 5112:1 5265:1
+1 90:1 1024:1 5044:1 5049:1 5054:1 5055:1 5058:1 5061:1 5069:1 5182:1
+0 363:1 3256:1 5041:1 5047:1 5053:1 5055:1 5058:1 5063:1 5075:1 5149:1
+1 379:1 988:1 5042:1 5046:1 5053:1 5055:1 5058:1 5061:1 5077:1 5155:1
+1 272:1 997:1 5041:1 5047:1 5053:1 5055:1 5058:1 5061:1 5069:1 5246:1
+0 23:1 2698:1 5042:1 5049:1 5054:1 5055:1 5058:1 5061:1 5075:1 5149:1
+0 159:1 1167:1 5041:1 5046:1 5053:1 5055:1 5058:1 5063:1 5077:1 5155:1
+0 167:1 4281:1 5043:1 5052:1 5054:1 5055:1 5058:1 5063:1 5077:1 5155:1
+1 172:1 1071:1 5039:1 5049:1 5054:1 5056:1 5058:1 5060:1 5075:1 5159:1
+1 215:1 4052:1 5042:1 5050:1 5054:1 5055:1 5058:1 5062:1 5069:1 5149:1
+1 343:1 1688:1 5040:1 5049:1 5054:1 5055:1 5058:1 5061:1 5120:1 5253:1
+0 93:1 3293:1 5043:1 5047:1 5053:1 5055:1 5058:1 5060:1 5086:1 5149:1
+0 238:1 2217:1 5039:1 5051:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 559:1 4912:1 5040:1 5046:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+1 174:1 1772:1 5039:1 5049:1 5054:1 5056:1 5058:1 5060:1 5080:1 5174:1
+0 160:1 2513:1 5041:1 5051:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+1 514:1 1936:1 5044:1 5051:1 5054:1 5055:1 5058:1 5061:1 5130:1 5296:1
+0 308:1 3720:1 5044:1 5052:1 5054:1 5055:1 5058:1 5060:1 5086:1 5310:1
+1 66:1 1868:1 5040:1 5046:1 5053:1 5055:1 5058:1 5060:1 5069:1 5172:1
+0 165:1 1046:1 5041:1 5049:1 5054:1 5055:1 5058:1 5061:1 5077:1 5155:1
+0 153:1 2077:1 5041:1 5051:1 5054:1 5055:1 5058:1 5061:1 5085:1 5220:1
+0 140:1 4591:1 5043:1 5050:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 272:1 1749:1 5043:1 5047:1 5053:1 5055:1 5058:1 5061:1 5069:1 5149:1
+0 502:1 2111:1 5041:1 5051:1 5054:1 5055:1 5058:1 5061:1 5088:1 5149:1
+0 195:1 2422:1 5040:1 5052:1 5054:1 5055:1 5058:1 5061:1 5107:1 5213:1
+0 101:1 1835:1 5041:1 5050:1 5054:1 5055:1 5058:1 5060:1 5069:1 5185:1
+0 155:1 4201:1 5041:1 5051:1 5054:1 5055:1 5058:1 5061:1 5070:1 5150:1
+0 262:1 2426:1 5040:1 5050:1 5054:1 5055:1 5058:1 5062:1 5074:1 5149:1
+1 130:1 1080:1 5040:1 5049:1 5054:1 5055:1 5059:1 5060:1 5070:1 5150:1
+0 79:1 3412:1 5041:1 5047:1 5053:1 5055:1 5058:1 5062:1 5074:1 5149:1
+0 48:1 1181:1 5039:1 5047:1 5053:1 5055:1 5059:1 5061:1 5070:1 5150:1
+0 141:1 4225:1 5042:1 5050:1 5054:1 5055:1 5058:1 5064:1 5095:1 5198:1
+0 90:1 2919:1 5039:1 5047:1 5053:1 5057:1 5058:1 5060:1 5069:1 5179:1
+0 99:1 1391:1 5039:1 5051:1 5054:1 5055:1 5058:1 5063:1 5077:1 5155:1
+0 33:1 3617:1 5044:1 5048:1 5054:1 5055:1 5059:1 5064:1 5084:1 5160:1
+0 117:1 3932:1 5044:1 5048:1 5054:1 5055:1 5058:1 5061:1 5100:1 5192:1
+1 43:1 1315:1 5039:1 5049:1 5054:1 5055:1 5058:1 5061:1 5069:1 5252:1
\ No newline at end of file
diff --git a/examples/model_selection/Trails/documents/ai_db.001.jpeg b/examples/model_selection/Trails/documents/ai_db.001.jpeg
new file mode 100644
index 0000000000..01633e613b
Binary files /dev/null and b/examples/model_selection/Trails/documents/ai_db.001.jpeg differ
diff --git a/examples/model_selection/Trails/documents/dev_guide.md b/examples/model_selection/Trails/documents/dev_guide.md
new file mode 100644
index 0000000000..11c93e2659
--- /dev/null
+++ b/examples/model_selection/Trails/documents/dev_guide.md
@@ -0,0 +1,108 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+
+# Test Singa for model selection
+
+Run these three commands to verify that Singa works:
+
+```bash
+python3 ./internal/ml/model_selection/exps/4.seq_score_online.py --embedding_cache_filtering=True --models_explore=10 --tfmem=synflow --log_name=score_based --search_space=mlp_sp --num_layers=4 --hidden_choice_len=20 --base_dir=./dataset --num_labels=2 --device=cpu --batch_size=32 --dataset=frappe --nfeat=5500 --nfield=10 --nemb=10 --workers=0 --result_dir=./exp_result/ --log_folder=log_foler
+
+python3 ./internal/ml/model_selection/exps/0.train_one_model.py --log_name=train_log --search_space=mlp_sp --base_dir=./dataset --num_labels=2 --device=cpu --batch_size=10 --lr=0.01 --epoch=5 --iter_per_epoch=2000 --dataset=frappe --nfeat=5500 --nfield=10 --nemb=10 --workers=0 --result_dir=./exp_result/ --log_folder=log_foler
+
+python3 internal/ml/model_selection/pg_interface.py
+```
+
+# How to add new features
+
+1. Fork the git repo https://github.com/apache/singa/tree/dev-postgresql
+
+2. Run the Docker image as described in [README.md](https://github.com/apache/singa/blob/dev-postgresql/examples/model_selection/Trails/README.md)
+
+ ```bash
+ # Remove the existing container, if any
+ docker rm -f singa_trails
+ # Create project folder.
+ mkdir project && cd project
+ # Download the Dockerfile.
+ wget -O Dockerfile https://raw.githubusercontent.com/apache/singa/dev-postgresql/examples/model_selection/Trails/singa.psql.Dockerfile
+
+ # Build the image and run the container.
+ docker build -t singa_trails .
+ docker run -d --name singa_trails singa_trails
+ # Wait about 5 minutes; monitor the logs until they show "Done!", then exit the monitor
+ docker logs -f singa_trails
+ ```
+
+3. After the container is up, go to the code directory and add your own fork's URL as a remote:
+
+ ```bash
+ docker exec -it singa_trails bash
+ cd /project/Trails
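+ # Add your fork as a remote (remote name and URL below are placeholders)
+ git remote add nl2 https://github.com/<your-username>/singa.git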
+ git fetch --all
+ git checkout -b <branch> <remote>/<branch>
+ # e.g. git checkout -b dev-postgresql nl2/dev-postgresql
+ ```
+
+4. Commit your changes to your own repo, then pull them inside the container.
+
+5. Then compile and test as follows:
+
+## For the psql
+
+```bash
+cd Trails/internal/pg_extension
+cargo pgrx run --release
+
+# tests
+psql -h localhost -p 28814 -U postgres
+\c pg_extension
+
+# Test coordinator
+SELECT coordinator('0.08244', '168.830156', '800', false, '/project/Trails/internal/ml/model_selection/config.ini');
+# Run an example; wait about one minute while it runs filtering + refinement + training of the selected model.
+CALL model_selection_end2end('frappe_train', ARRAY['col1', 'col2', 'col3', 'col4','col5','col6','col7','col8','col9','col10', 'label'], '10', '/project/Trails/internal/ml/model_selection/config.ini');
+
+# In another terminal, monitor the running process
+docker exec -it singa_trails bash
+tail -f /home/postgres/.pgrx/data-14/trails_log_folder/
+```
+
+## For the polarDB
+
+```bash
+cd Trails/internal/pg_extension
+cargo clean
+cargo pgrx install --pg-config /home/postgres/tmp_basedir_polardb_pg_1100_bld/bin/pg_config
+
+# Connect to the primary pg server and use the pg_extension database.
+docker exec -it singa_trails_polardb bash
+psql -h localhost -p 5432 -U postgres
+\c pg_extension
+
+# Test coordinator
+SELECT coordinator('0.08244', '168.830156', '800', false, '/home/postgres/Trails/internal/ml/model_selection/config.ini');
+# Run an example; wait about one minute while it runs filtering + refinement + training of the selected model.
+CALL model_selection_end2end('frappe_train', ARRAY['col1', 'col2', 'col3', 'col4','col5','col6','col7','col8','col9','col10', 'label'], '10', '/home/postgres/Trails/internal/ml/model_selection/config.ini');
+
+# In another terminal, monitor the running process
+docker exec -it singa_trails_polardb bash
+tail -f /var/polardb/primary_datadir/trails_log_folder/
+```
+
diff --git a/examples/model_selection/Trails/init.sh b/examples/model_selection/Trails/init.sh
new file mode 100644
index 0000000000..1bc3e4b39b
--- /dev/null
+++ b/examples/model_selection/Trails/init.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+# These commands run after `docker run` starts the container.
+
+# Compile code, and run postgresql
+cd /project/Trails/internal/pg_extension || exit
+/bin/bash -c "source $HOME/.cargo/env && echo '\q' | cargo pgrx run --release"
+
+# Wait for PostgreSQL to become available
+until psql -h localhost -p 28814 -U postgres -d pg_extension -c '\q'; do
+ >&2 echo "Postgres is unavailable - sleeping"
+ sleep 1
+done
+
+# Run setup commands
+psql -h localhost -p 28814 -U postgres -d pg_extension -c "CREATE EXTENSION pg_extension;"
+psql -h localhost -p 28814 -U postgres -d pg_extension -f /project/Trails/internal/pg_extension/sql/model_selection_cpu.sql
+# Load example dataset into database
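+# Arguments (inferred from this call): <data_dir> <dataset_name> <db_port>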
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/Trails/dataset/frappe frappe 28814
+
+echo "Done!"
+
+# Continue with the rest of your container's CMD
+tail -f /dev/null
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh b/examples/model_selection/Trails/init_polardb.sh
similarity index 54%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh
rename to examples/model_selection/Trails/init_polardb.sh
index 99dfe0e4d5..70a23b320a 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/nas-bench-tabular/score_all_modesl_uci.sh
+++ b/examples/model_selection/Trails/init_polardb.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@@ -16,29 +17,19 @@
# limitations under the License.
#
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-nohup python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
- --embedding_cache_filtering=True \
- --models_explore=159999 \
- --tfmem=express_flow \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=20 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cpu \
- --batch_size=32 \
- --dataset=uci_diabetes \
- --nfeat=369 \
- --nfield=43 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_score_time_uci > outputUciScoreALl.log&
-
+# Wait for PostgreSQL to become available
+until psql -h localhost -p 5432 -U postgres -c '\q'; do
+ >&2 echo "Postgres is unavailable - sleeping"
+ sleep 1
+done
+# Run setup commands
+psql -h localhost -p 5432 -U postgres -c "CREATE DATABASE pg_extension;"
+psql -h localhost -p 5432 -U postgres -d pg_extension -c "CREATE EXTENSION pg_extension;"
+psql -h localhost -p 5432 -U postgres -d pg_extension -f /home/postgres/Trails/internal/pg_extension/sql/model_selection_cpu.sql
+# Load example dataset into database
+bash /home/postgres/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh /home/postgres/Trails/dataset/frappe frappe 5432
+echo "Done!"
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/cache_service.py b/examples/model_selection/Trails/internal/cache-service/cache_service.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/cache_service.py
rename to examples/model_selection/Trails/internal/cache-service/cache_service.py
index 87479a704b..035e519e5c 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/cache_service.py
+++ b/examples/model_selection/Trails/internal/cache-service/cache_service.py
@@ -17,6 +17,7 @@
# limitations under the License.
#
+
import time
import threading
import queue
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/trigger_cache_svc.py b/examples/model_selection/Trails/internal/cache-service/trigger_cache_svc.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/trigger_cache_svc.py
rename to examples/model_selection/Trails/internal/cache-service/trigger_cache_svc.py
index 2631abeabb..87a3843ad9 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/cache-service/trigger_cache_svc.py
+++ b/examples/model_selection/Trails/internal/cache-service/trigger_cache_svc.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
import requests
url = 'http://localhost:8093/'
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini b/examples/model_selection/Trails/internal/ml/model_selection/config.ini
similarity index 50%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini
rename to examples/model_selection/Trails/internal/ml/model_selection/config.ini
index e7235b1d46..34f0ff2e4e 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/config.ini
+++ b/examples/model_selection/Trails/internal/ml/model_selection/config.ini
@@ -1,29 +1,9 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
[DEFAULT]
log_name = in_db_ms
budget = 100
device = cpu
-log_folder = ./internal/debug_singa_logger
-;log_folder = /project/TRAILS/log_score_time_frappe
-result_dir = ./internal/ml/model_selection/exp_result_singa/
-;result_dir = /project/TRAILS/internal/ml/model_selection/exp_result_sever_cache_sql_indb/
+log_folder = ./trails_log_folder
+result_dir = ./trails_log_folder
num_points = 12
max_load = -1
@@ -51,7 +31,7 @@ hidden_choice_len = 20
[MLP_TRAINER]
epoch = 20
-batch_size = 32
+batch_size = 8
lr = 0.002
patience = 1
iter_per_epoch = 200
@@ -62,8 +42,7 @@ report_freq = 30
workers = 0
[DATASET]
-;base_dir = ../exp_data/
-base_dir = /hdd1/xingnaili/exp_data/
+base_dir = ./dataset
dataset = frappe
num_labels = 2
@@ -98,7 +77,6 @@ db_port = 28814
[SYS_PERFORMANCE]
models_explore = -1
-# tfmem = express_flow
tfmem = synflow
embedding_cache_filtering = True
concurrency = 1
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/eva_service.py b/examples/model_selection/Trails/internal/ml/model_selection/eva_service.py
similarity index 97%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/eva_service.py
rename to examples/model_selection/Trails/internal/ml/model_selection/eva_service.py
index f3f5b4575d..6d089a7614 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/eva_service.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/eva_service.py
@@ -50,7 +50,7 @@ def refinement_phase(u: int, k_models: List, dataset_name: str, config_file: str
try:
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
- best_arch, best_arch_performance, _ = rms.refinement_phase(
+ best_arch, best_arch_performance, _, _ = rms.refinement_phase(
U=u,
k_models=k_models,
train_loader=train_dataloader,
@@ -86,7 +86,6 @@ async def start_refinement_phase(request):
if __name__ == "__main__":
-
result = refinement_phase(
u=1,
k_models=["8-8-8-8", "16-16-16-16"],
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py b/examples/model_selection/Trails/internal/ml/model_selection/exps/0.train_one_model.py
similarity index 94%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py
rename to examples/model_selection/Trails/internal/ml/model_selection/exps/0.train_one_model.py
index 9a19f007db..67ffcb5e73 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/0.train_one_model.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/exps/0.train_one_model.py
@@ -53,23 +53,20 @@
# 1. data loader
train_loader, val_loader, test_loader = libsvm_dataloader(
args=args,
- data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ data_dir=os.path.join(args.base_dir, args.dataset),
nfield=args.nfield,
batch_size=args.batch_size)
- # arch_id = "256-256-256-256"
arch_id = "128-128-128-128"
print(f"begin to train the {arch_id}")
model = search_space_ins.new_architecture(arch_id)
- # model.init_embedding(requires_grad=True)
if args.device == 'cpu':
dev = singa_device.get_default_device()
else: # GPU
dev = singa_device.create_cuda_gpu_on(args.local_rank) # need to change to CPU device for CPU-only machines
dev.SetRandSeed(0)
np.random.seed(0)
- # model.to(args.device)
valid_auc, total_run_time, train_log = ModelTrainer.fully_train_arch(
model=model,
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py b/examples/model_selection/Trails/internal/ml/model_selection/exps/4.seq_score_online.py
similarity index 82%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py
rename to examples/model_selection/Trails/internal/ml/model_selection/exps/4.seq_score_online.py
index 5d6c16ec00..063d843511 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/exps/4.seq_score_online.py
@@ -19,36 +19,23 @@
import calendar
import json
import os
-import random
import time
from exps.shared_args import parse_arguments
-from datetime import datetime
-import gc
-
-# import tracemalloc
-# tracemalloc.start()
-#
-#
-# def print_memory_usg():
-# snapshot = tracemalloc.take_snapshot()
-# top_stats = snapshot.statistics('lineno')
-# for stat in top_stats[:10]: # top 10 memory-consuming lines
-# print(stat)
def generate_data_loader():
- if args.dataset in [Config.c10, Config.c100, Config.imgNet]:
+ if args.dataset in [Config.c10, Config.c100, Config.imgNet, Config.imgNetFull]:
train_loader, val_loader, class_num = dataset.get_dataloader(
train_batch_size=args.batch_size,
test_batch_size=args.batch_size,
dataset=args.dataset,
num_workers=1,
- datadir=os.path.join(args.base_dir, "data"))
+ datadir=os.path.join(args.base_dir))
test_loader = val_loader
else:
train_loader, val_loader, test_loader = libsvm_dataloader(
args=args,
- data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ data_dir=os.path.join(args.base_dir, args.dataset),
nfield=args.nfield,
batch_size=args.batch_size)
class_num = args.num_labels
@@ -58,6 +45,7 @@ def generate_data_loader():
if __name__ == "__main__":
args = parse_arguments()
+ from src.common.constant import Config
# set the log name
gmt = time.gmtime()
@@ -66,7 +54,6 @@ def generate_data_loader():
os.environ.setdefault("log_file_name", args.log_name + "_" + str(ts) + ".log")
os.environ.setdefault("base_dir", args.base_dir)
- from src.common.constant import Config
from src.common.structure import ModelAcquireData
from src.controller.sampler_all.seq_sampler import SequenceSampler
from src.eva_engine.phase1.evaluator import P1Evaluator
@@ -117,6 +104,16 @@ def generate_data_loader():
explored_n += 1
result[arch_id] = model_score
# print(f" {datetime.now()} finish arch = {arch_id}, model_score = {model_score}")
+
+ if explored_n < 10:
+ print("3. [trails] Phase 1: filter phase explored " + str(explored_n)
+ + "Total explored " + str(len(result)) +
+ " model, model_id = " + str(arch_id) +
+ " model_scores = " + json.dumps(model_score))
+ logger.info("3. [trails] Phase 1: filter phase explored " + str(explored_n)
+ + "Total explored " + str(len(result)) +
+ " model, model_id = " + str(arch_id) +
+ " model_scores = " + json.dumps(model_score))
if explored_n % 1000 == 0:
# print_memory_usg()
# _evaluator.force_gc()
@@ -124,6 +121,10 @@ def generate_data_loader():
+ "Total explored " + str(len(result)) +
" model, model_id = " + str(arch_id) +
" model_scores = " + json.dumps(model_score))
+ logger.info("3. [trails] Phase 1: filter phase explored " + str(explored_n)
+ + "Total explored " + str(len(result)) +
+ " model, model_id = " + str(arch_id) +
+ " model_scores = " + json.dumps(model_score))
if explored_n % 1000 == 0:
# print_memory_usg()
# _evaluator.force_gc()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/README.md b/examples/model_selection/Trails/internal/ml/model_selection/exps/README.md
similarity index 93%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/README.md
rename to examples/model_selection/Trails/internal/ml/model_selection/exps/README.md
index 2adc07d972..84e9597ddb 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/exps/README.md
+++ b/examples/model_selection/Trails/internal/ml/model_selection/exps/README.md
@@ -17,6 +17,7 @@
under the License.
-->
+
# Folder description
## baseline
@@ -33,8 +34,4 @@ We benchmark the system from both macro and analysis component in micro
## nas_bench_tabular
-We build a nas-bench-tabular dataset here
-
-## system
-
-We run the experiment here
\ No newline at end of file
+We build the nas-bench-tabular dataset here
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/exps/shared_args.py b/examples/model_selection/Trails/internal/ml/model_selection/exps/shared_args.py
new file mode 100644
index 0000000000..c8a4200320
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/exps/shared_args.py
@@ -0,0 +1,223 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import os
+import random
+import numpy as np
+import torch
+
+
+def seed_everything(seed=2201):
+ # 2022 -> 2021 -> 2031
+ ''' [reference] https://gist.github.com/KirillVladimirov/005ec7f762293d2321385580d3dbe335 '''
+ random.seed(seed)
+ os.environ['PYTHONHASHSEED'] = str(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+ torch.cuda.manual_seed(seed)
+ torch.backends.cudnn.deterministic = True
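+ # Deterministic cuDNN improves reproducibility at some GPU speed cost.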
+
+
+def str2bool(v):
+ if isinstance(v, bool):
+ return v
+ if v.lower() in ('yes', 'true', 't', 'y', '1'):
+ return True
+ elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+ return False
+ else:
+ raise argparse.ArgumentTypeError('Boolean value expected.')
+
+
+def sampler_args(parser):
+ # define search space,
+ parser.add_argument('--search_space', type=str, default="mlp_sp",
+ help='[nasbench101, nasbench201, mlp_sp]')
+ # EA sampler's parameters,
+ parser.add_argument('--population_size', type=int, default=10, help="Population size for the EA sampler.")
+ parser.add_argument('--sample_size', type=int, default=3, help="Sample size for the EA sampler.")
+ parser.add_argument('--simple_score_sum', default='True', type=str2bool,
+ help="Sum multiple TFMEM score or use Global Rank")
+
+
+def space201_101_share_args(parser):
+ parser.add_argument('--api_loc', type=str, default="NAS-Bench-201-v1_1-096897.pth",
+ help='which search space file to use, ['
+ 'nasbench101: nasbench_only108.pkl'
+ 'nasbench201: NAS-Bench-201-v1_1-096897.pth'
+ ' ... ]')
+
+ parser.add_argument('--init_channels', default=16, type=int, help='output channels of stem convolution')
+ parser.add_argument('--bn', type=int, default=1, help="If use batch norm in network 1 = true, 0 = false")
+
+
+def nb101_args(parser):
+ parser.add_argument('--num_stacks', default=3, type=int, help='#stacks of modules')
+ parser.add_argument('--num_modules_per_stack', default=3, type=int, help='# modules per stack')
+
+
+def nb201_args(parser):
+ parser.add_argument('--init_w_type', type=str, default='none',
+ help='weight initialization (before pruning) type [none, xavier, kaiming, zero]')
+ parser.add_argument('--init_b_type', type=str, default='none',
+ help='bias initialization (before pruning) type [none, xavier, kaiming, zero]')
+ parser.add_argument('--arch_size', type=int, default=1,
+ help='How many node the architecture has at least')
+
+
+def mlp_args(parser):
+ parser.add_argument('--num_layers', default=4, type=int, help='# hidden layers')
+ parser.add_argument('--hidden_choice_len', default=20, type=int, help=
+ 'number of hidden layer choices, 10 for criteo, 20 for others')
+
+
+def mlp_trainner_args(parser):
+ parser.add_argument('--epoch', type=int, default=20,
+ help='number of maximum epochs, '
+ 'frappe: 20, uci_diabetes: 40, criteo: 10, '
+ 'nb101: 108, nb201: 200')
+
+ parser.add_argument('--batch_size', type=int, default=32, help='batch size')
+ parser.add_argument('--lr', type=float, default=0.001, help="learning rate")
+ parser.add_argument('--patience', type=int, default=1, help='number of epochs for stopping training')
+ # parser.add_argument('--eval_freq', type=int, default=10000, help='max number of batches to train per epoch')
+
+ parser.add_argument('--iter_per_epoch', type=int, default=200,
+ help="None, "
+ "200 for frappe, uci_diabetes, "
+ "2000 for criteo")
+
+ # MLP model config
+ parser.add_argument('--nfeat', type=int, default=5500,
+ help='the number of features, '
+ 'frappe: 5500, '
+ 'uci_diabetes: 369, '
+ 'criteo: 2100000')
+ parser.add_argument('--nfield', type=int, default=10,
+ help='the number of fields, '
+ 'frappe: 10, '
+ 'uci_diabetes: 43, '
+ 'criteo: 39')
+ parser.add_argument('--nemb', type=int, default=10,
+ help='embedding size 10')
+
+ # MLP train config
+ parser.add_argument('--report_freq', type=int, default=30, help='report frequency')
+ parser.add_argument('--workers', default=1, type=int, help='data loading workers')
+
+
+def data_set_config(parser):
+ parser.add_argument('--base_dir', type=str, default="./dataset/",
+ help='path of data and result parent folder')
+ # define search space,
+ parser.add_argument('--dataset', type=str, default='frappe',
+ help='cifar10, cifar100, ImageNet16-120, '
+ 'frappe, criteo, uci_diabetes')
+
+ parser.add_argument('--num_labels', type=int, default=2,
+ help='[10, 100, 120],'
+ '[2, 2, 2]')
+
+
+def seq_train_all_params(parser):
+ parser.add_argument('--worker_id', type=int, default=0, help='start from 0')
+ parser.add_argument('--total_workers', type=int, default=120,
+ help='total number of workers, each train some models')
+ parser.add_argument('--total_models_per_worker', type=int, default=-1, help='How many models to evaluate')
+ parser.add_argument('--pre_partitioned_file',
+ default="./internal/ml/model_selection/exps/sampled_data/sampled_models_all.json",
+ type=str, help='all models with id')
+
+
+def dis_train_all_models(parser):
+ parser.add_argument('--worker_each_gpu', default=6, type=int, help='num worker each gpu')
+ parser.add_argument('--gpu_num', default=8, type=int, help='num GPUs')
+
+
+# tune interval and schedule NK rate such that it can produce a good result
+def tune_interval_NK_rate(parser):
+ parser.add_argument('--kn_rate', default=-1, type=int, help="default N/K = 100")
+
+
+def db4nas(parser):
+ parser.add_argument('--db_name', default="pg_extension", type=str)
+ parser.add_argument('--db_user', default="postgres", type=str)
+ parser.add_argument('--db_host', default="localhost", type=str)
+ parser.add_argument('--db_port', default=28814, type=int)
+
+
+def anytime_exp_set(parser):
+ parser.add_argument('--only_phase1', default='False', type=str2bool)
+ parser.add_argument('--is_simulate', default='True', type=str2bool,
+ help='Use pre-computed result or run online')
+
+
+def system_performance_exp(parser):
+ parser.add_argument('--models_explore', default=10, type=int, help='# models to explore in the filtering phase')
+    parser.add_argument('--tfmem', default="jacflow", type=str, help='the training-free metric to use, e.g., jacflow or all_matrix')
+ parser.add_argument('--embedding_cache_filtering', default='True', type=str2bool,
+ help='Cache embedding for MLP in filtering phase?')
+    parser.add_argument('--concurrency', default=1, type=int, help='number of workers in the filtering phase')
+
+
+def parse_arguments():
+ parser = argparse.ArgumentParser(description='system')
+
+ # job config
+ parser.add_argument('--log_name', type=str, default="main_T_100s")
+    parser.add_argument('--budget', type=int, default=100, help="in seconds")
+
+    # define the base dir, where APIs, datasets, logs, etc. are stored
+ parser.add_argument('--device', type=str, default="cpu")
+ parser.add_argument('--local_rank', type=int, default=1, help="local rank")
+
+ parser.add_argument('--log_folder', default="log_debug", type=str)
+
+ parser.add_argument('--result_dir', default="./internal/ml/model_selection/exp_result/", type=str,
+ help='path to store exp outputs')
+    parser.add_argument('--num_points', default=12, type=int, help='number of points')
+
+ sampler_args(parser)
+
+ nb101_args(parser)
+ nb201_args(parser)
+ space201_101_share_args(parser)
+
+ mlp_args(parser)
+ data_set_config(parser)
+    mlp_trainer_args(parser)
+ seq_train_all_params(parser)
+ dis_train_all_models(parser)
+
+ tune_interval_NK_rate(parser)
+
+ db4nas(parser)
+ anytime_exp_set(parser)
+
+ system_performance_exp(parser)
+
+ # tmp
+ parser.add_argument('--max_load', type=int, default=-1, help="Max Loading time")
+
+ # refinement server
+    parser.add_argument('--url', type=str, default=-1, help="URL of the refinement server")
+
+ seed_everything()
+
+ return parser.parse_args()
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/main.py b/examples/model_selection/Trails/internal/ml/model_selection/main.py
similarity index 92%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/main.py
rename to examples/model_selection/Trails/internal/ml/model_selection/main.py
index 38357f3049..33cb88f3ca 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/main.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/main.py
@@ -16,7 +16,6 @@
# limitations under the License.
#
-# this is the main function of model selection.
import calendar
import os
@@ -33,12 +32,12 @@ def generate_data_loader():
test_batch_size=args.batch_size,
dataset=args.dataset,
num_workers=1,
- datadir=os.path.join(args.base_dir, "data"))
+ datadir=os.path.join(args.base_dir))
test_loader = val_loader
else:
train_loader, val_loader, test_loader = libsvm_dataloader(
args=args,
- data_dir=os.path.join(args.base_dir, "data", "structure_data", args.dataset),
+ data_dir=os.path.join(args.base_dir, args.dataset),
nfield=args.nfield,
batch_size=args.batch_size)
class_num = args.num_labels
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/pg_interface.py b/examples/model_selection/Trails/internal/ml/model_selection/pg_interface.py
similarity index 82%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/pg_interface.py
rename to examples/model_selection/Trails/internal/ml/model_selection/pg_interface.py
index 98317c08a4..3636c51db3 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/pg_interface.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/pg_interface.py
@@ -22,6 +22,8 @@
import requests
import json
from typing import List, Dict
+import torch
+from torch.utils.data import Dataset, DataLoader
import traceback
import orjson
from argparse import Namespace
@@ -30,6 +32,7 @@
def exception_catcher(func):
def wrapper(encoded_str: str):
+ global_res = "NA, "
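+        # keep the most recent partial result so that, on failure, the error
+        # payload can still report whatever was produced before the exception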
try:
# each functon accepts a json string
params = json.loads(encoded_str)
@@ -45,15 +48,15 @@ def wrapper(encoded_str: str):
os.environ.setdefault("log_file_name", args.log_name + "_" + str(ts) + ".log")
# Call the original function with the parsed parameters
- return func(params, args)
+ global_res = func(params, args)
+ return global_res
except Exception as e:
return orjson.dumps(
- {"Errored": traceback.format_exc()}).decode('utf-8')
+ {"res": global_res, "Errored": traceback.format_exc()}).decode('utf-8')
return wrapper
-from torch.utils.data import Dataset
-import torch
+
class LibsvmDataset(Dataset):
""" Dataset loader for Libsvm data format """
@@ -97,7 +100,6 @@ def __getitem__(self, idx):
def generate_dataloader(mini_batch_data, args):
from src.logger import logger
- from torch.utils.data import DataLoader
logger.info(f"Begin to preprocessing dataset")
begin_time = time.time()
dataloader = DataLoader(LibsvmDataset(mini_batch_data),
@@ -112,6 +114,9 @@ def model_selection(params: dict, args: Namespace):
from src.logger import logger
logger.info(f"begin run model_selection on UDF runtime with CPU only")
+ begin = time.time()
+
mini_batch_data = json.loads(params["mini_batch"])
budget = float(params["budget"])
@@ -121,6 +126,10 @@ def model_selection(params: dict, args: Namespace):
data_loader = [dataloader, dataloader, dataloader]
+ logger.info(f"[end2end model_selection] Done with dataloader generation, time usage = " + str(time.time() - begin))
+
+ begin = time.time()
+
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
best_arch, best_arch_performance, time_usage, _, p1_trace_highest_score, p1_trace_highest_scored_models_id = \
rms.select_model_online_clean(
@@ -129,9 +138,17 @@ def model_selection(params: dict, args: Namespace):
only_phase1=False,
run_workers=1)
+ logger.info(f"[end2end model_selection] Done with model selection, time usage = " + str(time.time() - begin))
+
+    # format the response; a performance of 0 means the refinement phase
+    # did not fully train the best model within the budget
+    if best_arch_performance == 0:
+        best_arch_performance_str = "Not Fully Trained Yet"
+    else:
+        best_arch_performance_str = str(best_arch_performance)
+
return orjson.dumps(
{"best_arch": best_arch,
- "best_arch_performance": best_arch_performance,
+ "best_arch_performance": best_arch_performance_str,
"time_usage": time_usage}).decode('utf-8')
@@ -179,8 +196,8 @@ def profiling_refinement_phase(params: dict, args: Namespace):
@exception_catcher
def coordinator(params: dict, args: Namespace):
from src.logger import logger
+
logger.info(f"begin run coordinator")
- # print (f"begin run coordinator")
budget = float(params["budget"])
score_time_per_model = float(params["score_time_per_model"])
@@ -201,6 +218,8 @@ def coordinator(params: dict, args: Namespace):
train_time_per_epoch=train_time_per_epoch,
only_phase1=only_phase1)
+ logger.info(f"coordinator done with K, U, N with {K, U, N}")
+
return orjson.dumps(
{"k": K, "u": U, "n": N}).decode('utf-8')
@@ -229,12 +248,8 @@ def filtering_phase(params: dict, args: Namespace):
def filtering_phase_dataLoader(params: dict, args: Namespace):
from src.logger import logger
logger.info(f"begin run filtering_phase CPU only")
- # print (f"begin run filtering_phase CPU only")
mini_batch_m = params["mini_batch"]
- # print ("mini_batch_m: ", mini_batch_m)
-
-
n = int(params["n"])
k = int(params["k"])
@@ -275,7 +290,7 @@ def model_selection_workloads(params: dict, args: Namespace):
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
k_models, _, _, _ = rms.filtering_phase(N=n, K=k, train_loader=dataloader)
- best_arch, best_arch_performance, _ = rms.refinement_phase(
+ best_arch, best_arch_performance, _, _ = rms.refinement_phase(
U=1,
k_models=k_models,
train_loader=dataloader,
@@ -394,7 +409,6 @@ def benchmark_filtering_phase_latency(params: dict, args: Namespace):
from src.search_space.init_search_space import init_search_space
from src.tools.io_tools import write_json, read_json
from src.tools.res_measure import print_cpu_gpu_usage
- import torch
logger.info(f"begin run filtering_phase CPU only")
args.models_explore = int(params["explore_models"])
@@ -457,7 +471,6 @@ def benchmark_filtering_phase_latency(params: dict, args: Namespace):
# the first two are used for warming up
_evaluator.time_usage["io_latency"] = \
sum(_evaluator.time_usage["track_io_model_load"][2:]) + \
- sum(_evaluator.time_usage["track_io_model_release_each_50"]) + \
sum(_evaluator.time_usage["track_io_model_init"][2:]) + \
sum(_evaluator.time_usage["track_io_res_load"][2:]) + \
sum(_evaluator.time_usage["track_io_data_retrievel"][2:]) + \
@@ -538,20 +551,28 @@ def in_db_filtering_evaluate(params: dict, args: Namespace):
logger.info("search_space_ins, _evaluator, sampler is None")
return orjson.dumps({"error": "erroed, plz call init first"}).decode('utf-8')
+ begin_read = time.time()
+ mini_batch = get_data_from_shared_memory_int(int(params["rows"]))
+ read_done = time.time()
+
+ logger.info(f"Data Retrievel time {params['spi_seconds']}, "
+ f"read shared memory time = {read_done - begin_read}")
+
sampled_result = json.loads(params["sample_result"])
arch_id, model_encoding = str(sampled_result["arch_id"]), str(sampled_result["model_encoding"])
- mini_batch = json.loads(params["mini_batch"])
- if mini_batch["status"] == "error":
- return orjson.dumps({"error": mini_batch["message"]}).decode('utf-8')
logger.info(f"Begin evaluate {params['model_index']}, "
- f"with size of batch = {len(mini_batch['data'])}, "
- f"size of columns = {len(mini_batch['data'][0])}")
+ f"with size of batch = {len(mini_batch)}, "
+ f"size of columns = {len(mini_batch[0])}")
model_acquire_data = ModelAcquireData(model_id=arch_id,
model_encoding=model_encoding,
is_last=False,
- spi_seconds=float(params["spi_seconds"]),
- spi_mini_batch=mini_batch["data"],
+ spi_seconds=float(params["spi_seconds"]) + read_done - begin_read,
+ spi_mini_batch=mini_batch,
+ batch_size=int(params["rows"])
)
model_score = _evaluator._p1_evaluate_online(model_acquire_data)
@@ -577,7 +598,6 @@ def records_results(params: dict, args: Namespace):
time_output_file = f"{args.result_dir}/time_score_{args.search_space}_{params['dataset']}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
_evaluator.time_usage["io_latency"] = \
sum(_evaluator.time_usage["track_io_model_load"][2:]) + \
- sum(_evaluator.time_usage["track_io_model_release_each_50"]) + \
sum(_evaluator.time_usage["track_io_model_init"][2:]) + \
sum(_evaluator.time_usage["track_io_res_load"][2:]) + \
sum(_evaluator.time_usage["track_io_data_retrievel"][2:]) + \
@@ -604,6 +624,22 @@ def records_results(params: dict, args: Namespace):
return orjson.dumps({"Done": 1}).decode('utf-8')
+@exception_catcher
+def measure_call_overheads(params: dict, args: Namespace):
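+    # intentionally a no-op: invoking it measures the pure UDF call overhead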
+ return orjson.dumps({"Done": 1}).decode('utf-8')
+
+
+import numpy as np
+from multiprocessing import shared_memory
+
+
+def get_data_from_shared_memory_int(n_rows):
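+    # Attach to the fixed-name segment written on the DB side, view it as
+    # float32, and reshape to (n_rows, n_cols). The result is a view into the
+    # shared buffer, so the segment must stay mapped while the data is in use.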
+ shm = shared_memory.SharedMemory(name="my_shared_memory")
+ data = np.frombuffer(shm.buf, dtype=np.float32)
+ data = data.reshape(n_rows, -1)
+ return data
+
+
if __name__ == "__main__":
params = {}
params["budget"] = 10
@@ -620,16 +656,3 @@ def records_results(params: dict, args: Namespace):
params["k"] = 1
params["config_file"] = './internal/ml/model_selection/config.ini'
print(filtering_phase_dataLoader(json.dumps(params)))
-
- # params = {}
- # params[
- # "mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
- # params["config_file"] = './internal/ml/model_selection/config.ini'
- # print(profiling_refinement_phase(json.dumps(params)))
- #
- # params = {}
- # params["budget"] = 10
- # params[
- # "mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
- # params["config_file"] = './internal/ml/model_selection/config.ini'
- # print(model_selection(json.dumps(params)))
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/requirement.txt b/examples/model_selection/Trails/internal/ml/model_selection/requirement.txt
similarity index 83%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/requirement.txt
rename to examples/model_selection/Trails/internal/ml/model_selection/requirement.txt
index 591daefa59..855bab3c91 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/requirement.txt
+++ b/examples/model_selection/Trails/internal/ml/model_selection/requirement.txt
@@ -5,12 +5,16 @@ charset-normalizer==3.2.0
ConfigSpace==0.7.1
contourpy==1.1.0
cycler==0.11.0
+einops==0.7.0
fonttools==4.41.0
+fvcore==0.1.5.post20221221
gpustat==1.1
+h5py==3.10.0
html5tagger==1.3.0
httptools==0.6.0
idna==3.4
importlib-resources==6.0.0
+iopath==0.1.10
joblib==1.3.1
kiwisolver==1.4.4
matplotlib==3.7.2
@@ -18,16 +22,20 @@ more-itertools==9.1.0
multidict==6.0.4
numpy==1.24.4
nvidia-ml-py==12.535.77
+objgraph==3.6.0
orjson==3.9.2
packaging==23.1
palettable==3.3.3
pandas==2.0.3
Pillow==10.0.0
+portalocker==2.8.2
psutil==5.9.5
psycopg2-binary==2.9.6
+Pympler==1.0.1
pyparsing==3.0.9
python-dateutil==2.8.2
pytz==2023.3
+PyYAML==6.0.1
requests==2.31.0
sanic==23.6.0
sanic-routing==23.6.0
@@ -36,6 +44,8 @@ scipy==1.10.1
seaborn==0.12.2
six==1.16.0
sklearn==0.0
+tabulate==0.9.0
+termcolor==2.3.0
thop @ git+https://github.com/Lyken17/pytorch-OpCounter.git@43c064afb71383501e41eaef9e8c8407265cf77f
threadpoolctl==3.1.0
torch==1.8.1
@@ -51,4 +61,5 @@ urllib3==2.0.4
uvloop==0.17.0
wcwidth==0.2.6
websockets==11.0.3
-zipp==3.16.2
+yacs==0.1.8
+zipp==3.16.2
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/database/load_data_to_db.sh b/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh
similarity index 82%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/database/load_data_to_db.sh
rename to examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh
index dc7e0172f5..c121e513c7 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/database/load_data_to_db.sh
+++ b/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@@ -7,30 +9,31 @@
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#
-#!/bin/bash
# Check for proper number of command line args
-if [[ $# -ne 2 ]]; then
- echo "Usage: $0 "
+if [[ $# -ne 3 ]]; then
+ echo "Usage: $0 "
exit 1
fi
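+# Example (hypothetical paths): bash load_data_to_db.sh ./dataset/frappe frappe 28814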
# Configurations
DATA_PATH="$1"
DB_NAME="$2"
+PORT="$3"
# Connection details
HOST="localhost"
-PORT="28814"
+#PORT="28814"
USERNAME="postgres"
DBNAME="pg_extension"
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh b/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh
new file mode 100644
index 0000000000..5e62b0b093
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Check for proper number of command line args
+if [[ $# -ne 2 ]]; then
+ echo "Usage: $0 "
+ exit 1
+fi
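+# Example (hypothetical paths): bash load_data_to_db_int.sh ./dataset/frappe frappe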
+
+# Configurations
+DATA_PATH="$1"
+DB_NAME="${2}_int"
+
+# Connection details
+HOST="localhost"
+PORT="28814"
+USERNAME="postgres"
+DBNAME="pg_extension"
+
+# Create the database
+echo "Creating database..."
+createdb -h $HOST -p $PORT -U $USERNAME $DBNAME
+
+# Define datasets to process
+datasets=("train" "valid" "test")
+
+# Loop over each dataset
+for dataset in "${datasets[@]}"; do
+ rm "${DATA_PATH}/${dataset}.csv"
+
+ # 1. Identify the number of columns
+ num_columns=$(awk 'NF > max { max = NF } END { print max }' "${DATA_PATH}/${dataset}.libsvm")
+
+ # 2. Create the table dynamically
+ create_table_cmd="CREATE TABLE ${DB_NAME}_${dataset} (id SERIAL PRIMARY KEY, label INTEGER"
+
+ for (( i=2; i<=$num_columns; i++ )); do
+ create_table_cmd+=", col$(($i-1)) INTEGER" # Change to INTEGER type
+ done
+ create_table_cmd+=");"
+
+ echo "Creating ${dataset} table..."
+    echo "$create_table_cmd" | psql -h $HOST -p $PORT -U $USERNAME -d $DBNAME
+
+ # 3. Transform the libsvm format to CSV
+ echo "Transforming ${dataset} to CSV format..."
+
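+    # e.g. the libsvm row "1 3:1 10:1" becomes the space-delimited row "1 3 10"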
+ awk '{
+ printf $1; # print label
+ for (i = 2; i <= NF; i++) {
+ split($i, a, ":");
+ printf " %s", a[1]; # print the first part of the split
+ }
+ printf "\n"; # end of line
+ }' "${DATA_PATH}/${dataset}.libsvm" > "${DATA_PATH}/${dataset}.csv"
+
+ # 4. Import into PostgreSQL
+ columns="label"
+ for (( i=2; i<=$num_columns; i++ )); do
+ columns+=", col$(($i-1))"
+ done
+
+ echo "Loading ${dataset} into PostgreSQL..."
+ psql -h $HOST -p $PORT -U $USERNAME -d $DBNAME -c "\COPY ${DB_NAME}_${dataset}($columns) FROM '${DATA_PATH}/${dataset}.csv' DELIMITER ' '"
+done
+
+echo "Data load complete."
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_raw_source.sh b/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_raw_source.sh
new file mode 100644
index 0000000000..c14c9f1c73
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_raw_source.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Check for proper number of command line args
+if [[ $# -ne 2 ]]; then
+ echo "Usage: $0 "
+ exit 1
+fi
+
+# Configurations
+DATA_PATH="$1"
+DB_NAME="$2"
+
+# Connection details
+HOST="localhost"
+PORT="5432"
+USERNAME="postgres"
+DBNAME="model_slicing"
+
+# Create the database
+echo "Creating database..."
+createdb -h $HOST -p $PORT -U $USERNAME $DBNAME
+
+
+
+echo "Data load complete."
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/shared_config.py b/examples/model_selection/Trails/internal/ml/model_selection/shared_config.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/shared_config.py
rename to examples/model_selection/Trails/internal/ml/model_selection/shared_config.py
index f40ac15d67..10f9163688 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/shared_config.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/shared_config.py
@@ -16,9 +16,7 @@
# limitations under the License.
#
-import calendar
-import os
-import time
+
import argparse
import configparser
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/__init__.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/__init__.py
index 3df60b02f7..8c328a3bbc 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/__init__.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/__init__.py
@@ -15,3 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/common/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/common/__init__.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/constant.py b/examples/model_selection/Trails/internal/ml/model_selection/src/common/constant.py
similarity index 92%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/constant.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/common/constant.py
index 36227ec38d..ccc2e352c6 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/constant.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/common/constant.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
class CommonVars:
# SAMPLER
@@ -44,6 +45,9 @@ class CommonVars:
PRUNE_SYNFLOW = "synflow"
WEIGHT_NORM = "weight_norm"
+ KNAS = "knas"
+
+ JACFLOW = "jacflow"
ALL_EVALUATOR = "all_matrix"
@@ -72,9 +76,15 @@ class Config:
c10 = "cifar10"
c100 = "cifar100"
imgNet = "ImageNet16-120"
+ imgNetFull = "ImageNet1k"
# struct dataset
Frappe = "frappe"
Criteo = "criteo"
UCIDataset = "uci_diabetes"
+ SUCCHALF = "SUCCHALF"
+ SUCCREJCT = "SUCCREJCT"
+ UNIFORM = "UNIFORM"
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/structure.py b/examples/model_selection/Trails/internal/ml/model_selection/src/common/structure.py
similarity index 82%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/structure.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/common/structure.py
index 521f45f1e6..b7ab685393 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/common/structure.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/common/structure.py
@@ -51,7 +51,7 @@ class ModelAcquireData:
"""
def __init__(self, model_id: str, model_encoding: str, is_last: bool = False,
- spi_seconds=None, spi_mini_batch=None):
+ spi_seconds=None, spi_mini_batch=None, batch_size=32):
self.is_last = is_last
self.model_id = model_id
self.model_encoding = model_encoding
@@ -59,25 +59,28 @@ def __init__(self, model_id: str, model_encoding: str, is_last: bool = False,
# this is when using spi
self.spi_seconds = spi_seconds
self.spi_mini_batch = spi_mini_batch
+ self.batch_size = batch_size
- def serialize_model(self) -> str:
+ def serialize_model(self) -> dict:
data = {"is_last": self.is_last,
"model_id": self.model_id,
"model_encoding": self.model_encoding,
"spi_seconds": self.spi_seconds,
+ "preprocess_seconds": self.spi_seconds,
+ "batch_size": self.batch_size,
"spi_mini_batch": self.spi_mini_batch}
- return json.dumps(data)
+ return data
@classmethod
- def deserialize(cls, data_str: str):
- data = json.loads(data_str)
+ def deserialize(cls, data: dict):
res = cls(
- data["model_id"],
- data["model_encoding"],
- data["is_last"],
- data["spi_mini_batch"],
- data["spi_seconds"])
+ model_id=data["model_id"],
+ model_encoding=data["model_encoding"],
+ is_last=data["is_last"],
+ spi_mini_batch=data["spi_mini_batch"],
+ batch_size=data["batch_size"],
+ spi_seconds=data["spi_seconds"])
return res
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/__init__.py
similarity index 87%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/__init__.py
index 222757523c..69595db4bb 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/__init__.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/__init__.py
@@ -18,14 +18,11 @@
from src.common.constant import CommonVars
from src.controller.sampler_ea.regularized_ea import RegularizedEASampler
-from src.controller.sampler_all.seq_sampler import SequenceSampler
from src.controller.sampler_rl.reinforcement_learning import RLSampler
-from src.controller.sampler_rand.random_sample import RandomSampler
from src.controller.sampler_all.seq_sampler import SequenceSampler
sampler_register = {
CommonVars.TEST_SAMPLER: SequenceSampler,
- # CommonVars.RANDOM_SAMPLER: RandomSampler,
CommonVars.RANDOM_SAMPLER: SequenceSampler,
CommonVars.RL_SAMPLER: RLSampler,
CommonVars.EA_SAMPLER: RegularizedEASampler,
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/controler.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/controler.py
similarity index 94%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/controler.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/controler.py
index 2770a72ee8..0b49e33178 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/controler.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/controler.py
@@ -16,10 +16,11 @@
# limitations under the License.
#
+
import time
from src.controller.core.sample import Sampler
-from src.third_pkg.models import CellStructure
+from src.search_space.core.model_params import ModelMicroCfg
class ModelScore:
@@ -84,14 +85,15 @@ def __init__(self, search_strategy: Sampler):
# use when simple_score_sum=True, record the model's sum score
self.history = {}
- def sample_next_arch(self) -> (str, CellStructure):
+ def sample_next_arch(self) -> (str, ModelMicroCfg):
"""
Return a generator
:return:
"""
return self.search_strategy.sample_next_arch(self.ranked_models)
- def fit_sampler(self, arch_id: str, alg_score: dict, simple_score_sum: bool = False) -> float:
+ def fit_sampler(self, arch_id: str, alg_score: dict, simple_score_sum: bool = False,
+ is_sync: bool = True, arch_micro=None) -> float:
"""
:param arch_id:
:param alg_score: {alg_name1: score1, alg_name2: score2}
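+        :param is_sync: if False, the score is fed back through the sampler's
+                        async_fit_sampler together with arch_id and arch_micro
+        :param arch_micro: micro config of the scored architecture (async mode only)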
@@ -103,7 +105,10 @@ def fit_sampler(self, arch_id: str, alg_score: dict, simple_score_sum: bool = Fa
score = self._use_pure_score_as_final_res(arch_id, alg_score)
else:
score = self._use_vote_rank_as_final_res(arch_id, alg_score)
- self.search_strategy.fit_sampler(score)
+ if is_sync:
+ self.search_strategy.fit_sampler(score)
+ else:
+ self.search_strategy.async_fit_sampler(arch_id, arch_micro, score)
return score
def _use_vote_rank_as_final_res(self, model_id: str, alg_score: dict):
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_EA/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_EA/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/__init__.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/metrics.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/metrics.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/metrics.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/metrics.py
index 77eeea32dc..840ca16504 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/metrics.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/metrics.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
from enum import Enum, auto
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/sample.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/sample.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/sample.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/sample.py
index b48066925b..244c1abdf5 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/core/sample.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/core/sample.py
@@ -1,3 +1,4 @@
+
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@@ -16,6 +17,7 @@
# limitations under the License.
#
+
from abc import abstractmethod
from src.search_space.core.model_params import ModelMicroCfg
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_all/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_all/__init__.py
new file mode 100644
index 0000000000..8c328a3bbc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_all/__init__.py
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py
similarity index 95%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py
index 4eaf04ff3f..8fd5e8f445 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_all/seq_sampler.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
import random
from src.controller.core.sample import Sampler
@@ -44,7 +45,8 @@ def sample_next_arch(self, sorted_model: list = None) -> (str, ModelMicroCfg):
print("the end")
return None, None
else:
- raise e
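+            # swallow unexpected sampler errors and signal exhaustion to the
+            # caller in the same way as reaching the end of the space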
+ print("Error", str(e))
+ return None, None
def fit_sampler(self, score: float):
pass
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_bohb/bohb_or.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_bohb/bohb_or.py
new file mode 100644
index 0000000000..ff12c969ee
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_bohb/bohb_or.py
@@ -0,0 +1,294 @@
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os, sys, time, random, argparse, collections
+from src.tools.env_tools import prepare_seed
+from src.logger import logger
+from models import CellStructure, get_search_spaces
+
+# BOHB: Robust and Efficient Hyperparameter Optimization at Scale, ICML 2018
+import ConfigSpace
+from hpbandster.optimizers.bohb import BOHB
+import hpbandster.core.nameserver as hpns
+from hpbandster.core.worker import Worker
+
+from nats_bench import create
+
+
+
+def time_string():
+ ISOTIMEFORMAT = "%Y-%m-%d %X"
+ string = "[{:}]".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))
+ return string
+
+
+def get_topology_config_space(search_space, max_nodes=4):
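+    # one categorical hyperparameter per cell edge "i<-j", drawn from the op set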
+ cs = ConfigSpace.ConfigurationSpace()
+ # edge2index = {}
+ for i in range(1, max_nodes):
+ for j in range(i):
+ node_str = "{:}<-{:}".format(i, j)
+ cs.add_hyperparameter(
+ ConfigSpace.CategoricalHyperparameter(node_str, search_space)
+ )
+ return cs
+
+
+def get_size_config_space(search_space):
+ cs = ConfigSpace.ConfigurationSpace()
+ for ilayer in range(search_space["numbers"]):
+ node_str = "layer-{:}".format(ilayer)
+ cs.add_hyperparameter(
+ ConfigSpace.CategoricalHyperparameter(node_str, search_space["candidates"])
+ )
+ return cs
+
+
+def config2topology_func(max_nodes=4):
+ def config2structure(config):
+ genotypes = []
+ for i in range(1, max_nodes):
+ xlist = []
+ for j in range(i):
+ node_str = "{:}<-{:}".format(i, j)
+ op_name = config[node_str]
+ xlist.append((op_name, j))
+ genotypes.append(tuple(xlist))
+ return CellStructure(genotypes)
+
+ return config2structure
+
+
+def config2size_func(search_space):
+ def config2structure(config):
+ channels = []
+ for ilayer in range(search_space["numbers"]):
+ node_str = "layer-{:}".format(ilayer)
+ channels.append(str(config[node_str]))
+ return ":".join(channels)
+
+ return config2structure
+
+
+class MyWorker(Worker):
+ def __init__(self, *args, convert_func=None, dataset=None, api=None, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.convert_func = convert_func
+ self._dataset = dataset
+ self._api = api
+ self.total_times = []
+ self.trajectory = []
+
+ def compute(self, config, budget, **kwargs):
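+        # simulate training `arch` for int(budget) epochs through the
+        # NATS-Bench API; BOHB minimizes the returned loss (100 - accuracy)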
+ arch = self.convert_func(config)
+ accuracy, latency, time_cost, total_time = self._api.simulate_train_eval(
+ arch, self._dataset, iepoch=int(budget) - 1, hp="12"
+ )
+ self.trajectory.append((accuracy, arch))
+ self.total_times.append(total_time)
+ return {"loss": 100 - accuracy, "info": self._api.query_index_by_arch(arch)}
+
+
+def main(xargs, api):
+ prepare_seed(xargs.rand_seed)
+
+ logger.info("{:} use api : {:}".format(time_string(), api))
+ api.reset_time()
+ search_space = get_search_spaces(xargs.search_space, "nats-bench")
+ if xargs.search_space == "tss":
+ cs = get_topology_config_space(search_space)
+ config2structure = config2topology_func()
+ else:
+ cs = get_size_config_space(search_space)
+ config2structure = config2size_func(search_space)
+
+ hb_run_id = "0"
+
+ NS = hpns.NameServer(run_id=hb_run_id, host="localhost", port=0)
+ ns_host, ns_port = NS.start()
+ num_workers = 1
+
+ workers = []
+ for i in range(num_workers):
+ w = MyWorker(
+ nameserver=ns_host,
+ nameserver_port=ns_port,
+ convert_func=config2structure,
+ dataset=xargs.dataset,
+ api=api,
+ run_id=hb_run_id,
+ id=i,
+ )
+ w.run(background=True)
+ workers.append(w)
+
+ start_time = time.time()
+ bohb = BOHB(
+ configspace=cs,
+ run_id=hb_run_id,
+ eta=3,
+ min_budget=1,
+ max_budget=12,
+ nameserver=ns_host,
+ nameserver_port=ns_port,
+ num_samples=xargs.num_samples,
+ random_fraction=xargs.random_fraction,
+ bandwidth_factor=xargs.bandwidth_factor,
+ ping_interval=10,
+ min_bandwidth=xargs.min_bandwidth,
+ )
+
+    results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
+
+ bohb.shutdown(shutdown_workers=True)
+ NS.shutdown()
+
+ # print('There are {:} runs.'.format(len(results.get_all_runs())))
+ # workers[0].total_times
+ # workers[0].trajectory
+ current_best_index = []
+ for idx in range(len(workers[0].trajectory)):
+ trajectory = workers[0].trajectory[: idx + 1]
+ arch = max(trajectory, key=lambda x: x[0])[1]
+ current_best_index.append(api.query_index_by_arch(arch))
+
+ best_arch = max(workers[0].trajectory, key=lambda x: x[0])[1]
+    logger.info(
+        "Best found configuration: {:} within {:.3f} s".format(
+            best_arch, workers[0].total_times[-1]
+        )
+    )
+    info = api.query_info_str_by_arch(
+        best_arch, "200" if xargs.search_space == "tss" else "90"
+    )
+    logger.info("{:}".format(info))
+
+    return xargs.save_dir, current_best_index, workers[0].total_times
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ "BOHB: Robust and Efficient Hyperparameter Optimization at Scale"
+ )
+ parser.add_argument(
+ "--dataset",
+ default="cifar10",
+ type=str,
+ choices=["cifar10", "cifar100", "ImageNet16-120"],
+ help="Choose between Cifar10/100 and ImageNet-16.",
+ )
+ # general arg
+ parser.add_argument(
+ "--search_space",
+ default="tss",
+ type=str,
+ choices=["tss", "sss"],
+ help="Choose the search space.",
+ )
+ parser.add_argument(
+ "--time_budget",
+ type=int,
+ default=20000,
+ help="The total time cost budge for searching (in seconds).",
+ )
+ parser.add_argument(
+ "--loops_if_rand", type=int, default=500, help="The total runs for evaluation."
+ )
+ # BOHB
+ parser.add_argument(
+ "--strategy",
+ default="sampling",
+ type=str,
+ nargs="?",
+ help="optimization strategy for the acquisition function",
+ )
+ parser.add_argument(
+ "--min_bandwidth",
+ default=0.3,
+ type=float,
+ nargs="?",
+ help="minimum bandwidth for KDE",
+ )
+ parser.add_argument(
+ "--num_samples",
+ default=64,
+ type=int,
+ nargs="?",
+ help="number of samples for the acquisition function",
+ )
+ parser.add_argument(
+ "--random_fraction",
+ default=0.33,
+ type=float,
+ nargs="?",
+ help="fraction of random configurations",
+ )
+ parser.add_argument(
+ "--bandwidth_factor",
+ default=3,
+ type=int,
+ nargs="?",
+ help="factor multiplied to the bandwidth",
+ )
+ parser.add_argument(
+ "--n_iters",
+ default=300,
+ type=int,
+ nargs="?",
+ help="number of iterations for optimization method",
+ )
+ # log
+ parser.add_argument(
+ "--save_dir",
+ type=str,
+ default="./output/search",
+ help="Folder to save checkpoints and log.",
+ )
+ parser.add_argument("--rand_seed", type=int, default=-1, help="manual seed")
+ args = parser.parse_args()
+
+ api = create(None, args.search_space, fast_mode=False, verbose=False)
+
+ args.save_dir = os.path.join(
+ "{:}-{:}".format(args.save_dir, args.search_space),
+ "{:}-T{:}".format(args.dataset, args.time_budget),
+ "BOHB",
+ )
+ print("save-dir : {:}".format(args.save_dir))
+
+ if args.rand_seed < 0:
+ save_dir, all_info = None, collections.OrderedDict()
+ for i in range(args.loops_if_rand):
+ print("{:} : {:03d}/{:03d}".format(time_string(), i, args.loops_if_rand))
+ args.rand_seed = random.randint(1, 100000)
+ save_dir, all_archs, all_total_times = main(args, api)
+ all_info[i] = {"all_archs": all_archs, "all_total_times": all_total_times}
+        os.makedirs(save_dir, exist_ok=True)
+        save_path = os.path.join(save_dir, "results.pth")
+        print("save into {:}".format(save_path))
+
+ import pickle
+ with open(save_path, 'wb') as f:
+ pickle.dump(all_info, f)
+
+ else:
+ main(args, api)
+
+
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_ea/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_ea/__init__.py
new file mode 100644
index 0000000000..8c328a3bbc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_ea/__init__.py
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py
similarity index 59%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py
index 62126bef61..2e3685fa68 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_ea/regularized_ea.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
import collections
from src.search_space.core.model_params import ModelMicroCfg
from src.controller.core.sample import Sampler
@@ -54,8 +55,8 @@ def __init__(self, space: SpaceWrapper, population_size: int, sample_size: int):
# use the visited to reduce the collapse
self.visited = {}
- self.max_mutate_time = 2
- self.max_mutate_sampler_time = 2
+ self.max_mutate_time = 4
+ self.max_mutate_sampler_time = 4
def sample_next_arch(self, sorted_model_ids: list) -> (str, ModelMicroCfg):
"""
@@ -146,3 +147,101 @@ def fit_sampler(self, score: float):
# Remove the oldest model.
self.population.popleft()
self.population_model_ids.popleft()
+
+
+class AsyncRegularizedEASampler(Sampler):
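+    """Regularized-EA variant for asynchronous workers: sampling is decoupled
+    from scoring, and scored results are pushed back explicitly through
+    async_fit_sampler (see Controller.fit_sampler with is_sync=False).
+    """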
+
+ def __init__(self, space: SpaceWrapper, population_size: int, sample_size: int):
+ super().__init__(space)
+
+ self.population_size = population_size
+ # list of object,
+ self.population = collections.deque()
+ # list of str, for duplicate checking
+ self.population_model_ids = collections.deque()
+
+ self.space = space
+ self.sample_size = sample_size
+ self.current_sampled = 0
+
+ # use the visited to reduce the collapse
+ self.visited = {}
+ self.max_mutate_time = 2
+ self.max_mutate_sampler_time = 3
+
+ def sample_next_arch(self, sorted_model_ids: list) -> (str, ModelMicroCfg):
+ # Case 1: If population hasn't reached desired size, add random architectures
+ if len(self.population) < self.population_size:
+ while True:
+ arch_id, arch_micro = self.space.random_architecture_id()
+ # Ensure that EA population has no repeated value
+ if str(arch_id) not in self.population_model_ids:
+ break
+ return arch_id, arch_micro
+
+ # Case 2: If population has reached desired size, evolve population
+ else:
+ cur_mutate_sampler_time = 0
+ is_found_new = False
+
+ # Keep attempting mutations for a maximum of 'max_mutate_sampler_time' times
+ while cur_mutate_sampler_time < self.max_mutate_sampler_time:
+ cur_mutate_time = 0
+
+ # Randomly select a sample of models from the population
+ sample = []
+ sample_ids = []
+ while len(sample) < self.sample_size:
+ candidate = random.choice(list(self.population))
+ candidate_id = self.population_model_ids[self.population.index(candidate)]
+ sample.append(candidate)
+ sample_ids.append(candidate_id)
+
+ # Select the best parent from the sample (based on the order in sorted_model_ids)
+ parent_id = max(sample_ids, key=lambda _id: sorted_model_ids.index(str(_id)))
+ parent = sample[sample_ids.index(parent_id)]
+
+ # Try to mutate the parent up to 'max_mutate_time' times
+ while cur_mutate_time < self.max_mutate_time:
+ arch_id, arch_micro = self.space.mutate_architecture(parent.arch)
+
+ # If the mutated architecture hasn't been visited or we've visited all possible architectures, stop
+ if arch_id not in self.visited or len(self.space) == len(self.visited):
+ self.visited[arch_id] = True
+ is_found_new = True
+ break
+ cur_mutate_time += 1
+
+ # If we've found a new architecture, stop sampling
+ if is_found_new:
+ break
+
+ cur_mutate_sampler_time += 1
+
+            # if every mutation attempt was exhausted, fall through and
+            # return the last mutated candidate anyway
+
+ # Update current architecture details
+ return arch_id, arch_micro
+
+ def async_fit_sampler(self, current_arch_id, current_arch_micro, score: float):
+ # if it's in Initialize stage, add to the population with random models.
+ if len(self.population) < self.population_size:
+ model = Model()
+ model.arch = current_arch_micro
+ model.score = score
+ self.population.append(model)
+ self.population_model_ids.append(current_arch_id)
+
+ # if it's in mutation stage
+ else:
+ child = Model()
+ child.arch = current_arch_micro
+ child.score = score
+
+ self.population.append(child)
+ self.population_model_ids.append(current_arch_id)
+ # Remove the oldest model.
+ self.population.popleft()
+ self.population_model_ids.popleft()
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rand/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rand/__init__.py
new file mode 100644
index 0000000000..8c328a3bbc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rand/__init__.py
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py
index 8c31254462..ff010b9872 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rand/random_sample.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
from src.controller.core.sample import Sampler
from src.search_space.core.space import SpaceWrapper
from src.search_space.core.model_params import ModelMicroCfg
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rl/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rl/__init__.py
new file mode 100644
index 0000000000..8c328a3bbc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rl/__init__.py
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py
similarity index 98%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py
index a65eed36e8..aabe4997b7 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/controller/sampler_rl/reinforcement_learning.py
@@ -16,10 +16,10 @@
# limitations under the License.
#
+
from src.controller.core.sample import Sampler
from src.search_space.core.space import SpaceWrapper
from src.search_space.core.model_params import ModelMicroCfg
-from src.third_pkg.models import CellStructure
class ExponentialMovingAverage(object):
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/__init__.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/__init__.py
index 01d7057208..42c2ac6db1 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/__init__.py
@@ -16,3 +16,4 @@
# limitations under the License.
#
+
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/dataset.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/dataset.py
new file mode 100644
index 0000000000..cf7ac352bd
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/dataset.py
@@ -0,0 +1,217 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+from torch import tensor
+from torch.utils.data import DataLoader, Subset
+from torchvision.datasets import ImageFolder
+from torchvision.datasets import MNIST, CIFAR10, CIFAR100, SVHN
+from torchvision.transforms import Compose
+from torchvision import transforms
+from .imagenet16 import *
+
+
+def get_dataloader(train_batch_size: int, test_batch_size: int, dataset: str,
+ num_workers: int, datadir: str, resize=None) -> (DataLoader, DataLoader, int):
+ """
+ Load CIFAR, SVHN, MNIST, or ImageNet-style datasets.
+ :param train_batch_size: batch size for the train loader
+ :param test_batch_size: batch size for the test loader
+ :param dataset: one of ImageNet16-120, cifar10, cifar100, svhn, ImageNet1k, ImageNet224-120, mnist
+ :param num_workers: number of data-loading subprocesses
+ :param datadir: root directory of the dataset files
+ :param resize: target image size; defaults to the dataset's native size
+ :return: train loader, test loader, number of classes
+ """
+
+ class_num = 0
+ mean = []
+ std = []
+ pad = 0
+
+ if 'ImageNet16' in dataset:
+ mean = [x / 255 for x in [122.68, 116.66, 104.01]]
+ std = [x / 255 for x in [63.22, 61.26, 65.09]]
+ size, pad = 16, 2
+ elif 'cifar' in dataset:
+ mean = (0.4914, 0.4822, 0.4465)
+ std = (0.2023, 0.1994, 0.2010)
+ size, pad = 32, 4
+ elif 'svhn' in dataset:
+ mean = (0.5, 0.5, 0.5)
+ std = (0.5, 0.5, 0.5)
+ size, pad = 32, 0
+ elif dataset == 'ImageNet1k':
+ from .h5py_dataset import H5Dataset
+ size, pad = 224, 2
+ mean = (0.485, 0.456, 0.406)
+ std = (0.229, 0.224, 0.225)
+ # resize = 256
+ elif dataset == 'ImageNet224-120':
+ from .h5py_dataset import H5Dataset
+ size, pad = 224, 2
+ mean = (0.485, 0.456, 0.406)
+ std = (0.229, 0.224, 0.225)
+ # resize = 256
+
+ if resize is None:
+ resize = size
+
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(size, padding=pad),
+ transforms.Resize(resize),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(mean, std),
+ ])
+
+ test_transform = transforms.Compose([
+ transforms.Resize((resize, resize)),
+ transforms.ToTensor(),
+ transforms.Normalize(mean, std),
+ ])
+
+ if dataset == 'cifar10':
+ class_num = 10
+ train_dataset = CIFAR10(datadir, True, train_transform, download=True)
+ test_dataset = CIFAR10(datadir, False, test_transform, download=True)
+ elif dataset == 'cifar100':
+ class_num = 100
+ train_dataset = CIFAR100(datadir, True, train_transform, download=True)
+ test_dataset = CIFAR100(datadir, False, test_transform, download=True)
+ elif dataset == 'svhn':
+ class_num = 10
+ train_dataset = SVHN(datadir, split='train', transform=train_transform, download=True)
+ test_dataset = SVHN(datadir, split='test', transform=test_transform, download=True)
+ elif dataset == 'ImageNet16-120':
+ class_num = 120
+ train_dataset = ImageNet16(os.path.join(datadir, 'ImageNet16'), True, train_transform, 120)
+ test_dataset = ImageNet16(os.path.join(datadir, 'ImageNet16'), False, test_transform, 120)
+ elif dataset == 'ImageNet1k':
+ class_num = 1000
+ # train_dataset = ImageFolder(root=os.path.join(datadir, 'imagenet/val'), transform=train_transform)
+ test_dataset = ImageFolder(root=os.path.join(datadir, 'imagenet/val'), transform=test_transform)
+ train_dataset = test_dataset
+ elif dataset == 'ImageNet224-120':
+ class_num = 120
+ test_dataset = ImageFolder(root=os.path.join(datadir, 'imagenet/val'), transform=test_transform)
+
+ # keep only the first 120 classes
+ class_indices = list(range(120)) # classes 0-119
+ subset_indices = [i for i, (_, label) in enumerate(test_dataset.samples) if label in class_indices]
+ filtered_test_dataset = Subset(test_dataset, subset_indices)
+ # reuse the filtered validation split for both train and test
+ train_dataset = filtered_test_dataset
+ test_dataset = filtered_test_dataset
+ elif dataset == 'mnist':
+ data_transform = Compose([transforms.ToTensor()])
+ # optionally normalize with transforms.Normalize((0.1307,), (0.3081,))
+ train_dataset = MNIST("_dataset", True, data_transform, download=True)
+ test_dataset = MNIST("_dataset", False, data_transform, download=True)
+ else:
+ raise ValueError(f'unsupported dataset: {dataset}')
+
+ train_loader = DataLoader(
+ train_dataset,
+ train_batch_size,
+ shuffle=True,
+ num_workers=num_workers,
+ pin_memory=False
+ )
+ test_loader = DataLoader(
+ test_dataset,
+ test_batch_size,
+ shuffle=False,
+ num_workers=num_workers,
+ pin_memory=False
+ )
+
+ print("dataset load done")
+
+ return train_loader, test_loader, class_num
+
+
+def get_mini_batch(dataloader: DataLoader, sample_alg: str, batch_size: int, num_classes: int) -> (tensor, tensor):
+ """
+ Get a mini-batch of data.
+ :param dataloader: DataLoader to draw samples from
+ :param sample_alg: 'random' or 'grasp'
+ :param batch_size: number of samples in the mini-batch
+ :param num_classes: number of classes (used by 'grasp')
+ :return: two tensors, inputs and targets
+ """
+
+ if sample_alg == 'random':
+ inputs, targets = _get_some_data(dataloader, batch_size=batch_size)
+ elif sample_alg == 'grasp':
+ inputs, targets = _get_some_data_grasp(dataloader, num_classes, samples_per_class=batch_size // num_classes)
+ else:
+ raise NotImplementedError(f'sampling algorithm {sample_alg} is not supported')
+
+ return inputs, targets
+
+
+def _get_some_data(train_dataloader: DataLoader, batch_size: int) -> (torch.tensor, torch.tensor):
+ """
+ Randomly sample one batch of data; some classes may not be represented
+ :param train_dataloader: torch DataLoader
+ :param batch_size: requested batch size (the loader's own batch size determines what is returned)
+ :return: inputs and targets tensors
+ """
+ traindata = []
+
+ dataloader_iter = iter(train_dataloader)
+ traindata.append(next(dataloader_iter))
+
+ inputs = torch.cat([a for a, _ in traindata])
+ targets = torch.cat([b for _, b in traindata])
+ return inputs, targets
+
+
+def _get_some_data_grasp(train_dataloader: DataLoader, num_classes: int,
+ samples_per_class: int) -> (torch.tensor, torch.tensor):
+ """
+ Sample data while guaranteeing each class has an equal number of samples.
+ :param train_dataloader: torch DataLoader
+ :param num_classes: number of classes
+ :param samples_per_class: how many samples to collect for each class
+ :return: inputs and targets tensors
+ """
+
+ datas = [[] for _ in range(num_classes)]
+ labels = [[] for _ in range(num_classes)]
+ mark = dict()
+ dataloader_iter = iter(train_dataloader)
+ while True:
+ inputs, targets = next(dataloader_iter)
+ for idx in range(inputs.shape[0]):
+ x, y = inputs[idx:idx + 1], targets[idx:idx + 1]
+ category = y.item()
+ if len(datas[category]) == samples_per_class:
+ mark[category] = True
+ continue
+ datas[category].append(x)
+ labels[category].append(y)
+ if len(mark) == num_classes:
+ break
+
+ x = torch.cat([torch.cat(_, 0) for _ in datas])
+ y = torch.cat([torch.cat(_) for _ in labels]).view(-1)
+ return x, y
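
For context on the 'grasp' branch above: _get_some_data_grasp keeps drawing batches until every class holds samples_per_class examples. A self-contained sketch of that loop on synthetic data (all names, sizes, and the toy dataset are illustrative, not part of the patch):

import torch
from torch.utils.data import DataLoader, TensorDataset

num_classes, samples_per_class = 3, 2
X = torch.randn(60, 4)                    # 60 toy samples, 4 features each
y = torch.randint(0, num_classes, (60,))  # toy labels in {0, 1, 2}
loader = DataLoader(TensorDataset(X, y), batch_size=8, shuffle=True)

datas = [[] for _ in range(num_classes)]
labels = [[] for _ in range(num_classes)]
full = set()       # classes whose quota is already met
it = iter(loader)
while len(full) < num_classes:  # assumes the stream holds enough samples of every class
    inputs, targets = next(it)
    for i in range(inputs.shape[0]):
        c = targets[i].item()
        if len(datas[c]) < samples_per_class:
            datas[c].append(inputs[i:i + 1])
            labels[c].append(targets[i:i + 1])
        else:
            full.add(c)

x = torch.cat([torch.cat(d, 0) for d in datas])
t = torch.cat([torch.cat(l) for l in labels]).view(-1)
print(x.shape, t.shape)  # -> torch.Size([6, 4]) torch.Size([6])
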
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/download_critero_and_avazu.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/download_critero_and_avazu.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/download_critero_and_avazu.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/download_critero_and_avazu.py
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/h5py_dataset.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/h5py_dataset.py
new file mode 100644
index 0000000000..31feeb06fc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/h5py_dataset.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import h5py
+import numpy as np
+from PIL import Image
+
+import torch
+from torch.utils.data import Dataset, DataLoader
+
+
+class H5Dataset(Dataset):
+ def __init__(self, h5_path, transform=None):
+ self.h5_path = h5_path
+ self.h5_file = None
+ with h5py.File(h5_path, 'r') as f: # count top-level groups without leaking a file handle
+ self.length = len(f)
+ self.transform = transform
+
+ def __getitem__(self, index):
+
+ # loading in getitem allows us to use multiple processes for data loading
+ # because hdf5 file handles aren't picklable, so they can't be transferred across processes
+ # https://discuss.pytorch.org/t/hdf5-a-data-format-for-pytorch/40379
+ # https://discuss.pytorch.org/t/dataloader-when-num-worker-0-there-is-bug/25643/16
+ # TODO possibly look at __getstate__ and __setstate__ as a more elegant solution
+ if self.h5_file is None:
+ self.h5_file = h5py.File(self.h5_path, 'r')
+
+ record = self.h5_file[str(index)]
+
+ if self.transform:
+ x = Image.fromarray(record['data'][()])
+ x = self.transform(x)
+ else:
+ x = torch.from_numpy(record['data'][()])
+
+ y = record['target'][()]
+ y = torch.from_numpy(np.asarray(y))
+
+ return (x, y)
+
+ def __len__(self):
+ return self.length
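
The lazy open in __getitem__ above is the point of this class: an open h5py.File handle is not picklable, so each DataLoader worker must create its own handle on first access. A usage sketch, assuming H5Dataset is in scope; the /tmp path and toy layout are illustrative:

import h5py
import numpy as np
from torch.utils.data import DataLoader

# build a tiny file in the layout H5Dataset expects: one group per string
# index, each group holding a 'data' array and a scalar 'target'
with h5py.File('/tmp/toy.h5', 'w') as f:
    for i in range(8):
        grp = f.create_group(str(i))
        grp['data'] = np.random.randint(0, 255, (16, 16, 3), dtype=np.uint8)
        grp['target'] = np.int64(i % 2)

# on spawn-based platforms wrap this in `if __name__ == "__main__":`
loader = DataLoader(H5Dataset('/tmp/toy.h5'), batch_size=4, num_workers=2)
for x, y in loader:
    print(x.shape, y.shape)  # -> torch.Size([4, 16, 16, 3]) torch.Size([4])
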
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/save_load_torch.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/save_load_torch.py
new file mode 100644
index 0000000000..4da19a8673
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/save_load_torch.py
@@ -0,0 +1,106 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import argparse
+
+from tqdm import tqdm
+import torch
+import os
+import glob
+
+
+def decode_libsvm(line):
+ columns = line.split(' ')
+ map_func = lambda pair: (int(pair[0]), float(pair[1]))
+ id, value = zip(*map(lambda col: map_func(col.split(':')), columns[1:]))
+ sample = {'id': torch.LongTensor(id),
+ 'value': torch.FloatTensor(value),
+ 'y': float(columns[0])}
+ return sample
+
+
+def _save_data(data_dir, fname, nfields, namespace):
+ with open(fname) as f:
+ sample_lines = sum(1 for line in f)
+
+ feat_id = torch.LongTensor(sample_lines, nfields)
+ feat_value = torch.FloatTensor(sample_lines, nfields)
+ y = torch.FloatTensor(sample_lines)
+
+ nsamples = 0
+ with tqdm(total=sample_lines) as pbar:
+ with open(fname) as fp:
+ line = fp.readline()
+ while line:
+ try:
+ sample = decode_libsvm(line)
+ feat_id[nsamples] = sample['id']
+ feat_value[nsamples] = sample['value']
+ y[nsamples] = sample['y']
+ nsamples += 1
+ except Exception:
+ print(f'incorrect data format in line "{line}"!')
+ line = fp.readline()
+ pbar.update(1)
+ print(f'# {nsamples} data samples loaded...')
+
+ # save the tensors to disk
+ torch.save(feat_id, f'{data_dir}/{namespace}_feat_id.pt')
+ torch.save(feat_value, f'{data_dir}/{namespace}_feat_value.pt')
+ torch.save(y, f'{data_dir}/{namespace}_y.pt')
+
+
+def parse_arguments():
+ parser = argparse.ArgumentParser(description='FastAutoNAS')
+
+ parser.add_argument('--nfield', type=int, default=10,
+ help='the number of fields, frappe: 10, uci_diabetes: 43, criteo: 39')
+
+ parser.add_argument('--dataset', type=str, default='frappe',
+ help='cifar10, cifar100, ImageNet16-120, frappe, criteo, uci_diabetes')
+
+ return parser.parse_args()
+
+
+def load_data(data_dir, namespace):
+ feat_id = torch.load(f'{data_dir}/{namespace}_feat_id.pt')
+ feat_value = torch.load(f'{data_dir}/{namespace}_feat_value.pt')
+ y = torch.load(f'{data_dir}/{namespace}_y.pt')
+
+ print(f'# {int(y.shape[0])} data samples loaded...')
+
+ return feat_id, feat_value, y, int(y.shape[0])
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+
+ _data_dir = os.path.join("./dataset", args.dataset)
+
+ train_name_space = "decoded_train"
+ valid_name_space = "decoded_valid"
+ # save
+ train_file = glob.glob("%s/tr*libsvm" % _data_dir)[0]
+ val_file = glob.glob("%s/va*libsvm" % _data_dir)[0]
+ _save_data(_data_dir, train_file, args.nfield, train_name_space)
+ _save_data(_data_dir, val_file, args.nfield, valid_name_space)
+
+ # read
+ # load_data(data_dir, train_name_space)
+ # load_data(data_dir, valid_name_space)
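
For reference, decode_libsvm above consumes the usual libsvm text format, `<label> <feature_id>:<value> ...`; the line below is made up for illustration:

line = '1 3:1.0 7:0.5 12:1.0'
sample = decode_libsvm(line)
print(sample['id'])     # tensor([ 3,  7, 12])
print(sample['value'])  # tensor([1.0000, 0.5000, 1.0000])
print(sample['y'])      # 1.0
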
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py
index 9a6587e9e0..9503d87b46 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/sequence_dataloader.py
@@ -16,10 +16,12 @@
# limitations under the License.
#
+
import queue
import threading
import requests
import time
+import torch
from src.logger import logger
@@ -61,7 +63,6 @@ def fetch_data(self):
# end_signal in trianing, then keep training
continue
else:
- import torch
# convert to tensor again
id_tensor = torch.LongTensor(batch['id'])
value_tensor = torch.FloatTensor(batch['value'])
@@ -94,3 +95,5 @@ def stop(self):
self.stop_event.set()
self.thread.join()
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py
index f39499f335..9c5211081e 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/stream_dataloader.py
@@ -16,10 +16,12 @@
# limitations under the License.
#
+
import queue
import threading
import requests
import time
+import torch
from src.logger import logger
@@ -62,7 +64,6 @@ def fetch_data(self):
continue
else:
# convert to tensor again
- import torch
id_tensor = torch.LongTensor(batch['id'])
value_tensor = torch.FloatTensor(batch['value'])
y_tensor = torch.FloatTensor(batch['y'])
@@ -94,3 +95,5 @@ def stop(self):
self.stop_event.set()
self.thread.join()
+
+
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/structure_data_loader.py b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/structure_data_loader.py
new file mode 100644
index 0000000000..d83dfc77c5
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/dataset_utils/structure_data_loader.py
@@ -0,0 +1,276 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from tqdm import tqdm
+import torch
+from torch.utils.data import Dataset, DataLoader, TensorDataset
+import os
+import glob
+import numpy as np
+import sklearn.model_selection
+import sklearn.preprocessing # StandardScaler is instantiated in uci_loader below
+from scipy.io.arff import loadarff
+
+
+def load_data(data_dir, namespace):
+ print(f'# loading data from '
+ f'{data_dir}/{namespace}_feat_id.pt, '
+ f'{data_dir}/{namespace}_feat_value.pt, '
+ f'{data_dir}/{namespace}_y.pt ......')
+
+ feat_id = torch.load(f'{data_dir}/{namespace}_feat_id.pt')
+ feat_value = torch.load(f'{data_dir}/{namespace}_feat_value.pt')
+ y = torch.load(f'{data_dir}/{namespace}_y.pt')
+
+ print(f'# {int(y.shape[0])} data samples loaded...')
+
+ return feat_id, feat_value, y, int(y.shape[0])
+
+
+class LibsvmDatasetReadOnce(Dataset):
+ """ Dataset loader for Libsvm data format """
+
+ def __init__(self, fname):
+ parent_directory = os.path.dirname(fname)
+ if "train" in fname:
+ namespace = "decoded_train"
+ elif "valid" in fname:
+ namespace = "decoded_valid"
+ else:
+ raise ValueError(f'cannot infer train/valid namespace from file name: {fname}')
+ self.feat_id, self.feat_value, self.y, self.nsamples = load_data(parent_directory, namespace)
+
+ print(f'# {self.nsamples} data samples loaded...')
+
+ def __len__(self):
+ return self.nsamples
+
+ def __getitem__(self, idx):
+ return {'id': self.feat_id[idx],
+ 'value': self.feat_value[idx],
+ 'y': self.y[idx]}
+
+
+class LibsvmDataset(Dataset):
+ """ Dataset loader for Libsvm data format """
+
+ def __init__(self, fname, nfields, max_load=-1):
+
+ def decode_libsvm(line):
+ columns = line.split(' ')
+ map_func = lambda pair: (int(pair[0]), float(pair[1]))
+ id, value = zip(*map(lambda col: map_func(col.split(':')), columns[1:]))
+ sample = {'id': torch.LongTensor(id),
+ 'value': torch.FloatTensor(value),
+ 'y': float(columns[0])}
+ return sample
+
+ with open(fname) as f:
+ sample_lines = sum(1 for line in f)
+
+ self.feat_id = torch.LongTensor(sample_lines, nfields)
+ self.feat_value = torch.FloatTensor(sample_lines, nfields)
+ self.y = torch.FloatTensor(sample_lines)
+
+ self.nsamples = 0
+ with tqdm(total=sample_lines) as pbar:
+ with open(fname) as fp:
+ line = fp.readline()
+ while line:
+ if max_load > 0 and self.nsamples > max_load:
+ break
+ try:
+ sample = decode_libsvm(line)
+ self.feat_id[self.nsamples] = sample['id']
+ self.feat_value[self.nsamples] = sample['value']
+ self.y[self.nsamples] = sample['y']
+ self.nsamples += 1
+ except Exception:
+ print(f'incorrect data format in line "{line}"!')
+ line = fp.readline()
+ pbar.update(1)
+ print(f'# {self.nsamples} data samples loaded...')
+
+ def __len__(self):
+ return self.nsamples
+
+ def __getitem__(self, idx):
+ return {'id': self.feat_id[idx],
+ 'value': self.feat_value[idx],
+ 'y': self.y[idx]}
+
+
+def libsvm_dataloader(args, data_dir, nfield, batch_size):
+ print("Loading data from ", data_dir)
+ workers = args.workers
+ train_file_name = f"{data_dir}/train.libsvm"
+ valid_file_name = f"{data_dir}/valid.libsvm"
+ test_file_name = f"{data_dir}/test.libsvm"
+ print(f"using train={train_file_name}, valid={valid_file_name}")
+ # read the converted file
+ if args.device == "cpu":
+ train_loader = DataLoader(LibsvmDatasetReadOnce(train_file_name),
+ batch_size=batch_size,
+ shuffle=True)
+ val_loader = DataLoader(LibsvmDatasetReadOnce(valid_file_name),
+ batch_size=batch_size * 8,
+ shuffle=False)
+
+ else:
+ train_loader = DataLoader(LibsvmDatasetReadOnce(train_file_name),
+ batch_size=batch_size,
+ shuffle=True,
+ num_workers=workers,
+ pin_memory=False)
+
+ val_loader = DataLoader(LibsvmDatasetReadOnce(valid_file_name),
+ batch_size=batch_size * 8,
+ shuffle=False,
+ num_workers=workers,
+ pin_memory=False)
+
+ return train_loader, val_loader, val_loader # the validation split doubles as the test set
+
+
+def libsvm_dataloader_ori(args):
+ data_dir = args.base_dir + args.dataset
+ print(data_dir)
+ train_file = glob.glob("%s/tr*libsvm" % data_dir)[0]
+ val_file = glob.glob("%s/va*libsvm" % data_dir)[0]
+ test_file = glob.glob("%s/te*libsvm" % data_dir)[0]
+
+ train_loader = DataLoader(LibsvmDataset(train_file, args.nfield, args.max_load),
+ batch_size=args.batch_size, shuffle=True,
+ num_workers=args.workers, pin_memory=True)
+ val_loader = DataLoader(LibsvmDataset(val_file, args.nfield, args.max_load),
+ batch_size=args.batch_size, shuffle=False,
+ num_workers=args.workers, pin_memory=True)
+ # test_loader = DataLoader(LibsvmDataset(test_file, args.nfield),
+ # batch_size=args.batch_size, shuffle=False,
+ # num_workers=args.workers, pin_memory=True)
+
+ return train_loader, val_loader, val_loader
+
+
+class UCILibsvmDataset(Dataset):
+ """ Dataset loader for loading UCI dataset of Libsvm format """
+
+ def __init__(self, X, y):
+ assert X.shape[0] == y.shape[0]
+ self.nsamples, self.nfeat = X.shape
+
+ self.feat_id = torch.LongTensor(self.nsamples, self.nfeat)
+ self.feat_value = torch.FloatTensor(self.nsamples, self.nfeat)
+ self.y = torch.FloatTensor(self.nsamples)
+
+ with tqdm(total=self.nsamples) as pbar:
+ id = torch.LongTensor(range(self.nfeat))
+ for idx in range(self.nsamples):
+ self.feat_id[idx] = id
+ self.feat_value[idx] = torch.FloatTensor(X[idx])
+ self.y[idx] = y[idx]
+
+ pbar.update(1)
+ print(f'Data loader: {self.nsamples} data samples')
+
+ def __len__(self):
+ return self.nsamples
+
+ def __getitem__(self, idx):
+ return {'id': self.feat_id[idx],
+ 'value': self.feat_value[idx],
+ 'y': self.y[idx]}
+
+
+def uci_loader(data_dir, batch_size, valid_perc=0., libsvm=False, workers=4):
+ '''
+ :param data_dir: Path to load the uci dataset
+ :param batch_size: Batch size
+ :param valid_perc: valid percentage split from train (default 0, whole train set)
+ :param libsvm: Libsvm loader of format {'id', 'value', 'y'}
+ :param workers: the number of subprocesses to load data
+ :return: train/valid/test loader, train_loader.nclass
+ '''
+
+ def uci_validation_set(X, y, split_perc=0.2):
+ return sklearn.model_selection.train_test_split(
+ X, y, test_size=split_perc, random_state=0)
+
+ def make_loader(X, y, transformer=None, batch_size=64):
+ # record whether a fresh scaler is fitted here (true only for the training split)
+ fit_new = transformer is None
+ if fit_new:
+ transformer = sklearn.preprocessing.StandardScaler()
+ transformer.fit(X)
+ X = transformer.transform(X)
+ if libsvm:
+ return DataLoader(UCILibsvmDataset(X, y),
+ batch_size=batch_size,
+ shuffle=fit_new,
+ num_workers=workers, pin_memory=True
+ ), transformer
+ else:
+ return DataLoader(
+ dataset=TensorDataset(*[torch.from_numpy(e) for e in [X, y]]),
+ batch_size=batch_size,
+ shuffle=fit_new,
+ num_workers=workers, pin_memory=True
+ ), transformer
+
+ def uci_folder_to_name(f):
+ return f.split('/')[-1]
+
+ def line_to_idx(l):
+ return np.array([int(e) for e in l.split()], dtype=np.int32)
+
+ def load_uci_dataset(folder, train=True):
+ full_file = f'{folder}/{uci_folder_to_name(folder)}.arff'
+ if os.path.exists(full_file):
+ data = loadarff(full_file)
+ train_idx, test_idx = [line_to_idx(l) for l in open(f'{folder}/conxuntos.dat').readlines()]
+ assert len(set(train_idx) & set(test_idx)) == 0
+ all_idx = list(train_idx) + list(test_idx)
+ assert len(all_idx) == np.max(all_idx) + 1
+ assert np.min(all_idx) == 0
+ if train:
+ data = (data[0][train_idx], data[1])
+ else:
+ data = (data[0][test_idx], data[1])
+ else:
+ typename = 'train' if train else 'test'
+ filename = f'{folder}/{uci_folder_to_name(folder)}_{typename}.arff'
+ data = loadarff(filename)
+ assert data[1].types() == ['numeric'] * (len(data[1].types()) - 1) + ['nominal']
+ X = np.array(data[0][data[1].names()[:-1]].tolist())
+ y = np.array([int(e) for e in data[0][data[1].names()[-1]]])
+ nclass = len(data[1]['clase'][1])
+ return X.astype(np.float32), y, nclass
+
+ Xtrain, ytrain, nclass = load_uci_dataset(data_dir)
+ if valid_perc > 0:
+ Xtrain, Xvalid, ytrain, yvalid = uci_validation_set(Xtrain, ytrain, split_perc=valid_perc)
+ train_loader, _ = make_loader(Xtrain, ytrain, batch_size=batch_size)
+ valid_loader, _ = make_loader(Xvalid, yvalid, batch_size=batch_size)
+ else:
+ train_loader, _ = make_loader(Xtrain, ytrain, batch_size=batch_size)
+ valid_loader = train_loader
+
+ print(f'{uci_folder_to_name(data_dir)}: {len(ytrain)} training samples loaded.')
+ Xtest, ytest, _ = load_uci_dataset(data_dir, False)
+ test_loader, _ = make_loader(Xtest, ytest, batch_size=batch_size)
+ print(f'{uci_folder_to_name(data_dir)}: {len(ytest)} testing samples loaded.')
+ train_loader.nclass = nclass
+ return train_loader, valid_loader, test_loader
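
A minimal usage sketch for the libsvm path above, assuming save_load_torch.py has already written the decoded_train_*/decoded_valid_* tensors next to train.libsvm and valid.libsvm (the directory name and Namespace fields are illustrative):

import argparse

# LibsvmDatasetReadOnce keys off 'train'/'valid' in the file name, and the
# third returned loader is the validation loader standing in for a test set
args = argparse.Namespace(workers=2, device='cpu')
train_loader, val_loader, test_loader = libsvm_dataloader(
    args, data_dir='./dataset/frappe', nfield=10, batch_size=32)

batch = next(iter(train_loader))
print(batch['id'].shape, batch['value'].shape, batch['y'].shape)
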
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/__init__.py
similarity index 98%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/__init__.py
index e5ddb1e193..59ffabc364 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/__init__.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/__init__.py
@@ -16,13 +16,13 @@
# limitations under the License.
#
+
from src.common.constant import *
from src.eva_engine.phase1.algo.prune_synflow import SynFlowEvaluator
+
# evaluator mapper to register many existing evaluation algorithms
evaluator_register = {
-
- # prune based
CommonVars.PRUNE_SYNFLOW: SynFlowEvaluator(),
-
}
+
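
Callers resolve a scorer by name through this mapping; a sketch of the lookup, assuming the same package layout:

from src.common.constant import CommonVars
from src.eva_engine import evaluator_register

evaluator = evaluator_register[CommonVars.PRUNE_SYNFLOW]
# score, time_usage = evaluator.evaluate_wrapper(
#     arch, device='cpu', space_name=..., batch_data=..., batch_labels=...)
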
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/coordinator.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/coordinator.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/coordinator.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/coordinator.py
index 8142a5cd6a..cd9bf95e1d 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/coordinator.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/coordinator.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
from src.common.constant import Config
from src.eva_engine.phase2.run_sh import BudgetAwareControllerSH
from src.logger import logger
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_all/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/__init__.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_RL/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_RL/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/__init__.py
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/alg_base.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/alg_base.py
new file mode 100644
index 0000000000..947ce71c26
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/alg_base.py
@@ -0,0 +1,150 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import math
+import time
+from abc import abstractmethod
+import torch
+from torch import nn
+
+
+class Evaluator:
+ def __init__(self):
+ pass
+
+ @abstractmethod
+ def evaluate(self, arch: nn.Module,
+ device: str,
+ batch_data: object, batch_labels: torch.Tensor,
+ space_name: str
+ ) -> float:
+ """
+ Score each architecture with predefined architecture and data
+ :param arch: architecture to be scored
+ :param device: cpu or gpu
+ :param batch_data: a mini batch of data, [ batch_size, channel, W, H ], or a dict for structured data
+ :param batch_labels: a mini batch of labels
+ :param space_name: string
+ :return: score
+ """
+ raise NotImplementedError
+
+ def evaluate_wrapper(self, arch, device: str, space_name: str,
+ batch_data: torch.tensor,
+ batch_labels: torch.tensor) -> (float, float):
+ """
+ :param arch: architecture to be scored
+ :param device: cpu or GPU
+ :param space_name: search space name
+ :param batch_data: a mini batch of data, [ batch_size, channel, W, H ]
+ :param batch_labels: a mini batch of labels
+ :return: score, timeUsage
+ """
+
+ arch.train()
+ # arch.zero_grad()
+
+ # measure scoring time
+ if "cuda" in device:
+ torch.cuda.synchronize()
+ # using torch.cuda.Event timing instead would avoid the explicit synchronize:
+ # starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
+ # starter.record()
+ starter, ender = time.time(), time.time()
+ else:
+ starter, ender = time.time(), time.time()
+
+ # score
+ score = self.evaluate(arch, device, batch_data, batch_labels, space_name)
+
+ if "cuda" in device:
+ # ender.record()
+ # implicitly waits for the event to be marked as complete before calculating the time difference
+ # curr_time = starter.elapsed_time(ender)
+ torch.cuda.synchronize()
+ ender = time.time()
+ curr_time = ender - starter
+ else:
+ ender = time.time()
+ curr_time = ender - starter
+
+ # clamp degenerate scores; NaN compares False with everything, so it maps to -1e8
+ if math.isnan(score):
+ score = -1e8
+ elif math.isinf(score):
+ score = 1e8 if score > 0 else -1e8
+
+ return score, curr_time
+
+ def evaluate_wrapper_origin(self, arch, device: str, space_name: str,
+ batch_data: torch.tensor,
+ batch_labels: torch.tensor) -> (float, float):
+ """
+ :param arch: architecture to be scored
+ :param device: cpu or GPU
+ :param space_name: search space name
+ :param batch_data: a mini batch of data, [ batch_size, channel, W, H ]
+ :param batch_labels: a mini batch of labels
+ :return: score, timeUsage
+ """
+
+ arch.train()
+ arch.zero_grad()
+
+ # measure scoring time
+ if "cuda" in device:
+ torch.cuda.synchronize()
+ # using torch.cuda.Event timing instead would avoid the explicit synchronize:
+ # starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
+ # starter.record()
+ starter, ender = time.time(), time.time()
+ else:
+ starter, ender = time.time(), time.time()
+
+ # score
+ score = self.evaluate(arch, device, batch_data, batch_labels, space_name)
+
+ if "cuda" in device:
+ # ender.record()
+ # implicitly waits for the event to be marked as complete before calculating the time difference
+ # curr_time = starter.elapsed_time(ender)
+ torch.cuda.synchronize()
+ ender = time.time()
+ curr_time = ender - starter
+ else:
+ ender = time.time()
+ curr_time = ender - starter
+
+ # clamp degenerate scores; NaN compares False with everything, so it maps to -1e8
+ if math.isnan(score):
+ score = -1e8
+ elif math.isinf(score):
+ score = 1e8 if score > 0 else -1e8
+
+ return score, curr_time
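
To plug a new phase-1 scorer into the engine, subclass Evaluator, implement evaluate(), and register an instance in evaluator_register. A toy scorer (illustrative only, not a real zero-cost proxy) that ranks an architecture by the L1 norm of its parameters:

import torch
from torch import nn

class L1NormEvaluator(Evaluator):  # Evaluator as defined above
    def evaluate(self, arch: nn.Module, device: str,
                 batch_data: object, batch_labels: torch.Tensor,
                 space_name: str) -> float:
        # sum of absolute parameter values; ignores the batch entirely
        return sum(p.detach().abs().sum().item() for p in arch.parameters())

arch = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
score, secs = L1NormEvaluator().evaluate_wrapper(
    arch, device='cpu', space_name='toy',
    batch_data=torch.randn(2, 8),
    batch_labels=torch.zeros(2, dtype=torch.long))
print(score, secs)
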
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py
similarity index 96%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py
index 1c671febda..e1088f83bc 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/prune_synflow.py
@@ -38,6 +38,7 @@
# singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
singa_dtype = {"float32": tensor.float32}
+
### MSOptimizer
class MSOptimizer(Optimizer):
def __call__(self, loss):
@@ -64,6 +65,7 @@ def call_with_returns(self, loss):
# print ("call_with_returns after apply loss.data: \n", loss.data)
return pn_p_g_list
+
# MSSGD -- actually no change of code
class MSSGD(MSOptimizer):
"""Implements stochastic gradient descent (optionally with momentum).
@@ -236,14 +238,15 @@ def set_states(self, states):
self.moments = states['moments']
self.mom_value = self.momentum(self.step_counter)
+
# Data augmentation
def augmentation(x, batch_size):
xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
for data_num in range(0, batch_size):
offset = np.random.randint(8, size=2)
x[data_num, :, :, :] = xpad[data_num, :,
- offset[0]:offset[0] + x.shape[2],
- offset[1]:offset[1] + x.shape[2]]
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
if_flip = np.random.randint(2)
if (if_flip):
x[data_num, :, :, :] = x[data_num, :, :, ::-1]
@@ -295,10 +298,13 @@ def resize_dataset(x, image_size):
for d in range(0, dim):
X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
(image_size, image_size), Image.BILINEAR),
- dtype=np.float32)
+ dtype=np.float32)
return X
+
import torch
+
+
class SynFlowEvaluator(Evaluator):
def __init__(self):
@@ -319,7 +325,7 @@ def evaluate(self, arch, device, batch_data: object, batch_labels: torch.Tensor,
1. this is data-Agnostic
2. only compute on a single example
"""
-
+
### singa configs
mssgd = MSSGD(lr=0.005, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype['float32'])
device_id = 0
@@ -327,8 +333,8 @@ def evaluate(self, arch, device, batch_data: object, batch_labels: torch.Tensor,
model = arch
graph = True
verbosity = 0
- dist_option='plain'
- spars=None
+ dist_option = 'plain'
+ spars = None
precision = 'float32'
global_rank = 0
world_size = 1
@@ -353,7 +359,7 @@ def evaluate(self, arch, device, batch_data: object, batch_labels: torch.Tensor,
model.train()
### process batch_data
- x = batch_data.cpu().numpy() # Size([1, 100]) and all ones
+ x = batch_data.cpu().numpy() # Size([1, 100]) and all ones
x = x.astype(np_dtype[precision])
y = np.ones(x.shape[0], dtype=np.int32)
if model.dimension == 2: # input data dimension
@@ -364,16 +370,15 @@ def evaluate(self, arch, device, batch_data: object, batch_labels: torch.Tensor,
model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
dev.SetVerbosity(verbosity)
-
# 1. Convert params to their abs.
- synflow_flag = True ### just change the model to the absolute value
+ synflow_flag = True ### just change the model to the absolute value
tx.copy_from_numpy(x) # dtype=np.float32
ty.copy_from_numpy(y)
# print ("before model forward ...")
pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
# print ("---------------------------------------")
# print ("before absolute prune_synflow !!!nemb input vector!!! tensor.to_numpy(loss)[0]: ", tensor.to_numpy(loss)[0])
- # print ("before absolute prune_synflow !!!nemb input vector!!! tensor.to_numpy(loss): ", tensor.to_numpy(loss))
+ # print ("before absolute prune_synflow !!!nemb input vector!!! tensor.to_numpy(loss): ", tensor.to_numpy(loss))
# train_correct += accuracy(tensor.to_numpy(out), y)
# train_loss += tensor.to_numpy(loss)[0]
# all params turned to positive
@@ -409,7 +414,7 @@ def evaluate(self, arch, device, batch_data: object, batch_labels: torch.Tensor,
score = 0.0
for pn_p_g_item in pn_p_g_list:
# print ("calculate weight param * grad parameter name: \n", pn_p_g_item[0])
- if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
+ if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
# print ("pn_p_g_item[1].shape: \n", pn_p_g_item[1].shape)
# print ("tensor.to_numpy(pn_p_g_item[1][0]): ", tensor.to_numpy(pn_p_g_item[1][0]))
# print ("calculate synflow parameter name: \n", pn_p_g_item[0])
@@ -420,6 +425,6 @@ def evaluate(self, arch, device, batch_data: object, batch_labels: torch.Tensor,
score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1]) * tensor.to_numpy(pn_p_g_item[2])))
# print ("layer_hidden_list: \n", layer_hidden_list)
# print ("prune_synflow !!!one-hot input vector!!! absolute step tensor.to_numpy(loss)[0]: ", tensor.to_numpy(loss)[0])
- print ("score: \n", score)
+ print("score: \n", score)
return score
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md
similarity index 82%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md
index b7c96e8845..72d356fc24 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/README.md
@@ -16,3 +16,7 @@
specific language governing permissions and limitations
under the License.
-->
+
+
+(1) Copy cnn_ms/pkg_model_code/model.py to ~/miniconda3/lib/python3.6/site-packages/singa/model.py (adjust the site-packages path to your Python version).
+(2) Enter cnn_ms/ and run "python train_ms_model.py ms_model_mlp mnist".
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/README.md b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/README.md
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/README.md
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/README.md
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/cifar10_multiprocess.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/cifar10_multiprocess.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/cifar10_multiprocess.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/cifar10_multiprocess.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_cnn.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_cnn.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_cnn.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_cnn.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_dist.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_dist.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_dist.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_dist.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_multiprocess.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_multiprocess.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_multiprocess.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/mnist_multiprocess.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py
similarity index 97%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py
index 7541736994..d71e0f29b6 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_cifar10.py
@@ -1,292 +1,292 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-try:
- import pickle
-except ImportError:
- import cPickle as pickle
-
-from singa import singa_wrap as singa
-from singa import autograd
-from singa import tensor
-from singa import device
-from singa import opt
-from PIL import Image
-import numpy as np
-import os
-import sys
-import time
-
-
-def load_dataset(filepath):
- with open(filepath, 'rb') as fd:
- try:
- cifar10 = pickle.load(fd, encoding='latin1')
- except TypeError:
- cifar10 = pickle.load(fd)
- image = cifar10['data'].astype(dtype=np.uint8)
- image = image.reshape((-1, 3, 32, 32))
- label = np.asarray(cifar10['labels'], dtype=np.uint8)
- label = label.reshape(label.size, 1)
- return image, label
-
-
-def load_train_data(dir_path='cifar-10-batches-py', num_batches=5):
- labels = []
- batchsize = 10000
- images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
- for did in range(1, num_batches + 1):
- fname_train_data = dir_path + "/data_batch_{}".format(did)
- image, label = load_dataset(check_dataset_exist(fname_train_data))
- images[(did - 1) * batchsize:did * batchsize] = image
- labels.extend(label)
- images = np.array(images, dtype=np.float32)
- labels = np.array(labels, dtype=np.int32)
- return images, labels
-
-
-def load_test_data(dir_path='cifar-10-batches-py'):
- images, labels = load_dataset(check_dataset_exist(dir_path + "/test_batch"))
- return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def check_dataset_exist(dirpath):
- if not os.path.exists(dirpath):
- print(
- 'Please download the cifar10 dataset using download_data.py (e.g. python ~/singa/examples/cifar10/download_data.py py)'
- )
- sys.exit(0)
- return dirpath
-
-
-def normalize_for_resnet(train_x, test_x):
- mean = [0.4914, 0.4822, 0.4465]
- std = [0.2023, 0.1994, 0.2010]
- train_x /= 255
- test_x /= 255
- for ch in range(0, 2):
- train_x[:, ch, :, :] -= mean[ch]
- train_x[:, ch, :, :] /= std[ch]
- test_x[:, ch, :, :] -= mean[ch]
- test_x[:, ch, :, :] /= std[ch]
- return train_x, test_x
-
-
-def resize_dataset(x, IMG_SIZE):
- num_data = x.shape[0]
- dim = x.shape[1]
- X = np.zeros(shape=(num_data, dim, IMG_SIZE, IMG_SIZE), dtype=np.float32)
- for n in range(0, num_data):
- for d in range(0, dim):
- X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
- (IMG_SIZE, IMG_SIZE), Image.BILINEAR),
- dtype=np.float32)
- return X
-
-
-def augmentation(x, batch_size):
- xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
- for data_num in range(0, batch_size):
- offset = np.random.randint(8, size=2)
- x[data_num, :, :, :] = xpad[data_num, :, offset[0]:offset[0] + 32,
- offset[1]:offset[1] + 32]
- if_flip = np.random.randint(2)
- if (if_flip):
- x[data_num, :, :, :] = x[data_num, :, :, ::-1]
- return x
-
-
-def accuracy(pred, target):
- y = np.argmax(pred, axis=1)
- t = np.argmax(target, axis=1)
- a = y == t
- return np.array(a, "int").sum()
-
-
-def to_categorical(y, num_classes):
- y = np.array(y, dtype="int")
- n = y.shape[0]
- categorical = np.zeros((n, num_classes))
- for i in range(0, n):
- categorical[i, y[i]] = 1
- categorical = categorical.astype(np.float32)
- return categorical
-
-
-# Function to all reduce NUMPY accuracy and loss from multiple devices
-def reduce_variable(variable, dist_opt, reducer):
- reducer.copy_from_numpy(variable)
- dist_opt.all_reduce(reducer.data)
- dist_opt.wait()
- output = tensor.to_numpy(reducer)
- return output
-
-
-# Function to sychronize SINGA TENSOR initial model parameters
-def synchronize(tensor, dist_opt):
- dist_opt.all_reduce(tensor.data)
- dist_opt.wait()
- tensor /= dist_opt.world_size
-
-
-# Data partition
-def data_partition(dataset_x, dataset_y, global_rank, world_size):
- data_per_rank = dataset_x.shape[0] // world_size
- idx_start = global_rank * data_per_rank
- idx_end = (global_rank + 1) * data_per_rank
- return dataset_x[idx_start:idx_end], dataset_y[idx_start:idx_end]
-
-
-def train_cifar10(DIST=False,
- local_rank=None,
- world_size=None,
- nccl_id=None,
- partial_update=False):
-
- # Define the hypermeters for the train_cifar10
- sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
- max_epoch = 5
- batch_size = 32
-
- train_x, train_y = load_train_data()
- test_x, test_y = load_test_data()
- train_x, test_x = normalize_for_resnet(train_x, test_x)
- IMG_SIZE = 224
- num_classes = 10
-
- if DIST:
- # For distributed GPU training
- sgd = opt.DistOpt(sgd,
- nccl_id=nccl_id,
- local_rank=local_rank,
- world_size=world_size)
- dev = device.create_cuda_gpu_on(sgd.local_rank)
-
- # Dataset partition for distributed training
- train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
- sgd.world_size)
- test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
- sgd.world_size)
- world_size = sgd.world_size
- else:
- # For single GPU
- dev = device.create_cuda_gpu()
- world_size = 1
-
- from resnet import resnet50
- model = resnet50(num_classes=num_classes)
-
- tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
- ty = tensor.Tensor((batch_size,), dev, tensor.int32)
- num_train_batch = train_x.shape[0] // batch_size
- num_test_batch = test_x.shape[0] // batch_size
- idx = np.arange(train_x.shape[0], dtype=np.int32)
-
- if DIST:
- # Sychronize the initial parameters
- autograd.training = True
- x = np.random.randn(batch_size, 3, IMG_SIZE,
- IMG_SIZE).astype(np.float32)
- y = np.zeros(shape=(batch_size,), dtype=np.int32)
- tx.copy_from_numpy(x)
- ty.copy_from_numpy(y)
- out = model(tx)
- loss = autograd.softmax_cross_entropy(out, ty)
- param = []
- for p, _ in autograd.backward(loss):
- synchronize(p, sgd)
- param.append(p)
-
- for epoch in range(max_epoch):
- start_time = time.time()
- np.random.shuffle(idx)
-
- if ((DIST == False) or (sgd.global_rank == 0)):
- print('Starting Epoch %d:' % (epoch))
-
- # Training phase
- autograd.training = True
- train_correct = np.zeros(shape=[1], dtype=np.float32)
- test_correct = np.zeros(shape=[1], dtype=np.float32)
- train_loss = np.zeros(shape=[1], dtype=np.float32)
-
- for b in range(num_train_batch):
- x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
- x = augmentation(x, batch_size)
- x = resize_dataset(x, IMG_SIZE)
- y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
- tx.copy_from_numpy(x)
- ty.copy_from_numpy(y)
- out = model(tx)
- loss = autograd.softmax_cross_entropy(out, ty)
- train_correct += accuracy(tensor.to_numpy(out),
- to_categorical(y, num_classes)).astype(
- np.float32)
- train_loss += tensor.to_numpy(loss)[0]
- if not partial_update:
- sgd.backward_and_update(loss)
- else:
- sgd.backward_and_partial_update(loss)
-
- if DIST:
- # Reduce the evaluation accuracy and loss from multiple devices
- reducer = tensor.Tensor((1,), dev, tensor.float32)
- train_correct = reduce_variable(train_correct, sgd, reducer)
- train_loss = reduce_variable(train_loss, sgd, reducer)
-
- # Output the training loss and accuracy
- if ((DIST == False) or (sgd.global_rank == 0)):
- print('Training loss = %f, training accuracy = %f' %
- (train_loss, train_correct /
- (num_train_batch * batch_size * world_size)),
- flush=True)
-
- if partial_update:
- # Sychronize parameters before evaluation phase
- for p in param:
- synchronize(p, sgd)
-
- # Evaulation phase
- autograd.training = False
- for b in range(num_test_batch):
- x = test_x[b * batch_size:(b + 1) * batch_size]
- x = resize_dataset(x, IMG_SIZE)
- y = test_y[b * batch_size:(b + 1) * batch_size]
- tx.copy_from_numpy(x)
- ty.copy_from_numpy(y)
- out_test = model(tx)
- test_correct += accuracy(tensor.to_numpy(out_test),
- to_categorical(y, num_classes))
-
- if DIST:
- # Reduce the evaulation accuracy from multiple devices
- test_correct = reduce_variable(test_correct, sgd, reducer)
-
- # Output the evaluation accuracy
- if ((DIST == False) or (sgd.global_rank == 0)):
- print('Evaluation accuracy = %f, Elapsed Time = %fs' %
- (test_correct / (num_test_batch * batch_size * world_size),
- time.time() - start_time),
- flush=True)
-
-
-if __name__ == '__main__':
-
- DIST = False
- train_cifar10(DIST=DIST)
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+try:
+ import pickle
+except ImportError:
+ import cPickle as pickle
+
+from singa import singa_wrap as singa
+from singa import autograd
+from singa import tensor
+from singa import device
+from singa import opt
+from PIL import Image
+import numpy as np
+import os
+import sys
+import time
+
+
+def load_dataset(filepath):
+ with open(filepath, 'rb') as fd:
+ try:
+ cifar10 = pickle.load(fd, encoding='latin1')
+ except TypeError:
+ cifar10 = pickle.load(fd)
+ image = cifar10['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar10['labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path='cifar-10-batches-py', num_batches=5):
+ labels = []
+ batchsize = 10000
+ images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
+ for did in range(1, num_batches + 1):
+ fname_train_data = dir_path + "/data_batch_{}".format(did)
+ image, label = load_dataset(check_dataset_exist(fname_train_data))
+ images[(did - 1) * batchsize:did * batchsize] = image
+ labels.extend(label)
+ images = np.array(images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return images, labels
+
+
+def load_test_data(dir_path='cifar-10-batches-py'):
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/test_batch"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'Please download the cifar10 dataset using download_data.py (e.g. python ~/singa/examples/cifar10/download_data.py py)'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def normalize_for_resnet(train_x, test_x):
+ mean = [0.4914, 0.4822, 0.4465]
+ std = [0.2023, 0.1994, 0.2010]
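+    # (note: these are the commonly used per-channel RGB mean/std statistics
+    # for CIFAR-10; pixels are first scaled to [0, 1] below)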
+ train_x /= 255
+ test_x /= 255
+    # standardize all three RGB channels
+    for ch in range(0, 3):
+        train_x[:, ch, :, :] -= mean[ch]
+        train_x[:, ch, :, :] /= std[ch]
+        test_x[:, ch, :, :] -= mean[ch]
+        test_x[:, ch, :, :] /= std[ch]
+ return train_x, test_x
+
+
+def resize_dataset(x, IMG_SIZE):
+ num_data = x.shape[0]
+ dim = x.shape[1]
+ X = np.zeros(shape=(num_data, dim, IMG_SIZE, IMG_SIZE), dtype=np.float32)
+ for n in range(0, num_data):
+ for d in range(0, dim):
+ X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+ (IMG_SIZE, IMG_SIZE), Image.BILINEAR),
+ dtype=np.float32)
+ return X
+
+
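+# Usage sketch (illustrative): augmentation pads each 32x32 image to 40x40,
+# takes a random 32x32 crop, and flips it horizontally with probability 0.5,
+# modifying the batch in place, e.g. x = augmentation(train_x[:32], 32).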
+def augmentation(x, batch_size):
+ xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+ for data_num in range(0, batch_size):
+ offset = np.random.randint(8, size=2)
+ x[data_num, :, :, :] = xpad[data_num, :, offset[0]:offset[0] + 32,
+ offset[1]:offset[1] + 32]
+ if_flip = np.random.randint(2)
+ if (if_flip):
+ x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+ return x
+
+
+def accuracy(pred, target):
+ y = np.argmax(pred, axis=1)
+ t = np.argmax(target, axis=1)
+ a = y == t
+ return np.array(a, "int").sum()
+
+
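+# Example (illustrative): to_categorical(np.array([1, 0, 2]), 3) returns the
+# float32 one-hot matrix [[0, 1, 0], [1, 0, 0], [0, 0, 1]].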
+def to_categorical(y, num_classes):
+ y = np.array(y, dtype="int")
+ n = y.shape[0]
+ categorical = np.zeros((n, num_classes))
+ for i in range(0, n):
+ categorical[i, y[i]] = 1
+ categorical = categorical.astype(np.float32)
+ return categorical
+
+
+# All-reduce a NumPy accuracy/loss value across multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+ reducer.copy_from_numpy(variable)
+ dist_opt.all_reduce(reducer.data)
+ dist_opt.wait()
+ output = tensor.to_numpy(reducer)
+ return output
+
+
+# Synchronize a SINGA tensor of initial model parameters across devices
+def synchronize(tensor, dist_opt):
+ dist_opt.all_reduce(tensor.data)
+ dist_opt.wait()
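+    # all_reduce sums the tensor across ranks; dividing by world_size below
+    # leaves every rank holding the average of the initial values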
+ tensor /= dist_opt.world_size
+
+
+# Partition the dataset evenly across ranks for distributed training
+def data_partition(dataset_x, dataset_y, global_rank, world_size):
+ data_per_rank = dataset_x.shape[0] // world_size
+ idx_start = global_rank * data_per_rank
+ idx_end = (global_rank + 1) * data_per_rank
+ return dataset_x[idx_start:idx_end], dataset_y[idx_start:idx_end]
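+# Example (illustrative): with 50000 training samples and world_size=4, each
+# rank receives 12500 samples; rank 1 gets indices 12500..24999:
+#   x1, y1 = data_partition(train_x, train_y, global_rank=1, world_size=4)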
+
+
+def train_cifar10(DIST=False,
+ local_rank=None,
+ world_size=None,
+ nccl_id=None,
+ partial_update=False):
+
+    # Define the hyperparameters for training
+ sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
+ max_epoch = 5
+ batch_size = 32
+
+ train_x, train_y = load_train_data()
+ test_x, test_y = load_test_data()
+ train_x, test_x = normalize_for_resnet(train_x, test_x)
+ IMG_SIZE = 224
+ num_classes = 10
+
+ if DIST:
+ # For distributed GPU training
+ sgd = opt.DistOpt(sgd,
+ nccl_id=nccl_id,
+ local_rank=local_rank,
+ world_size=world_size)
+ dev = device.create_cuda_gpu_on(sgd.local_rank)
+
+ # Dataset partition for distributed training
+ train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
+ sgd.world_size)
+ test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
+ sgd.world_size)
+ world_size = sgd.world_size
+ else:
+ # For single GPU
+ dev = device.create_cuda_gpu()
+ world_size = 1
+
+ from resnet import resnet50
+ model = resnet50(num_classes=num_classes)
+
+ tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ num_train_batch = train_x.shape[0] // batch_size
+ num_test_batch = test_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+ if DIST:
+        # Synchronize the initial parameters across all ranks
+ autograd.training = True
+ x = np.random.randn(batch_size, 3, IMG_SIZE,
+ IMG_SIZE).astype(np.float32)
+ y = np.zeros(shape=(batch_size,), dtype=np.int32)
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out = model(tx)
+ loss = autograd.softmax_cross_entropy(out, ty)
+ param = []
+ for p, _ in autograd.backward(loss):
+ synchronize(p, sgd)
+ param.append(p)
+
+ for epoch in range(max_epoch):
+ start_time = time.time()
+ np.random.shuffle(idx)
+
+        if (not DIST) or (sgd.global_rank == 0):
+ print('Starting Epoch %d:' % (epoch))
+
+ # Training phase
+ autograd.training = True
+ train_correct = np.zeros(shape=[1], dtype=np.float32)
+ test_correct = np.zeros(shape=[1], dtype=np.float32)
+ train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+ for b in range(num_train_batch):
+ x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+ x = augmentation(x, batch_size)
+ x = resize_dataset(x, IMG_SIZE)
+ y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out = model(tx)
+ loss = autograd.softmax_cross_entropy(out, ty)
+ train_correct += accuracy(tensor.to_numpy(out),
+ to_categorical(y, num_classes)).astype(
+ np.float32)
+ train_loss += tensor.to_numpy(loss)[0]
+ if not partial_update:
+ sgd.backward_and_update(loss)
+ else:
+ sgd.backward_and_partial_update(loss)
+
+ if DIST:
+ # Reduce the evaluation accuracy and loss from multiple devices
+ reducer = tensor.Tensor((1,), dev, tensor.float32)
+ train_correct = reduce_variable(train_correct, sgd, reducer)
+ train_loss = reduce_variable(train_loss, sgd, reducer)
+
+ # Output the training loss and accuracy
+        if (not DIST) or (sgd.global_rank == 0):
+ print('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (num_train_batch * batch_size * world_size)),
+ flush=True)
+
+ if partial_update:
+            # Synchronize parameters before the evaluation phase
+ for p in param:
+ synchronize(p, sgd)
+
+        # Evaluation phase
+ autograd.training = False
+ for b in range(num_test_batch):
+ x = test_x[b * batch_size:(b + 1) * batch_size]
+ x = resize_dataset(x, IMG_SIZE)
+ y = test_y[b * batch_size:(b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ out_test = model(tx)
+ test_correct += accuracy(tensor.to_numpy(out_test),
+ to_categorical(y, num_classes))
+
+ if DIST:
+            # Reduce the evaluation accuracy from multiple devices
+ test_correct = reduce_variable(test_correct, sgd, reducer)
+
+ # Output the evaluation accuracy
+        if (not DIST) or (sgd.global_rank == 0):
+ print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (num_test_batch * batch_size * world_size),
+ time.time() - start_time),
+ flush=True)
+
+
+if __name__ == '__main__':
+
+ DIST = False
+ train_cifar10(DIST=DIST)
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_dist.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_dist.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_dist.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/resnet_dist.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/sparsification_mnist.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/sparsification_mnist.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/sparsification_mnist.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/sparsification_mnist.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/xceptionnet.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/xceptionnet.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/xceptionnet.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/autograd/xceptionnet.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/benchmark.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/benchmark.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/benchmark.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/benchmark.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py
similarity index 97%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py
index 5caaf30f44..74230d0de7 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar10.py
@@ -1,89 +1,89 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-try:
- import pickle
-except ImportError:
- import cPickle as pickle
-
-import numpy as np
-import os
-import sys
-
-
-def load_dataset(filepath):
- with open(filepath, 'rb') as fd:
- try:
- cifar10 = pickle.load(fd, encoding='latin1')
- except TypeError:
- cifar10 = pickle.load(fd)
- image = cifar10['data'].astype(dtype=np.uint8)
- image = image.reshape((-1, 3, 32, 32))
- label = np.asarray(cifar10['labels'], dtype=np.uint8)
- label = label.reshape(label.size, 1)
- return image, label
-
-
-def load_train_data(dir_path='/tmp/cifar-10-batches-py', num_batches=5): # need to save to specific local directories
- labels = []
- batchsize = 10000
- images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
- for did in range(1, num_batches + 1):
- fname_train_data = dir_path + "/data_batch_{}".format(did)
- image, label = load_dataset(check_dataset_exist(fname_train_data))
- images[(did - 1) * batchsize:did * batchsize] = image
- labels.extend(label)
- images = np.array(images, dtype=np.float32)
- labels = np.array(labels, dtype=np.int32)
- return images, labels
-
-
-def load_test_data(dir_path='/tmp/cifar-10-batches-py'): # need to save to specific local directories
- images, labels = load_dataset(check_dataset_exist(dir_path + "/test_batch"))
- return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def check_dataset_exist(dirpath):
- if not os.path.exists(dirpath):
- print(
- 'Please download the cifar10 dataset using python data/download_cifar10.py'
- )
- sys.exit(0)
- return dirpath
-
-
-def normalize(train_x, val_x):
- mean = [0.4914, 0.4822, 0.4465]
- std = [0.2023, 0.1994, 0.2010]
- train_x /= 255
- val_x /= 255
- for ch in range(0, 2):
- train_x[:, ch, :, :] -= mean[ch]
- train_x[:, ch, :, :] /= std[ch]
- val_x[:, ch, :, :] -= mean[ch]
- val_x[:, ch, :, :] /= std[ch]
- return train_x, val_x
-
-def load():
- train_x, train_y = load_train_data()
- val_x, val_y = load_test_data()
- train_x, val_x = normalize(train_x, val_x)
- train_y = train_y.flatten()
- val_y = val_y.flatten()
- return train_x, train_y, val_x, val_y
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+try:
+ import pickle
+except ImportError:
+ import cPickle as pickle
+
+import numpy as np
+import os
+import sys
+
+
+def load_dataset(filepath):
+ with open(filepath, 'rb') as fd:
+ try:
+ cifar10 = pickle.load(fd, encoding='latin1')
+ except TypeError:
+ cifar10 = pickle.load(fd)
+ image = cifar10['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar10['labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path='/tmp/cifar-10-batches-py', num_batches=5):  # the dataset must be saved under this local directory
+ labels = []
+ batchsize = 10000
+ images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
+ for did in range(1, num_batches + 1):
+ fname_train_data = dir_path + "/data_batch_{}".format(did)
+ image, label = load_dataset(check_dataset_exist(fname_train_data))
+ images[(did - 1) * batchsize:did * batchsize] = image
+ labels.extend(label)
+ images = np.array(images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return images, labels
+
+
+def load_test_data(dir_path='/tmp/cifar-10-batches-py'):  # the dataset must be saved under this local directory
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/test_batch"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'Please download the cifar10 dataset using python data/download_cifar10.py'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def normalize(train_x, val_x):
+ mean = [0.4914, 0.4822, 0.4465]
+ std = [0.2023, 0.1994, 0.2010]
+ train_x /= 255
+ val_x /= 255
+    # standardize all three RGB channels
+    for ch in range(0, 3):
+        train_x[:, ch, :, :] -= mean[ch]
+        train_x[:, ch, :, :] /= std[ch]
+        val_x[:, ch, :, :] -= mean[ch]
+        val_x[:, ch, :, :] /= std[ch]
+ return train_x, val_x
+
+def load():
+ train_x, train_y = load_train_data()
+ val_x, val_y = load_test_data()
+ train_x, val_x = normalize(train_x, val_x)
+ train_y = train_y.flatten()
+ val_y = val_y.flatten()
+ return train_x, train_y, val_x, val_y
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py
similarity index 96%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py
index 88b943f074..b9f121b0a7 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/cifar100.py
@@ -1,81 +1,81 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-try:
- import pickle
-except ImportError:
- import cPickle as pickle
-
-import numpy as np
-import os
-import sys
-
-
-def load_dataset(filepath):
- with open(filepath, 'rb') as fd:
- try:
- cifar100 = pickle.load(fd, encoding='latin1')
- except TypeError:
- cifar100 = pickle.load(fd)
- image = cifar100['data'].astype(dtype=np.uint8)
- image = image.reshape((-1, 3, 32, 32))
- label = np.asarray(cifar100['fine_labels'], dtype=np.uint8)
- label = label.reshape(label.size, 1)
- return image, label
-
-
-def load_train_data(dir_path='/tmp/cifar-100-python'):
- images, labels = load_dataset(check_dataset_exist(dir_path + "/train"))
- return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def load_test_data(dir_path='/tmp/cifar-100-python'):
- images, labels = load_dataset(check_dataset_exist(dir_path + "/test"))
- return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def check_dataset_exist(dirpath):
- if not os.path.exists(dirpath):
- print(
- 'Please download the cifar100 dataset using python data/download_cifar100.py'
- )
- sys.exit(0)
- return dirpath
-
-
-def normalize(train_x, val_x):
- mean = [0.4914, 0.4822, 0.4465]
- std = [0.2023, 0.1994, 0.2010]
- train_x /= 255
- val_x /= 255
- for ch in range(0, 2):
- train_x[:, ch, :, :] -= mean[ch]
- train_x[:, ch, :, :] /= std[ch]
- val_x[:, ch, :, :] -= mean[ch]
- val_x[:, ch, :, :] /= std[ch]
- return train_x, val_x
-
-
-def load():
- train_x, train_y = load_train_data()
- val_x, val_y = load_test_data()
- train_x, val_x = normalize(train_x, val_x)
- train_y = train_y.flatten()
- val_y = val_y.flatten()
- return train_x, train_y, val_x, val_y
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+try:
+ import pickle
+except ImportError:
+ import cPickle as pickle
+
+import numpy as np
+import os
+import sys
+
+
+def load_dataset(filepath):
+ with open(filepath, 'rb') as fd:
+ try:
+ cifar100 = pickle.load(fd, encoding='latin1')
+ except TypeError:
+ cifar100 = pickle.load(fd)
+ image = cifar100['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar100['fine_labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path='/tmp/cifar-100-python'):
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/train"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def load_test_data(dir_path='/tmp/cifar-100-python'):
+ images, labels = load_dataset(check_dataset_exist(dir_path + "/test"))
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def check_dataset_exist(dirpath):
+ if not os.path.exists(dirpath):
+ print(
+ 'Please download the cifar100 dataset using python data/download_cifar100.py'
+ )
+ sys.exit(0)
+ return dirpath
+
+
+def normalize(train_x, val_x):
+ mean = [0.4914, 0.4822, 0.4465]
+ std = [0.2023, 0.1994, 0.2010]
+ train_x /= 255
+ val_x /= 255
+    # standardize all three RGB channels
+    for ch in range(0, 3):
+        train_x[:, ch, :, :] -= mean[ch]
+        train_x[:, ch, :, :] /= std[ch]
+        val_x[:, ch, :, :] -= mean[ch]
+        val_x[:, ch, :, :] /= std[ch]
+ return train_x, val_x
+
+
+def load():
+ train_x, train_y = load_train_data()
+ val_x, val_y = load_test_data()
+ train_x, val_x = normalize(train_x, val_x)
+ train_y = train_y.flatten()
+ val_y = val_y.flatten()
+ return train_x, train_y, val_x, val_y
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar10.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar10.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar10.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar10.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar100.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar100.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar100.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_cifar100.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_mnist.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_mnist.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_mnist.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/download_mnist.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/mnist.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/mnist.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/mnist.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/data/mnist.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/alexnet.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/alexnet.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/alexnet.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/alexnet.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/cnn.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/cnn.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/cnn.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/cnn.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/resnet.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/resnet.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/resnet.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/resnet.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/xceptionnet.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/xceptionnet.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/xceptionnet.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/model/xceptionnet.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/pkg_model_code/model.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/pkg_model_code/model.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/pkg_model_code/model.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/pkg_model_code/model.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/run.sh b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/run.sh
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/run.sh
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/run.sh
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_cnn.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_cnn.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_cnn.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_cnn.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_mpi.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_mpi.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_mpi.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_mpi.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_ms_model.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_ms_model.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_ms_model.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_ms_model.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_multiprocess.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_multiprocess.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_multiprocess.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/cnn_ms/train_multiprocess.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/model.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/model.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/model.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/model.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/native.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/native.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/native.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/ms_model_mlp/native.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/model.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/model.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/model.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/model.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/native.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/native.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/native.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/algo/singa_ms/msmlp/native.py
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/concurrent_evaluator.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/concurrent_evaluator.py
new file mode 100644
index 0000000000..ea53d4e667
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/concurrent_evaluator.py
@@ -0,0 +1,213 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+try:
+ from thop import profile
+except ImportError:
+ pass
+from src.common.constant import Config, CommonVars
+from src.common.structure import ModelAcquireData
+from src.eva_engine import evaluator_register
+from src.query_api.interface import SimulateScore
+from src.dataset_utils import dataset
+from torch.utils.data import DataLoader
+import torch
+import time
+from torch import nn
+from src.search_space.core.space import SpaceWrapper
+from multiprocessing import Manager
+import gc
+
+
+class ConcurrentP1Evaluator:
+
+ def __init__(self, device: str, num_label: int, dataset_name: str,
+ search_space_ins: SpaceWrapper,
+ train_loader: DataLoader, is_simulate: bool, metrics: str = CommonVars.ExpressFlow,
+ enable_cache: bool = False):
+ """
+ :param device:
+ :param num_label:
+ :param dataset_name:
+ :param search_space_ins:
+ :param search_space_ins:
+ :param train_loader:
+ :param is_simulate:
+ :param metrics: which TFMEM to use?
+ :param enable_cache: if cache embedding for scoring? only used on structued data
+ """
+ self.metrics = metrics
+ self.is_simulate = is_simulate
+
+ self.dataset_name = dataset_name
+
+ self.search_space_ins = search_space_ins
+
+ self.device = device
+ self.num_labels = num_label
+
+ self.score_getter = None
+
+ # get one mini batch
+ if not self.is_simulate:
+ if self.dataset_name in [Config.c10, Config.c100, Config.imgNet]:
+ # for img data
+ self.mini_batch, self.mini_batch_targets = dataset.get_mini_batch(
+ dataloader=train_loader,
+ sample_alg="random",
+ batch_size=32,
+ num_classes=self.num_labels)
+ self.mini_batch.to(self.device)
+ self.mini_batch_targets.to(self.device)
+ elif self.dataset_name in [Config.Criteo, Config.Frappe, Config.UCIDataset]:
+ # this is structure data
+ batch = iter(train_loader).__next__()
+ target = batch['y'].type(torch.LongTensor).to(self.device)
+ batch['id'] = batch['id'].to(self.device)
+ batch['value'] = batch['value'].to(self.device)
+ self.mini_batch = batch
+ self.mini_batch_targets = target.to(self.device)
+ else:
+ raise NotImplementedError
+
+ print("GC the large train data loader")
+ del train_loader
+ # Force garbage collection
+ gc.collect()
+
+ self.time_usage = {
+ "latency": 0.0,
+ "io_latency": 0.0,
+ "compute_latency": 0.0,
+ "track_compute": [], # compute time
+ "track_io_model_init": [], # init model weight
+ "track_io_model_load": [], # load into GPU/CPU
+ "track_io_data": [], # context switch
+ }
+
+        # this is for the experiment setup
+ self.enable_cache = enable_cache
+ if self.enable_cache:
+            # todo: warm up for concurrent usage; this is only tested for MLP with embedding.
+ new_model = self.search_space_ins.new_arch_scratch_with_default_setting("8-8-8-8", bn=False)
+ new_model.init_embedding()
+ # shared embedding
+ manager = Manager()
+ self.model_cache = manager.dict()
+ self.model_cache["model"] = new_model.embedding
+ self.get_cache_data = self._get_cache_data_enabled
+ self.set_cache_data = self._set_cache_data_enabled
+ else:
+ # this is the baseline, independently run
+ self.get_cache_data = self._get_cache_data_disabled
+ self.set_cache_data = self._set_cache_data_disabled
+
+ def _get_cache_data_enabled(self):
+ return self.model_cache["model"]
+
+ def _set_cache_data_enabled(self, data):
+ self.model_cache["model"] = data
+
+ def _get_cache_data_disabled(self):
+ return None
+
+ def _set_cache_data_disabled(self, data):
+ pass
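+    # Usage sketch (illustrative): with enable_cache=True, get_cache_data and
+    # set_cache_data operate on a Manager-backed dict shared across worker
+    # processes, so each scored model reuses the embedding warmed up in
+    # __init__ via new_model.init_embedding(self.get_cache_data()); with
+    # enable_cache=False both calls are no-ops and every model is scored
+    # independently (the baseline).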
+
+ def if_cuda_avaiable(self):
+ if "cuda" in self.device:
+ return True
+ else:
+ return False
+
+ def p1_evaluate(self, data_str: str) -> dict:
+ """
+ :param data_str: encoded ModelAcquireData
+ :return:
+ """
+
+ model_acquire = ModelAcquireData.deserialize(data_str)
+ return self._p1_evaluate_online(model_acquire)
+
+ def _p1_evaluate_online(self, model_acquire: ModelAcquireData) -> dict:
+
+ model_encoding = model_acquire.model_encoding
+
+        # score using only one metric
+ if self.metrics == CommonVars.PRUNE_SYNFLOW or self.metrics == CommonVars.ExpressFlow:
+ bn = False
+ else:
+ bn = True
+
+ # measure model load time
+ begin = time.time()
+ new_model = self.search_space_ins.new_arch_scratch_with_default_setting(model_encoding, bn=bn)
+
+ # mlp have embedding layer, which can be cached, optimization!
+ if self.search_space_ins.name == Config.MLPSP:
+ if self.enable_cache:
+ new_model.init_embedding(self.get_cache_data())
+ if self.get_cache_data() is None:
+ self.set_cache_data(new_model.embedding.to(self.device))
+ else:
+ new_model.init_embedding()
+
+ self.time_usage["track_io_model_init"].append(time.time() - begin)
+
+ begin = time.time()
+ new_model = new_model.to(self.device)
+
+ self.time_usage["track_io_model_load"].append(time.time() - begin)
+
+ # measure data load time
+ begin = time.time()
+ mini_batch = self.data_pre_processing(self.metrics, new_model)
+ self.time_usage["track_io_data"].append(time.time() - begin)
+
+ _score, curr_time = evaluator_register[self.metrics].evaluate_wrapper(
+ arch=new_model,
+ device=self.device,
+ space_name=self.search_space_ins.name,
+ batch_data=mini_batch,
+ batch_labels=self.mini_batch_targets)
+
+ self.time_usage["track_compute"].append(curr_time)
+
+ del new_model
+ model_score = {self.metrics: _score}
+ return model_score
+
+ def data_pre_processing(self, metrics: str, new_model: nn.Module):
+ """
+        To measure the I/O and compute time more accurately, we perform the data pre-processing here.
+ """
+
+        # for these two metrics, we use an all-ones embedding for efficiency (as in their papers)
+ if metrics in [CommonVars.ExpressFlow, CommonVars.PRUNE_SYNFLOW]:
+ if isinstance(self.mini_batch, torch.Tensor):
+ feature_dim = list(self.mini_batch[0, :].shape)
+ # add one dimension to feature dim, [1] + [3, 32, 32] = [1, 3, 32, 32]
+ mini_batch = torch.ones([1] + feature_dim).float().to(self.device)
+ else:
+ # this is for the tabular data,
+ mini_batch = new_model.generate_all_ones_embedding().float().to(self.device)
+ else:
+ mini_batch = self.mini_batch
+
+ return mini_batch
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/evaluator.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/evaluator.py
new file mode 100644
index 0000000000..3255d69bcc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/evaluator.py
@@ -0,0 +1,526 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+# this is for checking the flops and params
+try:
+ from thop import profile
+except ImportError:
+ pass
+from src.common.constant import Config, CommonVars
+from src.common.structure import ModelAcquireData
+from src.eva_engine import evaluator_register
+from src.query_api.interface import SimulateScore
+from src.dataset_utils import dataset
+from torch.utils.data import DataLoader
+import torch
+import time
+from torch import nn
+from src.search_space.core.space import SpaceWrapper
+import psycopg2
+from typing import Any, List, Dict, Tuple
+from src.logger import logger
+
+
+class P1Evaluator:
+
+ def __init__(self, device: str, num_label: int, dataset_name: str,
+ search_space_ins: SpaceWrapper,
+ train_loader: DataLoader, is_simulate: bool, metrics: str = CommonVars.ExpressFlow,
+ enable_cache: bool = False, db_config: Dict = None,
+ data_retrievel: str = "sql"):
+ """
+ :param device:
+ :param num_label:
+ :param dataset_name:
+ :param search_space_ins:
+ :param search_space_ins:
+ :param train_loader:
+ :param is_simulate:
+ :param metrics: which TFMEM to use?
+ :param enable_cache: if cache embedding for scoring? only used on structued data
+ :param db_config: how to connect to databaes
+ :param data_retrievel: sql or spi
+ """
+ self.metrics = metrics
+ self.is_simulate = is_simulate
+        # used only when is_simulate = True
+ self.score_getter = None
+
+ # dataset settings
+ self.dataset_name = dataset_name
+ self.train_loader = train_loader
+ self.num_labels = num_label
+
+ self.search_space_ins = search_space_ins
+
+ self.device = device
+
+        # this is for the experiment setup
+ self.enable_cache = enable_cache
+ self.model_cache = None
+
+ # performance records
+ self.time_usage = {
+ "model_id": [],
+
+ "latency": 0.0,
+ "io_latency": 0.0,
+ "compute_latency": 0.0,
+
+ "track_compute": [], # compute time
+ "track_io_model_init": [], # init model weight
+ "track_io_model_load": [], # load model into GPU/CPU
+ "track_io_res_load": [], # load result into GPU/CPU
+ "track_io_data_retrievel": [], # release data
+ "track_io_data_preprocess": [], # pre-processing
+ }
+
+ self.db_config = db_config
+ self.last_id = -1
+ self.data_retrievel = data_retrievel
+
+        # for benchmarking, we only use one batch for fast evaluation
+ self.cached_mini_batch = None
+ self.cached_mini_batch_target = None
+
+ self.conn = None
+
+ def if_cuda_avaiable(self):
+ if "cuda" in self.device:
+ return True
+ else:
+ return False
+
+ def p1_evaluate(self, data_str: dict) -> dict:
+ """
+ :param data_str: encoded ModelAcquireData
+ :return:
+ """
+
+ model_acquire = ModelAcquireData.deserialize(data_str)
+
+ if self.is_simulate:
+ if self.metrics == "jacflow":
+ return self._p1_evaluate_simu_jacflow(model_acquire)
+ else:
+ return self._p1_evaluate_simu(model_acquire)
+ else:
+ return self._p1_evaluate_online(model_acquire)
+
+ def measure_model_flops(self, data_str: dict, batch_size: int, channel_size: int):
+ # todo: check the package
+        mini_batch, mini_batch_targets, _, _ = self.retrievel_data(None)
+ model_acquire = ModelAcquireData.deserialize(data_str)
+ model_encoding = model_acquire.model_encoding
+ new_model = self.search_space_ins.new_arch_scratch_with_default_setting(model_encoding, bn=True)
+ if self.search_space_ins.name == Config.MLPSP:
+ new_model.init_embedding(requires_grad=True)
+ new_model = new_model.to(self.device)
+ flops, params = profile(new_model, inputs=(mini_batch,))
+ print('FLOPs = ' + str(flops / 1000 ** 3) + 'G')
+ print('Params = ' + str(params / 1000 ** 2) + 'M')
+
+ # # 1. Score NasWot
+ # new_model = self.search_space_ins.new_arch_scratch_with_default_setting(model_encoding, bn=True)
+ # new_model = new_model.to(self.device)
+ # naswot_score, _ = evaluator_register[CommonVars.NAS_WOT].evaluate_wrapper(
+ # arch=new_model,
+ # device=self.device,
+ # space_name = self.search_space_ins.name,
+ # batch_data=self.mini_batch,
+ # batch_labels=self.mini_batch_targets)
+ #
+ # # 2. Score SynFlow
+ # new_model = self.search_space_ins.new_arch_scratch_with_default_setting(model_encoding, bn=False)
+ # new_model = new_model.to(self.device)
+ # synflow_score, _ = evaluator_register[CommonVars.PRUNE_SYNFLOW].evaluate_wrapper(
+ # arch=new_model,
+ # device=self.device,
+ # space_name = self.search_space_ins.name,
+ # batch_data=self.mini_batch,
+ # batch_labels=self.mini_batch_targets)
+ #
+ # # 3. combine the result and return
+ # model_score = {CommonVars.NAS_WOT: naswot_score,
+ # CommonVars.PRUNE_SYNFLOW: synflow_score}
+
+ def _p1_evaluate_online(self, model_acquire: ModelAcquireData) -> dict:
+
+ model_encoding = model_acquire.model_encoding
+
+ # 1. Get a batch of data
+ mini_batch, mini_batch_targets, data_load_time_usage, data_pre_process_time = self.retrievel_data(model_acquire)
+ # logger.info(
+ # f"mini_batch sizes - id: {mini_batch['id'].size()}, value: {mini_batch['value'].size()},
+ # targets: {mini_batch_targets.size()}")
+ # print(
+ # f"mini_batch sizes - id: {mini_batch['id'].size()}, value: {mini_batch['value'].size()},
+ # targets: {mini_batch_targets.size()}")
+ self.time_usage["track_io_data_retrievel"].append(data_load_time_usage)
+
+ # 2. Score all tfmem
+ if self.metrics == CommonVars.ALL_EVALUATOR:
+ model_score = {}
+ for alg, score_evaluator in evaluator_register.items():
+ if alg == CommonVars.PRUNE_SYNFLOW or alg == CommonVars.ExpressFlow:
+ bn = False
+ else:
+ bn = True
+ new_model = self.search_space_ins.new_arch_scratch_with_default_setting(model_encoding, bn=bn)
+ if self.search_space_ins.name == Config.MLPSP:
+ new_model.init_embedding()
+ new_model = new_model.to(self.device)
+
+ mini_batch = self.data_pre_processing(mini_batch, self.metrics, new_model)
+
+ _score, _ = score_evaluator.evaluate_wrapper(
+ arch=new_model,
+ device=self.device,
+ space_name=self.search_space_ins.name,
+ batch_data=mini_batch,
+ batch_labels=mini_batch_targets)
+
+ _score = _score.item()
+ model_score[alg] = abs(_score)
+
+ # clear the cache
+ if "cuda" in self.device:
+ torch.cuda.empty_cache()
+
+ elif self.metrics == CommonVars.JACFLOW:
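+            # (sketch: JacFlow here sums the SynFlow score, computed on an
+            # all-ones batch, with the NasWot score computed on the real
+            # mini-batch)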
+ begin = time.time()
+ new_model = self.search_space_ins.new_arch_scratch_with_default_setting(model_encoding, bn=False)
+ if self.search_space_ins.name == Config.MLPSP:
+ if self.enable_cache:
+ new_model.init_embedding(self.model_cache)
+ if self.model_cache is None:
+ self.model_cache = new_model.embedding.to(self.device)
+ else:
+ # init embedding every time created a new model
+ new_model.init_embedding()
+ time_usage = time.time() - begin
+ self.time_usage["track_io_model_init"].append(time_usage)
+ print("Model Init", self.enable_cache, time_usage)
+
+ if self.if_cuda_avaiable():
+ begin = time.time()
+ new_model = new_model.to(self.device)
+ torch.cuda.synchronize()
+ self.time_usage["track_io_model_load"].append(time.time() - begin)
+ else:
+ self.time_usage["track_io_model_load"].append(0)
+
+ # measure data load time
+ begin = time.time()
+ all_one_mini_batch = self.data_pre_processing(mini_batch, CommonVars.PRUNE_SYNFLOW, new_model)
+ self.time_usage["track_io_data_preprocess"].append(data_pre_process_time + time.time() - begin)
+ if self.search_space_ins.name == Config.MLPSP:
+ print("compute with done", all_one_mini_batch.size(), mini_batch["id"].size(), mini_batch["value"].size())
+ logger.info(
+ f"mini_batch sizes - {all_one_mini_batch.size()} "
+ f"id: {mini_batch['id'].size()}, value: {mini_batch['value'].size()},"
+ f"targets: {mini_batch_targets.size()}")
+
+ _score_1, compute_time1 = evaluator_register[CommonVars.PRUNE_SYNFLOW].evaluate_wrapper(
+ arch=new_model,
+ device=self.device,
+ space_name=self.search_space_ins.name,
+ batch_data=all_one_mini_batch,
+ batch_labels=mini_batch_targets)
+
+ _score_2, compute_time2 = evaluator_register[CommonVars.NAS_WOT].evaluate_wrapper(
+ arch=new_model,
+ device=self.device,
+ space_name=self.search_space_ins.name,
+ batch_data=mini_batch,
+ batch_labels=mini_batch_targets)
+ print(compute_time1, compute_time2)
+ logger.info(f"{compute_time1}, {compute_time2}")
+
+ self.time_usage["track_compute"].append(compute_time1 + compute_time2)
+ self.time_usage["model_id"].append(model_encoding)
+
+ if self.if_cuda_avaiable():
+ begin = time.time()
+ _score = _score_1.item() + _score_2
+ torch.cuda.synchronize()
+ self.time_usage["track_io_res_load"].append(time.time() - begin)
+ else:
+ _score = _score_1.item() + _score_2
+ self.time_usage["track_io_res_load"].append(0)
+
+ model_score = {self.metrics: float(abs(_score))}
+ del new_model
+        # 2. Score using only one metric
+ else:
+ if self.metrics == CommonVars.PRUNE_SYNFLOW or self.metrics == CommonVars.ExpressFlow:
+ bn = False
+ else:
+ bn = True
+ # measure model load time
+ begin = time.time()
+ new_model = self.search_space_ins.new_arch_scratch_with_default_setting(model_encoding, bn=bn)
+
+ # # mlp have embedding layer, which can be cached, optimization!
+ # if self.search_space_ins.name == Config.MLPSP:
+ # if self.enable_cache:
+ # new_model.init_embedding(self.model_cache)
+ # if self.model_cache is None:
+ # self.model_cache = new_model.embedding.to(self.device)
+ # else:
+ # # init embedding every time created a new model
+ # new_model.init_embedding()
+
+ self.time_usage["track_io_model_init"].append(time.time() - begin)
+
+ if self.if_cuda_avaiable():
+ begin = time.time()
+ new_model = new_model.to(self.device)
+ torch.cuda.synchronize()
+ self.time_usage["track_io_model_load"].append(time.time() - begin)
+ else:
+ self.time_usage["track_io_model_load"].append(0)
+
+ # measure data load time
+ begin = time.time()
+ mini_batch = self.data_pre_processing(mini_batch, self.metrics, new_model)
+ self.time_usage["track_io_data_preprocess"].append(data_pre_process_time + time.time() - begin)
+
+ _score, compute_time = evaluator_register[self.metrics].evaluate_wrapper(
+ arch=new_model,
+ device=self.device,
+ space_name=self.search_space_ins.name,
+ batch_data=mini_batch,
+ batch_labels=mini_batch_targets)
+
+ self.time_usage["track_compute"].append(compute_time)
+
+ if self.if_cuda_avaiable():
+ begin = time.time()
+ _score = _score.item()
+ torch.cuda.synchronize()
+ self.time_usage["track_io_res_load"].append(time.time() - begin)
+
+ else:
+ _score = _score.item()
+ self.time_usage["track_io_res_load"].append(0)
+
+ model_score = {self.metrics: abs(_score)}
+ del new_model
+ return model_score
+
+ def _p1_evaluate_simu_jacflow(self, model_acquire: ModelAcquireData) -> dict:
+ """
+        This involves getting the rank and the JacFlow score.
+ """
+ if self.score_getter is None:
+ self.score_getter = SimulateScore(space_name=self.search_space_ins.name,
+ dataset_name=self.dataset_name)
+
+ model_score = self.score_getter.query_tfmem_rank_score(arch_id=model_acquire.model_id)
+
+ return model_score
+
+ def _p1_evaluate_simu(self, model_acquire: ModelAcquireData) -> dict:
+ """
+        This simulates getting all scores.
+ """
+ if self.score_getter is None:
+ self.score_getter = SimulateScore(space_name=self.search_space_ins.name,
+ dataset_name=self.dataset_name)
+
+ score = self.score_getter.query_all_tfmem_score(arch_id=model_acquire.model_id)
+ model_score = {self.metrics: abs(float(score[self.metrics]))}
+ return model_score
+
+ def retrievel_data(self, model_acquire):
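+        # Retrieval modes (descriptive sketch): image datasets sample a
+        # mini-batch from the DataLoader; tabular datasets without a loader
+        # fetch rows from the database via SQL or SPI; with a loader, one
+        # mini-batch is cached and reused; in simulate mode random tensors
+        # are returned.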
+ if not self.is_simulate:
+ if self.dataset_name in [Config.c10, Config.c100, Config.imgNet, Config.imgNetFull]:
+ if self.train_loader is None:
+                    raise ValueError(f"self.train_loader is None for {self.dataset_name}")
+ # for img data
+ begin = time.time()
+ mini_batch, mini_batch_targets = dataset.get_mini_batch(
+ dataloader=self.train_loader,
+ sample_alg="random",
+ batch_size=model_acquire.batch_size,
+ num_classes=self.num_labels)
+ mini_batch.to(self.device)
+ mini_batch_targets.to(self.device)
+ # wait for moving data to GPU
+ if self.if_cuda_avaiable():
+ torch.cuda.synchronize()
+ time_usage = time.time() - begin
+                # todo: this timing is inaccurate
+ return mini_batch, mini_batch_targets, time_usage, 0
+ elif self.dataset_name in [Config.Criteo, Config.Frappe, Config.UCIDataset]:
+ if self.train_loader is None:
+ if self.data_retrievel == "sql":
+ batch, time_usage = self._retrievel_from_db_sql(model_acquire.batch_size)
+ data_tensor, y_tensor, process_time = self.sql_batch_data_pre_processing(batch)
+ return data_tensor, y_tensor, time_usage, process_time
+ elif self.data_retrievel == "spi":
+ batch, time_usage = self._retrievel_from_db_spi(model_acquire)
+ # pre-processing
+ begin = time.time()
+ id_tensor = torch.LongTensor(batch[:, 1::2]).to(self.device)
+ value_tensor = torch.FloatTensor(batch[:, 2::2]).to(self.device)
+ y_tensor = torch.FloatTensor(batch[:, 0:1]).to(self.device)
+ data_tensor = {'id': id_tensor, 'value': value_tensor, 'y': y_tensor}
+ logger.info(id_tensor.size())
+ return data_tensor, y_tensor, time_usage + time.time() - begin, 0
+ else:
+ if self.cached_mini_batch is None and self.cached_mini_batch_target is None:
+ # this is structure data
+ begin = time.time()
+ batch = iter(self.train_loader).__next__()
+ target = batch['y'].type(torch.LongTensor).to(self.device)
+ batch['id'] = batch['id'].to(self.device)
+ batch['value'] = batch['value'].to(self.device)
+
+ # wait for moving data to GPU
+ if self.if_cuda_avaiable():
+ torch.cuda.synchronize()
+ time_usage = time.time() - begin
+ self.cached_mini_batch = batch
+ self.cached_mini_batch_target = target
+ return batch, target, time_usage, 0
+ else:
+ return self.cached_mini_batch, self.cached_mini_batch_target, 0, 0
+ else:
+            # this branch is for testing ExpressFlow
+            # todo: debug; this is for debugging, manually tune it
+ y_tensor = torch.rand(1)
+ dimensions = 2000
+ data_tensor = {'id': torch.rand([1, dimensions]), 'value': torch.rand([1, dimensions]), 'y': y_tensor}
+ return data_tensor, y_tensor, 0, 0
+
+ def connect_to_db(self):
+ try:
+ self.conn = psycopg2.connect(
+ dbname=self.db_config["db_name"],
+ user=self.db_config["db_user"],
+ host=self.db_config["db_host"],
+ port=self.db_config["db_port"]
+ )
+ except Exception as e:
+ print(f"Error connecting to the database: {e}")
+
+ def _retrievel_from_db_sql(self, batch_size):
+
+ begin_time = time.time()
+ if self.conn is None or self.conn.closed:
+ # If the connection is not established or was closed, reconnect.
+ self.connect_to_db()
+
+ # fetch and preprocess data from PostgreSQL
+ cur = self.conn.cursor()
+
+ cur.execute(f"SELECT * FROM {self.dataset_name}_train WHERE id > {self.last_id} LIMIT {batch_size};")
+ rows = cur.fetchall()
+
+ if self.last_id <= 80000:
+ # Update last_id with max id of fetched rows
+ self.last_id = max(row[0] for row in rows) # assuming 'id' is at index 0
+ else:
+            # Otherwise, reset last_id to restart the scan from the beginning
+ self.last_id = 0
+
+        # record the elapsed time of this fetch
+ time_usage = time.time() - begin_time
+ return rows, time_usage
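+    # (Illustrative: with dataset_name="frappe" and batch_size=32, the first
+    # call issues roughly
+    #   SELECT * FROM frappe_train WHERE id > -1 LIMIT 32;
+    # and later calls advance last_id to page through the table.)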
+
+ def _retrievel_from_db_spi(self, model_acquire):
+ batch = model_acquire.spi_mini_batch
+ data_retrieval_time_usage = model_acquire.spi_seconds
+ return batch, data_retrieval_time_usage
+
+ def data_pre_processing(self, mini_batch, metrics: str, new_model: nn.Module):
+
+        # for these two metrics, we use an all-ones embedding for efficiency (as in their papers)
+ if metrics in [CommonVars.ExpressFlow, CommonVars.PRUNE_SYNFLOW]:
+ if isinstance(mini_batch, torch.Tensor):
+ feature_dim = list(mini_batch[0, :].shape)
+ # add one dimension to feature dim, [1] + [3, 32, 32] = [1, 3, 32, 32]
+ mini_batch = torch.ones([1] + feature_dim).float().to(self.device)
+ else:
+ # this is for the tabular data,
+ mini_batch = new_model.generate_all_ones_embedding().float().to(self.device)
+ # print(mini_batch.size())
+ else:
+ # for others, skip preprocessing
+ pass
+
+ # wait for moving data to GPU
+ if self.if_cuda_avaiable():
+ torch.cuda.synchronize()
+ return mini_batch
+
+ def sql_batch_data_pre_processing(self, queryed_rows: List[Tuple]):
+ """
+        queryed_rows: e.g. [('0', '0', '123:123', '123:123', '123:123'), ...]
+ """
+
+ # def decode_libsvm(columns):
+ # # Decode without additional mapping or zipping, directly processing the splits.
+ # ids = []
+ # values = []
+ # for col in columns[2:]:
+ # id, value = col.split(':')
+ # ids.append(int(id))
+ # values.append(float(value))
+ # return {'id': ids, 'value': values, 'y': int(columns[1])}
+
+        def decode_libsvm(columns):
+            map_func = lambda pair: (int(pair[0]), float(pair[1]))
+            # column 0 is the row id, column 1 is the label; the rest are id:value pairs
+            ids, values = zip(*map(lambda col: map_func(col.split(':')), columns[2:]))
+            sample = {'id': list(ids),
+                      'value': list(values),
+                      'y': int(columns[1])}
+            return sample
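+        # illustrative example (hypothetical row): decode_libsvm(['7', '1', '3:0.5', '9:1.0'])
+        # returns {'id': [3, 9], 'value': [0.5, 1.0], 'y': 1}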
+
+ def pre_processing(mini_batch_data: List[Tuple]):
+ """
+            mini_batch_data: e.g. [('0', '0', '123:123', '123:123', '123:123'), ...]
+ """
+ sample_lines = len(mini_batch_data)
+ feat_id = []
+ feat_value = []
+ y = []
+
+ for i in range(sample_lines):
+ row_value = mini_batch_data[i]
+ sample = decode_libsvm(list(row_value))
+ feat_id.append(sample['id'])
+ feat_value.append(sample['value'])
+ y.append(sample['y'])
+ return {'id': feat_id, 'value': feat_value, 'y': y}
+
+ begin = time.time()
+ batch = pre_processing(queryed_rows)
+ id_tensor = torch.LongTensor(batch['id']).to(self.device)
+ value_tensor = torch.FloatTensor(batch['value']).to(self.device)
+ y_tensor = torch.FloatTensor(batch['y']).to(self.device)
+ data_tensor = {'id': id_tensor, 'value': value_tensor, 'y': y_tensor}
+ # wait for moving data to GPU
+ if self.if_cuda_avaiable():
+ torch.cuda.synchronize()
+ duration = time.time() - begin
+ return data_tensor, y_tensor, duration
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/run_phase1.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/run_phase1.py
new file mode 100644
index 0000000000..3f81101e4d
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase1/run_phase1.py
@@ -0,0 +1,169 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import json
+
+from src.common.structure import ModelAcquireData, ModelEvaData
+from src.controller.controler import SampleController
+from src.controller.sampler_all.seq_sampler import SequenceSampler
+
+from src.eva_engine.phase1.evaluator import P1Evaluator
+from src.logger import logger
+from src.query_api.img_explore_ea import fetch_from_db
+from torch.utils.data import DataLoader
+from src.controller.sampler_ea.regularized_ea import RegularizedEASampler
+from src.search_space.core.space import SpaceWrapper
+from src.common.constant import Config
+
+
+# this is for image only
+def p1_evaluate_query(space_name, dataset, run_id, N, K) -> (list, float):
+ """
+    :param space_name: search space name
+    :param dataset: dataset name
+    :param run_id: id of the recorded run to query
+    :param N: number of models to explore
+    :param K: number of top models to return
+    :return: the top-K models and the time usage.
+ """
+ arch_id, candidates, current_time = fetch_from_db(space_name, dataset, run_id, N)
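+    # assumes fetch_from_db returns candidates ordered by score ascending,
+    # so the last K entries are the top-K models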
+ return candidates[-K:], current_time
+
+
+class RunPhase1:
+
+ def __init__(self, args, K: int, N: int, search_space_ins: SpaceWrapper,
+ train_loader: DataLoader = None, is_simulate: bool = False):
+ """
+ Each model selection job will init one class here.
+ :param args: space, population_size, sample_size
+        :param K: number of models to return from the first phase
+        :param N: number of models to evaluate in total
+ :param search_space_ins:
+ """
+
+ # return K models
+ self.K = K
+ # explore N models
+ self.N = N
+
+ self.args = args
+
+ self.search_space_ins = search_space_ins
+
+ # seq: init the search strategy and controller,
+ if self.search_space_ins.name == Config.MLPSP and self.N >= min(len(self.search_space_ins), 100000):
+ print("Explore all models")
+ strategy = SequenceSampler(self.search_space_ins)
+ elif self.search_space_ins.name != Config.MLPSP and self.N >= min(len(self.search_space_ins), 8000):
+ print("Explore all models")
+ strategy = SequenceSampler(self.search_space_ins)
+ else:
+ strategy = RegularizedEASampler(self.search_space_ins,
+ population_size=self.args.population_size,
+ sample_size=self.args.sample_size)
+ self.sampler = SampleController(strategy)
+
+ # generate db config
+ db_config = {
+ "db_name": self.args.db_name,
+ "db_user": self.args.db_user,
+ "db_host": self.args.db_host,
+ "db_port": self.args.db_port,
+ }
+
+ # seq: init the phase 1 evaluator,
+ self._evaluator = P1Evaluator(device=self.args.device,
+ num_label=self.args.num_labels,
+ dataset_name=self.args.dataset,
+ search_space_ins=self.search_space_ins,
+ train_loader=train_loader,
+ is_simulate=is_simulate,
+ metrics=self.args.tfmem,
+ db_config=db_config)
+
+ def run_phase1(self) -> (list, list, list, list):
+ """
+ Controller explore n models, and return the top K models.
+ :return:
+ """
+
+        # these two lists track the performance trace
+ # current best model id
+ trace_highest_scored_models_id = []
+ # current highest score
+ trace_highest_score = []
+ explored_n = 1
+ model_eva = ModelEvaData()
+
+ while explored_n <= self.N:
+ # generate new model
+ arch_id, arch_micro = self.sampler.sample_next_arch()
+ # this is for sequence sampler.
+ if arch_id is None:
+ break
+ model_encoding = self.search_space_ins.serialize_model_encoding(arch_micro)
+
+ explored_n += 1
+
+ # run the model selection
+ model_acquire_data = ModelAcquireData(model_id=str(arch_id),
+ model_encoding=model_encoding,
+ is_last=False)
+ data_str = model_acquire_data.serialize_model()
+
+ # update the shared model eval res
+ try:
+ model_eva.model_id = str(arch_id)
+ model_eva.model_score = self._evaluator.p1_evaluate(data_str)
+            except KeyError:
+                # in simulate mode a KeyError can occur, since some archs are not scored yet
+                continue
+
+ if explored_n % 100 == 0:
+ logger.info("3. [trails] Phase 1: filter phase explored " + str(explored_n) +
+ " model, model_id = " + model_eva.model_id +
+ " model_scores = " + json.dumps(model_eva.model_score))
+
+ print("3. [trails] Phase 1: filter phase explored " + str(explored_n) +
+ " model, model_id = " + model_eva.model_id +
+ " model_scores = " + json.dumps(model_eva.model_score))
+
+ ranked_score = self.sampler.fit_sampler(model_eva.model_id,
+ model_eva.model_score,
+ simple_score_sum=self.args.simple_score_sum)
+
+            # track the highest ranked score (the sum of the two metric values) seen so far
+ if len(trace_highest_score) == 0:
+ trace_highest_score.append(ranked_score)
+ trace_highest_scored_models_id.append(str(arch_id))
+ else:
+ if ranked_score > trace_highest_score[-1]:
+ trace_highest_score.append(ranked_score)
+ trace_highest_scored_models_id.append(str(arch_id))
+ else:
+ trace_highest_score.append(trace_highest_score[-1])
+ trace_highest_scored_models_id.append(trace_highest_scored_models_id[-1])
+
+ logger.info("3. [trails] Phase 1: filter phase explored " + str(explored_n) +
+ " model, model_id = " + model_eva.model_id +
+ " model_scores = " + json.dumps(model_eva.model_score))
+ # return the top K models
+ return self.sampler.get_current_top_k_models(self.K), self.sampler.get_current_top_k_models(-1), \
+ trace_highest_score, trace_highest_scored_models_id
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/__init__.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/__init__.py
index 01d7057208..ea94ffe9d7 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/third_pkg/__init__.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/__init__.py
@@ -16,3 +16,5 @@
# limitations under the License.
#
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py
index 3df60b02f7..52a3e50080 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/algo/__init__.py
@@ -14,4 +14,4 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#
+#
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py
similarity index 61%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py
index 259ebea6d4..4ea5038051 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/algo/trainer.py
@@ -16,8 +16,10 @@
# limitations under the License.
#
-import time
-
+import torch
+import torch.nn as nn
+from torch import optim
+from torch.utils.data import DataLoader
from src.tools import utils
from singa import singa_wrap as singa
@@ -32,12 +34,14 @@
import time
import argparse
from PIL import Image
+import json
np_dtype = {"float16": np.float16, "float32": np.float32}
# singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
singa_dtype = {"float32": tensor.float32}
+
### MSOptimizer
class MSOptimizer(Optimizer):
def __call__(self, loss):
@@ -64,6 +68,8 @@ def call_with_returns(self, loss):
# print ("call_with_returns after apply loss.data: \n", loss.data)
return pn_p_g_list
+
+# MSSGD -- actually no change of code
class MSSGD(MSOptimizer):
"""Implements stochastic gradient descent (optionally with momentum).
@@ -235,14 +241,15 @@ def set_states(self, states):
self.moments = states['moments']
self.mom_value = self.momentum(self.step_counter)
+
# Data augmentation
def augmentation(x, batch_size):
xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
for data_num in range(0, batch_size):
offset = np.random.randint(8, size=2)
x[data_num, :, :, :] = xpad[data_num, :,
- offset[0]:offset[0] + x.shape[2],
- offset[1]:offset[1] + x.shape[2]]
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
if_flip = np.random.randint(2)
if (if_flip):
x[data_num, :, :, :] = x[data_num, :, :, ::-1]
@@ -296,10 +303,10 @@ def resize_dataset(x, image_size):
for d in range(0, dim):
X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
(image_size, image_size), Image.BILINEAR),
- dtype=np.float32)
+ dtype=np.float32)
return X
-from torch.utils.data import DataLoader
+
class ModelTrainer:
@classmethod
@@ -328,9 +335,9 @@ def fully_train_arch(cls,
if logger is None:
from src.logger import logger
logger = logger
-
+ logger.info(f'begin to train, batch size = {args.batch_size}')
start_time, best_valid_auc = time.time(), 0.
-
+
num_labels = args.num_labels
lr = args.lr
iter_per_epoch = args.iter_per_epoch
@@ -341,20 +348,33 @@ def fully_train_arch(cls,
args.epoch_num = epoch_num
# for multiple classification
+ # opt_metric = nn.CrossEntropyLoss(reduction='mean').to(device)
+ # this is only sutiable when output is dimension 1,
+ # opt_metric = nn.BCEWithLogitsLoss(reduction='mean').to(device)
# optimizer
+ # optimizer = optim.Adam(model.parameters(), lr=lr)
+ # scheduler = optim.lr_scheduler.CosineAnnealingLR(
+ # optimizer,
+ # T_max=epoch_num, # Maximum number of iterations.
+ # eta_min=1e-4) # Minimum learning rate.
precision = 'float32'
mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-4, dtype=singa_dtype[precision])
device_id = 0
max_epoch = epoch_num
+ # model = arch
graph = True
verbosity = 0
- dist_option='plain'
- spars=None
+ dist_option = 'plain'
+ spars = None
global_rank = 0
world_size = 1
+    # gradient clipping: clamp each gradient value to [-1, 1]
+ # for p in model.parameters():
+ # p.register_hook(lambda grad: torch.clamp(grad, -1., 1.))
# training params
+ # device = args.device
if args.device == 'cpu':
dev = singa_device.get_default_device()
else: # GPU
@@ -377,11 +397,49 @@ def fully_train_arch(cls,
tx = tensor.Tensor((args.batch_size, args.nfeat), dev, singa_dtype[precision])
ty = tensor.Tensor((args.batch_size,), dev, tensor.int32)
### singa data
-
+
model.set_optimizer(mssgd)
model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
dev.SetVerbosity(verbosity)
+ # synflow_flag = False ### just change the model to the absolute value
+ # for epoch in range(epoch_num):
+ # logger.info(f'Epoch [{epoch:3d}/{epoch_num:3d}]')
+ # train and eval
+ # print("begin to train...")
+ # logger.info(f"Begin to train.....")
+ # train_auc, train_loss = ModelTrainer.run(logger,
+ # epoch, iter_per_epoch, model, train_loader, opt_metric, args,
+ # optimizer=optimizer, namespace='train')
+ # scheduler.step()
+ # logger.info(f"Begin to evaluate on valid.....")
+ # print("begin to evaluate...")
+ # valid_auc, valid_loss = ModelTrainer.run(logger,
+ # epoch, iter_per_epoch, model, val_loader,
+ # opt_metric, args, namespace='val')
+
+ # if use_test_acc:
+ # logger.info(f"Begin to evaluate on test.....")
+ # test_auc, test_loss = ModelTrainer.run(logger,
+ # epoch, iter_per_epoch, model, test_loader,
+ # opt_metric, args, namespace='test')
+ # else:
+ # test_auc = -1
+
+ # info_dic[epoch] = {
+ # "train_auc": train_auc,
+ # "valid_auc": valid_auc,
+ # "train_loss": train_loss,
+ # "valid_loss": valid_loss,
+ # "train_val_total_time": time.time() - start_time}
+
+ # record best auc and save checkpoint
+ # if valid_auc >= best_valid_auc:
+ # best_valid_auc, best_test_auc = valid_auc, test_auc
+ # logger.info(f'best valid auc: valid {valid_auc:.4f}, test {test_auc:.4f}')
+ # else:
+ # logger.info(f'valid {valid_auc:.4f}, test {test_auc:.4f}')
+
# Training and evaluation loop
for epoch in range(max_epoch):
start_time = time.time()
@@ -390,6 +448,7 @@ def fully_train_arch(cls,
if global_rank == 0:
print('Starting Epoch %d:' % (epoch))
+ logger.info('Starting Epoch %d:' % (epoch))
# Training phase
train_correct = np.zeros(shape=[1], dtype=np.float32)
@@ -401,9 +460,10 @@ def fully_train_arch(cls,
# print ()
batch_idx = 0
# for b in range(num_train_batch):
- for batch_idx, batch in enumerate(train_loader):
+ for batch_idx, batch in enumerate(train_loader, start=1):
if batch_idx % 50 == 0:
- print ("trainer.py train batch_idx: \n", batch_idx)
+ print("trainer.py train batch_idx: \n", batch_idx)
+ logger.info("trainer.py train batch_idx: \n", batch_idx)
# Generate the batch data in this iteration
# x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
# if model.dimension == 4:
@@ -440,7 +500,7 @@ def fully_train_arch(cls,
# print ("x.astype(np.float32): \n", x.astype(np.float32))
# print ("y: \n", y)
tx = tensor.Tensor(x.shape, dev, singa_dtype[precision])
- ty = tensor.Tensor((y.shape[0],), dev, tensor.int32)
+ ty = tensor.Tensor((y.shape[0],), dev, tensor.int32)
tx.copy_from_numpy(x) # dtype=np.float32
# print ("tx: \n", tx)
ty.copy_from_numpy(y)
@@ -468,10 +528,16 @@ def fully_train_arch(cls,
if global_rank == 0:
print('Training loss = %f, training accuracy = %f' %
- (train_loss, train_correct /
- (batch_idx * args.batch_size * world_size)),
- flush=True)
- print ("train total batch_idx: ", batch_idx)
+ (train_loss, train_correct /
+ (batch_idx * args.batch_size * world_size)),
+ flush=True)
+ print("train total batch_idx: ", batch_idx)
+
+ logger.info('Training loss = %f, training accuracy = %f' %
+ (train_loss, train_correct /
+ (batch_idx * args.batch_size * world_size)))
+
+ logger.info("train total batch_idx: ", batch_idx)
train_metric = train_correct / (batch_idx * args.batch_size * world_size)
# Evaluation phase
@@ -479,7 +545,7 @@ def fully_train_arch(cls,
batch_idx = 0
# for b in range(num_val_batch):
# print ("evaluation begins")
- for batch_idx, batch in enumerate(test_loader):
+ for batch_idx, batch in enumerate(test_loader, start=1):
# print ("trainer.py test batch_idx: \n", batch_idx)
# x = val_x[b * batch_size:(b + 1) * batch_size]
# if model.dimension == 4:
@@ -532,13 +598,16 @@ def fully_train_arch(cls,
# Output the evaluation accuracy
if global_rank == 0:
print('Evaluation accuracy = %f, Elapsed Time = %fs' %
- (test_correct / (batch_idx * args.batch_size * 8 * world_size),
- time.time() - start_time),
- flush=True)
+ (test_correct / (batch_idx * args.batch_size * 8 * world_size),
+ time.time() - start_time),
+ flush=True)
+
+ logger.info('Evaluation accuracy = %f, Elapsed Time = %fs' %
+ (test_correct / (batch_idx * args.batch_size * 8 * world_size),
+ time.time() - start_time))
# print ("test all batch_idx: ", batch_idx)
test_metric = test_correct / (batch_idx * args.batch_size * 8 * world_size)
-
info_dic[epoch] = {
"train_metric": str(train_metric[0]),
"test_metric": str(test_metric[0]),
@@ -549,5 +618,237 @@ def fully_train_arch(cls,
dev.PrintTimeProfiling()
# return valid_auc, time.time() - start_time, info_dic
- print ("info_dic: ", info_dic)
+ print("info_dic: ", info_dic)
+ logger.info("info_dic: ", info_dic)
+
+ logger.info(json.dumps(info_dic))
+
+ test_metric = train_metric
return test_metric, time.time() - start_time, info_dic
+
+ @classmethod
+ def fully_train_arch_origin(cls,
+ model: nn.Module,
+ use_test_acc: bool,
+ epoch_num,
+ train_loader: DataLoader,
+ val_loader: DataLoader,
+ test_loader: DataLoader,
+ args,
+ logger=None
+ ) -> (float, float, dict):
+ """
+ Args:
+ model:
+ use_test_acc:
+            epoch_num: how many epochs to train, set by the scheduler
+            train_loader:
+            val_loader:
+            test_loader:
+            args:
+        Returns:
+            (validation AUC, total time usage, per-epoch info dict)
+        """
+
+ if logger is None:
+ from src.logger import logger
+ logger = logger
+
+ start_time, best_valid_auc = time.time(), 0.
+
+ # training params
+ device = args.device
+ num_labels = args.num_labels
+ lr = args.lr
+ iter_per_epoch = args.iter_per_epoch
+ # report_freq = args.report_freq
+ # given_patience = args.patience
+
+ # assign new values
+ args.epoch_num = epoch_num
+
+ # for multiple classification
+ opt_metric = nn.CrossEntropyLoss(reduction='mean').to(device)
+        # this is only suitable when the output has dimension 1,
+ # opt_metric = nn.BCEWithLogitsLoss(reduction='mean').to(device)
+
+ # optimizer
+ optimizer = optim.Adam(model.parameters(), lr=lr)
+ scheduler = optim.lr_scheduler.CosineAnnealingLR(
+ optimizer,
+ T_max=epoch_num, # Maximum number of iterations.
+ eta_min=1e-4) # Minimum learning rate.
+
+ # gradient clipping, set the gradient value to be -1 - 1
+ for p in model.parameters():
+ p.register_hook(lambda grad: torch.clamp(grad, -1., 1.))
+
+ info_dic = {}
+ valid_auc = -1
+ valid_loss = 0
+ for epoch in range(epoch_num):
+ logger.info(f'Epoch [{epoch:3d}/{epoch_num:3d}]')
+ # train and eval
+ # print("begin to train...")
+ logger.info(f"Begin to train.....")
+ train_auc, train_loss = ModelTrainer.run(logger,
+ epoch, iter_per_epoch, model, train_loader, opt_metric, args,
+ optimizer=optimizer, namespace='train')
+ scheduler.step()
+ logger.info(f"Begin to evaluate on valid.....")
+ # print("begin to evaluate...")
+ valid_auc, valid_loss = ModelTrainer.run(logger,
+ epoch, iter_per_epoch, model, val_loader,
+ opt_metric, args, namespace='val')
+
+ if use_test_acc:
+ logger.info(f"Begin to evaluate on test.....")
+ test_auc, test_loss = ModelTrainer.run(logger,
+ epoch, iter_per_epoch, model, test_loader,
+ opt_metric, args, namespace='test')
+ else:
+ test_auc = -1
+
+ info_dic[epoch] = {
+ "train_auc": train_auc,
+ "valid_auc": valid_auc,
+ "train_loss": train_loss,
+ "valid_loss": valid_loss,
+ "train_val_total_time": time.time() - start_time}
+
+ # record best auc and save checkpoint
+ if valid_auc >= best_valid_auc:
+ best_valid_auc, best_test_auc = valid_auc, test_auc
+ logger.info(f'best valid auc: valid {valid_auc:.4f}, test {test_auc:.4f}')
+ else:
+ logger.info(f'valid {valid_auc:.4f}, test {test_auc:.4f}')
+
+ return valid_auc, time.time() - start_time, info_dic
+
+ @classmethod
+ def fully_evaluate_arch(cls,
+ model: nn.Module,
+ use_test_acc: bool,
+ epoch_num,
+ val_loader: DataLoader,
+ test_loader: DataLoader,
+ args,
+ logger=None,
+ ) -> (float, float, dict):
+ """
+ Args:
+ model:
+ use_test_acc:
+            epoch_num: how many epochs to evaluate, set by the scheduler
+ val_loader:
+ test_loader:
+ args:
+ Returns:
+ """
+
+ if logger is None:
+ from src.logger import logger
+ logger = logger
+
+ start_time, best_valid_auc = time.time(), 0.
+
+ device = args.device
+ iter_per_epoch = args.iter_per_epoch
+ args.epoch_num = epoch_num
+ opt_metric = nn.CrossEntropyLoss(reduction='mean').to(device)
+
+ info_dic = {}
+ valid_auc = -1
+ valid_loss = 0
+ for epoch in range(epoch_num):
+ logger.info(f'Epoch [{epoch:3d}/{epoch_num:3d}]')
+ # print("begin to evaluate...")
+ valid_auc, valid_loss = ModelTrainer.run(logger,
+ epoch, iter_per_epoch, model, val_loader,
+ opt_metric, args, namespace='val')
+
+ if use_test_acc:
+ test_auc, test_loss = ModelTrainer.run(logger,
+ epoch, iter_per_epoch, model, test_loader,
+ opt_metric, args, namespace='test')
+ else:
+ test_auc = -1
+
+ # record best auc and save checkpoint
+ if valid_auc >= best_valid_auc:
+ best_valid_auc, best_test_auc = valid_auc, test_auc
+ logger.info(f'best valid auc: valid {valid_auc:.4f}, test {test_auc:.4f}')
+ else:
+ logger.info(f'valid {valid_auc:.4f}, test {test_auc:.4f}')
+
+ return valid_auc, time.time() - start_time, info_dic
+
+ # train one epoch of train/val/test
+ @classmethod
+ def run(cls, logger, epoch, iter_per_epoch, model, data_loader, opt_metric, args, optimizer=None,
+ namespace='train'):
+ if optimizer:
+ model.train()
+ else:
+ model.eval()
+
+ time_avg, timestamp = utils.AvgrageMeter(), time.time()
+ loss_avg, auc_avg = utils.AvgrageMeter(), utils.AvgrageMeter()
+
+ batch_idx = 0
+ for batch_idx, batch in enumerate(data_loader):
+            # if the user sets iter_per_epoch, train only a fixed number of iterations,
+            # then stop the current epoch early for evaluation
+            if namespace == 'train' and iter_per_epoch is not None and batch_idx >= iter_per_epoch:
+                logger.info(f"Training iteration {batch_idx} >= iter_per_epoch = {iter_per_epoch}, break out")
+ break
+
+ target = batch['y'].type(torch.LongTensor).to(args.device)
+ batch['id'] = batch['id'].to(args.device)
+ batch['value'] = batch['value'].to(args.device)
+
+ if namespace == 'train':
+ y = model(batch)
+ loss = opt_metric(y, target)
+
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+ else:
+ with torch.no_grad():
+ y = model(batch)
+ loss = opt_metric(y, target)
+
+ # for multiple classification
+ auc = utils.roc_auc_compute_fn(torch.nn.functional.softmax(y, dim=1)[:, 1], target)
+ # for binary classification
+ # auc = utils.roc_auc_compute_fn(y, target)
+ loss_avg.update(loss.item(), target.size(0))
+ auc_avg.update(auc, target.size(0))
+
+ time_avg.update(time.time() - timestamp)
+ timestamp = time.time()
+ if batch_idx % args.report_freq == 0:
+ logger.info(f'Epoch [{epoch:3d}/{args.epoch_num}][{batch_idx:3d}/{len(data_loader)}]\t'
+ f'{time_avg.val:.3f} ({time_avg.avg:.3f}) AUC {auc_avg.val:4f} ({auc_avg.avg:4f}) '
+ f'Loss {loss_avg.val:8.4f} ({loss_avg.avg:8.4f})')
+
+ # print(f'Epoch [{epoch:3d}/{args.epoch_num}][{batch_idx:3d}/{len(data_loader)}]\t'
+ # f'{time_avg.val:.3f} ({time_avg.avg:.3f}) AUC {auc_avg.val:4f} ({auc_avg.avg:4f}) '
+ # f'Loss {loss_avg.val:8.4f} ({loss_avg.avg:8.4f})')
+
+ # record the last epoch information
+ logger.info(f'Epoch [{epoch:3d}/{args.epoch_num}][{batch_idx:3d}/{len(data_loader)}]\t'
+ f'{time_avg.val:.3f} ({time_avg.avg:.3f}) AUC {auc_avg.val:4f} ({auc_avg.avg:4f}) '
+ f'Loss {loss_avg.val:8.4f} ({loss_avg.avg:8.4f})')
+
+ # print(f'Epoch [{epoch:3d}/{args.epoch_num}][{batch_idx:3d}/{len(data_loader)}]\t'
+ # f'{time_avg.val:.3f} ({time_avg.avg:.3f}) AUC {auc_avg.val:4f} ({auc_avg.avg:4f}) '
+ # f'Loss {loss_avg.val:8.4f} ({loss_avg.avg:8.4f})')
+
+ logger.info(f'{namespace}\tTime {utils.timeSince(s=time_avg.sum):>12s} '
+ f'AUC {auc_avg.avg:8.4f} Loss {loss_avg.avg:8.4f}')
+
+ # print(f'{namespace}\tTime {utils.timeSince(s=time_avg.sum):>12s} '
+ # f'AUC {auc_avg.avg:8.4f} Loss {loss_avg.avg:8.4f}')
+
+ return auc_avg.avg, loss_avg.avg
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/evaluator.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/evaluator.py
new file mode 100644
index 0000000000..743c42e5c4
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/evaluator.py
@@ -0,0 +1,97 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from src.common.constant import Config
+from src.eva_engine.phase2.algo.trainer import ModelTrainer
+from src.logger import logger
+from src.query_api.interface import SimulateTrain
+from src.search_space.core.space import SpaceWrapper
+from torch.utils.data import DataLoader
+
+
+class P2Evaluator:
+
+ def __init__(self,
+ search_space_ins: SpaceWrapper,
+ dataset: str,
+ is_simulate: bool = True,
+ train_loader: DataLoader = None,
+ val_loader: DataLoader = None,
+ args=None):
+ """
+ :param search_space_ins:
+ :param dataset:
+        :param is_simulate: if True, query recorded results from the API instead of training.
+ """
+ self.search_space_ins = search_space_ins
+
+ # dataset name
+ self.dataset = dataset
+ self.is_simulate = is_simulate
+ self.acc_getter = None
+
+ # for training only
+ self.train_loader = train_loader
+ self.val_loader = val_loader
+ self.args = args
+
+ def p2_evaluate(self, cand: str, epoch_per_model: int) -> (float, float):
+ """
+ :param cand: candidate id
+ :param epoch_per_model: epoch for each model
+ :return:
+ """
+        # if simulating, or if it is an image dataset
+ if self.is_simulate or self.search_space_ins.name in [Config.NB101, Config.NB201]:
+ return self._evaluate_query(cand, epoch_per_model)
+ else:
+ return self._evaluate_train(cand, epoch_per_model)
+
+ def _evaluate_query(self, cand: str, epoch_per_model: int) -> (float, float):
+ """
+ :param cand: the candidate to evaluate
+ :param epoch_per_model: how many resource it can use, epoch number
+ :return:
+ """
+ if self.acc_getter is None:
+ self.acc_getter = SimulateTrain(space_name=self.search_space_ins.name)
+
+ acc, time_usage = self.acc_getter.get_ground_truth(arch_id=cand, epoch_num=epoch_per_model, dataset=self.dataset)
+
+ return acc, time_usage
+
+ def _evaluate_train(self, cand: str, epoch_per_model: int) -> (float, float):
+ """
+ :param cand: the candidate to evaluate
+ :param epoch_per_model: how many resource it can use, epoch number
+ :return:
+ """
+ model = self.search_space_ins.new_architecture(cand)
+ valid_auc, total_run_time, train_log = ModelTrainer.fully_train_arch(
+ model=model,
+ use_test_acc=False,
+ epoch_num=epoch_per_model,
+ train_loader=self.train_loader,
+ val_loader=self.val_loader,
+ test_loader=self.val_loader,
+ args=self.args)
+
+ logger.info(f' ----- model id: {cand}, Val_AUC : {valid_auc} Total running time: '
+ f'{total_run_time}-----')
+
+ return valid_auc, total_run_time
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_sh.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_sh.py
new file mode 100644
index 0000000000..b2188e6b8c
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_sh.py
@@ -0,0 +1,182 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from copy import copy
+
+from src.common.constant import Config
+from src.eva_engine.phase2.evaluator import P2Evaluator
+
+# successive halving
+from src.logger import logger
+from src.search_space.core.space import SpaceWrapper
+from torch.utils.data import DataLoader
+
+
+class BudgetAwareControllerSH:
+
+ @staticmethod
+ def pre_calculate_epoch_required(K: int, U: int, eta: int=3, max_unit_per_model: int=200):
+ if K == 1:
+ return 0
+
+ cur_cand_num = K
+ cur_epoch = min(U, max_unit_per_model) # Limit the current epoch to max_unit_per_model
+ total_epochs = 0
+
+ while cur_cand_num > 1 and cur_epoch < max_unit_per_model:
+ total_epochs += cur_cand_num * cur_epoch
+ # Prune models
+ cur_cand_num = int(cur_cand_num * (1 / eta))
+ # Increase the training epoch for the remaining models
+ cur_epoch = min(cur_epoch * eta, max_unit_per_model)
+
+ # If the models are fully trained and there is more than one candidate, add these final evaluations to the total
+ if cur_cand_num > 1 and cur_epoch >= max_unit_per_model:
+ total_epochs += cur_cand_num * max_unit_per_model
+
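+        # worked example (hypothetical, eta=3): K=9, U=1 -> round 1: 9 models * 1 epoch,
+        # round 2: 3 models * 3 epochs, so total_epochs = 9 + 9 = 18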
+ return total_epochs
+
+ def __init__(self,
+ search_space_ins: SpaceWrapper, dataset_name: str,
+ eta, time_per_epoch,
+ train_loader: DataLoader = None,
+ val_loader: DataLoader = None,
+ args=None,
+ is_simulate: bool = True):
+ """
+ :param search_space_ins:
+ :param dataset_name:
+ :param time_per_epoch:
+ :param is_simulate:
+        :param eta: keep 1/eta of the candidates in each iteration
+ """
+ self.is_simulate = is_simulate
+ self._evaluator = P2Evaluator(search_space_ins, dataset_name,
+ is_simulate=is_simulate,
+ train_loader=train_loader, val_loader=val_loader,
+ args=args)
+ self.eta = eta
+ self.max_unit_per_model = args.epoch
+ self.time_per_epoch = time_per_epoch
+ self.name = "SUCCHALF"
+
+ def schedule_budget_per_model_based_on_T(self, space_name, fixed_time_budget, K_):
+ # for benchmarking only phase 2
+
+ # try different K and U combinations
+ # only consider 15625 arches in this paper
+ # min_budget_required: when K = 1, N = min_budget_required * 1
+ if space_name == Config.NB101:
+ U_options = [4, 12, 16, 108]
+ else:
+ U_options = list(range(1, 200))
+
+ history = []
+
+ for U in U_options:
+            real_time_used = \
+                BudgetAwareControllerSH.pre_calculate_epoch_required(
+                    K_, U, self.eta, self.max_unit_per_model) * self.time_per_epoch
+
+ if real_time_used > fixed_time_budget:
+ break
+ else:
+ history.append(U)
+ if len(history) == 0:
+ print(f"{fixed_time_budget} is too small for current config")
+            raise ValueError(f"{fixed_time_budget} is too small for current config")
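+        # history holds every U whose projected time fits within the budget;
+        # the largest feasible U is returned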
+ return history[-1]
+
+ def pre_calculate_time_required(self, K, U):
+        all_epoch = BudgetAwareControllerSH.pre_calculate_epoch_required(K, U, self.eta, self.max_unit_per_model)
+ return all_epoch, all_epoch * self.time_per_epoch
+
+    def run_phase2(self, U: int, candidates_m: list) -> (str, float, int, float):
+ total_time = 0
+ if len(candidates_m) == 0:
+ raise "No model to explore during the second phase!"
+ candidates_m_ori = copy(candidates_m)
+ if len(candidates_m) == 1:
+ best_perform, _ = self._evaluator.p2_evaluate(candidates_m[0], self.max_unit_per_model)
+ return candidates_m[0], best_perform, 0, 0
+
+ eta = self.eta
+ max_unit_per_model = self.max_unit_per_model
+
+ cur_cand_num = len(candidates_m)
+ cur_epoch = min(U, max_unit_per_model) # Limit the current epoch to max_unit_per_model
+ total_epochs = 0
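+        # successive halving: score all surviving candidates with the current
+        # per-model epoch budget, keep the top 1/eta fraction, then multiply the
+        # budget by eta, until one candidate or the full budget remains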
+
+ while cur_cand_num > 1 and cur_epoch < max_unit_per_model:
+ logger.info(f"4. [trails] Running phase2: train {len(candidates_m)} models each with {cur_epoch} epochs")
+ scores = []
+ # Evaluate all models
+ for cand in candidates_m:
+ score, time_usage = self._evaluator.p2_evaluate(cand, cur_epoch)
+ scores.append((score, cand))
+ total_epochs += cur_epoch
+ total_time += time_usage
+
+ # Sort models based on score
+ scores.sort(reverse=True, key=lambda x: x[0])
+
+            # Prune models, keeping at least one
+ cur_cand_num = max(int(cur_cand_num * (1 / eta)), 1)
+ candidates_m = [x[1] for x in scores[:cur_cand_num]]
+
+ # Increase the training epoch for the remaining models
+ cur_epoch = min(cur_epoch * eta, max_unit_per_model)
+
+ # If the models can be fully trained and there is more than one candidate, select the top one
+ if cur_cand_num > 1 and cur_epoch >= max_unit_per_model:
+ logger.info(
+ f"4. [trails] Running phase2: train {len(candidates_m)} models each with {max_unit_per_model} epochs")
+ scores = []
+ for cand in candidates_m:
+ score, time_usage = self._evaluator.p2_evaluate(cand, max_unit_per_model)
+ scores.append((score, cand))
+                total_epochs += max_unit_per_model
+ total_time += time_usage
+ scores.sort(reverse=True, key=lambda x: x[0])
+ candidates_m = [scores[0][1]]
+
+ # only return the performance when simulating, skip the training, just return model
+ if self.is_simulate:
+ logger.info(
+ f"5. [trails] Phase2 Done, Select {candidates_m[0]}, "
+ f"simulate={self.is_simulate}. Acqure the ground truth")
+ best_perform, _ = self._evaluator.p2_evaluate(candidates_m[0], self.max_unit_per_model)
+ else:
+ logger.info(
+ f"5. [trails] Phase2 Done, Select {candidates_m[0]}, "
+ f"simulate={self.is_simulate}, Skip training")
+ best_perform = 0
+ # Return the best model and the total epochs used
+ return candidates_m[0], best_perform, total_epochs, total_time
+
+
+if __name__ == "__main__":
+    # max_unit_per_model reference: frappe: 20, uci_diabetes: 40, criteo: 10
+    # nb101: 108, nb201: 200
+    k_options = [1, 2, 4, 8, 16]
+    u_options = [1, 2, 4, 8, 16]
+    print(f"k=10, u=8, total_epoch = {BudgetAwareControllerSH.pre_calculate_epoch_required(10, 8, eta=3, max_unit_per_model=20)}")
+    for k in k_options:
+        for u in u_options:
+            print(f"k={k}, u={u}, total_epoch = {BudgetAwareControllerSH.pre_calculate_epoch_required(k, u, eta=3, max_unit_per_model=20)}")
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py
similarity index 74%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py
index e5610c89cc..dd8ce96b31 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_sr.py
@@ -16,45 +16,17 @@
# limitations under the License.
#
+
from copy import copy
from src.common.constant import Config
+from src.eva_engine.phase2.evaluator import P2Evaluator
+from src.search_space.core.space import SpaceWrapper
class BudgetAwareControllerSR:
- def __init__(self, evaluator, time_per_epoch, max_unit=200):
- """
- :param evaluator:
- :param max_unit: for 201, it's 200, for 101 it's 108
- """
- self._evaluator = evaluator
- self.max_unit_per_model = max_unit
- self.time_per_epoch = time_per_epoch
- self.name = "SUCCREJCT"
- def schedule_budget_per_model_based_on_T(self, space_name, fixed_time_budget, K_):
- # for benchmarking only phase 2
-
- # try different K and U combinations
- # only consider 15625 arches in this paper
- # min_budget_required: when K = 1, N = min_budget_required * 1
- if space_name == Config.NB101:
- U_options = [4, 12, 16, 108]
- else:
- U_options = list(range(1, 200))
-
- history = []
-
- for U in U_options:
- expected_time_used = self.pre_calculate_epoch_required(K_, U) * self.time_per_epoch
- if expected_time_used > fixed_time_budget:
- break
- else:
- history.append(U)
- if len(history) == 0:
- raise f"{fixed_time_budget} is too small for current config"
- return history[-1]
-
- def pre_calculate_epoch_required(self, K, U):
+ @staticmethod
+ def pre_calculate_epoch_required(K, U, eta: int = 3, max_unit_per_model: int = 200):
"""
:param K: candidates lists
:param U: min resource each candidate needs
@@ -73,16 +45,21 @@ def pre_calculate_epoch_required(self, K, U):
if previous_epoch is None:
previous_epoch = epoch_per_model
elif previous_epoch == epoch_per_model:
- # which means the epoch don't increase, no need to re-evaluate each component
+ # current epoch == last epoch, no need to re-evaluate each component
K = cur_cand_num - 1
continue
- if epoch_per_model >= self.max_unit_per_model:
- epoch_per_model = self.max_unit_per_model
+ previous_epoch = epoch_per_model
+
+ if epoch_per_model >= max_unit_per_model:
+ epoch_per_model = max_unit_per_model
+
+ # print(f"[successive_reject]: {cur_cand_num} model left, "
+ # f"and evaluate each model with {epoch_per_model} epoch, total epoch = {max_unit_per_model}")
# evaluate each arch
min_budget_required += epoch_per_model * cur_cand_num
# sort from min to max
- if epoch_per_model == self.max_unit_per_model:
+ if epoch_per_model == max_unit_per_model:
# each model is fully evaluated, just return top 1
K = 1
else:
@@ -90,12 +67,53 @@ def pre_calculate_epoch_required(self, K, U):
K = cur_cand_num - 1
return min_budget_required
+ def __init__(self,
+ search_space_ins: SpaceWrapper, dataset_name: str,
+ eta, args, time_per_epoch):
+
+ self.is_simulate = True
+ self._evaluator = P2Evaluator(search_space_ins,
+ dataset_name,
+ is_simulate=True,
+ train_loader=None,
+ val_loader=None,
+ args=None)
+
+ self.eta = eta
+ self.max_unit_per_model = args.epoch
+ self.time_per_epoch = time_per_epoch
+ self.name = "SUCCREJCT"
+
+ def schedule_budget_per_model_based_on_T(self, space_name, fixed_time_budget, K_):
+ # for benchmarking only phase 2
+
+ # try different K and U combinations
+ # only consider 15625 arches in this paper
+ # min_budget_required: when K = 1, N = min_budget_required * 1
+ if space_name == Config.NB101:
+ U_options = [4, 12, 16, 108]
+ else:
+ U_options = list(range(1, 200))
+
+ history = []
+
+ for U in U_options:
+ expected_time_used = self.pre_calculate_epoch_required(K_, U) * self.time_per_epoch
+ if expected_time_used > fixed_time_budget:
+ break
+ else:
+ history.append(U)
+ if len(history) == 0:
+            raise ValueError(f"{fixed_time_budget} is too small for current config")
+ return history[-1]
+
def run_phase2(self, U: int, candidates_m: list):
"""
:param candidates_m: candidates lists
:param U: min resource each candidate needs
:return:
"""
+ total_time = 0
# print(f" *********** begin BudgetAwareControllerSR with U={U}, K={len(candidates_m)} ***********")
candidates = copy(candidates_m)
total_epoch_each_rounds = len(candidates) * U
@@ -118,14 +136,17 @@ def run_phase2(self, U: int, candidates_m: list):
candidates = [ele[0] for ele in scored_cand[-num_keep:]]
continue
+ previous_epoch = epoch_per_model
+
if epoch_per_model >= self.max_unit_per_model:
epoch_per_model = self.max_unit_per_model
# print(f"[successive_reject]: {cur_cand_num} model left, "
- # f"and evaluate each model with {epoch_per_model} epoch")
+ # f"and evaluate each model with {epoch_per_model} epoch, total epoch = {self.max_unit_per_model}")
# evaluate each arch
for cand in candidates:
- score = self._evaluator.p2_evaluate(cand, epoch_per_model)
+ score, time_usage = self._evaluator.p2_evaluate(cand, epoch_per_model)
+ total_time += time_usage
total_score.append((cand, score))
min_budget_required += epoch_per_model
# sort from min to max
@@ -139,6 +160,5 @@ def run_phase2(self, U: int, candidates_m: list):
num_keep = cur_cand_num - 1
candidates = [ele[0] for ele in scored_cand[-num_keep:]]
- return candidates[0], None, min_budget_required
-
-
+ best_perform, _ = self._evaluator.p2_evaluate(candidates[0], self.max_unit_per_model)
+ return candidates[0], best_perform, min_budget_required, total_time
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py
similarity index 69%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py
index 02c9b8e703..8b32fab3df 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/phase2/run_uniform.py
@@ -16,21 +16,29 @@
# limitations under the License.
#
+
from copy import copy
-from random import randint
+from src.search_space.core.space import SpaceWrapper
from src.common.constant import Config
+from src.eva_engine.phase2.evaluator import P2Evaluator
# UniformAllocation
class UniformAllocation:
- def __init__(self, evaluator, time_per_epoch, max_unit=200):
- """
- :param evaluator:
- :param max_unit: for 201, it's 200, for 101 it's 108
- """
- self._evaluator = evaluator
- self.max_unit_per_model = max_unit
+ def __init__(self,
+ search_space_ins: SpaceWrapper, dataset_name: str,
+ eta, time_per_epoch, args=None):
+
+ self.is_simulate = True
+ self._evaluator = P2Evaluator(search_space_ins,
+ dataset_name,
+ is_simulate=True,
+ train_loader=None,
+ val_loader=None,
+ args=None)
+ self.eta = eta
+ self.max_unit_per_model = args.epoch
self.time_per_epoch = time_per_epoch
self.name = "UNIFORM"
@@ -55,14 +63,14 @@ def schedule_budget_per_model_based_on_T(self, space_name, fixed_time_budget, K_
history.append(U)
return history[-1]
- def pre_calculate_epoch_required(self, K, U):
+ def pre_calculate_epoch_required(self, K, U, eta: int=3, max_unit_per_model: int=200):
"""
:param B: total budget for phase 2
:param U: mini unit computation for each modle
:param candidates_m:
:return:
"""
- return K*U
+ return K * U
def run_phase2(self, U: int, candidates_m: list):
"""
@@ -72,23 +80,26 @@ def run_phase2(self, U: int, candidates_m: list):
"""
# print(f" *********** begin uniformly_allocate with U={U}, K={len(candidates_m)} ***********")
-
candidates = copy(candidates_m)
min_budget_required = 0
+    # todo: uncomment the next line to run full training for every candidate
+ # U = self.max_unit_per_model
+
if U >= self.max_unit_per_model:
U = self.max_unit_per_model
# print(f"[uniformly_allocate]: uniformly allocate {U} epoch to each model")
+ total_time = 0
total_score = []
for cand in candidates:
- score = self._evaluator.p2_evaluate(cand, U)
+ score, time_usage = self._evaluator.p2_evaluate(cand, U)
+ total_time += time_usage
total_score.append((cand, score))
min_budget_required += U
# sort from min to max
scored_cand = sorted(total_score, key=lambda x: x[1])
candidate = scored_cand[-1][0]
- return candidate, None, min_budget_required
-
-
+ best_perform, _ = self._evaluator.p2_evaluate(candidate, self.max_unit_per_model)
+ return candidate, best_perform, min_budget_required, total_time
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/run_ms.py b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/run_ms.py
new file mode 100644
index 0000000000..5b0a640ba1
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/eva_engine/run_ms.py
@@ -0,0 +1,353 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import time
+
+from typing import Set, List
+
+from src.eva_engine import coordinator
+from src.eva_engine.phase1.run_phase1 import RunPhase1, p1_evaluate_query
+from torch.utils.data import DataLoader
+from src.eva_engine.phase2.run_sh import BudgetAwareControllerSH
+from src.eva_engine.phase2.run_sr import BudgetAwareControllerSR
+from src.eva_engine.phase2.run_uniform import UniformAllocation
+from src.logger import logger
+from src.search_space.init_search_space import init_search_space
+from src.query_api.interface import profile_NK_trade_off
+from src.common.constant import Config
+
+
+class RunModelSelection:
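+    # Two-phase model selection: phase 1 (filtering) scores N candidate
+    # architectures with training-free metrics and keeps the top K; phase 2
+    # (refinement) trains the K survivors under a budget-aware scheduler
+    # (successive halving by default) and returns the best one.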
+
+ def __init__(self, search_space_name: str, args, is_simulate: bool = False):
+ self.args = args
+
+ self.eta = 3
+ self.is_simulate = is_simulate
+ # basic
+ self.search_space_name = search_space_name
+ self.dataset = self.args.dataset
+
+ # p2 evaluator
+ self.sh = None
+
+ # instance of the search space.
+ self.search_space_ins = init_search_space(self.args)
+
+ def select_model_simulate(self, budget: float, run_id: int = 0, only_phase1: bool = False, run_workers: int = 1):
+ """
+ This is for image data only
+ """
+
+ # 0. profiling dataset and search space, get t1 and t2
+
+ score_time_per_model, train_time_per_epoch, N_K_ratio = self.search_space_ins.profiling(self.dataset)
+ self.sh = BudgetAwareControllerSH(
+ search_space_ins=self.search_space_ins,
+ dataset_name=self.dataset,
+ eta=self.eta,
+ time_per_epoch=train_time_per_epoch,
+ args=self.args,
+ is_simulate=self.is_simulate)
+
+ # 1. run coordinator to schedule
+ K, U, N, B1_planed_time, B2_planed_time, B2_all_epoch = coordinator.schedule(self.dataset, self.sh, budget,
+ score_time_per_model,
+ train_time_per_epoch,
+ run_workers,
+ self.search_space_ins,
+ N_K_ratio,
+ only_phase1)
+
+ print(f"Budget = {budget}, N={N}, K={K}")
+
+ # 2. run phase 1 to score N models
+ k_models, B1_actual_time_use = p1_evaluate_query(self.search_space_name, self.dataset, run_id, N, K)
+
+ # 3. run phase-2 to determine the final model
+ best_arch, best_arch_performance, B2_actual_epoch_use, _ = self.sh.run_phase2(U, k_models)
+ # print("best model returned from Phase2 = ", k_models)
+
+ return best_arch, B1_actual_time_use + B2_actual_epoch_use * train_time_per_epoch, \
+ B1_planed_time + B2_planed_time, B2_all_epoch
+
+ def select_model_online_clean(self, budget: float, data_loader: List[DataLoader],
+ only_phase1: bool = False, run_workers: int = 1):
+ """
+ Select model online for structured data.
+ :param budget: time budget
+ :param data_loader: time budget
+ :param only_phase1:
+ :param run_workers:
+ :return:
+ """
+ begin_time = time.time()
+ logger.info("1. profiling....")
+ score_time_per_model = self.profile_filtering(data_loader)
+ train_time_per_epoch = self.profile_refinement(data_loader)
+ logger.info("2. coordination....")
+ K, U, N = self.coordination(budget, score_time_per_model, train_time_per_epoch, only_phase1)
+ logger.info("3. filtering phase....")
+ k_models, all_models, p1_trace_highest_score, p1_trace_highest_scored_models_id = self.filtering_phase(
+ N, K, train_loader=data_loader[0])
+ logger.info("4. refinement phase....")
+ best_arch, best_arch_performance, _, _ = self.refinement_phase(
+ U, k_models, train_loader=data_loader[0], valid_loader=data_loader[1])
+
+ end_time = time.time()
+ real_time_usage = end_time - begin_time
+
+ return best_arch, best_arch_performance, real_time_usage, all_models, \
+ p1_trace_highest_score, p1_trace_highest_scored_models_id
+
+ def select_model_online(self, budget: float, data_loader: List[DataLoader],
+ only_phase1: bool = False, run_workers: int = 1):
+ """
+ Select model online for structured data.
+ :param budget: time budget
+        :param data_loader: list of [train_loader, valid_loader, test_loader]
+ :param only_phase1:
+ :param run_workers:
+ :return:
+ """
+
+ train_loader, valid_loader, test_loader = data_loader
+
+ logger.info(f"0. [trails] Begin model selection, is_simulate={self.is_simulate} ... ")
+ begin_time = time.time()
+
+ logger.info("1. [trails] Begin profiling.")
+ # 0. profiling dataset and search space, get t1 and t2
+ score_time_per_model, train_time_per_epoch, N_K_ratio = self.search_space_ins.profiling(
+ self.dataset,
+ train_loader,
+ valid_loader,
+ self.args,
+ is_simulate=self.is_simulate)
+
+ self.sh = BudgetAwareControllerSH(
+ search_space_ins=self.search_space_ins,
+ dataset_name=self.dataset,
+ eta=self.eta,
+ time_per_epoch=train_time_per_epoch,
+ is_simulate=self.is_simulate,
+ train_loader=train_loader,
+ val_loader=valid_loader,
+ args=self.args)
+
+ # 1. run coordinator to schedule
+ logger.info("2. [trails] Begin scheduling...")
+ K, U, N, B1_planed_time, B2_planed_time, B2_all_epoch = coordinator.schedule(self.dataset, self.sh, budget,
+ score_time_per_model,
+ train_time_per_epoch,
+ run_workers,
+ self.search_space_ins,
+ N_K_ratio,
+ only_phase1)
+
+ print(f"Budget = {budget}, N={N}, K={K}")
+
+ # 2. run phase 1 to score N models
+ logger.info("3. [trails] Begin to run phase1: filter phase")
+ # lazy loading the search space if needed.
+
+ # run phase-1 to get the K models.
+ p1_runner = RunPhase1(
+ args=self.args,
+ K=K, N=N,
+ search_space_ins=self.search_space_ins,
+ train_loader=train_loader,
+ is_simulate=self.is_simulate)
+
+ k_models, all_models, p1_trace_highest_score, p1_trace_highest_scored_models_id \
+ = p1_runner.run_phase1()
+
+ logger.info("4. [trails] Begin to run phase2: refinement phase")
+
+ # 3. run phase-2 to determine the final model
+ best_arch, best_arch_performance, B2_actual_epoch_use, _ = self.sh.run_phase2(U, k_models)
+ # print("best model returned from Phase2 = ", k_models)
+ end_time = time.time()
+ real_time_usage = end_time - begin_time
+ planned_time_usage = B1_planed_time + B2_planed_time
+ logger.info("5. [trails] Real time Usage = " + str(real_time_usage)
+ + ", Final selected model = " + str(best_arch)
+ + ", planned time usage = " + str(planned_time_usage)
+ )
+ # best arch returned,
+ # time usage, epoch trained,
+ # p1 ea trace
+ return best_arch, best_arch_performance, \
+ real_time_usage, planned_time_usage, B2_all_epoch, \
+ all_models, p1_trace_highest_score, p1_trace_highest_scored_models_id
+
+ def schedule_only(self, budget: float, data_loader: List[DataLoader],
+ only_phase1: bool = False, run_workers: int = 1):
+ """
+ Select model online
+ :param budget: time budget
+        :param data_loader: list of [train_loader, valid_loader, test_loader]
+ :param only_phase1:
+ :param run_workers:
+ :return:
+ """
+
+ train_loader, valid_loader, test_loader = data_loader
+
+ logger.info("0. [trails] Begin model selection ... ")
+
+ logger.info("1. [trails] Begin profiling.")
+ # 0. profiling dataset and search space, get t1 and t2
+ score_time_per_model, train_time_per_epoch, N_K_ratio = self.search_space_ins.profiling(
+ self.dataset,
+ train_loader,
+ valid_loader,
+ self.args,
+ is_simulate=self.is_simulate)
+
+ self.sh = BudgetAwareControllerSH(
+ search_space_ins=self.search_space_ins,
+ dataset_name=self.dataset,
+ eta=self.eta,
+ time_per_epoch=train_time_per_epoch,
+ is_simulate=self.is_simulate,
+ train_loader=train_loader,
+ val_loader=valid_loader,
+ args=self.args)
+
+ # 1. run coordinator to schedule
+ logger.info("2. [trails] Begin scheduling...")
+ K, U, N, B1_planed_time, B2_planed_time, B2_all_epoch = coordinator.schedule(self.dataset, self.sh, budget,
+ score_time_per_model,
+ train_time_per_epoch,
+ run_workers,
+ self.search_space_ins,
+ N_K_ratio,
+ only_phase1)
+
+ return K, U, N, B1_planed_time, B2_planed_time, B2_all_epoch
+
+ #############################################
+ # to support in-database model selection
+ #############################################
+
+ def profile_filtering(self, data_loader: List[DataLoader] = [None, None, None]):
+ logger.info("0. [trails] Begin profile_filtering...")
+ begin_time = time.time()
+ train_loader, valid_loader, test_loader = data_loader
+ score_time_per_model = self.search_space_ins.profiling_score_time(
+ self.dataset,
+ train_loader,
+ valid_loader,
+ self.args,
+ is_simulate=self.is_simulate)
+ logger.info(f"0. [trails] profile_filtering Done, time_usage = {time.time() - begin_time}")
+ return score_time_per_model
+
+ def profile_refinement(self, data_loader: List[DataLoader] = [None, None, None]):
+ logger.info("0. [trails] Begin profile_refinement...")
+ begin_time = time.time()
+ train_loader, valid_loader, test_loader = data_loader
+ train_time_per_epoch = self.search_space_ins.profiling_train_time(
+ self.dataset,
+ train_loader,
+ valid_loader,
+ self.args,
+ is_simulate=self.is_simulate)
+ logger.info(f"0. [trails] profile_refinement Done, time_usage = {time.time() - begin_time}")
+ return train_time_per_epoch
+
+ def coordination(self, budget: float, score_time_per_model: float, train_time_per_epoch: float, only_phase1: bool):
+ logger.info("1. [trails] Begin coordination...")
+ begin_time = time.time()
+ sh = BudgetAwareControllerSH(
+ search_space_ins=self.search_space_ins,
+ dataset_name=self.dataset,
+ eta=self.eta,
+ time_per_epoch=train_time_per_epoch,
+ is_simulate=self.is_simulate,
+ train_loader=None,
+ val_loader=None,
+ args=self.args)
+ n_k_ratio = profile_NK_trade_off(self.dataset)
+ K, U, N, B1_planed_time, B2_planed_time, B2_all_epoch = coordinator.schedule(
+ self.dataset, sh, budget,
+ score_time_per_model,
+ train_time_per_epoch,
+ 1,
+ self.search_space_ins,
+ n_k_ratio,
+ only_phase1)
+
+ logger.info(f"1. [trails] Coordination Done, time_usage = {time.time() - begin_time}")
+ return K, U, N
+
+ def filtering_phase(self, N, K, train_loader=None):
+ logger.info("2. [trails] Begin filtering_phase...")
+ begin_time = time.time()
+ p1_runner = RunPhase1(
+ args=self.args,
+ K=K, N=N,
+ search_space_ins=self.search_space_ins,
+ train_loader=train_loader,
+ is_simulate=self.is_simulate)
+
+ k_models, all_models, p1_trace_highest_score, p1_trace_highest_scored_models_id \
+ = p1_runner.run_phase1()
+ logger.info(f"2. [trails] filtering_phase Done, time_usage = {time.time() - begin_time}")
+ print(f"2. [trails] filtering_phase Done, time_usage = {time.time() - begin_time}")
+ return k_models, all_models, p1_trace_highest_score, p1_trace_highest_scored_models_id
+
+ def refinement_phase(self, U, k_models, alg_name: str = Config.SUCCHALF, train_loader=None, valid_loader=None,
+ train_time_per_epoch=None):
+ logger.info("3. [trails] Begin refinement...")
+ begin_time = time.time()
+
+ if alg_name == Config.SUCCHALF:
+ self.sh = BudgetAwareControllerSH(
+ search_space_ins=self.search_space_ins,
+ dataset_name=self.dataset,
+ eta=self.eta,
+ time_per_epoch=train_time_per_epoch,
+ is_simulate=self.is_simulate,
+ train_loader=train_loader,
+ val_loader=valid_loader,
+ args=self.args)
+ elif alg_name == Config.SUCCREJCT:
+ self.sh = BudgetAwareControllerSR(
+ search_space_ins=self.search_space_ins,
+ dataset_name=self.dataset,
+ eta=self.eta,
+ time_per_epoch=train_time_per_epoch,
+ args=self.args)
+ elif alg_name == Config.UNIFORM:
+ self.sh = UniformAllocation(
+ search_space_ins=self.search_space_ins,
+ dataset_name=self.dataset,
+ eta=self.eta,
+ time_per_epoch=train_time_per_epoch,
+ args=self.args)
+ else:
+ raise NotImplementedError
+
+ best_arch, best_arch_performance, B2_actual_epoch_use, total_time_usage = self.sh.run_phase2(U, k_models)
+ logger.info(
+ f"3. [trails] refinement phase Done, time_usage = {time.time() - begin_time}, "
+ f"epoches_used = {B2_actual_epoch_use}")
+ return best_arch, best_arch_performance, B2_actual_epoch_use, total_time_usage
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/logger/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/logger/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/logger/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/logger/__init__.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/README.md b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/README.md
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/README.md
rename to examples/model_selection/Trails/internal/ml/model_selection/src/query_api/README.md
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/__init__.py
new file mode 100644
index 0000000000..8c328a3bbc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/__init__.py
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_explore_ea.py b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/img_explore_ea.py
similarity index 96%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_explore_ea.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/query_api/img_explore_ea.py
index b032997749..ae3f691044 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_explore_ea.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/img_explore_ea.py
@@ -25,7 +25,7 @@
base_folder_dir = os.environ.get("base_dir")
if base_folder_dir is None: base_folder_dir = os.getcwd()
-base_dir = os.path.join(base_folder_dir, "img_data", "ground_truth")
+base_dir = os.path.join(base_folder_dir, "img_data")
print("local api running at {}".format(base_dir))
# sum score is better
@@ -93,4 +93,4 @@ def fetch_from_db(space_name, dataset, run_id_m, N_m):
if __name__ == '__main__':
- print(fetch_from_db(Config.NB201, Config.c10, 3, 10))
+ print(fetch_from_db(Config.NB201, Config.c100, 3, 10))
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_train_baseline.py b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/img_train_baseline.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_train_baseline.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/query_api/img_train_baseline.py
index 7b81505165..b614fb2999 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/img_train_baseline.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/img_train_baseline.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
import os
import numpy as np
from src.common.constant import Config
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/interface.py b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/interface.py
similarity index 96%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/interface.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/query_api/interface.py
index d2d335ceef..265ecdc5a7 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/interface.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/interface.py
@@ -16,7 +16,6 @@
# limitations under the License.
#
-# query ground truth
from src.common.constant import Config, CommonVars
from src.query_api.query_api_img import Gt201, Gt101
from src.query_api.query_api_mlp import GTMLP
@@ -30,13 +29,16 @@ def profile_NK_trade_off(dataset):
We try various N/K combinations, and find this is better.
"""
if dataset == Config.c10:
- return 85
+ return 100
elif dataset == Config.c100:
- return 85
+ return 100
elif dataset == Config.imgNet:
- return 130
+ return 100
else:
- return 30
+        # for ExpressFlow:
+        # return 30
+        # for JacFlow:
+ return 100
class SimulateTrain:
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_img.py b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/query_api_img.py
similarity index 98%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_img.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/query_api/query_api_img.py
index dd3f4ca9c8..b5f77e38ac 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_img.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/query_api_img.py
@@ -68,6 +68,7 @@ def guess_train_one_epoch_time(search_space_m, dataset):
return Gt101().guess_train_one_epoch_time()
if search_space_m == Config.NB201:
return Gt201().guess_train_one_epoch_time(dataset)
+ raise NotImplementedError
class ImgScoreQueryApi:
@@ -221,7 +222,9 @@ def guess_train_one_epoch_time(self, dataset):
# if time_usage > res:
# res = time_usage
# return res
- return 40
+ arch_id = random.randint(1, 15625)
+ time_usage = self.data201[str(arch_id)]["200"][dataset]["0"]["time_usage"]
+ return time_usage
def get_all_trained_model_ids(self):
# 201 all data has the same model set.
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_mlp.py b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/query_api_mlp.py
similarity index 88%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_mlp.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/query_api/query_api_mlp.py
index affeacaa44..1527567924 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/query_api_mlp.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/query_api_mlp.py
@@ -39,9 +39,9 @@
# 0.8028456677612497
# todo: here is for debug expressFlow only
-exp_mlp_score_frappe = os.path.join(base_dir, "score_scale_traj_width/score_mlp_sp_frappe_batch_size_32_cpu.json")
-exp_mlp_score_uci = os.path.join(base_dir, "score_scale_traj_width/score_mlp_sp_uci_diabetes_batch_size_32_cpu.json")
-exp_mlp_score_criteo = os.path.join(base_dir, "score_scale_traj_width/score_mlp_sp_criteo_batch_size_32_cpu.json")
+exp_mlp_score_frappe = os.path.join(base_dir, "micro_sensitivity/3_batch_size/4/score_mlp_sp_frappe_batch_size_32_cpu.json")
+exp_mlp_score_uci = os.path.join(base_dir, "micro_sensitivity/3_batch_size/4/score_mlp_sp_uci_diabetes_batch_size_32_cpu.json")
+exp_mlp_score_criteo = os.path.join(base_dir, "micro_sensitivity/3_batch_size/4/score_mlp_sp_criteo_batch_size_32_cpu.json")
# todo: here we use weight sharing.
mlp_score_frappe_weight_share = os.path.join(base_dir, "tab_data/weight_share_nas_frappe.json")
@@ -139,18 +139,22 @@ def get_train_one_epoch_time(self, device: str):
def get_valid_auc(self, arch_id: str, epoch_num: int):
# todo: due to heavy job contention on the server, the time usage may not be valid.
- time_usage = (int(epoch_num) + 1) * self.get_train_one_epoch_time(self.device)
+ # train on gpu,
+ time_usage = (int(epoch_num) + 1) * self.get_train_one_epoch_time("gpu")
if self.dataset == Config.Frappe:
- if epoch_num is None or epoch_num >= 20: epoch_num = 19
+ if epoch_num is None or epoch_num >= 13: epoch_num = 13
t_acc = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["valid_auc"]
+ time_usage = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["train_val_total_time"]
return t_acc, time_usage
elif self.dataset == Config.Criteo:
if epoch_num is None or epoch_num >= 10: epoch_num = 9
t_acc = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["valid_auc"]
+ time_usage = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["train_val_total_time"]
return t_acc, time_usage
elif self.dataset == Config.UCIDataset:
if epoch_num is None or epoch_num >= 40: epoch_num = 39
- t_acc = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["valid_auc"]
+ t_acc = self.mlp_train[self.dataset][arch_id][str(0)]["valid_auc"]
+ time_usage = self.mlp_train[self.dataset][arch_id][str(epoch_num)]["train_val_total_time"]
return t_acc, time_usage
else:
raise NotImplementedError
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/singleton.py b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/singleton.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/singleton.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/query_api/singleton.py
index 24814b1190..d0478d2f30 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/query_api/singleton.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/query_api/singleton.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
import threading
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/controller/sampler_rand/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/__init__.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/dataset_utils/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/__init__.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/model_params.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/model_params.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/model_params.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/model_params.py
index 811bf71c72..0851964e51 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/core/model_params.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/model_params.py
@@ -15,7 +15,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
class ModelMacroCfg:
"""
Macro search space config
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/rl_policy.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/rl_policy.py
new file mode 100644
index 0000000000..eb1d169bdd
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/rl_policy.py
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch.nn as nn
+
+
+class RLPolicyBase(nn.Module):
+ pass
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/space.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/space.py
new file mode 100644
index 0000000000..607c65ac5f
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/core/space.py
@@ -0,0 +1,193 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from abc import abstractmethod
+from typing import Generator
+
+from torch.utils.data import DataLoader
+from src.search_space.core.model_params import ModelMacroCfg, ModelMicroCfg
+
+
+class SpaceWrapper:
+
+ def __init__(self, cfg: ModelMacroCfg, name: str):
+ self.model_cfg = cfg
+ self.name = name
+
+ @abstractmethod
+ def sample_all_models(self) -> Generator[str, None, None]:
+ """
+        Sample all models; yields arch ids one by one
+ """
+ raise NotImplementedError
+
+ """serialize and deserialize"""
+
+ @classmethod
+ def serialize_model_encoding(cls, arch_micro: ModelMicroCfg) -> str:
+ raise NotImplementedError
+
+ @classmethod
+ def deserialize_model_encoding(cls, model_encoding) -> ModelMicroCfg:
+ raise NotImplementedError
+
+ @classmethod
+ def new_arch_scratch(cls, arch_macro: ModelMacroCfg, arch_micro: ModelMicroCfg, bn: bool = True):
+ """
+ Args:
+ arch_macro: macro setting for one architecture
+ arch_micro: micro setting for one architecture
+ bn: true or false
+ Returns:
+ """
+ raise NotImplementedError
+
+ def new_arch_scratch_with_default_setting(self, model_encoding: str, bn: bool):
+ """
+ Use the current search space's macro setting.
+ Args:
+ model_encoding: str of the model encoding
+ bn: true or false
+ Returns:
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def load(self):
+ """
+ Load the related API
+ Returns:
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def profiling(self, dataset: str,
+ train_loader: DataLoader = None, val_loader: DataLoader = None,
+ args=None, is_simulate: bool = False) -> (float, float, int):
+ """
+ Profile the training and scoring time.
+ Args:
+ dataset:
+ train_loader:
+ val_loader
+ args:
+ is_simulate:
+ Returns:
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def micro_to_id(self, arch_struct: ModelMicroCfg) -> str:
+ raise NotImplementedError
+
+ """init new architecture"""
+
+ @abstractmethod
+ def new_architecture(self, arch_id: str):
+ """
+ Generate an architecture with arch id
+ :return:
+ """
+ raise NotImplementedError
+
+ def new_architecture_with_micro_cfg(self, arch_micro: ModelMicroCfg):
+ """
+ Generate an architecture with arch_micro
+ :return:
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def __len__(self):
+ """
+ How many architectures the space has
+ :return:
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def get_arch_size(self, architecture):
+ """
+ Get how many edges in each cell of the architecture.
+ :return:
+ """
+ raise NotImplementedError
+
+ def update_bn_flag(self, bn: bool):
+ """
+        Update the architecture's batch-norm (bn) flag
+ :param bn:
+ :return:
+ """
+ self.model_cfg.bn = bn
+
+ """Below is for integrating space with various sampler"""
+
+ def random_architecture_id(self) -> (str, ModelMicroCfg):
+ """
+        Randomly generate an architecture id and cell structure; supports RN, RL, R
+ :return:
+ """
+ raise NotImplementedError
+
+ def mutate_architecture(self, parent_arch: ModelMicroCfg) -> (str, ModelMicroCfg):
+ """
+        Mutate an architecture; this supports the EA sampler
+ :rtype: object
+ :return:
+ """
+ raise NotImplementedError
+
+ def get_reinforcement_learning_policy(self, lr_rate):
+ """
+        This is for the reinforcement learning policy sampler
+ :return:
+ """
+ raise NotImplementedError
+
+ """In-RDBMS Helper Functions"""
+
+ def profiling_score_time(self, dataset: str, train_loader: DataLoader = None, val_loader: DataLoader = None,
+ args=None, is_simulate: bool = False) -> float:
+ """
+ Profile the scoring time.
+ Args:
+ dataset:
+ train_loader:
+ val_loader
+ args:
+ is_simulate:
+ Returns:
+ """
+ raise NotImplementedError
+
+ def profiling_train_time(self, dataset: str, train_loader: DataLoader = None, val_loader: DataLoader = None,
+ args=None, is_simulate: bool = False) -> float:
+ """
+ Profile the training time.
+ Args:
+ dataset:
+ train_loader:
+ val_loader
+ args:
+ is_simulate:
+ Returns:
+ """
+ raise NotImplementedError
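+
+
+# A minimal illustrative sketch (hypothetical, not part of this interface) of
+# how a concrete search space could subclass SpaceWrapper:
+#
+#     class ToySpace(SpaceWrapper):
+#         def load(self):
+#             pass
+#
+#         def sample_all_models(self):
+#             yield from ("arch-0", "arch-1")
+#
+#         def __len__(self):
+#             return 2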
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/init_search_space.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/init_search_space.py
similarity index 94%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/init_search_space.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/init_search_space.py
index 8d46ebdbd9..1ad6e4398a 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/init_search_space.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/init_search_space.py
@@ -16,23 +16,25 @@
# limitations under the License.
#
+
import os
from src.common.constant import Config
from src.search_space.core.space import SpaceWrapper
from src.query_api.query_api_img import ImgScoreQueryApi
+
def init_search_space(args) -> SpaceWrapper:
"""
:param args:
:param loapi: Local score API, records all scored arch, 101 use it to detect which arch is scored.
:return:
"""
- # elif args.search_space == Config.MLPSP:
+
if args.search_space == Config.MLPSP:
from .mlp_api.space import MlpSpace
from .mlp_api.model_params import MlpMacroCfg
from .mlp_api.space import DEFAULT_LAYER_CHOICES_20, DEFAULT_LAYER_CHOICES_10
- print ("src/search_space/init_search_space.py config.MLPSP")
+ print("[Singa] src/search_space/init_search_space.py config.MLPSP")
if args.hidden_choice_len == 10:
model_cfg = MlpMacroCfg(
args.nfield,
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/__init__.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/eva_engine/phase1/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/__init__.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/model_params.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/model_params.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/model_params.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/model_params.py
index 7edf35e1d4..1d7e676525 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/model_params.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/model_params.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
from src.search_space.core.model_params import ModelMacroCfg
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py
index e3372525da..aa8d24d978 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/rl_policy.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
from src.search_space.core.rl_policy import RLPolicyBase
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/space.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/space.py
similarity index 73%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/space.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/space.py
index 8336750ae2..33a9b6fb12 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/mlp_api/space.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/mlp_api/space.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
import copy
import itertools
import random
@@ -23,6 +24,7 @@
from copy import deepcopy
from typing import Generator
+import torch
from src.common.constant import Config, CommonVars
from src.eva_engine import evaluator_register
from src.eva_engine.phase2.algo.trainer import ModelTrainer
@@ -30,6 +32,8 @@
from src.search_space.core.model_params import ModelMicroCfg, ModelMacroCfg
from src.search_space.core.space import SpaceWrapper
from src.search_space.mlp_api.model_params import MlpMacroCfg
+import torch.nn as nn
+from torch.utils.data import DataLoader
from src.query_api.interface import profile_NK_trade_off
from src.query_api.query_api_mlp import GTMLP
@@ -53,12 +57,12 @@
48, 96, 112, 144, 176, 240,
384]
-
np_dtype = {"float16": np.float16, "float32": np.float32}
# singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
singa_dtype = {"float32": tensor.float32}
+
class MlpMicroCfg(ModelMicroCfg):
@classmethod
@@ -72,6 +76,83 @@ def __init__(self, hidden_layer_list: list):
def __str__(self):
return "-".join(str(x) for x in self.hidden_layer_list)
+
+class Embedding(nn.Module):
+
+ def __init__(self, nfeat, nemb):
+ super().__init__()
+ self.embedding = nn.Embedding(nfeat, nemb)
+ nn.init.xavier_uniform_(self.embedding.weight)
+
+ def forward(self, x: dict):
+ """
+ :param x: {'id': LongTensor B*F, 'value': FloatTensor B*F}
+ :return: embeddings B*F*E
+ """
+ emb = self.embedding(x['id']) # B*F*E
+ return emb * x['value'].unsqueeze(2) # B*F*E
+
+
+class MLP(nn.Module):
+
+ def __init__(self, ninput: int, hidden_layer_list: list, dropout_rate: float, noutput: int, use_bn: bool):
+ super().__init__()
+ """
+ Args:
+            ninput: number of input feature dimensions
+            hidden_layer_list: [a, b, c, ...]; each value is the number of neurons in the corresponding hidden layer
+            dropout_rate: dropout probability
+            noutput: number of labels
+            use_bn: whether to use batch normalization
+ """
+
+ layers = list()
+ # 1. all hidden layers.
+ for index, layer_size in enumerate(hidden_layer_list):
+ layers.append(nn.Linear(ninput, layer_size))
+ if use_bn:
+ layers.append(nn.BatchNorm1d(layer_size))
+ layers.append(nn.ReLU())
+ layers.append(nn.Dropout(p=dropout_rate))
+ ninput = layer_size
+        # 2. output layer (from the last hidden size to noutput)
+ if len(hidden_layer_list) == 0:
+ last_hidden_layer_num = ninput
+ else:
+ last_hidden_layer_num = hidden_layer_list[-1]
+ layers.append(nn.Linear(last_hidden_layer_num, noutput))
+
+ # 3. generate the MLP
+ self.mlp = nn.Sequential(*layers)
+
+ self._initialize_weights()
+
+ def forward(self, x):
+ """
+        Each element of the output represents the probability of the positive class.
+        :param x: FloatTensor B*ninput
+        :return: FloatTensor B*noutput
+ """
+ return self.mlp(x)
+
+ def _initialize_weights(self, method='xavier'):
+ for m in self.modules():
+            if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):  # the MLP uses BatchNorm1d
+ m.weight.data.fill_(1)
+ m.bias.data.zero_()
+ elif isinstance(m, nn.Linear):
+ if method == 'lecun':
+ nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='linear')
+ elif method == 'xavier':
+ nn.init.xavier_uniform_(m.weight)
+ elif method == 'he':
+ nn.init.kaiming_uniform_(m.weight)
+ # m.weight.data.normal_(0, 0.01)
+ # m.bias.data.zero_()
+
+ def reset_zero_grads(self):
+ self.zero_grad()
+
+
#### self-defined loss begin
### from autograd.py
@@ -107,11 +188,13 @@ def backward(self, dy=1.0):
dx.data *= float(dy)
return dx.data
+
def se_loss(x):
# assert x.shape == t.shape, "input and target shape different: %s, %s" % (
# x.shape, t.shape)
return SumError()(x)[0]
+
### from layer.py
class SumErrorLayer(Layer):
"""
@@ -124,6 +207,7 @@ def __init__(self):
def forward(self, x):
return se_loss(x)
+
#### self-defined loss end
class SINGADNNModel(model.Model):
@@ -131,7 +215,7 @@ class SINGADNNModel(model.Model):
def __init__(self, nfield: int, nfeat: int, nemb: int,
hidden_layer_list: list, dropout_rate: float,
noutput: int, use_bn: bool = True):
- # def __init__(self, data_size=10, perceptron_size=100, num_classes=10, layer_hidden_list=[10,10,10,10]):
+ # def __init__(self, data_size=10, perceptron_size=100, num_classes=10, layer_hidden_list=[10,10,10,10]):
super(SINGADNNModel, self).__init__()
# self.num_classes = num_classes
self.dimension = 2 # data dimension = 2
@@ -165,7 +249,7 @@ def __init__(self, nfield: int, nfeat: int, nemb: int,
self.hidden_layer_list = hidden_layer_list
# Initialize subnet mask with ones
self.subnet_mask = [np.ones(size) for size in hidden_layer_list]
-
+
def forward(self, inputs):
# print ("in space.py forward")
# print ("in space.py inputs shape: ", inputs.shape)
@@ -179,13 +263,12 @@ def forward(self, inputs):
y = self.relu(y)
y = self.linear5(y)
return y
-
+
def generate_all_ones_embedding(self):
"""
Only for the MLP
Returns:
"""
- import torch
# batch_data = torch.ones(1, self.mlp_ninput).double() # embedding
batch_data = torch.ones(1, self.nfeat).double() # one-hot
# print ("batch_data shape: ", batch_data.shape)
@@ -258,7 +341,7 @@ def create_model(pretrained=False, **kwargs):
Args:
pretrained (bool): If True, returns a pre-trained model.
-
+
Returns:
The created CNN model.
"""
@@ -269,7 +352,117 @@ def create_model(pretrained=False, **kwargs):
__all__ = ['SINGADNNModel', 'create_model']
-from torch.utils.data import DataLoader
+
+class DNNModel(torch.nn.Module):
+ """
+ Model: Deep Neural Networks
+ """
+
+ def __init__(self, nfield: int, nfeat: int, nemb: int,
+ hidden_layer_list: list, dropout_rate: float,
+ noutput: int, use_bn: bool = True):
+ """
+ Args:
+ nfield: the number of fields
+ nfeat: the number of features
+ nemb: embedding size
+ """
+ super().__init__()
+ self.nfeat = nfeat
+ self.nemb = nemb
+ self.embedding = None
+ self.mlp_ninput = nfield * nemb
+ self.mlp = MLP(self.mlp_ninput, hidden_layer_list, dropout_rate, noutput, use_bn)
+ # self.sigmoid = nn.Sigmoid()
+
+ # for weight-sharing
+ self.is_masked_subnet = False
+ self.hidden_layer_list = hidden_layer_list
+ # Initialize subnet mask with ones
+ self.subnet_mask = [torch.ones(size) for size in hidden_layer_list]
+
+ def init_embedding(self, cached_embedding=None, requires_grad=False):
+ """
+        This is slow; in the filtering phase, we could enable caching here.
+ """
+ if self.embedding is None:
+ if cached_embedding is None:
+ self.embedding = Embedding(self.nfeat, self.nemb)
+ else:
+ self.embedding = cached_embedding
+
+ # in scoring process
+ # Disable gradients for all parameters in the embedding layer
+ if not requires_grad:
+ for param in self.embedding.parameters():
+ param.requires_grad = False
+
+ def generate_all_ones_embedding(self):
+ """
+ Only for the MLP
+ Returns:
+ """
+ batch_data = torch.ones(1, self.mlp_ninput).double()
+ return batch_data
+
+ def forward_wo_embedding(self, x):
+ """
+        Only used when the embedding is generated outside, e.g., an all-ones embedding.
+ """
+ y = self.mlp(x) # B*label
+ return y.squeeze(1)
+
+ def forward(self, x):
+ """
+ :param x: {'id': LongTensor B*F, 'value': FloatTensor B*F}
+ :return: y of size B, Regression and Classification (+sigmoid)
+ """
+ if self.is_masked_subnet:
+ return self.forward_w_mask(x)
+ else:
+ x_emb = self.embedding(x) # B*F*E
+ y = self.mlp(x_emb.view(-1, self.mlp_ninput)) # B*label
+ # this is for binary classification
+ return y.squeeze(1)
+
+ def sample_subnet(self, arch_id: str, device: str):
+ # arch_id e.g., '128-128-128-128'
+ sizes = list(map(int, arch_id.split('-')))
+ self.is_masked_subnet = True
+ # randomly mask neurons in the layers.
+
+ for idx, size in enumerate(sizes):
+ # Create a mask of ones and zeros with the required length
+ mask = torch.cat([
+ torch.ones(size),
+ torch.zeros(self.hidden_layer_list[idx] - size)],
+ dim=0).to(device)
+ # Shuffle the mask to randomize which neurons are active
+ mask = mask[torch.randperm(mask.size(0))]
+ self.subnet_mask[idx] = mask
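+        # e.g., with hidden_layer_list=[128, 128, 128, 128] and arch_id='64-64-64-64',
+        # each mask keeps 64 of the 128 neurons active (64 ones, 64 zeros, shuffled)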
+
+ def forward_w_mask(self, x):
+ x_emb = self.embedding(x) # B*F*E
+ x_emb = x_emb.view(-1, self.mlp_ninput)
+
+ # Loop till the second last layer of the MLP
+ for idx, layer in enumerate(self.mlp.mlp[:-1]): # Exclude the last Linear layer
+            # 1. subnet_mask: idx // 4 maps the module index to the MLP layer index
+            #    (with use_bn=True each hidden layer expands to 4 modules:
+            #    Linear, BatchNorm1d, ReLU, Dropout)
+            # 2. unsqueeze(1): make the mask 2-D so it broadcasts across each row of
+            #    the weight matrix, i.e. masks whole output neurons
+            # 3. torch.nn.functional.linear multiplies the input by the transposed
+            #    (masked) weight
+ if isinstance(layer, nn.Linear):
+ weight = layer.weight * self.subnet_mask[idx // 4].unsqueeze(1)
+ x_emb = torch.nn.functional.linear(x_emb, weight, layer.bias)
+ else:
+ x_emb = layer(x_emb) # apply activation, dropout, batchnorm, etc.
+
+ # Handle the output layer
+ output_layer = self.mlp.mlp[-1]
+ y = output_layer(x_emb)
+ return y.squeeze(1)
+
+
class MlpSpace(SpaceWrapper):
def __init__(self, modelCfg: MlpMacroCfg):
super().__init__(modelCfg, Config.MLPSP)
@@ -356,7 +549,6 @@ def profiling_score_time(
else:
# get a random batch.
- import torch
batch = iter(train_loader).__next__()
target = batch['y'].type(torch.LongTensor)
batch['id'] = batch['id'].to(device)
@@ -373,33 +565,44 @@ def profiling_score_time(
hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
dropout_rate=0,
noutput=self.model_cfg.num_labels)
- super_net.init_embedding(requires_grad=False)
- super_net.to(device)
+ # super_net.init_embedding(requires_grad=False)
+ # super_net.to(device)
# measure score time,
score_time_begin = time.time()
- naswot_score, _ = evaluator_register[CommonVars.NAS_WOT].evaluate_wrapper(
- arch=super_net,
- device=device,
- batch_data=batch,
- batch_labels=target)
+ # naswot_score, _ = evaluator_register[CommonVars.NAS_WOT].evaluate_wrapper(
+ # arch=super_net,
+ # device=device,
+ # batch_data=batch,
+ # batch_labels=target)
+ #
+    # # re-init the net
+ # del super_net
+ # # super_net = DNNModel(
+ # super_net = SINGADNNModel(
+ # nfield=args.nfield,
+ # nfeat=args.nfeat,
+ # nemb=args.nemb,
+ # hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
+ # dropout_rate=0,
+ # noutput=self.model_cfg.num_labels,
+ # use_bn=False)
+ # super_net.init_embedding(requires_grad=False)
+ # super_net.to(device)
+
+ # preprocessing
+ if isinstance(batch, torch.Tensor):
+ feature_dim = list(batch[0, :].shape)
+ # add one dimension to feature dim, [1] + [3, 32, 32] = [1, 3, 32, 32]
+ mini_batch = torch.ones([1] + feature_dim).float().to(device)
+ else:
+ # this is for the tabular data,
+ mini_batch = super_net.generate_all_ones_embedding().float().to(device)
- # re-init hte net
- del super_net
- # super_net = DNNModel(
- super_net = SINGADNNModel(
- nfield=args.nfield,
- nfeat=args.nfeat,
- nemb=args.nemb,
- hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
- dropout_rate=0,
- noutput=self.model_cfg.num_labels,
- use_bn=False)
- super_net.init_embedding(requires_grad=False)
- super_net.to(device)
synflow_score, _ = evaluator_register[CommonVars.PRUNE_SYNFLOW].evaluate_wrapper(
arch=super_net,
device=device,
- batch_data=batch,
+ space_name=self.name,
+ batch_data=mini_batch,
batch_labels=target)
score_time = time.time() - score_time_begin
@@ -428,8 +631,8 @@ def profiling_train_time(self, dataset: str,
hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
dropout_rate=0,
noutput=self.model_cfg.num_labels)
- super_net.init_embedding(requires_grad=True)
- super_net.to(device)
+ # super_net.init_embedding(requires_grad=True)
+ # super_net.to(device)
# only train for one iteration to evaluate the time usage.
targs = copy.deepcopy(args)
valid_auc, train_time_epoch, train_log = ModelTrainer.fully_train_arch(
@@ -459,7 +662,7 @@ def profiling(self, dataset: str,
_train_time_per_epoch = gtmlp.get_score_one_model_time("cpu")
score_time = _train_time_per_epoch
else:
- import torch
+
# get a random batch.
batch = iter(train_loader).__next__()
target = batch['y'].type(torch.LongTensor)
@@ -477,14 +680,15 @@ def profiling(self, dataset: str,
hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
dropout_rate=0,
noutput=self.model_cfg.num_labels)
- super_net.init_embedding(requires_grad=False)
- super_net.to(device)
+ # super_net.init_embedding(requires_grad=False)
+ # super_net.to(device)
# measure score time,
score_time_begin = time.time()
naswot_score, _ = evaluator_register[CommonVars.NAS_WOT].evaluate_wrapper(
arch=super_net,
device=device,
+ space_name=self.name,
batch_data=batch,
batch_labels=target)
@@ -499,12 +703,13 @@ def profiling(self, dataset: str,
dropout_rate=0,
noutput=self.model_cfg.num_labels,
use_bn=False)
- super_net.init_embedding(requires_grad=False)
- super_net.to(device)
+ # super_net.init_embedding(requires_grad=False)
+ # super_net.to(device)
synflow_score, _ = evaluator_register[CommonVars.PRUNE_SYNFLOW].evaluate_wrapper(
arch=super_net,
device=device,
+ space_name=self.name,
batch_data=batch,
batch_labels=target)
@@ -527,8 +732,8 @@ def profiling(self, dataset: str,
hidden_layer_list=[DEFAULT_LAYER_CHOICES_20[-1]] * self.model_cfg.num_layers,
dropout_rate=0,
noutput=self.model_cfg.num_labels)
- super_net.init_embedding(requires_grad=True)
- super_net.to(device)
+ # super_net.init_embedding(requires_grad=True)
+ # super_net.to(device)
# only train for one iteration to evaluate the time usage.
targs = copy.deepcopy(args)
diff --git a/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/utils/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/utils/__init__.py
new file mode 100644
index 0000000000..fe5964787e
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/utils/__init__.py
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/weight_initializers.py b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/utils/weight_initializers.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/weight_initializers.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/search_space/utils/weight_initializers.py
index de1c544423..5f6634bb77 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/weight_initializers.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/search_space/utils/weight_initializers.py
@@ -13,6 +13,9 @@
# limitations under the License.
# =============================================================================
+import torch.nn as nn
+
+
def init_net(net, w_type, b_type):
"""
Init network with various algorithms
@@ -43,7 +46,6 @@ def init_net(net, w_type, b_type):
else:
raise NotImplementedError(f'init_type={b_type} is not supported.')
-import torch.nn as nn
def _init_weights_vs(m):
if type(m) == nn.Linear or type(m) == nn.Conv2d:
@@ -76,3 +78,5 @@ def _init_bias_zero(m):
if type(m) == nn.Linear or type(m) == nn.Conv2d:
if m.bias is not None:
m.bias.data.fill_(.0)
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/__init__.py b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/__init__.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/__init__.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/tools/__init__.py
index 01d7057208..42c2ac6db1 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/search_space/utils/__init__.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/__init__.py
@@ -16,3 +16,4 @@
# limitations under the License.
#
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/compute.py b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/compute.py
similarity index 97%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/compute.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/tools/compute.py
index 0400ef5467..b60e848e7e 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/compute.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/compute.py
@@ -16,6 +16,7 @@
# limitations under the License.
#
+
# for binary insert
from typing import List
import numpy as np
@@ -92,12 +93,12 @@ def generate_global_rank(ml_data_score_dic: dict, alg_name_list: List) -> dict:
return model_new_rank_score
-def log_scale_x_array(num_points, max_minute, base=10) -> list:
+def log_scale_x_array(num_points, max_minute, base=10, min_val=1) -> list:
"""
return a list of mins in log scale distance.
"""
# Set the minimum and maximum values for the log scale
- min_val = 1 # 1 second
+    # min_val now comes from the argument (default: 1 second)
max_val = max_minute * 60 # 1440 minutes converted to seconds
# Generate the log scale values
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/correlation.py b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/correlation.py
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/correlation.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/tools/correlation.py
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/io_tools.py b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/io_tools.py
similarity index 98%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/io_tools.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/tools/io_tools.py
index e657b9e04b..a66575b7b1 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/io_tools.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/io_tools.py
@@ -53,8 +53,7 @@ def write_pickle(file_name, data):
if __name__ == "__main__":
- a = {1:1}
+ a = {1: 1}
write_json("./asdf.json", a)
- b = {2:2323}
+ b = {2: 2323}
write_json("./asdf.json", b)
-
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/res_measure.py b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/res_measure.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/res_measure.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/tools/res_measure.py
index 93270ae31e..bc261fa54b 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/res_measure.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/res_measure.py
@@ -23,6 +23,16 @@
import time
from src.tools.io_tools import write_json
import sys
+import torch
+
+
+def get_variable_memory_size(obj):
+ # If it's a PyTorch tensor and on the GPU
+ if torch.is_tensor(obj) and obj.is_cuda:
+ return obj.element_size() * obj.nelement()
+ else:
+ return sys.getsizeof(obj)
+
def print_cpu_gpu_usage(interval=1, output_file="path_to_folder", stop_event=None):
def print_usage():
@@ -72,13 +82,6 @@ def print_usage():
thread.start()
return stop_event, thread
-def get_variable_memory_size(obj):
- # If it's a PyTorch tensor and on the GPU
- import torch
- if torch.is_tensor(obj) and obj.is_cuda:
- return obj.element_size() * obj.nelement()
- else:
- return sys.getsizeof(obj)
def print_memory_usage():
# Get current process
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/utils.py b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/utils.py
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/utils.py
rename to examples/model_selection/Trails/internal/ml/model_selection/src/tools/utils.py
index 8e4232caa0..c2c45a705c 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/src/tools/utils.py
+++ b/examples/model_selection/Trails/internal/ml/model_selection/src/tools/utils.py
@@ -25,9 +25,16 @@
import numpy
import numpy as np
+import torch
import shutil
import logging
+import torchvision.transforms as transforms
+from torch.autograd import Variable
+import torch.nn.functional as F
+import torchvision.datasets as dset
+import torch.nn as nn
+
warnings.filterwarnings("error")
@@ -59,7 +66,7 @@ def update(self, val, n=1):
self.count += n
self.avg = self.sum / self.count
-import torch
+
def get_correct_num(y, target):
pred_label = torch.argmax(y, dim=1)
return (target == pred_label).sum().item()
@@ -101,7 +108,7 @@ def __call__(self, img):
img *= mask
return img
-import torchvision.transforms as transforms
+
def _data_transforms_cifar10(args):
CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
@@ -121,7 +128,7 @@ def _data_transforms_cifar10(args):
])
return train_transform, valid_transform
-import torchvision.datasets as dset
+
def _get_cifar10(args):
train_transform, valid_transform = _data_transforms_cifar10(args)
train_data = dset.CIFAR10(
@@ -342,7 +349,6 @@ def save_ckpt(ckpt, file_dir, file_name='model.ckpt', is_best=False):
def drop_path(x, drop_prob, dims=(0,)):
- from torch.autograd import Variable
var_size = [1 for _ in range(x.dim())]
for i in dims:
var_size[i] = x.size(i)
@@ -372,7 +378,6 @@ def __init__(self, path):
self.data = None
def update(self, alphas_normal, alphas_reduce, val_loss):
- import torch.nn.functional as F
a_normal = F.softmax(alphas_normal, dim=-1)
# print("alpha normal size: ", a_normal.data.size())
a_reduce = F.softmax(alphas_reduce, dim=-1)
@@ -406,7 +411,7 @@ def logger(log_dir, need_time=True, need_stdout=False):
log.addHandler(fh)
return log
-import torch.nn as nn
+
class CrossEntropyLabelSmooth(nn.Module):
def __init__(self, num_classes, epsilon):
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/README.md b/examples/model_selection/Trails/internal/ml/model_slicing/README.md
new file mode 100644
index 0000000000..44979a684b
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/README.md
@@ -0,0 +1,1093 @@
+
+
+
+# Powering In-Database Dynamic Model Slicing for Structured Data Analytics
+
+The general model, based on LEADS, is available at [algorithm](https://github.com/Zrealshadow/SAMS/tree/f0570730563e7e05e073d5b7eaedabebe6286f56).
+
+# Envs
+
+```bash
+pip install orjson
+pip install einops
+pip install tqdm
+pip install matplotlib
+
+unset PYTHONPATH
+export PYTHONPATH=$PYTHONPATH:/project/Trails/internal/ml/
+export PYTHONPATH=$PYTHONPATH:/project/Trails/internal/ml/model_slicing/
+export PYTHONPATH=$PYTHONPATH:/project/Trails/internal/ml/model_slicing/algorithm/
+echo $PYTHONPATH
+
+
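+# the equivalent paths on the host machine (outside the docker container):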
+export PYTHONPATH=$PYTHONPATH:/home/xingnaili/Trails/internal/ml/
+export PYTHONPATH=$PYTHONPATH:/home/xingnaili/Trails/internal/ml/model_slicing/
+export PYTHONPATH=$PYTHONPATH:/home/xingnaili/Trails/internal/ml/model_slicing/algorithm/
+
+
+# expected $PYTHONPATH entries:
+# /project/Trails/internal/ml/
+# /project/Trails/internal/ml/model_slicing/algorithm
+# /project/Trails/internal/ml/model_slicing
+
+```
+
+# Save data
+
+The following datasets are used here.
+
+```
+adult bank cvd frappe payment(credit) credit(hcdr) census diabetes
+```
+
+Save the statistics
+
+```bash
+# save the data cardinalities, run in docker
+
+# frappe
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset frappe --data_dir /hdd1/sams/data/ --nfeat 5500 --nfield 10 --max_filter_col 10 --train_dir ./
+
+# adult
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset adult --data_dir /hdd1/sams/data/ --nfeat 140 --nfield 13 --max_filter_col 13 --train_dir ./
+
+# cvd
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset cvd --data_dir /hdd1/sams/data/ --nfeat 110 --nfield 11 --max_filter_col 11 --train_dir ./
+
+# bank
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset bank --data_dir /hdd1/sams/data/ --nfeat 80 --nfield 16 --max_filter_col 16 --train_dir ./
+
+
+
+# new datasets
+# census
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset census --data_dir /hdd1/sams/data/ --nfeat 540 --nfield 41 --max_filter_col 41 --train_dir ./
+
+# Payment (credit)
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset credit --data_dir /hdd1/sams/data/ --nfeat 350 --nfield 23 --max_filter_col 23 --train_dir ./
+
+# diabetes
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset diabetes --data_dir /hdd1/sams/data/ --nfeat 850 --nfield 48 --max_filter_col 48 --train_dir ./
+
+# credit (hcdr)
+python3 ./internal/ml/model_slicing/algorithm/save_satistics.py --dataset hcdr --data_dir /hdd1/sams/data/ --nfeat 550 --nfield 69 --max_filter_col 69 --train_dir ./
+
+```
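+
+The dataset-to-dimension mapping used above can be made explicit; below is a small helper sketch (hypothetical, not in the repo) that prints the same commands:
+
+```python
+# dataset -> (nfeat, nfield); values copied from the commands above
+DATASETS = {
+    "frappe": (5500, 10), "adult": (140, 13), "cvd": (110, 11),
+    "bank": (80, 16), "census": (540, 41), "credit": (350, 23),
+    "diabetes": (850, 48), "hcdr": (550, 69),
+}
+
+for name, (nfeat, nfield) in DATASETS.items():
+    # max_filter_col equals nfield in every command above
+    print(f"python3 ./internal/ml/model_slicing/algorithm/save_satistics.py "
+          f"--dataset {name} --data_dir /hdd1/sams/data/ "
+          f"--nfeat {nfeat} --nfield {nfield} --max_filter_col {nfield} --train_dir ./")
+```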
+
+# Run docker
+
+```bash
+# in server
+ssh panda17
+
+# go to /home/xingnaili/firmest_docker/Trails
+git submodule update --recursive --remote
+
+# run container
+docker run -d --name moe_inf \
+ --network="host" \
+ -v $(pwd)/Trails:/project/Trails \
+ -v /hdd1/sams/tensor_log/:/project/tensor_log \
+ -v /hdd1/sams/data/:/project/data_all \
+ trails
+
+# Enter the docker container.
+docker exec -it moe_inf bash
+```
+
+
+
+# Run in database
+
+Configure the database runtime
+
+```bash
+cargo pgrx run --release
+```
+
+Load data into RDBMS
+
+```bash
+
+psql -h localhost -p 28814 -U postgres
+\l
+\c pg_extension
+\dt
+\d frappe_train
+
+
+# frappe
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/data_all/frappe frappe
+# frappe, only feature ids
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/frappe frappe
+
+
+# adult
+bash ./internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/data_all/adult adult
+# adult, only feature ids
+bash ./internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/adult adult
+# check that the column types are correct
+SELECT column_name, data_type, column_default, is_nullable
+FROM information_schema.columns
+WHERE table_name = 'adult_int_train';
+
+
+# cvd
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/data_all/cvd cvd
+# cvd, only feature ids
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/cvd cvd
+
+
+# bank
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db.sh /project/data_all/bank bank
+# bank, only feature ids
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/bank bank
+
+
+# new datasets
+
+# census
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/census census
+
+# credit
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/credit credit
+
+# hcdr
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/hcdr hcdr
+
+# diabetes
+bash /project/Trails/internal/ml/model_selection/scripts/database/load_data_to_db_int.sh /project/data_all/diabetes diabetes
+```
+
+Verify data is in the DB
+
+```sql
+# check table status
+\dt
+\d frappe_train
+SELECT * FROM frappe_train LIMIT 10;
+```
+
+Config
+
+```sql
+# after running pgrx, edit the generated sql
+# generate schema
+cargo pgrx schema >> /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+
+
+-- src/lib.rs:266
+-- pg_extension::model_init
+CREATE FUNCTION "model_init"(
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_init_wrapper';
+
+-- src/lib.rs:242
+-- pg_extension::inference_shared_write_once_int
+CREATE FUNCTION "inference_shared_write_once_int"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'inference_shared_write_once_int_wrapper';
+
+-- src/lib.rs:219
+-- pg_extension::inference_shared_write_once
+CREATE FUNCTION "inference_shared_write_once"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'inference_shared_write_once_wrapper';
+
+-- src/lib.rs:196
+-- pg_extension::inference_shared
+CREATE FUNCTION "inference_shared"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'run_inference_shared_wrapper';
+
+-- src/lib.rs:173
+-- pg_extension::inference
+CREATE FUNCTION "inference"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'run_inference_wrapper';
+
+
+# record the necessary functions above, then copy them into the following file
+rm /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+vi /home/postgres/.pgrx/14.9/pgrx-install/share/extension/pg_extension--0.1.0.sql
+
+# then drop/create extension
+DROP EXTENSION IF EXISTS pg_extension;
+CREATE EXTENSION pg_extension;
+```
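+
+Note: `cargo pgrx schema` appends the full auto-generated schema to `pg_extension--0.1.0.sql`; the steps above keep only the functions listed here by rewriting that file before dropping and re-creating the extension.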
+
+Examples
+
+```sql
+
+# inference() arguments: dataset name, condition (JSON), config file, column-cardinalities file, model path, SQL predicate, and batch size
+SELECT count(*) FROM frappe_train WHERE col2='973:1' LIMIT 1000;
+SELECT col2, count(*) FROM frappe_train group by col2 order by count(*) desc;
+
+# query with two conditions
+SELECT inference(
+ 'frappe',
+ '{"1":266, "2":1244}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ 'WHERE col1=''266:1'' and col2=''1244:1''',
+ 32
+);
+
+-- Query with one condition
+SELECT inference(
+ 'frappe',
+ '{"2":977}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ 'WHERE col2=''977:1''',
+ 10000
+);
+
+-- Query with no conditions
+SELECT inference(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 8000
+);
+
+-- Inspect the query plan
+EXPLAIN (ANALYZE, BUFFERS) SELECT inference(
+ 'frappe',
+ '{"2":977}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ 'WHERE col2=''977:1''',
+ 8000
+);
+
+
+```
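+
+The UDFs can also be driven from a client. A minimal sketch, assuming `psycopg2` and the connection settings used by `baseline_int.py` later in this document (user `postgres`, port `28814`, database `pg_extension`):
+
+```python
+import time
+import psycopg2
+
+# Connection settings mirror baseline_int.py (assumed local dev setup)
+conn = psycopg2.connect(database="pg_extension", user="postgres",
+                        host="localhost", port="28814")
+with conn.cursor() as cur:
+    begin = time.time()
+    cur.execute("""
+        SELECT inference(
+            'frappe', '{}',
+            '/project/Trails/internal/ml/model_selection/config.ini',
+            '/project/Trails/frappe_col_cardinalities',
+            '/project/tensor_log/frappe/dnn_K16_alpha4',
+            '', 8000)
+    """)
+    print(cur.fetchone()[0])          # the UDF returns its result as TEXT
+    print(f"client-side latency: {time.time() - begin:.3f}s")
+conn.close()
+```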
+
+# Clear cache
+
+```sql
+DISCARD ALL;
+```
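+
+`DISCARD ALL` resets session state (cached plans, temporary tables, session settings) but does not touch the operating-system page cache; for a fully cold run the server can be restarted. A hedged example, assuming `pg_ctl` is on the PATH and `$PGDATA` points at the cluster:
+
+```bash
+# Reset session state only
+psql -U postgres -p 28814 -d pg_extension -c 'DISCARD ALL;'
+# Fully cold start
+pg_ctl restart -D "$PGDATA"
+```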
+
+# Benchmark Latency over all datasets
+
+## Adult
+
+```sql
+SELECT inference(
+ 'adult',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5',
+ '',
+ 10000
+);
+
+
+-- Experiments: model_init followed by inference
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5'
+);
+SELECT inference(
+ 'adult',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5',
+ '',
+ 10000
+);
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5'
+);
+SELECT inference_shared_write_once(
+ 'adult',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5',
+ '',
+ 100000
+);
+
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO adult_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13
+FROM adult_train;
+
+INSERT INTO adult_int_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13
+FROM adult_int_train;
+```
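+
+To grow a table to a target row count, the doubling INSERT can be wrapped in a loop. A minimal sketch in PL/pgSQL; the 1,000,000 target is an arbitrary placeholder:
+
+```sql
+DO $$
+BEGIN
+  -- Each pass doubles adult_train until the placeholder target is reached
+  WHILE (SELECT count(*) FROM adult_train) < 1000000 LOOP
+    INSERT INTO adult_train (label, col1, col2, col3, col4, col5, col6, col7,
+                             col8, col9, col10, col11, col12, col13)
+    SELECT label, col1, col2, col3, col4, col5, col6, col7,
+           col8, col9, col10, col11, col12, col13
+    FROM adult_train;
+  END LOOP;
+END $$;
+```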
+
+## Frappe
+
+```sql
+SELECT inference(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 10000
+);
+
+SELECT inference(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 20000
+);
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4'
+);
+SELECT inference(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 10000
+);
+
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4'
+);
+SELECT inference_shared_write_once(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 100000
+);
+
+
+SELECT inference_shared(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 40000
+);
+
+
+
+SELECT inference(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 80000
+);
+
+
+SELECT inference(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 160000
+);
+
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO frappe_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10
+FROM frappe_train;
+
+
+INSERT INTO frappe_int_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10
+FROM frappe_int_train;
+```
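+
+The batch-size sweep above can be scripted. A hedged sketch, assuming `psql` can reach the `pg_extension` database on port `28814` as user `postgres`:
+
+```bash
+# Sweep batch sizes for the frappe inference UDF
+for bs in 10000 20000 40000 80000 160000; do
+  psql -U postgres -p 28814 -d pg_extension -c "SELECT inference(
+    'frappe', '{}',
+    '/project/Trails/internal/ml/model_selection/config.ini',
+    '/project/Trails/data/frappe_col_cardinalities',
+    '/project/tensor_log/frappe/dnn_K16_alpha4',
+    '', $bs);"
+done
+```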
+
+## CVD
+
+```sql
+SELECT inference(
+ 'cvd',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5',
+ '',
+ 10000
+);
+
+-- Experiments: model_init followed by inference
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5'
+);
+SELECT inference(
+ 'cvd',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5',
+ '',
+ 10000
+);
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5'
+);
+SELECT inference_shared_write_once(
+ 'cvd',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5',
+ '',
+ 100000
+);
+
+
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO cvd_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11
+FROM cvd_train;
+
+INSERT INTO cvd_int_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11
+FROM cvd_int_train;
+
+```
+
+## Bank
+
+```sql
+SELECT inference(
+ 'bank',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3',
+ '',
+ 10000
+);
+
+
+-- Experiments: model_init followed by inference
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3'
+);
+SELECT inference(
+ 'bank',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/data/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3',
+ '',
+ 10000
+);
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3'
+);
+SELECT inference_shared_write_once(
+ 'bank',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3',
+ '',
+ 100000
+);
+
+
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO bank_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16
+FROM bank_train;
+
+
+INSERT INTO bank_int_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16
+FROM bank_int_train;
+
+```
+
+## Census
+
+```sql
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO census_int_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41
+FROM census_int_train;
+
+
+```
+
+## Credit
+
+```sql
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO credit_int_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16,col17,col18,col19,col20,col21,col22,col23)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16,col17,col18,col19,col20,col21,col22,col23
+FROM credit_int_train;
+
+
+
+```
+
+## Diabetes
+
+```sql
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO diabetes_int_train (label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48)
+SELECT label, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48
+FROM diabetes_int_train;
+
+
+
+```
+
+## Hcdr
+
+```sql
+-- Replicate data (each INSERT ... SELECT doubles the table)
+INSERT INTO hcdr_int_train (label,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69)
+SELECT label,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69
+FROM hcdr_int_train;
+
+
+```
+
+
+
+# Baseline System & SAMS
+
+## Frappe
+
+```bash
+# frappe
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/frappe/dnn_K16_alpha4 --device cpu --dataset frappe --batch_size 10 --col_cardinalities_file data/frappe_col_cardinalities --target_batch 10
+
+
+CUDA_VISIBLE_DEVICES="0" python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/frappe/dnn_K16_alpha4 --device cuda:0 --dataset frappe --batch_size 100000 --col_cardinalities_file data/frappe_col_cardinalities --target_batch 100000
+
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline_int.py /hdd1/sams/tensor_log/frappe/dnn_K16_alpha4 --device cpu --dataset frappe --batch_size 100000 --col_cardinalities_file data/frappe_col_cardinalities --target_batch 100000
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4'
+);
+SELECT inference_shared_write_once(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 100000
+);
+
+# read int data
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4'
+);
+SELECT inference_shared_write_once_int(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 100000
+);
+```
+
+## Adult
+
+```bash
+
+# adult
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/adult/Ednn_K16_alpha2-5 --device cpu --dataset adult --batch_size 100000 --col_cardinalities_file data/adult_col_cardinalities --target_batch 100000
+
+CUDA_VISIBLE_DEVICES="0" python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/adult/Ednn_K16_alpha2-5 --device cuda:0 --dataset adult --batch_size 100000 --col_cardinalities_file data/adult_col_cardinalities --target_batch 100000
+
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline_int.py /hdd1/sams/tensor_log/adult/Ednn_K16_alpha2-5 --device cpu --dataset adult --batch_size 100000 --col_cardinalities_file data/adult_col_cardinalities --target_batch 100000
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5'
+);
+SELECT inference_shared_write_once(
+ 'adult',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5',
+ '',
+ 100000
+);
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5'
+);
+SELECT inference_shared_write_once_int(
+ 'adult',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/adult_col_cardinalities',
+ '/project/tensor_log/adult/Ednn_K16_alpha2-5',
+ '',
+ 640000
+);
+```
+
+## CVD
+```bash
+# CVD
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/cvd/dnn_K16_alpha2-5 --device cpu --dataset cvd --batch_size 100000 --col_cardinalities_file data/cvd_col_cardinalities --target_batch 100000
+
+CUDA_VISIBLE_DEVICES="0" python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/cvd/dnn_K16_alpha2-5 --device cuda:0 --dataset cvd --batch_size 100000 --col_cardinalities_file data/cvd_col_cardinalities --target_batch 100000
+
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline_int.py /hdd1/sams/tensor_log/cvd/dnn_K16_alpha2-5 --device cpu --dataset cvd --batch_size 100000 --col_cardinalities_file data/cvd_col_cardinalities --target_batch 100000
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5'
+);
+SELECT inference_shared_write_once(
+ 'cvd',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5',
+ '',
+ 100000
+);
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5'
+);
+SELECT inference_shared_write_once_int(
+ 'cvd',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/cvd_col_cardinalities',
+ '/project/tensor_log/cvd/dnn_K16_alpha2-5',
+ '',
+ 100000
+);
+```
+
+## Bank
+
+```bash
+# Bank
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3 --device cpu --dataset bank --batch_size 100000 --col_cardinalities_file data/bank_col_cardinalities --target_batch 100000
+
+CUDA_VISIBLE_DEVICES="0" python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3 --device cuda:0 --dataset bank --batch_size 100000 --col_cardinalities_file data/bank_col_cardinalities --target_batch 100000
+
+
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline_int.py /hdd1/sams/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3 --device cpu --dataset bank --batch_size 100000 --col_cardinalities_file data/bank_col_cardinalities --target_batch 100000
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3'
+);
+SELECT inference_shared_write_once(
+ 'bank',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3',
+ '',
+ 100000
+);
+
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3'
+);
+SELECT inference_shared_write_once_int(
+ 'bank',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/bank_col_cardinalities',
+ '/project/tensor_log/bank/dnn_K16_alpha2-3_beta1e-3',
+ '',
+ 100000
+);
+
+
+```
+
+## Census
+
+```bash
+# Census
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/baseline_int.py /hdd1/sams/tensor_log/census/dnn_K16 --device cpu --dataset census --batch_size 100000 --col_cardinalities_file ./internal/ml/model_slicing/data/census_col_cardinalities --target_batch 100000
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/census_col_cardinalities',
+ '/project/tensor_log/census/dnn_K16'
+);
+SELECT inference_shared_write_once_int(
+ 'census',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/census_col_cardinalities',
+ '/project/tensor_log/census/dnn_K16',
+ '',
+ 100000
+);
+```
+
+## Credit
+
+```bash
+# Credit
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/baseline_int.py /hdd1/sams/tensor_log/credit/dnn_K16_epoch50 --device cpu --dataset credit --batch_size 100000 --col_cardinalities_file ./internal/ml/model_slicing/data/credit_col_cardinalities --target_batch 100000
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50'
+);
+SELECT inference_shared_write_once_int(
+ 'credit',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50',
+ '',
+ 100000
+);
+```
+
+## Diabetes
+
+```bash
+# Diabetes
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/baseline_int.py /hdd1/sams/tensor_log/diabetes/dnn_K16_epoch50 --device cpu --dataset diabetes --batch_size 100000 --col_cardinalities_file ./internal/ml/model_slicing/data/diabetes_col_cardinalities --target_batch 100000
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/diabetes_col_cardinalities',
+ '/project/tensor_log/diabetes/dnn_K16_epoch50'
+);
+SELECT inference_shared_write_once_int(
+ 'diabetes',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/diabetes_col_cardinalities',
+ '/project/tensor_log/diabetes/dnn_K16_epoch50',
+ '',
+ 100000
+);
+```
+
+## Hcdr
+
+```bash
+# Hcdr
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/baseline_int.py /hdd1/sams/tensor_log/hcdr/dnn_K16 --device cpu --dataset hcdr --batch_size 100000 --col_cardinalities_file ./internal/ml/model_slicing/data/hcdr_col_cardinalities --target_batch 100000
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/hcdr_col_cardinalities',
+ '/project/tensor_log/hcdr/dnn_K16'
+);
+SELECT inference_shared_write_once_int(
+ 'hcdr',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/hcdr_col_cardinalities',
+ '/project/tensor_log/hcdr/dnn_K16',
+ '',
+ 100000
+);
+```
+
+# Data Scale
+
+```bash
+# Credit, scaled to 640k rows
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/baseline_int.py /hdd1/sams/tensor_log/credit/dnn_K16_epoch50 --device cpu --dataset credit --batch_size 640000 --col_cardinalities_file ./internal/ml/model_slicing/data/credit_col_cardinalities --target_batch 640000
+
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50'
+);
+SELECT inference_shared_write_once_int(
+ 'credit',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50',
+ '',
+ 640000
+);
+```
+
+# Micro
+
+## Profiling
+
+```bash
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/frappe/dnn_K16_alpha4 --device cpu --dataset frappe --batch_size 20000 --col_cardinalities_file frappe_col_cardinalities --target_batch 20000
+```
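+
+Both `baseline.py` and `baseline_int.py` print a `time_dict` at the end of a run. A hypothetical helper (not part of the repository) to turn it into a per-phase breakdown:
+
+```python
+# Summarize the time_dict printed by baseline.py / baseline_int.py
+def summarize(time_dict):
+    total = max(time_dict.get("overall_query_latency", 0.0), 1e-9)
+    for phase, seconds in sorted(time_dict.items(), key=lambda kv: -kv[1]):
+        print(f"{phase:>24}: {seconds:8.3f}s ({100 * seconds / total:5.1f}%)")
+
+# Keys match the dict defined in baseline_int.py; values here are placeholders
+summarize({"load_model": 0.0, "data_query_time": 0.0, "py_conver_to_tensor": 0.0,
+           "tensor_to_gpu": 0.0, "py_compute": 0.0, "overall_query_latency": 0.0})
+```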
+
+## Optimizations
+
+```bash
+
+# 1. With all optimizations (model cache + shared memory)
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4'
+);
+SELECT inference_shared_write_once(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 100000
+);
+
+# 2. Without the model cache (no model_init beforehand)
+SELECT inference_shared_write_once(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 100000
+);
+
+# 3. Without shared memory (plain inference)
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4'
+);
+SELECT inference(
+ 'frappe',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/frappe_col_cardinalities',
+ '/project/tensor_log/frappe/dnn_K16_alpha4',
+ '',
+ 100000
+);
+
+# 4. Without SPI: measures the time spent when data is fetched outside the database
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/frappe/dnn_K16_alpha4 --device cpu --dataset frappe --batch_size 100000 --col_cardinalities_file frappe_col_cardinalities --target_batch 100000
+```
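+
+A hedged driver for the in-database variants, assuming `psql` reaches the `pg_extension` database on port `28814`; `\timing` reports per-statement latency:
+
+```bash
+PSQL="psql -U postgres -p 28814 -d pg_extension"
+$PSQL <<'SQL'
+\timing on
+-- 1. With all optimizations
+SELECT model_init('{}',
+  '/project/Trails/internal/ml/model_selection/config.ini',
+  '/project/Trails/frappe_col_cardinalities',
+  '/project/tensor_log/frappe/dnn_K16_alpha4');
+SELECT inference_shared_write_once('frappe', '{}',
+  '/project/Trails/internal/ml/model_selection/config.ini',
+  '/project/Trails/frappe_col_cardinalities',
+  '/project/tensor_log/frappe/dnn_K16_alpha4', '', 100000);
+SQL
+```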
+
+### Int dataset
+
+```bash
+
+# 1. With all optimizations (model cache + shared memory)
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50'
+);
+SELECT inference_shared_write_once_int(
+ 'credit',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50',
+ '',
+ 100000
+);
+
+# 2. Without the model cache (no model_init beforehand)
+SELECT inference_shared_write_once_int(
+ 'credit',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50',
+ '',
+ 100000
+);
+
+# 3. Without shared memory (plain inference)
+SELECT model_init(
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50'
+);
+SELECT inference(
+ 'credit',
+ '{}',
+ '/project/Trails/internal/ml/model_selection/config.ini',
+ '/project/Trails/internal/ml/model_slicing/data/credit_col_cardinalities',
+ '/project/tensor_log/credit/dnn_K16_epoch50',
+ '',
+ 100000
+);
+
+# 4. Without SPI: measures the time spent when data is fetched outside the database
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/algorithm/baseline.py /hdd1/sams/tensor_log/frappe/dnn_K16_alpha4 --device cpu --dataset frappe --batch_size 100000 --col_cardinalities_file frappe_col_cardinalities --target_batch 100000
+
+CUDA_VISIBLE_DEVICES=-1 python ./internal/ml/model_slicing/baseline_int.py /hdd1/sams/tensor_log/credit/dnn_K16_epoch50 --device cpu --dataset credit --batch_size 100000 --col_cardinalities_file ./internal/ml/model_slicing/data/credit_col_cardinalities --target_batch 100000
+```
+
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/baseline_int.py b/examples/model_selection/Trails/internal/ml/model_slicing/baseline_int.py
new file mode 100644
index 0000000000..e10fd7a148
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/baseline_int.py
@@ -0,0 +1,206 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import os
+import torch
+import argparse
+from model_slicing.algorithm.src.model.sparsemax_verticalMoe import SliceModel, SparseMax_VerticalSAMS
+import time
+import psycopg2
+from model_slicing.algorithm.src.model.factory import initialize_model
+from typing import Any, List, Dict, Tuple
+import json
+
+USER = "postgres"
+HOST = "localhost"
+PORT = "28814"
+DB_NAME = "pg_extension"
+PASSWORD = "1234"
+
+# Wall-clock breakdown of one run, printed at the end
+time_dict = {
+    "load_model": 0,
+    "data_query_time": 0,
+    "py_conver_to_tensor": 0,
+    "tensor_to_gpu": 0,
+    "py_compute": 0
+}
+
+
+def read_json(file_name):
+ print(f"Loading {file_name}...")
+ is_exist = os.path.exists(file_name)
+ if is_exist:
+ with open(file_name, 'r') as readfile:
+ data = json.load(readfile)
+ return data
+ else:
+ print(f"{file_name} is not exist")
+ return {}
+
+
+def fetch_and_preprocess(conn, batch_size, database):
+ cur = conn.cursor()
+    # Fetch the first batch_size rows from the int-encoded training table
+ cur.execute(f"SELECT * FROM {database}_int_train LIMIT {batch_size}")
+ rows = cur.fetchall()
+ return rows
+
+
+def pre_processing(mini_batch_data: List[Tuple]):
+ """
+ mini_batch_data: [('0', '0', '123:123', '123:123', '123:123',)
+ """
+ feat_id = torch.LongTensor(mini_batch_data)
+ print("feat_id = ", feat_id[:, 2:].size())
+ return {'id': feat_id[:, 2:]}
+
+
+def fetch_data(database, batch_size):
+ global time_dict
+ print("Data fetching ....")
+ begin_time = time.time()
+ with psycopg2.connect(database=DB_NAME, user=USER, host=HOST, port=PORT) as conn:
+ rows = fetch_and_preprocess(conn, batch_size, database)
+ time_dict["data_query_time"] += time.time() - begin_time
+ print(f"Data fetching done {rows[0]}, size = {len(rows)}, type = {type(rows)}, {type(rows[0])}")
+
+ print("Data preprocessing ....")
+ begin_time = time.time()
+ batch = pre_processing(rows)
+ time_dict["py_conver_to_tensor"] += time.time() - begin_time
+ print("Data preprocessing done")
+ return batch
+
+
+def load_model(tensorboard_path: str, device: str = "cuda"):
+ """
+ Args:
+        tensorboard_path: path to the tensorboard log directory (contains args.txt and best_model.pth)
+ """
+ arg_file_path = os.path.join(tensorboard_path, "args.txt")
+ model_config = reload_argparse(arg_file_path)
+
+ net = initialize_model(model_config)
+
+ model_pth_path = os.path.join(tensorboard_path, "best_model.pth")
+ saved_state_dict = torch.load(model_pth_path, map_location=device)
+
+ net.load_state_dict(saved_state_dict)
+ print("successfully load model")
+ return net, model_config
+
+
+def if_cuda_available(device):
+    # True when the target device string refers to a CUDA device
+    return "cuda" in device
+
+
+def reload_argparse(file_path: str):
+ d = {}
+
+ with open(file_path, 'r', encoding='utf-8') as f:
+ for line in f.readlines():
+ key, value = line.strip('\n').split(',')
+ # print(f"{key}, {value}\n")
+            try:
+                # args.txt stores stringified values; eval restores ints/bools/lists
+                parsed = eval(value)
+            except Exception:
+                parsed = value
+            d[key] = parsed
+
+ return argparse.Namespace(**d)
+
+
+parser = argparse.ArgumentParser(description='fetch int-encoded rows from PostgreSQL and run inference')
+parser.add_argument('path', type=str,
+ help="directory to model file")
+parser.add_argument('--flag', '-p', action='store_true',
+                    help="whether to print profile")
+parser.add_argument('--print_net', '--b', action='store_true',
+                    help="print the structure of the network")
+
+parser.add_argument('--device', type=str, default="cuda")
+parser.add_argument('--dataset', type=str, default="frappe")
+parser.add_argument('--target_batch', type=int, default=10000)
+parser.add_argument('--batch_size', type=int, default=10000)
+parser.add_argument('--col_cardinalities_file', type=str, default="path to the stored file")
+
+if __name__ == '__main__':
+ args = parser.parse_args()
+ path = args.path
+ flag = args.flag
+ device = torch.device(args.device)
+ print(path)
+ load_time = time.time()
+ net, config = load_model(path, args.device)
+ net: SparseMax_VerticalSAMS = net
+ config.workload = 'random'
+ time_dict["load_model"] = time.time() - load_time
+
+ print(config.workload)
+
+ overall_query_latency = time.time()
+ if config.net == "sparsemax_vertical_sams":
+ alpha = net.sparsemax.alpha
+ print(alpha)
+
+ print()
+
+ col_cardinalities = read_json(args.col_cardinalities_file)
+ target_sql = torch.tensor([col[-1] for col in col_cardinalities]).reshape(1, -1)
+
+ net.eval()
+ net = net.to(device)
+ with torch.no_grad():
+ sql = target_sql.to(device)
+ if config.net == "sparsemax_vertical_sams":
+ subnet: SliceModel = net.tailor_by_sql(sql)
+ subnet.to(device)
+ else:
+ subnet = net
+ subnet.eval()
+ target_list, y_list = [], []
+ ops = 0
+
+        # number of fetch/compute iterations needed to reach target_batch
+ num_ite = args.target_batch // args.batch_size
+ print(f"num_ite = {num_ite}")
+ for i in range(num_ite):
+ # fetch from db
+ data_batch = fetch_data(args.dataset, args.batch_size)
+ print("Copy to device")
+            # time the host-to-device copy (synchronize so the timing is accurate)
+ begin = time.time()
+ x_id = data_batch['id'].to(device)
+            if if_cuda_available(args.device):
+ torch.cuda.synchronize()
+ time_dict["tensor_to_gpu"] += time.time() - begin
+
+ print(f"begin to compute on {args.device}, is_cuda = {if_cuda_avaiable(args.device)}")
+ # compute
+ begin = time.time()
+ y = subnet(x_id, None)
+            if if_cuda_available(args.device):
+ torch.cuda.synchronize()
+ time_dict["py_compute"] += time.time() - begin
+ time_dict["overall_query_latency"] = time.time() - overall_query_latency
+ print(time_dict)
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/adult_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/adult_col_cardinalities
new file mode 100755
index 0000000000..5aa7be266c
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/adult_col_cardinalities
@@ -0,0 +1 @@
+[[1, 2, 3, 4, 5, 6, 140], [7, 8, 9, 10, 11, 12, 13, 141], [14, 15, 16, 17, 18, 19, 142], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 143], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 144], [52, 53, 54, 55, 56, 57, 58, 145], [59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 146], [73, 74, 75, 76, 77, 78, 147], [79, 80, 81, 82, 83, 148], [84, 85, 149], [86, 87, 88, 89, 150], [90, 91, 92, 93, 94, 95, 96, 97, 98, 151], [99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136, 137, 138, 139, 152]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/bank_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/bank_col_cardinalities
new file mode 100644
index 0000000000..2d876c271a
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/bank_col_cardinalities
@@ -0,0 +1 @@
+[[1, 2, 3, 4, 5, 75], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 76], [18, 19, 20, 77], [21, 22, 23, 24, 78], [25, 26, 79], [27, 28, 29, 30, 31, 80], [32, 33, 81], [34, 35, 82], [36, 37, 38, 83], [39, 40, 41, 42, 84], [43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 85], [55, 56, 57, 58, 86], [59, 60, 61, 62, 87], [63, 64, 65, 66, 67, 88], [68, 69, 70, 89], [71, 72, 73, 74, 90]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/census_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/census_col_cardinalities
new file mode 100755
index 0000000000..ecacf07efc
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/census_col_cardinalities
@@ -0,0 +1 @@
+[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 532], [46, 47, 48, 49, 50, 51, 52, 53, 54, 533], [55, 56, 57, 58, 59, 60, 534], [61, 62, 535], [63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 536], [80, 81, 82, 83, 84, 85, 537], [86, 87, 88, 538], [89, 90, 91, 92, 93, 94, 95, 539], [96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 540], [120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 541], [135, 136, 137, 138, 139, 542], [140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 543], [150, 151, 544], [152, 153, 154, 545], [155, 156, 157, 158, 159, 160, 546], [161, 162, 163, 164, 165, 166, 167, 168, 547], [169, 170, 171, 172, 173, 174, 175, 548], [176, 177, 178, 179, 180, 181, 182, 549], [183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 550], [200, 201, 202, 203, 204, 205, 551], [206, 207, 208, 209, 210, 211, 552], [212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 553], [263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 554], [301, 302, 303, 304, 305, 306, 307, 308, 555], [309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 556], [320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 557], [330, 331, 332, 333, 334, 335, 336, 337, 338, 558], [339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 559], [349, 350, 351, 560], [352, 353, 354, 355, 561], [356, 357, 358, 359, 360, 361, 362, 562], [363, 364, 365, 366, 367, 563], [368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 564], [411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 565], [454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 566], [497, 498, 499, 500, 501, 567], [502, 503, 504, 568], [505, 506, 507, 569], [508, 509, 510, 570], [511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 571], [530, 531, 572]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/credit_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/credit_col_cardinalities
new file mode 100755
index 0000000000..0e8246bf81
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/credit_col_cardinalities
@@ -0,0 +1 @@
+[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 348], [81, 82, 349], [83, 84, 85, 86, 87, 88, 89, 350], [90, 91, 92, 93, 351], [94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 352], [150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 353], [161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 354], [172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 355], [183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 356], [194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 357], [204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 358], [214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 359], [226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 360], [237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 361], [247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 362], [260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 363], [273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 364], [283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 365], [294, 295, 296, 297, 298, 299, 300, 301, 302, 366], [303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 367], [314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 368], [325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 369], [337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 370]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/cvd_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/cvd_col_cardinalities
new file mode 100644
index 0000000000..1464dfe8f0
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/cvd_col_cardinalities
@@ -0,0 +1 @@
+[[1, 2, 102], [3, 4, 5, 6, 7, 8, 9, 103], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 104], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 105], [49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 106], [64, 65, 66, 107], [67, 68, 69, 108], [70, 71, 109], [72, 73, 110], [74, 75, 111], [76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 112]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/diabetes_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/diabetes_col_cardinalities
new file mode 100755
index 0000000000..1b4f12cea8
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/diabetes_col_cardinalities
@@ -0,0 +1 @@
+[[0, 1, 833], [2, 3, 4, 5, 6, 7, 834], [8, 9, 10, 835], [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 836], [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 837], [31, 32, 33, 34, 35, 36, 37, 38, 838], [39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 839], [65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 840], [82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 841], [96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 842], [114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 155, 156, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 179, 180, 181, 182, 183, 184, 185, 186, 843], [187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 844], [305, 306, 307, 308, 309, 310, 311, 845], [312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 846], [387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 847], [426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 439, 440, 441, 442, 443, 444, 446, 447, 449, 451, 452, 453, 454, 456, 457, 458, 848], [459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 473, 474, 475, 476, 477, 478, 479, 849], [480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 850], [571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 851], [651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 
717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 852], [734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 853], [750, 751, 752, 753, 854], [754, 755, 756, 757, 855], [758, 759, 760, 761, 856], [762, 763, 764, 765, 857], [766, 767, 768, 769, 858], [770, 772, 773, 859], [774, 775, 776, 777, 860], [778, 861], [780, 781, 782, 783, 862], [784, 785, 786, 787, 863], [788, 789, 864], [790, 791, 792, 793, 865], [794, 795, 796, 797, 866], [798, 799, 800, 801, 867], [802, 803, 804, 805, 868], [806, 807, 869], [808, 810, 870], [811, 871], [812, 872], [813, 814, 815, 816, 873], [817, 818, 819, 820, 874], [821, 822, 875], [823, 824, 876], [825, 826, 877], [827, 828, 878], [829, 830, 879], [831, 832, 880]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/frappe_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/frappe_col_cardinalities
new file mode 100755
index 0000000000..219e664784
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/frappe_col_cardinalities
@@ -0,0 +1 @@
+[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 
734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 5382], [957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 
1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 
1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048, 2049, 2050, 2051, 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070, 2071, 2072, 2073, 2074, 2075, 2076, 2077, 2078, 2079, 2080, 2081, 2082, 2083, 2084, 2085, 2086, 2087, 2088, 2089, 2090, 2091, 2092, 2093, 2094, 2095, 2096, 2097, 2098, 2099, 2100, 2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108, 2109, 2110, 2111, 2112, 2113, 2114, 2115, 2116, 2117, 2118, 2119, 2120, 2121, 2122, 2123, 2124, 2125, 2126, 2127, 2128, 2129, 2130, 2131, 2132, 2133, 2134, 2135, 2136, 2137, 2138, 2139, 2140, 2141, 2142, 2143, 2144, 2145, 2146, 2147, 2148, 2149, 2150, 2151, 2152, 2153, 2154, 2155, 2156, 2157, 2158, 2159, 2160, 2161, 2162, 2163, 2164, 2165, 2166, 2167, 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, 2176, 2177, 2178, 2179, 2180, 2181, 2182, 2183, 2184, 2185, 2186, 2187, 2188, 2189, 2190, 2191, 2192, 2193, 2194, 2195, 2196, 2197, 2198, 2199, 2200, 2201, 2202, 2203, 2204, 2205, 2206, 2207, 2208, 2209, 2210, 2211, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2220, 2221, 2222, 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2230, 2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, 2242, 2243, 2244, 2245, 2246, 2247, 2248, 2249, 2250, 2251, 2252, 2253, 2254, 2255, 2256, 2257, 2258, 2259, 2260, 2261, 2262, 2263, 2264, 2265, 2266, 2267, 2268, 2269, 2270, 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2285, 2286, 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, 2295, 2296, 2297, 2298, 2299, 2300, 2301, 2302, 2303, 2304, 2305, 2306, 2307, 2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, 2318, 2319, 2320, 2321, 2322, 2323, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2331, 2332, 2333, 2334, 2335, 2336, 2337, 2338, 2339, 2340, 2341, 2342, 2343, 2344, 2345, 2346, 2347, 2348, 2349, 2350, 2351, 2352, 2353, 2354, 2355, 2356, 2357, 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2365, 2366, 2367, 2368, 2369, 2370, 2371, 2372, 2373, 2374, 2375, 2376, 2377, 2378, 2379, 2380, 2381, 2382, 2383, 2384, 2385, 2386, 2387, 2388, 2389, 2390, 2391, 2392, 2393, 2394, 2395, 2396, 2397, 2398, 2399, 2400, 2401, 2402, 2403, 2404, 2405, 2406, 2407, 2408, 2409, 2410, 2411, 2412, 2413, 2414, 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, 2423, 2424, 2425, 2426, 2427, 2428, 2429, 2430, 2431, 2432, 2433, 2434, 2435, 2436, 2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448, 2449, 2450, 2451, 2452, 2453, 2454, 2455, 2456, 2457, 2458, 2459, 2460, 2461, 2462, 2463, 2464, 2465, 2466, 2467, 2468, 2469, 2470, 2471, 2472, 2473, 2474, 2475, 2476, 2477, 2478, 2479, 2480, 2481, 2482, 2483, 2484, 2485, 2486, 2487, 2488, 2489, 2490, 2491, 2492, 2493, 2494, 2495, 2496, 2497, 2498, 2499, 2500, 2501, 2502, 2503, 2504, 2505, 2506, 2507, 2508, 2509, 2510, 2511, 2512, 2513, 2514, 2515, 2516, 2517, 2518, 2519, 2520, 2521, 2522, 2523, 2524, 2525, 2526, 2527, 2528, 2529, 2530, 2531, 2532, 2533, 2534, 2535, 2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543, 2544, 2545, 2546, 2547, 2548, 2549, 2550, 2551, 2552, 2553, 
2554, 2555, 2556, 2557, 2558, 2559, 2560, 2561, 2562, 2563, 2564, 2565, 2566, 2567, 2568, 2569, 2570, 2571, 2572, 2573, 2574, 2575, 2576, 2577, 2578, 2579, 2580, 2581, 2582, 2583, 2584, 2585, 2586, 2587, 2588, 2589, 2590, 2591, 2592, 2593, 2594, 2595, 2596, 2597, 2598, 2599, 2600, 2601, 2602, 2603, 2604, 2605, 2606, 2607, 2608, 2609, 2610, 2611, 2612, 2613, 2614, 2615, 2616, 2617, 2618, 2619, 2620, 2621, 2622, 2623, 2624, 2625, 2626, 2627, 2628, 2629, 2630, 2631, 2632, 2633, 2634, 2635, 2636, 2637, 2638, 2639, 2640, 2641, 2642, 2643, 2644, 2645, 2646, 2647, 2648, 2649, 2650, 2651, 2652, 2653, 2654, 2655, 2656, 2657, 2658, 2659, 2660, 2661, 2662, 2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671, 2672, 2673, 2674, 2675, 2676, 2677, 2678, 2679, 2680, 2681, 2682, 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2690, 2691, 2692, 2693, 2694, 2695, 2696, 2697, 2698, 2699, 2700, 2701, 2702, 2703, 2704, 2705, 2706, 2707, 2708, 2709, 2710, 2711, 2712, 2713, 2714, 2715, 2716, 2717, 2718, 2719, 2720, 2721, 2722, 2723, 2724, 2725, 2726, 2727, 2728, 2729, 2730, 2731, 2732, 2733, 2734, 2735, 2736, 2737, 2738, 2739, 2740, 2741, 2742, 2743, 2744, 2745, 2746, 2747, 2748, 2749, 2750, 2751, 2752, 2753, 2754, 2755, 2756, 2757, 2758, 2759, 2760, 2761, 2762, 2763, 2764, 2765, 2766, 2767, 2768, 2769, 2770, 2771, 2772, 2773, 2774, 2775, 2776, 2777, 2778, 2779, 2780, 2781, 2782, 2783, 2784, 2785, 2786, 2787, 2788, 2789, 2790, 2791, 2792, 2793, 2794, 2795, 2796, 2797, 2798, 2799, 2800, 2801, 2802, 2803, 2804, 2805, 2806, 2807, 2808, 2809, 2810, 2811, 2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 2820, 2821, 2822, 2823, 2824, 2825, 2826, 2827, 2828, 2829, 2830, 2831, 2832, 2833, 2834, 2835, 2836, 2837, 2838, 2839, 2840, 2841, 2842, 2843, 2844, 2845, 2846, 2847, 2848, 2849, 2850, 2851, 2852, 2853, 2854, 2855, 2856, 2857, 2858, 2859, 2860, 2861, 2862, 2863, 2864, 2865, 2866, 2867, 2868, 2869, 2870, 2871, 2872, 2873, 2874, 2875, 2876, 2877, 2878, 2879, 2880, 2881, 2882, 2883, 2884, 2885, 2886, 2887, 2888, 2889, 2890, 2891, 2892, 2893, 2894, 2895, 2896, 2897, 2898, 2899, 2900, 2901, 2902, 2903, 2904, 2905, 2906, 2907, 2908, 2909, 2910, 2911, 2912, 2913, 2914, 2915, 2916, 2917, 2918, 2919, 2920, 2921, 2922, 2923, 2924, 2925, 2926, 2927, 2928, 2929, 2930, 2931, 2932, 2933, 2934, 2935, 2936, 2937, 2938, 2939, 2940, 2941, 2942, 2943, 2944, 2945, 2946, 2947, 2948, 2949, 2950, 2951, 2952, 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2960, 2961, 2962, 2963, 2964, 2965, 2966, 2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2978, 2979, 2980, 2981, 2982, 2983, 2984, 2985, 2986, 2987, 2988, 2989, 2990, 2991, 2992, 2993, 2994, 2995, 2996, 2997, 2998, 2999, 3000, 3001, 3002, 3003, 3004, 3005, 3006, 3007, 3008, 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, 3017, 3018, 3019, 3020, 3021, 3022, 3023, 3024, 3025, 3026, 3027, 3028, 3029, 3030, 3031, 3032, 3033, 3034, 3035, 3036, 3037, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056, 3057, 3058, 3059, 3060, 3061, 3062, 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, 3071, 3072, 3073, 3074, 3075, 3076, 3077, 3078, 3079, 3080, 3081, 3082, 3083, 3084, 3085, 3086, 3087, 3088, 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3096, 3097, 3098, 3099, 3100, 3101, 3102, 3103, 3104, 3105, 3106, 3107, 3108, 3109, 3110, 3111, 3112, 3113, 3114, 3115, 3116, 3117, 3118, 3119, 3120, 3121, 3122, 3123, 3124, 3125, 3126, 3127, 3128, 3129, 3130, 3131, 3132, 3133, 3134, 3135, 3136, 3137, 3138, 3139, 3140, 3141, 3142, 3143, 3144, 3145, 
3146, 3147, 3148, 3149, 3150, 3151, 3152, 3153, 3154, 3155, 3156, 3157, 3158, 3159, 3160, 3161, 3162, 3163, 3164, 3165, 3166, 3167, 3168, 3169, 3170, 3171, 3172, 3173, 3174, 3175, 3176, 3177, 3178, 3179, 3180, 3181, 3182, 3183, 3184, 3185, 3186, 3187, 3188, 3189, 3190, 3191, 3192, 3193, 3194, 3195, 3196, 3197, 3198, 3199, 3200, 3201, 3202, 3203, 3204, 3205, 3206, 3207, 3208, 3209, 3210, 3211, 3212, 3213, 3214, 3215, 3216, 3217, 3218, 3219, 3220, 3221, 3222, 3223, 3224, 3225, 3226, 3227, 3228, 3229, 3230, 3231, 3232, 3233, 3234, 3235, 3236, 3237, 3238, 3239, 3240, 3241, 3242, 3243, 3244, 3245, 3246, 3247, 3248, 3249, 3250, 3251, 3252, 3253, 3254, 3255, 3256, 3257, 3258, 3259, 3260, 3261, 3262, 3263, 3264, 3265, 3266, 3267, 3268, 3269, 3270, 3271, 3272, 3273, 3274, 3275, 3276, 3277, 3278, 3279, 3280, 3281, 3282, 3283, 3284, 3285, 3286, 3287, 3288, 3289, 3290, 3291, 3292, 3293, 3294, 3295, 3296, 3297, 3298, 3299, 3300, 3301, 3302, 3303, 3304, 3305, 3306, 3307, 3308, 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318, 3319, 3320, 3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, 3365, 3366, 3367, 3368, 3369, 3370, 3371, 3372, 3373, 3374, 3375, 3376, 3377, 3378, 3379, 3380, 3381, 3382, 3383, 3384, 3385, 3386, 3387, 3388, 3389, 3390, 3391, 3392, 3393, 3394, 3395, 3396, 3397, 3398, 3399, 3400, 3401, 3402, 3403, 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438, 3439, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, 3452, 3453, 3454, 3455, 3456, 3457, 3458, 3459, 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, 3476, 3477, 3478, 3479, 3480, 3481, 3482, 3483, 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, 3492, 3493, 3494, 3495, 3496, 3497, 3498, 3499, 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, 3508, 3509, 3510, 3511, 3512, 3513, 3514, 3515, 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, 3532, 3533, 3534, 3535, 3536, 3537, 3538, 3539, 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, 3548, 3549, 3550, 3551, 3552, 3553, 3554, 3555, 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, 3644, 3645, 3646, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3685, 3686, 3687, 3688, 3689, 3690, 3691, 3692, 3693, 3694, 3695, 3696, 3697, 3698, 3699, 3700, 3701, 3702, 3703, 3704, 3705, 3706, 3707, 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, 3724, 3725, 3726, 3727, 3728, 3729, 3730, 3731, 3732, 3733, 3734, 3735, 3736, 3737, 
3738, 3739, 3740, 3741, 3742, 3743, 3744, 3745, 3746, 3747, 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, 3756, 3757, 3758, 3759, 3760, 3761, 3762, 3763, 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, 3772, 3773, 3774, 3775, 3776, 3777, 3778, 3779, 3780, 3781, 3782, 3783, 3784, 3785, 3786, 3787, 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, 3796, 3797, 3798, 3799, 3800, 3801, 3802, 3803, 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, 3812, 3813, 3814, 3815, 3816, 3817, 3818, 3819, 3820, 3821, 3822, 3823, 3824, 3825, 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, 3834, 3835, 3836, 3837, 3838, 3839, 3840, 3841, 3842, 3843, 3844, 3845, 3846, 3847, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3861, 3862, 3863, 3864, 3865, 3866, 3867, 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3892, 3893, 3894, 3895, 3896, 3897, 3898, 3899, 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, 3908, 3909, 3910, 3911, 3912, 3913, 3914, 3915, 3916, 3917, 3918, 3919, 3920, 3921, 3922, 3923, 3924, 3925, 3926, 3927, 3928, 3929, 3930, 3931, 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3940, 3941, 3942, 3943, 3944, 3945, 3946, 3947, 3948, 3949, 3950, 3951, 3952, 3953, 3954, 3955, 3956, 3957, 3958, 3959, 3960, 3961, 3962, 3963, 3964, 3965, 3966, 3967, 3968, 3969, 3970, 3971, 3972, 3973, 3974, 3975, 3976, 3977, 3978, 3979, 3980, 3981, 3982, 3983, 3984, 3985, 3986, 3987, 3988, 3989, 3990, 3991, 3992, 3993, 3994, 3995, 3996, 3997, 3998, 3999, 4000, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008, 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040, 4041, 4042, 4043, 4044, 4045, 4046, 4047, 4048, 4049, 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057, 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, 4066, 4067, 4068, 4069, 4070, 4071, 4072, 4073, 4074, 4075, 4076, 4077, 4078, 4079, 4080, 4081, 4082, 4083, 4084, 4085, 4086, 4087, 4088, 4089, 4090, 4091, 4092, 4093, 4094, 4095, 4096, 4097, 4098, 4099, 4100, 4101, 4102, 4103, 4104, 4105, 4106, 4107, 4108, 4109, 4110, 4111, 4112, 4113, 4114, 4115, 4116, 4117, 4118, 4119, 4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132, 4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141, 4142, 4143, 4144, 4145, 4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158, 4159, 4160, 4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171, 4172, 4173, 4174, 4175, 4176, 4177, 4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185, 4186, 4187, 4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, 4196, 4197, 4198, 4199, 4200, 4201, 4202, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213, 4214, 4215, 4216, 4217, 4218, 4219, 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227, 4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, 4236, 4237, 4238, 4239, 4240, 4241, 4242, 4243, 4244, 4245, 4246, 4247, 4248, 4249, 4250, 4251, 4252, 4253, 4254, 4255, 4256, 4257, 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, 4266, 4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, 4279, 4280, 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, 4289, 4290, 4291, 4292, 4293, 4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4305, 4306, 4307, 4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4320, 4321, 4322, 4323, 4324, 4325, 4326, 4327, 4328, 4329, 
4330, 4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338, 4339, 4340, 4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348, 4349, 4350, 4351, 4352, 4353, 4354, 4355, 4356, 4357, 4358, 4359, 4360, 4361, 4362, 4363, 4364, 4365, 4366, 4367, 4368, 4369, 4370, 4371, 4372, 4373, 4374, 4375, 4376, 4377, 4378, 4379, 4380, 4381, 4382, 4383, 4384, 4385, 4386, 4387, 4388, 4389, 4390, 4391, 4392, 4393, 4394, 4395, 4396, 4397, 4398, 4399, 4400, 4401, 4402, 4403, 4404, 4405, 4406, 4407, 4408, 4409, 4410, 4411, 4412, 4413, 4414, 4415, 4416, 4417, 4418, 4419, 4420, 4421, 4422, 4423, 4424, 4425, 4426, 4427, 4428, 4429, 4430, 4431, 4432, 4433, 4434, 4435, 4436, 4437, 4438, 4439, 4440, 4441, 4442, 4443, 4444, 4445, 4446, 4447, 4448, 4449, 4450, 4451, 4452, 4453, 4454, 4455, 4456, 4457, 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, 4466, 4467, 4468, 4469, 4470, 4471, 4472, 4473, 4474, 4475, 4476, 4477, 4478, 4479, 4480, 4481, 4482, 4483, 4484, 4485, 4486, 4487, 4488, 4489, 4490, 4491, 4492, 4493, 4494, 4495, 4496, 4497, 4498, 4499, 4500, 4501, 4502, 4503, 4504, 4505, 4506, 4507, 4508, 4509, 4510, 4511, 4512, 4513, 4514, 4515, 4516, 4517, 4518, 4519, 4520, 4521, 4522, 4523, 4524, 4525, 4526, 4527, 4528, 4529, 4530, 4531, 4532, 4533, 4534, 4535, 4536, 4537, 4538, 4539, 4540, 4541, 4542, 4543, 4544, 4545, 4546, 4547, 4548, 4549, 4550, 4551, 4552, 4553, 4554, 4555, 4556, 4557, 4558, 4559, 4560, 4561, 4562, 4563, 4564, 4565, 4566, 4567, 4568, 4569, 4570, 4571, 4572, 4573, 4574, 4575, 4576, 4577, 4578, 4579, 4580, 4581, 4582, 4583, 4584, 4585, 4586, 4587, 4588, 4589, 4590, 4591, 4592, 4593, 4594, 4595, 4596, 4597, 4598, 4599, 4600, 4601, 4602, 4603, 4604, 4605, 4606, 4607, 4608, 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616, 4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642, 4643, 4644, 4645, 4646, 4647, 4648, 4649, 4650, 4651, 4652, 4653, 4654, 4655, 4656, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665, 4666, 4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4675, 4676, 4677, 4678, 4679, 4680, 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, 4689, 4690, 4691, 4692, 4693, 4694, 4695, 4696, 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704, 4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, 4713, 4714, 4715, 4716, 4717, 4718, 4719, 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, 4728, 4729, 4730, 4731, 4732, 4733, 4734, 4735, 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743, 4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, 4752, 4753, 4754, 4755, 4756, 4757, 4758, 4759, 4760, 4761, 4762, 4763, 4764, 4765, 4766, 4767, 4768, 4769, 4770, 4771, 4772, 4773, 4774, 4775, 4776, 4777, 4778, 4779, 4780, 4781, 4782, 4783, 4784, 4785, 4786, 4787, 4788, 4789, 4790, 4791, 4792, 4793, 4794, 4795, 4796, 4797, 4798, 4799, 4800, 4801, 4802, 4803, 4804, 4805, 4806, 4807, 4808, 4809, 4810, 4811, 4812, 4813, 4814, 4815, 4816, 4817, 4818, 4819, 4820, 4821, 4822, 4823, 4824, 4825, 4826, 4827, 4828, 4829, 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, 4838, 4839, 4840, 4841, 4842, 4843, 4844, 4845, 4846, 4847, 4848, 4849, 4850, 4851, 4852, 4853, 4854, 4855, 4856, 4857, 4858, 4859, 4860, 4861, 4862, 4863, 4864, 4865, 4866, 4867, 4868, 4869, 4870, 4871, 4872, 4873, 4874, 4875, 4876, 4877, 4878, 4879, 4880, 4881, 4882, 4883, 4884, 4885, 4886, 4887, 4888, 4889, 4890, 4891, 4892, 4893, 4894, 4895, 4896, 4897, 4898, 4899, 4900, 4901, 4902, 4903, 4904, 4905, 4906, 4907, 4908, 4909, 4910, 4911, 4912, 4913, 4914, 4915, 4916, 4917, 4918, 4919, 4920, 4921, 
4922, 4923, 4924, 4925, 4926, 4927, 4928, 4929, 4930, 4931, 4932, 4933, 4934, 4935, 4936, 4937, 4938, 4939, 4940, 4941, 4942, 4943, 4944, 4945, 4946, 4947, 4948, 4949, 4950, 4951, 4952, 4953, 4954, 4955, 4956, 4957, 4958, 4959, 4960, 4961, 4962, 4963, 4964, 4965, 4966, 4967, 4968, 4969, 4970, 4971, 4972, 4973, 4974, 4975, 4976, 4977, 4978, 4979, 4980, 4981, 4982, 4983, 4984, 4985, 4986, 4987, 4988, 4989, 4990, 4991, 4992, 4993, 4994, 4995, 4996, 4997, 4998, 4999, 5000, 5001, 5002, 5003, 5004, 5005, 5006, 5007, 5008, 5009, 5010, 5011, 5012, 5013, 5014, 5015, 5016, 5017, 5018, 5019, 5020, 5021, 5022, 5023, 5024, 5025, 5026, 5027, 5028, 5029, 5030, 5031, 5032, 5033, 5034, 5035, 5036, 5037, 5038, 5383], [5039, 5040, 5041, 5042, 5043, 5044, 5045, 5384], [5046, 5047, 5048, 5049, 5050, 5051, 5052, 5385], [5053, 5054, 5386], [5055, 5056, 5057, 5387], [5058, 5059, 5388], [5060, 5061, 5062, 5063, 5064, 5065, 5066, 5067, 5068, 5389], [5069, 5070, 5071, 5072, 5073, 5074, 5075, 5076, 5077, 5078, 5079, 5080, 5081, 5082, 5083, 5084, 5085, 5086, 5087, 5088, 5089, 5090, 5091, 5092, 5093, 5094, 5095, 5096, 5097, 5098, 5099, 5100, 5101, 5102, 5103, 5104, 5105, 5106, 5107, 5108, 5109, 5110, 5111, 5112, 5113, 5114, 5115, 5116, 5117, 5118, 5119, 5120, 5121, 5122, 5123, 5124, 5125, 5126, 5127, 5128, 5129, 5130, 5131, 5132, 5133, 5134, 5135, 5136, 5137, 5138, 5139, 5140, 5141, 5142, 5143, 5144, 5145, 5146, 5147, 5148, 5390], [5149, 5150, 5151, 5152, 5153, 5154, 5155, 5156, 5157, 5158, 5159, 5160, 5161, 5162, 5163, 5164, 5165, 5166, 5167, 5168, 5169, 5170, 5171, 5172, 5173, 5174, 5175, 5176, 5177, 5178, 5179, 5180, 5181, 5182, 5183, 5184, 5185, 5186, 5187, 5188, 5189, 5190, 5191, 5192, 5193, 5194, 5195, 5196, 5197, 5198, 5199, 5200, 5201, 5202, 5203, 5204, 5205, 5206, 5207, 5208, 5209, 5210, 5211, 5212, 5213, 5214, 5215, 5216, 5217, 5218, 5219, 5220, 5221, 5222, 5223, 5224, 5225, 5226, 5227, 5228, 5229, 5230, 5231, 5232, 5233, 5234, 5235, 5236, 5237, 5238, 5239, 5240, 5241, 5242, 5243, 5244, 5245, 5246, 5247, 5248, 5249, 5250, 5251, 5252, 5253, 5254, 5255, 5256, 5257, 5258, 5259, 5260, 5261, 5262, 5263, 5264, 5265, 5266, 5267, 5268, 5269, 5270, 5271, 5272, 5273, 5274, 5275, 5276, 5277, 5278, 5279, 5280, 5281, 5282, 5283, 5284, 5285, 5286, 5287, 5288, 5289, 5290, 5291, 5292, 5293, 5294, 5295, 5296, 5297, 5298, 5299, 5300, 5301, 5302, 5303, 5304, 5305, 5306, 5307, 5308, 5309, 5310, 5311, 5312, 5313, 5314, 5315, 5316, 5317, 5318, 5319, 5320, 5321, 5322, 5323, 5324, 5325, 5326, 5327, 5328, 5329, 5330, 5331, 5332, 5333, 5334, 5335, 5336, 5337, 5338, 5339, 5340, 5341, 5342, 5343, 5344, 5345, 5346, 5347, 5348, 5349, 5350, 5351, 5352, 5353, 5354, 5355, 5356, 5357, 5358, 5359, 5360, 5361, 5362, 5363, 5364, 5365, 5366, 5367, 5368, 5369, 5370, 5371, 5372, 5373, 5374, 5375, 5376, 5377, 5378, 5379, 5380, 5381, 5391]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/data/hcdr_col_cardinalities b/examples/model_selection/Trails/internal/ml/model_slicing/data/hcdr_col_cardinalities
new file mode 100755
index 0000000000..389a3c589c
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/data/hcdr_col_cardinalities
@@ -0,0 +1 @@
+[[0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 544], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 545], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 546], [50, 51, 52, 53, 54, 55, 547], [56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 548], [78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 549], [89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 550], [103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 551], [114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 552], [130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 553], [144, 146, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 554], [161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 555], [185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 556], [209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 557], [241, 242, 243, 244, 245, 246, 247, 248, 249, 558], [250, 251, 252, 253, 254, 255, 256, 257, 259, 559], [260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 560], [271, 272, 273, 274, 275, 276, 277, 278, 561], [279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 562], [293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 563], [317, 318, 319, 320, 321, 322, 323, 324, 325, 564], [326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 565], [350, 351, 566], [352, 353, 567], [354, 355, 356, 357, 358, 359, 360, 568], [361, 362, 569], [363, 364, 570], [365, 366, 571], [367, 368, 369, 370, 371, 372, 572], [373, 374, 375, 573], [376, 377, 574], [378, 379, 575], [380, 381, 382, 383, 384, 576], [385, 386, 577], [387, 388, 578], [389, 390, 391, 392, 393, 579], [394, 395, 396, 397, 398, 399, 400, 580], [401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 581], [459, 460, 582], [461, 462, 583], [463, 464, 465, 466, 467, 468, 469, 470, 471, 584], [472, 473, 474, 585], [475, 476, 586], [477, 478, 587], [479, 480, 588], [481, 482, 589], [483, 484, 590], [485, 486, 591], [487, 488, 489, 592], [490, 491, 593], [492, 493, 594], [494, 495, 595], [496, 596], [497, 498, 597], [499, 500, 598], [501, 502, 599], [503, 504, 600], [505, 506, 601], [507, 508, 602], [509, 510, 603], [511, 512, 604], [513, 514, 605], [515, 516, 517, 518, 519, 520, 521, 522, 523, 606], [524, 525, 607], [526, 527, 608], [528, 529, 530, 531, 532, 533, 534, 609], [535, 536, 537, 538, 539, 610], [540, 541, 611], [542, 543, 612]]
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/pg_interface.py b/examples/model_selection/Trails/internal/ml/model_slicing/pg_interface.py
new file mode 100644
index 0000000000..b33ad148b9
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/pg_interface.py
@@ -0,0 +1,351 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+# set PYTHONPATH
+import sys
+
+# Paths to add (container-specific layout)
+sys.path = [
+    '/project/Trails/internal/ml/model_slicing',
+    '/project/Trails/internal/ml/model_slicing/algorithm',
+    '/project/Trails/internal/ml',
+    '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload',
+    '/home/postgres/.local/lib/python3.8/site-packages',
+    '/usr/local/lib/python3.8/dist-packages', '/usr/lib/python3/dist-packages',
+]
+
+import calendar
+import os
+import time
+import json
+import traceback
+import orjson
+from argparse import Namespace
+from model_selection.shared_config import parse_config_arguments
+from multiprocessing import shared_memory
+import torch
+from typing import Any, List, Dict, Tuple
+import numpy as np
+
+
+def read_json(file_name):
+ print(f"Loading {file_name}...")
+ is_exist = os.path.exists(file_name)
+ if is_exist:
+ with open(file_name, 'r') as readfile:
+ data = json.load(readfile)
+ return data
+ else:
+ print(f"{file_name} is not exist")
+ return {}
+
+
+def exception_catcher(func):
+ def wrapper(encoded_str: str):
+ try:
+            # each function accepts a JSON string
+ params = json.loads(encoded_str)
+ config_file = params.get("config_file")
+
+ # Parse the config file
+ args = parse_config_arguments(config_file)
+
+ # Set the environment variables
+ ts = calendar.timegm(time.gmtime())
+ os.environ.setdefault("base_dir", args.base_dir)
+ os.environ.setdefault("log_logger_folder_name", args.log_folder)
+ os.environ.setdefault("log_file_name", args.log_name + "_" + str(ts) + ".log")
+
+ # Call the original function with the parsed parameters
+ return func(params, args)
+        except Exception:
+ return orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8')
+
+ return wrapper
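+
+# For illustration (hypothetical values): every wrapped function receives one
+# JSON string and returns one JSON string, e.g.
+#   model_inference_load_model('{"config_file": "/path/config.ini",
+#                                "model_path": "/path/model.pt",
+#                                "col_cardinalities_file": "/path/cards.json",
+#                                "where_cond": "{}"}')
+# On failure the wrapper returns '{"Errored": "<traceback>"}'.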
+
+
+# Micro-benchmarking the filtering phase
+model = None
+sliced_model = None
+col_cardinalities = None
+time_usage_dic = {}
+
+
+@exception_catcher
+def model_inference_load_model(params: dict, args: Namespace):
+ global model, sliced_model, col_cardinalities
+ from model_selection.src.logger import logger
+ try:
+ logger.info(f"Received parameters: {params}")
+
+ from model_slicing.algorithm.src.data_loader import sql_attached_dataloader
+ from model_slicing.algorithm.profile_model_clean import load_model
+        # read the saved col_cardinalities file
+ if col_cardinalities is None:
+ col_cardinalities = read_json(params["col_cardinalities_file"])
+
+        # read the model path
+ model_path = params["model_path"]
+
+ # get the where condition
+ where_cond = json.loads(params["where_cond"])
+ # generate default sql and selected sql
+ target_sql = [col[-1] for col in col_cardinalities]
+ for col_index, value in where_cond.items():
+ target_sql[int(col_index)] = value
+ logger.info(f"target_sql encoding is: {target_sql}")
+
+ if model is None:
+ logger.info("Load model .....")
+ model, config = load_model(model_path)
+ model.eval()
+ sliced_model = model.tailor_by_sql(torch.tensor(target_sql).reshape(1, -1))
+ sliced_model.eval()
+ logger.info("Load model Done!")
+ else:
+ logger.info("Skip Load model")
+ except:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+ return orjson.dumps({"ok": 1}).decode('utf-8')
+
+
+@exception_catcher
+def model_inference_compute(params: dict, args: Namespace):
+ global model, sliced_model, col_cardinalities, time_usage_dic
+ from model_selection.src.logger import logger
+ try:
+
+ overall_begin = time.time()
+ mini_batch = json.loads(params["mini_batch"])
+ logger.info("-----" * 10)
+
+ time_usage_dic = {}
+
+ # logger.info(f"Received status: {mini_batch['status']}")
+ # if mini_batch["status"] != 'success':
+ # raise Exception
+
+        # TODO: the credit dataset has 23 fields; make this configurable
+ mini_batch_used = [mini_batch[i:i + 23] for i in range(0, len(mini_batch), 23)]
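+        # e.g. a flat list of 46 values becomes 2 rows of 23 features each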
+
+ begin = time.time()
+ # pre-processing mini_batch
+ transformed_data = torch.LongTensor(mini_batch_used)
+ time_usage_dic["py_conver_to_tensor"] = time.time() - begin
+
+ logger.info(f"transformed data size: {transformed_data.size()}")
+
+ begin = time.time()
+ y = sliced_model(transformed_data, None)
+ time_usage_dic["py_compute"] = time.time() - begin
+ logger.info(f"Prediction Results = {y.tolist()[:2]}...")
+
+ logger.info("-----" * 10)
+ overall_end = time.time()
+ time_usage_dic["py_overall_duration"] = overall_end - overall_begin
+ time_usage_dic["py_diff"] = time_usage_dic["py_overall_duration"] - \
+ (time_usage_dic["py_conver_to_tensor"] + time_usage_dic["py_compute"])
+
+ logger.info(f"time usage of inference {len(transformed_data)} rows is {time_usage_dic}")
+ except:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+
+ return orjson.dumps({"model_outputs": 1}).decode('utf-8')
+
+
+@exception_catcher
+def model_inference_compute_shared_memory(params: dict, args: Namespace):
+ global model, sliced_model, col_cardinalities, time_usage_dic
+ from model_selection.src.logger import logger
+ try:
+ mini_batch_shared = get_data_from_shared_memory()
+ logger.info(f"mini_batch_shared: {mini_batch_shared[:100]}")
+
+ overall_begin = time.time()
+ mini_batch = json.loads(mini_batch_shared)
+ logger.info("-----" * 10)
+
+ time_usage_dic = {}
+
+ logger.info(f"Received status: {mini_batch['status']}")
+ if mini_batch["status"] != 'success':
+ raise Exception
+
+ begin = time.time()
+ # pre-processing mini_batch
+ transformed_data = torch.LongTensor([
+ [int(item.split(':')[0]) for item in sublist[2:]]
+ for sublist in mini_batch["data"]])
+ time_usage_dic["py_conver_to_tensor"] = time.time() - begin
+
+ logger.info(f"transformed data size: {len(transformed_data)}")
+
+ begin = time.time()
+ y = sliced_model(transformed_data, None)
+ time_usage_dic["py_compute"] = time.time() - begin
+ logger.info(f"Prediction Results = {y.tolist()[:2]}...")
+
+ logger.info("-----" * 10)
+ overall_end = time.time()
+ time_usage_dic["py_overall_duration"] = overall_end - overall_begin
+ time_usage_dic["py_diff"] = time_usage_dic["py_overall_duration"] - \
+ (time_usage_dic["py_conver_to_tensor"] + time_usage_dic["py_compute"])
+
+ logger.info(f"time usage of inference {len(transformed_data)} rows is {time_usage_dic}")
+ except:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+
+ return orjson.dumps({"model_outputs": 1}).decode('utf-8')
+
+
+def decode_libsvm(columns):
+ map_func = lambda pair: (int(pair[0]), float(pair[1]))
+ # 0 is id, 1 is label
+ id, value = zip(*map(lambda col: map_func(col.split(':')), columns[2:]))
+ sample = {'id': list(id)}
+ return sample
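+
+# e.g. decode_libsvm(['4801', '0', '2:1', '4656:1']) -> {'id': [2, 4656]};
+# only the feature ids are kept here, since the sliced model ignores the values.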
+
+
+def pre_processing(mini_batch_data: List[Tuple]):
+ """
+    mini_batch_data: e.g. [('0', '0', '123:123', '123:123', '123:123'), ...]
+ """
+ sample_lines = len(mini_batch_data)
+ feat_id = []
+ feat_value = []
+ y = []
+ for i in range(sample_lines):
+ row_value = mini_batch_data[i]
+ sample = decode_libsvm(row_value)
+ feat_id.append(sample['id'])
+ feat_id = torch.LongTensor(feat_id)
+ return {'id': feat_id}
+
+
+@exception_catcher
+def model_inference_compute_shared_memory_write_once(params: dict, args: Namespace):
+ global model, sliced_model, col_cardinalities, time_usage_dic
+ from model_selection.src.logger import logger
+ try:
+ mini_batch_shared = get_data_from_shared_memory()
+ logger.info(f"mini_batch_shared: <-{mini_batch_shared[:50]}->, type: {type(mini_batch_shared)}")
+
+ overall_begin = time.time()
+ mini_batch = json.loads(mini_batch_shared)
+ logger.info("-----" * 10)
+
+ time_usage_dic = {}
+
+ begin = time.time()
+ # pre-processing mini_batch
+ transformed_data = pre_processing(mini_batch)['id']
+ time_usage_dic["py_conver_to_tensor"] = time.time() - begin
+ logger.info(f"transformed data size: {len(transformed_data)}")
+
+ begin = time.time()
+ y = sliced_model(transformed_data, None)
+ time_usage_dic["py_compute"] = time.time() - begin
+ logger.info(f"Prediction Results = {y.tolist()[:2]}...")
+
+ logger.info("-----" * 10)
+ overall_end = time.time()
+ time_usage_dic["py_overall_duration"] = overall_end - overall_begin
+ time_usage_dic["py_diff"] = time_usage_dic["py_overall_duration"] - \
+ (time_usage_dic["py_conver_to_tensor"] + time_usage_dic["py_compute"])
+
+ logger.info(f"time usage of inference {len(transformed_data)} rows is {time_usage_dic}")
+ except:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+
+ return orjson.dumps({"model_outputs": 1}).decode('utf-8')
+
+
+@exception_catcher
+def model_inference_compute_shared_memory_write_once_int(params: dict, args: Namespace):
+ global model, sliced_model, col_cardinalities, time_usage_dic
+ from model_selection.src.logger import logger
+ time_usage_dic = {}
+
+ try:
+ mini_batch_shared = get_data_from_shared_memory_int(int(params["rows"]))
+ # logger.info(f"mini_batch_shared: <-{mini_batch_shared[:50]}->, type: {type(mini_batch_shared)}")
+ logger.info(f"mini_batch_shared: <-{mini_batch_shared}->, type: {type(mini_batch_shared)}")
+
+ overall_begin = time.time()
+ logger.info("-----" * 10)
+
+ begin = time.time()
+ # pre-processing mini_batch
+ transformed_data = torch.LongTensor(mini_batch_shared)
+ time_usage_dic["py_conver_to_tensor"] = time.time() - begin
+ logger.info(f"transformed data size: {transformed_data.size()}")
+
+ begin = time.time()
+ y = sliced_model(transformed_data, None)
+ time_usage_dic["py_compute"] = time.time() - begin
+ logger.info(f"Prediction Results = {y.tolist()[:2]}...")
+
+ logger.info("-----" * 10)
+ overall_end = time.time()
+ time_usage_dic["py_overall_duration"] = overall_end - overall_begin
+ time_usage_dic["py_diff"] = time_usage_dic["py_overall_duration"] - \
+ (time_usage_dic["py_conver_to_tensor"] + time_usage_dic["py_compute"])
+
+ logger.info(f"time usage of inference {len(transformed_data)} rows is {time_usage_dic}")
+ except:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+
+ return orjson.dumps({"model_outputs": 1}).decode('utf-8')
+
+
+def records_results(params: str):
+ global time_usage_dic
+ from model_selection.src.logger import logger
+ try:
+ params = json.loads(params)
+ params.update(time_usage_dic)
+ logger.info(f"final result = {params}")
+ except:
+ logger.info(orjson.dumps(
+ {"Errored": traceback.format_exc()}).decode('utf-8'))
+ return orjson.dumps({"Done": 1}).decode('utf-8')
+
+
+def get_data_from_shared_memory(shmem_name="my_shared_memory"):
+    # Attach to the existing shared memory segment by name
+    shm = shared_memory.SharedMemory(name=shmem_name)
+    # Read the raw bytes and decode to a string
+    data = shm.buf.tobytes().decode()
+    # Close this handle; the creator owns the segment's lifetime
+    shm.close()
+    return data.rstrip('\x00')
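+
+# Usage sketch (assumes the Rust side already created "my_shared_memory" and
+# wrote a JSON payload): payload = json.loads(get_data_from_shared_memory())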
+
+
+def get_data_from_shared_memory_int(n_rows):
+    # Attach to the existing shared memory segment by name
+    shm = shared_memory.SharedMemory(name="my_shared_memory")
+    # View the buffer as int32 values and reshape to n_rows,
+    # letting numpy infer the number of columns
+    arr = np.frombuffer(shm.buf, dtype=np.int32).reshape(n_rows, -1)
+    # Copy out of the shared buffer so the result stays valid after close()
+    data = arr.copy()
+    del arr
+    shm.close()
+    return data
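+
+# Usage sketch (hypothetical): if the writer stored 2 rows of int32 features,
+# get_data_from_shared_memory_int(2) returns a (2, n_cols) array that can be
+# wrapped with torch.LongTensor for inference.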
+
diff --git a/examples/model_selection/Trails/internal/ml/model_slicing/profile_preprocess.py b/examples/model_selection/Trails/internal/ml/model_slicing/profile_preprocess.py
new file mode 100644
index 0000000000..5a0e82475b
--- /dev/null
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/profile_preprocess.py
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from typing import List, Tuple
+import time
+
+
+# Reference implementation, kept for comparison; it is superseded by the
+# loop-based decode_libsvm below, which avoids the extra map/zip passes.
+def decode_libsvm_zip(columns):
+    map_func = lambda pair: (int(pair[0]), float(pair[1]))
+    # columns[0] is the id, columns[1] is the label
+    ids, values = zip(*map(lambda col: map_func(col.split(':')), columns[2:]))
+    return {'id': list(ids), 'value': list(values), 'y': int(columns[1])}
+
+
+def decode_libsvm(columns):
+    # Decode directly while splitting, with no intermediate map/zip objects.
+    ids = []
+    values = []
+    for col in columns[2:]:
+        feat_id, feat_value = col.split(':')
+        ids.append(int(feat_id))
+        values.append(float(feat_value))
+    return {'id': ids, 'value': values, 'y': int(columns[1])}
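+
+# e.g. decode_libsvm(('4801', '0', '2:1', '4656:1'))
+#   -> {'id': [2, 4656], 'value': [1.0, 1.0], 'y': 0}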
+
+
+def pre_processing(mini_batch_data: List[Tuple]):
+ # Prepare storage for the results.
+ all_feat_ids = []
+ all_feat_values = []
+ all_ys = []
+
+ for row_value in mini_batch_data:
+ # Decode and extract data directly without additional unpacking.
+ sample = decode_libsvm(list(row_value))
+ all_feat_ids.append(sample['id'])
+ all_feat_values.append(sample['value'])
+ all_ys.append(sample['y'])
+
+ return {'id': all_feat_ids, 'value': all_feat_values, 'y': all_ys}
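+
+# For one row ('4801', '0', '2:1', '4656:1'), pre_processing returns
+# {'id': [[2, 4656]], 'value': [[1.0, 1.0]], 'y': [0]}.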
+
+
+mini_batch = [
+ ('4801', '0', '2:1', '4656:1', '5042:1', '5051:1', '5054:1', '5055:1', '5058:1', '5061:1', '5070:1', '5150:1'),
+]
+
+mini_batch = mini_batch * 100000
+print(len(mini_batch))
+
+begin = time.time()
+pre_processing(mini_batch)
+end = time.time()
+print(f"pre_processing took {end - begin:.4f}s")
+
+
+
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/pre_processing/pre_processing_data.sh b/examples/model_selection/Trails/internal/ml/model_slicing/save_satistics.py
similarity index 54%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/pre_processing/pre_processing_data.sh
rename to examples/model_selection/Trails/internal/ml/model_slicing/save_satistics.py
index a54caf042e..2f77d313c2 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/scripts/pre_processing/pre_processing_data.sh
+++ b/examples/model_selection/Trails/internal/ml/model_slicing/save_satistics.py
@@ -16,48 +16,35 @@
# limitations under the License.
#
-export PYTHONPATH=$PYTHONPATH:./internal/ml/model_selection
-conda activate trails
-
-
-
-
-python ./internal/ml/model_selection/exps/nas_bench_tabular/4.seq_score_online.py \
- --models_explore=1000 \
- --log_name=score_based \
- --search_space=mlp_sp \
- --num_layers=4 \
- --hidden_choice_len=10 \
- --base_dir=/hdd1/xingnaili/exp_data/ \
- --num_labels=2 \
- --device=cuda:6 \
- --batch_size=32 \
- --dataset=criteo \
- --nfeat=2100000 \
- --nfield=39 \
- --nemb=10 \
- --workers=0 \
- --result_dir=./internal/ml/model_selection/exp_result/ \
- --log_folder=log_score_time_criteo > outputCriScorAll.log&
-
-
-
-
-
-
-
-
-
-
+from main import parse_arguments, seed_everything
+import os
+import glob
+import json
+from model_slicing.algorithm.src.data_loader import SQLAttacedLibsvmDataset
+
+
+def write_json(file_name, data):
+    print(f"writing {file_name}...")
+    with open(file_name, 'w') as outfile:
+        outfile.write(json.dumps(data))
+
+
+args = parse_arguments()
+seed_everything(args.seed)
+data_dir = os.path.join(args.data_dir, args.dataset)
+train_file = glob.glob("%s/tr*libsvm" % data_dir)[0]
+train_loader = SQLAttacedLibsvmDataset(
+ train_file,
+ args.nfield,
+ args.max_filter_col)
+write_json(
+ f"{args.dataset}_col_cardinalities",
+ train_loader.col_cardinalities)
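+
+# Usage sketch (hypothetical arguments):
+#   python save_satistics.py --dataset frappe --data_dir ./data --nfield 10
+# writes a "<dataset>_col_cardinalities" JSON file like the data files above.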
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/.cargo/config.toml b/examples/model_selection/Trails/internal/pg_extension/.cargo/config.toml
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/.cargo/config.toml
rename to examples/model_selection/Trails/internal/pg_extension/.cargo/config.toml
diff --git a/examples/model_selection/Trails/internal/pg_extension/.gitignore b/examples/model_selection/Trails/internal/pg_extension/.gitignore
new file mode 100644
index 0000000000..3906c33241
--- /dev/null
+++ b/examples/model_selection/Trails/internal/pg_extension/.gitignore
@@ -0,0 +1,6 @@
+.DS_Store
+.idea/
+/target
+*.iml
+**/*.rs.bk
+Cargo.lock
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/pg_extension.control b/examples/model_selection/Trails/internal/pg_extension/pg_extension.control
similarity index 100%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/pg_extension.control
rename to examples/model_selection/Trails/internal/pg_extension/pg_extension.control
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu.sql b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_cpu.sql
similarity index 94%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu.sql
rename to examples/model_selection/Trails/internal/pg_extension/sql/model_selection_cpu.sql
index bcbf5c0fce..ea90dea72b 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu.sql
+++ b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_cpu.sql
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,6 +19,7 @@
*
*************************************************************/
+
CREATE OR REPLACE
PROCEDURE model_selection_end2end(
dataset TEXT, --dataset name
@@ -40,11 +41,11 @@ BEGIN
WITH batch_rows AS (
SELECT %s
FROM %I
- ORDER BY RANDOM()
+ LIMIT 3200
)
SELECT model_selection(
json_agg(row_to_json(t))::text, %L, %L
)
FROM batch_rows AS t', column_list, dataset, budget, config_file) INTO result_status;
- RAISE NOTICE '1. model_selection result: %', result_status;
+ RAISE NOTICE 'model_selection result: %', result_status;
END; $$;
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu_workloads.sql b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_cpu_workloads.sql
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu_workloads.sql
rename to examples/model_selection/Trails/internal/pg_extension/sql/model_selection_cpu_workloads.sql
index 882be67f4e..932cc33b96 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_cpu_workloads.sql
+++ b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_cpu_workloads.sql
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,6 +19,7 @@
*
*************************************************************/
+
CREATE OR REPLACE
PROCEDURE model_selection_workloads(
dataset TEXT, --dataset name
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_dev.sql b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_dev.sql
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_dev.sql
rename to examples/model_selection/Trails/internal/pg_extension/sql/model_selection_dev.sql
index 6a72975471..1d1f1b3e1e 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_dev.sql
+++ b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_dev.sql
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,6 +19,7 @@
*
*************************************************************/
+
CREATE OR REPLACE
PROCEDURE model_selection_sp(
dataset TEXT, --dataset name
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails.sql b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_trails.sql
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails.sql
rename to examples/model_selection/Trails/internal/pg_extension/sql/model_selection_trails.sql
index 3d91da5017..9163e4aab2 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails.sql
+++ b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_trails.sql
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,6 +19,7 @@
*
*************************************************************/
+
CREATE OR REPLACE
PROCEDURE model_selection_end2end(
dataset TEXT, --dataset name
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails_workloads.sql b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_trails_workloads.sql
similarity index 99%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails_workloads.sql
rename to examples/model_selection/Trails/internal/pg_extension/sql/model_selection_trails_workloads.sql
index aaf62c63d2..8a36afdaf3 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/sql/model_selection_trails_workloads.sql
+++ b/examples/model_selection/Trails/internal/pg_extension/sql/model_selection_trails_workloads.sql
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,6 +19,7 @@
*
*************************************************************/
+
CREATE OR REPLACE
PROCEDURE model_selection_workloads(
dataset TEXT, --dataset name
diff --git a/examples/model_selection/Trails/internal/pg_extension/sql/pg_extension--0.1.0.sql b/examples/model_selection/Trails/internal/pg_extension/sql/pg_extension--0.1.0.sql
new file mode 100644
index 0000000000..ed4cd0626f
--- /dev/null
+++ b/examples/model_selection/Trails/internal/pg_extension/sql/pg_extension--0.1.0.sql
@@ -0,0 +1,221 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+
+/*
+This file is auto generated by pgrx.
+
+The ordering of items is not stable, it is driven by a dependency graph.
+*/
+
+-- src/lib.rs:80
+-- pg_extension::refinement_phase
+CREATE FUNCTION "refinement_phase"(
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'refinement_phase_wrapper';
+
+-- src/lib.rs:31
+-- pg_extension::profiling_refinement_phase
+CREATE FUNCTION "profiling_refinement_phase"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'profiling_refinement_phase_wrapper';
+
+-- src/lib.rs:16
+-- pg_extension::profiling_filtering_phase
+CREATE FUNCTION "profiling_filtering_phase"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'profiling_filtering_phase_wrapper';
+
+-- src/lib.rs:110
+-- pg_extension::model_selection_workloads
+CREATE FUNCTION "model_selection_workloads"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "n" INT, /* i32 */
+ "k" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_workloads_wrapper';
+
+-- src/lib.rs:138
+-- pg_extension::model_selection_trails_workloads
+CREATE FUNCTION "model_selection_trails_workloads"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "n" INT, /* i32 */
+ "k" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_trails_workloads_wrapper';
+
+-- src/lib.rs:125
+-- pg_extension::model_selection_trails
+CREATE FUNCTION "model_selection_trails"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "time_budget" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_trails_wrapper';
+
+-- src/lib.rs:94
+-- pg_extension::model_selection
+CREATE FUNCTION "model_selection"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "time_budget" TEXT, /* alloc::string::String */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_selection_wrapper';
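+
+-- Example invocation (hypothetical arguments):
+--   SELECT model_selection(json_agg(row_to_json(t))::text, '100', '/path/config.ini')
+--   FROM (SELECT * FROM frappe_train LIMIT 3200) AS t;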
+
+-- src/lib.rs:267
+-- pg_extension::model_init
+CREATE FUNCTION "model_init"(
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'model_init_wrapper';
+
+-- src/lib.rs:243
+-- pg_extension::inference_shared_write_once_int
+CREATE FUNCTION "inference_shared_write_once_int"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'inference_shared_write_once_int_wrapper';
+
+-- src/lib.rs:220
+-- pg_extension::inference_shared_write_once
+CREATE FUNCTION "inference_shared_write_once"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'inference_shared_write_once_wrapper';
+
+-- src/lib.rs:197
+-- pg_extension::inference_shared
+CREATE FUNCTION "inference_shared"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'run_inference_shared_wrapper';
+
+-- src/lib.rs:174
+-- pg_extension::inference
+CREATE FUNCTION "inference"(
+ "dataset" TEXT, /* alloc::string::String */
+ "condition" TEXT, /* alloc::string::String */
+ "config_file" TEXT, /* alloc::string::String */
+ "col_cardinalities_file" TEXT, /* alloc::string::String */
+ "model_path" TEXT, /* alloc::string::String */
+ "sql" TEXT, /* alloc::string::String */
+ "batch_size" INT /* i32 */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'run_inference_wrapper';
+
+-- src/lib.rs:66
+-- pg_extension::filtering_phase
+CREATE FUNCTION "filtering_phase"(
+ "mini_batch" TEXT, /* alloc::string::String */
+ "n" INT, /* i32 */
+ "k" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'filtering_phase_wrapper';
+
+-- src/lib.rs:46
+-- pg_extension::coordinator
+CREATE FUNCTION "coordinator"(
+ "time_score" TEXT, /* alloc::string::String */
+ "time_train" TEXT, /* alloc::string::String */
+ "time_budget" TEXT, /* alloc::string::String */
+ "only_phase1" bool, /* bool */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'coordinator_wrapper';
+
+-- src/lib.rs:152
+-- pg_extension::benchmark_filtering_phase_latency
+CREATE FUNCTION "benchmark_filtering_phase_latency"(
+ "explore_models" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'benchmark_filtering_phase_latency_wrapper';
+
+-- src/lib.rs:163
+-- pg_extension::benchmark_filtering_latency_in_db
+CREATE FUNCTION "benchmark_filtering_latency_in_db"(
+ "explore_models" INT, /* i32 */
+ "dataset" TEXT, /* alloc::string::String */
+ "batch_size_m" INT, /* i32 */
+ "config_file" TEXT /* alloc::string::String */
+) RETURNS TEXT /* alloc::string::String */
+IMMUTABLE STRICT PARALLEL SAFE
+LANGUAGE c /* Rust */
+AS 'MODULE_PATHNAME', 'benchmark_filtering_latency_in_db_wrapper';
\ No newline at end of file
diff --git a/examples/model_selection/Trails/internal/pg_extension/src/bindings/inference.rs b/examples/model_selection/Trails/internal/pg_extension/src/bindings/inference.rs
new file mode 100644
index 0000000000..aa6161184f
--- /dev/null
+++ b/examples/model_selection/Trails/internal/pg_extension/src/bindings/inference.rs
@@ -0,0 +1,797 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+
+use serde_json::json;
+use std::collections::HashMap;
+use std::ffi::c_long;
+use pgrx::prelude::*;
+use crate::bindings::ml_register::PY_MODULE_INFERENCE;
+use crate::bindings::ml_register::run_python_function;
+use std::time::{Instant};
+use shared_memory::*;
+
+
+pub fn run_inference_shared_memory(
+ dataset: &String,
+ condition: &String,
+ config_file: &String,
+ col_cardinalities_file: &String,
+ model_path: &String,
+ sql: &String,
+ batch_size: i32,
+) -> serde_json::Value {
+ let mut response = HashMap::new();
+
+ let overall_start_time = Instant::now();
+
+ let mut last_id = 0;
+
+ // Step 1: load model and columns etc
+ let mut task_map = HashMap::new();
+ task_map.insert("where_cond", condition.clone());
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("col_cardinalities_file", col_cardinalities_file.clone());
+ task_map.insert("model_path", model_path.clone());
+ let task_json = json!(task_map).to_string();
+ // here it cache a state
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &task_json,
+ "model_inference_load_model");
+
+ let _end_time = Instant::now();
+ let model_init_time = _end_time.duration_since(overall_start_time).as_secs_f64();
+ response.insert("model_init_time", model_init_time.clone());
+
+ // Step 2: query data via SPI
+ let start_time = Instant::now();
+    let results: Result<Vec<Vec<String>>, String> = Spi::connect(|client| {
+ let query = format!("SELECT * FROM {}_train {} LIMIT {}",
+ dataset, sql, batch_size);
+ let mut cursor = client.open_cursor(&query, None);
+ let table = match cursor.fetch(batch_size as c_long) {
+ Ok(table) => table,
+ Err(e) => return Err(e.to_string()), // Convert the error to a string and return
+ };
+
+ let mut mini_batch = Vec::new();
+
+ for row in table.into_iter() {
+ let mut each_row = Vec::new();
+ // add primary key
+            let col0 = match row.get::<i32>(1) {
+ Ok(Some(val)) => {
+ // Update last_id with the retrieved value
+ if val > 100000 {
+ last_id = 0;
+ } else {
+ last_id = val
+ }
+ val.to_string()
+ }
+ Ok(None) => "".to_string(), // Handle the case when there's no valid value
+ Err(e) => e.to_string(),
+ };
+ each_row.push(col0);
+ // add label
+            let col1 = match row.get::<i32>(2) {
+ Ok(val) => val.map(|i| i.to_string()).unwrap_or_default(),
+ Err(e) => e.to_string(),
+ };
+ each_row.push(col1);
+ // add fields
+            let texts: Vec<String> = (3..row.columns() + 1)
+ .filter_map(|i| {
+ match row.get::<&str>(i) {
+ Ok(Some(s)) => Some(s.to_string()),
+ Ok(None) => None,
+ Err(e) => Some(e.to_string()), // Convert error to string
+ }
+ }).collect();
+ each_row.extend(texts);
+ mini_batch.push(each_row)
+ }
+ // return
+ Ok(mini_batch)
+ });
+ // serialize the mini-batch data
+ let tup_table = match results {
+ Ok(data) => {
+ serde_json::json!({
+ "status": "success",
+ "data": data
+ })
+ }
+ Err(e) => {
+ serde_json::json!({
+ "status": "error",
+ "message": format!("Error while connecting: {}", e)
+ })
+ }
+ };
+ let mini_batch_json = tup_table.to_string();
+
+ let end_time = Instant::now();
+ let data_query_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time", data_query_time.clone());
+
+
+ let start_time = Instant::now();
+ // Set an identifier for the shared memory
+ let shmem_name = "my_shared_memory";
+ let my_shmem = ShmemConf::new()
+ .size(tup_table.to_string().len())
+ .os_id(shmem_name)
+ .create()
+ .unwrap();
+
+ // Use unsafe to access and write to the raw memory
+ let data_to_write = mini_batch_json.as_bytes();
+ unsafe {
+ // Get the raw pointer to the shared memory
+ let shmem_ptr = my_shmem.as_ptr() as *mut u8;
+ // Copy data into the shared memory
+ std::ptr::copy_nonoverlapping(
+ data_to_write.as_ptr(), shmem_ptr, data_to_write.len());
+ }
+
+ let end_time = Instant::now();
+ let data_copy_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_copy", data_copy_time.clone());
+
+ let start_time = Instant::now();
+ // Step 3: model evaluate in Python
+ let mut eva_task_map = HashMap::new();
+ eva_task_map.insert("config_file", config_file.clone());
+ eva_task_map.insert("spi_seconds", data_query_time.to_string());
+
+ let eva_task_json = json!(eva_task_map).to_string();
+
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &eva_task_json,
+ "model_inference_compute_shared_memory");
+
+ let end_time = Instant::now();
+ let python_compute_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("python_compute_time", python_compute_time.clone());
+
+ let overall_end_time = Instant::now();
+ let overall_elapsed_time = overall_end_time.duration_since(overall_start_time).as_secs_f64();
+ let diff_time = model_init_time + data_query_time + data_copy_time + python_compute_time - overall_elapsed_time;
+
+ response.insert("overall_query_latency", overall_elapsed_time.clone());
+ response.insert("diff", diff_time.clone());
+
+ // Step 4: Return to PostgreSQL
+ return serde_json::json!(response);
+}
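+
+// Sketch of the handoff above: the "my_shared_memory" segment holds the UTF-8
+// bytes of one JSON document of the form (values are illustrative)
+//   {"status":"success","data":[["4","1","f1","f2"],["5","0","f1","f2"]]}
+// and the Python side ("model_inference_compute_shared_memory") is assumed to
+// attach to the same os_id and parse that document; the reader is not in this file.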
+
+
+pub fn run_inference(
+ dataset: &String,
+ condition: &String,
+ config_file: &String,
+ col_cardinalities_file: &String,
+ model_path: &String,
+ sql: &String,
+ batch_size: i32,
+) -> serde_json::Value {
+ let mut response = HashMap::new();
+
+ let overall_start_time = Instant::now();
+
+// let mut last_id = 0;
+
+ // Step 1: load model and columns etc
+ let mut task_map = HashMap::new();
+ task_map.insert("where_cond", condition.clone());
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("col_cardinalities_file", col_cardinalities_file.clone());
+ task_map.insert("model_path", model_path.clone());
+ let task_json = json!(task_map).to_string();
+ // this call caches state on the Python side
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &task_json,
+ "model_inference_load_model");
+
+ let _end_time = Instant::now();
+ let model_init_time = _end_time.duration_since(overall_start_time).as_secs_f64();
+ response.insert("model_init_time", model_init_time.clone());
+
+ // Step 2: query data via SPI
+ let start_time = Instant::now();
+ let mut all_rows = Vec::new();
+ let _ = Spi::connect(|client| {
+ let query = format!("SELECT * FROM {}_int_train {} LIMIT {}", dataset, sql, batch_size);
+ let mut cursor = client.open_cursor(&query, None);
+ let table = match cursor.fetch(batch_size as c_long) {
+ Ok(table) => table,
+ Err(e) => return Err(e.to_string()),
+ };
+
+ let end_time = Instant::now();
+ let data_query_time_spi = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time_spi", data_query_time_spi);
+
+ // TODO: this loop should be optimized; it currently walks every cell one by one.
+ for row in table.into_iter() {
+ for i in 3..=row.columns() {
+ match row.get::<i32>(i) {
+ Ok(Some(val)) => all_rows.push(val), // Handle the case when a valid i32 is obtained
+ Ok(None) => {
+ // Handle the case when the value is missing or erroneous
+ // For example, you can add a default value, like -1
+ all_rows.push(-1);
+ }
+ Err(e) => {
+ // Handle the error, e.g., log it or handle it in some way
+ eprintln!("Error fetching value: {:?}", e);
+ }
+ }
+ }
+ }
+ // Return OK or some status
+ Ok(())
+ });
+
+ let mini_batch_json = serde_json::to_string(&all_rows).unwrap();
+
+ let end_time = Instant::now();
+ let data_query_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time", data_query_time.clone());
+
+ let start_time = Instant::now();
+ // Step 3: model evaluate in Python
+ let mut eva_task_map = HashMap::new();
+ eva_task_map.insert("config_file", config_file.clone());
+ eva_task_map.insert("mini_batch", mini_batch_json);
+ eva_task_map.insert("spi_seconds", data_query_time.to_string());
+
+ let eva_task_json = json!(eva_task_map).to_string();
+
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &eva_task_json,
+ "model_inference_compute");
+
+ let end_time = Instant::now();
+ let python_compute_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("python_compute_time", python_compute_time.clone());
+
+ let overall_end_time = Instant::now();
+ let overall_elapsed_time = overall_end_time.duration_since(overall_start_time).as_secs_f64();
+ let diff_time = model_init_time + data_query_time + python_compute_time - overall_elapsed_time;
+
+ response.insert("overall_query_latency", overall_elapsed_time.clone());
+ response.insert("diff", diff_time.clone());
+
+ let response_json = json!(response).to_string();
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &response_json,
+ "records_results");
+
+ // Step 4: Return to PostgreSQL
+ return serde_json::json!(response);
+}
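+
+// For reference, the eva_task_json built above for "model_inference_compute"
+// serializes to something like (illustrative values):
+//   {"config_file":"/path/config.ini","mini_batch":"[3,7,1]","spi_seconds":"0.0123"}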
+
+
+pub fn run_inference_shared_memory_write_once(
+ dataset: &String,
+ condition: &String,
+ config_file: &String,
+ col_cardinalities_file: &String,
+ model_path: &String,
+ sql: &String,
+ batch_size: i32,
+) -> serde_json::Value {
+ let mut response = HashMap::new();
+
+ let overall_start_time = Instant::now();
+
+ let mut last_id = 0;
+
+ // Step 1: load model and columns etc
+ let mut task_map = HashMap::new();
+ task_map.insert("where_cond", condition.clone());
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("col_cardinalities_file", col_cardinalities_file.clone());
+ task_map.insert("model_path", model_path.clone());
+ let task_json = json!(task_map).to_string();
+ // this call caches state on the Python side
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &task_json,
+ "model_inference_load_model");
+
+ let _end_time = Instant::now();
+ let model_init_time = _end_time.duration_since(overall_start_time).as_secs_f64();
+ response.insert("model_init_time", model_init_time.clone());
+
+ // Step 2: query data via SPI
+ let start_time = Instant::now();
+ // Allocate shared memory in advance
+ // Set an identifier for the shared memory
+ let shmem_name = "my_shared_memory";
+
+ // Pre-allocate a size for shared memory (this might need some logic to determine a reasonable size)
+ let avg_row_size = 120;
+ let shmem_size = (1.5 * (avg_row_size * batch_size as usize) as f64) as usize;
+ let my_shmem = ShmemConf::new()
+ .size(shmem_size)
+ .os_id(shmem_name)
+ .create()
+ .unwrap();
+
+ let shmem_ptr = my_shmem.as_ptr() as *mut u8;
+
+ let end_time = Instant::now();
+ let mem_allocate_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("mem_allocate_time", mem_allocate_time.clone());
+
+ let start_time = Instant::now();
+ // Use unsafe to access and write to the raw memory
+ unsafe {
+ let _ = Spi::connect(|client| {
+ let query = format!("SELECT * FROM {}_train {} LIMIT {}", dataset, sql, batch_size);
+ let mut cursor = client.open_cursor(&query, None);
+ let table = match cursor.fetch(batch_size as c_long) {
+ Ok(table) => table,
+ Err(e) => return Err(e.to_string()),
+ };
+
+ let end_time = Instant::now();
+ let data_query_time_spi = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time_spi", data_query_time_spi.clone());
+
+ let mut offset = 0; // Keep track of how much we've written to shared memory
+
+ // Write the opening square bracket
+ shmem_ptr.offset(offset as isize).write(b"["[0]);
+ offset += 1;
+
+ let mut is_first_row = true;
+ for row in table.into_iter() {
+
+ // If not the first row, write a comma before the next row's data
+ if !is_first_row {
+ shmem_ptr.offset(offset as isize).write(b","[0]);
+ offset += 1;
+ } else {
+ is_first_row = false;
+ }
+
+ let mut each_row = Vec::new();
+ // add primary key
+ let col0 = match row.get::<i32>(1) {
+ Ok(Some(val)) => {
+ // Update last_id with the retrieved value
+ if val > 100000 {
+ last_id = 0;
+ } else {
+ last_id = val
+ }
+ val.to_string()
+ }
+ Ok(None) => "".to_string(), // Handle the case when there's no valid value
+ Err(e) => e.to_string(),
+ };
+ each_row.push(col0);
+ // add label
+ let col1 = match row.get::<i32>(2) {
+ Ok(val) => val.map(|i| i.to_string()).unwrap_or_default(),
+ Err(e) => e.to_string(),
+ };
+ each_row.push(col1);
+ // add fields
+ let texts: Vec<String> = (3..row.columns() + 1)
+ .filter_map(|i| {
+ match row.get::<&str>(i) {
+ Ok(Some(s)) => Some(s.to_string()),
+ Ok(None) => None,
+ Err(e) => Some(e.to_string()), // Convert error to string
+ }
+ }).collect();
+ each_row.extend(texts);
+
+ // Serialize each row into shared memory
+ let serialized_row = serde_json::to_string(&each_row).unwrap();
+ let bytes = serialized_row.as_bytes();
+
+ // Check if there's enough space left in shared memory
+ if offset + bytes.len() > shmem_size {
+ // Handle error: not enough space in shared memory
+ return Err("Shared memory exceeded estimated size.".to_string());
+ }
+
+ // Copy the serialized row into shared memory
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(),
+ shmem_ptr.offset(offset as isize),
+ bytes.len());
+ offset += bytes.len();
+ }
+ // Write the closing square bracket after all rows
+ shmem_ptr.offset(offset as isize).write(b"]"[0]);
+
+ // Return OK or some status
+ Ok(())
+ });
+ }
+
+ let end_time = Instant::now();
+ let data_query_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time", data_query_time.clone());
+
+ let start_time = Instant::now();
+ // Step 3: model evaluate in Python
+ let mut eva_task_map = HashMap::new();
+ eva_task_map.insert("config_file", config_file.clone());
+ eva_task_map.insert("spi_seconds", data_query_time.to_string());
+
+ let eva_task_json = json!(eva_task_map).to_string();
+
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &eva_task_json,
+ "model_inference_compute_shared_memory_write_once");
+
+ let end_time = Instant::now();
+ let python_compute_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("python_compute_time", python_compute_time.clone());
+
+ let overall_end_time = Instant::now();
+ let overall_elapsed_time = overall_end_time.duration_since(overall_start_time).as_secs_f64();
+ let diff_time = model_init_time + data_query_time + python_compute_time - overall_elapsed_time;
+
+ response.insert("overall_query_latency", overall_elapsed_time.clone());
+ response.insert("diff", diff_time.clone());
+
+
+ let response_json = json!(response).to_string();
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &response_json,
+ "records_results");
+
+ // Step 4: Return to PostgreSQL
+ return serde_json::json!(response);
+}
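+
+// Layout written above: a single JSON array is assembled in place, one
+// serialized row at a time ("[" + row + "," + row + ... + "]"), so the Python
+// side can recover the whole batch with a single parse. The 1.5x over-allocation
+// is a heuristic guard against rows longer than avg_row_size.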
+
+
+pub fn run_inference_shared_memory_write_once_int_exp(
+ dataset: &String,
+ condition: &String,
+ config_file: &String,
+ col_cardinalities_file: &String,
+ model_path: &String,
+ sql: &String,
+ batch_size: i32,
+) -> serde_json::Value {
+ let mut response = HashMap::new();
+ // let mut response_log = HashMap::new();
+
+ let mut num_columns: i32 = 0;
+ match dataset.as_str() { // assuming dataset is a String
+ "frappe" => num_columns = 12,
+ "adult" => num_columns = 15,
+ "cvd" => num_columns = 13,
+ "bank" => num_columns = 18,
+ "census" => num_columns = 41+2,
+ "credit" => num_columns = 23+2,
+ "diabetes" => num_columns = 48+2,
+ "hcdr" => num_columns = 69+2,
+ _ => {},
+ }
+
+ let overall_start_time = Instant::now();
+
+ // Step 1: load model and columns etc
+ let mut task_map = HashMap::new();
+ task_map.insert("where_cond", condition.clone());
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("col_cardinalities_file", col_cardinalities_file.clone());
+ task_map.insert("model_path", model_path.clone());
+ let task_json = json!(task_map).to_string();
+ // this call caches state on the Python side
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &task_json,
+ "model_inference_load_model");
+
+ let _end_time = Instant::now();
+ let model_init_time = _end_time.duration_since(overall_start_time).as_secs_f64();
+ response.insert("model_init_time", model_init_time.clone());
+
+
+ // Step 2: query data via SPI
+ let start_time = Instant::now();
+ let mut all_rows = Vec::new();
+
+ let _ = Spi::connect(|client| {
+ let query = format!("SELECT * FROM {}_int_train {} LIMIT {}", dataset, sql, batch_size);
+ let mut cursor = client.open_cursor(&query, None);
+ let table = match cursor.fetch(batch_size as c_long) {
+ Ok(table) => table,
+ Err(e) => return Err(e.to_string()),
+ };
+
+ let end_time = Instant::now();
+ let data_query_time_spi = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time_spi", data_query_time_spi);
+
+ let mut t1: f64 = 0.0;
+ // TODO: this loop should be optimized; it currently walks every cell one by one.
+ let start_time_3 = Instant::now();
+ for row in table.into_iter() {
+ for i in 3..=num_columns as usize {
+ let start_time_min = Instant::now();
+ if let Ok(Some(val)) = row.get::<i32>(i) {
+ all_rows.push(val);
+ }
+ let end_time_min = Instant::now();
+ let data_query_time_min = end_time_min.duration_since(start_time_min).as_secs_f64();
+ t1 += data_query_time_min;
+ }
+ }
+ let end_time_min3 = Instant::now();
+ let data_query_time_min3 = end_time_min3.duration_since(start_time_3).as_secs_f64();
+
+ response.insert("data_query_time3", data_query_time_min3.clone());
+ response.insert("data_query_time2", t1.clone());
+
+ // Return OK or some status
+ Ok(())
+ });
+ let end_time = Instant::now();
+ let data_query_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time", data_query_time.clone());
+
+
+ // log the queried data
+ // let serialized_row = serde_json::to_string(&all_rows).unwrap();
+ // response_log.insert("query_data", serialized_row);
+
+ // Step 3: put all data into the shared memory
+ let start_time = Instant::now();
+ let shmem_name = "my_shared_memory";
+ let my_shmem = ShmemConf::new()
+ .size(4 * all_rows.len())
+ .os_id(shmem_name)
+ .create()
+ .unwrap();
+ let shmem_ptr = my_shmem.as_ptr() as *mut i32;
+
+ unsafe {
+ // Copy data into shared memory
+ std::ptr::copy_nonoverlapping(
+ all_rows.as_ptr(),
+ shmem_ptr as *mut i32,
+ all_rows.len(),
+ );
+ }
+ let end_time = Instant::now();
+ let mem_allocate_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("mem_allocate_time", mem_allocate_time.clone());
+
+
+ let start_time = Instant::now();
+ // Step 4: model evaluate in Python
+ let mut eva_task_map = HashMap::new();
+ eva_task_map.insert("config_file", config_file.clone());
+ eva_task_map.insert("spi_seconds", data_query_time.to_string());
+ eva_task_map.insert("rows", batch_size.to_string());
+
+ let eva_task_json = json!(eva_task_map).to_string();
+
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &eva_task_json,
+ "model_inference_compute_shared_memory_write_once_int");
+
+ let end_time = Instant::now();
+ let python_compute_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("python_compute_time", python_compute_time.clone());
+
+ let overall_end_time = Instant::now();
+ let overall_elapsed_time = overall_end_time.duration_since(overall_start_time).as_secs_f64();
+ let diff_time = model_init_time + data_query_time + python_compute_time - overall_elapsed_time;
+
+ response.insert("overall_query_latency", overall_elapsed_time.clone());
+ response.insert("diff", diff_time.clone());
+
+ let response_json = json!(response).to_string();
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &response_json,
+ "records_results");
+
+ // Step 5: Return to PostgreSQL
+ return serde_json::json!(response);
+}
+
+pub fn run_inference_shared_memory_write_once_int(
+ dataset: &String,
+ condition: &String,
+ config_file: &String,
+ col_cardinalities_file: &String,
+ model_path: &String,
+ sql: &String,
+ batch_size: i32,
+) -> serde_json::Value {
+ let mut response = HashMap::new();
+
+ let mut num_columns: i32 = 0;
+ match dataset.as_str() { // assuming dataset is a String
+ "frappe" => num_columns = 12,
+ "adult" => num_columns = 15,
+ "cvd" => num_columns = 13,
+ "bank" => num_columns = 18,
+ "census" => num_columns = 41+2,
+ "credit" => num_columns = 23+2,
+ "diabetes" => num_columns = 48+2,
+ "hcdr" => num_columns = 69+2,
+ _ => {},
+ }
+
+ let overall_start_time = Instant::now();
+
+ // Step 1: load model and columns etc
+ let mut task_map = HashMap::new();
+ task_map.insert("where_cond", condition.clone());
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("col_cardinalities_file", col_cardinalities_file.clone());
+ task_map.insert("model_path", model_path.clone());
+ let task_json = json!(task_map).to_string();
+ // this call caches state on the Python side
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &task_json,
+ "model_inference_load_model");
+
+ let _end_time = Instant::now();
+ let model_init_time = _end_time.duration_since(overall_start_time).as_secs_f64();
+ response.insert("model_init_time", model_init_time.clone());
+
+
+ // Step 2: query data via SPI
+ let start_time = Instant::now();
+ let mut all_rows = Vec::new();
+ let _ = Spi::connect(|client| {
+ let query = format!("SELECT * FROM {}_int_train {} LIMIT {}", dataset, sql, batch_size);
+ let mut cursor = client.open_cursor(&query, None);
+ let table = match cursor.fetch(batch_size as c_long) {
+ Ok(table) => table,
+ Err(e) => return Err(e.to_string()),
+ };
+ let end_time = Instant::now();
+ let data_query_time_spi = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time_spi", data_query_time_spi);
+
+ // TODO: this loop should be optimized; it currently walks every cell one by one.
+ let start_time_3 = Instant::now();
+ for row in table.into_iter() {
+ for i in 3..=num_columns as usize {
+ if let Ok(Some(val)) = row.get::<i32>(i) {
+ all_rows.push(val);
+ }
+ }
+ }
+ let end_time_min3 = Instant::now();
+ let data_query_time_min3 = end_time_min3.duration_since(start_time_3).as_secs_f64();
+ response.insert("data_type_convert_time", data_query_time_min3.clone());
+
+ // Return OK or some status
+ Ok(())
+ });
+ let end_time = Instant::now();
+ let data_query_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("data_query_time", data_query_time.clone());
+
+
+ // log the queried data
+ // let serialized_row = serde_json::to_string(&all_rows).unwrap();
+ // response_log.insert("query_data", serialized_row);
+
+ // Step 3: put all data into the shared memory
+ let start_time = Instant::now();
+ let shmem_name = "my_shared_memory";
+ let my_shmem = ShmemConf::new()
+ .size(4 * all_rows.len())
+ .os_id(shmem_name)
+ .create()
+ .unwrap();
+ let shmem_ptr = my_shmem.as_ptr() as *mut i32;
+
+ unsafe {
+ // Copy data into shared memory
+ std::ptr::copy_nonoverlapping(
+ all_rows.as_ptr(),
+ shmem_ptr as *mut i32,
+ all_rows.len(),
+ );
+ }
+ let end_time = Instant::now();
+ let mem_allocate_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("mem_allocate_time", mem_allocate_time.clone());
+
+
+ let start_time = Instant::now();
+ // Step 4: model evaluate in Python
+ let mut eva_task_map = HashMap::new();
+ eva_task_map.insert("config_file", config_file.clone());
+ eva_task_map.insert("spi_seconds", data_query_time.to_string());
+ eva_task_map.insert("rows", batch_size.to_string());
+
+ let eva_task_json = json!(eva_task_map).to_string();
+
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &eva_task_json,
+ "model_inference_compute_shared_memory_write_once_int");
+
+ let end_time = Instant::now();
+ let python_compute_time = end_time.duration_since(start_time).as_secs_f64();
+ response.insert("python_compute_time", python_compute_time.clone());
+
+ let overall_end_time = Instant::now();
+ let overall_elapsed_time = overall_end_time.duration_since(overall_start_time).as_secs_f64();
+ let diff_time = model_init_time + data_query_time + python_compute_time - overall_elapsed_time;
+
+ response.insert("overall_query_latency", overall_elapsed_time.clone());
+ response.insert("diff", diff_time.clone());
+
+ let response_json = json!(response).to_string();
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &response_json,
+ "records_results");
+
+ // Step 5: Return to PostgreSQL
+ return serde_json::json!(response);
+}
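+
+// A minimal sketch of the matching reader (an assumption about the Python
+// side, which is not shown in this file): attach to the "my_shared_memory"
+// os_id and reinterpret the bytes as native-endian i32 values, e.g.
+//   numpy.frombuffer(buf, dtype=numpy.int32).reshape(rows, -1)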
+
+
+pub fn init_model(
+ condition: &String,
+ config_file: &String,
+ col_cardinalities_file: &String,
+ model_path: &String,
+) -> serde_json::Value {
+ let overall_start_time = Instant::now();
+ // Step 1: load model and columns etc
+ let mut task_map = HashMap::new();
+ task_map.insert("where_cond", condition.clone());
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("col_cardinalities_file", col_cardinalities_file.clone());
+ task_map.insert("model_path", model_path.clone());
+ let task_json = json!(task_map).to_string();
+ // this call caches state on the Python side
+ run_python_function(
+ &PY_MODULE_INFERENCE,
+ &task_json,
+ "model_inference_load_model");
+ let _end_time = Instant::now();
+ let model_init_time = _end_time.duration_since(overall_start_time).as_secs_f64();
+ return serde_json::json!(model_init_time);
+}
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ml_register.rs b/examples/model_selection/Trails/internal/pg_extension/src/bindings/ml_register.rs
similarity index 75%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ml_register.rs
rename to examples/model_selection/Trails/internal/pg_extension/src/bindings/ml_register.rs
index 5ca5396533..617edf52fc 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/ml_register.rs
+++ b/examples/model_selection/Trails/internal/pg_extension/src/bindings/ml_register.rs
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,10 +19,13 @@
*
*************************************************************/
+
use log::error;
use once_cell::sync::Lazy;
use pyo3::prelude::*;
use pyo3::types::PyTuple;
+use std::env;
+use pyo3::types::PyList;
pub fn run_python_function(
@@ -32,6 +35,12 @@ pub fn run_python_function(
) -> serde_json::Value {
let parameters_str = parameters.to_string();
let results = Python::with_gil(|py| -> String {
+
+ // extend sys.path so the embedded interpreter can import the Python packages; this is only needed when integrating with the PolarDB env
+ let sys_module = py.import("sys").unwrap();
+ let sys_path: &PyList = sys_module.getattr("path").unwrap().downcast().unwrap();
+ sys_path.append("/home/postgres/Trails/internal/ml/model_selection/").unwrap();
+
let run_script: Py<PyAny> = py_module.getattr(py, function_name).unwrap().into();
let result = run_script.call1(
py,
@@ -69,6 +78,18 @@ pub static PY_MODULE: Lazy<Py<PyModule>> = Lazy::new(|| {
});
+/*
+ Python Module Path for SAMS
+ */
+pub static PY_MODULE_INFERENCE: Lazy<Py<PyModule>> = Lazy::new(|| {
+ Python::with_gil(|py| -> Py<PyModule> {
+ let src = include_str!(concat!(
+ env!("CARGO_MANIFEST_DIR"),
+ "/../ml/model_slicing/pg_interface.py"
+ ));
+ PyModule::from_code(py, src, "", "").unwrap().into()
+ })
+});
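+
+// Note: include_str! embeds pg_interface.py into the extension binary at
+// compile time, so PyModule::from_code can load it without touching the
+// filesystem at run time.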
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/mod.rs b/examples/model_selection/Trails/internal/pg_extension/src/bindings/mod.rs
similarity index 95%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/mod.rs
rename to examples/model_selection/Trails/internal/pg_extension/src/bindings/mod.rs
index 4e976d605e..b7d29e6b70 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/bindings/mod.rs
+++ b/examples/model_selection/Trails/internal/pg_extension/src/bindings/mod.rs
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,8 +19,12 @@
*
*************************************************************/
+
+mod ml_register;
+
#[cfg(feature = "python")]
pub mod ms;
-mod ml_register;
-mod model;
+#[cfg(feature = "python")]
+pub mod inference;
+
diff --git a/examples/model_selection/Trails/internal/pg_extension/src/bindings/ms.rs b/examples/model_selection/Trails/internal/pg_extension/src/bindings/ms.rs
new file mode 100644
index 0000000000..bc8640430f
--- /dev/null
+++ b/examples/model_selection/Trails/internal/pg_extension/src/bindings/ms.rs
@@ -0,0 +1,265 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+use serde_json::json;
+use std::collections::HashMap;
+use pgrx::prelude::*;
+use crate::bindings::ml_register::PY_MODULE;
+use crate::bindings::ml_register::run_python_function;
+use std::time::Instant;
+use shared_memory::*;
+
+pub fn profiling_filtering_phase(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "profiling_filtering_phase")
+}
+
+
+pub fn profiling_refinement_phase(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "profiling_refinement_phase")
+}
+
+
+pub fn coordinator(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "coordinator")
+}
+
+
+pub fn filtering_phase(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "filtering_phase")
+}
+
+
+pub fn refinement_phase() -> serde_json::Value {
+ let task = "refinement_phase".to_string();
+ run_python_function(&PY_MODULE, &task, "refinement_phase")
+}
+
+
+// these two run filtering + refinement inside the UDF runtime
+pub fn model_selection(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection")
+}
+
+
+pub fn model_selection_workloads(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection_workloads")
+}
+
+
+// these two run filtering + refinement on the GPU server
+pub fn model_selection_trails(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection_trails")
+}
+
+
+pub fn model_selection_trails_workloads(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "model_selection_trails_workloads")
+}
+
+// micro benchmarks
+// this is query data in filtering phase via sql
+pub fn benchmark_filtering_phase_latency(
+ task: &String
+) -> serde_json::Value {
+ run_python_function(&PY_MODULE, task, "benchmark_filtering_phase_latency")
+}
+
+// this is query data in filtering phase via spi
+pub fn benchmark_filtering_latency_in_db(
+ explore_models: i32, dataset: &String, batch_size_m: i32, config_file: &String) -> serde_json::Value {
+ let mut return_result = HashMap::new();
+
+
+ let mut total_columns: i32 = 0;
+ match dataset.as_str() { // assuming dataset is a String
+ "frappe" => total_columns = 12,
+ "criteo" => total_columns = 41,
+ "uci_diabetes" => total_columns = 45,
+ _ => {}
+ }
+
+ let mut num_columns: i64 = 0;
+ match dataset.as_str() { // assuming dataset is a String
+ "frappe" => num_columns = 10 * 2 + 1,
+ "criteo" => num_columns = 39 * 2 + 1,
+ "uci_diabetes" => num_columns = 43 * 2 + 1,
+ _ => {}
+ }
+
+ let batch_size: i64 = batch_size_m as i64;
+
+ let call_time_begin = Instant::now();
+ for _ in 1..=5000 {
+ run_python_function(
+ &PY_MODULE,
+ &"".to_string(),
+ "measure_call_overheads");
+ }
+ let _end_time = Instant::now();
+ let call_time = _end_time.duration_since(call_time_begin).as_secs_f64();
+ return_result.insert("call_time", call_time.to_string());
+
+
+ let overall_start_time = Instant::now();
+
+ let mut last_id = 0;
+ let mut eva_results = serde_json::Value::Null; // Initializing the eva_results
+
+ // Allocate the shared memory once, before the exploration loop
+ let shmem_name = "my_shared_memory";
+ let my_shmem = ShmemConf::new()
+ .size((4 * batch_size * num_columns) as usize)
+ .os_id(shmem_name)
+ .create()
+ .unwrap();
+
+ let mut numbers: Vec<f32> = Vec::with_capacity((num_columns - 1) as usize);
+
+ let _ = Spi::connect(|client| {
+ for i in 1..explore_models + 1 {
+ // Step 1: Initialize State in Python
+ let mut task_map = HashMap::new();
+ task_map.insert("config_file", config_file.clone());
+ task_map.insert("dataset", dataset.clone());
+ task_map.insert("eva_results", eva_results.to_string());
+ let task_json = json!(task_map).to_string();
+
+ // this call caches state on the Python side
+ let sample_result = run_python_function(
+ &PY_MODULE,
+ &task_json,
+ "in_db_filtering_state_init");
+
+ // 2. query data via SPI
+ let start_time = Instant::now();
+ let mut mini_batch = Vec::new();
+
+ let query = format!("SELECT * FROM {}_train WHERE id > {} ORDER BY id ASC LIMIT {}", dataset, last_id, batch_size);
+ let mut cursor = client.open_cursor(&query, None);
+ let table = match cursor.fetch(batch_size) {
+ Ok(table) => table,
+ Err(e) => return Err(e.to_string()), // Convert the error to a string and return
+ };
+
+ for row in table.into_iter() {
+ // add primary key
+ let val = row.get::<i32>(1)
+ .expect("Failed to retrieve value") // This will panic if it encounters `Err`
+ .expect("Retrieved value is NULL"); // This will panic if it encounters `None`
+
+ if val > 80000 {
+ last_id = 0;
+ } else {
+ last_id = val;
+ }
+
+ // add label
+ if let Ok(Some(col1)) = row.get::<i32>(2) {
+ mini_batch.push(col1 as f32);
+ };
+
+ numbers.clear();
+ for i in 3..= total_columns as usize {
+ if let Some(s) = row.get::<&str>(i).ok().flatten() { // Ensuring it's Some(&str)
+ for part in s.split(':') {
+ match part.parse::<f32>() {
+ Ok(num) => numbers.push(num),
+ Err(_) => eprintln!("Failed to parse part as f32"), // Handle the error as appropriate for your application.
+ }
+ }
+ }
+ }
+
+ mini_batch.extend_from_slice(&numbers);
+ }
+
+ unsafe {
+ let shmem_ptr = my_shmem.as_ptr() as *mut f32;
+ // Copy data into shared memory
+ std::ptr::copy_nonoverlapping(
+ mini_batch.as_ptr(),
+ shmem_ptr as *mut f32,
+ mini_batch.len(),
+ );
+ }
+
+ let end_time = Instant::now();
+ let elapsed_time = end_time.duration_since(start_time);
+ let elapsed_seconds = elapsed_time.as_secs_f64();
+
+ // Step 3: model evaluate in Python
+ let mut eva_task_map = HashMap::new();
+ eva_task_map.insert("config_file", config_file.clone());
+ eva_task_map.insert("sample_result", sample_result.to_string());
+ eva_task_map.insert("spi_seconds", elapsed_seconds.to_string());
+ eva_task_map.insert("rows", batch_size.to_string());
+ eva_task_map.insert("model_index", i.to_string());
+ let eva_task_json = json!(eva_task_map).to_string();
+
+ eva_results = run_python_function(
+ &PY_MODULE,
+ &eva_task_json,
+ "in_db_filtering_evaluate");
+
+ // debug the fetched data
+ // if i == 1{
+ // let serialized_data = json!(mini_batch).to_string();
+ // return_result.insert("serialized_data", serialized_data);
+ // };
+ };
+ Ok(())
+ });
+
+ let overall_end_time = Instant::now();
+ let overall_elapsed_time = overall_end_time.duration_since(overall_start_time);
+ let overall_elapsed_seconds = overall_elapsed_time.as_secs_f64();
+
+ return_result.insert("overall time usage", overall_elapsed_seconds.to_string());
+
+ let mut record_task_map = HashMap::new();
+ record_task_map.insert("config_file", config_file.clone());
+ record_task_map.insert("dataset", dataset.clone());
+ let record_task_json = json!(record_task_map).to_string();
+ run_python_function(
+ &PY_MODULE,
+ &record_task_json,
+ "records_results");
+
+ // Step 4: Return to PostgreSQL
+ return serde_json::json!(return_result);
+}
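+
+// Example call from psql (illustrative arguments; the UDF is registered in
+// lib.rs as benchmark_filtering_latency_in_db):
+//   SELECT benchmark_filtering_latency_in_db(10, 'frappe', 32, '/path/to/config.ini');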
+
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/lib.rs b/examples/model_selection/Trails/internal/pg_extension/src/lib.rs
similarity index 71%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/lib.rs
rename to examples/model_selection/Trails/internal/pg_extension/src/lib.rs
index 5ff49a7085..e4f361a868 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/src/lib.rs
+++ b/examples/model_selection/Trails/internal/pg_extension/src/lib.rs
@@ -1,5 +1,5 @@
/************************************************************
-*
+*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -182,11 +182,118 @@ pub fn benchmark_filtering_phase_latency(explore_models: i32, config_file: Strin
#[pg_extern(immutable, parallel_safe, name = "benchmark_filtering_latency_in_db")]
#[allow(unused_variables)]
pub fn benchmark_filtering_latency_in_db(
- explore_models: i32, dataset: String, config_file: String) -> String {
- crate::bindings::ms::benchmark_filtering_latency_in_db(explore_models, &dataset, &config_file).to_string()
+ explore_models: i32, dataset: String, batch_size_m: i32, config_file: String) -> String {
+ crate::bindings::ms::benchmark_filtering_latency_in_db(explore_models, &dataset, batch_size_m, &config_file).to_string()
}
+// Model Inference
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "inference")]
+#[allow(unused_variables)]
+pub fn run_inference(
+ dataset: String,
+ condition: String,
+ config_file: String,
+ col_cardinalities_file: String,
+ model_path: String,
+ sql: String,
+ batch_size: i32,
+) -> String {
+ crate::bindings::inference::run_inference(
+ &dataset,
+ &condition,
+ &config_file,
+ &col_cardinalities_file,
+ &model_path,
+ &sql,
+ batch_size).to_string()
+}
+// Model Inference
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "inference_shared")]
+#[allow(unused_variables)]
+pub fn run_inference_shared(
+ dataset: String,
+ condition: String,
+ config_file: String,
+ col_cardinalities_file: String,
+ model_path: String,
+ sql: String,
+ batch_size: i32,
+) -> String {
+ crate::bindings::inference::run_inference_shared_memory(
+ &dataset,
+ &condition,
+ &config_file,
+ &col_cardinalities_file,
+ &model_path,
+ &sql,
+ batch_size).to_string()
+}
+// Model Inference
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "inference_shared_write_once")]
+#[allow(unused_variables)]
+pub fn inference_shared_write_once(
+ dataset: String,
+ condition: String,
+ config_file: String,
+ col_cardinalities_file: String,
+ model_path: String,
+ sql: String,
+ batch_size: i32,
+) -> String {
+ crate::bindings::inference::run_inference_shared_memory_write_once(
+ &dataset,
+ &condition,
+ &config_file,
+ &col_cardinalities_file,
+ &model_path,
+ &sql,
+ batch_size).to_string()
+}
+
+// Model Inference
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "inference_shared_write_once_int")]
+#[allow(unused_variables)]
+pub fn inference_shared_write_once_int(
+ dataset: String,
+ condition: String,
+ config_file: String,
+ col_cardinalities_file: String,
+ model_path: String,
+ sql: String,
+ batch_size: i32,
+) -> String {
+ crate::bindings::inference::run_inference_shared_memory_write_once_int(
+ &dataset,
+ &condition,
+ &config_file,
+ &col_cardinalities_file,
+ &model_path,
+ &sql,
+ batch_size).to_string()
+}
+
+
+// Model Inference
+#[cfg(feature = "python")]
+#[pg_extern(immutable, parallel_safe, name = "model_init")]
+#[allow(unused_variables)]
+pub fn model_init(
+ condition: String,
+ config_file: String,
+ col_cardinalities_file: String,
+ model_path: String
+) -> String {
+ crate::bindings::inference::init_model(
+ &condition,
+ &config_file,
+ &col_cardinalities_file,
+ &model_path).to_string()
+}
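+
+// Example invocations from psql (all paths and arguments are illustrative):
+//   SELECT model_init('{}', '/path/config.ini',
+//                     '/path/col_cardinalities.json', '/path/model');
+//   SELECT inference('frappe', '{}', '/path/config.ini',
+//                    '/path/col_cardinalities.json', '/path/model', '', 32);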
diff --git a/examples/model_selection/Trails/internal/pg_extension/template/Cargo.pg11.toml b/examples/model_selection/Trails/internal/pg_extension/template/Cargo.pg11.toml
new file mode 100644
index 0000000000..02b15a4564
--- /dev/null
+++ b/examples/model_selection/Trails/internal/pg_extension/template/Cargo.pg11.toml
@@ -0,0 +1,40 @@
+[package]
+name = "pg_extension"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+crate-type = ["cdylib"]
+
+[features]
+default = ["pg11", "python"]
+python = ["pyo3"]
+pg11 = ["pgrx/pg11", "pgrx-tests/pg11" ]
+pg12 = ["pgrx/pg12", "pgrx-tests/pg12" ]
+pg13 = ["pgrx/pg13", "pgrx-tests/pg13" ]
+pg14 = ["pgrx/pg14", "pgrx-tests/pg14" ]
+pg15 = ["pgrx/pg15", "pgrx-tests/pg15" ]
+pg_test = []
+
+[dependencies]
+pgrx = "=0.9.7"
+pgrx-pg-sys = "=0.9.7"
+serde_json = { version = "1.0.85", features = ["preserve_order"] }
+pyo3 = { version = "0.17", features = ["auto-initialize"], optional = true }
+once_cell = "1.8.0"
+log = "0.4.14"
+serde = "1.0"
+serde_derive = "1.0"
+shared_memory = "0.12.4"
+
+[dev-dependencies]
+pgrx-tests = "=0.9.7"
+
+[profile.dev]
+panic = "unwind"
+
+[profile.release]
+panic = "unwind"
+opt-level = 3
+lto = "fat"
+codegen-units = 1
\ No newline at end of file
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/Cargo.toml b/examples/model_selection/Trails/internal/pg_extension/template/Cargo.pg14.toml
similarity index 96%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/Cargo.toml
rename to examples/model_selection/Trails/internal/pg_extension/template/Cargo.pg14.toml
index 5c3e747391..667fa75360 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/Cargo.toml
+++ b/examples/model_selection/Trails/internal/pg_extension/template/Cargo.pg14.toml
@@ -25,6 +25,7 @@ once_cell = "1.8.0"
log = "0.4.14"
serde = "1.0"
serde_derive = "1.0"
+shared_memory = "0.12.4"
[dev-dependencies]
pgrx-tests = "=0.9.7"
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/test/lib.rs b/examples/model_selection/Trails/internal/pg_extension/test/lib.rs
similarity index 97%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/test/lib.rs
rename to examples/model_selection/Trails/internal/pg_extension/test/lib.rs
index bb91c29811..21a45e1d34 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/test/lib.rs
+++ b/examples/model_selection/Trails/internal/pg_extension/test/lib.rs
@@ -1,21 +1,21 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
diff --git a/examples/model_selection/Trails/requirement.txt b/examples/model_selection/Trails/requirement.txt
new file mode 100644
index 0000000000..855bab3c91
--- /dev/null
+++ b/examples/model_selection/Trails/requirement.txt
@@ -0,0 +1,65 @@
+aiofiles==23.1.0
+blessed==1.20.0
+certifi==2023.7.22
+charset-normalizer==3.2.0
+ConfigSpace==0.7.1
+contourpy==1.1.0
+cycler==0.11.0
+einops==0.7.0
+fonttools==4.41.0
+fvcore==0.1.5.post20221221
+gpustat==1.1
+h5py==3.10.0
+html5tagger==1.3.0
+httptools==0.6.0
+idna==3.4
+importlib-resources==6.0.0
+iopath==0.1.10
+joblib==1.3.1
+kiwisolver==1.4.4
+matplotlib==3.7.2
+more-itertools==9.1.0
+multidict==6.0.4
+numpy==1.24.4
+nvidia-ml-py==12.535.77
+objgraph==3.6.0
+orjson==3.9.2
+packaging==23.1
+palettable==3.3.3
+pandas==2.0.3
+Pillow==10.0.0
+portalocker==2.8.2
+psutil==5.9.5
+psycopg2-binary==2.9.6
+Pympler==1.0.1
+pyparsing==3.0.9
+python-dateutil==2.8.2
+pytz==2023.3
+PyYAML==6.0.1
+requests==2.31.0
+sanic==23.6.0
+sanic-routing==23.6.0
+scikit-learn==1.3.0
+scipy==1.10.1
+seaborn==0.12.2
+six==1.16.0
+sklearn==0.0
+tabulate==0.9.0
+termcolor==2.3.0
+thop @ git+https://github.com/Lyken17/pytorch-OpCounter.git@43c064afb71383501e41eaef9e8c8407265cf77f
+threadpoolctl==3.1.0
+torch==1.8.1
+torchaudio==0.8.1
+torchinfo==1.8.0
+torchvision==0.9.1
+tqdm==4.47.0
+tracerite==1.1.0
+typing_extensions==4.7.1
+tzdata==2023.3
+ujson==5.8.0
+urllib3==2.0.4
+uvloop==0.17.0
+wcwidth==0.2.6
+websockets==11.0.3
+yacs==0.1.8
+zipp==3.16.2
\ No newline at end of file
diff --git a/examples/model_selection/Trails/singa.polarDB.Dockerfile b/examples/model_selection/Trails/singa.polarDB.Dockerfile
new file mode 100644
index 0000000000..4a93817c93
--- /dev/null
+++ b/examples/model_selection/Trails/singa.polarDB.Dockerfile
@@ -0,0 +1,89 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+# Based on PolarDB with PostgreSQL 11.9
+FROM polardb/polardb_pg_local_instance:latest
+
+# LABEL maintainer="Naili Xing "
+
+# Install Python, Vim, and necessary libraries
+# Note: the package to install is python3-pip; a bare 'pip' package is usually not available.
+USER root
+RUN apt-get update && apt-get install -y \
+ python3-pip \
+ bzip2 \
+ libbz2-dev \
+ build-essential \
+ libffi-dev \
+ libssl-dev \
+ zlib1g-dev \
+ libbz2-dev \
+ libreadline-dev \
+ libsqlite3-dev \
+ wget \
+ llvm \
+ libncursesw5-dev \
+ xz-utils \
+ tk-dev \
+ libxml2-dev \
+ libxmlsec1-dev \
+ liblzma-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+USER postgres
+# Install pyenv and Python 3.8
+RUN curl https://pyenv.run | bash \
+ && export PYENV_ROOT="$HOME/.pyenv" \
+ && export PATH="$PYENV_ROOT/bin:$PATH" \
+ && eval "$(pyenv init --path)" \
+ && eval "$(pyenv init -)" \
+ && env PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.8
+
+
+# Switch to the postgres user, install Rust, and initialize cargo-pgrx
+# PolarDB is based on PostgreSQL 11.9
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
+ echo 'source $HOME/.cargo/env' >> $HOME/.bashrc && \
+ /bin/bash -c "source $HOME/.cargo/env && cargo install cargo-pgrx --version '0.9.7' --locked" && \
+ /bin/bash -c "source $HOME/.cargo/env && cargo pgrx init --pg11 /home/postgres/tmp_basedir_polardb_pg_1100_bld/bin/pg_config"
+
+
+# Clone the code and install dependencies
+WORKDIR /home/postgres
+RUN mkdir Trails && \
+ cd Trails && \
+ git init && \
+ git remote add origin https://github.com/apache/singa.git && \
+ git config core.sparseCheckout true && \
+ echo "examples/model_selection/Trails/*" > .git/info/sparse-checkout && \
+ git pull --depth=1 origin dev-postgresql && \
+ mv examples/model_selection/Trails/* . && \
+ rm -rf examples && \
+ cp ./internal/pg_extension/template/Cargo.pg14.toml ./internal/pg_extension/Cargo.toml && \
+ cd ./internal/ml/model_selection && \
+ pip install -r requirement.txt && \
+ pip install https://singa-wheel.s3.ap-southeast-1.amazonaws.com/tmp/singa-3.1.0-cp38-cp38-manylinux2014_x86_64.whl
+
+
+WORKDIR /home/postgres/Trails/internal/pg_extension
+RUN /bin/bash -c "source $HOME/.cargo/env && cargo pgrx install --pg-config /home/postgres/tmp_basedir_polardb_pg_1100_bld/bin/pg_config"
+
+WORKDIR /home/postgres
+RUN chmod +x ./Trails/init_polardb.sh
+# here we run the default script in /home/postgres
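+
+# Example build and run (illustrative tag and name):
+#   docker build -t trails-polardb -f singa.polarDB.Dockerfile .
+#   docker run -d --name trails-polardb trails-polardb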
diff --git a/examples/model_selection/TRAILS-Database-Native-Model-Selection/Dockerfile b/examples/model_selection/Trails/singa.psql.Dockerfile
similarity index 60%
rename from examples/model_selection/TRAILS-Database-Native-Model-Selection/Dockerfile
rename to examples/model_selection/Trails/singa.psql.Dockerfile
index 9a14cff854..bca9ee7603 100644
--- a/examples/model_selection/TRAILS-Database-Native-Model-Selection/Dockerfile
+++ b/examples/model_selection/Trails/singa.psql.Dockerfile
@@ -18,11 +18,13 @@
FROM ubuntu:20.04
+#LABEL maintainer="Naili Xing "
+
ENV DEBIAN_FRONTEND=noninteractive
# Install Python, Vim, and necessary libraries
RUN apt-get update && \
- apt-get install -y software-properties-common wget gnupg2 lsb-release git && \
+ apt-get install -y software-properties-common wget gnupg2 lsb-release git sudo && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get install -y python3.6 python3-pip vim && \
apt-get clean && \
@@ -47,7 +49,17 @@ RUN adduser --disabled-password --gecos "" postgres && \
adduser postgres sudo && \
chown -R postgres:postgres /project
-# Switch to the postgres user andInstall Rust and init the cargo
+# Add PostgreSQL's repository
+RUN wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
+ && sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(. /etc/os-release; echo $VERSION_CODENAME)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
+
+# Install postgresql client
+RUN apt-get update && apt-get install -y \
+ postgresql-client-14 && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Switch to the postgres user, install Rust, and initialize cargo
USER postgres
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
echo 'source $HOME/.cargo/env' >> $HOME/.bashrc && \
@@ -56,21 +68,30 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
# Set environment variables for Rust and Python
ENV PATH="/root/.cargo/bin:${PATH}"
-ENV PYTHONPATH="${PYTHONPATH}:/project/TRAILS/internal/ml/model_selection"
+ENV PYTHONPATH="${PYTHONPATH}:/project/Trails/internal/ml/model_selection"
-WORKDIR /project
-COPY ./internal/ml/model_selection/requirement.txt ./requirement.txt
-RUN pip install -r requirement.txt
+# ARG CACHEBUST=1 forces the following commands to re-execute on every update.
+ARG CACHEBUST=1
-RUN pip install https://www.comp.nus.edu.sg/~zhaojing/files/singa-3.1.0-cp38-cp38-manylinux2014_x86_64.whl
+# Clone the code and install dependencies
+WORKDIR /project
+RUN mkdir Trails && \
+ cd Trails && \
+ git init && \
+ git remote add origin https://github.com/apache/singa.git && \
+ git config core.sparseCheckout true && \
+ echo "examples/model_selection/Trails/*" > .git/info/sparse-checkout && \
+ git pull --depth=1 origin dev-postgresql && \
+ mv examples/model_selection/Trails/* . && \
+ rm -rf examples && \
+ cp ./internal/pg_extension/template/Cargo.pg14.toml ./internal/pg_extension/Cargo.toml && \
+ cd ./internal/ml/model_selection && \
+ pip install -r requirement.txt && \
+ pip install https://singa-wheel.s3.ap-southeast-1.amazonaws.com/tmp/singa-3.1.0-cp38-cp38-manylinux2014_x86_64.whl
-# appendix
-USER root
-RUN apt-get update && apt-get install -y \
- postgresql-client && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists/*
-USER postgres
+WORKDIR /project
+RUN chmod +x ./Trails/init.sh
-CMD ["tail", "-f", "/dev/null"]
+# Set the entry point to your script
+ENTRYPOINT ["/project/Trails/init.sh"]
diff --git a/examples/model_selection/Trails/singa_pkg_code/model.py b/examples/model_selection/Trails/singa_pkg_code/model.py
new file mode 100644
index 0000000000..34ae3ce3e2
--- /dev/null
+++ b/examples/model_selection/Trails/singa_pkg_code/model.py
@@ -0,0 +1,383 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# =============================================================================
+'''
+This script includes Model class for python users
+to use Computational Graph in their model.
+'''
+
+import os
+import gc
+import time
+import json
+import zipfile
+import numpy as np
+from functools import wraps
+# from collections import Iterable
+
+try:
+ from collections.abc import Iterable
+except ImportError:
+ from collections import Iterable
+
+from singa import tensor
+from singa import autograd
+from singa import layer
+from .tensor import Tensor
+from . import singa_wrap as singa
+
+
+class ModelMeta(layer.LayerMeta):
+
+ def buffer_operation(func):
+
+ def remove_creator(tensors):
+ if not tensors:
+ return
+
+ if isinstance(tensors, Iterable):
+ if isinstance(tensors, str):
+ return
+ else:
+ for item in tensors:
+ if isinstance(item, Iterable):
+ remove_creator(item)
+ elif isinstance(item, tensor.Tensor):
+ item.creator = None
+ elif isinstance(tensors, tensor.Tensor):
+ tensors.creator = None
+
+ @wraps(func)
+ def wrapper(self, *args, **kwargs):
+ # print ("in model.py wrapper function")
+ # print ("in model.py wrapper function args[0] shape: ", args[0].shape)
+ # print ("in model.py self._buffered: ", self._buffered)
+ # print ("in model.py begin wrapper self._results: ", self._results)
+ if self.graph_mode and self.training:
+ if len(args) == 0:
+ raise ValueError('expect at least one input tensor')
+
+ if isinstance(args[0], list):
+ assert isinstance(
+ args[0][0],
+ Tensor), ('function expects PlaceHolders or Tensors')
+ dev = args[0][0].device
+ else:
+ assert isinstance(
+ args[0],
+ Tensor), ('function expects PlaceHolders or Tensors')
+ dev = args[0].device
+
+ if not self._buffered:
+ # buffer operations
+ dev.EnableGraph(True)
+ # print ("model.py wrap not self._buffered args[0].shape", args[0].shape)
+ self._results = func(self, *args, **kwargs)
+ # print ("model.py wrap not self._buffered func: ", func)
+ dev.Sync()
+ dev.EnableGraph(False)
+ self._buffered = True
+
+ # deconstruct Operations before running the entire graph
+ remove_creator(self._results)
+
+ # make sure all Operations are deallocated
+ gc.collect()
+
+ # run graph
+ # print ("in model.py before dev.RunGraph self._results[0] shape: ", self._results[0].shape)
+ # print ("in model.py before dev.RunGraph args[0] shape: ", args[0].shape)
+ # print ("in model.py before dev.RunGraph self._results: ", self._results)
+ dev.RunGraph(self.sequential)
+ # print ("in model.py after dev.RunGraph")
+ # print ("in model.py after dev.RunGraph self._results[0] shape: ", self._results[0].shape)
+ # print ("in model.py after dev.RunGraph self._results: ", self._results)
+ # print ("in model.py after dev.RunGraph args[0] shape: ", args[0].shape)
+ return self._results
+ else:
+ return func(self, *args, **kwargs)
+
+ print ("model.py return buffer_operation wrapper: ", wrapper)
+ return wrapper
+
+ def __new__(cls, name, bases, attr):
+ print ("in __new__ attr['train_one_batch']: \n", attr['train_one_batch'])
+ if 'train_one_batch' in attr:
+ attr['train_one_batch'] = ModelMeta.buffer_operation(
+ attr['train_one_batch'])
+
+ return super(ModelMeta, cls).__new__(cls, name, bases, attr)
+
+
+class Model(layer.Layer, metaclass=ModelMeta):
+ """ Base class for your neural network models.
+
+ Example usage::
+
+ import numpy as np
+ from singa import opt
+ from singa import tensor
+ from singa import device
+ from singa import autograd
+ from singa import layer
+ from singa import model
+
+ class MyModel(model.Model):
+ def __init__(self):
+ super(MyModel, self).__init__()
+
+ self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()
+ self.conv1 = layer.Conv2d(1, 20, 5, padding=0)
+ self.conv2 = layer.Conv2d(20, 50, 5, padding=0)
+ self.sgd = opt.SGD(lr=0.01)
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.conv2(y)
+ return y
+
+ def train_one_batch(self, x, y):
+ out = self.forward(x)
+ loss = self.softmax_cross_entropy(out, y)
+ self.sgd(loss)
+ return out, loss
+
+ """
+
+ # save load states constant
+ TENSOR_DICT_FILENAME = '/tensor_dict.npz'
+ STATES_ATTR_FILENAME = '/states_attr.json'
+ MODEL_STATE_TYPE = 0
+ AUX_STATE_TYPE = 1
+
+ def __init__(self):
+ """
+ Initializes internal Model state
+ """
+ super(Model, self).__init__()
+
+ self.training = True
+ self.graph_mode = True
+ self.sequential = False
+ self._buffered = False
+ self._results = None
+
+ def compile(self, inputs, is_train=True, use_graph=False, sequential=False):
+ """ Compile and initialize the model
+
+ This function automatically derives the shapes of the parameters
+ in each sublayer from the shapes of the input placeholders, and
+ applies the given mode settings.
+
+ Args:
+ inputs(list): the list of input tensors(placeholders)
+ is_train(bool): when is_train is True, this model will enter
+ training mode, otherwise it will enter the evaluation mode
+ use_graph(bool): when use_graph is True, computational graph
+ will be used to train this model
+ sequential(bool): when sequential is True, the model will execute ops
+ in the graph in the order in which they joined the graph
+ """
+ assert len(inputs) > 0 and isinstance(inputs[0], Tensor), (
+ 'compile function expects PlaceHolders or Tensors')
+
+ dev = inputs[0].device
+ dev.EnableGraph(True)
+ self.forward(*inputs)
+ dev.EnableGraph(False)
+ dev.ResetGraph()
+
+ autograd.training = is_train
+ self.training = is_train
+ self.graph_mode = use_graph
+ self.sequential = sequential
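+
+    # A minimal usage sketch for compile (assuming a device dev, a batch size
+    # bs, and the MyModel subclass from the class docstring above):
+    #
+    #   tx = tensor.Tensor((bs, 1, 28, 28), dev, tensor.float32)
+    #   m = MyModel()
+    #   m.compile([tx], is_train=True, use_graph=True, sequential=False)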
+
+ def forward(self, *input):
+ """Defines the computation performed in every forward propagation.
+
+ Should be overridden by all subclasses.
+
+ Args:
+ *input: the input training data for the model
+
+ Returns:
+ out: the outputs of the forward propagation.
+ """
+ raise NotImplementedError
+
+ def train_one_batch(self, *input, **kwargs):
+ """Defines the computation performed in every training iteration
+
+ Should be overridden by all subclasses.
+
+ Args:
+ *input: the arguments of train_one_batch
+ **kwargs: the keyword arguments of train_one_batch
+ """
+ raise NotImplementedError
+
+ def train(self, mode=True):
+ """Set the model in evaluation mode.
+
+ Args:
+ mode(bool): when mode is True, this model will enter training mode
+ """
+ self.training = mode
+ autograd.training = mode
+
+ def eval(self):
+ """Sets the model in evaluation mode.
+ """
+ self.train(mode=False)
+
+ def graph(self, mode=True, sequential=False):
+ """ Turn on the computational graph. Specify execution mode.
+
+ Args:
+ mode(bool): when mode is True, model will use computational graph
+            sequential(bool): when sequential is True, the model will execute
+                ops in the graph following the order in which they joined it
+ """
+ self.graph_mode = mode
+ self.sequential = sequential
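+
+    # Mode-switching sketch (assuming a compiled model m): train()/eval() also
+    # flip autograd.training, and __call__ below dispatches accordingly.
+    #
+    #   m.train()  # training mode: m(x, y) calls train_one_batch(x, y)
+    #   m.eval()   # evaluation mode: m(x) calls forward(x)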
+
+ def __get_name__(self):
+ return self.__class__.__name__
+
+ def __call__(self, *input, **kwargs):
+ # print ("in pkg model.py __call__")
+ if self.training:
+ # print ("in pkg model.py train_one_batch")
+ # print ("self: ", self)
+ # print ("self.num_classes: ", self.num_classes)
+ # print ("input[0].shape: ", input[0].shape)
+ return self.train_one_batch(*input, **kwargs)
+ else:
+ # print ("in pkg model.py forward")
+ return self.forward(*input, **kwargs)
+
+ def save_states(self, fpath, aux_states={}):
+ """Save states.
+
+ Args:
+ fpath: output file path (without the extension)
+ aux_states(dict): values are standard data types or Tensor,
+ e.g., epoch ID, learning rate, optimizer states
+ """
+ assert not os.path.isfile(fpath), (
+ "Failed to save states, %s is already existed." % fpath)
+
+ states = self.get_states()
+
+ # save states data and attr
+ tensor_dict = {}
+ states_attr = {}
+ for k, v in states.items():
+ assert isinstance(v, tensor.Tensor), "Only tensor state is allowed"
+ tensor_dict[k] = tensor.to_numpy(v)
+ states_attr[k] = {
+ 'state_type': self.MODEL_STATE_TYPE,
+ 'shape': v.shape,
+ 'dtype': v.dtype
+ }
+
+ for k, v in aux_states.items():
+ assert isinstance(v,
+ tensor.Tensor), "Only tensor aux state is allowed"
+ tensor_dict[k] = tensor.to_numpy(v)
+ states_attr[k] = {
+ 'state_type': self.AUX_STATE_TYPE,
+ 'shape': v.shape,
+ 'dtype': v.dtype
+ }
+
+ # save to files
+ timestamp = time.time()
+ tmp_dir = '/tmp/singa_save_states_%s' % timestamp
+ os.mkdir(tmp_dir)
+ tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME
+ states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME
+
+ np.savez(tensor_dict_fp, **tensor_dict)
+
+ with open(states_attr_fp, 'w') as fp:
+ json.dump(states_attr, fp)
+
+ compression = zipfile.ZIP_DEFLATED
+ with zipfile.ZipFile(fpath, mode="w") as zf:
+ zf.write(tensor_dict_fp,
+ os.path.basename(tensor_dict_fp),
+ compress_type=compression)
+ zf.write(states_attr_fp,
+ os.path.basename(states_attr_fp),
+ compress_type=compression)
+
+ # clean up tmp files
+ os.remove(tensor_dict_fp)
+ os.remove(states_attr_fp)
+ os.rmdir(tmp_dir)
+
+ def load_states(self, fpath):
+ """Load the model states and auxiliary states from disk.
+
+ Usage:
+ m = MyModel()
+ m.compile(...)
+ aux_states = m.load_states('mymodel.zip')
+
+        Args:
+            fpath: input file path (without the extension)
+        Returns:
+            dict: the auxiliary states saved via save_states
+ """
+
+ assert os.path.isfile(fpath), (
+ "Failed to load states, %s is not exist." % fpath)
+
+ timestamp = time.time()
+ tmp_dir = '/tmp/singa_load_states_%s' % timestamp
+ os.mkdir(tmp_dir)
+
+ with zipfile.ZipFile(fpath, 'r') as zf:
+ zf.extractall(tmp_dir)
+
+ tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME
+ states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME
+
+ with open(states_attr_fp) as f:
+ states_attr = json.load(f)
+
+ tensor_dict = np.load(tensor_dict_fp)
+
+ # restore singa tensor from numpy
+ model_states = dict()
+ aux_states = dict()
+
+ for k in tensor_dict.files:
+ if states_attr[k]['state_type'] == self.MODEL_STATE_TYPE:
+ model_states[k] = tensor.from_numpy(tensor_dict[k])
+ elif states_attr[k]['state_type'] == self.AUX_STATE_TYPE:
+ aux_states[k] = tensor.from_numpy(tensor_dict[k])
+
+ # restore model_states
+ self.set_states(model_states)
+
+ # clean up tmp files
+ os.remove(tensor_dict_fp)
+ os.remove(states_attr_fp)
+ os.rmdir(tmp_dir)
+ return aux_states
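+
+
+# Save/load round trip (a minimal sketch; 'mymodel.zip' is a hypothetical
+# path, and aux states must be SINGA tensors):
+#
+#   epoch = tensor.from_numpy(np.array([3], dtype=np.float32))
+#   m.save_states('mymodel.zip', aux_states={'epoch': epoch})
+#   aux = m.load_states('mymodel.zip')  # restores model states, returns aux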
diff --git a/examples/model_selection/Trails/singa_pkg_code/tensor.py b/examples/model_selection/Trails/singa_pkg_code/tensor.py
new file mode 100644
index 0000000000..d1ebb61d9c
--- /dev/null
+++ b/examples/model_selection/Trails/singa_pkg_code/tensor.py
@@ -0,0 +1,1804 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# =============================================================================
+"""
+Example usage::
+
+ import numpy as np
+ from singa import tensor
+ from singa import device
+
+ # create a tensor with shape (2,3), default CppCPU device and float32
+ x = tensor.Tensor((2, 3))
+ x.set_value(0.4)
+
+ # create a tensor from a numpy array
+ npy = np.zeros((3, 3), dtype=np.float32)
+ y = tensor.from_numpy(npy)
+
+ y.uniform(-1, 1) # sample values from the uniform distribution
+
+ z = tensor.mult(x, y) # gemm -> z of shape (2, 3)
+
+ x += z # element-wise addition
+
+ dev = device.get_default_device()
+ x.to_device(dev) # move the data to a gpu device
+
+ s = tensor.to_numpy(x) # tensor -> numpy array
+
+There are two sets of tensor functions,
+
+Tensor member functions
+ which would change the internal state of the Tensor instance.
+
+Tensor module functions
+ which accept Tensor instances as arguments and return Tensor instances.
+
+Every Tensor instance must be initialized before reading data from it.
+"""
+from __future__ import division
+from __future__ import print_function
+from __future__ import absolute_import
+
+from deprecated import deprecated
+from builtins import object
+import numpy as np
+from functools import reduce
+import re
+
+from . import singa_wrap as singa
+from .device import get_default_device
+
+int32 = 2 #core.proto.kInt32
+float32 = 0 #core.proto.kFloat32
+CTensor = singa.Tensor
+
+
+class Tensor(object):
+    '''Python Tensor, which wraps a swig-converted CPP Tensor.
+
+ Args:
+ shape (tuple): a tuple of integers for the tensor shape. If shape
+ is not specified, the created tensor is called a dummy tensor.
+ device: a swig device. If None, the default host device is used.
+ dtype: data type. currently, most operations only accept float32.
+ data: a numpy array or swig tensor.
+ requires_grad: boolean indicator for computing the gradient.
+ stores_grad: boolean indicator for storing and returning the gradient.
+ Some intermediate tensors' gradient can be released
+ during the backward propagation. A tensor may require
+            grad but not store grad; but if a tensor stores grad
+ then it must require grad.
+ '''
+ tensor_count = 0
+
+ def __init__(self,
+ shape=(),
+ device=None,
+ dtype=float32,
+ data=None,
+ requires_grad=True,
+ stores_grad=False,
+ creator=None,
+ name=None):
+ if device is None:
+ device = get_default_device()
+ if isinstance(data, np.ndarray):
+ self.data = CTensor(list(data.shape), device, dtype)
+ copy_from_numpy(self.data, data)
+ elif isinstance(data, CTensor):
+ self.data = data
+ assert data.device().id() == device.id(), 'not the same device'
+ else:
+ self.data = CTensor(list(shape), device, dtype)
+
+ self.shape = tuple(self.data.shape())
+ self.device = device
+ self.dtype = self.data.data_type()
+ self.requires_grad = requires_grad
+ self.stores_grad = stores_grad
+ if name is None:
+ self.name = 'Dummy#{}'.format(Tensor.tensor_count)
+ Tensor.tensor_count += 1
+ else:
+ self.name = name
+ if creator is None:
+ from . import autograd
+ self.creator = autograd.Dummy(self, name)
+ else:
+ self.creator = creator
+
+ def __getitem__(self, keys):
+ if type(keys) != tuple:
+ keys = (keys,)
+
+ ret = self.clone()
+ axis_index = 0
+ for key in keys:
+ if type(key) == int:
+ key += self.shape[axis_index] if key < 0 else 0
+
+ if not (key >= 0 and key < self.shape[axis_index]):
+ raise ValueError("Invalid Index")
+
+ ret.data = singa.SliceOn(ret.data, key, key + 1, axis_index)
+ elif type(key) == slice:
+ start = key.start if key.start else 0
+ end = key.stop if key.stop else self.shape[axis_index]
+
+ start += self.shape[axis_index] if start < 0 else 0
+ end += self.shape[axis_index] if end < 0 else 0
+
+ if not (start >= 0 and start < end and
+ end <= self.shape[axis_index]):
+ raise ValueError("Invalid Index")
+
+ ret.data = singa.SliceOn(ret.data, start, end, axis_index)
+ else:
+ raise ValueError("Invalid Index")
+ axis_index += 1
+
+ return ret
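+
+    # Indexing sketch (minimal, assuming a (2, 3) tensor x): integer and
+    # slice keys are both mapped to singa.SliceOn, one axis at a time.
+    #
+    #   x = Tensor((2, 3))
+    #   x.set_value(1.0)
+    #   row = x[0]         # first row; an int index keeps the axis, length 1
+    #   sub = x[0:2, 1:3]  # a slice per axis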
+
+ def is_dummy(self):
+ '''
+ Returns:
+ True if the tensor is a dummy tensor
+ '''
+ match = re.match(r'Dummy#\d+', self.name)
+ if match:
+ return True
+ else:
+ return False
+
+ def ndim(self):
+ '''
+ Returns:
+ the number of dimensions of the tensor.
+ '''
+ return self.data.nDim()
+
+ def is_empty(self):
+ '''
+ Returns:
+ True if the tensor is empty according to its shape
+ '''
+ return self.ndim() == 0
+
+ def is_transpose(self):
+ '''
+ Returns:
+ True if the internal data is transposed; otherwise False.
+ '''
+ return self.data.transpose()
+
+ def transpose(self, axes=None):
+ ''' To transpose the tensor
+
+ Args:
+ axes: axes to transpose
+
+ Returns:
+ new transposed tensor
+ '''
+ t = Tensor(self.shape, self.device, self.dtype)
+ if axes is None:
+ tshape = [self.shape[x] for x in range(len(t.shape))]
+ t.shape = tuple(tshape)
+ t.data = singa.DefaultTranspose(self.data)
+ else:
+ if (len(axes) != len(self.shape)):
+ raise ValueError('dimensions do not match')
+ tshape = [self.shape[x] for x in axes]
+ t.shape = tuple(tshape)
+ t.data = singa.Transpose(self.data, list(axes))
+ return t
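+
+    # Transpose sketch (minimal, assuming a (2, 3, 4) tensor x):
+    #
+    #   y = x.transpose((2, 0, 1))  # y.shape == (4, 2, 3)
+    #   z = x.transpose()           # default transpose via DefaultTranspose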
+
+ def size(self): # TODO(wangwei) compute size
+ '''
+ Returns:
+ the number of elements of the tensor.
+ '''
+ return self.data.Size()
+
+ def memsize(self):
+ '''
+ Returns:
+ the number of Bytes allocated for this tensor.
+ '''
+ return self.data.MemSize()
+
+ def contiguous(self):
+ t = Tensor(self.shape, self.device, self.dtype)
+ t.data = singa.Contiguous(self.data)
+ return t
+
+ def reshape(self, shape):
+ '''Return a new tensor with the given shape, and the original
+ tensor is not changed.
+
+ Args:
+ shape (list): new shape, which should have the same
+                volume as the original shape.
+
+ Returns:
+ new tensor reshaped
+ '''
+ t = Tensor(self.shape, self.device, self.dtype)
+ assert product(self.shape) == product(shape), \
+ 'product of shape should be equal'
+ t.shape = shape
+ t.data = singa.Reshape(self.data, shape)
+ return t
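+
+    # Reshape sketch (minimal): the element count must be preserved.
+    #
+    #   x = Tensor((2, 3))
+    #   y = x.reshape((3, 2))  # ok: same number of elements
+    #   # x.reshape((4, 2))    # would fail the product assertion above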
+
+ def reset_like(self, t):
+ '''Reset the shape, dtype and device as the given tensor.
+
+ Args:
+ t (Tensor): a tensor
+ '''
+ self.data.ResetLike(t.data)
+ self.shape = t.shape
+ self.device = t.device
+ self.dtype = t.dtype
+
+ def as_type(self, dtype):
+ '''Change the data type.
+
+ Args:
+ dtype: accepts 'int', 'float', 'singa.kFloat32', 'singa.kInt'
+
+ Returns:
+ new tensor with new type
+ '''
+ if dtype == singa.kInt:
+ pass
+ elif dtype == singa.kFloat32:
+ pass
+ elif dtype == 'int':
+ dtype = singa.kInt
+ elif dtype == 'float':
+ dtype = singa.kFloat32
+ else:
+ raise TypeError("invalid data type %s" % dtype)
+ t = Tensor(self.shape, self.device, dtype)
+ t.data = self.data.AsType(dtype)
+ return t
+
+ def to_device(self, device):
+ '''Move the tensor data onto a given device.
+
+ Args:
+ device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
+ '''
+ self.data.ToDevice(device)
+ self.device = device
+
+ def to_host(self):
+ '''Move the tensor data onto the default host CppCPU device.
+ '''
+ self.data.ToHost()
+ self.device = get_default_device()
+
+ def l2(self):
+ '''
+ Returns:
+ the L2 norm.
+ '''
+ return self.data.L2()
+
+ def l1(self):
+ '''
+ Returns:
+ the L1 norm.
+ '''
+ return self.data.L1()
+
+ def set_value(self, x, inplace=True):
+        '''Set all elements of the tensor to be the given value.
+
+ Args:
+ x (float): a float value to be set to all elements.
+ inplace: inplace flag
+
+ Returns:
+ this tensor
+ '''
+ if not inplace:
+ # return new tensor filled with value
+ raise NotImplementedError
+
+ self.data.SetFloatValue(float(x))
+ return self
+
+ def copy_from_numpy(self, np_array, offset=0):
+ ''' Copy the data from the numpy array.
+
+ Args:
+ np_array: source numpy array
+ offset (int): destination offset
+ '''
+ assert np_array.size == self.size(), 'tensor shape should be the same'
+ if not np_array.ndim == 1:
+ np_array = np_array.flatten()
+ dt = np_array.dtype
+ if dt == np.float32:
+ self.data.CopyFloatDataFromHostPtr(np_array)
+ elif dt == int or dt == np.int32:
+ self.data.CopyIntDataFromHostPtr(np_array)
+ else:
+ print('Not implemented yet for ', dt)
+
+ def copy_data(self, t):
+ '''Copy data from other Tensor instance.
+
+ Args:
+ t (Tensor): source Tensor.
+ '''
+ assert (t.size() == self.size()), "tensor shape should be the same"
+ assert isinstance(t, Tensor), 't must be a singa Tensor instance'
+ self.data.CopyData(t.data)
+
+ def copy_from(self, t, offset=0):
+ ''' Copy the data from the numpy array or other Tensor instance
+
+ Args:
+ t (Tensor or np array): source Tensor or numpy array
+ offset (int): destination offset
+ '''
+ if isinstance(t, Tensor):
+ self.copy_data(t)
+ elif isinstance(t, np.ndarray):
+ self.copy_from_numpy(t)
+ else:
+ raise ValueError("t should be Tensor or numpy array.")
+
+ def clone(self):
+ '''
+ Returns:
+ a new Tensor which does deep copy of this tensor
+ '''
+ return _call_singa_func(self.data.Clone)
+
+ def repeat(self, repeats, axis):
+        '''Repeat the tensor data along a given axis.
+
+        Args:
+            repeats(int or a sequence): the number of repetitions
+            axis (int): the axis along which to repeat. If it is None,
+                the repeated tensor is flattened. If it is not None,
+                repeats may be a sequence whose size must match the size
+                of the given axis.
+
+        Returns:
+            the repeated tensor
+        '''
+ t = Tensor()
+ t_ndim = self.ndim()
+ if isinstance(repeats, int) or isinstance(repeats, complex):
+ if repeats < 0:
+ raise ValueError(
+ "'repeats' should not be negative: {}".format(repeats))
+            if axis is not None and axis < 0:
+ axis += t_ndim
+ # broadcast = True
+ if axis is None:
+ axis = 9999
+ t.shape = (product(self.shape) * repeats,)
+ Repeats = [
+ repeats,
+ ]
+ t.data = self.data.Repeat(Repeats, axis)
+ elif axis >= 0:
+ t_shape = list(self.shape)
+ t_shape[axis] = self.shape[axis] * repeats
+ t.shape = tuple(t_shape)
+ Repeats = [
+ repeats,
+ ]
+ t.data = self.data.Repeat(Repeats, axis)
+
+ elif isinstance(repeats, tuple) or isinstance(repeats, list):
+ for rep in repeats:
+ if rep < 0:
+ raise ValueError(
+ "'repeats' should be int or sequence: {}".format(
+ repeats))
+
+            if axis is not None and axis < 0:
+ axis += t_ndim
+ if axis is None:
+ raise ValueError(
+ "when axis us None, 'repeats' should be int: {}".format(
+ repeats))
+ elif axis >= 0:
+ t_shape = list(self.shape)
+ t_shape[axis] = sum(repeats)
+ t.shape = tuple(t_shape)
+ t.data = self.data.Repeat(list(repeats), axis)
+ else:
+ raise ValueError('repeats should be int or sequence')
+
+ return t
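+
+    # Repeat sketch (minimal, assuming a (2, 3) tensor x):
+    #
+    #   a = x.repeat(2, axis=None)       # flattened result with 12 elements
+    #   b = x.repeat(2, axis=1)          # shape (2, 6)
+    #   c = x.repeat([1, 2, 3], axis=1)  # per-entry repeats, shape (2, 6)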
+
+ def T(self):
+ ''' shallow copy.
+
+ Returns:
+ a new Tensor which shares the underlying data memory (shallow copy).
+ '''
+ return _call_singa_func(singa.DefaultTranspose, self.data)
+
+ def copy(self):
+ '''shallow copy calls copy constructor of singa::Tensor
+
+ Returns:
+ new tensor copied
+ '''
+ return _call_singa_func(CTensor, self.data)
+
+ def deepcopy(self):
+ '''Same as clone().
+
+ Returns:
+ a new Tensor
+ '''
+ return self.clone()
+
+ def bernoulli(self, p, inplace=True):
+ '''Sample 0/1 for each element according to the given probability.
+
+ Args:
+ p (float): with probability p, each element is sample to 1.
+ inplace: inplace flag
+
+ Returns:
+ this tensor
+ '''
+ if not inplace:
+ # return new tensor
+ raise NotImplementedError
+
+ singa.Bernoulli(float(p), self.data)
+ return self
+
+ def gaussian(self, mean, std, inplace=True):
+ '''Generate a value for each element following a Gaussian distribution.
+
+ Args:
+ mean (float): mean of the distribution
+            std (float): standard deviation of the distribution
+ inplace: inplace flag
+
+ Returns:
+ this tensor
+ '''
+ if not inplace:
+ # return new tensor
+ raise NotImplementedError
+
+ singa.Gaussian(float(mean), float(std), self.data)
+ return self
+
+ def uniform(self, low, high, inplace=True):
+ '''Generate a value for each element following a uniform distribution.
+
+ Args:
+ low (float): the lower bound
+            high (float): the upper bound
+ inplace: inplace flag
+
+ Returns:
+ this tensor
+ '''
+ if not inplace:
+ # return new tensor
+ raise NotImplementedError
+
+ singa.Uniform(float(low), float(high), self.data)
+ return self
+
+ @deprecated(reason="use broadcast instead")
+ def add_column(self, v):
+ '''(DEPRECATED, use broadcast)Add a tensor to each column of this tensor.
+
+ Args:
+ v (Tensor): a Tensor to be added as a column to this tensor.
+ '''
+ singa.AddColumn(v.data, self.data)
+
+ @deprecated(reason="use broadcast instead")
+ def add_row(self, v):
+ '''(DEPRECATED, use broadcast)Add a tensor to each row of this tensor.
+
+ Args:
+ v (Tensor): a Tensor to be added as a row to this tensor.
+ '''
+ singa.AddRow(v.data, self.data)
+
+ @deprecated(reason="use broadcast instead")
+ def div_column(self, v):
+ '''(DEPRECATED, use broadcast)Divide each column of this tensor by v.
+
+ Args:
+ v (Tensor): 1d tensor of the same length the column of self.
+ '''
+ singa.DivColumn(v.data, self.data)
+
+ @deprecated(reason="use broadcast instead")
+ def div_row(self, v):
+ '''(DEPRECATED, use broadcast)Divide each row of this tensor by v.
+
+ Args:
+ v (Tensor): 1d tensor of the same length the row of self.
+ '''
+ singa.DivRow(v.data, self.data)
+
+ @deprecated(reason="use broadcast instead")
+ def mult_column(self, v):
+ '''(DEPRECATED, use broadcast)Multiply each column of this tensor by v element-wisely.
+
+ Args:
+ v (Tensor): 1d tensor of the same length the column of self.
+ '''
+ singa.MultColumn(v.data, self.data)
+
+ @deprecated(reason="use broadcast instead")
+ def mult_row(self, v):
+ '''(DEPRECATED, use broadcast)Multiply each row of this tensor by v element-wisely.
+
+ Args:
+ v (Tensor): 1d tensor of the same length the row of self.
+ '''
+ singa.MultRow(v.data, self.data)
+
+ '''
+ python operators (+=, -=, *=, /=) for singa::Tensor unary operators
+ '''
+
+ def __iadd__(self, x):
+ ''' inplace element-wise addition with a tensor or a float value.
+
+ Args:
+ x (float or Tensor): input value
+
+ Returns:
+ this tensor
+ '''
+ if isinstance(x, Tensor):
+ self.data += x.data
+ else:
+ self.data += float(x)
+ return self
+
+ def __isub__(self, x):
+ ''' inplace element-wise subtraction with a tensor or a float value.
+
+ Args:
+ x (float or Tensor): input value
+
+ Returns:
+ this tensor
+ '''
+
+ if isinstance(x, Tensor):
+ self.data -= x.data
+ else:
+ self.data -= float(x)
+ return self
+
+ def __imul__(self, x):
+ ''' inplace element-wise multiplication with a tensor or a float value.
+
+ Args:
+ x (float or Tensor): input value
+
+ Returns:
+ this tensor
+ '''
+ if isinstance(x, Tensor):
+ self.data *= x.data
+ else:
+ self.data *= float(x)
+ return self
+
+ def __itruediv__(self, x):
+ ''' inplace element-wise division by a tensor or a float value.
+
+ Args:
+ x (float or Tensor): input value
+
+ Returns:
+ this tensor
+ '''
+ if isinstance(x, Tensor):
+ self.data /= x.data
+ else:
+ self.data /= float(x)
+ return self
+
+ '''
+ python operators (+, -, *, /, <, <=, >, >=) for singa binary operators
+ https://docs.python.org/2/library/operator.html#mapping-operators-to-functions
+ '''
+
+ def __add__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__add__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.AddFloat, self.data, rhs)
+
+ def __sub__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__sub__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.SubFloat, self.data, rhs)
+
+ def __mul__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__mul__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.MultFloat, self.data, rhs)
+
+ def __div__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__div__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.DivFloat, self.data, rhs)
+
+ def __truediv__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__div__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.DivFloat, self.data, rhs)
+
+ def __floordiv__(self, rhs):
+ if isinstance(rhs, Tensor):
+ tmp = from_raw_tensor(singa.__div__(self.data, rhs.data))
+ return _call_singa_func(singa.Floor, tmp.data)
+ else:
+ tmp = _call_singa_func(singa.DivFloat, self.data, rhs)
+ return _call_singa_func(singa.Floor, tmp.data)
+
+ def __lt__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__lt__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.LTFloat, self.data, rhs)
+
+ def __le__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__le__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.LEFloat, self.data, rhs)
+
+ def __gt__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__gt__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.GTFloat, self.data, rhs)
+
+ def __ge__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__ge__(self.data, rhs.data))
+ else:
+ return _call_singa_func(singa.GEFloat, self.data, rhs)
+
+ def __eq__(self, rhs):
+ if isinstance(rhs, Tensor):
+ return from_raw_tensor(singa.__eq__(self.data, rhs.data))
+ elif rhs is None:
+ return False
+ else:
+ return _call_singa_func(singa.EQFloat, self.data, rhs)
+
+ def __radd__(self, lhs):
+ lhs = float(lhs)
+ one = Tensor(self.shape, self.device, self.dtype)
+ one.set_value(lhs)
+ one += self
+ return one
+
+ def __rsub__(self, lhs):
+ lhs = float(lhs)
+ one = Tensor(self.shape, self.device, self.dtype)
+ one.set_value(lhs)
+ one -= self
+ return one
+
+ def __rmul__(self, lhs):
+ lhs = float(lhs)
+ one = Tensor(self.shape, self.device, self.dtype)
+ one.set_value(lhs)
+ one *= self
+ return one
+
+ def __rdiv__(self, lhs):
+ lhs = float(lhs)
+ one = Tensor(self.shape, self.device, self.dtype)
+ one.set_value(lhs)
+ one /= self
+ return one
+
+ def __rtruediv__(self, lhs):
+ lhs = float(lhs)
+ one = Tensor(self.shape, self.device, self.dtype)
+ one.set_value(lhs)
+ one /= self
+ return one
+
+ def __repr__(self):
+ return np.array2string(to_numpy(self))
+
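+# Operator sketch (minimal): the overloads above accept a Tensor or a float
+# on the right-hand side; comparisons return 0/1-valued tensors.
+#
+#   x = Tensor((2, 3))
+#   x.set_value(2.0)
+#   y = x * 3.0 + 1.0  # element-wise, float rhs
+#   m = x < 5.0        # 1.0 where true, 0.0 otherwise
+#   z = 1.0 / x        # float lhs handled by __rtruediv__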
+
+''' alias Tensor to PlaceHolder
+'''
+PlaceHolder = Tensor
+''' python functions for global functions in Tensor.h
+'''
+
+
+def from_raw_tensor(t):
+ x = Tensor(t.shape(), t.device(), t.data_type())
+ x.data = t
+ return x
+
+
+def from_raw_tensors(tt):
+ ret = []
+ for t in list(tt):
+ ret.append(from_raw_tensor(t))
+ return ret
+
+
+def zeros_like(t):
+ ret = Tensor(t.shape, t.device, t.dtype)
+ ret.set_value(float(0))
+ return ret
+
+
+def ones_like(t):
+ ret = Tensor(t.shape, t.device, t.dtype)
+ ret.set_value(float(1))
+ return ret
+
+
+def product(shape):
+ return reduce(lambda x, y: x * y, shape)
+
+
+def sizeof(dtype):
+ '''Get size of datatype
+
+ Args:
+ dtype: singa datatype
+
+ Returns:
+ the number of bytes of the given SINGA data type defined in core.proto
+ '''
+ return singa.SizeOf(dtype)
+
+
+def contiguous(tensor):
+ return _call_singa_func(singa.Contiguous, tensor.data)
+
+
+def reshape(tensor, shape):
+ '''Reshape the input tensor with the given shape and
+ the original tensor is not changed
+
+ Args:
+ tensor (Tensor): the tensor to be changed
+        shape (list): the new shape, which should have the same volume as the
+ old shape.
+
+ Returns:
+ the new Tensor
+ '''
+ return _call_singa_func(singa.Reshape, tensor.data, shape)
+
+
+def transpose(t, axes=None):
+ '''To transpose the tensor
+
+ Args:
+ t: input tensor
+ axes: axes to transpose
+
+ Returns:
+ the transposed tensor
+ '''
+ ret = t.transpose(axes)
+ return ret
+
+
+def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
+ '''Copy the data between two Tensor instances which could be on different
+ devices.
+
+ Args:
+ dst (Tensor): destination Tensor
+ src (Tensor): source Tensor
+ size (int) : number of elements to copy
+ dst_offset (int): offset in terms of elements to the start of dst
+ src_offset (int): offset in terms of elements to the start of src
+ '''
+ singa.CopyDataToFrom(dst.data, src.data, size, dst_offset, src_offset)
+
+
+def from_numpy(np_array, dev=None):
+ '''Create a Tensor instance with the shape, dtype and values from the numpy
+ array.
+
+ Args:
+ np_array: the numpy array.
+
+ Returns:
+ A Tensor instance allocated on the default CppCPU device.
+ '''
+ assert type(np_array) is np.ndarray, 'Must input numpy array'
+ # convert to float32 array
+    if np_array.dtype == np.float64 or np_array.dtype == float:
+ np_array = np_array.astype(np.float32)
+
+ if np_array.dtype == np.int64 or np_array.dtype == int:
+ np_array = np_array.astype(np.int32)
+
+ if np_array.dtype == np.float32:
+ dtype = float32
+ else:
+ assert np_array.dtype == np.int32, \
+ 'Only float and int tensors are supported'
+ dtype = int32
+ ret = Tensor(np_array.shape, dtype=dtype)
+ ret.copy_from_numpy(np_array)
+ if dev:
+ ret.to_device(dev)
+ return ret
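+
+# Conversion sketch (minimal): float64/int64 numpy arrays are narrowed to
+# float32/int32 before the copy.
+#
+#   a = np.arange(6, dtype=np.float64).reshape(2, 3)
+#   t = from_numpy(a)  # a float32 tensor on the default host device
+#   b = to_numpy(t)    # back to a (2, 3) numpy array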
+
+
+def to_host(t):
+ '''Copy the data to a host tensor.
+
+ Args:
+ t (Tensor): a Tensor
+
+ Returns:
+ new Tensor at host
+ '''
+ ret = t.clone()
+ ret.to_host()
+ return ret
+
+
+def to_numpy(t):
+ '''Copy the tensor into a numpy array.
+
+ Args:
+ t (Tensor): a Tensor
+
+ Returns:
+ a numpy array
+ '''
+ th = to_host(t)
+ if th.dtype == float32:
+ np_array = th.data.GetFloatValue(int(th.size()))
+ elif th.dtype == int32:
+ np_array = th.data.GetIntValue(int(th.size()))
+ else:
+        raise NotImplementedError('Not implemented yet for %s' % th.dtype)
+ return np_array.reshape(th.shape)
+
+
+def abs(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = abs(x), x is an element of t
+ '''
+ return _call_singa_func(singa.Abs, t.data)
+
+
+def exp(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = exp(x), x is an element of t
+ '''
+ return _call_singa_func(singa.Exp, t.data)
+
+
+def ceil(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = ceil(x), x is an element of t
+ '''
+ return _call_singa_func(singa.Ceil, t.data)
+
+
+def log(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = log(x), x is an element of t
+ '''
+ return _call_singa_func(singa.Log, t.data)
+
+
+def sigmoid(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = sigmoid(x); x is an element of t
+ '''
+ return _call_singa_func(singa.Sigmoid, t.data)
+
+
+def sign(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = sign(x)
+ '''
+ return _call_singa_func(singa.Sign, t.data)
+
+
+def sqrt(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = sqrt(x), x is an element of t
+ '''
+ return _call_singa_func(singa.Sqrt, t.data)
+
+
+def square(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = x * x, x is an element of t
+ '''
+ return _call_singa_func(singa.Square, t.data)
+
+
+def tanh(t):
+ '''
+ Args:
+ t (Tensor): input Tensor
+
+ Returns:
+ a new Tensor whose element y = tanh(x), x is an element of t
+ '''
+ return _call_singa_func(singa.Tanh, t.data)
+
+
+def sum(t, axis=None, out=None):
+ '''Sum of tensor elements over given axis
+
+ Args:
+        t: Singa.tensor
+            The tensor to be summed
+ axis: None or int or tuple of ints, optional
+ Axis or axes along which a sum is performed.
+ The default, axis=None, will sum all of the elements of the input array.
+ If axis is negative it counts from the last to the first axis.
+ If axis is a tuple of ints, a sum is performed on all of the axes specified
+ in the tuple instead of a single axis or all the axes as before.
+        out: Singa.tensor, optional
+            Alternative output tensor in which to place the result.
+ It must have the same shape as the expected output,
+ but the type of the output values will be cast if necessary.
+
+ Returns:
+ A tensor with the same shape as t, with the specified axis removed.
+        If t is a 0-d tensor, or if axis is None, a scalar is returned.
+        If an output tensor is specified, a reference to out is returned.
+ '''
+
+ t_shape = t.shape
+ t_ndim = t.ndim()
+
+ if axis is None:
+ one = Tensor(t.shape, t.device)
+ one.set_value(1.0)
+ ret = tensordot(t, one, t_ndim)
+
+ if isinstance(axis, int):
+ if axis < 0:
+ axis += t_ndim
+
+ axis_shape = t_shape[axis]
+ axis_shape = int(axis_shape)
+ one = Tensor(shape=(axis_shape,), device=t.device)
+ one.set_value(1.0)
+ ret = tensordot(t, one, axes=([axis], [0]))
+
+ if isinstance(axis, tuple):
+ l_axis = list(axis)
+ axis_shape = [t_shape[x] for x in axis]
+ axisshape = tuple(axis_shape)
+ one = Tensor(axisshape, t.device)
+ one.set_value(1.0)
+ one_axis = [x for x in range(one.ndim())]
+ ret = tensordot(t, one, (l_axis, one_axis))
+
+ if out is not None:
+ if out.shape != ret.shape:
+ raise ValueError('dimensions do not match')
+ out[:] = ret
+ return out
+ else:
+ return ret
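+
+# Sum sketch (minimal): sum() reduces via tensordot with an all-ones tensor.
+#
+#   x = from_numpy(np.ones((2, 3), dtype=np.float32))
+#   s0 = sum(x)               # all elements reduced
+#   s1 = sum(x, axis=0)       # first axis reduced -> 3 values
+#   s2 = sum(x, axis=(0, 1))  # both axes reduced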
+
+
+def pow(t, x, out=None):
+ '''
+ Args:
+ t (Tensor): input tensor
+ x (float or Tensor): y[i] = t[i]^x if x is a float value; otherwise,
+ y[i]= t[i]^x[i] if x is a tensor.
+ out (None or Tensor): if None, a new Tensor would be constructed to
+ store the result; otherwise, the result is put into out.
+
+ Returns:
+ the result tensor.
+ '''
+ if out is None:
+ if isinstance(x, Tensor):
+ return _call_singa_func(singa.Pow, t.data, x.data)
+ else:
+ return _call_singa_func(singa.PowFloat, t.data, x)
+ else:
+ if isinstance(x, Tensor):
+ singa.PowWithRet(t.data, x.data, out.data)
+ else:
+ singa.PowFloatWitRet(t.data, x, out.data)
+ return out
+
+
+def average(t, axis=None):
+ '''
+ Args:
+ t (Tensor): input Tensor
+ axis (int, optional): if None, average all elements; otherwise average
+ along the given dimension. 0 for averaging each column; 1 for
+ averaging each row.
+
+ Returns:
+ a float value if axis is None; otherwise, a new Tensor for the result.
+ '''
+ if t.ndim() > 1:
+ return _call_singa_func(singa.Average, t.data, axis)
+ else:
+ return singa.SumAsFloat(t.data) / t.size()
+
+
+def softmax(t, out=None):
+ '''Apply SoftMax for each row of the Tensor.
+
+ Args:
+ t (Tensor): the input 1d or 2d tensor
+ out (Tensor, optional): if not None, it is used to store the result
+
+ Returns:
+ the result Tensor
+ '''
+ if out is None:
+ return _call_singa_func(singa.SoftMax, t.data)
+ else:
+ singa.SoftMax(t.data, out.data)
+ return out
+
+
+def lt(t, x):
+ '''Elementi-wise comparison for t < x
+
+ Args:
+ t (Tensor): left hand side operand
+ x (Tensor or float): right hand side operand
+
+ Returns:
+ a Tensor with each element being t[i] < x ? 1.0f:0.0f,
+ or t[i] < x[i] ? 1.0f:0.0f
+ '''
+ return t < x
+
+
+def le(t, x):
+ '''Elementi-wise comparison for t <= x.
+
+ Args:
+ t (Tensor): left hand side operand
+ x (Tensor or float): right hand side operand
+
+ Returns:
+ a Tensor with each element being t[i] <= x ? 1.0f:0.0f,
+ or t[i] <= x[i] ? 1.0f:0.0f
+ '''
+ return t <= x
+
+
+def gt(t, x):
+ '''Elementi-wise comparison for t > x.
+
+ Args:
+ t (Tensor): left hand side operand
+ x (Tensor or float): right hand side operand
+
+ Returns:
+ a Tensor with each element being t[i] > x ? 1.0f:0.0f,
+ or t[i] > x[i] ? 1.0f:0.0f
+ '''
+ return t > x
+
+
+def ge(t, x):
+ '''Elementi-wise comparison for t >= x.
+
+ Args:
+ t (Tensor): left hand side operand
+ x (Tensor or float): right hand side operand
+
+ Returns:
+ a Tensor with each element being t[i] >= x ? 1.0f:0.0f,
+ or t[i] >= x[i] ? 1.0f:0.0f
+ '''
+ return t >= x
+
+
+def eq(t, x):
+    '''Element-wise comparison for t == x.
+
+ Args:
+ t (Tensor): left hand side operand
+ x (Tensor or float): right hand side operand
+
+ Returns:
+ a Tensor with each element being t[i] == x ? 1.0f:0.0f,
+ or t[i] == x[i] ? 1.0f:0.0f
+ '''
+ return t == x
+
+
+def add(lhs, rhs, ret=None):
+    '''Element-wise addition.
+
+ Args:
+ lhs (Tensor): lhs tensor
+ rhs (Tensor): rhs tensor
+ ret (Tensor, optional): if not None, the result is stored in it;
+ otherwise, a new Tensor would be created for the result.
+
+ Returns:
+ the result Tensor
+ '''
+ if ret is None:
+ # call Tensor.__add__()
+ return lhs + rhs
+ else:
+ if isinstance(rhs, Tensor):
+ singa.Add(lhs.data, rhs.data, ret.data)
+ else:
+ singa.AddFloatWithRet(lhs.data, rhs, ret.data)
+ return ret
+
+
+def sub(lhs, rhs, ret=None):
+    '''Element-wise subtraction.
+
+ Args:
+ lhs (Tensor): lhs tensor
+ rhs (Tensor): rhs tensor
+ ret (Tensor, optional): if not None, the result is stored in it;
+ otherwise, a new Tensor would be created for the result.
+
+ Returns:
+ the result Tensor
+ '''
+ if ret is None:
+ # call Tensor.__sub__()
+ return lhs - rhs
+ else:
+ if isinstance(rhs, Tensor):
+ singa.Sub(lhs.data, rhs.data, ret.data)
+ else:
+ singa.SubFloatWithRet(lhs.data, rhs, ret.data)
+ return ret
+
+
+def eltwise_mult(lhs, rhs, ret=None):
+    '''Element-wise multiplication.
+
+ Args:
+ lhs (Tensor): lhs tensor
+ rhs (Tensor): rhs tensor
+ ret (Tensor, optional): if not None, the result is stored in it;
+ otherwise, a new Tensor would be created for the result.
+
+ Returns:
+ the result Tensor
+ '''
+
+ if ret is None:
+ # call Tensor.__mul__()
+ return lhs * rhs
+ else:
+ if isinstance(rhs, Tensor):
+ singa.EltwiseMult(lhs.data, rhs.data, ret.data)
+ else:
+ singa.EltwiseMultFloatWithRet(lhs.data, rhs, ret.data)
+ return ret
+
+
+def mult(A, B, C=None, alpha=1.0, beta=0.0):
+ '''Do matrix-matrix or matrix-vector multiplication.
+ This function returns C = alpha * A * B + beta * C
+ Currently below cases are supported
+ case 1 - matrix * vector:
+ A (Tensor): 2d Tensor
+ B (Tensor): 1d Tensor, GEMV would be invoked
+ case 2 - matrix * matrix:
+ A (Tensor): 2d Tensor
+ B (Tensor): 2d Tensor, GEMM would be invoked
+ case 3 - batched matrix * batched matrix:
+ A (Tensor): 3/4d Tensor
+ B (Tensor): 3/4d Tensor, batched GEMM would be invoked
+    where the leading (batch) dimension(s) of A and B should be exactly the same,
+ e.g. C{2,3,4,6} = A{2,3,4,5} * B{2,3,5,6}
+
+ Args:
+ A: n-d tensor
+ B: n-d tensor
+ C (Tensor, optional): for storing the result; If None, a new Tensor would be created.
+ alpha (float): scaling factor
+ beta (float): scaling factor
+
+ Returns:
+ the result Tensor
+ '''
+ if C is None:
+ return _call_singa_func(singa.Mult, A.data, B.data)
+ else:
+ singa.MultWithScale(alpha, A.data, B.data, beta, C.data)
+ return C
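+
+# Matrix-multiply sketch (minimal): computes C = alpha * A * B + beta * C.
+#
+#   A = from_numpy(np.ones((2, 3), dtype=np.float32))
+#   B = from_numpy(np.ones((3, 4), dtype=np.float32))
+#   C = mult(A, B)  # new (2, 4) tensor via GEMM
+#   D = zeros((2, 4))
+#   mult(A, B, C=D, alpha=0.5, beta=0.0)  # writes the scaled product into D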
+
+
+def einsum(ops, *args):
+    '''Do the matrix-to-matrix einsum calculation according to the operands.
+
+    TODO: finish the following functions in cpp (just like the numpy functions):
+    1. sum(A, axis=None)
+    2. repeat(A, repeats)
+    3. transpose(A, axes=None)
+
+    Warning: this function only supports the einsum calculation between two matrices.
+
+ Args:
+ ops(string): the string specifies the subscripts for summation such as
+ 'ki,kj->kij' Here all the 26 lowercase letter can be used here.
+ args(list of array_like): These are the tensors for the operation,
+            but only two tensors are supported here.
+
+ Returns:
+        Singa.Tensor: the output matrix of the einsum calculation
+
+ The best way to understand this function is to try the examples below:
+    A_ = np.array([0,1,2,3,4,5,6,7,8,9,10,11])
+ A = A_.reshape(4,3)
+ B = A_.reshape(3,4)
+
+ Here this einsum calculation is the same as normal 'mult'
+ Res = einsum('ij,jk->ik',A,B)
+
+ >>> [[ 20 23 26 29]
+ [ 56 68 80 92]
+ [ 92 113 134 155]
+ [128 158 188 218]]
+
+    A_ = np.array([0,1,2,3,4,5,6,7,8,9,10,11])
+ A = A_.reshape(4,3)
+ B = A_.reshape(4,3)
+
+    Here the einsum calculation is the same as normal 'eltwise_mult'
+ Res = einsum('ki,ki->ki',A,B)
+
+ >>> [[ 0 1 4]
+ [ 9 16 25]
+ [ 36 49 64]
+ [ 81 100 121]]
+
+    A = np.array([0,1,2,3,4,5,6,7,8,9,10,11])
+ A = A.reshape(4,3)
+
+ Res = einsum('ki,kj->kij',A,A)
+ >>> [[[ 0 0 0]
+ [ 0 1 2]
+ [ 0 2 4]]
+ [[ 9 12 15]
+ [ 12 16 20]
+ [ 15 20 25]]
+ [[ 36 42 48]
+ [ 42 49 56]
+ [ 48 56 64]]
+ [[ 81 90 99]
+ [ 90 100 110]
+ [ 99 110 121]]]
+
+    A_ = np.array([0,1,2,3,4,5,6,7,8,9,10,11])
+ A = A_.reshape(3,2,2)
+
+ Res = einsum('kia,kja->kij',A,A)
+ >>> [[[ 1 3]
+ [ 3 13]]
+ [[ 41 59]
+ [ 59 85]]
+ [[145 179]
+ [179 221]]]
+ '''
+
+ if len(ops) == 0:
+ raise ValueError("No input operands")
+
+ if len(args) != 2:
+ raise ValueError("Currently only two operands are supported")
+ # to get the input and output ops
+ inputops, outputops = ops.split('->')
+ inputops = inputops.split(',')
+
+ # to get the two input tensor
+ A = args[0]
+ B = args[1]
+
+ if A.ndim() != len(inputops[0]) or B.ndim() != len(inputops[1]):
+ raise ValueError("input dim doesn't match operands")
+
+ # to get the indices in input but not in output
+ sums = sorted(list((set(inputops[0]) | set(inputops[1])) - set(outputops)))
+
+ # to get the indices that A and B use to broadcast to each other
+ broadcast_A = sorted(list(set(inputops[1]) - set(inputops[0])))
+ broadcast_B = sorted(list(set(inputops[0]) - set(inputops[1])))
+ # to get all the indices in input
+ outputall = sorted(list(set(inputops[0]) | set(inputops[1])))
+
+ # Map indices to axis integers
+ sums = [outputall.index(x) for x in sums]
+ broadcast_idA = [inputops[1].find(x) for x in broadcast_A]
+ broadcast_idB = [inputops[0].find(x) for x in broadcast_B]
+
+ broadcast_a = [B.shape[x] for x in broadcast_idA]
+ broadcast_b = [A.shape[x] for x in broadcast_idB]
+
+    # get the transpose and reshape parameters used in the elementwise
+ # calculation
+    transpose_A = [
+        (list(inputops[0]) + broadcast_A).index(x) for x in outputall
+    ]
+    transpose_B = [
+        (list(inputops[1]) + broadcast_B).index(x) for x in outputall
+    ]
+
+ reshape_A = list(A.shape) + broadcast_a
+ reshape_B = list(B.shape) + broadcast_b
+
+ if len(broadcast_a) == 0:
+ broadcast_a = [1]
+ if len(broadcast_b) == 0:
+ broadcast_b = [1]
+ mult_A = repeat(A, product(broadcast_a))
+ mult_A = mult_A.reshape(reshape_A)
+ mult_A = transpose(mult_A, transpose_A)
+ mult_B = repeat(B, product(broadcast_b))
+ mult_B = mult_B.reshape(reshape_B)
+ mult_B = transpose(mult_B, transpose_B)
+
+ if mult_A.shape != mult_B.shape:
+ raise ValueError("Error: matrix dimension mismatch")
+ res = eltwise_mult(mult_A, mult_B)
+ sum_R = sorted(sums, reverse=True)
+ for i in sum_R:
+ res = sum(res, axis=i)
+ transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
+ res = transpose(res, transpose_res)
+
+ return res
+
+
+def repeat(t, repeats, axis=None):
+ '''Return the repeated tensor
+
+ Args:
+ t(tensor): the tensor to be repeated
+        repeats(int or a sequence): the number of repetitions
+        axis (int): the axis along which to repeat. If it is None,
+            the repeated tensor is flattened. If it is not None,
+            repeats may be a sequence whose size must match the size
+            of the given axis.
+
+ Returns:
+ the tensor which has been repeated
+ '''
+ ret = t.repeat(repeats, axis)
+ return ret
+
+
+def tensordot(A, B, axes=2):
+ """Returns the tensor multiplication of two tensors along specified axes.
+
+ This is equivalent to compute dot product along the specified axes which
+ are treated as one axis by reshaping.
+
+ Args:
+ A: Singa.Tensor
+ B: Singa.Tensor
+ axes:
+            - If it is an integer, then that many axes at the end of ``A``
+              and at the beginning of ``B`` are used.
+            - If it is a pair of sequences of integers, then these two
+              sequences specify the lists of axes for ``A`` and ``B``. The
+              corresponding axes are paired for the sum-product.
+
+ Returns:
+        singa.tensor: The tensor product of ``A`` and ``B`` along the
+        axes specified by ``axes``.
+
+    Adapted from numpy.tensordot:
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/core/numeric.py#L1123-L1306
+ """
+ # when axes is an integer, axes_A and axes_B represent axes at the last of ''A'' and
+ # the first of ''B''. For example, when axes is 1, we do the normal multiplication :
+ # if A is in shape(3,2,4), B is in shape(4,2,5), it will return a matrix in shape(3,2,2,5)
+ # when axes is 2 and A,B are shape (3,2,4) and (2,4,5), it will return a
+ # matrix in shape(3,5)
+
+ if type(axes) == int:
+ axes_A = list(range(-axes, 0))
+ axes_B = list(range(0, axes))
+ else:
+ axes_A, axes_B = axes
+ # when axes is a pair of sequences of integers.For example, A is in shape(3,2,4),
+ # B is in shape(4,2,5), we set axes as ([1,2],[1,0]), it will return a
+ # matrix in shape(3,5)
+ if isinstance(axes_A, list):
+ na = len(axes_A)
+ axes_A = list(axes_A)
+ else:
+ axes_A = [axes_A]
+ na = 1
+ if isinstance(axes_B, list):
+ nb = len(axes_B)
+ axes_B = list(axes_B)
+ else:
+ axes_B = [axes_B]
+ nb = 1
+
+ # a_shape and b_shape are the shape of tensor A and B, while nda and ndb
+ # are the dim of A and B
+ a_shape = A.shape
+ nda = A.ndim()
+ b_shape = B.shape
+ ndb = B.ndim()
+ equal = True
+ # to check if the length of axe_A is equal to axes_B
+ if na != nb:
+ equal = False
+ else:
+ # to make the shape match
+ for k in range(na):
+ if a_shape[axes_A[k]] != b_shape[axes_B[k]]:
+ equal = False
+ break
+ if axes_A[k] < 0:
+ axes_A[k] += nda
+ if axes_B[k] < 0:
+ axes_B[k] += ndb
+ if not equal:
+ raise ValueError("shape-mismatch for sum")
+ '''start to do the calculation according to the axes'''
+
+ notin = [k for k in range(nda) if k not in axes_A]
+ # nda is the dim of A, and axes_a is the axis for A, notin is the axis
+ # which is not in axes_A
+ newaxes_a = notin + axes_A
+ N2 = 1
+ for axis in axes_A:
+ N2 *= a_shape[axis]
+ N1 = 1
+ for ax in notin:
+ N1 *= a_shape[ax]
+ # newshape_a is the shape to do multiplication.For example, A is in shape(3,2,4),
+ # B is in shape(4,2,5), we set axes as ([1,2],[1,0]), then newshape_a should be (3,5)
+ # olda is the shape that will be shown in the result.
+ newshape_a = (N1, N2)
+ olda = [a_shape[axis] for axis in notin]
+ notin = [k for k in range(ndb) if k not in axes_B]
+ newaxes_b = axes_B + notin
+ N2 = 1
+ for axis in axes_B:
+ N2 *= b_shape[axis]
+ N1 = 1
+ for bx in notin:
+ N1 *= b_shape[bx]
+ newshape_b = (N2, N1)
+ oldb = [b_shape[axis] for axis in notin]
+
+ A = transpose(A, newaxes_a)
+ B = transpose(B, newaxes_b)
+ at = reshape(A, newshape_a)
+ bt = reshape(B, newshape_b)
+
+ res = mult(at, bt)
+    if len(olda + oldb) == 0:
+        olda = [1]
+        oldb = [1]
+    res = res.reshape(tuple(olda + oldb))
+
+ return res
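+
+# Tensordot sketch (minimal): an integer contracts that many trailing axes of
+# A with the leading axes of B; a pair of sequences names the axes explicitly.
+#
+#   A = from_numpy(np.ones((3, 2, 4), dtype=np.float32))
+#   B = from_numpy(np.ones((4, 2, 5), dtype=np.float32))
+#   C = tensordot(A, B, axes=([1, 2], [1, 0]))  # result shape (3, 5)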
+
+
+def div(lhs, rhs, ret=None):
+    '''Element-wise division.
+
+ Args:
+ lhs (Tensor): lhs tensor
+ rhs (Tensor): rhs tensor
+ ret (Tensor, optional): if not None, the result is stored in it;
+ otherwise, a new Tensor would be created for the result.
+
+ Returns:
+ the result Tensor
+ '''
+ if ret is None:
+ # call Tensor.__div__()
+ return lhs / rhs
+ else:
+ if isinstance(rhs, Tensor):
+ singa.Div(lhs.data, rhs.data, ret.data)
+ else:
+ singa.DivFloatWithRet(lhs.data, rhs, ret.data)
+ return ret
+
+
+def axpy(alpha, x, y):
+ '''Element-wise operation for y += alpha * x.
+
+ Args:
+ alpha (float): scaling factor
+ x (Tensor): a tensor
+ y (Tensor): a tensor
+
+ Returns:
+ y
+ '''
+ singa.Axpy(float(alpha), x.data, y.data)
+ return y
+
+
+def bernoulli(p, t):
+ '''Generate a binary value for each element of t.
+
+ Args:
+ p (float): each element is 1 with probability p; and 0 with 1 - p
+ t (Tensor): the results are put into t
+
+ Returns:
+ t
+ '''
+ singa.Bernoulli(float(p), t.data)
+ return t
+
+
+def gaussian(mean, std, t):
+ '''Generate values following a Gaussian distribution.
+
+ Args:
+ mean (float): the mean of the Gaussian distribution.
+        std (float): the standard deviation of the Gaussian distribution.
+ t (Tensor): the results are put into t
+
+ Returns:
+ t
+ '''
+ singa.Gaussian(float(mean), float(std), t.data)
+ return t
+
+
+def uniform(low, high, t):
+ '''Generate values following a Uniform distribution.
+
+ Args:
+ low (float): the lower bound
+ high (float): the higher bound
+ t (Tensor): the results are put into t
+
+ Returns:
+ t
+ '''
+ singa.Uniform(float(low), float(high), t.data)
+ return t
+
+
+def add_column(alpha, v, beta, M):
+ '''Add v to each column of M.
+
+ Denote each column of M as m, m = alpha * v + beta * m
+
+ Args:
+ alpha (float): scalar factor
+ v (Tensor): a tensor
+ beta (float): scalar factor
+ M (Tensor): 2d tensor
+
+ Returns:
+ Resulted tensor M
+ '''
+ singa.AddColumnWithScale(float(alpha), float(beta), v.data, M.data)
+ return M
+
+
+def add_row(alpha, v, beta, M):
+ '''Add v to each row of M.
+
+ Denote each row of M as m, m = alpha * v + beta * m
+
+ Args:
+ alpha (float): scaling factor
+ v (Tensor): a tensor
+ beta (float): scaling factor
+ M (Tensor): 2d tensor
+
+ Returns:
+ Resulted tensor M
+ '''
+ singa.AddRowWithScale(alpha, beta, v.data, M.data)
+ return M
+
+
+def sum_columns(M):
+ '''Sum all columns into a single column.
+
+ Args:
+ M (Tensor): the input 2d tensor.
+
+ Returns:
+ a new Tensor as the resulted column.
+ '''
+ assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
+ ret = Tensor((M.shape[0], 1), M.data.device())
+ singa.SumColumns(M.data, ret.data)
+ return ret
+
+
+def sum_rows(M):
+ '''Sum all rows into a single row.
+
+ Args:
+ M (Tensor): the input 2d tensor.
+
+ Returns:
+ a new Tensor as the resulted row.
+ '''
+ assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
+ ret = Tensor((1, M.shape[1]), M.data.device())
+ singa.SumRows(M.data, ret.data)
+ return ret
+
+
+''' private functions, internally used
+'''
+
+
+def _call_singa_func(_singa_func, *args):
+ ''' this function calls singa global functions that returns Tensor
+ and create new python Tensor instance
+ e.g., Tensor [singa_func](args...)
+
+ Args:
+ _singa_func: singa CPP API
+ args: args for singa CPP API
+
+ Returns:
+ new singa tensor
+ '''
+ new_t = Tensor()
+ new_t.data = _singa_func(*args)
+ new_t.shape = tuple(new_t.data.shape())
+ new_t.device = new_t.data.device()
+ new_t.dtype = new_t.data.data_type()
+ return new_t
+
+
+def copy_from_numpy(data, np_array):
+ ''' Copy the data from the numpy array.
+ used as static method
+
+ Args:
+ data: singa ctensor
+ np_array: source numpy array
+ '''
+ assert np_array.size == data.Size(), \
+ 'tensor shape should be the same'
+ if not np_array.ndim == 1:
+ np_array = np_array.flatten()
+ dt = np_array.dtype
+ if dt == np.float32:
+ data.CopyFloatDataFromHostPtr(np_array)
+ elif dt == int or dt == np.int32:
+ data.CopyIntDataFromHostPtr(np_array)
+ else:
+ print('Not implemented yet for ', dt)
+
+
+def concatenate(tensors, axis):
+ '''concatenate list of tensors together based on given axis
+
+ Args:
+ tensors: list of tensors.
+        axis: the axis to concatenate on; all dimensions should be the same
+            except along the axis to be concatenated.
+
+ Returns:
+ new tensor concatenated
+ '''
+ ctensors = singa.VecTensor()
+ for t in tensors:
+ ctensors.append(t.data)
+ return _call_singa_func(singa.ConcatOn, ctensors, axis)
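+
+# Concatenate sketch (minimal): all dims except the chosen axis must match.
+#
+#   a = from_numpy(np.ones((2, 3), dtype=np.float32))
+#   b = from_numpy(np.zeros((2, 3), dtype=np.float32))
+#   c = concatenate([a, b], axis=0)  # shape (4, 3)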
+
+
+def random(shape, device=get_default_device()):
+ ''' return a random tensor with given shape
+
+ Args:
+ shape: shape of generated tensor
+ device: device of generated tensor, default is cpu
+
+ Returns:
+ new tensor generated
+ '''
+ ret = Tensor(shape, device=device)
+ ret.uniform(0, 1)
+ return ret
+
+
+def zeros(shape, device=get_default_device()):
+ ret = Tensor(shape, device=device)
+ ret.set_value(0.0)
+ return ret
+
+
+def ones(shape, device=get_default_device()):
+ ret = Tensor(shape, device=device)
+ ret.set_value(1.0)
+ return ret
diff --git a/java/pom.xml b/java/pom.xml
index 32d223cfc0..7f40b2d153 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -90,17 +90,24 @@
doc/_static/*.png
doc/_static/*.gif
doc/_static/*.ai
- doc/_static/images/*.png
- CITATION.cff
- examples/model_selection/TRAILS-Database-Native-Model-Selection/requirement.txt
- examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/*.png
- examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/init_env
- examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/requirement.txt
- examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/Cargo.toml
- examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/pg_extension.control
+ doc/_static/images/*.png
+ CITATION.cff
+ examples/hfl/requirements.txt
+ examples/hfl/config/.gitignore
+ examples/hfl/data/.gitkeep
+ examples/hfl/src/proto/interface.proto
+ examples/model_selection/Trails/.gitignore
+ examples/model_selection/Trails/.gitmodules
+ examples/model_selection/Trails/requirement.txt
+ examples/model_selection/Trails/dataset/frappe/*.libsvm
+ examples/model_selection/Trails/internal/ml/model_selection/config.ini
+ examples/model_selection/Trails/internal/ml/model_selection/requirement.txt
+ examples/model_selection/Trails/internal/ml/model_slicing/data/*
+ examples/model_selection/Trails/internal/pg_extension/.gitignore
+ examples/model_selection/Trails/internal/pg_extension/pg_extension.control
+ examples/model_selection/Trails/internal/pg_extension/template/Cargo.pg11.toml
+ examples/model_selection/Trails/documents/ai_db.001.jpeg
examples/singa_easy/examples/data/SampleQuestion.json
- examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/*.png
- examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/pg_extension/.cargo/config.toml
True
diff --git a/setup.py b/setup.py
index b4c01e419d..c057b78c49 100644
--- a/setup.py
+++ b/setup.py
@@ -83,7 +83,7 @@
from datetime import date
# stable version
-VERSION = '4.1.0'
+VERSION = '4.2.0'
# get the git hash
# git_hash = subprocess.check_output(["git", "describe"]).strip().split('-')[-1][1:]
# comment the next line to build wheel for stable version
diff --git a/tool/conda/singa/meta.yaml b/tool/conda/singa/meta.yaml
index 5bcc362128..a3428a5980 100644
--- a/tool/conda/singa/meta.yaml
+++ b/tool/conda/singa/meta.yaml
@@ -20,7 +20,7 @@
# https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html#templating-with-jinja
# {% set data = load_setup_py_data(setup_file='../../../python/singa/setup.py', from_recipe_dir=True) %}
-{% set version = "4.1.0" %}
+{% set version = "4.2.0" %}
package:
name: singa