Resolving objectives issue introduced with introduction of pass search #1585

Open · wants to merge 2 commits into base: main
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -126,6 +126,7 @@ celerybeat.pid
# Environments
.env
.venv
.vs
env/
venv/
ENV/
31 changes: 0 additions & 31 deletions .lintrunner.toml
@@ -117,37 +117,6 @@ init_command = [
'--requirement=requirements-dev.txt'
]

[[linter]]
code = 'MYPY'
include_patterns = [
'**/*.py',
'**/*.pyi'
]
exclude_patterns = [
'examples/pytorch/*.py'
]
command = [
'python',
'-m',
'lintrunner_adapters',
'run',
'mypy_linter',
'--config=pyproject.toml',
'--show-notes',
'--show-disable',
'--',
'@{{PATHSFILE}}'
]
init_command = [
'python',
'-m',
'lintrunner_adapters',
'run',
'pip_init',
'--dry-run={{DRYRUN}}',
'mypy==1.0.0'
]

[[linter]]
code = 'NOQA'
include_patterns = ['**/*.py', '**/*.pyi']
36 changes: 18 additions & 18 deletions docs/architecture.md
@@ -13,12 +13,12 @@ This document describes the Olive components, and some implementation details. T
- [Search](#search)
- [Search Parameter](#searchparameter)
- [Search Space](#searchspace)
- [Search Algorithm](#searchalgorithm)
- [Search Sampler](#searchsampler)
- [Search Results](#searchresults)
- [Search Strategy](#search-strategy)
- [Execution order](#execution-order)
- [User Interface](#user-interface)
- [Implementation](#implementation)
- [Search Strategy](#search-strategy)
- [Execution order](#execution-order)
- [User Interface](#user-interface)
- [Implementation](#implementation)
- [System](#system)
- [OliveSystem Class](#olivesystem-class)
- [Data Container](#data-container)
@@ -109,9 +109,9 @@ The engine maintains a cache directory with three sub-directories:
- `mlflow`: stores mlflow model files.

## Search
Olive workflows support search parameters which are optimized using search algorithms.
Olive workflows support search parameters, which are optimized using configurable execution orders and sampling algorithms.

At the most basic level is `SearchParameter` which describes the options for search parameters. `SearchSpace` combines search parameters for one or more passes and `SearchAlgorithm` provides different sampling algorithms to search for the best parameter configuration (search point) from the search space.
At the most basic level is `SearchParameter` which describes the options for search parameters. `SearchSpace` combines search parameters for one or more passes and `SearchSampler` provides different sampling algorithms to search for the best parameter configuration (search point) from the search space.

### SearchParameter
A search parameter defines a discrete categorical distribution.
@@ -122,7 +122,7 @@ There are two types of search parameters:

**Note:**
- There cannot be any cyclic parent child dependencies.
- Search algorithms order the search parameters topologically so that the parents are sampled before the children.
- Search space orders the search parameters topologically so that the parents are sampled before the children.
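
The topological ordering of conditional parameters can be sketched with Python's standard-library `graphlib` (a simplified illustration of the idea, not Olive's actual code; the parameter names are hypothetical):

```python
from graphlib import TopologicalSorter

# Hypothetical dependencies: each child parameter's choices depend on the
# value sampled for its parent parameter.
parent_of = {"block_size": "quant_mode", "symmetric": "quant_mode"}

# Map each parameter to the set of parameters it depends on.
graph = {child: {parent} for child, parent in parent_of.items()}
order = list(TopologicalSorter(graph).static_order())

# Parents always appear before their children in the sampling order.
assert order.index("quant_mode") < order.index("block_size")
assert order.index("quant_mode") < order.index("symmetric")
```

A cycle in `parent_of` would make `static_order` raise `CycleError`, which matches the rule above that cyclic parent-child dependencies are not allowed.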

### SearchSpace
Search space combines search parameters from one or more passes and provides methods to iterate over the search space (`iterate`) or generate random samples (`random_sample`).
@@ -139,18 +139,18 @@ The corresponding conceptual search space is the space of all possible parameter
{“pass_id/space_name”: {“param_name”: param_value}}
```
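
For illustration, a concrete search point spanning two passes might look like the following (the pass and parameter names here are hypothetical):

```python
# Hypothetical search point: one parameter assignment per pass search space,
# keyed by "pass_id/space_name" as described above.
search_point = {
    "OrtTransformersOptimization/default": {"float16": True},
    "OrtSessionParamsTuning/default": {"io_bind": False},
}

# Each top-level key addresses one pass's search space.
for space, params in search_point.items():
    pass_id, space_name = space.split("/")
    assert pass_id and space_name and isinstance(params, dict)
```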

### SearchAlgorithm
Search algorithm operates over a search space and provides samples/trials (search points) from the search space to execute and evaluate.
### SearchSampler
A sampling algorithm operates over a search space and provides samples/trials (search points) from that space to evaluate.

Each search algorithm provides the methods:
Each search sampler provides the methods:
- `suggest`: returns a search point to execute and evaluate. The algorithm can sample a search point based on the evaluation results for previously suggested points.
- `report`: report evaluation results for a search point. The search point can also be pruned if it contains invalid pass configs or failed during execution/evaluation.
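
The suggest/report contract can be sketched as a minimal optimization loop (the sampler class below is a toy stand-in, not Olive's `SearchSampler` API):

```python
import random

class RandomSamplerSketch:
    """Toy sampler: suggests random points, remembers reported scores."""

    def __init__(self, space, seed=0):
        self.space = space          # {param_name: [choices]}
        self.rng = random.Random(seed)
        self.results = {}           # search point (as sorted tuple) -> score

    def suggest(self):
        # A smarter sampler would use self.results to guide this choice.
        return {name: self.rng.choice(choices) for name, choices in self.space.items()}

    def report(self, point, score):
        self.results[tuple(sorted(point.items()))] = score

sampler = RandomSamplerSketch({"float16": [True, False], "opt_level": [0, 1, 2]})
for _ in range(5):
    point = sampler.suggest()
    score = 1.0 if point["float16"] else 0.5   # stand-in for a real evaluation
    sampler.report(point, score)

best_point, best_score = max(sampler.results.items(), key=lambda kv: kv[1])
```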

The following search algorithms have been implemented:
- `ExhaustiveSearchAlgorithm`: Exhaustively iterates over the search space.
- `RandomSearchAlgorithm`: Randomly samples points from the search space without replacement.
- `OptunaSearchAlgorithm`: Abstract base class for algorithms built using `optuna` samplers. This class cannot be used directly
- `TPESearchAlgorithm`: Uses optuna `TPESampler`.
The following sampling algorithms have been implemented:
- `SequentialSampler`: Sequentially iterates over the search space.
- `RandomSampler`: Randomly samples points from the search space.
- `OptunaSampler`: Abstract base class for algorithms built using `optuna` samplers.
- `TPESampler`: Uses optuna's `TPESampler`.

### SearchResults
`SearchResults` stores evaluation results for samples made from a search space and provides tools to analyze and select the best search point/s.
@@ -159,10 +159,10 @@ Results are reported using the `record` method.

Currently `best_search_point` selects the best search point by maximizing/minimizing metrics using tie breaking. We intend to provide different model selection strategies for both single and multi-objective optimization.

## Search Strategy
## Search Strategy
Search strategy provides an optimization pipeline that finds the best search point from the search space of one or more passes.

It consists of two sub-components – `execution_order` and `search_algorithm`. Search algorithm has been covered in the previous section.
It consists of two sub-components – `execution_order` and `sampler`. Sampling algorithms have been covered in the previous section.

### Execution Order
The execution order defines the order in which the passes are optimized.
28 changes: 14 additions & 14 deletions docs/source/extending/design.md
@@ -5,8 +5,7 @@ that are composed to construct a model optimization workflow.
The workflow which is run by the **Engine** is composed of **Passes** that are executed in a specific order.
Each Pass is responsible for performing a specific optimization on the model. Each Pass might have a set of parameters that
can be tuned to achieve the best metrics, say accuracy and latency, that are evaluated by the respective **Evaluator**.
The Engine employs a **Search Strategy** that uses a **Search Algorithm** to auto-tune each Pass one by one or set of Passes
together.
The Engine employs a **Search Strategy** that uses a **Search Sampler** to auto-tune each Pass one by one or a set of Passes together.

Each Pass can be run on any host **System** and its output model can be evaluated on the desired target **System**.

@@ -71,22 +70,23 @@ created and **registered** along with their host system and evaluators if any.
The engine also maintains a cache directory to cache pass runs, models and evaluations.

## Search Strategy
Search strategy provides an optimization pipeline that finds the best search point from the search space of one or more passes.
Search strategy provides an optimization pipeline that finds the best search point from the search space built from one or more passes and the search parameters within each of those passes. `include_pass_params` controls whether each pass's individual search parameters are included in the search; `max_iter` and `max_time` can be configured for finer control.

It consists of two sub-components – `execution_order` and `search_algorithm`.
It consists of two sub-components – `execution_order` and `sampler`.

### Execution Order
The execution order defines the order in which the passes are optimized.
The execution order defines the order in which the search space is traversed.

Currently, we support two execution orders:
- `joint`: The search spaces of all passes are combined and searched together to find the best search point. Each search point
that is evaluated has parameters for the search parameters of all passes.
- `pass-by-pass`: The search space of each pass is searched and optimized independently in order.
- `joint`: The search spaces of all passes and their corresponding parameters are combined and searched together to find the best search point. Each search point consists of values for all search parameters of at most one pass in each pass group.
- `pass-by-pass`: The search space of each pass group is searched and optimized independently in order.
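
The difference between the two orders can be sketched with two toy traversals over the same pair of passes (purely illustrative, not Olive's implementation):

```python
from itertools import product

pass_a = {"opt_level": [0, 1, 2]}
pass_b = {"quant_format": ["int8", "uint8"]}

# joint: one combined space; every cross-pass combination is a candidate.
joint_points = [
    {"opt_level": o, "quant_format": q}
    for o, q in product(pass_a["opt_level"], pass_b["quant_format"])
]
assert len(joint_points) == 6  # 3 x 2 combinations

# pass-by-pass: search pass_a alone, freeze a winner, then search pass_b.
best_opt = min(pass_a["opt_level"])  # stand-in for "evaluate and pick the best"
pbp_points = [{"opt_level": best_opt, "quant_format": q} for q in pass_b["quant_format"]]
evaluations = len(pass_a["opt_level"]) + len(pbp_points)
assert evaluations == 5  # 3 + 2 evaluations, versus 6 for joint
```

Joint search scales multiplicatively with the pass spaces, while pass-by-pass scales additively at the cost of never revisiting earlier choices.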

### Search Algorithm
Search algorithm operates over a search space and provides samples/trials (search points) from the search space to execute and evaluate.
### Search Sampler
Search sampler provides samples/trials (search points) from the search space to evaluate. Each search point consists of values for all search parameters of all passes within the pass group.

The following search algorithms have been implemented:
- `exhaustive`: Exhaustively iterates over the search space.
- `random`: Randomly samples points from the search space without replacement.
- `tpe`: ample using TPE (Tree-structured Parzen Estimator) algorithm to sample from the search space.
The following sampling algorithms have been implemented:
- `sequential`: Sequentially iterates over the search space.
- `random`: Randomly samples points from the search space.
- `tpe`: Samples the search space using the TPE (Tree-structured Parzen Estimator) algorithm.

Each sampler can be used for an exhaustive search by setting the `max_samples` field to zero.
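
Put together, a `search_strategy` fragment might look like this (field names mirror the JSON examples elsewhere in this PR; the specific values are illustrative):

```python
import json

# Hypothetical engine configuration fragment.
search_strategy = {
    "execution_order": "joint",
    "sampler": "tpe",
    "max_samples": 3,  # a value of 0 sweeps the search space exhaustively
    "seed": 0,
}
config_json = json.dumps({"search_strategy": search_strategy}, indent=4)
```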
16 changes: 7 additions & 9 deletions docs/source/how-to/configure-workflows/auto-opt.md
@@ -41,11 +41,9 @@ Here is a simple example of Auto Optimizer configuration, the item which is not
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "tpe",
"search_algorithm_config": {
"num_samples": 1,
"seed": 0
}
"sampler": "tpe",
"max_samples": 1,
"seed": 0
},
"evaluator": "common_evaluator",
"cache_dir": "cache",
@@ -149,8 +147,8 @@ Here is another quick comparison between Auto Optimizer and manual settings.
},
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "tpe",
"num_samples": 1,
"sampler": "tpe",
"max_samples": 1,
"seed": 0
},
"evaluator": "common_evaluator",
@@ -261,8 +259,8 @@ Here is another quick comparison between Auto Optimizer and manual settings.
],
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "tpe",
"num_samples": 1,
"sampler": "tpe",
"max_samples": 1,
"seed": 0
},
"evaluator": "common_evaluator",
8 changes: 3 additions & 5 deletions docs/source/how-to/configure-workflows/model-packaging.md
@@ -197,11 +197,9 @@ You can add different types of `PackagingConfig` as a list to Engine configurations
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "tpe",
"search_algorithm_config": {
"num_samples": 5,
"seed": 0
}
"sampler": "tpe",
"max_samples": 5,
"seed": 0
},
"evaluator": "common_evaluator",
"host": "local_system",
8 changes: 4 additions & 4 deletions docs/source/reference/index.rst
@@ -33,11 +33,11 @@ Reference
:octicon:`arrow-right;1em;sd-text-info` `Pass <pass.html>`_

.. grid-item-card::
**Search Algorithm**
**Search Samplers**

Configure search strategies.
Configure search samplers.

:octicon:`arrow-right;1em;sd-text-info` `Search Algorithm <search-algorithm.html>`_
:octicon:`arrow-right;1em;sd-text-info` `Search Samplers <search-samplers.html>`_


.. toctree::
@@ -48,4 +48,4 @@ Reference
options
model
pass
search-algorithm
search-samplers
30 changes: 0 additions & 30 deletions docs/source/reference/search-algorithm.rst

This file was deleted.

30 changes: 30 additions & 0 deletions docs/source/reference/search-samplers.rst
@@ -0,0 +1,30 @@
Samplers
=================================

The following sampling algorithms are available in Olive.

Each sampler is followed by a description of the algorithm and a list of its configuration options.

.. _sequential_sampler:

SequentialSampler
-----------------
**Name:** :code:`"sequential"`

.. autoconfigclass:: olive.search.samplers.SequentialSampler

.. _random_sampler:

RandomSampler
-------------
**Name:** :code:`"random"`

.. autoconfigclass:: olive.search.samplers.RandomSampler

.. _tpe_sampler:

TPESampler
----------
**Name:** :code:`"tpe"`

.. autoconfigclass:: olive.search.samplers.TPESampler
2 changes: 1 addition & 1 deletion docs/source/why-olive.md
@@ -16,7 +16,7 @@ Olive (**O**NNX **LIVE**) is a cutting-edge model optimization toolkit with an a

The input to Olive is typically a PyTorch or Hugging Face model, and the output is an optimized ONNX model that is executed on a device (deployment target) running the ONNX runtime. Olive will optimize the model for the deployment target's AI accelerator (NPU, GPU, CPU) provided by a hardware vendor such as Qualcomm, AMD, Nvidia, or Intel.

Olive executes a *workflow*, which is an ordered sequence of individual model optimization tasks called *passes* - example passes include model compression, graph capture, quantization, and graph optimization. Each pass has a set of parameters that can be tuned to achieve the best metrics, such as accuracy and latency, that are evaluated by the respective *evaluator*. Olive employs a *search strategy* that uses a *search algorithm* to auto-tune each pass individually or a set of passes together.
Olive executes a *workflow*, which is an ordered sequence of individual model optimization tasks called *passes* - example passes include model compression, graph capture, quantization, and graph optimization. Each pass has a set of parameters that can be tuned to achieve the best metrics, such as accuracy and latency, that are evaluated by the respective *evaluator*. Olive employs a *search strategy* that uses a *search sampler* to auto-tune each pass individually or a set of passes together.
```

## Benefits of using Olive
File renamed without changes.
4 changes: 3 additions & 1 deletion examples/bert/README.md
@@ -90,8 +90,10 @@ Config file: [bert_qat_customized_train_loop_cpu.json](bert_qat_customized_train
### BERT optimization with CUDA/TensorRT on GPU
This workflow performs BERT optimization on GPU with CUDA/TensorRT. It performs the optimization pipeline:
1. CUDA: `CUDAExecutionProvider`
- *PyTorch Model -> Onnx Model -> ONNX Runtime performance tuning*
Run: [bert.py](bert.py)
- *PyTorch Model -> Onnx Model -> Transformers Optimized Onnx Model with fp16 -> ONNX Runtime performance tuning*
Config file: [bert_cuda_gpu.json](bert_cuda_gpu.json)
Run: [bert.py](bert.py) --optimize
2. TensorRT: `TensorrtExecutionProvider`
- *PyTorch Model -> Onnx Model -> ONNX Runtime performance tuning with trt_fp16_enable*
Config file: [bert_trt_gpu.json](bert_trt_gpu.json)
27 changes: 27 additions & 0 deletions examples/bert/bert.py
@@ -0,0 +1,27 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import argparse
import json
from pathlib import Path

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--optimize",
        action="store_true",
        help="If set, run transformers optimization pass",
    )
    args = parser.parse_args()

    input_filename = "bert_cuda_gpu.template.json"
    with Path(input_filename).open("r") as f:
        config = json.load(f)

    if not args.optimize:
        del config["passes"]["transformers_optimization"]

    output_filename = input_filename.replace(".template", "")
    with Path(output_filename).open("w") as strm:
        json.dump(config, strm, indent=4)
@@ -51,11 +51,7 @@
"transformers_optimization": { "type": "OrtTransformersOptimization", "float16": true },
"session_params_tuning": { "type": "OrtSessionParamsTuning", "data_config": "glue_mrpc", "io_bind": true }
},
"pass_flows": [
[ "conversion", "transformers_optimization", "session_params_tuning" ],
[ "conversion", "session_params_tuning" ]
],
"search_strategy": { "execution_order": "joint", "search_algorithm": "tpe", "num_samples": 3, "seed": 0 },
"search_strategy": { "execution_order": "joint", "sampler": "tpe", "max_samples": 3, "seed": 0 },
"host": "local_system",
"target": "local_system",
"evaluator": "common_evaluator",
2 changes: 1 addition & 1 deletion examples/bert/bert_inc_dynamic_ptq_cpu.json
@@ -34,7 +34,7 @@
"transformers_optimization": { "type": "OrtTransformersOptimization", "model_type": "bert" },
"dynamic_quantization": { "type": "IncDynamicQuantization" }
},
"search_strategy": { "execution_order": "joint", "search_algorithm": "exhaustive" },
"search_strategy": { "execution_order": "joint", "sampler": "sequential" },
"evaluator": "common_evaluator",
"cache_dir": "cache",
"output_dir": "models/bert_inc_dynamic_ptq_cpu"
2 changes: 1 addition & 1 deletion examples/bert/bert_inc_ptq_cpu.json
@@ -64,7 +64,7 @@
}
}
},
"search_strategy": { "execution_order": "joint", "search_algorithm": "exhaustive" },
"search_strategy": { "execution_order": "joint", "sampler": "sequential" },
"evaluator": "common_evaluator",
"cache_dir": "cache",
"output_dir": "models/bert_inc_ptq_cpu"
2 changes: 1 addition & 1 deletion examples/bert/bert_ptq_cpu.json
@@ -67,7 +67,7 @@
},
"session_params_tuning": { "type": "OrtSessionParamsTuning", "data_config": "glue_mrpc" }
},
"search_strategy": { "execution_order": "joint", "search_algorithm": "tpe", "num_samples": 3, "seed": 0 },
"search_strategy": { "execution_order": "joint", "sampler": "tpe", "max_samples": 3, "seed": 0 },
"evaluator": "common_evaluator",
"host": "local_system",
"target": "local_system",