# Model Builder: fix config access, add UT, clean up CI yamls (#1638)
## Describe your changes
- Fix config access in the model builder pass (see the sketch after this list).
- Add a unit test for the model builder. CPU tests now have their own requirements file.
- Clean up the pipeline yamls:
  - set gpu defaults in the gpu test templates
  - remove unused `device` template parameters
  - remove redundant parameter assignments from template calls
  - the nightly gpu test now uses the gpu image
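For context, the config-access fix switches the pass from dict-style reads (`config.get(...)`, `config[arg]`) to attribute reads (`config.int4_block_size`, `getattr(config, arg)`), since the validated pass config is an object rather than a dict. Below is a minimal sketch of that pattern, assuming a simplified Pydantic-style config; `SketchConfig` and its fields are hypothetical stand-ins, not Olive's generated config classes:

```python
from typing import Optional

from pydantic import BaseModel


# Hypothetical stand-in for a validated pass config; Olive generates the
# real class from the pass's PassConfigParam definitions.
class SketchConfig(BaseModel):
    int4_block_size: Optional[int] = None
    exclude_embeds: bool = False
    enable_cuda_graph: Optional[bool] = None


config = SketchConfig(int4_block_size=32, exclude_embeds=True)
extra_args = {}

# Attribute access works on the config object; dict-style config.get(...)
# or config[arg] fails on a plain object, which is what the fix addresses.
if config.int4_block_size:
    extra_args["int4_block_size"] = config.int4_block_size

# Flags that are only checked for presence are read with getattr.
for arg in ["exclude_embeds"]:
    if getattr(config, arg):
        extra_args[arg] = True

print(extra_args)  # {'int4_block_size': 32, 'exclude_embeds': True}
```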

## Checklist before requesting a review
- [x] Add unit tests for this change.
- [x] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
Fixes #1635
jambayk authored Feb 21, 2025
1 parent ba7e187 commit 0c7ff92
Showing 9 changed files with 74 additions and 26 deletions.
```diff
@@ -4,7 +4,6 @@ parameters:
   name: ''
   pool: ''
   test_type: ''
-  device: 'cpu'
   python_version: '3.10'
   onnxruntime: 'onnxruntime'
   onnxruntime_nightly: false
```
```diff
@@ -7,7 +7,7 @@ parameters:
   onnxruntime: 'onnxruntime'
   onnxruntime_nightly: false
   torch: 'torch'
-  requirements_file: 'requirements-test.txt'
+  requirements_file: 'requirements-test-cpu.txt'
 
 jobs:
 - job: ${{parameters.name}}
```
```diff
@@ -4,15 +4,14 @@ parameters:
   name: ''
   pool: ''
   test_type: ''
-  device: 'cpu'
   dockerfile: '.azure_pipelines/dockerfiles/linux-gpu.dockerfile'
   docker_image: 'olive-pipeline:latest'
   base_image: 'nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04'
   trt_version: '10.5.0.18-1+cuda12.6'
   python_version: '3.10'
-  onnxruntime: 'onnxruntime'
+  onnxruntime: 'onnxruntime-gpu'
   torch: 'torch'
-  requirements_file: 'requirements-test.txt'
+  requirements_file: 'requirements-test-gpu.txt'
   test_script: 'run_test.sh'
   onnxruntime_nightly: false
 
```
**.azure_pipelines/olive-ci.yaml** (0 additions & 5 deletions)

```diff
@@ -55,24 +55,19 @@ jobs:
   parameters:
     name: Linux_CPU_CI_Unit_Test
     pool: $(OLIVE_POOL_UBUNTU2004)
-    onnxruntime: onnxruntime
     test_type: 'unit_test'
 
 - template: job_templates/olive-test-linux-gpu-template.yaml
   parameters:
     name: Linux_GPU_CI_Unit_Test
     pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)
     test_type: 'unit_test'
-    device: 'gpu'
-    onnxruntime: onnxruntime-gpu
-    requirements_file: 'requirements-test-gpu.txt'
 
 # Windows unit tests
 - template: job_templates/olive-test-cpu-template.yaml
   parameters:
     name: Windows_CPU_CI_Unit_Test
     pool: $(OLIVE_POOL_WIN2019)
-    onnxruntime: onnxruntime
     test_type: 'unit_test'
     windows: True
 
```
**.azure_pipelines/olive-ort-nightly.yaml** (4 additions & 7 deletions)

```diff
@@ -19,33 +19,26 @@ jobs:
   parameters:
     name: Linux_CI_Unit_Test_Olive
     pool: $(OLIVE_POOL_UBUNTU2004)
-    device: 'cpu'
     windows: False
     test_type: 'unit_test'
-    onnxruntime: onnxruntime
     onnxruntime_nightly: true
 
 # Linux GPU unit test
 - template: job_templates/olive-test-linux-gpu-template.yaml
   parameters:
     name: Linux_GPU_CI_Unit_Test_Olive
     pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)
-    device: 'gpu'
     windows: False
     test_type: 'unit_test'
-    onnxruntime: onnxruntime-gpu
     onnxruntime_nightly: true
-    requirements_file: 'requirements-test-gpu.txt'
 
 # Windows unit test
 - template: job_templates/olive-test-cpu-template.yaml
   parameters:
     name: Windows_CI_Unit_Test_Olive
     pool: $(OLIVE_POOL_WIN2019)
-    device: 'cpu'
     windows: True
     test_type: 'unit_test'
-    onnxruntime: onnxruntime-gpu
     onnxruntime_nightly: true
 
 # Linux examples test
@@ -101,8 +94,12 @@
   parameters:
     name: Linux_GPU_CI
     pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)
     device: 'gpu'
     onnxruntime: onnxruntime-gpu
     onnxruntime_nightly: true
+    dockerfile: '.azure_pipelines/dockerfiles/linux-gpu.dockerfile'
+    base_image: 'nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04'
+    trt_version: '10.5.0.18-1+cuda12.6'
   examples:
     bert_cuda_gpu:
       exampleFolder: bert
```
**olive/passes/onnx/model_builder.py** (14 additions & 9 deletions)

```diff
@@ -36,6 +36,13 @@ class Precision(StrEnumBase):
         INT8 = "int8"
         INT4 = "int4"
 
+    class BlockSize(IntEnumBase):
+        B16 = 16
+        B32 = 32
+        B64 = 64
+        B128 = 128
+        B256 = 256
+
     class AccuracyLevel(IntEnumBase):
         fp32 = 1
         fp16 = 2
@@ -60,7 +67,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigParam]:
                 type_=Dict[str, Any], required=False, description="Search options to use for generate loop."
             ),
             "int4_block_size": PassConfigParam(
-                type_=int,
+                type_=ModelBuilder.BlockSize,
                 required=False,
                 description="Specify the block_size for int4 quantization. Acceptable values: 16/32/64/128/256.",
             ),
@@ -170,23 +177,21 @@ def _run_for_config(
         if model.adapter_path:
             extra_args["adapter_path"] = model.adapter_path
 
-        if config.get("int4_block_size"):
-            if int(config.int4_block_size) not in [16, 32, 64, 128, 256]:
-                raise ValueError("Invalid int4_block_size. Accepted values: 16/32/64/128/256.")
-            extra_args["int4_block_size"] = config.int4_block_size
+        if config.int4_block_size:
+            extra_args["int4_block_size"] = config.int4_block_size.value
 
-        if config.get("int4_accuracy_level"):
+        if config.int4_accuracy_level:
             extra_args["int4_accuracy_level"] = config.int4_accuracy_level.value
 
         # args that are only checked for presence, not value
         for arg in ["exclude_embeds", "exclude_lm_head"]:
-            if config[arg]:
+            if getattr(config, arg):
                 extra_args[arg] = True
 
         # args that are checked for presence and value (if present)
         for arg in ["enable_cuda_graph"]:
-            if config[arg] is not None:
-                extra_args[arg] = "1" if config[arg] else "0"
+            if getattr(config, arg) is not None:
+                extra_args[arg] = "1" if getattr(config, arg) else "0"
 
         model_attributes = copy.deepcopy(model.model_attributes or {})
 
```
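With `int4_block_size` typed as `ModelBuilder.BlockSize`, the value check moves into config validation: constructing an integer enum from an unsupported value raises `ValueError`, which is why the manual `not in [16, 32, 64, 128, 256]` check could be dropped from `_run_for_config`. A small sketch of that behavior using the standard library's `enum.IntEnum`, assuming Olive's `IntEnumBase` follows the same semantics:

```python
from enum import IntEnum


# Mirrors the nested BlockSize enum added in this PR, using a plain IntEnum.
class BlockSize(IntEnum):
    B16 = 16
    B32 = 32
    B64 = 64
    B128 = 128
    B256 = 256


print(repr(BlockSize(32)))   # <BlockSize.B32: 32>, an accepted value
print(BlockSize(32).value)   # 32, the integer passed on via extra_args

try:
    BlockSize(48)  # not a defined member
except ValueError as err:
    print(err)  # "48 is not a valid BlockSize"
```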
**test/requirements-test-cpu.txt** (new file, 2 additions)

```diff
@@ -0,0 +1,2 @@
+-r requirements-test.txt
+onnxruntime-genai
```
**test/requirements-test-gpu.txt** (1 addition)

```diff
@@ -2,4 +2,5 @@
 auto-gptq
 autoawq
 bitsandbytes
+onnxruntime-genai-cuda
 triton
```
**test/unit_test/passes/onnx/test_model_builder.py** (new file, 50 additions)

```python
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from pathlib import Path

import pytest

from olive.model import HfModelHandler, ONNXModelHandler
from olive.passes.olive_pass import create_pass_from_dict
from olive.passes.onnx.model_builder import ModelBuilder


def make_local_model(save_path, model_type="hf"):
    input_model = HfModelHandler(model_path="hf-internal-testing/tiny-random-LlamaForCausalLM")
    loaded_model = input_model.load_model()
    # this checkpoint has an invalid generation config that cannot be saved
    loaded_model.generation_config.pad_token_id = 1

    save_path.mkdir(parents=True, exist_ok=True)
    if model_type == "hf":
        loaded_model.save_pretrained(save_path)
    else:
        onnx_file_path = save_path / "model.onnx"
        onnx_file_path.write_text("dummy onnx file")
        loaded_model.config.save_pretrained(save_path)
        loaded_model.generation_config.save_pretrained(save_path)
        input_model.get_hf_tokenizer().save_pretrained(save_path)

    return (
        HfModelHandler(model_path=save_path)
        if model_type == "hf"
        else ONNXModelHandler(model_path=save_path, onnx_file_name="model.onnx")
    )


@pytest.mark.parametrize("metadata_only", [True, False])
def test_model_builder(tmp_path, metadata_only):
    input_model = make_local_model(tmp_path / "input_model", "onnx" if metadata_only else "hf")

    p = create_pass_from_dict(ModelBuilder, {"precision": "fp32", "metadata_only": metadata_only}, disable_search=True)
    output_folder = tmp_path / "output_model"

    # execute the pass
    output_model = p.run(input_model, output_folder)

    # assert
    assert isinstance(output_model, ONNXModelHandler)
    assert Path(output_model.model_path).exists()
    assert Path(output_folder / "genai_config.json").exists()
```