# Model Builder: fix config access, add UT, clean up CI yamls (#1638)
## Describe your changes
- Fix config access in the model builder pass (see the sketch after this list).
- Add a unit test for the model builder. CPU tests now have their own requirements file.
- Clean up the pipeline yamls:
  - set gpu defaults in the gpu test templates
  - remove unused `device` template parameters
  - remove redundant parameter assignments from template calls
  - the nightly gpu test now uses the gpu image
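For context, the config-access fix switches the pass from dict-style reads (`config.get(...)`, `config[arg]`) to attribute reads (`config.int4_block_size`, `getattr(config, arg)`), since the validated pass config is an object rather than a dict. Below is a minimal sketch of that pattern, assuming a simplified Pydantic-style config; `SketchConfig` and its fields are hypothetical stand-ins, not Olive's generated config classes:

```python
from typing import Optional

from pydantic import BaseModel


# Hypothetical stand-in for a validated pass config; Olive generates the
# real class from the pass's PassConfigParam definitions.
class SketchConfig(BaseModel):
    int4_block_size: Optional[int] = None
    exclude_embeds: bool = False
    enable_cuda_graph: Optional[bool] = None


config = SketchConfig(int4_block_size=32, exclude_embeds=True)
extra_args = {}

# Attribute access works on the config object; dict-style config.get(...)
# or config[arg] fails on a plain object, which is what the fix addresses.
if config.int4_block_size:
    extra_args["int4_block_size"] = config.int4_block_size

# Flags that are only checked for presence are read with getattr.
for arg in ["exclude_embeds"]:
    if getattr(config, arg):
        extra_args[arg] = True

print(extra_args)  # {'int4_block_size': 32, 'exclude_embeds': True}
```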

## Checklist before requesting a review
- [x] Add unit tests for this change.
- [x] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
Fixes #1635
jambayk authored Feb 21, 2025
1 parent ba7e187 commit 0c7ff92
Showing 9 changed files with 74 additions and 26 deletions.
```diff
@@ -4,7 +4,6 @@ parameters:
   name: ''
   pool: ''
   test_type: ''
-  device: 'cpu'
   python_version: '3.10'
   onnxruntime: 'onnxruntime'
   onnxruntime_nightly: false
```
```diff
@@ -7,7 +7,7 @@ parameters:
   onnxruntime: 'onnxruntime'
   onnxruntime_nightly: false
   torch: 'torch'
-  requirements_file: 'requirements-test.txt'
+  requirements_file: 'requirements-test-cpu.txt'
 
 jobs:
 - job: ${{parameters.name}}
```
```diff
@@ -4,15 +4,14 @@ parameters:
   name: ''
   pool: ''
   test_type: ''
-  device: 'cpu'
   dockerfile: '.azure_pipelines/dockerfiles/linux-gpu.dockerfile'
   docker_image: 'olive-pipeline:latest'
   base_image: 'nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04'
   trt_version: '10.5.0.18-1+cuda12.6'
   python_version: '3.10'
-  onnxruntime: 'onnxruntime'
+  onnxruntime: 'onnxruntime-gpu'
   torch: 'torch'
-  requirements_file: 'requirements-test.txt'
+  requirements_file: 'requirements-test-gpu.txt'
   test_script: 'run_test.sh'
   onnxruntime_nightly: false
 
```
**.azure_pipelines/olive-ci.yaml** (0 additions & 5 deletions)

```diff
@@ -55,24 +55,19 @@ jobs:
   parameters:
     name: Linux_CPU_CI_Unit_Test
     pool: $(OLIVE_POOL_UBUNTU2004)
-    onnxruntime: onnxruntime
     test_type: 'unit_test'
 
 - template: job_templates/olive-test-linux-gpu-template.yaml
   parameters:
     name: Linux_GPU_CI_Unit_Test
     pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)
     test_type: 'unit_test'
-    device: 'gpu'
-    onnxruntime: onnxruntime-gpu
-    requirements_file: 'requirements-test-gpu.txt'
 
 # Windows unit tests
 - template: job_templates/olive-test-cpu-template.yaml
   parameters:
     name: Windows_CPU_CI_Unit_Test
     pool: $(OLIVE_POOL_WIN2019)
-    onnxruntime: onnxruntime
     test_type: 'unit_test'
     windows: True
 
```
**.azure_pipelines/olive-ort-nightly.yaml** (4 additions & 7 deletions)

```diff
@@ -19,33 +19,26 @@ jobs:
   parameters:
     name: Linux_CI_Unit_Test_Olive
     pool: $(OLIVE_POOL_UBUNTU2004)
-    device: 'cpu'
     windows: False
     test_type: 'unit_test'
-    onnxruntime: onnxruntime
     onnxruntime_nightly: true
 
 # Linux GPU unit test
 - template: job_templates/olive-test-linux-gpu-template.yaml
   parameters:
     name: Linux_GPU_CI_Unit_Test_Olive
     pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)
-    device: 'gpu'
     windows: False
     test_type: 'unit_test'
-    onnxruntime: onnxruntime-gpu
     onnxruntime_nightly: true
-    requirements_file: 'requirements-test-gpu.txt'
 
 # Windows unit test
 - template: job_templates/olive-test-cpu-template.yaml
   parameters:
     name: Windows_CI_Unit_Test_Olive
     pool: $(OLIVE_POOL_WIN2019)
-    device: 'cpu'
     windows: True
     test_type: 'unit_test'
-    onnxruntime: onnxruntime-gpu
     onnxruntime_nightly: true
 
 # Linux examples test
@@ -101,8 +94,12 @@
   parameters:
     name: Linux_GPU_CI
     pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)
     device: 'gpu'
     onnxruntime: onnxruntime-gpu
     onnxruntime_nightly: true
+    dockerfile: '.azure_pipelines/dockerfiles/linux-gpu.dockerfile'
+    base_image: 'nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04'
+    trt_version: '10.5.0.18-1+cuda12.6'
   examples:
     bert_cuda_gpu:
       exampleFolder: bert
```
**olive/passes/onnx/model_builder.py** (14 additions & 9 deletions)

```diff
@@ -36,6 +36,13 @@ class Precision(StrEnumBase):
         INT8 = "int8"
         INT4 = "int4"
 
+    class BlockSize(IntEnumBase):
+        B16 = 16
+        B32 = 32
+        B64 = 64
+        B128 = 128
+        B256 = 256
+
     class AccuracyLevel(IntEnumBase):
         fp32 = 1
         fp16 = 2
@@ -60,7 +67,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigParam]:
                 type_=Dict[str, Any], required=False, description="Search options to use for generate loop."
             ),
             "int4_block_size": PassConfigParam(
-                type_=int,
+                type_=ModelBuilder.BlockSize,
                 required=False,
                 description="Specify the block_size for int4 quantization. Acceptable values: 16/32/64/128/256.",
             ),
@@ -170,23 +177,21 @@ def _run_for_config(
         if model.adapter_path:
             extra_args["adapter_path"] = model.adapter_path
 
-        if config.get("int4_block_size"):
-            if int(config.int4_block_size) not in [16, 32, 64, 128, 256]:
-                raise ValueError("Invalid int4_block_size. Accepted values: 16/32/64/128/256.")
-            extra_args["int4_block_size"] = config.int4_block_size
+        if config.int4_block_size:
+            extra_args["int4_block_size"] = config.int4_block_size.value
 
-        if config.get("int4_accuracy_level"):
+        if config.int4_accuracy_level:
             extra_args["int4_accuracy_level"] = config.int4_accuracy_level.value
 
         # args that are only checked for presence, not value
         for arg in ["exclude_embeds", "exclude_lm_head"]:
-            if config[arg]:
+            if getattr(config, arg):
                 extra_args[arg] = True
 
         # args that are checked for presence and value (if present)
         for arg in ["enable_cuda_graph"]:
-            if config[arg] is not None:
-                extra_args[arg] = "1" if config[arg] else "0"
+            if getattr(config, arg) is not None:
+                extra_args[arg] = "1" if getattr(config, arg) else "0"
 
         model_attributes = copy.deepcopy(model.model_attributes or {})
 
```
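With `int4_block_size` typed as `ModelBuilder.BlockSize`, the value check moves into config validation: constructing an integer enum from an unsupported value raises `ValueError`, which is why the manual `not in [16, 32, 64, 128, 256]` check could be dropped from `_run_for_config`. A small sketch of that behavior using the standard library's `enum.IntEnum`, assuming Olive's `IntEnumBase` follows the same semantics:

```python
from enum import IntEnum


# Mirrors the nested BlockSize enum added in this PR, using a plain IntEnum.
class BlockSize(IntEnum):
    B16 = 16
    B32 = 32
    B64 = 64
    B128 = 128
    B256 = 256


print(repr(BlockSize(32)))   # <BlockSize.B32: 32>, an accepted value
print(BlockSize(32).value)   # 32, the integer passed on via extra_args

try:
    BlockSize(48)  # not a defined member
except ValueError as err:
    print(err)  # "48 is not a valid BlockSize"
```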
**test/requirements-test-cpu.txt** (new file, 2 additions)

```diff
@@ -0,0 +1,2 @@
+-r requirements-test.txt
+onnxruntime-genai
```
**test/requirements-test-gpu.txt** (1 addition)

```diff
@@ -2,4 +2,5 @@
 auto-gptq
 autoawq
 bitsandbytes
+onnxruntime-genai-cuda
 triton
```
**test/unit_test/passes/onnx/test_model_builder.py** (new file, 50 additions)

```python
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from pathlib import Path

import pytest

from olive.model import HfModelHandler, ONNXModelHandler
from olive.passes.olive_pass import create_pass_from_dict
from olive.passes.onnx.model_builder import ModelBuilder


def make_local_model(save_path, model_type="hf"):
    input_model = HfModelHandler(model_path="hf-internal-testing/tiny-random-LlamaForCausalLM")
    loaded_model = input_model.load_model()
    # this checkpoint has an invalid generation config that cannot be saved
    loaded_model.generation_config.pad_token_id = 1

    save_path.mkdir(parents=True, exist_ok=True)
    if model_type == "hf":
        loaded_model.save_pretrained(save_path)
    else:
        onnx_file_path = save_path / "model.onnx"
        onnx_file_path.write_text("dummy onnx file")
        loaded_model.config.save_pretrained(save_path)
        loaded_model.generation_config.save_pretrained(save_path)
        input_model.get_hf_tokenizer().save_pretrained(save_path)

    return (
        HfModelHandler(model_path=save_path)
        if model_type == "hf"
        else ONNXModelHandler(model_path=save_path, onnx_file_name="model.onnx")
    )


@pytest.mark.parametrize("metadata_only", [True, False])
def test_model_builder(tmp_path, metadata_only):
    input_model = make_local_model(tmp_path / "input_model", "onnx" if metadata_only else "hf")

    p = create_pass_from_dict(ModelBuilder, {"precision": "fp32", "metadata_only": metadata_only}, disable_search=True)
    output_folder = tmp_path / "output_model"

    # execute the pass
    output_model = p.run(input_model, output_folder)

    # assert
    assert isinstance(output_model, ONNXModelHandler)
    assert Path(output_model.model_path).exists()
    assert Path(output_folder / "genai_config.json").exists()
```