Skip to content

Commit

Permalink
Fix ort nightly pipeline (#1554)
Browse files Browse the repository at this point in the history
## Describe your changes

Fix ort nightly pipeline

- Upgrade to CUDA 12.6 + TensorRT 10.5.
- Fix the Python 3.8 example test.
- Fix and skip some tests.
- Fix YAML formatting.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Does this PR include example changes? If yes, please remember to
update the [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
  • Loading branch information
xiaoyu-work authored Jan 17, 2025
1 parent 3d6e731 commit b0896bb
Show file tree
Hide file tree
Showing 17 changed files with 206 additions and 158 deletions.
3 changes: 2 additions & 1 deletion .azure_pipelines/dockerfiles/linux-cpu.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
FROM ubuntu:22.04
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ARG PYTHON_VERSION

Expand Down
5 changes: 3 additions & 2 deletions .azure_pipelines/dockerfiles/linux-gpu.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ARG PYTHON_VERSION
ARG TENSORRT_VERSION=10.0.1.6-1+cuda12.4
ARG TENSORRT_VERSION

RUN apt-get update && \
apt-get install -y \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@ parameters:
dockerfile: ''
python_version: ''
docker_image: ''
base_image: ''
trt_version: ''

steps:
- script: |
docker login -u $(docker-username) -p $(docker-password)
docker build --build-arg PYTHON_VERSION=${{ parameters.python_version }} -t ${{ parameters.docker_image }} -f $(Build.SourcesDirectory)/${{ parameters.dockerfile }} .
docker build \
--build-arg BASE_IMAGE=${{ parameters.base_image }} \
--build-arg TENSORRT_VERSION=${{ parameters.trt_version }} \
--build-arg PYTHON_VERSION=${{ parameters.python_version }} \
-t ${{ parameters.docker_image }} \
-f $(Build.SourcesDirectory)/${{ parameters.dockerfile }} .
displayName: Build Docker Image

- script: |
Expand Down
70 changes: 35 additions & 35 deletions .azure_pipelines/job_templates/olive-build-doc-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,42 @@ parameters:
doc_version: 'latest'

jobs:
- job: ${{parameters.job_name}}
displayName: ${{parameters.display_name}}
pool: ${{ parameters.pool}}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
- job: ${{parameters.job_name}}
displayName: ${{parameters.display_name}}
pool: ${{ parameters.pool}}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip

steps:
- template: olive-setup-template.yaml
parameters:
python_version: '3.9'
onnxruntime: 'onnxruntime'
torch: 'torch'
steps:
- template: olive-setup-template.yaml
parameters:
python_version: '3.9'
onnxruntime: 'onnxruntime'
torch: 'torch'

# checkout release branch if doc_version provided
- script: |
git config --global user.email "[email protected]"
git config --global user.name "olivedevteam"
git fetch origin rel-${{parameters.doc_version}}
git checkout rel-${{parameters.doc_version}}
displayName: Checkout release branch
condition: ne('${{parameters.doc_version}}', 'latest')
# checkout release branch if doc_version provided
- script: |
git config --global user.email "[email protected]"
git config --global user.name "olivedevteam"
git fetch origin rel-${{parameters.doc_version}}
git checkout rel-${{parameters.doc_version}}
displayName: Checkout release branch
condition: ne('${{parameters.doc_version}}', 'latest')
- script: |
# set -e, otherwise make html fails but the build continues
set -e
cd docs
python -m pip install -r requirements.txt
make html
make linkcheck
make schema
displayName: Make Docs
- script: |
# set -e, otherwise make html fails but the build continues
set -e
cd docs
python -m pip install -r requirements.txt
make html
make linkcheck
make schema
displayName: Make Docs
- task: PublishPipelineArtifact@1
inputs:
path: $(Build.SourcesDirectory)/docs/build/html
artifactName: olive_doc_src
artifactType: pipeline
displayName: Publish Docs to Pipeline Artifact
condition: and(succeeded(), eq('${{ parameters.publish_docs }}', 'true'))
- task: PublishPipelineArtifact@1
inputs:
path: $(Build.SourcesDirectory)/docs/build/html
artifactName: olive_doc_src
artifactType: pipeline
displayName: Publish Docs to Pipeline Artifact
condition: and(succeeded(), eq('${{ parameters.publish_docs }}', 'true'))
123 changes: 66 additions & 57 deletions .azure_pipelines/job_templates/olive-example-linux-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,72 +7,81 @@ parameters:
device: 'cpu'
dockerfile: '.azure_pipelines/dockerfiles/linux-cpu.dockerfile'
docker_image: 'olive-pipeline:latest'
base_image: 'ubuntu:22.04'
trt_version: ''
onnxruntime: 'onnxruntime'
subfolder: 'local'
torch: 'torch'
test_script: 'run_test.sh'
onnxruntime_nightly: false

jobs:
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir

steps:
- template: build-docker-image-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
dockerfile: ${{ parameters.dockerfile }}
docker_image: ${{ parameters.docker_image }}
steps:
- script: docker system df && docker system prune -a -f && docker system df
displayName: Clean docker images
continueOnError: true

# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')
- template: build-docker-image-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
dockerfile: ${{ parameters.dockerfile }}
docker_image: ${{ parameters.docker_image }}
base_image: ${{ parameters.base_image }}
trt_version: ${{ parameters.trt_version }}

- script: |
GPU_OPTION=""
if [ "${{ parameters.device }}" = "gpu" ]; then
GPU_OPTION="--gpus=all"
fi
docker run \
$GPU_OPTION \
-v $(Build.SourcesDirectory)/logs:/logs \
-e WORKSPACE_SUBSCRIPTION_ID=$(workspace-subscription-id) \
-e WORKSPACE_RESOURCE_GROUP=$(workspace-resource-group) \
-e WORKSPACE_NAME=$(workspace-name) \
-e MANAGED_IDENTITY_CLIENT_ID=$(olive-1es-identity-client-id) \
-e PIPELINE_TEST_ACCOUNT_NAME=$(pipeline-test-account-name) \
-e PIPELINE_TEST_CONTAINER_NAME=$(pipeline-test-container-name) \
-e KEYVAULT_NAME=$(keyvault-name) \
-e HF_TOKEN=$(hf_token) \
${{ parameters.docker_image }} \
bash .azure_pipelines/scripts/${{ parameters.test_script }} \
${{ parameters.torch }} \
${{ parameters.onnxruntime }} \
${{ parameters.onnxruntime_nightly }} \
examples/$(exampleFolder)/$(exampleRequirements) \
examples/test/${{ parameters.subfolder }}/test_$(exampleName).py
displayName: Run Tests in Docker
# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')

# Step 3: Publish test results
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/logs/test_examples-TestOlive.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Publish Test Results
- script: |
GPU_OPTION=""
if [ "${{ parameters.device }}" = "gpu" ]; then
GPU_OPTION="--gpus=all"
fi
docker run \
--shm-size=4g \
$GPU_OPTION \
-v $(Build.SourcesDirectory)/logs:/logs \
-e WORKSPACE_SUBSCRIPTION_ID=$(workspace-subscription-id) \
-e WORKSPACE_RESOURCE_GROUP=$(workspace-resource-group) \
-e WORKSPACE_NAME=$(workspace-name) \
-e MANAGED_IDENTITY_CLIENT_ID=$(olive-1es-identity-client-id) \
-e PIPELINE_TEST_ACCOUNT_NAME=$(pipeline-test-account-name) \
-e PIPELINE_TEST_CONTAINER_NAME=$(pipeline-test-container-name) \
-e KEYVAULT_NAME=$(keyvault-name) \
-e HF_TOKEN=$(hf_token) \
${{ parameters.docker_image }} \
bash .azure_pipelines/scripts/${{ parameters.test_script }} \
${{ parameters.torch }} \
${{ parameters.onnxruntime }} \
${{ parameters.onnxruntime_nightly }} \
examples/$(exampleFolder)/$(exampleRequirements) \
examples/test/${{ parameters.subfolder }}/test_$(exampleName).py
displayName: Run Tests in Docker
- script: sudo git clean -dfX
condition: always()
displayName: Clean remaining artifacts
# Step 3: Publish test results
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/logs/test_examples-TestOlive.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Publish Test Results

- script: sudo git clean -dfX
condition: always()
displayName: Clean remaining artifacts
98 changes: 50 additions & 48 deletions .azure_pipelines/job_templates/olive-example-win-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,60 +7,62 @@ parameters:
device: 'cpu'
python_version: '3.10'
onnxruntime: 'onnxruntime'
onnxruntime_nightly: false
subfolder: 'local'
torch: 'torch'

jobs:
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir
PYTEST_BASETEMP: $(Pipeline.Workspace)/.pytest_basetemp
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir
PYTEST_BASETEMP: $(Pipeline.Workspace)/.pytest_basetemp

steps:
- template: olive-setup-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
onnxruntime: ${{ parameters.onnxruntime }}
torch: ${{ parameters.torch }}
steps:
- template: olive-setup-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
onnxruntime: ${{ parameters.onnxruntime }}
onnxruntime_nightly: ${{ parameters.onnxruntime_nightly }}
torch: ${{ parameters.torch }}

# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')
# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')

- script: |
python -m pip install pytest
python -m pip install azure-identity azure-storage-blob tabulate
python -m pip install -r $(Build.SourcesDirectory)/examples/$(exampleFolder)/$(exampleRequirements)
python -m pytest -v -s --log-cli-level=WARNING --junitxml=$(Build.SourcesDirectory)/logs/test_examples-TestOlive.xml $(Build.SourcesDirectory)/examples/test/${{ parameters.subfolder }}/test_$(exampleName).py --basetemp $(PYTEST_BASETEMP)
displayName: Test Examples
env:
WORKSPACE_SUBSCRIPTION_ID: $(workspace-subscription-id)
WORKSPACE_RESOURCE_GROUP: $(workspace-resource-group)
WORKSPACE_NAME: $(workspace-name)
MANAGED_IDENTITY_CLIENT_ID: $(olive-1es-identity-client-id)
PIPELINE_TEST_ACCOUNT_NAME: $(pipeline-test-account-name)
PIPELINE_TEST_CONTAINER_NAME: $(pipeline-test-container-name)
KEYVAULT_NAME: $(keyvault-name)
HF_TOKEN: $(hf_token)
- script: |
python -m pip install pytest
python -m pip install azure-identity azure-storage-blob tabulate
python -m pip install -r $(Build.SourcesDirectory)/examples/$(exampleFolder)/$(exampleRequirements)
python -m pytest -v -s --log-cli-level=WARNING --junitxml=$(Build.SourcesDirectory)/logs/test_examples-TestOlive.xml $(Build.SourcesDirectory)/examples/test/${{ parameters.subfolder }}/test_$(exampleName).py --basetemp $(PYTEST_BASETEMP)
displayName: Test Examples
env:
WORKSPACE_SUBSCRIPTION_ID: $(workspace-subscription-id)
WORKSPACE_RESOURCE_GROUP: $(workspace-resource-group)
WORKSPACE_NAME: $(workspace-name)
MANAGED_IDENTITY_CLIENT_ID: $(olive-1es-identity-client-id)
PIPELINE_TEST_ACCOUNT_NAME: $(pipeline-test-account-name)
PIPELINE_TEST_CONTAINER_NAME: $(pipeline-test-container-name)
KEYVAULT_NAME: $(keyvault-name)
HF_TOKEN: $(hf_token)
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/*TestOlive*.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Upload pipeline run test results
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/*TestOlive*.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Upload pipeline run test results

- script: git clean -dfX
condition: always()
displayName: Clean remaining artifacts
- script: git clean -dfX
condition: always()
displayName: Clean remaining artifacts
8 changes: 4 additions & 4 deletions .azure_pipelines/job_templates/olive-setup-template.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
parameters:
python_version: '3.10'
onnxruntime: 'onnxruntime'
onnxruntime_nightly: false
torch: torch

steps:
Expand All @@ -15,11 +16,10 @@ steps:
- script: python -m pip install .
displayName: Install Olive

- ${{ if startsWith(parameters.onnxruntime, 'ort-nightly') }}:
- ${{ if eq(parameters.onnxruntime_nightly, true) }}:
- script: |
pip install onnxruntime
pip uninstall -y onnxruntime
pip install ${{ parameters.onnxruntime }} --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt
pip install ${{ parameters.onnxruntime }} --pre --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --disable-pip-version-check
displayName: Install ${{ parameters.onnxruntime }}
- ${{ else }}:
- script: |
Expand Down
Loading

0 comments on commit b0896bb

Please sign in to comment.