37
37
"""
38
38
import argparse
39
39
import logging
40
- import glob
41
40
42
41
from azureml .core .authentication import AzureCliAuthentication
43
42
from azureml .core import Workspace
@@ -146,7 +145,6 @@ def setup_persistent_compute_target(workspace, cluster_name, vm_size, max_nodes)
146
145
147
146
def create_run_config (
148
147
cpu_cluster ,
149
- docker_proc_type ,
150
148
add_gpu_dependencies ,
151
149
add_spark_dependencies ,
152
150
conda_pkg_jdk ,
@@ -165,7 +163,6 @@ def create_run_config(
165
163
the following:
166
164
- Reco_cpu_test
167
165
- Reco_gpu_test
168
- docker_proc_type (str) : processor type, cpu or gpu
169
166
add_gpu_dependencies (bool) : True if gpu packages should be
170
167
added to the conda environment, else False
171
168
add_spark_dependencies (bool) : True if PySpark packages should be
@@ -179,7 +176,39 @@ def create_run_config(
179
176
run_azuremlcompute = RunConfiguration ()
180
177
run_azuremlcompute .target = cpu_cluster
181
178
run_azuremlcompute .environment .docker .enabled = True
182
- run_azuremlcompute .environment .docker .base_image = docker_proc_type
179
+ if not add_gpu_dependencies :
180
+ # https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
181
+ run_azuremlcompute .environment .docker .base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
182
+ else :
183
+ run_azuremlcompute .environment .docker .base_image = None
184
+ # Use the latest CUDA
185
+ # See
186
+ # * https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-with-custom-image?view=azureml-api-1#use-a-custom-dockerfile-optional
187
+ # * https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
188
+ run_azuremlcompute .environment .docker .base_dockerfile = r"""
189
+ FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04
190
+ USER root:root
191
+ ENV NVIDIA_VISIBLE_DEVICES all
192
+ ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
193
+ ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
194
+ ENV DEBIAN_FRONTEND noninteractive
195
+ RUN apt-get update && \
196
+ apt-get install -y wget git-all && \
197
+ apt-get clean -y && \
198
+ rm -rf /var/lib/apt/lists/*
199
+ # Conda Environment
200
+ ENV MINICONDA_VERSION py38_23.3.1-0
201
+ ENV PATH /opt/miniconda/bin:$PATH
202
+ ENV CONDA_PACKAGE 23.5.0
203
+ RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \
204
+ bash /tmp/miniconda.sh -bf -p /opt/miniconda && \
205
+ conda install conda=${CONDA_PACKAGE} -y && \
206
+ conda update --all -c conda-forge -y && \
207
+ conda clean -ay && \
208
+ rm -rf /opt/miniconda/pkgs && \
209
+ rm /tmp/miniconda.sh && \
210
+ find / -type d -name __pycache__ | xargs rm -rf
211
+ """
183
212
184
213
# Use conda_dependencies.yml to create a conda environment in
185
214
# the Docker image for execution
@@ -195,6 +224,7 @@ def create_run_config(
195
224
196
225
# install recommenders
197
226
reco_extras = "dev"
227
+ conda_dep .add_conda_package ("anaconda::git" )
198
228
if add_gpu_dependencies and add_spark_dependencies :
199
229
conda_dep .add_channel ("conda-forge" )
200
230
conda_dep .add_conda_package (conda_pkg_jdk )
@@ -326,13 +356,6 @@ def create_arg_parser():
326
356
default = "STANDARD_D3_V2" ,
327
357
help = "Set the size of the VM either STANDARD_D3_V2" ,
328
358
)
329
- # cpu or gpu
330
- parser .add_argument (
331
- "--dockerproc" ,
332
- action = "store" ,
333
- default = "cpu" ,
334
- help = "Base image used in docker container" ,
335
- )
336
359
# Azure subscription id, when used in a pipeline, it is stored in keyvault
337
360
parser .add_argument (
338
361
"--subid" , action = "store" , default = "123456" , help = "Azure Subscription ID"
@@ -421,16 +444,6 @@ def create_arg_parser():
421
444
422
445
logger = logging .getLogger ("submit_groupwise_azureml_pytest.py" )
423
446
args = create_arg_parser ()
424
-
425
- if args .dockerproc == "cpu" :
426
- from azureml .core .runconfig import DEFAULT_CPU_IMAGE
427
-
428
- docker_proc_type = DEFAULT_CPU_IMAGE
429
- else :
430
- from azureml .core .runconfig import DEFAULT_GPU_IMAGE
431
-
432
- docker_proc_type = DEFAULT_GPU_IMAGE
433
-
434
447
cli_auth = AzureCliAuthentication ()
435
448
436
449
workspace = setup_workspace (
@@ -450,7 +463,6 @@ def create_arg_parser():
450
463
451
464
run_config = create_run_config (
452
465
cpu_cluster = cpu_cluster ,
453
- docker_proc_type = docker_proc_type ,
454
466
add_gpu_dependencies = args .add_gpu_dependencies ,
455
467
add_spark_dependencies = args .add_spark_dependencies ,
456
468
conda_pkg_jdk = args .conda_pkg_jdk ,
0 commit comments