Skip to content

Commit f2abb55

Browse files
Merge multiple Dockerfiles into a single one (#2167)
Merge multiple Dockerfiles into a single one --------- Signed-off-by: Simon Zhao <[email protected]> Co-authored-by: Miguel Fierro <[email protected]>
1 parent 12bc1e4 commit f2abb55

File tree

7 files changed

+360
-350
lines changed

7 files changed

+360
-350
lines changed

.devcontainer/devcontainer.json

+29-25
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,18 @@
11
{
22
"name": "Recommenders",
3-
// Version list: https://github.com/devcontainers/images/tree/main/src/base-ubuntu
4-
// Includes: curl, wget, ca-certificates, git, Oh My Zsh!,
5-
"image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04",
6-
"hostRequirements": {
7-
"cpus": 4,
8-
"memory": "16gb",
9-
"storage": "32gb"
10-
},
11-
"features": {
12-
// https://github.com/devcontainers/features/blob/main/src/anaconda/devcontainer-feature.json
13-
"ghcr.io/devcontainers/features/anaconda:1": {
14-
"version": "2024.06-1"
3+
"build": {
4+
"dockerfile": "../tools/docker/Dockerfile",
5+
"context": "..",
6+
"target": "deps",
7+
"args": {
8+
"COMPUTE": "cpu",
9+
"PYTHON_VERSION": "3.11"
1510
}
1611
},
1712
"customizations": {
1813
"vscode": {
19-
// Set *default* container specific settings.json values on container create.
14+
// Set default container specific settings.json values on container
15+
// create
2016
"settings": {
2117
"[python]": {
2218
"editor.defaultFormatter": "ms-python.black-formatter",
@@ -27,24 +23,32 @@
2723
},
2824
"isort.args": ["--profile", "black"],
2925
"python.analysis.autoImportCompletions": true,
30-
"python.defaultInterpreterPath": "/usr/local/conda/envs/Recommenders/bin/python",
26+
// Conda env name *must* align with the one in Dockerfile
27+
"python.defaultInterpreterPath": "/root/conda/envs/Recommenders/bin/python",
3128
"python.testing.pytestEnabled": true,
32-
// set the directory where all tests are
29+
// Test directory
3330
"python.testing.pytestArgs": ["tests"]
3431
},
35-
// Add the IDs of extensions you want installed when the container is created.
32+
// VS Code extensions to install on container create
3633
"extensions": [
37-
"ms-python.black-formatter", // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter
38-
"ms-python.isort", // https://marketplace.visualstudio.com/items?itemName=ms-python.isort
39-
"ms-python.mypy-type-checker", // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker
40-
"ms-python.pylint", // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint
41-
"ms-python.python", // https://marketplace.visualstudio.com/items?itemName=ms-python.python
42-
"ms-toolsai.datawrangler", // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler
43-
"ms-toolsai.jupyter" // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
34+
// https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter
35+
"ms-python.black-formatter",
36+
// https://marketplace.visualstudio.com/items?itemName=ms-python.isort
37+
"ms-python.isort",
38+
// https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker
39+
"ms-python.mypy-type-checker",
40+
// https://marketplace.visualstudio.com/items?itemName=ms-python.pylint
41+
"ms-python.pylint",
42+
// https://marketplace.visualstudio.com/items?itemName=ms-python.python
43+
"ms-python.python",
44+
// https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler
45+
"ms-toolsai.datawrangler",
46+
// https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
47+
"ms-toolsai.jupyter"
4448
]
4549
}
4650
},
4751

48-
// Use 'postCreateCommand' to run commands after the container is created.
49-
"postCreateCommand": "conda create -n Recommenders -c conda-forge -y python=3.10 openjdk=21 pip && conda init bash && bash -c -i 'conda activate Recommenders && pip install -e .[dev,spark]' && conda config --set auto_activate_base false"
52+
// Install Recommenders in development mode after container create
53+
"postCreateCommand": "bash -i -c 'conda activate Recommenders && conda install -c conda-forge -y openjdk=21 && pip install -e .[dev,spark]'"
5054
}

SETUP.md

+85
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,91 @@ git checkout staging
145145
pip install -e .[all]
146146
```
147147

148+
We also provide a [devcontainer.json](./.devcontainer/devcontainer.json)
149+
and [Dockerfile](./tools/docker/Dockerfile) for developers to
150+
facilitate the development on
151+
[Dev Containers with VS Code](https://code.visualstudio.com/docs/devcontainers/containers)
152+
and [GitHub Codespaces](https://github.com/features/codespaces).
153+
154+
<details>
155+
<summary><strong><em>VS Code Dev Containers</em></strong></summary>
156+
157+
The typical scenario using Docker containers for development is as
158+
follows. Say, we want to develop applications for a specific
159+
environment, so
160+
1. we create a container with the dependencies required,
161+
1. and mount the folder containing the code to the container,
162+
1. then code parsing, debugging and testing are all performed against
163+
the container.
164+
This workflow separates the development environment from your local
165+
environment, so that your local environment won't be affected. The
166+
container used here for this end is called Dev Container in the
167+
VS Code Dev Containers extension. And the extension eases this
168+
development workflow with Docker containers automatically without
169+
pain.
170+
171+
To use VS Code Dev Containers, your local machine must have the
172+
following applications installed:
173+
* [Docker](https://docs.docker.com/get-started/get-docker/)
174+
* [VS Code Remote Development Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack)
175+
176+
Then
177+
* When you open your local Recommenders folder in VS Code, it will
178+
detect [devcontainer.json](./.devcontainer/devcontainer.json), and
179+
prompt you to **Reopen in Container**. If you'd like to reopen,
180+
it will create a container with the required environment described
181+
in `devcontainer.json`, install a VS Code server in the container,
182+
and mount the folder into the container.
183+
+ If you don't see the prompt, you can use the command
184+
**Dev Containers: Reopen in Container**
185+
* If you don't have a local clone of Recommenders, you can also use
186+
the command **Dev Containers: Clone Repository in Container Volume**,
187+
and type in a branch/PR URL of Recommenders you'd like to develop
188+
on, such as https://github.com/recommenders-team/recommenders,
189+
https://github.com/recommenders-team/recommenders/tree/staging, or
190+
https://github.com/recommenders-team/recommenders/pull/2098. VS
191+
Code will create a container with the environment described in
192+
`devcontainer.json`, and clone the specified branch of Recommenders
193+
into the container.
194+
195+
Once everything is set up, VS Code will act as a client to the server
196+
in the container, and all subsequent operations on VS Code will be
197+
performed against the container.
198+
199+
</details>
200+
201+
<details>
202+
<summary><strong><em>GitHub Codespaces</em></strong></summary>
203+
204+
GitHub Codespaces also uses `devcontainer.json` and Dockerfile in the
205+
repo to create the environment on a VM for you to develop on the Web
206+
VS Code. To use the GitHub Codespaces on Recommenders, you can go to
207+
[Recommenders](https://github.com/recommenders-team/recommenders)
208+
$\to$ switch to the branch of interest $\to$ Code $\to$ Codespaces
209+
$\to$ Create codespaces on the branch.
210+
211+
</details>
212+
213+
<details>
214+
<summary><strong><em>devcontainer.json & Dockerfile</em></strong></summary>
215+
216+
[devcontainer.json](./.devcontainer/devcontainer.json) describes:
217+
* the Dockerfile to use with configurable build arguments, such as
218+
`COMPUTE` and `PYTHON_VERSION`.
219+
* settings on VS Code server, such as Python interpreter path in the
220+
container, Python formatter.
221+
* extensions on VS Code server, such as black-formatter, pylint.
222+
* how to install Recommenders into the Conda environment in
223+
`postCreateCommand`
224+
225+
[Dockerfile](./tools/docker/Dockerfile) is used in 3 places:
226+
* Dev containers on VS Code and GitHub Codespaces
227+
* [Testing workflows on AzureML](./tests/README.md)
228+
* [Jupyter notebook examples on Docker](./tools/docker/README.md)
229+
230+
</details>
231+
232+
148233
## Test Environments
149234

150235
Depending on the type of recommender system and the notebook that needs to be run, there are different computational requirements.

tests/README.md

+20-3
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,26 @@ GitHub workflows `azureml-unit-tests.yml`, `azureml-cpu-nightly.yml`, `azureml-g
6363

6464
There are three scripts used with each workflow, all of them are located in [ci/azureml_tests](./ci/azureml_tests/):
6565

66-
* `submit_groupwise_azureml_pytest.py`: this script uses parameters in the workflow yml to set up the AzureML environment for testing using the AzureML SDK.
67-
* `run_groupwise_pytest.py`: this script uses pytest to run the tests of the libraries and notebooks. This script runs in an AzureML workspace with the environment created by the script above.
68-
* `test_groups.py`: this script defines the groups of tests. If the tests are part of the unit tests, the total compute time of each group should be less than 15min. If the tests are part of the nightly builds, the total time of each group should be less than 35min.
66+
* [`submit_groupwise_azureml_pytest.py`](./ci/azureml_tests/submit_groupwise_azureml_pytest.py):
67+
this script uses parameters in the workflow yml to set up the
68+
AzureML environment for testing using the AzureML SDK.
69+
* [`run_groupwise_pytest.py`](./ci/azureml_tests/run_groupwise_pytest.py):
70+
this script uses pytest to run the tests of the libraries and
71+
notebooks. This script runs in an AzureML workspace with the
72+
environment created by the script above.
73+
* [`aml_utils.py`](./ci/azureml_tests/aml_utils.py): this script
74+
defines several utility functions using
75+
[the AzureML Python SDK v2](https://learn.microsoft.com/en-us/azure/machine-learning/concept-v2?view=azureml-api-2).
76+
These functions are used by the scripts above to set up the compute and
77+
the environment for the tests on AzureML. For example, the
78+
environment with all dependencies of Recommenders is created by the
79+
function `get_or_create_environment` via the [Dockerfile](../tools/docker/Dockerfile).
80+
More details on Docker support can be found at [tools/docker/README.md](../tools/docker/README.md).
81+
* [`test_groups.py`](./ci/azureml_tests/test_groups.py): this script
82+
defines the groups of tests. If the tests are part of the unit
83+
tests, the total compute time of each group should be less than
84+
15min. If the tests are part of the nightly builds, the total time
85+
of each group should be less than 35min.
6986

7087
## How to contribute tests to the repository
7188

tests/ci/azureml_tests/aml_utils.py

+34-76
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@
88
* https://learn.microsoft.com/en-us/azure/machine-learning/reference-migrate-sdk-v1-mlflow-tracking?view=azureml-api-2&tabs=aml%2Ccli%2Cmlflow
99
"""
1010
import pathlib
11-
import tempfile
11+
import re
1212

1313
from azure.ai.ml import MLClient, command
1414
from azure.ai.ml.entities import AmlCompute, BuildContext, Environment, Workspace
1515
from azure.ai.ml.exceptions import JobException
1616
from azure.core.exceptions import ResourceExistsError
1717
from azure.identity import DefaultAzureCredential
1818

19+
1920
def get_client(subscription_id, resource_group, workspace_name):
2021
"""
2122
Get the client with specified AzureML workspace, or create one if not existing.
@@ -61,9 +62,8 @@ def get_or_create_environment(
6162
environment_name,
6263
use_gpu,
6364
use_spark,
64-
conda_pkg_jdk,
65+
conda_openjdk_version,
6566
python_version,
66-
commit_sha,
6767
):
6868
"""
6969
AzureML requires the run environment to be setup prior to submission.
@@ -77,81 +77,39 @@ def get_or_create_environment(
7777
added to the conda environment, else False
7878
use_spark (bool): True if PySpark packages should be
7979
added to the conda environment, else False
80-
conda_pkg_jdk (str): "openjdk=8" by default
81-
python_version (str): python version, such as "3.9"
82-
commit_sha (str): the commit that triggers the workflow
80+
conda_openjdk_version (str): "21" by default
81+
python_version (str): python version, such as "3.11"
8382
"""
84-
conda_env_name = "reco"
85-
conda_env_yml = "environment.yml"
86-
condafile = fr"""
87-
name: {conda_env_name}
88-
channels:
89-
- conda-forge
90-
dependencies:
91-
- python={python_version}
92-
- {conda_pkg_jdk}
93-
- pip
94-
- pip:
95-
- recommenders[dev{",gpu" if use_gpu else ""}{",spark" if use_spark else ""}]@git+https://github.com/recommenders-team/recommenders.git@{commit_sha}
96-
"""
97-
# See https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
98-
image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
99-
# See https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
100-
dockerfile = fr"""# syntax=docker/dockerfile:1
101-
FROM nvcr.io/nvidia/cuda:12.5.1-devel-ubuntu22.04
102-
SHELL ["/bin/bash", "-c"]
103-
USER root:root
104-
ENV NVIDIA_VISIBLE_DEVICES all
105-
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
106-
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
107-
ENV DEBIAN_FRONTEND noninteractive
108-
RUN apt-get update && \
109-
apt-get install -y wget git-all && \
110-
apt-get clean -y && \
111-
rm -rf /var/lib/apt/lists/*
112-
113-
# Install Conda
114-
ENV CONDA_PREFIX /opt/miniconda
115-
RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_24.5.0-0-Linux-x86_64.sh && \
116-
bash /tmp/miniconda.sh -bf -p ${{CONDA_PREFIX}} && \
117-
${{CONDA_PREFIX}}/bin/conda update --all -c conda-forge -y && \
118-
${{CONDA_PREFIX}}/bin/conda clean -ay && \
119-
rm -rf ${{CONDA_PREFIX}}/pkgs && \
120-
rm /tmp/miniconda.sh && \
121-
find / -type d -name __pycache__ | xargs rm -rf
122-
123-
# Create Conda environment
124-
COPY {conda_env_yml} /tmp/{conda_env_yml}
125-
RUN ${{CONDA_PREFIX}}/bin/conda env create -f /tmp/{conda_env_yml}
126-
127-
# Activate Conda environment
128-
ENV CONDA_DEFAULT_ENV {conda_env_name}
129-
ENV CONDA_PREFIX ${{CONDA_PREFIX}}/envs/${{CONDA_DEFAULT_ENV}}
130-
ENV PATH="${{CONDA_PREFIX}}/bin:${{PATH}}" LD_LIBRARY_PATH="${{CONDA_PREFIX}}/lib:$LD_LIBRARY_PATH"
131-
"""
132-
133-
with tempfile.TemporaryDirectory() as tmpdir:
134-
tmpdir = pathlib.Path(tmpdir)
135-
dockerfile_path = tmpdir / "Dockerfile"
136-
condafile_path = tmpdir / conda_env_yml
137-
build = BuildContext(path=tmpdir, dockerfile_path=dockerfile_path.name)
138-
139-
with open(dockerfile_path, "w") as file:
140-
file.write(dockerfile)
141-
with open(condafile_path, "w") as file:
142-
file.write(condafile)
143-
144-
try:
145-
client.environments.create_or_update(
146-
Environment(
147-
name=environment_name,
148-
image=None if use_gpu else image,
149-
build=build if use_gpu else None,
150-
conda_file=None if use_gpu else condafile_path,
151-
)
83+
compute = "gpu" if use_gpu else "cpu"
84+
extras = (
85+
"[dev" + (",gpu" if use_gpu else "") + (",spark" if use_spark else "") + "]"
86+
)
87+
dockerfile = pathlib.Path("tools/docker/Dockerfile")
88+
89+
# Docker's --build-arg is not supported by AzureML Python SDK v2 as shown
90+
# in [the issue #33902](https://github.com/Azure/azure-sdk-for-python/issues/33902)
91+
# so the build args are configured by regex substitution
92+
text = dockerfile.read_text()
93+
text = re.sub(r"(ARG\sCOMPUTE=).*", rf'\1"{compute}"', text)
94+
text = re.sub(r"(ARG\sEXTRAS=).*", rf'\1"{extras}"', text)
95+
text = re.sub(r"(ARG\sGIT_REF=).*", r'\1""', text)
96+
text = re.sub(r"(ARG\sJDK_VERSION=).*", rf'\1"{conda_openjdk_version}"', text)
97+
text = re.sub(r"(ARG\sPYTHON_VERSION=).*", rf'\1"{python_version}"', text)
98+
dockerfile.write_text(text)
99+
100+
try:
101+
client.environments.create_or_update(
102+
Environment(
103+
name=environment_name,
104+
build=BuildContext(
105+
# Set path for Docker to access to Recommenders root
106+
path=".",
107+
dockerfile_path=dockerfile,
108+
),
152109
)
153-
except ResourceExistsError:
154-
pass
110+
)
111+
except ResourceExistsError:
112+
pass
155113

156114

157115
def run_tests(

0 commit comments

Comments
 (0)