docker: add some tweaks from Ken

mloubout · mloubout · commit 34cfdf4c0e4e · 2023-07-27T10:54:02.000-04:00
diff --git a/.github/workflows/docker-bases.yml b/.github/workflows/docker-bases.yml
@@ -162,47 +162,6 @@ jobs:
           build-args: 'arch=nvc-host'
           tags: 'devitocodes/bases:cpu-nvc'
 
-#######################################################
-################### Nvidia clang ######################
-#######################################################
-  deploy-nvidia-clang-base:
-    name: "nvidia-clang-base"
-    runs-on: ["self-hosted", "nvidiagpu"]
-    env:
-      DOCKER_BUILDKIT: "1"
-
-    steps:
-      - name: Checkout devito
-        uses: actions/checkout@v3
-
-      - name: Check event name
-        run: echo ${{ github.event_name }}
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Login to DockerHub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
-      - name: cleanup
-        run: docker system prune -a -f
-
-      - name: Nvidia clang image
-        uses: docker/build-push-action@v3
-        with:
-          context: .
-          file: './docker/Dockerfile.nvidia'
-          push: true
-          target: 'clang'
-          build-args: 'arch=clang'
-          tags: 'devitocodes/bases:nvidia-clang'
-
 #######################################################
 ##################### AMD #############################
 #######################################################
@@ -252,4 +211,47 @@ jobs:
           target: 'hip'
           build-args: |
             arch=hip
-          tags: devitocodes/bases:amd-hip
+          tags: devitocodes/bases:amd-hip
+
+#######################################################
+################### Nvidia clang ######################
+# Currently disabled: nvc+OpenACC is the configuration of choice, and building this image is time- and resource-consuming.
+# Kept as legacy in case we want to reactivate it.
+#######################################################
+# deploy-nvidia-clang-base:
+#   name: "nvidia-clang-base"
+#   runs-on: ["self-hosted", "nvidiagpu"]
+#   env:
+#     DOCKER_BUILDKIT: "1"
+
+#   steps:
+#     - name: Checkout devito
+#       uses: actions/checkout@v3
+
+#     - name: Check event name
+#       run: echo ${{ github.event_name }}
+
+#     - name: Set up QEMU
+#       uses: docker/setup-qemu-action@v2
+
+#     - name: Set up Docker Buildx
+#       uses: docker/setup-buildx-action@v2
+
+#     - name: Login to DockerHub
+#       uses: docker/login-action@v2
+#       with:
+#         username: ${{ secrets.DOCKER_USERNAME }}
+#         password: ${{ secrets.DOCKER_PASSWORD }}
+
+#     - name: cleanup
+#       run: docker system prune -a -f
+
+#     - name: Nvidia clang image
+#       uses: docker/build-push-action@v3
+#       with:
+#         context: .
+#         file: './docker/Dockerfile.nvidia'
+#         push: true
+#         target: 'clang'
+#         build-args: 'arch=clang'
+#         tags: 'devitocodes/bases:nvidia-clang'
diff --git a/.github/workflows/docker-devito.yml b/.github/workflows/docker-devito.yml
@@ -24,12 +24,6 @@ jobs:
             test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py'
             runner: ["self-hosted", "nvidiagpu"]
 
-          - base: 'bases:nvidia-clang'
-            tag: 'nvidia-clang'
-            flag: '--gpus all'
-            test: 'tests/test_gpu_openmp.py tests/test_gpu_common.py'
-            runner: ["self-hosted", "nvidiagpu"]
-
           # Runtime gpu flags from https://hub.docker.com/r/rocm/tensorflow/
           - base: 'bases:amd'
             tag: 'amd'
diff --git a/.github/workflows/pytest-gpu.yml b/.github/workflows/pytest-gpu.yml
@@ -46,20 +46,12 @@ jobs:
 
       matrix:
         name: [
-          pytest-gpu-omp-nvidia,
           pytest-gpu-acc-nvidia,
           pytest-gpu-omp-amd
         ]
         test_examples: ["examples/seismic/tti/tti_example.py examples/seismic/acoustic/acoustic_example.py examples/seismic/viscoacoustic/viscoacoustic_example.py examples/seismic/viscoelastic/viscoelastic_example.py examples/seismic/elastic/elastic_example.py"]
 
         include:
-        - name: pytest-gpu-omp-nvidia
-          test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py"
-          base: "devitocodes/bases:nvidia-clang"
-          tags: ["self-hosted", "nvidiagpu"]
-          test_drive_cmd: "nvidia-smi"
-          flags: '--gpus all --rm --name testrun-clang-nvidia'
-
         - name: pytest-gpu-acc-nvidia
           test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py"
           base: "devitocodes/bases:nvidia-nvc"
diff --git a/docker/Dockerfile.nvidia b/docker/Dockerfile.nvidia
@@ -8,11 +8,16 @@ ARG arch="nvc"
 ########################################################################
 # Build base image with apt setup and common env
 ########################################################################
-FROM ${pyversion}-slim-bullseye as sdk-base
+FROM ubuntu:22.04 as sdk-base
 
 ENV DEBIAN_FRONTEND noninteractive
 
-RUN apt-get update -y && apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl
+RUN apt-get update -y && \
+    apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl
+
+# Install Python
+RUN apt-get update && \
+    apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip    
 
 # nodesource: nvdashboard requires nodejs>=10
 RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
@@ -81,7 +86,7 @@ RUN export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/Linux_x86_64/ | grep '\.' | head -
     ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl
 
 # Starting nvhpc 23.5 and cuda 12.1, hpcx and openmpi are inside the cuda version folder, only the bin is in the comm_libs path
-RUN export CUDA_V=$(ls /opt/nvhpc/${NVARCH}/cuda/ | grep '\.') && \
+RUN export CUDA_V=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p') && \
     ls /opt/nvhpc/comm_libs/${CUDA_V}/hpcx/ &&\
     if [ -d /opt/nvhpc/comm_libs/${CUDA_V}/hpcx ]; then \
         rm -rf /opt/nvhpc/comm_libs/hpcx && rm -rf /opt/nvhpc/comm_libs/openmpi4 && \
diff --git a/requirements-nvidia.txt b/requirements-nvidia.txt
@@ -1,4 +1,4 @@
-cupy-cuda110
+cupy-cuda12x
 dask-cuda
 jupyterlab>=3
 jupyterlab-nvdashboard

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-cupy-cuda110`
	`1`	`+cupy-cuda12x`
`2`	`2`	`dask-cuda`
`3`	`3`	`jupyterlab>=3`
`4`	`4`	`jupyterlab-nvdashboard`