42
42
description : Artifact name in current run w/ manifest/patches. Leaving empty uses manifest/patches in current branch
43
43
default : ' '
44
44
required : false
45
# Optional NCCL package override. When this input is empty, the nccl-version job
# reads the package and version from the BASE_IMAGE environment instead.
# NOTE(review): the job parses this value with `cut -d= -f2 | cut -d+ -f1`, so a
# `name=version` form also appears to be accepted — confirm the intended format.
+ JAX_LIBNCCL_PACKAGE :
46
+ type : string
47
+ description : NCCL lib package version to be installed (in the format `2.19.3-1+cuda12.3`)
48
+ default : ' '
49
+ required : false
45
50
outputs :
46
51
DOCKER_TAG :
47
52
description : " Tag of the image built"
@@ -56,8 +61,44 @@ permissions:
56
61
packages : write # to upload container
57
62
58
63
jobs :
64
nccl-version:
  # Resolves the NCCL package string and NCCL version once, and exposes both as
  # job outputs; build-base reads them via `needs.nccl-version.outputs.*`.
  runs-on: ubuntu-22.04
  outputs:
    JAX_NCCL_VERSION: ${{ steps.get-nccl-version.outputs.JAX_NCCL_VERSION }}
    JAX_LIBNCCL_PACKAGE: ${{ steps.get-nccl-version.outputs.JAX_LIBNCCL_PACKAGE }}
  steps:
    - name: Print environment variables
      run: env

    - name: Check out the repository under ${GITHUB_WORKSPACE}
      uses: actions/checkout@v4

    - name: Get NCCL version
      id: get-nccl-version
      shell: bash -x -e {0}
      env:
        # Inputs are handed over as environment variables instead of being
        # expanded into the script text, so their content is never parsed as
        # shell code and cannot be word-split.
        REQUESTED_LIBNCCL_PACKAGE: ${{ inputs.JAX_LIBNCCL_PACKAGE }}
        REQUESTED_BASE_IMAGE: ${{ inputs.BASE_IMAGE }}
      run: |
        # Strip whitespace so a blank or space-only input counts as "not provided".
        JAX_LIBNCCL_PACKAGE="${REQUESTED_LIBNCCL_PACKAGE//[[:space:]]/}"
        if [[ -z "${JAX_LIBNCCL_PACKAGE}" ]]; then
          BASE_IMAGE="${REQUESTED_BASE_IMAGE}"
          if [[ "${BASE_IMAGE}" == latest ]]; then
            # "latest" means: use the default BASE_IMAGE declared in Dockerfile.base.
            BASE_IMAGE=$(sed -n "s/^ARG BASE_IMAGE=\(.*\)$/\1/p" .github/container/Dockerfile.base)
          fi
          # try to get NCCL version from provided BASE_IMAGE of x86-arch
          if [[ -z "${BASE_IMAGE}" ]]; then
            echo "Need to pass non-empty BASE_IMAGE variable"
            exit 1
          fi
          source .github/workflows/scripts/get_remote_env.sh
          # Query the image environment once and reuse it for both lookups.
          # Assumes get_remote_env prints a JSON array of KEY=VALUE strings
          # (it is piped through `jq -r '.[]'`) — TODO confirm against the helper.
          REMOTE_ENV=$(get_remote_env "${BASE_IMAGE}" linux amd64 | jq -r '.[]')
          # Match the exact key (anchored on '=') and keep only the value, so
          # look-alike variables with a longer name cannot leak extra lines
          # into the single-line output written below.
          JAX_LIBNCCL_PACKAGE=$(sed -n 's/^NV_LIBNCCL_PACKAGE=//p' <<< "${REMOTE_ENV}")
          JAX_NCCL_VERSION=$(sed -n 's/^NCCL_VERSION=//p' <<< "${REMOTE_ENV}")
          # Fail fast when the image does not advertise an NCCL package.
          if [[ -z "${JAX_LIBNCCL_PACKAGE}" ]]; then
            echo "NV_LIBNCCL_PACKAGE not found in the environment of ${BASE_IMAGE}"
            exit 1
          fi
        else
          # Derive the version from the package string: drop an optional
          # "name=" prefix, then everything from the first '+' onwards.
          JAX_NCCL_VERSION=$(echo "${JAX_LIBNCCL_PACKAGE}" | cut -d= -f2 | cut -d+ -f1)
        fi
        echo "JAX_NCCL_VERSION=${JAX_NCCL_VERSION}" >> "${GITHUB_OUTPUT}"
        echo "JAX_LIBNCCL_PACKAGE=${JAX_LIBNCCL_PACKAGE}" >> "${GITHUB_OUTPUT}"
59
99
60
100
build-base :
101
+ needs : nccl-version
61
102
runs-on : [self-hosted, "${{ inputs.ARCHITECTURE }}", small]
62
103
env :
63
104
BADGE_FILENAME_FULL : ${{ inputs.BADGE_FILENAME }}-${{ inputs.ARCHITECTURE }}.json
@@ -133,7 +174,9 @@ jobs:
133
174
GIT_USER_EMAIL=${{ inputs.GIT_USER_EMAIL }}
134
175
BUILD_DATE=${{ inputs.BUILD_DATE }}
135
176
${{ inputs.BASE_IMAGE != 'latest' && format('BASE_IMAGE={0}', inputs.BASE_IMAGE) || '' }}
136
-
177
+ JAX_NCCL_VERSION=${{ needs.nccl-version.outputs.JAX_NCCL_VERSION }}
178
+ JAX_LIBNCCL_PACKAGE=${{ needs.nccl-version.outputs.JAX_LIBNCCL_PACKAGE }}
179
+
137
180
- name : Generate sitrep
138
181
if : " !cancelled()"
139
182
shell : bash -x -e {0}
0 commit comments