69
69
"nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
70
70
"nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | "
71
71
"nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
72
- "nvidia-nccl-cu12==2.21.5 ; platform_system == 'Linux' and platform_machine == 'x86_64' | "
72
+ "nvidia-nccl-cu12==2.25.1 ; platform_system == 'Linux' and platform_machine == 'x86_64' | "
73
73
"nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
74
74
"nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'"
75
75
),
84
84
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
85
85
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
86
86
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
87
- "nvidia-nccl-cu12==2.21.5 ; platform_system == 'Linux' and platform_machine == 'x86_64' | "
87
+ "nvidia-nccl-cu12==2.25.1 ; platform_system == 'Linux' and platform_machine == 'x86_64' | "
88
88
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
89
89
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
90
90
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
100
100
"nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
101
101
"nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | "
102
102
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
103
- "nvidia-nccl-cu12==2.21.5 ; platform_system == 'Linux' and platform_machine == 'x86_64' | "
103
+ "nvidia-nccl-cu12==2.25.1 ; platform_system == 'Linux' and platform_machine == 'x86_64' | "
104
104
"nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
105
105
"nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | "
106
106
"nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'"
117
117
}
118
118
119
119
120
def get_nccl_submodule_version(
    repo_root: "str | os.PathLike[str] | None" = None,
) -> str:
    """Return the NCCL version declared by the checked-out NCCL submodule.

    The version is assembled from the ``NCCL_MAJOR``, ``NCCL_MINOR`` and
    ``NCCL_PATCH`` assignments found in
    ``third_party/nccl/nccl/makefiles/version.mk``.

    Args:
        repo_root: Directory that contains ``third_party``. When omitted, the
            directory two levels above this script's own directory is used.

    Returns:
        The version formatted as ``"<major>.<minor>.<patch>"``.

    Raises:
        RuntimeError: If ``version.mk`` is missing (submodule not checked
            out) or does not define one of the three version components.
    """
    from pathlib import Path

    root = (
        Path(__file__).absolute().parents[2]
        if repo_root is None
        else Path(repo_root)
    )
    nccl_version_mk = (
        root / "third_party" / "nccl" / "nccl" / "makefiles" / "version.mk"
    )
    if not nccl_version_mk.exists():
        raise RuntimeError(
            "Please make sure that nccl submodule is checked out when importing this script"
        )

    fields = {}
    with nccl_version_mk.open("r", encoding="utf-8") as f:
        for line in f:
            if not line.startswith("NCCL_"):
                continue
            # partition() tolerates lines without ":=" and values that
            # themselves contain ":=", where a two-way unpack of split()
            # would raise ValueError.
            key, sep, value = line.partition(":=")
            if not sep:
                continue
            fields[key.strip()] = value.strip()

    try:
        return f"{fields['NCCL_MAJOR']}.{fields['NCCL_MINOR']}.{fields['NCCL_PATCH']}"
    except KeyError as err:
        raise RuntimeError(
            f"{nccl_version_mk} does not define {err.args[0]}"
        ) from err
144
-
145
-
146
120
def get_nccl_wheel_version (arch_version : str ) -> str :
147
121
import re
148
122
@@ -154,12 +128,26 @@ def get_nccl_wheel_version(arch_version: str) -> str:
154
128
]
155
129
156
130
131
def read_nccl_pin(
    arch_version: str,
    repo_root: "str | os.PathLike[str] | None" = None,
) -> str:
    """Return the NCCL pin recorded for the given CUDA arch version.

    The pin is the stripped content of
    ``.ci/docker/ci_commit_pins/nccl-cu<NN>.txt``, where ``<NN>`` is the
    first two characters of ``arch_version``.

    Args:
        arch_version: CUDA version string; only its first two characters
            select the pin file (presumably the CUDA major version, e.g.
            ``"12"`` for ``"12.6"`` - confirm against callers).
        repo_root: Directory that contains ``.ci``. When omitted, the
            directory two levels above this script's own directory is used.

    Returns:
        The pin file content with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If no pin file exists for ``arch_version``.
    """
    from pathlib import Path

    root = (
        Path(__file__).absolute().parents[2]
        if repo_root is None
        else Path(repo_root)
    )
    nccl_pin_path = (
        root
        / ".ci"
        / "docker"
        / "ci_commit_pins"
        / f"nccl-cu{arch_version[:2]}.txt"
    )
    with nccl_pin_path.open(encoding="utf-8") as f:
        return f.read().strip()
143
+
144
+
157
145
def validate_nccl_dep_consistency(arch_version: str) -> None:
    """Check that the NCCL release tag pin matches the NCCL wheel version.

    The release tag returned by ``read_nccl_pin`` must be ``v<wheel_ver>``,
    optionally followed by a suffix that does not start with a digit, where
    ``wheel_ver`` is the value returned by ``get_nccl_wheel_version``.

    Args:
        arch_version: CUDA arch version forwarded unchanged to both lookups.

    Raises:
        RuntimeError: If the release tag does not correspond to the wheel
            version.
    """
    nccl_release_tag = read_nccl_pin(arch_version)
    wheel_ver = get_nccl_wheel_version(arch_version)
    expected_prefix = f"v{wheel_ver}"
    # A bare startswith() would accept tag "v2.25.10-1" for wheel "2.25.1";
    # reject tags where the version number continues with another digit.
    suffix = nccl_release_tag[len(expected_prefix):]
    if not nccl_release_tag.startswith(expected_prefix) or suffix[:1].isdigit():
        raise RuntimeError(
            f"{arch_version} NCCL release tag version {nccl_release_tag} "
            f"does not correspond to wheel version {wheel_ver}"
        )
164
152
165
153
@@ -356,7 +344,7 @@ def generate_wheels_matrix(
356
344
else arch_version
357
345
)
358
346
359
- # TODO: Enable python 3.13t cpu-s390x or MacOS or Windows
347
+ # TODO: Enable python 3.13t on cpu-s390x
360
348
if gpu_arch_type == "cpu-s390x" and python_version == "3.13t" :
361
349
continue
362
350
0 commit comments