21
21
#include < random>
22
22
#include < resource_managers/resource_manager_core.hpp>
23
23
#include < utils.hpp>
24
+
25
+ #ifndef DISABLE_CUDF
24
26
#pragma GCC diagnostic push
25
27
#pragma GCC diagnostic ignored "-Wunused-variable"
26
28
#include < rmm/mr/device/cuda_memory_resource.hpp>
27
29
#include < rmm/mr/device/per_device_resource.hpp>
28
30
#include < rmm/mr/device/pool_memory_resource.hpp>
29
31
#pragma GCC diagnostic pop
32
+ #endif
30
33
31
34
namespace HugeCTR {
32
35
@@ -98,27 +101,6 @@ void ResourceManagerCore::enable_all_peer_accesses() {
98
101
}
99
102
}
100
103
101
- void ResourceManagerCore::initialize_rmm_resources () {
102
- const size_t pool_alloc_size = 256 * 1024 * 1024 ;
103
- using dmmr = rmm::mr::device_memory_resource;
104
- static const char * allow_set_char = getenv (" HCTR_RMM_SETTABLE" );
105
- bool allow_set = true ;
106
- if (allow_set_char && allow_set_char[0 ] == ' 0' ) {
107
- allow_set = false ;
108
- }
109
- CudaDeviceContext context;
110
- auto local_gpu_device_id_list = get_local_gpu_device_id_list ();
111
- for (size_t i = 0 ; i < local_gpu_device_id_list.size (); i++) {
112
- context.set_device (local_gpu_device_id_list[i]);
113
- base_cuda_mr_.emplace_back (std::make_shared<rmm::mr::cuda_memory_resource>());
114
- memory_resource_.emplace_back (std::make_shared<rmm::mr::pool_memory_resource<dmmr>>(
115
- base_cuda_mr_.back ().get (), pool_alloc_size));
116
- if (allow_set) {
117
- original_device_resource_.push_back (
118
- rmm::mr::set_current_device_resource (memory_resource_.back ().get ()));
119
- }
120
- }
121
- }
122
104
ResourceManagerCore::ResourceManagerCore (int num_process, int process_id, DeviceMap&& device_map,
123
105
unsigned long long seed)
124
106
: num_process_(num_process), process_id_(process_id), device_map_(std::move(device_map)) {
@@ -204,13 +186,17 @@ ResourceManagerCore::ResourceManagerCore(int num_process, int process_id, Device
204
186
205
187
all2all_warmup ();
206
188
189
+ #ifndef DISABLE_CUDF
207
190
initialize_rmm_resources ();
191
+ #endif
208
192
// int dev_id = 0;
209
193
// cudaGetDevice(&dev_id);
210
194
// HCTR_LOG(INFO, WORLD, "ResourceManagerCore ctor getCurrentDeviceId after rmm_init %d\n",
211
195
// dev_id);
212
196
}
197
+
213
198
ResourceManagerCore::~ResourceManagerCore () {
199
+ #ifndef DISABLE_CUDF
214
200
if (original_device_resource_.empty ()) {
215
201
return ;
216
202
}
@@ -220,6 +206,7 @@ ResourceManagerCore::~ResourceManagerCore() {
220
206
context.set_device (local_gpu_device_id_list[i]);
221
207
rmm::mr::set_current_device_resource (original_device_resource_[i]);
222
208
}
209
+ #endif
223
210
}
224
211
bool ResourceManagerCore::p2p_enabled (int src_device_id, int dst_device_id) const {
225
212
return p2p_matrix_[src_device_id][dst_device_id];
@@ -240,12 +227,35 @@ bool ResourceManagerCore::all_p2p_enabled() const {
240
227
return true ;
241
228
}
242
229
230
+ #ifndef DISABLE_CUDF
231
+ void ResourceManagerCore::initialize_rmm_resources () {
232
+ const size_t pool_alloc_size = 256 * 1024 * 1024 ;
233
+ using dmmr = rmm::mr::device_memory_resource;
234
+ static const char * allow_set_char = getenv (" HCTR_RMM_SETTABLE" );
235
+ bool allow_set = true ;
236
+ if (allow_set_char && allow_set_char[0 ] == ' 0' ) {
237
+ allow_set = false ;
238
+ }
239
+ CudaDeviceContext context;
240
+ auto local_gpu_device_id_list = get_local_gpu_device_id_list ();
241
+ for (size_t i = 0 ; i < local_gpu_device_id_list.size (); i++) {
242
+ context.set_device (local_gpu_device_id_list[i]);
243
+ base_cuda_mr_.emplace_back (std::make_shared<rmm::mr::cuda_memory_resource>());
244
+ memory_resource_.emplace_back (std::make_shared<rmm::mr::pool_memory_resource<dmmr>>(
245
+ base_cuda_mr_.back ().get (), pool_alloc_size));
246
+ if (allow_set) {
247
+ original_device_resource_.push_back (
248
+ rmm::mr::set_current_device_resource (memory_resource_.back ().get ()));
249
+ }
250
+ }
251
+ }
252
+
243
253
const std::shared_ptr<rmm::mr::device_memory_resource>&
244
254
ResourceManagerCore::get_device_rmm_device_memory_resource (int local_gpu_id) const {
245
255
auto dev_list = get_local_gpu_device_id_list ();
246
256
auto it = std::find (dev_list.begin (), dev_list.end (), local_gpu_id);
247
257
auto index = std::distance (dev_list.begin (), it);
248
258
return memory_resource_[index ];
249
259
}
250
-
260
+ # endif
251
261
} // namespace HugeCTR
0 commit comments