3
3
#ifdef USE_C10D_NCCL
4
4
5
5
#include < sched.h>
6
- #include < stdio.h >
7
- #include < stdlib.h >
6
+ #include < cstdio >
7
+ #include < cstdlib >
8
8
9
9
#include < memory>
10
10
#include < mutex>
11
- #include < thread>
12
11
13
12
#include < ATen/ATen.h>
14
13
#include < ATen/cuda/CUDAEvent.h>
@@ -265,7 +264,7 @@ class TORCH_API DebugInfoWriter {
265
264
}
266
265
267
266
protected:
268
- DebugInfoWriter (std::string namePrefix, int rank) {
267
+ DebugInfoWriter (const std::string& namePrefix, int rank) {
269
268
filename_ = c10::str (namePrefix, rank);
270
269
}
271
270
std::string filename_;
@@ -278,14 +277,9 @@ class TORCH_API DebugInfoWriter {
278
277
// RAII wrapper for NCCL communicator
279
278
class NCCLComm {
280
279
public:
281
- explicit NCCLComm (ncclComm_t ncclComm)
282
- : aborted_(false ),
283
- ncclAsyncErr_(ncclSuccess),
284
- commFailureReason_(std::nullopt),
285
- initialized_(false ),
286
- ncclComm_(ncclComm) {}
280
+ explicit NCCLComm (ncclComm_t ncclComm) : ncclComm_(ncclComm) {}
287
281
288
- NCCLComm () : NCCLComm( nullptr ) {}
282
+ NCCLComm () = default ;
289
283
290
284
~NCCLComm () noexcept {
291
285
// Add lock in this destructor, as aborted_ needs to be read after memory
@@ -379,6 +373,7 @@ class NCCLComm {
379
373
NCCLComm& operator =(NCCLComm&& other) = delete ;
380
374
381
375
// Move constructible
376
+ // NOLINTNEXTLINE(.*-noexcept-move-.*)
382
377
NCCLComm (NCCLComm&& other) {
383
378
// Using other's lock, as it reads other's states
384
379
// Cannot use this->mutex_, as this object is still being constructed.
@@ -488,7 +483,7 @@ class NCCLComm {
488
483
" has already been registered on ncclComm_ " ,
489
484
ncclComm_);
490
485
491
- void * handle;
486
+ void * handle = nullptr ;
492
487
// Use getNcclComm to make sure comm is ready before calling nccl APIs
493
488
auto comm = getNcclComm ();
494
489
C10D_NCCL_CHECK (
@@ -544,16 +539,16 @@ class NCCLComm {
544
539
545
540
protected:
546
541
// Unique nccl_id for this communicator.
547
- ncclUniqueId ncclId_;
548
- bool aborted_;
542
+ ncclUniqueId ncclId_{} ;
543
+ bool aborted_{ false } ;
549
544
uint64_t ncclCommSplitCounter_{0 };
550
- ncclResult_t ncclAsyncErr_;
545
+ ncclResult_t ncclAsyncErr_{ncclSuccess} ;
551
546
mutable std::mutex mutex_;
552
547
// Rank that this communicator corresponds to.
553
- int rank_;
548
+ int rank_{} ;
554
549
// Optional reason for communicator failure, provided by ProcessGroupNCCL for
555
550
// better error messaging.
556
- std::optional<std::string> commFailureReason_;
551
+ std::optional<std::string> commFailureReason_{} ;
557
552
bool initialized_{false };
558
553
#ifdef NCCL_HAS_COMM_REGISTER
559
554
// Stores handlers for tensors registered by NCCL
@@ -572,7 +567,7 @@ struct ncclRedOpRAII {
572
567
: op_(op), comm_(comm), premul_sum_(true ) {}
573
568
ncclRedOpRAII (const ncclRedOpRAII&) = delete ;
574
569
ncclRedOpRAII& operator =(const ncclRedOpRAII&) = delete ;
575
- ncclRedOpRAII (ncclRedOpRAII&& tmp) : ncclRedOpRAII() {
570
+ ncclRedOpRAII (ncclRedOpRAII&& tmp) noexcept : ncclRedOpRAII() {
576
571
std::swap (tmp.op_ , this ->op_ );
577
572
std::swap (tmp.comm_ , this ->comm_ );
578
573
std::swap (tmp.premul_sum_ , this ->premul_sum_ );
@@ -587,8 +582,8 @@ struct ncclRedOpRAII {
587
582
operator ncclRedOp_t () const {
588
583
return op_;
589
584
}
590
- ncclRedOp_t op_;
591
- ncclComm_t comm_;
585
+ ncclRedOp_t op_{} ;
586
+ ncclComm_t comm_{} ;
592
587
bool premul_sum_ = false ;
593
588
};
594
589
0 commit comments