Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: aws/aws-ofi-nccl
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 7d8d380425baccc0cedf2fcf4df0cfd3a48a3528
Choose a base ref
..
head repository: aws/aws-ofi-nccl
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 31497a5bef6cc3f48f303e9f4547f5193e9cf8ca
Choose a head ref
Showing with 217 additions and 334 deletions.
  1. +1 −1 .ci/aws/Jenkinsfile
  2. +5 −12 include/nccl_ofi_rdma.h
  3. +211 −321 src/nccl_ofi_rdma.c
2 changes: 1 addition & 1 deletion .ci/aws/Jenkinsfile
Original file line number Diff line number Diff line change
@@ -172,7 +172,7 @@ pipeline {
def nccl_test_iter = "--test-aws-ofi-nccl-nccltest-iterations 5"
def efa_installer = "--use-prebuilt-ami-with-efa-installer true"

def persistent_manual_cluster_addl_args = " --keep-cluster --skip-fixture-setup --skip-health-checks --use-existing-installer --cleanup-pf-directory --enable-placement-group false"
def persistent_manual_cluster_addl_args = " --keep-cluster --skip-fixture-setup --skip-health-checks --use-existing-installer --cleanup-pf-directory --enable-placement-group false --lean-cluster-setup"
def container_addl_args = " --test-in-containers-on-ec2"

def base_args = "${efa_installer} ${nccl_version} ${timeout} ${cluster_type} ${test_target} ${test_type} ${build_type} ${pr_num} ${nccl_test_iter} ${persistent_manual_cluster_addl_args}"
17 changes: 5 additions & 12 deletions include/nccl_ofi_rdma.h
Original file line number Diff line number Diff line change
@@ -124,17 +124,13 @@ typedef uint16_t nccl_ofi_rdma_msg_type_t;
* allocate a RDMA memory registration handle with `num_rails`+`num_control_rails` rails.
*/
typedef struct nccl_net_ofi_rdma_mr_handle {

/* Number of entries in the `mr' array */
int num_rails;

/* Number of entries in the `control_mr' array */
int num_control_rails;
/* value of mr key id, if keys must be requested */
int mr_key;

/* Array of size `num_rails' */
struct fid_mr **mr;

/* Array of size `num_control_rails' */
struct fid_mr **control_mr;

} nccl_net_ofi_rdma_mr_handle_t;

/* Contents of ctrl message sent from receiver to sender to advertise
@@ -599,9 +595,6 @@ typedef struct nccl_net_ofi_rdma_recv_comm {
/* Comm ID provided by remote endpoint */
uint32_t remote_comm_id;

/* The flush buffer */
nccl_net_ofi_rdma_flush_buffer_t flush_buff;

uint16_t next_msg_seq_num;

nccl_ofi_msgbuff_t *msgbuff;
@@ -688,9 +681,6 @@ struct nccl_net_ofi_ep_rail {
/* Completion Queue handle */
struct fid_cq *cq;

/* Access domain handles */
struct fid_domain *domain;

/*
* Rx buffer management
*/
@@ -860,6 +850,9 @@ typedef struct nccl_net_ofi_rdma_domain {
int num_rails;
nccl_net_ofi_rdma_domain_rail_t *domain_rails;

/* The flush buffer */
nccl_net_ofi_rdma_flush_buffer_t flush_buff;

/* List of endpoints and set of addresses they have connections to */
nccl_ofi_ep_addr_list_t *ep_addr_list;
} nccl_net_ofi_rdma_domain_t;
Loading