From 96e8cbe25f22e0dc45bd14a6b5d929085e847a57 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 13 Jul 2020 14:40:40 -0400 Subject: [PATCH] First step on fixing the BTL API conversion for the SMCUDA BTL Signed-off-by: George Bosilca --- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 4 +-- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 2 +- ompi/mca/pml/ob1/pml_ob1_recvreq.h | 2 +- opal/mca/btl/smcuda/btl_smcuda_component.c | 33 ++++++++-------------- 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 0ead3e15bf..1e5c19d4ac 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -624,10 +624,10 @@ void mca_pml_ob1_recv_frag_callback_frag (mca_btl_base_module_t *btl, assert(btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV); /* This will trigger the opal_convertor_pack to start asynchronous copy. */ - mca_pml_ob1_recv_request_frag_copy_start(recvreq,btl,segments,descriptor->des_segment_count,des); + mca_pml_ob1_recv_request_frag_copy_start(recvreq, btl, segments, descriptor->des_segment_count, NULL); /* Let BTL know that it CANNOT free the frag */ - descriptor->des_flags |= MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC; + //TODO: GB: descriptor->des_flags |= MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC; return; } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index dc05e5c4ee..6bef26a387 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -577,7 +577,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq */ void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvreq, mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, + const mca_btl_base_segment_t* segments, size_t num_segments, mca_btl_base_descriptor_t* des) { diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index bcac6ac4d2..1d5123deeb 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -332,7 +332,7 @@ void mca_pml_ob1_recv_request_progress_frag( void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* req, struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, + const mca_btl_base_segment_t* segments, size_t num_segments, mca_btl_base_descriptor_t* des); diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index df07df06f1..fba4d3044a 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -679,20 +679,15 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl, * BTL. It handles smcuda specific control messages that are triggered * when GPU memory transfers are initiated. */ static void btl_smcuda_control(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, void* cbdata) + const mca_btl_base_receive_descriptor_t *descriptor) { int mydevnum, ipcaccess, res; ctrlhdr_t ctrlhdr; opal_proc_t *ep_proc; - struct mca_btl_base_endpoint_t *endpoint; mca_btl_smcuda_t *smcuda_btl = (mca_btl_smcuda_t *)btl; - mca_btl_smcuda_frag_t *frag = (mca_btl_smcuda_frag_t *)des; - mca_btl_base_segment_t* segments = des->des_segments; + const mca_btl_base_segment_t* segments = descriptor->des_segments; + struct mca_btl_base_endpoint_t *endpoint = descriptor->endpoint; - /* Use the rank of the peer that sent the data to get to the endpoint - * structure. This is needed for PML callback. */ - endpoint = mca_btl_smcuda_component.sm_peers[frag->hdr->my_smp_rank]; ep_proc = endpoint->proc_opal; /* Copy out control message payload to examine it */ @@ -764,7 +759,6 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl, } } - assert(endpoint->peer_smp_rank == frag->hdr->my_smp_rank); opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, "Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, " "peerdev=%d --> ACCESS=%d", @@ -999,7 +993,6 @@ int mca_btl_smcuda_component_progress(void) /* local variables */ mca_btl_base_segment_t seg; mca_btl_smcuda_frag_t *frag; - mca_btl_smcuda_frag_t Frag; sm_fifo_t *fifo = NULL; mca_btl_smcuda_hdr_t *hdr; int my_smp_rank = mca_btl_smcuda_component.my_smp_rank; @@ -1046,7 +1039,6 @@ int mca_btl_smcuda_component_progress(void) switch(((uintptr_t)hdr) & MCA_BTL_SMCUDA_FRAG_TYPE_MASK) { case MCA_BTL_SMCUDA_FRAG_SEND: { - mca_btl_active_message_callback_t* reg; /* change the address from address relative to the shared * memory address, to a true virtual address */ hdr = (mca_btl_smcuda_hdr_t *) RELATIVE2VIRTUAL(hdr); @@ -1058,17 +1050,16 @@ int mca_btl_smcuda_component_progress(void) my_smp_rank, peer_smp_rank, j, FIFO_MAP(peer_smp_rank)); } #endif - /* recv upcall */ - reg = mca_btl_base_active_message_trigger + hdr->tag; seg.seg_addr.pval = ((char *)hdr) + sizeof(mca_btl_smcuda_hdr_t); seg.seg_len = hdr->len; - Frag.base.des_segment_count = 1; - Frag.base.des_segments = &seg; -#if OPAL_CUDA_SUPPORT - Frag.hdr = hdr; /* needed for peer rank in control messages */ -#endif /* OPAL_CUDA_SUPPORT */ - reg->cbfunc(&mca_btl_smcuda.super, hdr->tag, &(Frag.base), - reg->cbdata); + + mca_btl_active_message_callback_t *reg = mca_btl_base_active_message_trigger + hdr->tag; + mca_btl_base_receive_descriptor_t recv_desc = {.endpoint = mca_btl_smcuda_component.sm_peers[peer_smp_rank], + .des_segments = &seg, + .des_segment_count = 1, + .tag = hdr->tag, + .cbdata = reg->cbdata}; + reg->cbfunc(&mca_btl_smcuda.super, &recv_desc); /* return the fragment */ MCA_BTL_SMCUDA_FIFO_WRITE( mca_btl_smcuda_component.sm_peers[peer_smp_rank],