From 3d32dbd7934f4016b646bdc6c78dfb9770852fe5 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 10 Mar 2015 14:34:38 -0600 Subject: [PATCH] btl/openib: cuda: fix CUDA-aware support with async copy This commit should resolve an issue seen with CUDA-aware support. The problem came in with BTL 3.0. Before 3.0, the size of the copy was stored in the incoming segment's des_remote_count field. This field does not exist in BTL 3.0, so I stored the value in the des_segment_count field. This caused problems with the CUDA support code. To fix the issue, the endpoint pointer is now stored in the in-fragment's endpoint pointer, which frees up the segment's des_cbdata pointer for storing the transfer size. Signed-off-by: Nathan Hjelm --- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 8 ++++---- opal/mca/btl/openib/btl_openib_component.c | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 2cbe5e64ca..36693e00ab 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -567,8 +567,8 @@ void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvr bytes_delivered ); /* Store the receive request in unused context pointer. */ des->des_context = (void *)recvreq; - /* Store the amount of bytes in unused remote count value */ - des->des_segment_count = bytes_delivered; + /* Store the amount of bytes in unused cbdata pointer */ + des->des_cbdata = (void *) (intptr_t) bytes_delivered; /* Then record an event that will get triggered by a PML progress call which * checks the stream events. If we get an error, abort. Should get message * from CUDA code about what went wrong. 
*/ @@ -593,12 +593,12 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl, int status ) { mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_context; - size_t bytes_received = des->des_segment_count; + size_t bytes_received = (size_t) (intptr_t) des->des_cbdata; OPAL_OUTPUT((-1, "frag_copy_finished (delivered=%d), frag=%p", (int)bytes_received, (void *)des)); /* Call into the BTL so it can free the descriptor. At this point, it is * known that the data has been copied out of the descriptor. */ - des->des_cbfunc(NULL, (struct mca_btl_base_endpoint_t *)des->des_cbdata, des, 0); + des->des_cbfunc(NULL, NULL, des, 0); OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 1d3a50e68d..b5831fbe59 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -3071,7 +3071,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, * up callback for PML to call when complete, add argument into * descriptor and return. */ des->des_cbfunc = btl_openib_handle_incoming_completion; - des->des_cbdata = (void *)ep; + to_in_frag(des)->endpoint = ep; return OPAL_SUCCESS; } #endif /* OPAL_CUDA_SUPPORT */ @@ -3180,6 +3180,8 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, int rqp = to_base_frag(frag)->base.order, cqp; uint16_t rcredits = 0, credits; + ep = to_in_frag (des)->endpoint; + OPAL_OUTPUT((-1, "handle_incoming_complete frag=%p", (void *)des)); if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {