btl/openib: cuda: fix CUDA-aware support with async copy
This commit should resolve an issue seen with CUDA-aware support. The problem came in with BTL 3.0. Before 3.0, the size of the copy was stored in the incoming segment's des_remote_count field. This field does not exist in BTL 3.0, so I stored the value in the des_segment_count field instead. This caused problems with the CUDA support code. To fix the issue, the endpoint pointer is now stored in the incoming fragment's endpoint pointer, which frees up the descriptor's des_cbdata pointer for storing the transfer size. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
d929137768
Коммит
3d32dbd793
@ -567,8 +567,8 @@ void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvr
|
||||
bytes_delivered );
|
||||
/* Store the receive request in unused context pointer. */
|
||||
des->des_context = (void *)recvreq;
|
||||
/* Store the amount of bytes in unused remote count value */
|
||||
des->des_segment_count = bytes_delivered;
|
||||
/* Store the amount of bytes in unused cbdata pointer */
|
||||
des->des_cbdata = (void *) (intptr_t) bytes_delivered;
|
||||
/* Then record an event that will get triggered by a PML progress call which
|
||||
* checks the stream events. If we get an error, abort. Should get message
|
||||
* from CUDA code about what went wrong. */
|
||||
@ -593,12 +593,12 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
|
||||
int status )
|
||||
{
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_context;
|
||||
size_t bytes_received = des->des_segment_count;
|
||||
size_t bytes_received = (size_t) (intptr_t) des->des_cbdata;
|
||||
|
||||
OPAL_OUTPUT((-1, "frag_copy_finished (delivered=%d), frag=%p", (int)bytes_received, (void *)des));
|
||||
/* Call into the BTL so it can free the descriptor. At this point, it is
|
||||
* known that the data has been copied out of the descriptor. */
|
||||
des->des_cbfunc(NULL, (struct mca_btl_base_endpoint_t *)des->des_cbdata, des, 0);
|
||||
des->des_cbfunc(NULL, NULL, des, 0);
|
||||
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
|
||||
|
@ -3071,7 +3071,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
* up callback for PML to call when complete, add argument into
|
||||
* descriptor and return. */
|
||||
des->des_cbfunc = btl_openib_handle_incoming_completion;
|
||||
des->des_cbdata = (void *)ep;
|
||||
to_in_frag(des)->endpoint = ep;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
@ -3180,6 +3180,8 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
|
||||
int rqp = to_base_frag(frag)->base.order, cqp;
|
||||
uint16_t rcredits = 0, credits;
|
||||
|
||||
ep = to_in_frag (des)->endpoint;
|
||||
|
||||
OPAL_OUTPUT((-1, "handle_incoming_complete frag=%p", (void *)des));
|
||||
|
||||
if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user