1
1

btl/openib: cuda: fix CUDA-aware support with async copy

This commit should resolve an issue seen with CUDA-aware support. The
problem came in with BTL 3.0. Before 3.0 the size of the copy was
stored in the incoming segment's des_remote_count field. This field
does not exist in BTL 3.0 so I stored the value in the
des_segment_count field. This caused problems with the cuda support
code. To fix the issue, the endpoint pointer is now stored in the
incoming fragment's endpoint field, which frees up the segment's
des_cbdata pointer for storing the transfer size.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-03-10 14:34:38 -06:00
родитель d929137768
Коммит 3d32dbd793
2 изменённых файлов: 7 добавлений и 5 удалений

Просмотреть файл

@ -567,8 +567,8 @@ void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvr
bytes_delivered );
/* Store the receive request in unused context pointer. */
des->des_context = (void *)recvreq;
/* Store the amount of bytes in unused remote count value */
des->des_segment_count = bytes_delivered;
/* Store the amount of bytes in unused cbdata pointer */
des->des_cbdata = (void *) (intptr_t) bytes_delivered;
/* Then record an event that will get triggered by a PML progress call which
* checks the stream events. If we get an error, abort. Should get message
* from CUDA code about what went wrong. */
@ -593,12 +593,12 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
int status )
{
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_context;
size_t bytes_received = des->des_segment_count;
size_t bytes_received = (size_t) (intptr_t) des->des_cbdata;
OPAL_OUTPUT((-1, "frag_copy_finished (delivered=%d), frag=%p", (int)bytes_received, (void *)des));
/* Call into the BTL so it can free the descriptor. At this point, it is
* known that the data has been copied out of the descriptor. */
des->des_cbfunc(NULL, (struct mca_btl_base_endpoint_t *)des->des_cbdata, des, 0);
des->des_cbfunc(NULL, NULL, des, 0);
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);

Просмотреть файл

@ -3071,7 +3071,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
* up callback for PML to call when complete, add argument into
* descriptor and return. */
des->des_cbfunc = btl_openib_handle_incoming_completion;
des->des_cbdata = (void *)ep;
to_in_frag(des)->endpoint = ep;
return OPAL_SUCCESS;
}
#endif /* OPAL_CUDA_SUPPORT */
@ -3180,6 +3180,8 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
int rqp = to_base_frag(frag)->base.order, cqp;
uint16_t rcredits = 0, credits;
ep = to_in_frag (des)->endpoint;
OPAL_OUTPUT((-1, "handle_incoming_complete frag=%p", (void *)des));
if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {