First step in fixing the BTL API conversion for the SMCUDA BTL
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
1f237f5fc9
Коммит
96e8cbe25f
@ -624,10 +624,10 @@ void mca_pml_ob1_recv_frag_callback_frag (mca_btl_base_module_t *btl,
|
|||||||
assert(btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV);
|
assert(btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV);
|
||||||
|
|
||||||
/* This will trigger the opal_convertor_pack to start asynchronous copy. */
|
/* This will trigger the opal_convertor_pack to start asynchronous copy. */
|
||||||
mca_pml_ob1_recv_request_frag_copy_start(recvreq,btl,segments,descriptor->des_segment_count,des);
|
mca_pml_ob1_recv_request_frag_copy_start(recvreq, btl, segments, descriptor->des_segment_count, NULL);
|
||||||
|
|
||||||
/* Let BTL know that it CANNOT free the frag */
|
/* Let BTL know that it CANNOT free the frag */
|
||||||
descriptor->des_flags |= MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC;
|
//TODO: GB: descriptor->des_flags |= MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -577,7 +577,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
|
|||||||
*/
|
*/
|
||||||
void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvreq,
|
void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvreq,
|
||||||
mca_btl_base_module_t* btl,
|
mca_btl_base_module_t* btl,
|
||||||
mca_btl_base_segment_t* segments,
|
const mca_btl_base_segment_t* segments,
|
||||||
size_t num_segments,
|
size_t num_segments,
|
||||||
mca_btl_base_descriptor_t* des)
|
mca_btl_base_descriptor_t* des)
|
||||||
{
|
{
|
||||||
|
@ -332,7 +332,7 @@ void mca_pml_ob1_recv_request_progress_frag(
|
|||||||
void mca_pml_ob1_recv_request_frag_copy_start(
|
void mca_pml_ob1_recv_request_frag_copy_start(
|
||||||
mca_pml_ob1_recv_request_t* req,
|
mca_pml_ob1_recv_request_t* req,
|
||||||
struct mca_btl_base_module_t* btl,
|
struct mca_btl_base_module_t* btl,
|
||||||
mca_btl_base_segment_t* segments,
|
const mca_btl_base_segment_t* segments,
|
||||||
size_t num_segments,
|
size_t num_segments,
|
||||||
mca_btl_base_descriptor_t* des);
|
mca_btl_base_descriptor_t* des);
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
@ -679,20 +679,15 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl,
|
|||||||
* BTL. It handles smcuda specific control messages that are triggered
|
* BTL. It handles smcuda specific control messages that are triggered
|
||||||
* when GPU memory transfers are initiated. */
|
* when GPU memory transfers are initiated. */
|
||||||
static void btl_smcuda_control(mca_btl_base_module_t* btl,
|
static void btl_smcuda_control(mca_btl_base_module_t* btl,
|
||||||
mca_btl_base_tag_t tag,
|
const mca_btl_base_receive_descriptor_t *descriptor)
|
||||||
mca_btl_base_descriptor_t* des, void* cbdata)
|
|
||||||
{
|
{
|
||||||
int mydevnum, ipcaccess, res;
|
int mydevnum, ipcaccess, res;
|
||||||
ctrlhdr_t ctrlhdr;
|
ctrlhdr_t ctrlhdr;
|
||||||
opal_proc_t *ep_proc;
|
opal_proc_t *ep_proc;
|
||||||
struct mca_btl_base_endpoint_t *endpoint;
|
|
||||||
mca_btl_smcuda_t *smcuda_btl = (mca_btl_smcuda_t *)btl;
|
mca_btl_smcuda_t *smcuda_btl = (mca_btl_smcuda_t *)btl;
|
||||||
mca_btl_smcuda_frag_t *frag = (mca_btl_smcuda_frag_t *)des;
|
const mca_btl_base_segment_t* segments = descriptor->des_segments;
|
||||||
mca_btl_base_segment_t* segments = des->des_segments;
|
struct mca_btl_base_endpoint_t *endpoint = descriptor->endpoint;
|
||||||
|
|
||||||
/* Use the rank of the peer that sent the data to get to the endpoint
|
|
||||||
* structure. This is needed for PML callback. */
|
|
||||||
endpoint = mca_btl_smcuda_component.sm_peers[frag->hdr->my_smp_rank];
|
|
||||||
ep_proc = endpoint->proc_opal;
|
ep_proc = endpoint->proc_opal;
|
||||||
|
|
||||||
/* Copy out control message payload to examine it */
|
/* Copy out control message payload to examine it */
|
||||||
@ -764,7 +759,6 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(endpoint->peer_smp_rank == frag->hdr->my_smp_rank);
|
|
||||||
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
|
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
|
||||||
"Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, "
|
"Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, "
|
||||||
"peerdev=%d --> ACCESS=%d",
|
"peerdev=%d --> ACCESS=%d",
|
||||||
@ -999,7 +993,6 @@ int mca_btl_smcuda_component_progress(void)
|
|||||||
/* local variables */
|
/* local variables */
|
||||||
mca_btl_base_segment_t seg;
|
mca_btl_base_segment_t seg;
|
||||||
mca_btl_smcuda_frag_t *frag;
|
mca_btl_smcuda_frag_t *frag;
|
||||||
mca_btl_smcuda_frag_t Frag;
|
|
||||||
sm_fifo_t *fifo = NULL;
|
sm_fifo_t *fifo = NULL;
|
||||||
mca_btl_smcuda_hdr_t *hdr;
|
mca_btl_smcuda_hdr_t *hdr;
|
||||||
int my_smp_rank = mca_btl_smcuda_component.my_smp_rank;
|
int my_smp_rank = mca_btl_smcuda_component.my_smp_rank;
|
||||||
@ -1046,7 +1039,6 @@ int mca_btl_smcuda_component_progress(void)
|
|||||||
switch(((uintptr_t)hdr) & MCA_BTL_SMCUDA_FRAG_TYPE_MASK) {
|
switch(((uintptr_t)hdr) & MCA_BTL_SMCUDA_FRAG_TYPE_MASK) {
|
||||||
case MCA_BTL_SMCUDA_FRAG_SEND:
|
case MCA_BTL_SMCUDA_FRAG_SEND:
|
||||||
{
|
{
|
||||||
mca_btl_active_message_callback_t* reg;
|
|
||||||
/* change the address from address relative to the shared
|
/* change the address from address relative to the shared
|
||||||
* memory address, to a true virtual address */
|
* memory address, to a true virtual address */
|
||||||
hdr = (mca_btl_smcuda_hdr_t *) RELATIVE2VIRTUAL(hdr);
|
hdr = (mca_btl_smcuda_hdr_t *) RELATIVE2VIRTUAL(hdr);
|
||||||
@ -1058,17 +1050,16 @@ int mca_btl_smcuda_component_progress(void)
|
|||||||
my_smp_rank, peer_smp_rank, j, FIFO_MAP(peer_smp_rank));
|
my_smp_rank, peer_smp_rank, j, FIFO_MAP(peer_smp_rank));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* recv upcall */
|
|
||||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
|
||||||
seg.seg_addr.pval = ((char *)hdr) + sizeof(mca_btl_smcuda_hdr_t);
|
seg.seg_addr.pval = ((char *)hdr) + sizeof(mca_btl_smcuda_hdr_t);
|
||||||
seg.seg_len = hdr->len;
|
seg.seg_len = hdr->len;
|
||||||
Frag.base.des_segment_count = 1;
|
|
||||||
Frag.base.des_segments = &seg;
|
mca_btl_active_message_callback_t *reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||||
#if OPAL_CUDA_SUPPORT
|
mca_btl_base_receive_descriptor_t recv_desc = {.endpoint = mca_btl_smcuda_component.sm_peers[peer_smp_rank],
|
||||||
Frag.hdr = hdr; /* needed for peer rank in control messages */
|
.des_segments = &seg,
|
||||||
#endif /* OPAL_CUDA_SUPPORT */
|
.des_segment_count = 1,
|
||||||
reg->cbfunc(&mca_btl_smcuda.super, hdr->tag, &(Frag.base),
|
.tag = hdr->tag,
|
||||||
reg->cbdata);
|
.cbdata = reg->cbdata};
|
||||||
|
reg->cbfunc(&mca_btl_smcuda.super, &recv_desc);
|
||||||
/* return the fragment */
|
/* return the fragment */
|
||||||
MCA_BTL_SMCUDA_FIFO_WRITE(
|
MCA_BTL_SMCUDA_FIFO_WRITE(
|
||||||
mca_btl_smcuda_component.sm_peers[peer_smp_rank],
|
mca_btl_smcuda_component.sm_peers[peer_smp_rank],
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user