Add runtime support to turn off CUDA IPC support.
This commit was SVN r29444.
Этот коммит содержится в:
родитель
9f83405c78
Коммит
0cd1e8dfd9
@@ -921,7 +921,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
#if OMPI_CUDA_SUPPORT
|
#if OMPI_CUDA_SUPPORT
|
||||||
/* Initiate setting up CUDA IPC support. */
|
/* Initiate setting up CUDA IPC support. */
|
||||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate)) {
|
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
|
||||||
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
||||||
}
|
}
|
||||||
#endif /* OMPI_CUDA_SUPPORT */
|
#endif /* OMPI_CUDA_SUPPORT */
|
||||||
@@ -1004,10 +1004,12 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
|
|||||||
mca_btl_smcuda_component_progress();
|
mca_btl_smcuda_component_progress();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OMPI_CUDA_SUPPORT
|
||||||
/* Initiate setting up CUDA IPC support */
|
/* Initiate setting up CUDA IPC support */
|
||||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate)) {
|
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
|
||||||
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
||||||
}
|
}
|
||||||
|
#endif /* OMPI_CUDA_SUPPORT */
|
||||||
|
|
||||||
/* available header space */
|
/* available header space */
|
||||||
frag->hdr->len = frag->segment.base.seg_len;
|
frag->hdr->len = frag->segment.base.seg_len;
|
||||||
|
@@ -13,7 +13,7 @@
|
|||||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@@ -205,6 +205,8 @@ struct mca_btl_smcuda_component_t {
|
|||||||
#if OMPI_CUDA_SUPPORT
|
#if OMPI_CUDA_SUPPORT
|
||||||
int cuda_ipc_verbose;
|
int cuda_ipc_verbose;
|
||||||
int cuda_ipc_output;
|
int cuda_ipc_output;
|
||||||
|
int use_cuda_ipc;
|
||||||
|
int use_cuda_ipc_same_gpu;
|
||||||
#endif /* OMPI_CUDA_SUPPORT */
|
#endif /* OMPI_CUDA_SUPPORT */
|
||||||
};
|
};
|
||||||
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
|
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
|
||||||
|
@@ -170,6 +170,8 @@ static int smcuda_register(void)
|
|||||||
} else {
|
} else {
|
||||||
mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW;
|
mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW;
|
||||||
}
|
}
|
||||||
|
mca_btl_smcuda_param_register_int("use_cuda_ipc", 1, &mca_btl_smcuda_component.use_cuda_ipc);
|
||||||
|
mca_btl_smcuda_param_register_int("use_cuda_ipc_same_gpu", 1, &mca_btl_smcuda_component.use_cuda_ipc_same_gpu);
|
||||||
mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, &mca_btl_smcuda_component.cuda_ipc_verbose);
|
mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, &mca_btl_smcuda_component.cuda_ipc_verbose);
|
||||||
mca_btl_smcuda_component.cuda_ipc_output = opal_output_open(NULL);
|
mca_btl_smcuda_component.cuda_ipc_output = opal_output_open(NULL);
|
||||||
opal_output_set_verbosity(mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component.cuda_ipc_verbose);
|
opal_output_set_verbosity(mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component.cuda_ipc_verbose);
|
||||||
@@ -734,12 +736,36 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for IPC support between devices. If the CUDA API call fails, then
|
/* Check for IPC support between devices. If they are the
|
||||||
* just move endpoint into bad state. No need to send a reply. */
|
* same device and use_cuda_ipc_same_gpu is 1 (default),
|
||||||
res = mca_common_cuda_device_can_access_peer(&ipcaccess, mydevnum, ctrlhdr.cudev);
|
* then assume CUDA IPC is possible. This could be a
|
||||||
if (0 != res) {
|
* device running in DEFAULT mode or running under MPS.
|
||||||
endpoint->ipcstate = IPC_BAD;
|
* Otherwise, check peer access to determine CUDA IPC
|
||||||
return;
|
* support. If the CUDA API call fails, then just move
|
||||||
|
* endpoint into bad state. No need to send a reply. */
|
||||||
|
if (mydevnum == ctrlhdr.cudev) {
|
||||||
|
if (mca_btl_smcuda_component.use_cuda_ipc_same_gpu) {
|
||||||
|
ipcaccess = 1;
|
||||||
|
} else {
|
||||||
|
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
|
||||||
|
"Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, "
|
||||||
|
"peerdev=%d --> Access is disabled by btl_smcuda_use_cuda_ipc_same_gpu",
|
||||||
|
endpoint->my_smp_rank, mydevnum, endpoint->peer_smp_rank,
|
||||||
|
ctrlhdr.cudev);
|
||||||
|
endpoint->ipcstate = IPC_BAD;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
res = mca_common_cuda_device_can_access_peer(&ipcaccess, mydevnum, ctrlhdr.cudev);
|
||||||
|
if (0 != res) {
|
||||||
|
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
|
||||||
|
"Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, "
|
||||||
|
"peerdev=%d --> Access is disabled because peer check failed with err=%d",
|
||||||
|
endpoint->my_smp_rank, mydevnum, endpoint->peer_smp_rank,
|
||||||
|
ctrlhdr.cudev, res);
|
||||||
|
endpoint->ipcstate = IPC_BAD;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(endpoint->peer_smp_rank == frag->hdr->my_smp_rank);
|
assert(endpoint->peer_smp_rank == frag->hdr->my_smp_rank);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user