Add runtime support to turn off CUDA IPC support.
This commit was SVN r29444.
Этот коммит содержится в:
родитель
9f83405c78
Коммит
0cd1e8dfd9
@ -921,7 +921,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
/* Initiate setting up CUDA IPC support. */
|
||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate)) {
|
||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
|
||||
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
@ -1004,10 +1004,12 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
|
||||
mca_btl_smcuda_component_progress();
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
/* Initiate setting up CUDA IPC support */
|
||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate)) {
|
||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
|
||||
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
|
||||
/* available header space */
|
||||
frag->hdr->len = frag->segment.base.seg_len;
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -205,6 +205,8 @@ struct mca_btl_smcuda_component_t {
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
int cuda_ipc_verbose;
|
||||
int cuda_ipc_output;
|
||||
int use_cuda_ipc;
|
||||
int use_cuda_ipc_same_gpu;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
};
|
||||
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
|
||||
|
@ -170,6 +170,8 @@ static int smcuda_register(void)
|
||||
} else {
|
||||
mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW;
|
||||
}
|
||||
mca_btl_smcuda_param_register_int("use_cuda_ipc", 1, &mca_btl_smcuda_component.use_cuda_ipc);
|
||||
mca_btl_smcuda_param_register_int("use_cuda_ipc_same_gpu", 1, &mca_btl_smcuda_component.use_cuda_ipc_same_gpu);
|
||||
mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, &mca_btl_smcuda_component.cuda_ipc_verbose);
|
||||
mca_btl_smcuda_component.cuda_ipc_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component.cuda_ipc_verbose);
|
||||
@ -734,12 +736,36 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl,
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check for IPC support between devices. If the CUDA API call fails, then
|
||||
* just move endpoint into bad state. No need to send a reply. */
|
||||
res = mca_common_cuda_device_can_access_peer(&ipcaccess, mydevnum, ctrlhdr.cudev);
|
||||
if (0 != res) {
|
||||
endpoint->ipcstate = IPC_BAD;
|
||||
return;
|
||||
/* Check for IPC support between devices. If they are the
|
||||
* same device and use_cuda_ipc_same_gpu is 1 (default),
|
||||
* then assume CUDA IPC is possible. This could be a
|
||||
* device running in DEFAULT mode or running under MPS.
|
||||
* Otherwise, check peer access to determine CUDA IPC
|
||||
* support. If the CUDA API call fails, then just move
|
||||
* endpoint into bad state. No need to send a reply. */
|
||||
if (mydevnum == ctrlhdr.cudev) {
|
||||
if (mca_btl_smcuda_component.use_cuda_ipc_same_gpu) {
|
||||
ipcaccess = 1;
|
||||
} else {
|
||||
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
|
||||
"Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, "
|
||||
"peerdev=%d --> Access is disabled by btl_smcuda_use_cuda_ipc_same_gpu",
|
||||
endpoint->my_smp_rank, mydevnum, endpoint->peer_smp_rank,
|
||||
ctrlhdr.cudev);
|
||||
endpoint->ipcstate = IPC_BAD;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
res = mca_common_cuda_device_can_access_peer(&ipcaccess, mydevnum, ctrlhdr.cudev);
|
||||
if (0 != res) {
|
||||
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
|
||||
"Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, "
|
||||
"peerdev=%d --> Access is disabled because peer check failed with err=%d",
|
||||
endpoint->my_smp_rank, mydevnum, endpoint->peer_smp_rank,
|
||||
ctrlhdr.cudev, res);
|
||||
endpoint->ipcstate = IPC_BAD;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
assert(endpoint->peer_smp_rank == frag->hdr->my_smp_rank);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user