Merge pull request #687 from rolfv/pr/fix-smcuda-perfprob
Add the ability to use different size buffers for host and CUDA buffers
Этот коммит содержится в:
Коммит
77367ca02c
@ -965,9 +965,17 @@ cannot_pack:
|
||||
|
||||
/* makes sure that we don't exceed BTL max send size */
|
||||
if(bml_btl->btl->btl_max_send_size != 0) {
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
size_t max_send_size;
|
||||
if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) && (bml_btl->btl->btl_cuda_max_send_size != 0)) {
|
||||
max_send_size = bml_btl->btl->btl_cuda_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
|
||||
} else {
|
||||
max_send_size = bml_btl->btl->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
|
||||
}
|
||||
#else /* OPAL_CUDA_SUPPORT */
|
||||
size_t max_send_size = bml_btl->btl->btl_max_send_size -
|
||||
sizeof(mca_pml_ob1_frag_hdr_t);
|
||||
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
if (size > max_send_size) {
|
||||
size = max_send_size;
|
||||
}
|
||||
|
@ -135,6 +135,14 @@ int mca_btl_base_param_register(mca_base_component_t *version,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&module->btl_cuda_rdma_limit);
|
||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
module->btl_cuda_max_send_size = 0;
|
||||
(void) mca_base_component_var_register(version, "cuda_max_send_size", "Maximum size (in bytes) of a single GPU \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1) (only valid on smcuda btl)",
|
||||
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_4,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&module->btl_cuda_max_send_size);
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
(void) mca_base_component_var_register(version, "max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)",
|
||||
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
|
||||
|
@ -1170,6 +1170,9 @@ struct mca_btl_base_module_t {
|
||||
size_t btl_cuda_eager_limit; /**< switch from eager to RDMA */
|
||||
size_t btl_cuda_rdma_limit; /**< switch from RDMA to rndv pipeline */
|
||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
size_t btl_cuda_max_send_size; /**< set if CUDA max send_size is different from host max send size */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
};
|
||||
typedef struct mca_btl_base_module_t mca_btl_base_module_t;
|
||||
|
||||
|
@ -815,6 +815,11 @@ int btl_openib_verify_mca_params (void)
|
||||
}
|
||||
}
|
||||
#endif /* Workaround */
|
||||
if (0 != mca_btl_openib_module.super.btl_cuda_max_send_size) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "do_not_set_openib_value",
|
||||
true, opal_process_info.nodename);
|
||||
mca_btl_openib_module.super.btl_cuda_max_send_size = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if BTL_OPENIB_MALLOC_HOOKS_ENABLED
|
||||
|
@ -700,3 +700,9 @@ with CUDA GPU Direct RDMA. Either disable GPU Direct RDMA support or
|
||||
enable "leave pinned" support. Deactivating the openib BTL.
|
||||
|
||||
Local host: %s
|
||||
#
|
||||
[do_not_set_openib_value]
|
||||
Open MPI has detected that you have attempted to set the btl_openib_cuda_max_send_size
|
||||
value. This is not supported. Setting back to default value of 0.
|
||||
|
||||
Local host: %s
|
||||
|
@ -173,7 +173,7 @@ static int smcuda_register(void)
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
mca_btl_smcuda.super.btl_eager_limit = 4*1024;
|
||||
mca_btl_smcuda.super.btl_rndv_eager_limit = 4*1024;
|
||||
mca_btl_smcuda.super.btl_max_send_size = 128*1024;
|
||||
mca_btl_smcuda.super.btl_max_send_size = 32*1024;
|
||||
mca_btl_smcuda.super.btl_rdma_pipeline_send_length = 64*1024;
|
||||
mca_btl_smcuda.super.btl_rdma_pipeline_frag_size = 64*1024;
|
||||
mca_btl_smcuda.super.btl_min_rdma_pipeline_size = 64*1024;
|
||||
@ -185,7 +185,12 @@ static int smcuda_register(void)
|
||||
/* Call the BTL base to register its MCA params */
|
||||
mca_btl_base_param_register(&mca_btl_smcuda_component.super.btl_version,
|
||||
&mca_btl_smcuda.super);
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* If user has not set the value, then set to the default */
|
||||
if (0 == mca_btl_smcuda.super.btl_cuda_max_send_size) {
|
||||
mca_btl_smcuda.super.btl_cuda_max_send_size = 128*1024;
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
return mca_btl_smcuda_component_verify();
|
||||
}
|
||||
|
||||
@ -214,6 +219,17 @@ static int mca_btl_smcuda_component_open(void)
|
||||
mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_max_send_size;
|
||||
mca_btl_smcuda_component.eager_limit = mca_btl_smcuda.super.btl_eager_limit;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Possibly adjust max_frag_size if the cuda size is bigger */
|
||||
if (mca_btl_smcuda.super.btl_cuda_max_send_size > mca_btl_smcuda.super.btl_max_send_size) {
|
||||
mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_cuda_max_send_size;
|
||||
}
|
||||
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
||||
"btl: smcuda: cuda_max_send_size=%d, max_send_size=%d, max_frag_size=%d",
|
||||
(int)mca_btl_smcuda.super.btl_cuda_max_send_size, (int)mca_btl_smcuda.super.btl_max_send_size,
|
||||
(int)mca_btl_smcuda_component.max_frag_size);
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/* initialize objects */
|
||||
OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_eager, opal_free_list_t);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user