1
1

Merge pull request #687 from rolfv/pr/fix-smcuda-perfprob

Add the ability to use different size buffers for host and CUDA buffers
Этот коммит содержится в:
bosilca 2015-07-02 18:42:41 -04:00
родитель 4e7d979f8d 30a872b478
Коммит 77367ca02c
6 изменённых файлов: 49 добавлений и 3 удалений

Просмотреть файл

@ -965,9 +965,17 @@ cannot_pack:
/* makes sure that we don't exceed BTL max send size */
if(bml_btl->btl->btl_max_send_size != 0) {
#if OPAL_CUDA_SUPPORT
size_t max_send_size;
if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) && (bml_btl->btl->btl_cuda_max_send_size != 0)) {
max_send_size = bml_btl->btl->btl_cuda_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
} else {
max_send_size = bml_btl->btl->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
}
#else /* OPAL_CUDA_SUPPORT */
size_t max_send_size = bml_btl->btl->btl_max_send_size -
sizeof(mca_pml_ob1_frag_hdr_t);
#endif /* OPAL_CUDA_SUPPORT */
if (size > max_send_size) {
size = max_send_size;
}

Просмотреть файл

@ -135,6 +135,14 @@ int mca_btl_base_param_register(mca_base_component_t *version,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_cuda_rdma_limit);
#endif /* OPAL_CUDA_GDR_SUPPORT */
#if OPAL_CUDA_SUPPORT
module->btl_cuda_max_send_size = 0;
(void) mca_base_component_var_register(version, "cuda_max_send_size", "Maximum size (in bytes) of a single GPU \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1) (only valid on smcuda btl)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_4,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_cuda_max_send_size);
#endif /* OPAL_CUDA_SUPPORT */
(void) mca_base_component_var_register(version, "max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,

Просмотреть файл

@ -1170,6 +1170,9 @@ struct mca_btl_base_module_t {
size_t btl_cuda_eager_limit; /**< switch from eager to RDMA */
size_t btl_cuda_rdma_limit; /**< switch from RDMA to rndv pipeline */
#endif /* OPAL_CUDA_GDR_SUPPORT */
#if OPAL_CUDA_SUPPORT
size_t btl_cuda_max_send_size; /**< set if CUDA max send_size is different from host max send size */
#endif /* OPAL_CUDA_SUPPORT */
};
typedef struct mca_btl_base_module_t mca_btl_base_module_t;

Просмотреть файл

@ -815,6 +815,11 @@ int btl_openib_verify_mca_params (void)
}
}
#endif /* Workaround */
if (0 != mca_btl_openib_module.super.btl_cuda_max_send_size) {
opal_show_help("help-mpi-btl-openib.txt", "do_not_set_openib_value",
true, opal_process_info.nodename);
mca_btl_openib_module.super.btl_cuda_max_send_size = 0;
}
#endif
#if BTL_OPENIB_MALLOC_HOOKS_ENABLED

Просмотреть файл

@ -700,3 +700,9 @@ with CUDA GPU Direct RDMA. Either disable GPU Direct RDMA support or
enable "leave pinned" support. Deactivating the openib BTL.
Local host: %s
#
[do_not_set_openib_value]
Open MPI has detected that you have attempted to set the btl_openib_cuda_max_send_size
value. This is not supported. Setting back to default value of 0.
Local host: %s

Просмотреть файл

@ -173,7 +173,7 @@ static int smcuda_register(void)
#endif /* OPAL_CUDA_SUPPORT */
mca_btl_smcuda.super.btl_eager_limit = 4*1024;
mca_btl_smcuda.super.btl_rndv_eager_limit = 4*1024;
mca_btl_smcuda.super.btl_max_send_size = 128*1024;
mca_btl_smcuda.super.btl_max_send_size = 32*1024;
mca_btl_smcuda.super.btl_rdma_pipeline_send_length = 64*1024;
mca_btl_smcuda.super.btl_rdma_pipeline_frag_size = 64*1024;
mca_btl_smcuda.super.btl_min_rdma_pipeline_size = 64*1024;
@ -185,7 +185,12 @@ static int smcuda_register(void)
/* Call the BTL based to register its MCA params */
mca_btl_base_param_register(&mca_btl_smcuda_component.super.btl_version,
&mca_btl_smcuda.super);
#if OPAL_CUDA_SUPPORT
/* If the user has not set the value, then set it to the default */
if (0 == mca_btl_smcuda.super.btl_cuda_max_send_size) {
mca_btl_smcuda.super.btl_cuda_max_send_size = 128*1024;
}
#endif /* OPAL_CUDA_SUPPORT */
return mca_btl_smcuda_component_verify();
}
@ -214,6 +219,17 @@ static int mca_btl_smcuda_component_open(void)
mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_max_send_size;
mca_btl_smcuda_component.eager_limit = mca_btl_smcuda.super.btl_eager_limit;
#if OPAL_CUDA_SUPPORT
/* Possibly adjust max_frag_size if the cuda size is bigger */
if (mca_btl_smcuda.super.btl_cuda_max_send_size > mca_btl_smcuda.super.btl_max_send_size) {
mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_cuda_max_send_size;
}
opal_output_verbose(10, opal_btl_base_framework.framework_output,
"btl: smcuda: cuda_max_send_size=%d, max_send_size=%d, max_frag_size=%d",
(int)mca_btl_smcuda.super.btl_cuda_max_send_size, (int)mca_btl_smcuda.super.btl_max_send_size,
(int)mca_btl_smcuda_component.max_frag_size);
#endif /* OPAL_CUDA_SUPPORT */
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_eager, opal_free_list_t);