diff --git a/ompi/mca/mtl/psm2/help-mtl-psm2.txt b/ompi/mca/mtl/psm2/help-mtl-psm2.txt index 719b060a22..ee876efd20 100644 --- a/ompi/mca/mtl/psm2/help-mtl-psm2.txt +++ b/ompi/mca/mtl/psm2/help-mtl-psm2.txt @@ -45,3 +45,7 @@ Unknown path record query mechanism %s. Supported mechanisms are %s. # [message too big] Message size %llu bigger than supported by PSM2 API. Max = %llu +# +[no psm2 cuda env] +Using CUDA enabled OpenMPI but PSM2_CUDA environment variable is %s. +This is not a recommended combination. If the application uses %s. diff --git a/ompi/mca/mtl/psm2/mtl_psm2.c b/ompi/mca/mtl/psm2/mtl_psm2.c index 6d461a2c76..f0d04a2159 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2.c +++ b/ompi/mca/mtl/psm2/mtl_psm2.c @@ -100,9 +100,6 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { char *generated_key; char env_string[256]; int rc; -#if OPAL_CUDA_SUPPORT - char *cuda_env; -#endif generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports"); memset(uu, 0, sizeof(psm2_uuid_t)); @@ -178,11 +175,6 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { #if OPAL_CUDA_SUPPORT ompi_mtl_psm2.super.mtl_flags |= MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE; - - cuda_env = getenv("PSM2_CUDA"); - if (!cuda_env || ( strcmp(cuda_env, "0") == 0) ) - opal_output(0, "Warning: If running with device buffers, there is a" - " chance the application might fail. Try setting PSM2_CUDA=1.\n"); #endif return OMPI_SUCCESS; diff --git a/ompi/mca/mtl/psm2/mtl_psm2_component.c b/ompi/mca/mtl/psm2/mtl_psm2_component.c index a536fd6efb..b2d74aeaf2 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_component.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_component.c @@ -199,6 +199,9 @@ static int ompi_mtl_psm2_component_register(void) { int num_local_procs, num_total_procs; +#if OPAL_CUDA_SUPPORT + char *cuda_env; +#endif ompi_mtl_psm2.connect_timeout = 180; (void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version, @@ -223,6 +226,30 @@ ompi_mtl_psm2_component_register(void) param_priority = 40; } +#if OPAL_CUDA_SUPPORT + /* + * If using CUDA enabled OpenMPI, the user likely intends to + * run with CUDA buffers. So, force-set the envvar here if user failed + * to set it. + */ + cuda_env = getenv("PSM2_CUDA"); + if (!cuda_env) { + opal_show_help("help-mtl-psm2.txt", + "no psm2 cuda env", true, + "not set", + "Host buffers,\nthere will be a performance penalty" + " due to OMPI force setting this variable now.\n" + "Set environment variable to 0 if using Host buffers" ); + setenv("PSM2_CUDA", "1", 0); + } else if (strcmp(cuda_env, "0") == 0) { + opal_show_help("help-mtl-psm2.txt", + "no psm2 cuda env", true, + "set to 0", + "CUDA buffers,\nthe execution will SEGFAULT." + " Set environment variable to 1 if using CUDA buffers"); + } +#endif + (void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, "priority", "Priority of the PSM2 MTL component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, diff --git a/ompi/mca/pml/cm/pml_cm.h b/ompi/mca/pml/cm/pml_cm.h index 262294fc43..b3c06eb83b 100644 --- a/ompi/mca/pml/cm/pml_cm.h +++ b/ompi/mca/pml/cm/pml_cm.h @@ -185,7 +185,7 @@ mca_pml_cm_recv(void *addr, &(datatype->super), count, addr, - flags, + flags, &convertor ); #else MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); @@ -195,7 +195,7 @@ mca_pml_cm_recv(void *addr, &(datatype->super), count, addr, - flags, + flags, &convertor ); #endif diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.h b/ompi/mca/pml/cm/pml_cm_recvreq.h index 0c79bf4937..6729cac886 100644 --- a/ompi/mca/pml/cm/pml_cm_recvreq.h +++ b/ompi/mca/pml/cm/pml_cm_recvreq.h @@ -94,7 +94,7 @@ do { \ datatype, \ addr, \ count, \ - flags ) \ + flags ) \ do { \ OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, false); \ (request)->req_base.req_ompi.req_mpi_object.comm = comm; \ @@ -116,7 +116,7 @@ do { \ &(datatype->super), \ count, \ addr, \ - flags, \ + flags, \ &(request)->req_base.req_convertor ); \ } while(0) #else @@ -127,7 +127,7 @@ do { \ datatype, \ addr, \ count, \ - flags ) \ + flags ) \ do { \ OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, false); \ (request)->req_base.req_ompi.req_mpi_object.comm = comm; \ @@ -144,7 +144,7 @@ do { \ &(datatype->super), \ count, \ addr, \ - flags, \ + flags, \ &(request)->req_base.req_convertor ); \ } while(0) #endif @@ -158,7 +158,7 @@ do { \ datatype, \ addr, \ count, \ - flags, \ + flags, \ persistent) \ do { \ OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \ @@ -197,7 +197,7 @@ do { \ datatype, \ addr, \ count, \ - flags, \ + flags, \ persistent) \ do { \ OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \ @@ -219,7 +219,7 @@ do { \ &(datatype->super), \ count, \ addr, \ - flags, \ + flags, \ &(request)->req_base.req_convertor ); \ } while(0) #endif diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.h b/ompi/mca/pml/cm/pml_cm_sendreq.h index 0d006da0f8..ab6dbb631d 100644 --- a/ompi/mca/pml/cm/pml_cm_sendreq.h +++ b/ompi/mca/pml/cm/pml_cm_sendreq.h @@ -127,7 +127,7 @@ do { \ sendmode, \ buf, \ count, \ - flags ) \ + flags ) \ { \ OBJ_RETAIN(comm); \ OMPI_DATATYPE_RETAIN(datatype); \ @@ -139,7 +139,7 @@ do { \ &(datatype->super), \ count, \ buf, \ - flags, \ + flags, \ &(req_send)->req_base.req_convertor ); \ (req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \ (req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \ @@ -158,7 +158,7 @@ do { \ sendmode, \ buf, \ count, \ - flags ) \ + flags ) \ { \ OBJ_RETAIN(comm); \ OMPI_DATATYPE_RETAIN(datatype); \ @@ -170,7 +170,7 @@ do { \ &(datatype->super), \ count, \ buf, \ - flags, \ + flags, \ &(req_send)->req_base.req_convertor ); \ (req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \ (req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \ @@ -191,7 +191,7 @@ do { \ sendmode, \ buf, \ count, \ - flags ) \ + flags ) \ { \ OBJ_RETAIN(comm); \ OMPI_DATATYPE_RETAIN(datatype); \ @@ -203,7 +203,7 @@ do { \ &(datatype->super), \ count, \ buf, \ - flags, \ + flags, \ &(req_send)->req_base.req_convertor ); \ (req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \ (req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \ @@ -223,7 +223,7 @@ do { \ sendmode, \ buf, \ count, \ - flags ) \ + flags ) \ { \ OBJ_RETAIN(comm); \ OMPI_DATATYPE_RETAIN(datatype); \ @@ -249,7 +249,7 @@ do { \ &(datatype->super), \ count, \ buf, \ - flags, \ + flags, \ &(req_send)->req_base.req_convertor ); \ } \ (req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \ @@ -273,7 +273,7 @@ do { \ blocking, \ buf, \ count, \ - flags ) \ + flags ) \ do { \ OMPI_REQUEST_INIT(&(sendreq->req_send.req_base.req_ompi), \ persistent); \ @@ -289,7 +289,7 @@ do { \ sendmode, \ buf, \ count, \ - flags ) \ + flags ) \ opal_convertor_get_packed_size( \ &sendreq->req_send.req_base.req_convertor, \ &sendreq->req_count ); \ @@ -309,7 +309,7 @@ do { \ sendmode, \ buf, \ count, \ - flags ) \ + flags ) \ do { \ OMPI_REQUEST_INIT(&(sendreq->req_send.req_base.req_ompi), \ false); \