diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h
index 9415fda217..4471c1a9c2 100644
--- a/opal/mca/btl/openib/btl_openib.h
+++ b/opal/mca/btl/openib/btl_openib.h
@@ -278,8 +278,6 @@ struct mca_btl_openib_component_t {
     unsigned int cq_poll_progress;
     unsigned int cq_poll_batch;
     unsigned int eager_rdma_poll_ratio;
-    /** Whether we want fork support or not */
-    int want_fork_support;
     int rdma_qp;
     int credits_qp; /* qp used for software flow control */
     bool cpc_explicitly_defined;
diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c
index 73a40342d0..6b84f93ccf 100644
--- a/opal/mca/btl/openib/btl_openib_component.c
+++ b/opal/mca/btl/openib/btl_openib_component.c
@@ -2634,23 +2634,10 @@ btl_openib_component_init(int *num_btl_modules,
         goto no_btls;
     }
 
-    /* If we want fork support, try to enable it */
-#ifdef HAVE_IBV_FORK_INIT
-    if (0 != mca_btl_openib_component.want_fork_support) {
-        if (0 != ibv_fork_init()) {
-            /* If the want_fork_support MCA parameter is >0, then the
-               user was specifically asking for fork support and we
-               couldn't provide it. So print an error and deactivate
-               this BTL. */
-            if (mca_btl_openib_component.want_fork_support > 0) {
-                opal_show_help("help-mpi-btl-openib.txt",
-                               "ibv_fork_init fail", true,
-                               opal_process_info.nodename);
-                goto no_btls;
-            }
-        }
+    /* If fork support is requested, try to enable it */
+    if (OPAL_SUCCESS != (ret = opal_common_verbs_fork_test())) {
+        goto no_btls;
     }
-#endif
 
     /* Parse the include and exclude lists, checking for errors */
     mca_btl_openib_component.if_include_list =
diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c
index 207769fe3f..9ef4753d00 100644
--- a/opal/mca/btl/openib/btl_openib_mca.c
+++ b/opal/mca/btl/openib/btl_openib_mca.c
@@ -270,11 +270,6 @@ int btl_openib_register_mca_params(void)
                   MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT, &mca_btl_openib_component.cq_poll_batch,
                   REGINT_GE_ONE));
 
-    CHECK(reg_int("want_fork_support", NULL,
-                  "Whether fork support is desired or not "
-                  "(negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available)",
-                  0, &mca_btl_openib_component.want_fork_support, 0));
-
     asprintf(&str, "%s/mca-btl-openib-device-params.ini",
              opal_install_dirs.opaldatadir);
     if (NULL == str) {
diff --git a/opal/mca/common/verbs/common_verbs.h b/opal/mca/common/verbs/common_verbs.h
index b951e3212e..3ace814e73 100644
--- a/opal/mca/common/verbs/common_verbs.h
+++ b/opal/mca/common/verbs/common_verbs.h
@@ -164,6 +164,20 @@ opal_common_verbs_find_max_inline(struct ibv_device *device,
  */
 OPAL_DECLSPEC int opal_common_verbs_qp_test(struct ibv_context *device_context,
                                             int flags);
+/*
+ * ibv_fork_init testing - if fork support is requested then ibv_fork_init
+ * should be called right at the beginning of the verbs initialization flow, before any ibv_create_* call.
+ *
+ * Known limitations:
+ * If ibv_fork_init is called after ibv_create_* functions - it will have no effect.
+ * OMPI initializes verbs many times during initialization in the following verbs components:
+ *      oob/ud, btl/openib, mtl/mxm, pml/yalla, oshmem/ikrit, oshmem/yoda, ompi/mca/coll/{fca,hcoll}
+ *
+ * So, ibv_fork_init should be called once, at the beginning of the init flow of every verbs component,
+ * to properly request fork support.
+ *
+ */
+OPAL_DECLSPEC int opal_common_verbs_fork_test(void);
 
 END_C_DECLS
 
diff --git a/opal/mca/common/verbs/common_verbs_basics.c b/opal/mca/common/verbs/common_verbs_basics.c
index 1f89dfbb09..a2e09cf4eb 100644
--- a/opal/mca/common/verbs/common_verbs_basics.c
+++ b/opal/mca/common/verbs/common_verbs_basics.c
@@ -35,6 +35,9 @@ const char *ibv_get_sysfs_path(void);
 #endif
 
 #include "common_verbs.h"
+#include "opal/runtime/opal_params.h"
+#include "opal/util/show_help.h"
+#include "opal/util/proc.h"
 
 /***********************************************************************/
 
@@ -61,3 +64,31 @@ bool opal_common_verbs_check_basics(void)
     return true;
 }
 
+int opal_common_verbs_fork_test(void)
+{
+#ifdef HAVE_IBV_FORK_INIT
+    int rc;
+
+    /* 0 == fork support was not requested: nothing to do */
+    if (0 == opal_verbs_want_fork_support) {
+        return OPAL_SUCCESS;
+    }
+
+    /* ibv_fork_init() has to run before the memory-registering verbs
+     * calls that the caller makes after this function returns.  It
+     * reports failure through its return value (0 on success, an
+     * errno value otherwise), so that value is what gets reported. */
+    rc = ibv_fork_init();
+    if (0 != rc && opal_verbs_want_fork_support > 0) {
+        /* A positive setting makes the failure fatal; a negative one
+         * means "best effort", so the failure is silently ignored. */
+        opal_show_help("help-opal-common-verbs.txt",
+                       "ibv_fork_init fail", true,
+                       opal_proc_local_get()->proc_hostname, rc,
+                       strerror(rc));
+        return OPAL_ERROR;
+    }
+#endif
+    return OPAL_SUCCESS;
+}
+
diff --git a/opal/mca/common/verbs/help-opal-common-verbs.txt b/opal/mca/common/verbs/help-opal-common-verbs.txt
index de354eef98..e8c3ec257f 100644
--- a/opal/mca/common/verbs/help-opal-common-verbs.txt
+++ b/opal/mca/common/verbs/help-opal-common-verbs.txt
@@ -46,3 +46,9 @@ specified:
 These entities will be ignored.  You can disable this warning by
 setting the ompi_common_verbs_warn_nonexistent_if MCA parameter to 0.
 #
+[ibv_fork_init fail]
+Fork support was requested but the library call ibv_fork_init() failed.
+
+  Hostname: %s
+  Error (%d): %s
+#
diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c
index 3b4d4a7c2c..d07dd5c712 100644
--- a/opal/runtime/opal_params.c
+++ b/opal/runtime/opal_params.c
@@ -275,6 +275,19 @@ int opal_register_params(void)
         return ret;
     }
 
+    opal_verbs_want_fork_support = 1;
+    ret = mca_base_var_register("opal", "opal", NULL, "verbs_want_fork_support",
+                                "Whether fork support is desired or not "
+                                "(negative = try to enable fork support, but continue even "
+                                "if it is not available, 0 = do not enable fork support, "
+                                "positive = try to enable fork support and fail if it is not available)",
+                                MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
+                                OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
+                                &opal_verbs_want_fork_support);
+    if (0 > ret) {
+        return ret;
+    }
+
     return OPAL_SUCCESS;
 }
 
diff --git a/opal/runtime/opal_params.h b/opal/runtime/opal_params.h
index 25a92b5720..66b8da3d00 100644
--- a/opal/runtime/opal_params.h
+++ b/opal/runtime/opal_params.h
@@ -30,6 +30,8 @@ extern char *opal_signal_string;
 extern char *opal_net_private_ipv4;
 extern char *opal_set_max_sys_limits;
 
+int opal_verbs_want_fork_support; /* NOTE(review): not 'extern' like its neighbours -- confirm */
+
 #if OPAL_ENABLE_TIMING
 extern char *opal_timing_sync_file;
 extern char *opal_timing_output;
@@ -38,6 +40,7 @@ extern bool opal_timing_overhead;
 
 OPAL_DECLSPEC extern int opal_initialized;
 OPAL_DECLSPEC extern bool opal_built_with_cuda_support;
+
 /**
  *  * Whether we want to enable CUDA GPU buffer send and receive support.
  *   */
diff --git a/orte/mca/oob/ud/Makefile.am b/orte/mca/oob/ud/Makefile.am
index e1ab6259ec..0924501c02 100644
--- a/orte/mca/oob/ud/Makefile.am
+++ b/orte/mca/oob/ud/Makefile.am
@@ -55,7 +55,8 @@ mcacomponentdir = $(ortelibdir)
 mcacomponent_LTLIBRARIES = $(component_install)
 mca_oob_ud_la_SOURCES = $(sources)
 mca_oob_ud_la_LDFLAGS = -module -avoid-version $(orte_oob_ud_LDFLAGS)
-mca_oob_ud_la_LIBADD = $(orte_oob_ud_LIBS)
+mca_oob_ud_la_LIBADD = $(orte_oob_ud_LIBS) \
+    $(OPAL_TOP_BUILDDIR)/opal/mca/common/verbs/lib@OPAL_LIB_PREFIX@mca_common_verbs.la
 
 noinst_LTLIBRARIES = $(component_noinst)
 libmca_oob_ud_la_SOURCES = $(sources)
diff --git a/orte/mca/oob/ud/oob_ud_component.c b/orte/mca/oob/ud/oob_ud_component.c
index 3c6773b122..76098dbc35 100644
--- a/orte/mca/oob/ud/oob_ud_component.c
+++ b/orte/mca/oob/ud/oob_ud_component.c
@@ -24,6 +24,8 @@
 
 #include "oob_ud_component.h"
 
+#include "opal/mca/common/verbs/common_verbs.h"
+
 static int mca_oob_ud_component_open (void);
 static int mca_oob_ud_component_close (void);
 static int mca_oob_ud_component_register (void);
@@ -217,6 +219,16 @@ static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
                          "%s oob:ud:device_setup attempting to setup ib device %p",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) ib_device);
 
+
+    /* If fork support is requested, try to enable it */
+    rc = opal_common_verbs_fork_test();
+    if (OPAL_SUCCESS != rc) {
+        opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                            "%s oob:ud:device_setup failed in ibv_fork_init. errno = %d",
+                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
+        return ORTE_ERROR;
+    }
+
     device->ib_context = ibv_open_device (ib_device);
     if (NULL == device->ib_context) {
         opal_output_verbose(5, orte_oob_base_framework.framework_output,
diff --git a/oshmem/mca/sshmem/verbs/Makefile.am b/oshmem/mca/sshmem/verbs/Makefile.am
index bd6e4546bb..6b139617e5 100644
--- a/oshmem/mca/sshmem/verbs/Makefile.am
+++ b/oshmem/mca/sshmem/verbs/Makefile.am
@@ -30,7 +30,8 @@ mcacomponentdir = $(oshmemlibdir)
 mcacomponent_LTLIBRARIES = $(component_install)
 mca_sshmem_verbs_la_SOURCES = $(sources)
 mca_sshmem_verbs_la_LDFLAGS = -module -avoid-version $(oshmem_verbs_LDFLAGS)
-mca_sshmem_verbs_la_LIBADD = $(oshmem_verbs_LIBS)
+mca_sshmem_verbs_la_LIBADD = $(oshmem_verbs_LIBS) \
+    $(OPAL_TOP_BUILDDIR)/opal/mca/common/verbs/lib@OPAL_LIB_PREFIX@mca_common_verbs.la
 
 noinst_LTLIBRARIES = $(component_noinst)
 libmca_sshmem_verbs_la_SOURCES =$(sources)
diff --git a/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c b/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c
index eb9322dc80..146b1802af 100644
--- a/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c
+++ b/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c
@@ -19,6 +19,7 @@
 
 #include "opal/constants.h"
 #include "opal/util/sys_limits.h"
+#include "opal/mca/common/verbs/common_verbs.h"
 
 #include "oshmem/mca/sshmem/sshmem.h"
 #include "oshmem/mca/sshmem/base/base.h"
@@ -100,6 +101,11 @@ verbs_runtime_query(mca_base_module_t **module,
     *priority = 0;
     *module = NULL;
 
+    /* If fork support is requested, try to enable it */
+    if (OPAL_SUCCESS != (rc = opal_common_verbs_fork_test())) {
+        return OSHMEM_ERROR;
+    }
+
     memset(device, 0, sizeof(*device));
 
 #ifdef HAVE_IBV_GET_DEVICE_LIST