From afec1454f52367fe433ffe7391e35ad55b142e60 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 10 Mar 2015 07:41:28 -0700 Subject: [PATCH] usnic: only setup the connectivity checker if we have modules If we ended up with no modules (e.g., all usnic devices were excluded), there was a race condition in that the connectivity agent could tear down its local socket before one or more of the local clients saw it. Therefore, the local clients would time out waiting for the socket to appear. So move the connectivity checker init later in the bootstrapping process (it *must* be set up before module_init()), and have it only invoked if we actually ended up with one or more modules. --- opal/mca/btl/usnic/btl_usnic_component.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/opal/mca/btl/usnic/btl_usnic_component.c b/opal/mca/btl/usnic/btl_usnic_component.c index 9f80283d14..90de92c724 100644 --- a/opal/mca/btl/usnic/btl_usnic_component.c +++ b/opal/mca/btl/usnic/btl_usnic_component.c @@ -662,14 +662,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_output_verbose(5, USNIC_OUT, "btl:usnic: usNIC fabrics found"); - /* Setup the connectivity checking agent and client. */ - if (mca_btl_usnic_component.connectivity_enabled) { - if (OPAL_SUCCESS != opal_btl_usnic_connectivity_agent_init() || - OPAL_SUCCESS != opal_btl_usnic_connectivity_client_init()) { - return NULL; - } - } - /* libnl initialization */ opal_proc_t *me = opal_proc_local_get(); opal_process_name_t *name = &(me->proc_name); @@ -863,6 +855,16 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, filter = NULL; } + /* If we actually have some modules, setup the connectivity + checking agent and client. 
*/ + if (mca_btl_usnic_component.num_modules > 0 && + mca_btl_usnic_component.connectivity_enabled) { + if (OPAL_SUCCESS != opal_btl_usnic_connectivity_agent_init() || + OPAL_SUCCESS != opal_btl_usnic_connectivity_client_init()) { + return NULL; + } + } + /* Now that we know how many modules there are, let the modules initialize themselves (it's useful to know how many modules there are before doing this). */