1
1

usnic: fix bootstrap error paths

Fix previously-unfinished error paths during startup/bootstrapping.
Instead of just blindly continuing on when an fi_* function call
fails, call opal_show_help() to report the error and skip that device.

Also, only check the usnic config minimums once.  They're VIC-wide and
won't change on a per-device basis -- we only need to check them once.

Fixes CSCut19179.
Этот коммит содержится в:
Jeff Squyres 2015-03-09 16:57:41 -07:00
родитель 0d80bfb391
Коммит 4b2cba46f4

Просмотреть файл

@@ -738,15 +738,29 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
ret = fi_fabric(info->fabric_attr, &fabric, NULL);
if (0 != ret) {
BTL_ERROR(("fi_fabric"));
/* JMS error */
opal_show_help("help-mpi-btl-usnic.txt",
"libfabric API failed",
true,
opal_process_info.nodename,
info->fabric_attr->name,
"fi_fabric()", __FILE__, __LINE__,
ret,
strerror(-ret));
continue;
}
opal_memchecker_base_mem_defined(&fabric, sizeof(fabric));
ret = fi_domain(fabric, info, &domain, NULL);
if (0 != ret) {
BTL_ERROR(("fi_domain"));
/* JMS error */
opal_show_help("help-mpi-btl-usnic.txt",
"libfabric API failed",
true,
opal_process_info.nodename,
info->fabric_attr->name,
"fi_domain()", __FILE__, __LINE__,
ret,
strerror(-ret));
continue;
}
opal_memchecker_base_mem_defined(&domain, sizeof(domain));
@@ -815,14 +829,21 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
}
}
/* Check some usNIC configuration minimum settings */
if (check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) {
/* The first time through, check some usNIC configuration
minimum settings with information we got back from the fi_*
probes (these are VIC-wide settings -- they don't change
for each module we create, so we only need to check
once). */
if (0 == j &&
check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: device %s is not provisioned with enough resources -- skipping",
info->fabric_attr->name);
fi_close(&domain->fid);
fi_close(&fabric->fid);
continue;
mca_btl_usnic_component.num_modules = 0;
goto error;
}
/*************************************************/