1
1

usnic: fix bootstrap error paths

Fix previously-unfinished error paths during startup/bootstrapping.
Instead of just blindly continuing on when an fi_* function call
fails, call opal_show_help() and skip that device.

Also, only check the usNIC config minimums once.  They're VIC-wide and
won't change on a per-device basis, so a single check is sufficient.

Fixes CSCut19179.
Этот коммит содержится в:
Jeff Squyres 2015-03-09 16:57:41 -07:00
родитель 0d80bfb391
Коммит 4b2cba46f4

Просмотреть файл

@ -738,15 +738,29 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
ret = fi_fabric(info->fabric_attr, &fabric, NULL); ret = fi_fabric(info->fabric_attr, &fabric, NULL);
if (0 != ret) { if (0 != ret) {
BTL_ERROR(("fi_fabric")); opal_show_help("help-mpi-btl-usnic.txt",
/* JMS error */ "libfabric API failed",
true,
opal_process_info.nodename,
info->fabric_attr->name,
"fi_fabric()", __FILE__, __LINE__,
ret,
strerror(-ret));
continue;
} }
opal_memchecker_base_mem_defined(&fabric, sizeof(fabric)); opal_memchecker_base_mem_defined(&fabric, sizeof(fabric));
ret = fi_domain(fabric, info, &domain, NULL); ret = fi_domain(fabric, info, &domain, NULL);
if (0 != ret) { if (0 != ret) {
BTL_ERROR(("fi_domain")); opal_show_help("help-mpi-btl-usnic.txt",
/* JMS error */ "libfabric API failed",
true,
opal_process_info.nodename,
info->fabric_attr->name,
"fi_domain()", __FILE__, __LINE__,
ret,
strerror(-ret));
continue;
} }
opal_memchecker_base_mem_defined(&domain, sizeof(domain)); opal_memchecker_base_mem_defined(&domain, sizeof(domain));
@ -815,14 +829,21 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
} }
} }
/* Check some usNIC configuration minimum settings */ /* The first time through, check some usNIC configuration
if (check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) { minimum settings with information we got back from the fi_*
probes (these are VIC-wide settings -- they don't change
for each module we create, so we only need to check
once). */
if (0 == j &&
check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) {
opal_output_verbose(5, USNIC_OUT, opal_output_verbose(5, USNIC_OUT,
"btl:usnic: device %s is not provisioned with enough resources -- skipping", "btl:usnic: device %s is not provisioned with enough resources -- skipping",
info->fabric_attr->name); info->fabric_attr->name);
fi_close(&domain->fid); fi_close(&domain->fid);
fi_close(&fabric->fid); fi_close(&fabric->fid);
continue;
mca_btl_usnic_component.num_modules = 0;
goto error;
} }
/*************************************************/ /*************************************************/