Merge pull request #6294 from bwbarrett/mtl-ofi-no-device-warning
mtl/ofi: Print descriptive error message on modex failure
Этот коммит содержится в:
Коммит
23da9fac23
@ -65,3 +65,13 @@ are more threads than the available contexts.
|
||||
|
||||
Local host: %s
|
||||
Location: %s:%d
|
||||
|
||||
[modex failed]
|
||||
The OFI MTL was not able to find endpoint information for a remote
endpoint. Most likely, this means that the remote process was unable
to initialize the Libfabric NIC correctly. This error is not
recoverable and your application is likely to abort.
|
||||
|
||||
Local host: %s
Remote host: %s
Error: %s (%d)
|
||||
|
@ -98,9 +98,10 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
(void**)&ep_name,
|
||||
&size);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: modex_recv failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
opal_show_help("help-mtl-ofi.txt", "modex failed",
|
||||
true, ompi_process_info.nodename,
|
||||
procs[i]->super.proc_hostname,
|
||||
opal_strerror(ret), ret);
|
||||
goto bail;
|
||||
}
|
||||
memcpy(&ep_names[i*namelen], ep_name, namelen);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user