Update the PSM and PSM2 MTLs to detect the device entries created by the drivers for
Intel True Scale and Intel Omni-Path, and to require at least one link in the ACTIVE state. This fix addresses the scenario reported in the OMPI users email linked below, which involves QLogic IB hardware (now named Intel True Scale). Given the nature of the PSM/PSM2 MTLs, this fix also applies to Omni-Path: https://www.open-mpi.org/community/lists/users/2016/04/29018.php
Этот коммит содержится в:
родитель
44d95cb610
Коммит
d28ee62a96
@ -37,6 +37,7 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <glob.h>
|
||||||
|
|
||||||
static int param_priority;
|
static int param_priority;
|
||||||
|
|
||||||
@ -185,12 +186,41 @@ ompi_mtl_psm_component_open(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Component available only if Truescale hardware is present */
|
/* Component available only if Truescale hardware is present */
|
||||||
if (0 == stat("/dev/ipath", &st)) {
|
if (0 != stat("/dev/ipath", &st)) {
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return OPAL_ERR_NOT_AVAILABLE;
|
return OPAL_ERR_NOT_AVAILABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Component available only if at least one qib port is ACTIVE */
|
||||||
|
bool foundOnlineQibPort = false;
|
||||||
|
size_t i;
|
||||||
|
char portState[128];
|
||||||
|
FILE *devFile;
|
||||||
|
glob_t globbuf;
|
||||||
|
globbuf.gl_offs = 0;
|
||||||
|
if (glob("/sys/class/infiniband/qib*/ports/*/state",
|
||||||
|
GLOB_DOOFFS, NULL, &globbuf) != 0) {
|
||||||
|
return OPAL_ERR_NOT_AVAILABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i=0;i < globbuf.gl_pathc; i++) {
|
||||||
|
devFile = fopen(globbuf.gl_pathv[i], "r");
|
||||||
|
fgets(portState, sizeof(portState), devFile);
|
||||||
|
fclose(devFile);
|
||||||
|
|
||||||
|
if (strstr(portState, "ACTIVE") != NULL) {
|
||||||
|
/* Found at least one ACTIVE port */
|
||||||
|
foundOnlineQibPort = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
globfree(&globbuf);
|
||||||
|
|
||||||
|
if (!foundOnlineQibPort) {
|
||||||
|
return OPAL_ERR_NOT_AVAILABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <glob.h>
|
||||||
|
|
||||||
static int param_priority;
|
static int param_priority;
|
||||||
|
|
||||||
@ -101,15 +102,46 @@ ompi_mtl_psm2_component_register(void)
|
|||||||
static int
|
static int
|
||||||
ompi_mtl_psm2_component_open(void)
|
ompi_mtl_psm2_component_open(void)
|
||||||
{
|
{
|
||||||
struct stat st;
|
glob_t globbuf;
|
||||||
|
globbuf.gl_offs = 0;
|
||||||
|
|
||||||
/* Component available only if Omni-Path hardware is present */
|
/* Component available only if Omni-Path hardware is present */
|
||||||
if (0 == stat("/dev/hfi1", &st)) {
|
if ((glob("/dev/hfi1_[0-9]", GLOB_DOOFFS, NULL, &globbuf) != 0) &&
|
||||||
return OMPI_SUCCESS;
|
(glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &globbuf) != 0)) {
|
||||||
}
|
|
||||||
else {
|
|
||||||
return OPAL_ERR_NOT_AVAILABLE;
|
return OPAL_ERR_NOT_AVAILABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
globfree(&globbuf);
|
||||||
|
|
||||||
|
/* Component available only if at least one hfi1 port is ACTIVE */
|
||||||
|
bool foundOnlineHfi1Port = false;
|
||||||
|
size_t i;
|
||||||
|
char portState[128];
|
||||||
|
FILE *devFile;
|
||||||
|
if (glob("/sys/class/infiniband/hfi1_*/ports/*/state",
|
||||||
|
GLOB_DOOFFS, NULL, &globbuf) != 0) {
|
||||||
|
return OPAL_ERR_NOT_AVAILABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i=0;i < globbuf.gl_pathc; i++) {
|
||||||
|
devFile = fopen(globbuf.gl_pathv[i], "r");
|
||||||
|
fgets(portState, sizeof(portState), devFile);
|
||||||
|
fclose(devFile);
|
||||||
|
|
||||||
|
if (strstr(portState, "ACTIVE") != NULL) {
|
||||||
|
/* Found at least one ACTIVE port */
|
||||||
|
foundOnlineHfi1Port = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
globfree(&globbuf);
|
||||||
|
|
||||||
|
if (!foundOnlineHfi1Port) {
|
||||||
|
return OPAL_ERR_NOT_AVAILABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user