Add diagnostics for hwloc get_topology
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
cd8db5313e
Коммит
41df973359
@ -277,8 +277,8 @@ int opal_hwloc_base_get_topology(void)
|
||||
char *shmemfile;
|
||||
#endif
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base:get_topology"));
|
||||
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base:get_topology");
|
||||
|
||||
/* see if we already have it */
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
@ -289,8 +289,8 @@ int opal_hwloc_base_get_topology(void)
|
||||
|
||||
if (NULL != opal_pmix.get) {
|
||||
#if HWLOC_API_VERSION >= 0x20000
|
||||
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base: looking for topology in shared memory"));
|
||||
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base: looking for topology in shared memory");
|
||||
|
||||
/* first try to get the shmem link, if available */
|
||||
aptr = &addr;
|
||||
@ -304,15 +304,17 @@ int opal_hwloc_base_get_topology(void)
|
||||
if (OPAL_SUCCESS == rc && OPAL_SUCCESS == rc2 && OPAL_SUCCESS == rc3) {
|
||||
if (0 > (fd = open(shmemfile, O_RDONLY))) {
|
||||
free(shmemfile);
|
||||
return OPAL_ERROR;
|
||||
OPAL_ERROR_LOG(OPAL_ERR_FILE_OPEN_FAILURE)
|
||||
return OPAL_ERR_FILE_OPEN_FAILURE;
|
||||
}
|
||||
free(shmemfile);
|
||||
if (0 != hwloc_shmem_topology_adopt(&opal_hwloc_topology, fd,
|
||||
0, (void*)addr, size, 0)) {
|
||||
return OPAL_ERROR;
|
||||
OPAL_ERROR_LOG(OPAL_ERR_FILE_READ_FAILURE);
|
||||
return OPAL_ERR_FILE_READ_FAILURE;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base: topology in shared memory"));
|
||||
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base: topology in shared memory");
|
||||
topo_in_shmem = true;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -320,14 +322,18 @@ int opal_hwloc_base_get_topology(void)
|
||||
/* if that isn't available, then try to retrieve
|
||||
* the xml representation from the PMIx data store */
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base instantiating topology");
|
||||
"hwloc:base getting topology XML string");
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO,
|
||||
&wildcard_rank, &val, OPAL_STRING);
|
||||
} else {
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base PMIx not available");
|
||||
rc = OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base loading topology from XML");
|
||||
/* load the topology */
|
||||
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
|
||||
free(val);
|
||||
@ -361,9 +367,12 @@ int opal_hwloc_base_get_topology(void)
|
||||
return rc;
|
||||
}
|
||||
} else if (NULL == opal_hwloc_base_topo_file) {
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base discovering topology");
|
||||
if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
|
||||
0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) ||
|
||||
0 != hwloc_topology_load(opal_hwloc_topology)) {
|
||||
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
/* filter the cpus thru any default cpu set */
|
||||
@ -372,6 +381,9 @@ int opal_hwloc_base_get_topology(void)
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base loading topology from file %s",
|
||||
opal_hwloc_base_topo_file);
|
||||
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) {
|
||||
return rc;
|
||||
}
|
||||
|
@ -16,11 +16,15 @@ int main(int argc, char* argv[])
|
||||
int rank, size, rc;
|
||||
hwloc_cpuset_t cpus;
|
||||
char *bindings = NULL;
|
||||
pid_t pid;
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
pid = getpid();
|
||||
|
||||
printf("[%lu] Rank %d: getting topology\n", (unsigned long)pid, rank);
|
||||
fflush(stdout);
|
||||
if (OPAL_SUCCESS == opal_hwloc_base_get_topology()) {
|
||||
cpus = hwloc_bitmap_alloc();
|
||||
rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user