1
1

Add diagnostics for hwloc get_topology

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-08-16 14:15:11 -07:00
родитель cd8db5313e
Коммит 41df973359
2 изменённых файлов: 25 добавлений и 9 удалений

Просмотреть файл

@@ -277,8 +277,8 @@ int opal_hwloc_base_get_topology(void)
char *shmemfile;
#endif
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
"hwloc:base:get_topology"));
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
"hwloc:base:get_topology");
/* see if we already have it */
if (NULL != opal_hwloc_topology) {
@@ -289,8 +289,8 @@ int opal_hwloc_base_get_topology(void)
if (NULL != opal_pmix.get) {
#if HWLOC_API_VERSION >= 0x20000
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
"hwloc:base: looking for topology in shared memory"));
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
"hwloc:base: looking for topology in shared memory");
/* first try to get the shmem link, if available */
aptr = &addr;
@@ -304,15 +304,17 @@ int opal_hwloc_base_get_topology(void)
if (OPAL_SUCCESS == rc && OPAL_SUCCESS == rc2 && OPAL_SUCCESS == rc3) {
if (0 > (fd = open(shmemfile, O_RDONLY))) {
free(shmemfile);
return OPAL_ERROR;
OPAL_ERROR_LOG(OPAL_ERR_FILE_OPEN_FAILURE)
return OPAL_ERR_FILE_OPEN_FAILURE;
}
free(shmemfile);
if (0 != hwloc_shmem_topology_adopt(&opal_hwloc_topology, fd,
0, (void*)addr, size, 0)) {
return OPAL_ERROR;
OPAL_ERROR_LOG(OPAL_ERR_FILE_READ_FAILURE);
return OPAL_ERR_FILE_READ_FAILURE;
}
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
"hwloc:base: topology in shared memory"));
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
"hwloc:base: topology in shared memory");
topo_in_shmem = true;
return OPAL_SUCCESS;
}
@@ -320,14 +322,18 @@ int opal_hwloc_base_get_topology(void)
/* if that isn't available, then try to retrieve
* the xml representation from the PMIx data store */
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base instantiating topology");
"hwloc:base getting topology XML string");
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO,
&wildcard_rank, &val, OPAL_STRING);
} else {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base PMIx not available");
rc = OPAL_ERR_NOT_SUPPORTED;
}
if (OPAL_SUCCESS == rc && NULL != val) {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base loading topology from XML");
/* load the topology */
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
free(val);
@@ -361,9 +367,12 @@ int opal_hwloc_base_get_topology(void)
return rc;
}
} else if (NULL == opal_hwloc_base_topo_file) {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base discovering topology");
if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) ||
0 != hwloc_topology_load(opal_hwloc_topology)) {
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
return OPAL_ERR_NOT_SUPPORTED;
}
/* filter the cpus thru any default cpu set */
@@ -372,6 +381,9 @@ int opal_hwloc_base_get_topology(void)
return rc;
}
} else {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base loading topology from file %s",
opal_hwloc_base_topo_file);
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) {
return rc;
}

Просмотреть файл

@@ -16,11 +16,15 @@ int main(int argc, char* argv[])
int rank, size, rc;
hwloc_cpuset_t cpus;
char *bindings = NULL;
pid_t pid;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
pid = getpid();
printf("[%lu] Rank %d: getting topology\n", (unsigned long)pid, rank);
fflush(stdout);
if (OPAL_SUCCESS == opal_hwloc_base_get_topology()) {
cpus = hwloc_bitmap_alloc();
rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS);