Merge pull request #4106 from rhc54/topic/hwloc
Add diagnostics for hwloc get_topology
This commit is contained in:
Commit
1f799afa30
@ -277,8 +277,8 @@ int opal_hwloc_base_get_topology(void)
|
|||||||
char *shmemfile;
|
char *shmemfile;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
|
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
|
||||||
"hwloc:base:get_topology"));
|
"hwloc:base:get_topology");
|
||||||
|
|
||||||
/* see if we already have it */
|
/* see if we already have it */
|
||||||
if (NULL != opal_hwloc_topology) {
|
if (NULL != opal_hwloc_topology) {
|
||||||
@ -289,8 +289,8 @@ int opal_hwloc_base_get_topology(void)
|
|||||||
|
|
||||||
if (NULL != opal_pmix.get) {
|
if (NULL != opal_pmix.get) {
|
||||||
#if HWLOC_API_VERSION >= 0x20000
|
#if HWLOC_API_VERSION >= 0x20000
|
||||||
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
|
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
|
||||||
"hwloc:base: looking for topology in shared memory"));
|
"hwloc:base: looking for topology in shared memory");
|
||||||
|
|
||||||
/* first try to get the shmem link, if available */
|
/* first try to get the shmem link, if available */
|
||||||
aptr = &addr;
|
aptr = &addr;
|
||||||
@ -304,15 +304,17 @@ int opal_hwloc_base_get_topology(void)
|
|||||||
if (OPAL_SUCCESS == rc && OPAL_SUCCESS == rc2 && OPAL_SUCCESS == rc3) {
|
if (OPAL_SUCCESS == rc && OPAL_SUCCESS == rc2 && OPAL_SUCCESS == rc3) {
|
||||||
if (0 > (fd = open(shmemfile, O_RDONLY))) {
|
if (0 > (fd = open(shmemfile, O_RDONLY))) {
|
||||||
free(shmemfile);
|
free(shmemfile);
|
||||||
return OPAL_ERROR;
|
OPAL_ERROR_LOG(OPAL_ERR_FILE_OPEN_FAILURE)
|
||||||
|
return OPAL_ERR_FILE_OPEN_FAILURE;
|
||||||
}
|
}
|
||||||
free(shmemfile);
|
free(shmemfile);
|
||||||
if (0 != hwloc_shmem_topology_adopt(&opal_hwloc_topology, fd,
|
if (0 != hwloc_shmem_topology_adopt(&opal_hwloc_topology, fd,
|
||||||
0, (void*)addr, size, 0)) {
|
0, (void*)addr, size, 0)) {
|
||||||
return OPAL_ERROR;
|
OPAL_ERROR_LOG(OPAL_ERR_FILE_READ_FAILURE);
|
||||||
|
return OPAL_ERR_FILE_READ_FAILURE;
|
||||||
}
|
}
|
||||||
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
|
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
|
||||||
"hwloc:base: topology in shared memory"));
|
"hwloc:base: topology in shared memory");
|
||||||
topo_in_shmem = true;
|
topo_in_shmem = true;
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -320,14 +322,18 @@ int opal_hwloc_base_get_topology(void)
|
|||||||
/* if that isn't available, then try to retrieve
|
/* if that isn't available, then try to retrieve
|
||||||
* the xml representation from the PMIx data store */
|
* the xml representation from the PMIx data store */
|
||||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||||
"hwloc:base instantiating topology");
|
"hwloc:base getting topology XML string");
|
||||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO,
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO,
|
||||||
&wildcard_rank, &val, OPAL_STRING);
|
&wildcard_rank, &val, OPAL_STRING);
|
||||||
} else {
|
} else {
|
||||||
|
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||||
|
"hwloc:base PMIx not available");
|
||||||
rc = OPAL_ERR_NOT_SUPPORTED;
|
rc = OPAL_ERR_NOT_SUPPORTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
if (OPAL_SUCCESS == rc && NULL != val) {
|
||||||
|
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||||
|
"hwloc:base loading topology from XML");
|
||||||
/* load the topology */
|
/* load the topology */
|
||||||
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
|
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
|
||||||
free(val);
|
free(val);
|
||||||
@ -361,9 +367,12 @@ int opal_hwloc_base_get_topology(void)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
} else if (NULL == opal_hwloc_base_topo_file) {
|
} else if (NULL == opal_hwloc_base_topo_file) {
|
||||||
|
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||||
|
"hwloc:base discovering topology");
|
||||||
if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
|
if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
|
||||||
0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) ||
|
0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) ||
|
||||||
0 != hwloc_topology_load(opal_hwloc_topology)) {
|
0 != hwloc_topology_load(opal_hwloc_topology)) {
|
||||||
|
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
|
||||||
return OPAL_ERR_NOT_SUPPORTED;
|
return OPAL_ERR_NOT_SUPPORTED;
|
||||||
}
|
}
|
||||||
/* filter the cpus thru any default cpu set */
|
/* filter the cpus thru any default cpu set */
|
||||||
@ -372,6 +381,9 @@ int opal_hwloc_base_get_topology(void)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||||
|
"hwloc:base loading topology from file %s",
|
||||||
|
opal_hwloc_base_topo_file);
|
||||||
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) {
|
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -16,11 +16,15 @@ int main(int argc, char* argv[])
|
|||||||
int rank, size, rc;
|
int rank, size, rc;
|
||||||
hwloc_cpuset_t cpus;
|
hwloc_cpuset_t cpus;
|
||||||
char *bindings = NULL;
|
char *bindings = NULL;
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
MPI_Init(&argc, &argv);
|
MPI_Init(&argc, &argv);
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||||
|
pid = getpid();
|
||||||
|
|
||||||
|
printf("[%lu] Rank %d: getting topology\n", (unsigned long)pid, rank);
|
||||||
|
fflush(stdout);
|
||||||
if (OPAL_SUCCESS == opal_hwloc_base_get_topology()) {
|
if (OPAL_SUCCESS == opal_hwloc_base_get_topology()) {
|
||||||
cpus = hwloc_bitmap_alloc();
|
cpus = hwloc_bitmap_alloc();
|
||||||
rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS);
|
rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS);
|
||||||
|
Loading…
x
Reference in new issue
Block a user