1
1

Correct computation of relative locality

Ensure we always pass the cpuset as well as the locality string for each
proc. Correct the mtl/ofi component's computation of relative locality:
the function being called (opal_hwloc_compute_relative_locality) expects
to be given the locality string of each proc, not the cpuset. If the
locality string of the current proc isn't available, then fall back to
the cpuset (if available) and compute the locality string from it before
trying to compute the relative localities of our peers.

Signed-off-by: Ralph Castain <rhc@pmix.org>
This commit is contained in:
Ralph Castain 2020-11-10 00:12:22 -08:00 committed by Nikola Dancejic
parent 3f863aab8a
commit ec3589389a
4 changed files with 65 additions and 15 deletions

1
.gitignore vendored
View File

@ -396,6 +396,7 @@ opal/mca/pmix/pmix*/pmix/examples/jctrl
opal/mca/pmix/pmix*/pmix/examples/pub
opal/mca/pmix/pmix*/pmix/examples/server
opal/mca/pmix/pmix*/pmix/examples/tool
opal/mca/pmix/pmix*/pmix/maint/pmix.pc
opal/mca/pmix/ext3x/ext3x.c
opal/mca/pmix/ext3x/ext3x.h

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2020 Triad National Security, LLC. All rights
@ -308,6 +308,7 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank
char **peers = NULL;
char *local_peers = NULL;
char *locality_string = NULL;
char *mylocality = NULL;
pname.jobid = OPAL_PROC_MY_NAME.jobid;
pname.vpid = OPAL_VPID_WILDCARD;
@ -333,6 +334,20 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank
peers = opal_argv_split(local_peers, ',');
free(local_peers);
// Get my locality
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING,
&OPAL_PROC_MY_NAME, &mylocality, OPAL_STRING);
if (OPAL_SUCCESS != rc || NULL == mylocality) {
// can we fall back to cpuset?
if (NULL != cpuset && NULL != opal_hwloc_topology) {
mylocality = opal_hwloc_base_get_locality_string(opal_hwloc_topology, cpuset);
} else {
// We can't find package_rank, fall back to procid
opal_show_help("help-common-ofi.txt", "package_rank failed", true);
return pid;
}
}
for (i = 0; NULL != peers[i]; i++) {
pname.vpid = strtoul(peers[i], NULL, 10);
locality_string = NULL;
@ -346,7 +361,7 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank
}
// compute relative locality
relative_locality = opal_hwloc_compute_relative_locality(cpuset, locality_string);
relative_locality = opal_hwloc_compute_relative_locality(mylocality, locality_string);
free(locality_string);
if (relative_locality & OPAL_PROC_ON_SOCKET) {
@ -354,6 +369,7 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank
current_package_rank++;
}
}
free(mylocality);
return (uint32_t)package_ranks[my_local_rank];
}

View File

@ -12,7 +12,7 @@
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -54,6 +54,7 @@ int orte_ess_base_proc_binding(void)
int ret;
char *error=NULL;
hwloc_cpuset_t mycpus;
opal_value_t val;
/* Determine if we were pre-bound or not - this also indicates
* that we were launched via mpirun, bound or not */
@ -66,23 +67,39 @@ int orte_ess_base_proc_binding(void)
goto error;
}
}
if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
/* print out a shorthand notation to avoid pulling in the entire topology tree */
map = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
ORTE_PROC_MY_NAME, &map, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != map) {
/* get our cpuset */
if (NULL != orte_process_info.cpuset) {
free(orte_process_info.cpuset);
orte_process_info.cpuset = NULL;
}
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_CPUSET,
ORTE_PROC_MY_NAME, &orte_process_info.cpuset, OPAL_STRING);
/* try to get our locality as well */
map = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
ORTE_PROC_MY_NAME, &map, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != map) {
/* we were - no need to pull in the topology */
if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
opal_output(0, "MCW rank %s bound to %s",
ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid), map);
free(map);
} else {
opal_output(0, "MCW rank %s not bound", ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid));
}
free(map);
} else {
opal_output(0, "MCW rank %s not bound", ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid));
}
return ORTE_SUCCESS;
} else if (NULL != getenv(OPAL_MCA_PREFIX"orte_externally_bound")) {
orte_proc_is_bound = true;
/* see if we were launched by a PMIx-enabled system */
/* get our cpuset, if available */
if (NULL != orte_process_info.cpuset) {
free(orte_process_info.cpuset);
orte_process_info.cpuset = NULL;
}
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_CPUSET,
ORTE_PROC_MY_NAME, &orte_process_info.cpuset, OPAL_STRING);
/* see if we also have our locality - this is the one we require */
map = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
ORTE_PROC_MY_NAME, &map, OPAL_STRING);
@ -323,6 +340,17 @@ int orte_ess_base_proc_binding(void)
if (NULL != orte_process_info.cpuset) {
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_CPUSET,
orte_process_info.cpuset, OPAL_STRING);
/* save our locality string so we can retrieve it elsewhere */
OBJ_CONSTRUCT(&val, opal_value_t);
val.key = OPAL_PMIX_LOCALITY_STRING;
val.type = OPAL_STRING;
val.data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, orte_process_info.cpuset);
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &val))) {
ORTE_ERROR_LOG(ret);
}
val.key = NULL;
val.data.string = NULL;
OBJ_DESTRUCT(&val);
}
return ORTE_SUCCESS;

View File

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
@ -386,7 +386,12 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force)
kv->type = OPAL_STRING;
kv->data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, tmp);
opal_list_append(pmap, &kv->super);
free(tmp);
/* pass the cpuset itself as well */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_CPUSET);
kv->type = OPAL_STRING;
kv->data.string = tmp;
opal_list_append(pmap, &kv->super);
} else {
/* the proc is not bound */
kv = OBJ_NEW(opal_value_t);