diff --git a/.gitignore b/.gitignore index ff8e303f9d..baf4cafb2a 100644 --- a/.gitignore +++ b/.gitignore @@ -396,6 +396,7 @@ opal/mca/pmix/pmix*/pmix/examples/jctrl opal/mca/pmix/pmix*/pmix/examples/pub opal/mca/pmix/pmix*/pmix/examples/server opal/mca/pmix/pmix*/pmix/examples/tool +opal/mca/pmix/pmix*/pmix/maint/pmix.pc opal/mca/pmix/ext3x/ext3x.c opal/mca/pmix/ext3x/ext3x.h diff --git a/opal/mca/common/ofi/common_ofi.c b/opal/mca/common/ofi/common_ofi.c index 9b4631856c..593d6e5135 100644 --- a/opal/mca/common/ofi/common_ofi.c +++ b/opal/mca/common/ofi/common_ofi.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights @@ -308,6 +308,7 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank char **peers = NULL; char *local_peers = NULL; char *locality_string = NULL; + char *mylocality = NULL; pname.jobid = OPAL_PROC_MY_NAME.jobid; pname.vpid = OPAL_VPID_WILDCARD; @@ -333,6 +334,20 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank peers = opal_argv_split(local_peers, ','); free(local_peers); + // Get my locality + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING, + &OPAL_PROC_MY_NAME, &mylocality, OPAL_STRING); + if (OPAL_SUCCESS != rc || NULL == mylocality) { + // can we fall back to cpuset? + if (NULL != cpuset && NULL != opal_hwloc_topology) { + mylocality = opal_hwloc_base_get_locality_string(opal_hwloc_topology, cpuset); + } else { + // We can't find package_rank, fall back to procid + opal_show_help("help-common-ofi.txt", "package_rank failed", true); + return pid; + } + } + for (i = 0; NULL != peers[i]; i++) { pname.vpid = strtoul(peers[i], NULL, 10); locality_string = NULL; @@ -346,7 +361,7 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank } // compute relative locality - relative_locality = opal_hwloc_compute_relative_locality(cpuset, locality_string); + relative_locality = opal_hwloc_compute_relative_locality(mylocality, locality_string); free(locality_string); if (relative_locality & OPAL_PROC_ON_SOCKET) { @@ -354,6 +369,7 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank current_package_rank++; } } + free(mylocality); return (uint32_t)package_ranks[my_local_rank]; } diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index 9b57519e80..fc97a4b499 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -54,6 +54,7 @@ int orte_ess_base_proc_binding(void) int ret; char *error=NULL; hwloc_cpuset_t mycpus; + opal_value_t val; /* Determine if we were pre-bound or not - this also indicates * that we were launched via mpirun, bound or not */ @@ -66,23 +67,39 @@ int orte_ess_base_proc_binding(void) goto error; } } - if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { - /* print out a shorthand notation to avoid pulling in the entire topology tree */ - map = NULL; - OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING, - ORTE_PROC_MY_NAME, &map, OPAL_STRING); - if (OPAL_SUCCESS == ret && NULL != map) { + /* get our cpuset */ + if (NULL != orte_process_info.cpuset) { + free(orte_process_info.cpuset); + orte_process_info.cpuset = NULL; + } + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_CPUSET, + ORTE_PROC_MY_NAME, &orte_process_info.cpuset, OPAL_STRING); + /* try to get our locality as well */ + map = NULL; + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING, + ORTE_PROC_MY_NAME, &map, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != map) { + /* we were - no need to pull in the topology */ + if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "MCW rank %s bound to %s", ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid), map); - free(map); - } else { - opal_output(0, "MCW rank %s not bound", ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid)); } + free(map); + } else { + opal_output(0, "MCW rank %s not bound", ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid)); } return ORTE_SUCCESS; } else if (NULL != getenv(OPAL_MCA_PREFIX"orte_externally_bound")) { orte_proc_is_bound = true; - /* see if we were launched by a PMIx-enabled system */ + /* get our cpuset, if available */ + if (NULL != orte_process_info.cpuset) { + free(orte_process_info.cpuset); + orte_process_info.cpuset = NULL; + } + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_CPUSET, + ORTE_PROC_MY_NAME, &orte_process_info.cpuset, OPAL_STRING); + + /* see if we also have our locality - this is the one we require */ map = NULL; OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING, ORTE_PROC_MY_NAME, &map, OPAL_STRING); @@ -323,6 +340,17 @@ int orte_ess_base_proc_binding(void) if (NULL != orte_process_info.cpuset) { OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_CPUSET, orte_process_info.cpuset, OPAL_STRING); + /* save our locality string so we can retrieve it elsewhere */ + OBJ_CONSTRUCT(&val, opal_value_t); + val.key = OPAL_PMIX_LOCALITY_STRING; + val.type = OPAL_STRING; + val.data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, orte_process_info.cpuset); + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &val))) { + ORTE_ERROR_LOG(ret); + } + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); } return ORTE_SUCCESS; diff --git a/orte/orted/pmix/pmix_server_register_fns.c b/orte/orted/pmix/pmix_server_register_fns.c index 0a0a54d764..f61e1ff4f5 100644 --- a/orte/orted/pmix/pmix_server_register_fns.c +++ b/orte/orted/pmix/pmix_server_register_fns.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science @@ -386,7 +386,12 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force) kv->type = OPAL_STRING; kv->data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, tmp); opal_list_append(pmap, &kv->super); - free(tmp); + /* pass the cpuset itself as well */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_CPUSET); + kv->type = OPAL_STRING; + kv->data.string = tmp; + opal_list_append(pmap, &kv->super); } else { /* the proc is not bound */ kv = OBJ_NEW(opal_value_t);