From 24b91839aa8c7b4d7f5f0c4970eb1fcdd69c945a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 26 Mar 2013 19:14:23 +0000 Subject: [PATCH] Ensure the process knows its local cpuset early enough to perform the locality computation This commit was SVN r28221. --- orte/mca/ess/base/ess_base_fns.c | 35 ++++++++++++++++++++++---------- orte/util/nidmap.c | 10 +++++++-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index e86788b1a1..981249e23c 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -51,6 +51,7 @@ int orte_ess_base_proc_binding(void) char *map; int ret; char *error; + hwloc_cpuset_t mycpus; /* Determine if we were pre-bound or not */ if (NULL != getenv("OMPI_MCA_orte_bound_at_launch")) { @@ -253,25 +254,37 @@ int orte_ess_base_proc_binding(void) * times, so it's more efficient to keep a global copy */ opal_hwloc_base_get_local_cpuset(); - /* report bindings, if requested */ - if (opal_hwloc_report_bindings) { - char tmp1[1024], tmp2[1024]; - hwloc_cpuset_t mycpus; - /* get the cpus we are bound to */ - mycpus = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, - mycpus, - HWLOC_CPUBIND_PROCESS) < 0) { + + /* get the cpus we are bound to */ + mycpus = hwloc_bitmap_alloc(); + if (hwloc_get_cpubind(opal_hwloc_topology, + mycpus, + HWLOC_CPUBIND_PROCESS) < 0) { + if (NULL != orte_process_info.cpuset) { + free(orte_process_info.cpuset); + orte_process_info.cpuset = NULL; + } + if (opal_hwloc_report_bindings) { opal_output(0, "MCW rank %d is not bound", ORTE_PROC_MY_NAME->vpid); - } else { + } + } else { + /* store/update the string representation of our local binding */ + if (NULL != orte_process_info.cpuset) { + free(orte_process_info.cpuset); + orte_process_info.cpuset = NULL; + } + hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, mycpus); + /* report the binding, if requested */ + if (opal_hwloc_report_bindings) { 
+ char tmp1[1024], tmp2[1024]; opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), mycpus); opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), mycpus); opal_output(0, "MCW rank %d bound to %s: %s", ORTE_PROC_MY_NAME->vpid, tmp1, tmp2); } - hwloc_bitmap_free(mycpus); } + hwloc_bitmap_free(mycpus); return ORTE_SUCCESS; diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 50403a21fc..a31b681cc8 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -662,7 +662,6 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr, bool update) ORTE_ERROR_LOG(rc); goto cleanup_and_return; } - /* cycle thru the job's procs, including only those that have * been updated so we minimize the amount of info being sent */ @@ -843,6 +842,11 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo) /* set mine */ orte_process_info.my_local_rank = local_rank; orte_process_info.my_node_rank = node_rank; +#if OPAL_HAVE_HWLOC + if (NULL != cpu_bitmap) { + orte_process_info.cpuset = strdup(cpu_bitmap); + } +#endif } /* apps don't need the rest of the data in the buffer for this proc, * but we have to unpack it anyway to stay in sync @@ -881,6 +885,9 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo) ORTE_ERROR_LOG(rc); goto cleanup; } + if (NULL != cpu_bitmap) { + free(cpu_bitmap); + } #endif /* we don't need to store the rest of the values * for ourself in the database @@ -1032,7 +1039,6 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo) orte_local_rank_t local_rank; orte_node_rank_t node_rank; #if OPAL_HAVE_HWLOC - opal_hwloc_level_t bind_level = OPAL_HWLOC_NODE_LEVEL; char *cpu_bitmap; #endif orte_std_cntr_t n;