diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 5f0ad67023..3e2f9d5a24 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -249,6 +249,12 @@ static int rte_init(void) error = "could not get PMI clique size"; goto error; } + /* store that info - remember, we want the number of peers that + * share the node WITH ME, so we have to subtract ourselves from + * that number + */ + orte_process_info.num_local_peers = i - 1; + /* now get the specific ranks */ ranks = (int*)malloc(i * sizeof(int)); if (PMI_SUCCESS != (ret = PMI_Get_clique_ranks(ranks, i))) { ORTE_PMI_ERROR(ret, "PMI_Get_clique_ranks"); diff --git a/orte/test/mpi/hello.c b/orte/test/mpi/hello.c index 48e3c8ba3c..e8dbb2ac2c 100644 --- a/orte/test/mpi/hello.c +++ b/orte/test/mpi/hello.c @@ -9,6 +9,7 @@ #include "opal/mca/hwloc/hwloc.h" #include "mpi.h" +#include "orte/util/proc_info.h" int main(int argc, char* argv[]) { @@ -24,7 +25,8 @@ int main(int argc, char* argv[]) rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS); hwloc_bitmap_list_asprintf(&bindings, cpus); - printf("Hello, World, I am %d of %d: get_cpubind: %d bitmap %s\n", rank, size, rc, + printf("Hello, World, I am %d of %d [%d local peers]: get_cpubind: %d bitmap %s\n", + rank, size, orte_process_info.num_local_peers, rc, (NULL == bindings) ? 
"NULL" : bindings); MPI_Finalize(); diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index f7b7776025..0d853ea4c2 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -950,6 +950,10 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo) "%s orte:util:decode:pidmap proc %s shares node", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc))); + /* we share a node, so add them to the count of peers + * sharing the node with me + */ + orte_process_info.num_local_peers++; #if OPAL_HAVE_HWLOC /* retrieve the bind level for the other proc's job */ lvptr = &pbind; diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index ce024a950f..c95662b54e 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -59,6 +61,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = { /* .num_restarts = */ 0, /* .my_node_rank = */ ORTE_NODE_RANK_INVALID, /* .my_local_rank = */ ORTE_LOCAL_RANK_INVALID, + /* .num_local_peers = */ 0, /* .tmpdir_base = */ NULL, /* .top_session_dir = */ NULL, /* .job_session_dir = */ NULL, diff --git a/orte/util/proc_info.h b/orte/util/proc_info.h index 00e2f10847..3e34d5569e 100644 --- a/orte/util/proc_info.h +++ b/orte/util/proc_info.h @@ -104,6 +104,7 @@ struct orte_proc_info_t { int32_t num_restarts; /**< number of times this proc has restarted */ orte_node_rank_t my_node_rank; /**< node rank */ orte_local_rank_t my_local_rank; /**< local rank */ + int32_t num_local_peers; /**< number of procs from my job that share my node with me */ /* The session directory has the form * <prefix>/<openmpi-sessions-user>/<jobid>/<procid>, where the prefix * can either be provided by the user via the