1
1

Merge pull request #2646 from rhc54/topic/squeze

Begin to reduce reliance of application procs on the topology tree it…
Этот коммит содержится в:
rhc54 2016-12-28 10:16:58 -08:00 коммит произвёл GitHub
родитель 75be023f90 3a2d6a5ab6
Коммит acbf1cbaef
7 изменённых файлов: 401 добавлений и 158 удалений

Просмотреть файл

@ -16,7 +16,7 @@
* All rights reserved.
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 ARM, Inc. All rights reserved.
@ -52,6 +52,7 @@
#include "opal/util/show_help.h"
#include "opal/util/printf.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/shmem/base/base.h"
#include "opal/mca/shmem/shmem.h"
@ -223,23 +224,28 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
int my_mem_node, num_mem_nodes, i, rc;
mca_common_sm_mpool_resources_t *res = NULL;
mca_btl_sm_component_t* m = &mca_btl_sm_component;
char *loc, *mynuma;
opal_process_name_t wildcard_rank;
/* Assume we don't have hwloc support and fill in dummy info */
mca_btl_sm_component.mem_node = my_mem_node = 0;
mca_btl_sm_component.num_mem_nodes = num_mem_nodes = 1;
/* If we have hwloc support, then get accurate information */
if (NULL != opal_hwloc_topology) {
i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology,
HWLOC_OBJ_NODE, 0,
OPAL_HWLOC_AVAILABLE);
/* If we find >0 NUMA nodes, then investigate further */
if (i > 0) {
int numa=0, w;
unsigned n_bound=0;
hwloc_cpuset_t avail;
hwloc_obj_t obj;
/* see if we were given a topology signature */
wildcard_rank.jobid = OPAL_PROC_MY_NAME.jobid;
wildcard_rank.vpid = OPAL_VPID_WILDCARD;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_TOPOLOGY_SIGNATURE,
&wildcard_rank, &loc, OPAL_STRING);
if (OPAL_SUCCESS == rc) {
/* the number of NUMA nodes is right at the front */
mca_btl_sm_component.num_mem_nodes = num_mem_nodes = strtoul(loc, NULL, 10);
free(loc);
} else {
/* If we have hwloc support, then get accurate information */
if (NULL != opal_hwloc_topology) {
i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology,
HWLOC_OBJ_NODE, 0,
OPAL_HWLOC_AVAILABLE);
/* JMS This tells me how many numa nodes are *available*,
but it's not how many are being used *by this job*.
@ -248,33 +254,65 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
should be improved to be how many NUMA nodes are being
used *in this job*. */
mca_btl_sm_component.num_mem_nodes = num_mem_nodes = i;
}
}
/* see if we were given our location */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING,
&OPAL_PROC_MY_NAME, &loc, OPAL_STRING);
if (OPAL_SUCCESS == rc) {
if (NULL == loc) {
mca_btl_sm_component.mem_node = my_mem_node = -1;
} else {
/* get our NUMA location */
mynuma = opal_hwloc_base_get_location(loc, HWLOC_OBJ_NODE, 0);
if (NULL == mynuma ||
NULL != strchr(mynuma, ',') ||
NULL != strchr(mynuma, '-')) {
/* we either have no idea what NUMA we are on, or we
* are on multiple NUMA nodes */
mca_btl_sm_component.mem_node = my_mem_node = -1;
} else {
/* we are bound to a single NUMA node */
my_mem_node = strtoul(mynuma, NULL, 10);
mca_btl_sm_component.mem_node = my_mem_node;
}
if (NULL != mynuma) {
free(mynuma);
}
free(loc);
}
} else {
/* If we have hwloc support, then get accurate information */
if (NULL != opal_hwloc_topology && num_mem_nodes > 0 &&
NULL != opal_process_info.cpuset) {
int numa=0, w;
unsigned n_bound=0;
hwloc_cpuset_t avail;
hwloc_obj_t obj;
/* if we are not bound, then there is nothing further to do */
if (NULL != opal_process_info.cpuset) {
/* count the number of NUMA nodes to which we are bound */
for (w=0; w < i; w++) {
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology,
HWLOC_OBJ_NODE, 0, w,
OPAL_HWLOC_AVAILABLE))) {
continue;
}
/* get that NUMA node's available cpus */
avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
/* see if we intersect */
if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) {
n_bound++;
numa = w;
}
/* count the number of NUMA nodes to which we are bound */
for (w=0; w < i; w++) {
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology,
HWLOC_OBJ_NODE, 0, w,
OPAL_HWLOC_AVAILABLE))) {
continue;
}
/* if we are located on more than one NUMA, or we didn't find
* a NUMA we are on, then not much we can do
*/
if (1 == n_bound) {
mca_btl_sm_component.mem_node = my_mem_node = numa;
} else {
mca_btl_sm_component.mem_node = my_mem_node = -1;
/* get that NUMA node's available cpus */
avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
/* see if we intersect */
if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) {
n_bound++;
numa = w;
}
}
/* if we are located on more than one NUMA, or we didn't find
* a NUMA we are on, then not much we can do
*/
if (1 == n_bound) {
mca_btl_sm_component.mem_node = my_mem_node = numa;
} else {
mca_btl_sm_component.mem_node = my_mem_node = -1;
}
}
}

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -276,6 +276,16 @@ OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo,
OPAL_DECLSPEC char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo);
/* get a string describing the locality of a given process */
OPAL_DECLSPEC char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap);
/* extract a location from the locality string */
OPAL_DECLSPEC char* opal_hwloc_base_get_location(char *locality,
hwloc_obj_type_t type,
unsigned index);
OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2);
END_C_DECLS
#endif /* OPAL_HWLOC_BASE_H */

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -1502,9 +1502,9 @@ static char *hwloc_getline(FILE *fp)
ret = fgets(input, OPAL_HWLOC_MAX_ELOG_LINE, fp);
if (NULL != ret) {
input[strlen(input)-1] = '\0'; /* remove newline */
buff = strdup(input);
return buff;
input[strlen(input)-1] = '\0'; /* remove newline */
buff = strdup(input);
return buff;
}
return NULL;
@ -2128,3 +2128,249 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo)
}
return sig;
}
/**
 * Build a locality string describing which NUMA/socket/cache/core/PU
 * objects a process's cpuset overlaps, e.g. "NM0:SK0:L30:L20:L10:CR2:HT4-5".
 *
 * @param topo    topology to scan
 * @param bitmap  hwloc list-format cpuset string for the process, or NULL
 *
 * @return malloc'd locality string (caller frees), or NULL if the proc
 *         is unbound (NULL bitmap, or bitmap covering all cpus)
 */
char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
                                          char *bitmap)
{
    hwloc_obj_t obj = NULL;
    char *locality = NULL, *tmp, *t2;
    const char *prefix;
    unsigned depth, d, width, w;
    hwloc_cpuset_t cpuset, avail, result;
    hwloc_obj_type_t type;

    /* if this proc is not bound, then there is no locality. We
     * know it isn't bound if the cpuset is NULL, or if it is
     * all 1's */
    if (NULL == bitmap) {
        return NULL;
    }
    cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_list_sscanf(cpuset, bitmap);
    if (hwloc_bitmap_isfull(cpuset)) {
        hwloc_bitmap_free(cpuset);
        return NULL;
    }

    /* we are going to use a bitmap to save the results so
     * that we can use a hwloc utility to print them */
    result = hwloc_bitmap_alloc();

    /* get the max depth of the topology */
    depth = hwloc_topology_get_depth(topo);

    /* start at the first depth below the top machine level */
    for (d=1; d < depth; d++) {
        /* get the object type at this depth */
        type = hwloc_get_depth_type(topo, d);
        /* if it isn't one of interest, then ignore it */
        if (HWLOC_OBJ_NODE != type &&
            HWLOC_OBJ_SOCKET != type &&
            HWLOC_OBJ_CACHE != type &&
            HWLOC_OBJ_CORE != type &&
            HWLOC_OBJ_PU != type) {
            continue;
        }
        /* get the width of the topology at this depth */
        width = hwloc_get_nbobjs_by_depth(topo, d);
        /* scan all objects at this depth to see if
         * the location overlaps with them */
        for (w=0; w < width; w++) {
            /* get the object at this depth/index */
            obj = hwloc_get_obj_by_depth(topo, d, w);
            /* get the available cpuset for this obj */
            avail = opal_hwloc_base_get_available_cpus(topo, obj);
            /* see if the location intersects with it */
            if (hwloc_bitmap_intersects(avail, cpuset)) {
                hwloc_bitmap_set(result, w);
            }
        }
        /* it should be impossible, but allow for the possibility
         * that we came up empty at this depth */
        if (!hwloc_bitmap_iszero(result)) {
            /* pick the tag for this depth. All objects at a given depth
             * share the same type, so keying off the depth's type (rather
             * than the last-visited obj) is both safer and clearer. The
             * cache level, however, must be read from an object at this
             * depth - obj is guaranteed set here because result is only
             * non-zero if the width loop ran at least once. */
            switch (type) {
            case HWLOC_OBJ_NODE:
                prefix = "NM";
                break;
            case HWLOC_OBJ_SOCKET:
                prefix = "SK";
                break;
            case HWLOC_OBJ_CACHE:
                if (3 == obj->attr->cache.depth) {
                    prefix = "L3";
                } else if (2 == obj->attr->cache.depth) {
                    prefix = "L2";
                } else {
                    prefix = "L1";
                }
                break;
            case HWLOC_OBJ_CORE:
                prefix = "CR";
                break;
            case HWLOC_OBJ_PU:
                prefix = "HT";
                break;
            default:
                /* cannot happen - filtered above */
                prefix = NULL;
                break;
            }
            if (NULL != prefix) {
                hwloc_bitmap_list_asprintf(&tmp, result);
                /* append "<tag><indices>:" to the accumulating string */
                asprintf(&t2, "%s%s%s:", (NULL == locality) ? "" : locality,
                         prefix, tmp);
                free(locality);
                locality = t2;
                free(tmp);
            }
        }
        hwloc_bitmap_zero(result);
    }
    hwloc_bitmap_free(result);
    hwloc_bitmap_free(cpuset);
    /* remove the trailing colon */
    if (NULL != locality) {
        locality[strlen(locality)-1] = '\0';
    }
    return locality;
}
/**
 * Extract the location field for a given object type from a locality
 * string produced by opal_hwloc_base_get_locality_string().
 *
 * @param locality  colon-delimited locality string (e.g. "NM0:SK0:CR2"),
 *                  or NULL
 * @param type      hwloc object type to look up
 * @param index     for HWLOC_OBJ_CACHE only: the cache level (1, 2, or 3)
 *
 * @return malloc'd copy of the index list for that type (caller frees),
 *         or NULL if not present / type not tracked
 */
char* opal_hwloc_base_get_location(char *locality,
                                   hwloc_obj_type_t type,
                                   unsigned index)
{
    char **loc;
    char *srch, *ans = NULL;
    size_t n;

    if (NULL == locality) {
        return NULL;
    }
    switch(type) {
    case HWLOC_OBJ_NODE:
        srch = "NM";
        break;
    case HWLOC_OBJ_SOCKET:
        srch = "SK";
        break;
    case HWLOC_OBJ_CACHE:
        if (3 == index) {
            srch = "L3";
        } else if (2 == index) {
            srch = "L2";
        } else {
            /* level-1 caches are tagged "L1" by
             * opal_hwloc_base_get_locality_string - searching for "L0"
             * could never match anything */
            srch = "L1";
        }
        break;
    case HWLOC_OBJ_CORE:
        srch = "CR";
        break;
    case HWLOC_OBJ_PU:
        srch = "HT";
        break;
    default:
        /* type not represented in locality strings */
        return NULL;
    }
    loc = opal_argv_split(locality, ':');
    if (NULL == loc) {
        return NULL;
    }
    for (n=0; NULL != loc[n]; n++) {
        /* fields start with a two-char tag; the rest is the index list */
        if (0 == strncmp(loc[n], srch, 2)) {
            ans = strdup(&loc[n][2]);
            break;
        }
    }
    opal_argv_free(loc);
    return ans;
}
/**
 * Compute the relative locality of two processes from their locality
 * strings (as produced by opal_hwloc_base_get_locality_string).
 * A locality bit is set for every topology level on which the two
 * location bitmaps intersect.
 */
opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2)
{
    /* table mapping the two-char field tag to its locality bit */
    static const struct {
        const char *tag;
        opal_hwloc_locality_t bit;
    } tagmap[] = {
        { "NM", OPAL_PROC_ON_NUMA },
        { "SK", OPAL_PROC_ON_SOCKET },
        { "L3", OPAL_PROC_ON_L3CACHE },
        { "L2", OPAL_PROC_ON_L2CACHE },
        { "L1", OPAL_PROC_ON_L1CACHE },
        { "CR", OPAL_PROC_ON_CORE },
        { "HT", OPAL_PROC_ON_HWTHREAD },
        { NULL, 0 }
    };
    opal_hwloc_locality_t relative;
    char **fields1, **fields2;
    hwloc_bitmap_t mask1, mask2;
    size_t i, j, k;

    /* start with what we know - they share a node on a cluster
     * NOTE: we may alter that latter part as hwloc's ability to
     * sense multi-cu, multi-cluster systems grows
     */
    relative = OPAL_PROC_ON_NODE | OPAL_PROC_ON_HOST | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER;

    /* if either location is NULL, then that isn't bound */
    if (NULL == loc1 || NULL == loc2) {
        return relative;
    }

    fields1 = opal_argv_split(loc1, ':');
    fields2 = opal_argv_split(loc2, ':');
    mask1 = hwloc_bitmap_alloc();
    mask2 = hwloc_bitmap_alloc();

    /* check each matching type */
    for (i=0; NULL != fields1[i]; i++) {
        /* convert the location into bitmap */
        hwloc_bitmap_list_sscanf(mask1, &fields1[i][2]);
        /* find the matching type in the second set */
        for (j=0; NULL != fields2[j]; j++) {
            if (0 != strncmp(fields1[i], fields2[j], 2)) {
                continue;
            }
            /* convert the location into bitmap */
            hwloc_bitmap_list_sscanf(mask2, &fields2[j][2]);
            /* if they intersect, set the corresponding locality bit */
            if (hwloc_bitmap_intersects(mask1, mask2)) {
                for (k=0; NULL != tagmap[k].tag; k++) {
                    if (0 == strncmp(fields1[i], tagmap[k].tag, 2)) {
                        relative |= tagmap[k].bit;
                        break;
                    }
                }
                if (NULL == tagmap[k].tag) {
                    /* should never happen */
                    opal_output(0, "UNRECOGNIZED LOCALITY %s", fields1[i]);
                }
            }
            /* only the first field of matching type counts */
            break;
        }
    }

    opal_argv_free(fields1);
    opal_argv_free(fields2);
    hwloc_bitmap_free(mask1);
    hwloc_bitmap_free(mask2);
    return relative;
}

Просмотреть файл

@ -104,6 +104,8 @@ BEGIN_C_DECLS
/**** no PMIx equivalent ****/
#define OPAL_PMIX_LOCALITY "pmix.loc" // (uint16_t) relative locality of two procs
#define OPAL_PMIX_TOPOLOGY_SIGNATURE "pmix.toposig" // (char*) topology signature string
#define OPAL_PMIX_LOCALITY_STRING "pmix.locstr" // (char*) string describing a proc's location
#define OPAL_PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for the specified nspace
#define OPAL_PMIX_ALLOCATED_NODELIST "pmix.alist" // (char*) comma-delimited list of all nodes in this allocation regardless of

Просмотреть файл

@ -94,7 +94,7 @@ static int rte_init(void)
char *val;
int u32, *u32ptr;
uint16_t u16, *u16ptr;
char **peers=NULL, *mycpuset, **cpusets=NULL;
char **peers=NULL, *mycpuset;
opal_process_name_t wildcard_rank, pname;
bool bool_val, *bool_ptr = &bool_val, tdir_mca_override = false;
size_t i;
@ -248,7 +248,7 @@ static int rte_init(void)
/* retrieve temp directories info */
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* We want to provide user with ability
/* We want to provide user with ability
* to override RM settings at his own risk
*/
if( NULL == orte_process_info.top_session_dir ){
@ -264,7 +264,7 @@ static int rte_init(void)
if( !tdir_mca_override ){
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* We want to provide user with ability
/* We want to provide user with ability
* to override RM settings at his own risk
*/
if( NULL == orte_process_info.job_session_dir ){
@ -281,7 +281,7 @@ static int rte_init(void)
if( !tdir_mca_override ){
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* We want to provide user with ability
/* We want to provide user with ability
* to override RM settings at his own risk
*/
if( NULL == orte_process_info.proc_session_dir ){
@ -385,65 +385,64 @@ static int rte_init(void)
if (OPAL_SUCCESS == ret && NULL != val) {
peers = opal_argv_split(val, ',');
free(val);
/* and their cpusets, if available */
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS,
&wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
cpusets = opal_argv_split(val, ':');
free(val);
} else {
cpusets = NULL;
}
} else {
peers = NULL;
cpusets = NULL;
}
} else {
peers = NULL;
cpusets = NULL;
}
/* set the locality */
if (NULL != peers) {
/* indentify our cpuset */
if (NULL != cpusets) {
mycpuset = cpusets[orte_process_info.my_local_rank];
/* identify our location */
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
ORTE_PROC_MY_NAME, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
mycpuset = val;
} else {
mycpuset = NULL;
}
pname.jobid = ORTE_PROC_MY_NAME->jobid;
for (i=0; NULL != peers[i]; i++) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY);
kv->type = OPAL_UINT16;
pname.vpid = strtoul(peers[i], NULL, 10);
if (pname.vpid == ORTE_PROC_MY_NAME->vpid) {
/* we are fully local to ourselves */
u16 = OPAL_PROC_ALL_LOCAL;
} else if (NULL == mycpuset || NULL == cpusets[i] ||
0 == strcmp(cpusets[i], "UNBOUND")) {
/* all we can say is that it shares our node */
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
} else {
/* we have it, so compute the locality */
u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]);
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
&pname, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
} else {
/* all we can say is that it shares our node */
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
}
}
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY);
kv->type = OPAL_UINT16;
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
"%s ess:pmi:locality: proc %s locality %x",
"%s ess:pmi:locality: proc %s locality %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pname), u16));
ORTE_NAME_PRINT(&pname), opal_hwloc_base_print_locality(u16)));
kv->data.uint16 = u16;
ret = opal_pmix.store_local(&pname, kv);
if (OPAL_SUCCESS != ret) {
error = "local store of locality";
opal_argv_free(peers);
opal_argv_free(cpusets);
if (NULL != mycpuset) {
free(mycpuset);
}
goto error;
}
OBJ_RELEASE(kv);
}
opal_argv_free(peers);
opal_argv_free(cpusets);
if (NULL != mycpuset) {
free(mycpuset);
}
}
/* now that we have all required info, complete the setup */

0
orte/mca/rmaps/base/rmaps_base_map_job.c Исполняемый файл → Обычный файл
Просмотреть файл

Просмотреть файл

@ -38,6 +38,7 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/error.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/util/name_fns.h"
@ -59,7 +60,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
opal_value_t *kv;
orte_node_t *node, *mynode;
opal_vpid_t vpid;
char **list, **procs, **micro, *tmp, *regex, *cpulist, *peerlist;
char **list, **procs, **micro, *tmp, *regex;
orte_job_t *dmns;
orte_job_map_t *map;
orte_app_context_t *app;
@ -239,13 +240,22 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
kv->data.uint32 = jdata->total_slots_alloc;
opal_list_append(info, &kv->super);
/* topology signature */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_TOPOLOGY_SIGNATURE);
kv->type = OPAL_STRING;
kv->data.string = strdup(orte_topo_signature);
opal_list_append(info, &kv->super);
/* register any local clients */
vpid = ORTE_VPID_MAX;
micro = NULL;
for (i=0; i < mynode->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(mynode->procs, i))) {
continue;
}
if (pptr->name.jobid == jdata->jobid) {
opal_argv_append_nosize(&micro, ORTE_VPID_PRINT(pptr->name.vpid));
if (pptr->name.vpid < vpid) {
vpid = pptr->name.vpid;
}
@ -256,6 +266,16 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
}
}
}
if (NULL != micro) {
/* pass the local peers */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
kv->type = OPAL_STRING;
kv->data.string = opal_argv_join(micro, ',');
opal_argv_free(micro);
opal_list_append(info, &kv->super);
}
/* pass the local ldr */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALLDR);
@ -274,71 +294,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) {
continue;
}
/* construct the list of local peers, while adding
* each proc's locality info */
list = NULL;
procs = NULL;
cpulist = NULL;
peerlist = NULL;
vpid = ORTE_VPID_MAX;
for (i=0; i < node->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;
}
if (pptr->name.jobid == jdata->jobid) {
opal_argv_append_nosize(&list, ORTE_VPID_PRINT(pptr->name.vpid));
if (pptr->name.vpid < vpid) {
vpid = pptr->name.vpid;
}
/* note that we have to pass the cpuset for each local
* peer so locality can be computed */
tmp = NULL;
if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
if (NULL != tmp) {
opal_argv_append_nosize(&procs, tmp);
free(tmp);
} else {
opal_argv_append_nosize(&procs, "UNBOUND");
}
} else {
opal_argv_append_nosize(&procs, "UNBOUND");
}
}
}
/* construct the list of peers for transmission */
if (NULL != list) {
peerlist = opal_argv_join(list, ',');
opal_argv_free(list);
list = NULL;
}
/* construct the list of cpusets for transmission */
if (NULL != procs) {
cpulist = opal_argv_join(procs, ':');
opal_argv_free(procs);
procs = NULL;
}
/* if this is me, then pass the peers and cpusets to myself
* in order to maintain backward compatibility for the non-pmix
* components in OPAL/pmix */
if (node == mynode) {
/* pass the list of peers */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
kv->type = OPAL_STRING;
kv->data.string = strdup(peerlist);
opal_list_append(info, &kv->super);
/* pass the list of cpusets */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
kv->type = OPAL_STRING;
kv->data.string = strdup(cpulist);
opal_list_append(info, &kv->super);
}
/* now cycle across each proc on this node, passing all data that
/* cycle across each proc on this node, passing all data that
* varies by proc */
for (i=0; i < node->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
@ -363,19 +319,18 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
kv->data.name.vpid = pptr->name.vpid;
opal_list_append(pmap, &kv->super);
/* pass the list of peers */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
kv->type = OPAL_STRING;
kv->data.string = strdup(peerlist);
opal_list_append(pmap, &kv->super);
/* pass the list of cpusets */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
kv->type = OPAL_STRING;
kv->data.string = strdup(cpulist);
opal_list_append(pmap, &kv->super);
/* location, for local procs */
if (node == mynode) {
if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
kv->type = OPAL_STRING;
kv->data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, tmp);
opal_output(0, "PROC %s LOCALITY %s", ORTE_NAME_PRINT(&pptr->name), kv->data.string);
opal_list_append(pmap, &kv->super);
free(tmp);
}
}
/* appnum */
kv = OBJ_NEW(opal_value_t);
@ -441,13 +396,6 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
kv->data.uint32 = pptr->node->index;
opal_list_append(pmap, &kv->super);
}
/* cleanup */
if (NULL != cpulist) {
free(cpulist);
}
if (NULL != peerlist) {
free(peerlist);
}
}
/* mark the job as registered */