Merge pull request #2646 from rhc54/topic/squeze
Begin to reduce reliance of application procs on the topology tree it…
Commit acbf1cbaef

@@ -16,7 +16,7 @@
  * All rights reserved.
  * Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
  * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
  * Copyright (c) 2014-2015 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2016 ARM, Inc. All rights reserved.
@@ -52,6 +52,7 @@
 #include "opal/util/show_help.h"
 #include "opal/util/printf.h"
 #include "opal/mca/hwloc/base/base.h"
+#include "opal/mca/pmix/pmix.h"
 #include "opal/mca/shmem/base/base.h"
 #include "opal/mca/shmem/shmem.h"
 
@@ -223,23 +224,28 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
     int my_mem_node, num_mem_nodes, i, rc;
     mca_common_sm_mpool_resources_t *res = NULL;
     mca_btl_sm_component_t* m = &mca_btl_sm_component;
+    char *loc, *mynuma;
+    opal_process_name_t wildcard_rank;
 
     /* Assume we don't have hwloc support and fill in dummy info */
     mca_btl_sm_component.mem_node = my_mem_node = 0;
     mca_btl_sm_component.num_mem_nodes = num_mem_nodes = 1;
 
-    /* If we have hwloc support, then get accurate information */
-    if (NULL != opal_hwloc_topology) {
-        i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology,
-                                               HWLOC_OBJ_NODE, 0,
-                                               OPAL_HWLOC_AVAILABLE);
-
-        /* If we find >0 NUMA nodes, then investigate further */
-        if (i > 0) {
-            int numa=0, w;
-            unsigned n_bound=0;
-            hwloc_cpuset_t avail;
-            hwloc_obj_t obj;
+    /* see if we were given a topology signature */
+    wildcard_rank.jobid = OPAL_PROC_MY_NAME.jobid;
+    wildcard_rank.vpid = OPAL_VPID_WILDCARD;
+    OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_TOPOLOGY_SIGNATURE,
+                                   &wildcard_rank, &loc, OPAL_STRING);
+    if (OPAL_SUCCESS == rc) {
+        /* the number of NUMA nodes is right at the front */
+        mca_btl_sm_component.num_mem_nodes = num_mem_nodes = strtoul(loc, NULL, 10);
+        free(loc);
+    } else {
+        /* If we have hwloc support, then get accurate information */
+        if (NULL != opal_hwloc_topology) {
+            i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology,
+                                                   HWLOC_OBJ_NODE, 0,
+                                                   OPAL_HWLOC_AVAILABLE);
 
             /* JMS This tells me how many numa nodes are *available*,
                but it's not how many are being used *by this job*.
@@ -248,33 +254,65 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
                should be improved to be how many NUMA nodes are being
                used *in this job*. */
             mca_btl_sm_component.num_mem_nodes = num_mem_nodes = i;
-
-            /* if we are not bound, then there is nothing further to do */
-            if (NULL != opal_process_info.cpuset) {
-                /* count the number of NUMA nodes to which we are bound */
-                for (w=0; w < i; w++) {
-                    if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology,
-                                                                       HWLOC_OBJ_NODE, 0, w,
-                                                                       OPAL_HWLOC_AVAILABLE))) {
-                        continue;
-                    }
-                    /* get that NUMA node's available cpus */
-                    avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
-                    /* see if we intersect */
-                    if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) {
-                        n_bound++;
-                        numa = w;
-                    }
-                }
-                /* if we are located on more than one NUMA, or we didn't find
-                 * a NUMA we are on, then not much we can do
-                 */
-                if (1 == n_bound) {
-                    mca_btl_sm_component.mem_node = my_mem_node = numa;
-                } else {
-                    mca_btl_sm_component.mem_node = my_mem_node = -1;
-                }
+        }
+    }
+
+    /* see if we were given our location */
+    OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING,
+                                   &OPAL_PROC_MY_NAME, &loc, OPAL_STRING);
+    if (OPAL_SUCCESS == rc) {
+        if (NULL == loc) {
+            mca_btl_sm_component.mem_node = my_mem_node = -1;
+        } else {
+            /* get our NUMA location */
+            mynuma = opal_hwloc_base_get_location(loc, HWLOC_OBJ_NODE, 0);
+            if (NULL == mynuma ||
+                NULL != strchr(mynuma, ',') ||
+                NULL != strchr(mynuma, '-')) {
+                /* we either have no idea what NUMA we are on, or we
+                 * are on multiple NUMA nodes */
+                mca_btl_sm_component.mem_node = my_mem_node = -1;
+            } else {
+                /* we are bound to a single NUMA node */
+                my_mem_node = strtoul(mynuma, NULL, 10);
+                mca_btl_sm_component.mem_node = my_mem_node;
+            }
+            if (NULL != mynuma) {
+                free(mynuma);
+            }
+            free(loc);
+        }
+    } else {
+        /* If we have hwloc support, then get accurate information */
+        if (NULL != opal_hwloc_topology && num_mem_nodes > 0 &&
+            NULL != opal_process_info.cpuset) {
+            int numa=0, w;
+            unsigned n_bound=0;
+            hwloc_cpuset_t avail;
+            hwloc_obj_t obj;
+
+            /* count the number of NUMA nodes to which we are bound */
+            for (w=0; w < i; w++) {
+                if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology,
+                                                                   HWLOC_OBJ_NODE, 0, w,
+                                                                   OPAL_HWLOC_AVAILABLE))) {
+                    continue;
+                }
+                /* get that NUMA node's available cpus */
+                avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
+                /* see if we intersect */
+                if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) {
+                    n_bound++;
+                    numa = w;
+                }
+            }
+            /* if we are located on more than one NUMA, or we didn't find
+             * a NUMA we are on, then not much we can do
+             */
+            if (1 == n_bound) {
+                mca_btl_sm_component.mem_node = my_mem_node = numa;
+            } else {
+                mca_btl_sm_component.mem_node = my_mem_node = -1;
+            }
             }
         }
     }

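The sm BTL change above replaces the topology-tree walk with two modex lookups: the job-level topology signature (only its leading integer, the NUMA count, is consumed via strtoul) and the per-process locality string, from which opal_hwloc_base_get_location(loc, HWLOC_OBJ_NODE, 0) extracts the NUMA index. A minimal standalone sketch of just those parsing rules follows; the signature and NUMA-field values are invented for illustration and are not taken from the PR:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    /* hypothetical topology signature: only the leading NUMA count is used here */
    const char *sig = "2N:2S:24L3:24L2:24L1:24C:48H";
    int num_mem_nodes = (int)strtoul(sig, NULL, 10);   /* -> 2 */

    /* hypothetical NUMA field, as returned by
     * opal_hwloc_base_get_location(loc, HWLOC_OBJ_NODE, 0) */
    const char *mynuma = "1";
    int my_mem_node;

    if (NULL == mynuma || NULL != strchr(mynuma, ',') || NULL != strchr(mynuma, '-')) {
        my_mem_node = -1;   /* unknown, or spans several NUMA nodes */
    } else {
        my_mem_node = (int)strtoul(mynuma, NULL, 10);   /* bound to a single NUMA node */
    }

    printf("num_mem_nodes=%d my_mem_node=%d\n", num_mem_nodes, my_mem_node);
    return 0;
}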
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -276,6 +276,16 @@ OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo,
 OPAL_DECLSPEC char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo);
 
+
+/* get a string describing the locality of a given process */
+OPAL_DECLSPEC char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap);
+
+/* extract a location from the locality string */
+OPAL_DECLSPEC char* opal_hwloc_base_get_location(char *locality,
+                                                 hwloc_obj_type_t type,
+                                                 unsigned index);
+
+OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2);
 
 END_C_DECLS
 
 #endif /* OPAL_HWLOC_BASE_H */
@@ -13,7 +13,7 @@
  * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2012-2015 Los Alamos National Security, LLC.
  *                         All rights reserved.
- * Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
  * Copyright (c) 2015-2016 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -1502,9 +1502,9 @@ static char *hwloc_getline(FILE *fp)
 
     ret = fgets(input, OPAL_HWLOC_MAX_ELOG_LINE, fp);
     if (NULL != ret) {
-       input[strlen(input)-1] = '\0';  /* remove newline */
-       buff = strdup(input);
-       return buff;
+        input[strlen(input)-1] = '\0';  /* remove newline */
+        buff = strdup(input);
+        return buff;
     }
 
     return NULL;
@@ -2128,3 +2128,249 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo)
     }
     return sig;
 }
+
+char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
+                                          char *bitmap)
+{
+    hwloc_obj_t obj;
+    char *locality=NULL, *tmp, *t2;
+    unsigned depth, d, width, w;
+    hwloc_cpuset_t cpuset, avail, result;
+    hwloc_obj_type_t type;
+
+    /* if this proc is not bound, then there is no locality. We
+     * know it isn't bound if the cpuset is NULL, or if it is
+     * all 1's */
+    if (NULL == bitmap) {
+        return NULL;
+    }
+    cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_list_sscanf(cpuset, bitmap);
+    if (hwloc_bitmap_isfull(cpuset)) {
+        hwloc_bitmap_free(cpuset);
+        return NULL;
+    }
+
+    /* we are going to use a bitmap to save the results so
+     * that we can use a hwloc utility to print them */
+    result = hwloc_bitmap_alloc();
+
+    /* get the max depth of the topology */
+    depth = hwloc_topology_get_depth(topo);
+
+    /* start at the first depth below the top machine level */
+    for (d=1; d < depth; d++) {
+        /* get the object type at this depth */
+        type = hwloc_get_depth_type(topo, d);
+        /* if it isn't one of interest, then ignore it */
+        if (HWLOC_OBJ_NODE != type &&
+            HWLOC_OBJ_SOCKET != type &&
+            HWLOC_OBJ_CACHE != type &&
+            HWLOC_OBJ_CORE != type &&
+            HWLOC_OBJ_PU != type) {
+            continue;
+        }
+
+        /* get the width of the topology at this depth */
+        width = hwloc_get_nbobjs_by_depth(topo, d);
+
+        /* scan all objects at this depth to see if
+         * the location overlaps with them
+         */
+        for (w=0; w < width; w++) {
+            /* get the object at this depth/index */
+            obj = hwloc_get_obj_by_depth(topo, d, w);
+            /* get the available cpuset for this obj */
+            avail = opal_hwloc_base_get_available_cpus(topo, obj);
+            /* see if the location intersects with it */
+            if (hwloc_bitmap_intersects(avail, cpuset)) {
+                hwloc_bitmap_set(result, w);
+            }
+        }
+        /* it should be impossible, but allow for the possibility
+         * that we came up empty at this depth */
+        if (!hwloc_bitmap_iszero(result)) {
+            hwloc_bitmap_list_asprintf(&tmp, result);
+            switch(obj->type) {
+            case HWLOC_OBJ_NODE:
+                asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp);
+                if (NULL != locality) {
+                    free(locality);
+                }
+                locality = t2;
+                break;
+            case HWLOC_OBJ_SOCKET:
+                asprintf(&t2, "%sSK%s:", (NULL == locality) ? "" : locality, tmp);
+                if (NULL != locality) {
+                    free(locality);
+                }
+                locality = t2;
+                break;
+            case HWLOC_OBJ_CACHE:
+                if (3 == obj->attr->cache.depth) {
+                    asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp);
+                    if (NULL != locality) {
+                        free(locality);
+                    }
+                    locality = t2;
+                    break;
+                } else if (2 == obj->attr->cache.depth) {
+                    asprintf(&t2, "%sL2%s:", (NULL == locality) ? "" : locality, tmp);
+                    if (NULL != locality) {
+                        free(locality);
+                    }
+                    locality = t2;
+                    break;
+                } else {
+                    asprintf(&t2, "%sL1%s:", (NULL == locality) ? "" : locality, tmp);
+                    if (NULL != locality) {
+                        free(locality);
+                    }
+                    locality = t2;
+                    break;
+                }
+                break;
+            case HWLOC_OBJ_CORE:
+                asprintf(&t2, "%sCR%s:", (NULL == locality) ? "" : locality, tmp);
+                if (NULL != locality) {
+                    free(locality);
+                }
+                locality = t2;
+                break;
+            case HWLOC_OBJ_PU:
+                asprintf(&t2, "%sHT%s:", (NULL == locality) ? "" : locality, tmp);
+                if (NULL != locality) {
+                    free(locality);
+                }
+                locality = t2;
+                break;
+            default:
+                /* just ignore it */
+                break;
+            }
+            free(tmp);
+        }
+        hwloc_bitmap_zero(result);
+    }
+    hwloc_bitmap_free(result);
+    hwloc_bitmap_free(cpuset);
+
+    /* remove the trailing colon */
+    if (NULL != locality) {
+        locality[strlen(locality)-1] = '\0';
+    }
+    return locality;
+}
+
+char* opal_hwloc_base_get_location(char *locality,
+                                   hwloc_obj_type_t type,
+                                   unsigned index)
+{
+    char **loc;
+    char *srch, *ans = NULL;
+    size_t n;
+
+    if (NULL == locality) {
+        return NULL;
+    }
+    switch(type) {
+    case HWLOC_OBJ_NODE:
+        srch = "NM";
+        break;
+    case HWLOC_OBJ_SOCKET:
+        srch = "SK";
+        break;
+    case HWLOC_OBJ_CACHE:
+        if (3 == index) {
+            srch = "L3";
+        } else if (2 == index) {
+            srch = "L2";
+        } else {
+            srch = "L0";
+        }
+        break;
+    case HWLOC_OBJ_CORE:
+        srch = "CR";
+        break;
+    case HWLOC_OBJ_PU:
+        srch = "HT";
+        break;
+    default:
+        return NULL;
+    }
+    loc = opal_argv_split(locality, ':');
+    for (n=0; NULL != loc[n]; n++) {
+        if (0 == strncmp(loc[n], srch, 2)) {
+            ans = strdup(&loc[n][2]);
+            break;
+        }
+    }
+    opal_argv_free(loc);
+
+    return ans;
+}
+
+opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2)
+{
+    opal_hwloc_locality_t locality;
+    char **set1, **set2;
+    hwloc_bitmap_t bit1, bit2;
+    size_t n1, n2;
+
+    /* start with what we know - they share a node on a cluster
+     * NOTE: we may alter that latter part as hwloc's ability to
+     * sense multi-cu, multi-cluster systems grows
+     */
+    locality = OPAL_PROC_ON_NODE | OPAL_PROC_ON_HOST | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER;
+
+    /* if either location is NULL, then that isn't bound */
+    if (NULL == loc1 || NULL == loc2) {
+        return locality;
+    }
+
+    set1 = opal_argv_split(loc1, ':');
+    set2 = opal_argv_split(loc2, ':');
+    bit1 = hwloc_bitmap_alloc();
+    bit2 = hwloc_bitmap_alloc();
+
+    /* check each matching type */
+    for (n1=0; NULL != set1[n1]; n1++) {
+        /* convert the location into bitmap */
+        hwloc_bitmap_list_sscanf(bit1, &set1[n1][2]);
+        /* find the matching type in set2 */
+        for (n2=0; NULL != set2[n2]; n2++) {
+            if (0 == strncmp(set1[n1], set2[n2], 2)) {
+                /* convert the location into bitmap */
+                hwloc_bitmap_list_sscanf(bit2, &set2[n2][2]);
+                /* see if they intersect */
+                if (hwloc_bitmap_intersects(bit1, bit2)) {
+                    /* set the corresponding locality bit */
+                    if (0 == strncmp(set1[n1], "NM", 2)) {
+                        locality |= OPAL_PROC_ON_NUMA;
+                    } else if (0 == strncmp(set1[n1], "SK", 2)) {
+                        locality |= OPAL_PROC_ON_SOCKET;
+                    } else if (0 == strncmp(set1[n1], "L3", 2)) {
+                        locality |= OPAL_PROC_ON_L3CACHE;
+                    } else if (0 == strncmp(set1[n1], "L2", 2)) {
+                        locality |= OPAL_PROC_ON_L2CACHE;
+                    } else if (0 == strncmp(set1[n1], "L1", 2)) {
+                        locality |= OPAL_PROC_ON_L1CACHE;
+                    } else if (0 == strncmp(set1[n1], "CR", 2)) {
+                        locality |= OPAL_PROC_ON_CORE;
+                    } else if (0 == strncmp(set1[n1], "HT", 2)) {
+                        locality |= OPAL_PROC_ON_HWTHREAD;
+                    } else {
+                        /* should never happen */
+                        opal_output(0, "UNRECOGNIZED LOCALITY %s", set1[n1]);
+                    }
+                }
+                break;
+            }
+        }
+    }
+    opal_argv_free(set1);
+    opal_argv_free(set2);
+    hwloc_bitmap_free(bit1);
+    hwloc_bitmap_free(bit2);
+    return locality;
+}
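For reference, a rough usage sketch of the three helpers added above. The locality strings below are made-up examples of the colon-separated NM/SK/L3/L2/L1/CR/HT format assembled in the switch statement of opal_hwloc_base_get_locality_string(); real strings depend on the topology and on how each process is bound, and locality_demo is an illustrative name, not part of the PR:

#include <stdlib.h>

#include "opal/mca/hwloc/base/base.h"
#include "opal/util/output.h"

static void locality_demo(void)
{
    /* invented example strings: proc A on socket 0 / core 4, proc B on socket 0 / core 5 */
    char loc1[] = "NM0:SK0:L30:L20:L14:CR4:HT8-9";
    char loc2[] = "NM0:SK0:L30:L21:L15:CR5:HT10-11";

    /* pull one field back out: the NUMA index of proc A ("0") */
    char *numa = opal_hwloc_base_get_location(loc1, HWLOC_OBJ_NODE, 0);

    /* compare the two strings field by field: these share NUMA, socket and L3,
     * but not L2, L1, core, or hwthread */
    opal_hwloc_locality_t rel = opal_hwloc_compute_relative_locality(loc1, loc2);

    opal_output(0, "numa %s locality %x",
                (NULL == numa) ? "unknown" : numa, (unsigned int)rel);
    free(numa);
}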
@@ -104,6 +104,8 @@ BEGIN_C_DECLS
 
 /**** no PMIx equivalent ****/
 #define OPAL_PMIX_LOCALITY                  "pmix.loc"          // (uint16_t) relative locality of two procs
+#define OPAL_PMIX_TOPOLOGY_SIGNATURE        "pmix.toposig"      // (char*) topology signature string
+#define OPAL_PMIX_LOCALITY_STRING           "pmix.locstr"       // (char*) string describing a proc's location
 
 #define OPAL_PMIX_NODE_LIST                 "pmix.nlist"        // (char*) comma-delimited list of nodes running procs for the specified nspace
 #define OPAL_PMIX_ALLOCATED_NODELIST        "pmix.alist"        // (char*) comma-delimited list of all nodes in this allocation regardless of
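A compact sketch of how the two new keys are meant to be consumed, mirroring what the btl/sm and ess/pmi changes in this PR do: the topology signature is resolved against the wildcard rank (it is job-level data), while the locality string is resolved per process. The helper name fetch_locality_info is illustrative only:

#include <stdlib.h>

#include "opal/mca/pmix/pmix.h"
#include "opal/util/proc.h"

static void fetch_locality_info(void)
{
    int rc;
    char *sig = NULL, *locstr = NULL;
    opal_process_name_t wildcard_rank;

    /* job-level value: ask for it against the wildcard rank */
    wildcard_rank.jobid = OPAL_PROC_MY_NAME.jobid;
    wildcard_rank.vpid = OPAL_VPID_WILDCARD;
    OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_TOPOLOGY_SIGNATURE,
                                   &wildcard_rank, &sig, OPAL_STRING);

    /* per-proc value: ask for my own locality string */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING,
                                   &OPAL_PROC_MY_NAME, &locstr, OPAL_STRING);

    /* ... use sig/locstr if the lookups succeeded ... */
    free(sig);
    free(locstr);
}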
@@ -94,7 +94,7 @@ static int rte_init(void)
     char *val;
     int u32, *u32ptr;
     uint16_t u16, *u16ptr;
-    char **peers=NULL, *mycpuset, **cpusets=NULL;
+    char **peers=NULL, *mycpuset;
     opal_process_name_t wildcard_rank, pname;
     bool bool_val, *bool_ptr = &bool_val, tdir_mca_override = false;
     size_t i;
@@ -248,7 +248,7 @@ static int rte_init(void)
     /* retrieve temp directories info */
     OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
     if (OPAL_SUCCESS == ret && NULL != val) {
-        /* We want to provide user with ability
+        /* We want to provide user with ability
          * to override RM settings at his own risk
          */
         if( NULL == orte_process_info.top_session_dir ){
@@ -264,7 +264,7 @@ static int rte_init(void)
     if( !tdir_mca_override ){
         OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
         if (OPAL_SUCCESS == ret && NULL != val) {
-            /* We want to provide user with ability
+            /* We want to provide user with ability
              * to override RM settings at his own risk
              */
             if( NULL == orte_process_info.job_session_dir ){
@@ -281,7 +281,7 @@ static int rte_init(void)
     if( !tdir_mca_override ){
         OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
         if (OPAL_SUCCESS == ret && NULL != val) {
-            /* We want to provide user with ability
+            /* We want to provide user with ability
              * to override RM settings at his own risk
              */
             if( NULL == orte_process_info.proc_session_dir ){
@@ -385,65 +385,64 @@ static int rte_init(void)
         if (OPAL_SUCCESS == ret && NULL != val) {
             peers = opal_argv_split(val, ',');
             free(val);
-            /* and their cpusets, if available */
-            OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS,
-                                           &wildcard_rank, &val, OPAL_STRING);
-            if (OPAL_SUCCESS == ret && NULL != val) {
-                cpusets = opal_argv_split(val, ':');
-                free(val);
-            } else {
-                cpusets = NULL;
-            }
         } else {
             peers = NULL;
-            cpusets = NULL;
         }
     } else {
         peers = NULL;
-        cpusets = NULL;
     }
 
     /* set the locality */
     if (NULL != peers) {
-        /* indentify our cpuset */
-        if (NULL != cpusets) {
-            mycpuset = cpusets[orte_process_info.my_local_rank];
+        /* identify our location */
+        val = NULL;
+        OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
+                                       ORTE_PROC_MY_NAME, &val, OPAL_STRING);
+        if (OPAL_SUCCESS == ret && NULL != val) {
+            mycpuset = val;
         } else {
             mycpuset = NULL;
         }
         pname.jobid = ORTE_PROC_MY_NAME->jobid;
         for (i=0; NULL != peers[i]; i++) {
-            kv = OBJ_NEW(opal_value_t);
-            kv->key = strdup(OPAL_PMIX_LOCALITY);
-            kv->type = OPAL_UINT16;
             pname.vpid = strtoul(peers[i], NULL, 10);
             if (pname.vpid == ORTE_PROC_MY_NAME->vpid) {
                 /* we are fully local to ourselves */
                 u16 = OPAL_PROC_ALL_LOCAL;
-            } else if (NULL == mycpuset || NULL == cpusets[i] ||
-                       0 == strcmp(cpusets[i], "UNBOUND")) {
-                /* all we can say is that it shares our node */
-                u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
             } else {
-                /* we have it, so compute the locality */
-                u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]);
+                val = NULL;
+                OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
+                                               &pname, &val, OPAL_STRING);
+                if (OPAL_SUCCESS == ret && NULL != val) {
+                    u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
+                } else {
+                    /* all we can say is that it shares our node */
+                    u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
+                }
             }
+            kv = OBJ_NEW(opal_value_t);
+            kv->key = strdup(OPAL_PMIX_LOCALITY);
+            kv->type = OPAL_UINT16;
             OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
-                                 "%s ess:pmi:locality: proc %s locality %x",
+                                 "%s ess:pmi:locality: proc %s locality %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                 ORTE_NAME_PRINT(&pname), u16));
+                                 ORTE_NAME_PRINT(&pname), opal_hwloc_base_print_locality(u16)));
             kv->data.uint16 = u16;
             ret = opal_pmix.store_local(&pname, kv);
             if (OPAL_SUCCESS != ret) {
                 error = "local store of locality";
                 opal_argv_free(peers);
-                opal_argv_free(cpusets);
+                if (NULL != mycpuset) {
+                    free(mycpuset);
+                }
                 goto error;
             }
             OBJ_RELEASE(kv);
         }
         opal_argv_free(peers);
-        opal_argv_free(cpusets);
+        if (NULL != mycpuset) {
+            free(mycpuset);
+        }
     }
 
     /* now that we have all required info, complete the setup */

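The net effect of the hunk above: each peer's locality is now derived from the locality string that peer published, instead of from a job-wide list of cpusets, and the modex receive is optional, so environments that do not provide OPAL_PMIX_LOCALITY_STRING simply degrade to "shares our node". A condensed sketch of that decision, with peer_locality as a hypothetical helper name rather than anything in the PR:

#include <stdint.h>

#include "opal/mca/hwloc/base/base.h"

/* my_loc:   my own locality string (NULL if unknown or unbound)
 * peer_loc: what the peer published under OPAL_PMIX_LOCALITY_STRING (NULL if absent) */
static uint16_t peer_locality(char *my_loc, char *peer_loc)
{
    if (NULL == peer_loc) {
        /* all we can say is that the peer shares our node */
        return OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
    }
    /* field-by-field comparison of the two locality strings; a NULL my_loc
     * is handled inside the helper and also yields node-level locality */
    return (uint16_t)opal_hwloc_compute_relative_locality(my_loc, peer_loc);
}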
orte/mca/rmaps/base/rmaps_base_map_job.c: 0 changes (executable file → regular file)
@@ -38,6 +38,7 @@
 #include "opal/util/argv.h"
 #include "opal/util/output.h"
 #include "opal/util/error.h"
+#include "opal/mca/hwloc/base/base.h"
 #include "opal/mca/pmix/pmix.h"
 
 #include "orte/util/name_fns.h"
@@ -59,7 +60,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
     opal_value_t *kv;
     orte_node_t *node, *mynode;
     opal_vpid_t vpid;
-    char **list, **procs, **micro, *tmp, *regex, *cpulist, *peerlist;
+    char **list, **procs, **micro, *tmp, *regex;
     orte_job_t *dmns;
     orte_job_map_t *map;
     orte_app_context_t *app;
@@ -239,13 +240,22 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
     kv->data.uint32 = jdata->total_slots_alloc;
     opal_list_append(info, &kv->super);
 
+    /* topology signature */
+    kv = OBJ_NEW(opal_value_t);
+    kv->key = strdup(OPAL_PMIX_TOPOLOGY_SIGNATURE);
+    kv->type = OPAL_STRING;
+    kv->data.string = strdup(orte_topo_signature);
+    opal_list_append(info, &kv->super);
+
     /* register any local clients */
     vpid = ORTE_VPID_MAX;
+    micro = NULL;
     for (i=0; i < mynode->procs->size; i++) {
         if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(mynode->procs, i))) {
             continue;
         }
         if (pptr->name.jobid == jdata->jobid) {
+            opal_argv_append_nosize(&micro, ORTE_VPID_PRINT(pptr->name.vpid));
             if (pptr->name.vpid < vpid) {
                 vpid = pptr->name.vpid;
             }
@@ -256,6 +266,16 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
             }
         }
     }
+    if (NULL != micro) {
+        /* pass the local peers */
+        kv = OBJ_NEW(opal_value_t);
+        kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
+        kv->type = OPAL_STRING;
+        kv->data.string = opal_argv_join(micro, ',');
+        opal_argv_free(micro);
+        opal_list_append(info, &kv->super);
+    }
+
     /* pass the local ldr */
     kv = OBJ_NEW(opal_value_t);
     kv->key = strdup(OPAL_PMIX_LOCALLDR);
@@ -274,71 +294,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) {
             continue;
         }
-        /* construct the list of local peers, while adding
-         * each proc's locality info */
-        list = NULL;
-        procs = NULL;
-        cpulist = NULL;
-        peerlist = NULL;
-        vpid = ORTE_VPID_MAX;
-        for (i=0; i < node->procs->size; i++) {
-            if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
-                continue;
-            }
-            if (pptr->name.jobid == jdata->jobid) {
-                opal_argv_append_nosize(&list, ORTE_VPID_PRINT(pptr->name.vpid));
-                if (pptr->name.vpid < vpid) {
-                    vpid = pptr->name.vpid;
-                }
-                /* note that we have to pass the cpuset for each local
-                 * peer so locality can be computed */
-                tmp = NULL;
-                if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
-                    if (NULL != tmp) {
-                        opal_argv_append_nosize(&procs, tmp);
-                        free(tmp);
-                    } else {
-                        opal_argv_append_nosize(&procs, "UNBOUND");
-                    }
-                } else {
-                    opal_argv_append_nosize(&procs, "UNBOUND");
-                }
-            }
-        }
-        /* construct the list of peers for transmission */
-        if (NULL != list) {
-            peerlist = opal_argv_join(list, ',');
-            opal_argv_free(list);
-            list = NULL;
-        }
-        /* construct the list of cpusets for transmission */
-        if (NULL != procs) {
-            cpulist = opal_argv_join(procs, ':');
-            opal_argv_free(procs);
-            procs = NULL;
-        }
-
-        /* if this is me, then pass the peers and cpusets to myself
-         * in order to maintain backward compatibility for the non-pmix
-         * components in OPAL/pmix */
-        if (node == mynode) {
-            /* pass the list of peers */
-            kv = OBJ_NEW(opal_value_t);
-            kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
-            kv->type = OPAL_STRING;
-            kv->data.string = strdup(peerlist);
-            opal_list_append(info, &kv->super);
-
-            /* pass the list of cpusets */
-            kv = OBJ_NEW(opal_value_t);
-            kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
-            kv->type = OPAL_STRING;
-            kv->data.string = strdup(cpulist);
-            opal_list_append(info, &kv->super);
-
-        }
-
-        /* now cycle across each proc on this node, passing all data that
+        /* cycle across each proc on this node, passing all data that
          * varies by proc */
         for (i=0; i < node->procs->size; i++) {
             if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
@@ -363,19 +319,18 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
             kv->data.name.vpid = pptr->name.vpid;
             opal_list_append(pmap, &kv->super);
 
-            /* pass the list of peers */
-            kv = OBJ_NEW(opal_value_t);
-            kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
-            kv->type = OPAL_STRING;
-            kv->data.string = strdup(peerlist);
-            opal_list_append(pmap, &kv->super);
-
-            /* pass the list of cpusets */
-            kv = OBJ_NEW(opal_value_t);
-            kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
-            kv->type = OPAL_STRING;
-            kv->data.string = strdup(cpulist);
-            opal_list_append(pmap, &kv->super);
+            /* location, for local procs */
+            if (node == mynode) {
+                if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
+                    kv = OBJ_NEW(opal_value_t);
+                    kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
+                    kv->type = OPAL_STRING;
+                    kv->data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, tmp);
+                    opal_output(0, "PROC %s LOCALITY %s", ORTE_NAME_PRINT(&pptr->name), kv->data.string);
+                    opal_list_append(pmap, &kv->super);
+                    free(tmp);
+                }
+            }
 
             /* appnum */
             kv = OBJ_NEW(opal_value_t);
@@ -441,13 +396,6 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
             kv->data.uint32 = pptr->node->index;
             opal_list_append(pmap, &kv->super);
         }
-        /* cleanup */
-        if (NULL != cpulist) {
-            free(cpulist);
-        }
-        if (NULL != peerlist) {
-            free(peerlist);
-        }
     }
 
     /* mark the job as registered */