Restore the cpus-per-proc option for byslot and bynode mapping. Remove bind_idx (which recorded the index of the hwloc object where the proc was bound): once a proc can span multiple cpus that index is no longer unique, so the cpuset bitmap becomes the standard reference for a proc's location. Update the relative-locality computation to take bitmaps as its arguments.
This commit was SVN r28219.
This commit is contained in:
parent 6c8d0450a3
commit 6ee32767d4
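
Both peers' locations are now expressed as hwloc cpuset list-strings rather than a (level, index) pair. As a hedged illustration of the new call shape (the wrapper function below is hypothetical; the signature and the NULL-means-unbound convention come from the hunks that follow):

/* Sketch: calling the reworked locality API with two cpuset list-strings
 * (e.g. "0-3,8") as produced by hwloc_bitmap_list_asprintf(). Either
 * argument may be NULL, meaning that peer is unbound, in which case only
 * node-level commonality is reported. */
static opal_hwloc_locality_t
example_locality(hwloc_topology_t topo, char *my_cpuset, char *peer_cpuset)
{
    return opal_hwloc_base_get_relative_locality(topo, my_cpuset, peer_cpuset);
}
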
@@ -93,6 +93,12 @@ OPAL_DECLSPEC extern char *opal_hwloc_base_slot_list;
 OPAL_DECLSPEC extern char *opal_hwloc_base_cpu_set;
 OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_base_given_cpus;
 
+typedef struct {
+    opal_list_item_t super;
+    hwloc_obj_t obj;
+} opal_hwloc_obj_list_item_t;
+OBJ_CLASS_DECLARATION(opal_hwloc_obj_list_item_t);
+
 /* convenience macro for debugging */
 #define OPAL_HWLOC_SHOW_BINDING(n, v)                           \
     do {                                                        \

@@ -114,10 +120,7 @@ OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_base_given_cpus;
     } while(0);
 
 OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo,
-                                                                          opal_hwloc_level_t level1,
-                                                                          unsigned int peer1,
-                                                                          opal_hwloc_level_t level2,
-                                                                          unsigned int peer2);
+                                                                          char *cpuset1, char *cpuset2);
 
 /**
  * Loads opal_hwloc_my_cpuset (global variable in

@@ -479,4 +479,8 @@ OBJ_CLASS_INSTANCE(opal_hwloc_topo_data_t,
                    opal_object_t,
                    topo_data_const,
                    topo_data_dest);
+
+OBJ_CLASS_INSTANCE(opal_hwloc_obj_list_item_t,
+                   opal_list_item_t,
+                   NULL, NULL);
 #endif
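
The opal_hwloc_obj_list_item_t class declared and instanced above lets callers collect hwloc objects on an opal_list_t. A minimal usage sketch under those definitions; the "found" list and the "obj" being stored are illustrative:

/* Sketch: stashing an hwloc object on an OPAL list via the new class */
opal_list_t found;
opal_hwloc_obj_list_item_t *item;

OBJ_CONSTRUCT(&found, opal_list_t);
item = OBJ_NEW(opal_hwloc_obj_list_item_t);
item->obj = obj;                         /* some hwloc_obj_t of interest */
opal_list_append(&found, &item->super);
/* consume the list, OBJ_RELEASE each item, then destruct the list */
OBJ_DESTRUCT(&found);
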
@@ -1202,50 +1202,17 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
     return OPAL_SUCCESS;
 }
 
-static opal_hwloc_locality_t get_locality(opal_hwloc_level_t level)
-{
-    opal_hwloc_locality_t lvl = OPAL_PROC_LOCALITY_UNKNOWN;
-
-    switch(level) {
-    case OPAL_HWLOC_NODE_LEVEL:
-        lvl = OPAL_PROC_ON_NODE;
-        break;
-    case OPAL_HWLOC_NUMA_LEVEL:
-        lvl = OPAL_PROC_ON_NUMA;
-        break;
-    case OPAL_HWLOC_SOCKET_LEVEL:
-        lvl = OPAL_PROC_ON_SOCKET;
-        break;
-    case OPAL_HWLOC_L3CACHE_LEVEL:
-        lvl = OPAL_PROC_ON_L3CACHE;
-        break;
-    case OPAL_HWLOC_L2CACHE_LEVEL:
-        lvl = OPAL_PROC_ON_L2CACHE;
-        break;
-    case OPAL_HWLOC_L1CACHE_LEVEL:
-        lvl = OPAL_PROC_ON_L1CACHE;
-        break;
-    case OPAL_HWLOC_CORE_LEVEL:
-        lvl = OPAL_PROC_ON_CORE;
-        break;
-    case OPAL_HWLOC_HWTHREAD_LEVEL:
-        lvl = OPAL_PROC_ON_HWTHREAD;
-        break;
-    }
-
-    return lvl;
-}
-
 opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo,
-                                                            opal_hwloc_level_t level1,
-                                                            unsigned int peer1,
-                                                            opal_hwloc_level_t level2,
-                                                            unsigned int peer2)
+                                                            char *cpuset1, char *cpuset2)
 {
     opal_hwloc_locality_t locality;
-    hwloc_obj_t obj1, obj2;
-    unsigned cache_level=0;
-    opal_hwloc_level_t i, lvl;
+    hwloc_obj_t obj;
+    unsigned depth, d, width, w;
+    hwloc_cpuset_t avail;
+    bool shared;
+    hwloc_obj_type_t type;
+    int sect1, sect2;
+    hwloc_cpuset_t loc1, loc2;
 
     /* start with what we know - they share a node on a cluster
      * NOTE: we may alter that latter part as hwloc's ability to

@@ -1253,100 +1220,88 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
      */
    locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE | OPAL_PROC_ON_BOARD;
 
-    /* TBD: handle procs bound at different levels - means they
-     * are from different jobs
-     */
-    if (level1 != level2) {
+    /* if either cpuset is NULL, then that isn't bound */
+    if (NULL == cpuset1 || NULL == cpuset2) {
         return locality;
     }
 
-    /* if the binding level is NODE, then there is nothing to do */
-    if (OPAL_HWLOC_NODE_LEVEL == level1) {
-        return locality;
-    }
-
-    lvl = level1;
+    /* get the max depth of the topology */
+    depth = hwloc_topology_get_depth(topo);
+
+    /* convert the strings to cpusets */
+    loc1 = hwloc_bitmap_alloc();
+    hwloc_bitmap_list_sscanf(loc1, cpuset1);
+    loc2 = hwloc_bitmap_alloc();
+    hwloc_bitmap_list_sscanf(loc2, cpuset2);
 
-    /* we know that the objects are bound to the same level, so
-     * if the two objects are the index, then they share
-     * all levels down to and including their own
-     */
-    if (peer1 == peer2) {
-        for (i=lvl; 0 < i; i--) {
-            opal_output_verbose(5, opal_hwloc_base_output,
-                                "equal level - computing locality: %s",
-                                opal_hwloc_base_print_level(i));
-            locality |= get_locality(i);
+    /* start at the first depth below the top machine level */
+    for (d=1; d < depth; d++) {
+        shared = false;
+        /* get the object type at this depth */
+        type = hwloc_get_depth_type(topo, d);
+        /* if it isn't one of interest, then ignore it */
+        if (HWLOC_OBJ_NODE != type &&
+            HWLOC_OBJ_SOCKET != type &&
+            HWLOC_OBJ_CACHE != type &&
+            HWLOC_OBJ_CORE != type &&
+            HWLOC_OBJ_PU != type) {
+            continue;
         }
-        goto checkpu;
-    }
-
-    /* get cache level if required */
-    if (OPAL_HWLOC_L3CACHE_LEVEL == lvl) {
-        cache_level = 3;
-    } else if (OPAL_HWLOC_L2CACHE_LEVEL == lvl) {
-        cache_level = 2;
-    } else if (OPAL_HWLOC_L1CACHE_LEVEL == lvl) {
-        cache_level = 1;
-    }
-
-    /* get the objects for these peers */
-    opal_output_verbose(5, opal_hwloc_base_output,
-                        "computing locality - getting object at level %s, index %u",
-                        opal_hwloc_base_print_level(lvl), peer1);
-    obj1 = opal_hwloc_base_get_obj_by_type(topo, opal_hwloc_levels[lvl],
-                                           cache_level, peer1, OPAL_HWLOC_AVAILABLE);
-    opal_output_verbose(5, opal_hwloc_base_output,
-                        "computing locality - getting object at level %s, index %u",
-                        opal_hwloc_base_print_level(lvl), peer2);
-    obj2 = opal_hwloc_base_get_obj_by_type(topo, opal_hwloc_levels[lvl],
-                                           cache_level, peer2, OPAL_HWLOC_AVAILABLE);
-
-    /* climb the levels
-     * NOTE: for now, we will just assume that the two objects
-     * have a common topology above them - i.e., that each
-     * object has the same levels above them. In cases where
-     * nodes have heterogeneous sockets, this won't be true - but
-     * leave that problem for another day
-     */
-    --lvl;
-    while (OPAL_HWLOC_NODE_LEVEL < lvl &&
-           NULL != obj1 && NULL != obj2 && obj1 != obj2) {
-        opal_output_verbose(5, opal_hwloc_base_output,
-                            "computing locality - shifting up from %s",
-                            opal_hwloc_base_print_level(lvl));
-        obj1 = obj1->parent;
-        obj2 = obj2->parent;
-        --lvl;
-    }
-
-    /* set the locality */
-    for (i=lvl; 0 < i; i--) {
-        opal_output_verbose(5, opal_hwloc_base_output,
-                            "computing locality - filling level %s",
-                            opal_hwloc_base_print_level(i));
-        locality |= get_locality(i);
-    }
-
- checkpu:
-    /* NOTE: hwloc isn't able to find cores on all platforms. Example:
-       PPC64 running RHEL 5.4 (linux kernel 2.6.18) only reports NUMA
-       nodes and PU's. Fine.
-
-       However, note that hwloc_get_obj_by_type() will return NULL in
-       2 (effectively) different cases:
-
-       - no objects of the requested type were found
-       - the Nth object of the requested type was not found
-
-       So see if we can find *any* cores by looking for the 0th core.
-    */
-    if (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0)) {
-        /* nope - so if the two peer's share a HWTHREAD, also
-         * declare them as sharing a core
+        /* get the width of the topology at this depth */
+        width = hwloc_get_nbobjs_by_depth(topo, d);
+        /* scan all objects at this depth to see if
+         * our locations overlap with them
          */
-        if (OPAL_PROC_ON_LOCAL_HWTHREAD(locality)) {
-            locality |= OPAL_PROC_ON_CORE;
+        for (w=0; w < width; w++) {
+            /* get the object at this depth/index */
+            obj = hwloc_get_obj_by_depth(topo, d, w);
+            /* get the available cpuset for this obj */
+            avail = opal_hwloc_base_get_available_cpus(topo, obj);
+            /* see if our locations intersect with it */
+            sect1 = hwloc_bitmap_intersects(avail, loc1);
+            sect2 = hwloc_bitmap_intersects(avail, loc2);
+            /* if both intersect, then we share this level */
+            if (sect1 && sect2) {
+                shared = true;
+                switch(obj->type) {
+                case HWLOC_OBJ_NODE:
+                    locality |= OPAL_PROC_ON_NUMA;
+                    break;
+                case HWLOC_OBJ_SOCKET:
+                    locality |= OPAL_PROC_ON_SOCKET;
+                    break;
+                case HWLOC_OBJ_CACHE:
+                    if (3 == obj->attr->cache.depth) {
+                        locality |= OPAL_PROC_ON_L3CACHE;
+                    } else if (2 == obj->attr->cache.depth) {
+                        locality |= OPAL_PROC_ON_L2CACHE;
+                    } else {
+                        locality |= OPAL_PROC_ON_L1CACHE;
+                    }
+                    break;
+                case HWLOC_OBJ_CORE:
+                    locality |= OPAL_PROC_ON_CORE;
+                    break;
+                case HWLOC_OBJ_PU:
+                    locality |= OPAL_PROC_ON_HWTHREAD;
+                    break;
+                default:
+                    /* just ignore it */
+                    break;
+                }
+                break;
+            }
+            /* otherwise, we don't share this
+             * object - but we still might share another object
+             * on this level, so we have to keep searching
+             */
+        }
+        /* if we spanned the entire width without finding
+         * a point of intersection, then no need to go
+         * deeper
+         */
+        if (!shared) {
+            break;
         }
     }
 
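
The rewritten function no longer requires both peers to be bound at the same level: it walks every depth of the topology and sets the corresponding locality bit whenever a single object's available cpuset intersects both procs' bitmaps. A self-contained sketch of that core test, using only public hwloc calls (the real code above additionally filters to the available cpus via opal_hwloc_base_get_available_cpus):

#include <stdbool.h>
#include <hwloc.h>

/* Standalone sketch: true if one object at depth d contains bits of both
 * cpusets - the per-depth intersection test at the heart of the rewrite */
static bool shares_level(hwloc_topology_t topo, unsigned d,
                         hwloc_const_cpuset_t loc1, hwloc_const_cpuset_t loc2)
{
    unsigned w, width = hwloc_get_nbobjs_by_depth(topo, d);

    for (w = 0; w < width; w++) {
        hwloc_obj_t obj = hwloc_get_obj_by_depth(topo, d, w);
        if (hwloc_bitmap_intersects(obj->cpuset, loc1) &&
            hwloc_bitmap_intersects(obj->cpuset, loc2)) {
            return true;
        }
    }
    return false;
}
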
@@ -97,6 +97,7 @@ int orte_ess_base_proc_binding(void)
                  * so that we know
                  */
                 orte_proc_is_bound = true;
+                hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
                 hwloc_bitmap_free(cpus);
                 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                      "%s Process was externally bound",

@@ -120,9 +121,7 @@ int orte_ess_base_proc_binding(void)
                     hwloc_bitmap_free(cpus);
                     goto error;
                 }
-                /* try to find a level and index for this location */
-                opal_hwloc_base_get_level_and_index(cpus, &orte_process_info.bind_level, &orte_process_info.bind_idx);
-                /* cleanup */
+                hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
                 hwloc_bitmap_free(cpus);
                 orte_proc_is_bound = true;
                 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,

@@ -158,8 +157,8 @@ int orte_ess_base_proc_binding(void)
                         error = "Setting processor affinity failed";
                         goto error;
                     }
-                    orte_process_info.bind_level = OPAL_HWLOC_HWTHREAD_LEVEL;
-                    orte_process_info.bind_idx = orte_process_info.my_node_rank;
+                    hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
+                    hwloc_bitmap_free(cpus);
                     OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                          "%s Process bound to hwthread",
                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

@@ -179,8 +178,8 @@ int orte_ess_base_proc_binding(void)
                         ret = ORTE_ERROR;
                         goto error;
                     }
-                    orte_process_info.bind_level = OPAL_HWLOC_CORE_LEVEL;
-                    orte_process_info.bind_idx = orte_process_info.my_node_rank;
+                    hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
+                    hwloc_bitmap_free(cpus);
                     OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                          "%s Process bound to core",
                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

@@ -197,21 +196,16 @@ int orte_ess_base_proc_binding(void)
                     if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                         target = HWLOC_OBJ_CACHE;
                         cache_level = 1;
-                        orte_process_info.bind_level = OPAL_HWLOC_L1CACHE_LEVEL;
                     } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                         target = HWLOC_OBJ_CACHE;
                         cache_level = 2;
-                        orte_process_info.bind_level = OPAL_HWLOC_L2CACHE_LEVEL;
                     } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                         target = HWLOC_OBJ_CACHE;
                         cache_level = 3;
-                        orte_process_info.bind_level = OPAL_HWLOC_L3CACHE_LEVEL;
                     } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                         target = HWLOC_OBJ_SOCKET;
-                        orte_process_info.bind_level = OPAL_HWLOC_SOCKET_LEVEL;
                     } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                         target = HWLOC_OBJ_NODE;
-                        orte_process_info.bind_level = OPAL_HWLOC_NUMA_LEVEL;
                     } else {
                         ret = ORTE_ERR_NOT_FOUND;
                         error = "Binding policy not known";

@@ -229,13 +223,13 @@ int orte_ess_base_proc_binding(void)
                         error = "Setting processor affinity failed";
                         goto error;
                     }
-                    orte_process_info.bind_idx = opal_hwloc_base_get_obj_idx(opal_hwloc_topology,
-                                                                             obj, OPAL_HWLOC_LOGICAL);
+                    hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
+                    hwloc_bitmap_free(cpus);
                     orte_proc_is_bound = true;
                     OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                          "%s Process bound to %s",
                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                         opal_hwloc_base_print_level(orte_process_info.bind_level)));
+                                         hwloc_obj_type_string(target)));
                     break;
                 }
             }
@@ -299,9 +299,14 @@ static int rte_init(void)
 
     /* construct the PMI RTE string */
     rmluri = orte_rml.get_contact_info();
-    asprintf(&pmirte, "%s,%s,%d,%d,%d,%d", rmluri, orte_process_info.nodename,
-             (int)orte_process_info.bind_level, (int)orte_process_info.bind_idx,
-             (int)orte_process_info.my_local_rank, (int)orte_process_info.my_node_rank);
+    if (NULL == orte_process_info.cpuset) {
+        asprintf(&pmirte, "%s,%s,%d,%d", rmluri, orte_process_info.nodename,
+                 (int)orte_process_info.my_local_rank, (int)orte_process_info.my_node_rank);
+    } else {
+        asprintf(&pmirte, "%s,%s,%d,%d,%s", rmluri, orte_process_info.nodename,
+                 (int)orte_process_info.my_local_rank, (int)orte_process_info.my_node_rank,
+                 orte_process_info.cpuset);
+    }
     /* push our info into the cloud */
     id = (opal_identifier_t*)ORTE_PROC_MY_NAME;
     if (ORTE_SUCCESS != (ret = opal_db.store((*id), OPAL_DB_GLOBAL, "RTE", pmirte, OPAL_STRING))) {

@@ -319,12 +324,8 @@ static int rte_init(void)
         error = "db store hostname";
         goto error;
     }
-    if (ORTE_SUCCESS != (ret = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_LEVEL, &orte_process_info.bind_level, OPAL_HWLOC_LEVEL_T))) {
-        error = "db store bind_level";
-        goto error;
-    }
-    if (ORTE_SUCCESS != (ret = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_INDEX, &orte_process_info.bind_idx, OPAL_UINT))) {
-        error = "db store bind_idx";
+    if (ORTE_SUCCESS != (ret = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_CPUSET, orte_process_info.cpuset, OPAL_STRING))) {
+        error = "db store cpuset";
         goto error;
     }
     if (ORTE_SUCCESS != (ret = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_LOCALRANK, &orte_process_info.my_local_rank, ORTE_LOCAL_RANK))) {

@@ -184,11 +184,7 @@ void orte_grpcomm_base_modex(int fd, short args, void *cbdata)
 
 #if OPAL_HAVE_HWLOC
     /* pack our binding info so other procs can determine our locality */
-    if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &orte_process_info.bind_level, 1, OPAL_HWLOC_LEVEL_T))) {
-        ORTE_ERROR_LOG(rc);
-        goto cleanup;
-    }
-    if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &orte_process_info.bind_idx, 1, OPAL_UINT))) {
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &orte_process_info.cpuset, 1, OPAL_STRING))) {
         ORTE_ERROR_LOG(rc);
         goto cleanup;
     }

@@ -288,33 +284,22 @@ void orte_grpcomm_base_store_peer_modex(opal_buffer_t *rbuf, void *cbdata)
         /* compute the locality and store in the database */
 #if OPAL_HAVE_HWLOC
         {
-            opal_hwloc_level_t bind_level;
-            unsigned int bind_idx;
+            char *cpuset;
 
-            /* unpack and store the locality info */
+            /* unpack and store the cpuset - could be NULL */
             cnt = 1;
-            if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, &bind_level, &cnt, OPAL_HWLOC_LEVEL_T))) {
+            if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, &cpuset, &cnt, OPAL_STRING))) {
                 ORTE_ERROR_LOG(rc);
                 goto cleanup;
             }
-            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_LEVEL, &bind_level, OPAL_HWLOC_LEVEL_T))) {
-                ORTE_ERROR_LOG(rc);
-                goto cleanup;
-            }
-            cnt = 1;
-            if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, &bind_idx, &cnt, OPAL_UINT))) {
-                ORTE_ERROR_LOG(rc);
-                goto cleanup;
-            }
-            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_INDEX, &bind_idx, OPAL_UINT))) {
+            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_CPUSET, cpuset, OPAL_STRING))) {
                 ORTE_ERROR_LOG(rc);
                 goto cleanup;
             }
             OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
-                                 "%s store:peer:modex setting proc %s level %s idx %u",
+                                 "%s store:peer:modex setting proc %s cpuset %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                 ORTE_NAME_PRINT(&pname),
-                                 opal_hwloc_base_print_level(bind_level), bind_idx));
+                                 ORTE_NAME_PRINT(&pname), cpuset));
 
             if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &pname, ORTE_PROC_MY_NAME)) {
                 /* if this data is from myself, then set locality to all */

@@ -330,8 +315,7 @@ void orte_grpcomm_base_store_peer_modex(opal_buffer_t *rbuf, void *cbdata)
                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                      ORTE_NAME_PRINT(&pname)));
                 locality = OPAL_PROC_NON_LOCAL;
-            } else if (OPAL_HWLOC_NODE_LEVEL == orte_process_info.bind_level ||
-                       OPAL_HWLOC_NODE_LEVEL == bind_level) {
+            } else if (NULL == cpuset || NULL == orte_process_info.cpuset) {
                 /* one or both of us is not bound, so all we can say is we are on the
                  * same node
                  */

@@ -339,9 +323,8 @@ void orte_grpcomm_base_store_peer_modex(opal_buffer_t *rbuf, void *cbdata)
             } else {
                 /* determine relative location on our node */
                 locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
-                                                                 orte_process_info.bind_level,
-                                                                 orte_process_info.bind_idx,
-                                                                 bind_level, bind_idx);
+                                                                 orte_process_info.cpuset,
+                                                                 cpuset);
                 OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                      "%s store:peer:modex setting proc %s locale %s",
                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

@@ -535,8 +518,7 @@ int orte_grpcomm_base_pack_modex_entries(opal_buffer_t *buf)
             0 == strcmp(kv->key, ORTE_DB_DAEMON_VPID) ||
             0 == strcmp(kv->key, ORTE_DB_NODERANK) ||
             0 == strcmp(kv->key, ORTE_DB_LOCALRANK) ||
-            0 == strcmp(kv->key, ORTE_DB_BIND_LEVEL) ||
-            0 == strcmp(kv->key, ORTE_DB_BIND_INDEX)) {
+            0 == strcmp(kv->key, ORTE_DB_CPUSET)) {
             opal_list_remove_item(&data, item);
         } else {
             num_entries++;
@@ -176,11 +176,10 @@ static int modex(orte_grpcomm_collective_t *coll)
     orte_process_name_t name;
     int rc;
     opal_identifier_t *id;
-    opal_hwloc_level_t bind_level;
     opal_hwloc_locality_t locality;
-    unsigned int bind_idx;
     orte_local_rank_t local_rank;
     orte_node_rank_t node_rank;
+    bool bound;
 
     OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                          "%s grpcomm:pmi: modex entered",

@@ -250,40 +249,47 @@ static int modex(orte_grpcomm_collective_t *coll)
             opal_argv_free(fields);
             return rc;
         }
-        /* next is the bind level */
-        bind_level = strtoul(fields[2], NULL, 10);
-        if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_LEVEL, &bind_level, OPAL_HWLOC_LEVEL_T))) {
-            ORTE_ERROR_LOG(rc);
-            opal_argv_free(fields);
-            return rc;
-        }
-        /* next is the bind index */
-        bind_idx = strtoul(fields[3], NULL, 10);
-        if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_INDEX, &bind_idx, OPAL_UINT))) {
-            ORTE_ERROR_LOG(rc);
-            opal_argv_free(fields);
-            return rc;
-        }
         /* local rank */
-        local_rank = strtoul(fields[4], NULL, 10);
+        local_rank = strtoul(fields[2], NULL, 10);
         if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_LOCALRANK, &local_rank, ORTE_LOCAL_RANK))) {
             ORTE_ERROR_LOG(rc);
             opal_argv_free(fields);
             return rc;
         }
         /* node rank */
-        node_rank = strtoul(fields[5], NULL, 10);
+        node_rank = strtoul(fields[3], NULL, 10);
         if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_NODERANK, &node_rank, ORTE_NODE_RANK))) {
             ORTE_ERROR_LOG(rc);
             opal_argv_free(fields);
             return rc;
         }
+        /* if the process was bound, then there will be another field
+         * that contains its cpuset
+         */
+        if (5 == opal_argv_count(fields)) {
+            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_CPUSET, fields[4], OPAL_STRING))) {
+                ORTE_ERROR_LOG(rc);
+                opal_argv_free(fields);
+                return rc;
+            }
+            bound = true;
+        } else {
+            /* store a placeholder so we know that this value was retrieved,
+             * but the proc wasn't bound
+             */
+            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_CPUSET, NULL, OPAL_STRING))) {
+                ORTE_ERROR_LOG(rc);
+                opal_argv_free(fields);
+                return rc;
+            }
+            bound = false;
+        }
 
         /* compute and store the locality as it isn't something that gets pushed to PMI */
         if (0 != strcmp(fields[1], orte_process_info.nodename)) {
             /* this is on a different node, then mark as non-local */
             locality = OPAL_PROC_NON_LOCAL;
-        } else if (OPAL_HWLOC_NODE_LEVEL == bind_level) {
+        } else if (!bound) {
             /* if we share a node, but we don't know anything more, then
              * mark us as on the node as this is all we know
              */

@@ -291,9 +297,8 @@ static int modex(orte_grpcomm_collective_t *coll)
         } else {
             /* determine relative location on our node */
             locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
-                                                             orte_process_info.bind_level,
-                                                             orte_process_info.bind_idx,
-                                                             bind_level, bind_idx);
+                                                             orte_process_info.cpuset,
+                                                             fields[4]);
         }
         if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_LOCALITY, &locality, OPAL_HWLOC_LOCALITY_T))) {
             ORTE_ERROR_LOG(rc);
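
With bind_level/bind_idx gone, the RTE value published to PMI is now "rmluri,nodename,local_rank,node_rank", with the cpuset list-string appended as a fifth field only when the proc is bound. A hedged decoding sketch mirroring the fields[] indexing used in the modex above ("pmirte" is the received string; error handling omitted):

#include <string.h>
#include <stdbool.h>
#include "opal/util/argv.h"

/* Sketch: split the RTE string and detect the optional cpuset field.
 * fields[0]=RML URI, fields[1]=nodename, fields[2]=local rank,
 * fields[3]=node rank, fields[4]=cpuset (present only if bound). */
char **fields = opal_argv_split(pmirte, ',');
bool bound = (5 == opal_argv_count(fields));
char *cpuset = bound ? strdup(fields[4]) : NULL;   /* copy before freeing */
opal_argv_free(fields);
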
@@ -174,7 +174,6 @@ static int bind_upwards(orte_job_t *jdata,
                 return ORTE_ERR_SILENT;
             }
             /* bind it here */
-            proc->bind_idx = idx;
             cpus = opal_hwloc_base_get_available_cpus(node->topology, obj);
             hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, cpus);
             opal_output_verbose(5, orte_rmaps_base.rmaps_output,

@@ -209,11 +208,13 @@ static int bind_downwards(orte_job_t *jdata,
     orte_job_map_t *map;
     orte_node_t *node;
     orte_proc_t *proc;
-    hwloc_obj_t trg_obj;
+    hwloc_obj_t trg_obj, nxt_obj;
     hwloc_cpuset_t cpus;
-    unsigned int ncpus, idx;
+    unsigned int ncpus;
     struct hwloc_topology_support *support;
     opal_hwloc_obj_data_t *data;
+    int total_cpus;
+    hwloc_cpuset_t totalcpuset;
 
     opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                         "mca:rmaps: bind downward for job %s with bindings %s",

@@ -221,6 +222,7 @@ static int bind_downwards(orte_job_t *jdata,
                         opal_hwloc_base_print_binding(jdata->map->binding));
     /* initialize */
     map = jdata->map;
+    totalcpuset = hwloc_bitmap_alloc();
 
     for (i=0; i < map->nodes->size; i++) {
         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {

@@ -243,6 +245,7 @@ static int bind_downwards(orte_job_t *jdata,
                 continue;
             }
             orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
+            hwloc_bitmap_free(totalcpuset);
             return ORTE_ERR_SILENT;
         }
         /* check if topology supports membind - have to be careful here

@@ -259,6 +262,7 @@ static int bind_downwards(orte_job_t *jdata,
                 membind_warned = true;
             } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                 orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
+                hwloc_bitmap_free(totalcpuset);
                 return ORTE_ERR_SILENT;
             }
         }

@@ -267,7 +271,7 @@ static int bind_downwards(orte_job_t *jdata,
         /* clear the topology of any prior usage numbers */
         opal_hwloc_base_clear_usage(node->topology);
-
+
         /* cycle thru the procs */
         for (j=0; j < node->procs->size; j++) {
             if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {

@@ -284,7 +288,7 @@ static int bind_downwards(orte_job_t *jdata,
             }
             /* we don't know if the target is a direct child of this locale,
              * or if it is some depth below it, so we have to conduct a bit
-             * of a search. Let hwloc find the min usage one for us
+             * of a search. Let hwloc find the min usage one for us.
              */
             trg_obj = opal_hwloc_base_find_min_bound_target_under_obj(node->topology,
                                                                       proc->locale,

@@ -292,48 +296,59 @@ static int bind_downwards(orte_job_t *jdata,
             if (NULL == trg_obj) {
                 /* there aren't any such targets under this object */
                 orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
+                hwloc_bitmap_free(totalcpuset);
                 return ORTE_ERR_SILENT;
             }
-            /* get the index of the object */
-            if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, trg_obj, OPAL_HWLOC_AVAILABLE))) {
-                return ORTE_ERR_SILENT;
+            /* start with a clean slate */
+            hwloc_bitmap_zero(totalcpuset);
+            total_cpus = 0;
+            nxt_obj = trg_obj;
+            do {
+                if (NULL == nxt_obj) {
+                    /* could not find enough cpus to meet request */
+                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
+                    hwloc_bitmap_free(totalcpuset);
+                    return ORTE_ERR_SILENT;
+                }
+                trg_obj = nxt_obj;
+                /* get the number of cpus under this location */
+                ncpus = opal_hwloc_base_get_npus(node->topology, trg_obj);
+                opal_output_verbose(5, orte_rmaps_base.rmaps_output,
+                                    "%s GOT %d CPUS",
+                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ncpus);
+                /* track the number bound */
+                data = (opal_hwloc_obj_data_t*)trg_obj->userdata;
+                data->num_bound++;
+                /* error out if adding a proc would cause overload and that wasn't allowed */
+                if (ncpus < data->num_bound &&
+                    !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
+                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
+                                   opal_hwloc_base_print_binding(map->binding), node->name,
+                                   data->num_bound, ncpus);
+                    hwloc_bitmap_free(totalcpuset);
+                    return ORTE_ERR_SILENT;
+                }
+                /* bind the proc here */
+                cpus = opal_hwloc_base_get_available_cpus(node->topology, trg_obj);
+                hwloc_bitmap_or(totalcpuset, totalcpuset, cpus);
+                total_cpus += ncpus;
+                /* move to the next location, in case we need it */
+                nxt_obj = trg_obj->next_cousin;
+            } while (total_cpus < orte_rmaps_base.cpus_per_rank);
+            hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, totalcpuset);
+            if (4 < opal_output_get_verbosity(orte_rmaps_base.rmaps_output)) {
+                char tmp1[1024], tmp2[1024];
+                opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), totalcpuset);
+                opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), totalcpuset);
+                opal_output(orte_rmaps_base.rmaps_output,
+                            "%s BOUND PROC %s[%s] TO %s: %s",
+                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                            ORTE_NAME_PRINT(&proc->name), node->name,
+                            tmp1, tmp2);
             }
-            /* track the number bound */
-            data = (opal_hwloc_obj_data_t*)trg_obj->userdata;
-            data->num_bound++;
-            opal_output_verbose(5, orte_rmaps_base.rmaps_output,
-                                "%s GETTING NUMBER OF CPUS UNDER OBJECT %s[%d]",
-                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                hwloc_obj_type_string(target), trg_obj->logical_index);
-            /* get the number of cpus under this location */
-            ncpus = opal_hwloc_base_get_npus(node->topology, trg_obj);
-            opal_output_verbose(5, orte_rmaps_base.rmaps_output,
-                                "%s GOT %d CPUS",
-                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ncpus);
-            if (0 == ncpus) {
-                orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
-                return ORTE_ERR_SILENT;
-            }
-            /* error out if adding a proc would cause overload and that wasn't allowed */
-            if (ncpus < data->num_bound &&
-                !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
-                orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
-                               opal_hwloc_base_print_binding(map->binding), node->name,
-                               data->num_bound, ncpus);
-                return ORTE_ERR_SILENT;
-            }
-            /* bind the proc here */
-            proc->bind_idx = idx;
-            cpus = opal_hwloc_base_get_available_cpus(node->topology, trg_obj);
-            hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, cpus);
-            opal_output_verbose(5, orte_rmaps_base.rmaps_output,
-                                "%s BOUND PROC %s TO %s[%s:%u] on node %s",
-                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                ORTE_NAME_PRINT(&proc->name),
-                                proc->cpu_bitmap, hwloc_obj_type_string(trg_obj->type),
-                                proc->bind_idx, node->name);
         }
     }
+    hwloc_bitmap_free(totalcpuset);
 
     return ORTE_SUCCESS;
 }

@@ -447,7 +462,6 @@ static int bind_in_place(orte_job_t *jdata,
                 return ORTE_ERR_SILENT;
             }
             /* bind the proc here */
-            proc->bind_idx = idx;
             cpus = opal_hwloc_base_get_available_cpus(node->topology, proc->locale);
             hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, cpus);
             opal_output_verbose(5, orte_rmaps_base.rmaps_output,
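
bind_downwards now widens a proc's binding until it covers cpus_per_rank cpus, OR-ing successive cousin objects' cpusets into a single bitmap. The accumulation idiom in isolation, as a pure-hwloc sketch (the real loop above counts cpus with opal_hwloc_base_get_npus; this approximation uses the bitmap weight, and "want" stands in for orte_rmaps_base.cpus_per_rank):

/* Sketch: accumulate cpusets across cousins until "want" cpus are covered */
hwloc_cpuset_t total = hwloc_bitmap_alloc();
hwloc_obj_t cur = trg_obj;                /* first target, e.g. a core */
int covered = 0;

hwloc_bitmap_zero(total);
while (NULL != cur && covered < want) {
    hwloc_bitmap_or(total, total, cur->cpuset);
    covered = hwloc_bitmap_weight(total);
    cur = cur->next_cousin;               /* next object at the same depth */
}
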
@@ -556,6 +570,11 @@ static int bind_to_cpuset(orte_job_t *jdata)
 
 int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
 {
+    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
+                        "mca:rmaps: compute bindings for job %s with policy %s",
+                        ORTE_JOBID_PRINT(jdata->jobid),
+                        opal_hwloc_base_print_binding(jdata->map->binding));
+
     if (ORTE_MAPPING_BYUSER == ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
         /* user specified binding by rankfile - nothing for us to do */
         return ORTE_SUCCESS;

@@ -256,12 +256,11 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
                     if (NULL != proc->locale) {
                         hwloc_bitmap_list_snprintf(locale, 64, proc->locale->cpuset);
                     }
-                    opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu locale=%s binding=%s[%s:%u]>",
+                    opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu locale=%s binding=%s>",
                                 ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx,
                                 (unsigned long)proc->local_rank,
                                 (unsigned long)proc->node_rank, locale,
-                                (NULL == proc->cpu_bitmap) ? "NULL" : proc->cpu_bitmap,
-                                opal_hwloc_base_print_level(jdata->map->bind_level), proc->bind_idx);
+                                (NULL == proc->cpu_bitmap) ? "NULL" : proc->cpu_bitmap);
                 }
 #else
                 opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu>",

@@ -288,15 +287,12 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
                         continue;
                     }
                     locality = opal_hwloc_base_get_relative_locality(node->topology,
-                                                                     jdata->map->bind_level,
-                                                                     p0->bind_idx,
-                                                                     jdata->map->bind_level,
-                                                                     proc->bind_idx);
-                    opal_output(orte_clean_output, "\t\t<bind_level=%s rank=%s bind_idx=%u rank=%s bind_idx=%u locality=%s>",
-                                opal_hwloc_base_print_level(jdata->map->bind_level),
+                                                                     p0->cpu_bitmap,
+                                                                     proc->cpu_bitmap);
+                    opal_output(orte_clean_output, "\t\t<rank=%s rank=%s locality=%s>",
                                 ORTE_VPID_PRINT(p0->name.vpid),
-                                p0->bind_idx, ORTE_VPID_PRINT(proc->name.vpid),
-                                proc->bind_idx, opal_hwloc_base_print_locality(locality));
+                                ORTE_VPID_PRINT(proc->name.vpid),
+                                opal_hwloc_base_print_locality(locality));
                 }
                 opal_output(orte_clean_output, "\t</locality>\n</map>");
                 fflush(stderr);

@@ -281,7 +281,6 @@ int orte_rmaps_base_open(void)
         ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
     }
 
-#if 0
     /* #cpus/rank to use */
     param = mca_base_param_reg_int_name("rmaps", "base_cpus_per_proc",
                                         "Number of cpus to use for each rank [1-2**15 (default=1)]",

@@ -294,9 +293,12 @@ int orte_rmaps_base_open(void)
      */
     if (1 < orte_rmaps_base.cpus_per_rank &&
         !OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
-        opal_hwloc_binding_policy |= OPAL_BIND_TO_CORE;
+        if (opal_hwloc_use_hwthreads_as_cpus) {
+            OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
+        } else {
+            OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
+        }
     }
-#endif
 
     /* Should we schedule on the local node or not? */
     mca_base_param_reg_int_name("rmaps", "base_no_schedule_local",
@@ -560,7 +560,7 @@ orte_proc_t* orte_rmaps_base_setup_proc(orte_job_t *jdata,
     proc->nodename = node->name;
     node->num_procs++;
     if (node->slots_inuse < node->slots) {
-        node->slots_inuse++;
+        node->slots_inuse += orte_rmaps_base.cpus_per_rank;
     }
     if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
         ORTE_ERROR_LOG(rc);

@@ -120,7 +120,6 @@ static int orte_rmaps_ppr_open(void)
         /* this implies binding to the sockets, unless otherwise directed */
         if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
             OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_SOCKET);
-            opal_hwloc_binding_policy |= OPAL_BIND_GIVEN;
         }
         asprintf(&orte_rmaps_base.ppr, "%d:socket", value);
         ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);

@@ -56,7 +56,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
                         ORTE_JOBID_PRINT(jdata->jobid), (int)num_slots, (unsigned long)num_procs);
 
     /* check to see if we can map all the procs */
-    if (num_slots < (int)app->num_procs) {
+    if (num_slots < ((int)app->num_procs * orte_rmaps_base.cpus_per_rank)) {
         if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
             orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                            true, app->num_procs, app->app);

@@ -77,7 +77,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
                             node->name);
 #if OPAL_HAVE_HWLOC
         /* get the root object as we are not assigning
-         * locale except at the node level
+         * locale here except at the node level
          */
         if (NULL != node->topology) {
             obj = hwloc_get_root_obj(node->topology);

@@ -89,7 +89,11 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
                                 node->name);
             continue;
         }
-        num_procs_to_assign = node->slots - node->slots_inuse;
+        /* assign a number of procs equal to the number of available
+         * slots divided by the number of cpus/rank the user
+         * requested
+         */
+        num_procs_to_assign = (node->slots - node->slots_inuse) / orte_rmaps_base.cpus_per_rank;
         opal_output_verbose(2, orte_rmaps_base.rmaps_output,
                             "mca:rmaps:rr:slot assigning %d procs to node %s",
                             (int)num_procs_to_assign, node->name);

@@ -173,7 +177,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
                     --nxtra_nodes;
                 }
             }
-            num_procs_to_assign = (node->slots - node->slots_inuse) + extra_procs_to_assign;
+            num_procs_to_assign = ((node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank) + extra_procs_to_assign;
             opal_output_verbose(2, orte_rmaps_base.rmaps_output,
                                 "mca:rmaps:rr:slot adding up to %d procs to node %s",
                                 num_procs_to_assign, node->name);

@@ -224,7 +228,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
                         (int)num_slots, (unsigned long)num_procs);
 
     /* quick check to see if we can map all the procs */
-    if (num_slots < (int)app->num_procs) {
+    if (num_slots < ((int)app->num_procs * orte_rmaps_base.cpus_per_rank)) {
         if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
             orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                            true, app->num_procs, app->app);

@@ -323,23 +327,25 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
                 /* update how many we are lagging behind */
                 lag += navg;
             } else {
-                /* if slots < avg, then take all */
-                if ((node->slots - node->slots_inuse) < navg) {
-                    num_procs_to_assign = (node->slots - node->slots_inuse) + extra_procs_to_assign;
+                /* if slots < avg (adjusted for cpus/proc), then take all */
+                if ((node->slots - node->slots_inuse) < (navg * orte_rmaps_base.cpus_per_rank)) {
+                    num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank;
                     /* update how many we are lagging behind */
-                    lag += navg - (node->slots - node->slots_inuse);
+                    lag += navg - num_procs_to_assign;
                 } else {
                     /* take the avg plus as much of the "lag" as we can */
                     delta = 0;
                     if (0 < lag) {
-                        delta = (node->slots - node->slots_inuse) - navg;
+                        delta = ((node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank) - navg;
                         if (lag < delta) {
                             delta = lag;
                         }
                         lag -= delta;
                     }
-                    num_procs_to_assign = navg + delta + extra_procs_to_assign;
+                    num_procs_to_assign = navg;
                 }
+                /* add in the extras */
+                num_procs_to_assign += extra_procs_to_assign;
             }
         }
         for (j=0; j < num_procs_to_assign && nprocs_mapped < app->num_procs; j++) {

@@ -354,7 +360,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
             /* not all nodes are equal, so only set oversubscribed for
              * this node if it is in that state
              */
-            if (node->slots < (int)node->num_procs) {
+            if (node->slots < ((int)node->num_procs * orte_rmaps_base.cpus_per_rank)) {
                 /* flag the node as oversubscribed so that sched-yield gets
                  * properly set
                  */

@@ -384,7 +390,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
             /* not all nodes are equal, so only set oversubscribed for
              * this node if it is in that state
              */
-            if (node->slots < (int)node->num_procs) {
+            if (node->slots < ((int)node->num_procs * orte_rmaps_base.cpus_per_rank)) {
                 /* flag the node as oversubscribed so that sched-yield gets
                  * properly set
                  */
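
A quick worked example of the adjusted slot math in the round-robin mappers above (values illustrative): with cpus_per_rank = 2, a node with 16 slots of which 4 are in use can accept (16 - 4) / 2 = 6 more procs, and mapping 10 procs requires num_slots >= 10 * 2 = 20 before the no-oversubscribe check passes.

/* Worked example of the cpus-per-rank slot arithmetic (illustrative values) */
int slots = 16, slots_inuse = 4, cpus_per_rank = 2, num_procs = 10;
int can_take = (slots - slots_inuse) / cpus_per_rank;   /* = 6 procs */
int needed   = num_procs * cpus_per_rank;               /* = 20 slots */
bool oversub = (slots < needed);                        /* true: 16 < 20 */
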
@ -538,9 +538,6 @@ int orte_daemon(int argc, char *argv[])
|
|||||||
proc->alive = true;
|
proc->alive = true;
|
||||||
proc->app_idx = 0;
|
proc->app_idx = 0;
|
||||||
proc->local_proc = true;
|
proc->local_proc = true;
|
||||||
#if OPAL_HAVE_HWLOC
|
|
||||||
proc->bind_idx = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* create the collectives for its modex/barriers */
|
/* create the collectives for its modex/barriers */
|
||||||
jdata->peer_modex = orte_grpcomm_base_get_coll_id();
|
jdata->peer_modex = orte_grpcomm_base_get_coll_id();
|
||||||
|
@ -495,10 +495,10 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
|
|||||||
if (NULL != src->locale) {
|
if (NULL != src->locale) {
|
||||||
hwloc_bitmap_list_asprintf(&locale, src->locale->cpuset);
|
hwloc_bitmap_list_asprintf(&locale, src->locale->cpuset);
|
||||||
}
|
}
|
||||||
asprintf(&tmp2, "%s\n%s\tState: %s\tRestarts: %d\tApp_context: %ld\tLocale: %s\tBinding: %s[%u]", tmp, pfx2,
|
asprintf(&tmp2, "%s\n%s\tState: %s\tRestarts: %d\tApp_context: %ld\tLocale: %s\tBinding: %s", tmp, pfx2,
|
||||||
orte_proc_state_to_str(src->state), src->restarts, (long)src->app_idx,
|
orte_proc_state_to_str(src->state), src->restarts, (long)src->app_idx,
|
||||||
(NULL == locale) ? "UNKNOWN" : locale,
|
(NULL == locale) ? "UNKNOWN" : locale,
|
||||||
(NULL == src->cpu_bitmap) ? "NULL" : src->cpu_bitmap, src->bind_idx);
|
(NULL == src->cpu_bitmap) ? "NULL" : src->cpu_bitmap);
|
||||||
if (NULL != locale) {
|
if (NULL != locale) {
|
||||||
free(locale);
|
free(locale);
|
||||||
}
|
}
|
||||||
|
@ -918,7 +918,6 @@ static void orte_proc_construct(orte_proc_t* proc)
|
|||||||
proc->app_idx = 0;
|
proc->app_idx = 0;
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
proc->locale = NULL;
|
proc->locale = NULL;
|
||||||
proc->bind_idx = 0;
|
|
||||||
proc->cpu_bitmap = NULL;
|
proc->cpu_bitmap = NULL;
|
||||||
#endif
|
#endif
|
||||||
proc->node = NULL;
|
proc->node = NULL;
|
||||||
|
@ -120,13 +120,11 @@ ORTE_DECLSPEC extern int orte_exit_status;
|
|||||||
#define ORTE_DB_DAEMON_VPID "orte.daemon.vpid"
|
#define ORTE_DB_DAEMON_VPID "orte.daemon.vpid"
|
||||||
#define ORTE_DB_NODERANK "orte.node.rank"
|
#define ORTE_DB_NODERANK "orte.node.rank"
|
||||||
#define ORTE_DB_LOCALRANK "orte.local.rank"
|
#define ORTE_DB_LOCALRANK "orte.local.rank"
|
||||||
#define ORTE_DB_BIND_LEVEL "orte.bind.level"
|
|
||||||
#define ORTE_DB_BIND_INDEX "orte.bind.index"
|
|
||||||
#define ORTE_DB_LOCALITY "orte.locality"
|
#define ORTE_DB_LOCALITY "orte.locality"
|
||||||
#define ORTE_DB_ARCH "orte.arch"
|
#define ORTE_DB_ARCH "orte.arch"
|
||||||
#define ORTE_DB_NPROCS "orte.nprocs"
|
#define ORTE_DB_NPROCS "orte.nprocs"
|
||||||
#define ORTE_DB_RMLURI "orte.rmluri"
|
#define ORTE_DB_RMLURI "orte.rmluri"
|
||||||
#define ORTE_DB_BIND_BITMAP "orte.bind.bitmap"
|
#define ORTE_DB_CPUSET "orte.cpuset"
|
||||||
|
|
||||||
|
|
||||||
/* State Machine lists */
|
/* State Machine lists */
|
||||||
@ -505,8 +503,6 @@ struct orte_proc_t {
|
|||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
/* hwloc object to which this process was mapped */
|
/* hwloc object to which this process was mapped */
|
||||||
hwloc_obj_t locale;
|
hwloc_obj_t locale;
|
||||||
/* where the proc was bound */
|
|
||||||
unsigned int bind_idx;
|
|
||||||
/* string representation of cpu bindings */
|
/* string representation of cpu bindings */
|
||||||
char *cpu_bitmap;
|
char *cpu_bitmap;
|
||||||
#endif
|
#endif
|
||||||
|
@@ -325,14 +325,12 @@ static opal_cmd_line_init_t cmd_line_init[] = {
     { "rmaps_base_oversubscribe", '\0', "oversubscribe", "oversubscribe", 0,
       NULL, OPAL_CMD_LINE_TYPE_BOOL,
       "Nodes are allowed to be oversubscribed, even on a managed system"},
-#if 0
     { "rmaps_base_cpus_per_rank", '\0', "cpus-per-proc", "cpus-per-proc", 1,
       NULL, OPAL_CMD_LINE_TYPE_INT,
       "Number of cpus to use for each process [default=1]" },
     { "rmaps_base_cpus_per_rank", '\0', "cpus-per-rank", "cpus-per-rank", 1,
       NULL, OPAL_CMD_LINE_TYPE_INT,
       "Synonym for cpus-per-proc" },
-#endif

     /* backward compatiblity */
     { "rmaps_base_bynode", '\0', "bynode", "bynode", 0,
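With the #if 0/#endif guard deleted, mpirun accepts --cpus-per-proc (and its synonym --cpus-per-rank) again, both backed by the rmaps_base_cpus_per_rank MCA parameter shown above. A hypothetical invocation asking for two cpus per process might look like:

    mpirun -np 4 --cpus-per-proc 2 ./my_app
    # equivalently, via the underlying MCA parameter (illustrative):
    mpirun -np 4 -mca rmaps_base_cpus_per_rank 2 ./my_app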
@@ -662,13 +662,6 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr, bool update)
         ORTE_ERROR_LOG(rc);
         goto cleanup_and_return;
     }
-#if OPAL_HAVE_HWLOC
-    /* pack the bind level */
-    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &(jdata->map->bind_level), 1, OPAL_HWLOC_LEVEL_T))) {
-        ORTE_ERROR_LOG(rc);
-        goto cleanup_and_return;
-    }
-#endif

     /* cycle thru the job's procs, including only those that have
      * been updated so we minimize the amount of info being sent
@@ -698,10 +691,6 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr, bool update)
             goto cleanup_and_return;
         }
 #if OPAL_HAVE_HWLOC
-        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &proc->bind_idx, 1, OPAL_UINT))) {
-            ORTE_ERROR_LOG(rc);
-            goto cleanup_and_return;
-        }
         if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &proc->cpu_bitmap, 1, OPAL_STRING))) {
             ORTE_ERROR_LOG(rc);
             goto cleanup_and_return;
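With the bind_idx pack gone, each proc contributes exactly one OPAL_STRING to the pidmap payload, and the decoder below must mirror that order and type. A compressed view of the matching pair (both calls appear verbatim in the surrounding hunks; the standalone framing and the int32_t counter type are illustrative):

    /* encode side: one string per proc */
    rc = opal_dss.pack(&buf, &proc->cpu_bitmap, 1, OPAL_STRING);

    /* decode side: unpack the same count and type, in the same order;
     * n says how many items to pull and reports how many were pulled */
    int32_t n = 1;
    rc = opal_dss.unpack(&buf, &cpu_bitmap, &n, OPAL_STRING);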
@@ -765,8 +754,6 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
     orte_local_rank_t local_rank;
     orte_node_rank_t node_rank;
 #if OPAL_HAVE_HWLOC
-    opal_hwloc_level_t bind_level = OPAL_HWLOC_NODE_LEVEL, pbind, *lvptr;
-    unsigned int bind_idx, pbidx, *uiptr;
     char *cpu_bitmap;
 #endif
     opal_hwloc_locality_t locality;
@@ -821,31 +808,6 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
         goto cleanup;
     }

-#if OPAL_HAVE_HWLOC
-    /* unpack and store the binding level */
-    n=1;
-    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &bind_level, &n, OPAL_HWLOC_LEVEL_T))) {
-        ORTE_ERROR_LOG(rc);
-        goto cleanup;
-    }
-    /* store it */
-    proc.vpid = ORTE_VPID_INVALID;
-    if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_LEVEL, &bind_level, OPAL_HWLOC_LEVEL_T))) {
-        ORTE_ERROR_LOG(rc);
-        goto cleanup;
-    }
-    /* set mine */
-    if (proc.jobid == ORTE_PROC_MY_NAME->jobid) {
-        orte_process_info.bind_level = bind_level;
-    }
-
-    OPAL_OUTPUT_VERBOSE((2, orte_nidmap_output,
-                         "%s orte:util:decode:pidmap nprocs %s bind level %s",
-                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                         ORTE_VPID_PRINT(num_procs),
-                         opal_hwloc_base_print_level(bind_level)));
-#endif
-
     /* cycle thru the data until we hit an INVALID vpid indicating
      * all data for this job has been read
      */
@@ -870,11 +832,6 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
                 goto cleanup;
             }
 #if OPAL_HAVE_HWLOC
-            n=1;
-            if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &bind_idx, &n, OPAL_UINT))) {
-                ORTE_ERROR_LOG(rc);
-                goto cleanup;
-            }
             n=1;
             if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &cpu_bitmap, &n, OPAL_STRING))) {
                 ORTE_ERROR_LOG(rc);
@@ -886,9 +843,6 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
                 /* set mine */
                 orte_process_info.my_local_rank = local_rank;
                 orte_process_info.my_node_rank = node_rank;
-#if OPAL_HAVE_HWLOC
-                orte_process_info.bind_idx = bind_idx;
-#endif
             }
             /* apps don't need the rest of the data in the buffer for this proc,
              * but we have to unpack it anyway to stay in sync
@@ -923,11 +877,7 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
                 goto cleanup;
             }
 #if OPAL_HAVE_HWLOC
-            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_INDEX, &bind_idx, OPAL_UINT))) {
-                ORTE_ERROR_LOG(rc);
-                goto cleanup;
-            }
-            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_BIND_BITMAP, cpu_bitmap, OPAL_STRING))) {
+            if (ORTE_SUCCESS != (rc = opal_db.store((*id), OPAL_DB_INTERNAL, ORTE_DB_CPUSET, cpu_bitmap, OPAL_STRING))) {
                 ORTE_ERROR_LOG(rc);
                 goto cleanup;
             }
@@ -1027,27 +977,15 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
                  */
                 orte_process_info.num_local_peers++;
 #if OPAL_HAVE_HWLOC
-                /* retrieve the bind level for the other proc's job */
-                lvptr = &pbind;
-                proc.vpid = ORTE_VPID_INVALID;
-                if (ORTE_SUCCESS != (rc = opal_db.fetch((*id), ORTE_DB_BIND_LEVEL, (void**)&lvptr, OPAL_HWLOC_LEVEL_T))) {
+                /* retrieve the binding for the other proc */
+                if (ORTE_SUCCESS != (rc = opal_db.fetch((*id), ORTE_DB_CPUSET, (void**)&cpu_bitmap, OPAL_STRING))) {
                     ORTE_ERROR_LOG(rc);
                     goto cleanup;
                 }
-
-                /* retrieve the other's proc's bind idx */
-                uiptr = &pbidx;
-                proc.vpid = i;
-                if (ORTE_SUCCESS != (rc = opal_db.fetch((*id), ORTE_DB_BIND_INDEX, (void**)&uiptr, OPAL_UINT))) {
-                    ORTE_ERROR_LOG(rc);
-                    goto cleanup;
-                }
-
                 /* we share a node - see what else we share */
                 locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
-                                                                 orte_process_info.bind_level,
-                                                                 orte_process_info.bind_idx,
-                                                                 pbind, pbidx);
+                                                                 orte_process_info.cpuset,
+                                                                 cpu_bitmap);
 #else
                 locality = OPAL_PROC_ON_NODE;
 #endif
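The two-argument form called here takes the procs' cpuset strings directly. A rough sketch of how such a bitmap-based relative-locality computation could look (an assumption about the approach, not the commit's actual body; the flag OR-ing and the level-to-flag mapping are illustrative, and only two levels are shown):

    /* Sketch only - assumes the OPAL_PROC_ON_* flags and hwloc headers
     * already in scope, as elsewhere in this diff. */
    static opal_hwloc_locality_t locality_from_cpusets(hwloc_topology_t topo,
                                                       char *cpuset1, char *cpuset2)
    {
        opal_hwloc_locality_t locality = OPAL_PROC_ON_NODE;
        hwloc_bitmap_t bm1 = hwloc_bitmap_alloc();
        hwloc_bitmap_t bm2 = hwloc_bitmap_alloc();
        hwloc_obj_t obj;
        unsigned d, depth;

        /* the strings are the list format written by hwloc_bitmap_list_asprintf */
        hwloc_bitmap_list_sscanf(bm1, cpuset1);
        hwloc_bitmap_list_sscanf(bm2, cpuset2);

        depth = hwloc_topology_get_depth(topo);
        for (d = 0; d < depth; d++) {
            /* walk every object at this depth; if one covers both procs,
             * they share that level of the topology */
            for (obj = hwloc_get_obj_by_depth(topo, d, 0); NULL != obj; obj = obj->next_cousin) {
                if (hwloc_bitmap_isincluded(bm1, obj->cpuset) &&
                    hwloc_bitmap_isincluded(bm2, obj->cpuset)) {
                    if (HWLOC_OBJ_SOCKET == obj->type) locality |= OPAL_PROC_ON_SOCKET;
                    if (HWLOC_OBJ_CORE == obj->type)   locality |= OPAL_PROC_ON_CORE;
                    /* ...NUMA and cache levels would be mapped the same way... */
                }
            }
        }
        hwloc_bitmap_free(bm1);
        hwloc_bitmap_free(bm2);
        return locality;
    }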
@@ -1095,7 +1033,6 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
     orte_node_rank_t node_rank;
 #if OPAL_HAVE_HWLOC
     opal_hwloc_level_t bind_level = OPAL_HWLOC_NODE_LEVEL;
-    unsigned int bind_idx;
     char *cpu_bitmap;
 #endif
     orte_std_cntr_t n;
@@ -1148,15 +1085,6 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
     }
     jdata->num_procs = num_procs;

-#if OPAL_HAVE_HWLOC
-    /* unpack the binding level */
-    n=1;
-    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &bind_level, &n, OPAL_HWLOC_LEVEL_T))) {
-        ORTE_ERROR_LOG(rc);
-        goto cleanup;
-    }
-    jdata->map->bind_level = bind_level;
-#endif
     /* cycle thru the data until we hit an INVALID vpid indicating
      * all data for this job has been read
      */
@@ -1181,11 +1109,6 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
             goto cleanup;
         }
 #if OPAL_HAVE_HWLOC
-        n=1;
-        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &bind_idx, &n, OPAL_UINT))) {
-            ORTE_ERROR_LOG(rc);
-            goto cleanup;
-        }
         n=1;
         if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &cpu_bitmap, &n, OPAL_STRING))) {
             ORTE_ERROR_LOG(rc);
@@ -1295,7 +1218,6 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
             proc->restarts = restarts;
             proc->state = state;
 #if OPAL_HAVE_HWLOC
-            proc->bind_idx = bind_idx;
             proc->cpu_bitmap = cpu_bitmap;
 #endif
         }
@@ -75,8 +75,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
     /* .sock_stdout = */ NULL,
     /* .sock_stderr = */ NULL,
 #if OPAL_HAVE_HWLOC
-    /* .bind_level = */ OPAL_HWLOC_NODE_LEVEL,
-    /* .bind_idx = */ 0,
+    /* .cpuset = */ NULL,
 #endif
     /* .app_rank = */ -1,
     /* .peer_modex = */ -1,
@@ -122,8 +122,7 @@ struct orte_proc_info_t {
     char *sock_stdout;      /**< Path name to temp file for stdout. */
     char *sock_stderr;      /**< Path name to temp file for stderr. */
 #if OPAL_HAVE_HWLOC
-    opal_hwloc_level_t bind_level;
-    unsigned int bind_idx;
+    char *cpuset;           /**< String-representation of bitmap where we are bound */
 #endif
     int32_t app_rank;       /**< rank within my app_context */
     orte_grpcomm_coll_id_t peer_modex;  /**< modex collective id */
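The new cpuset field holds the same list-format string that the print hunk earlier in this diff renders. A sketch of how that string could be captured from the current process binding (a hypothetical helper, not part of this commit; the hwloc calls themselves are standard):

    #include <hwloc.h>

    /* Hypothetical helper: read our own binding and return it in the
     * list format (e.g. "0-3,8") used for orte_process_info.cpuset. */
    static char *binding_to_cpuset_string(hwloc_topology_t topo)
    {
        char *str = NULL;
        hwloc_cpuset_t bound = hwloc_bitmap_alloc();

        if (0 == hwloc_get_cpubind(topo, bound, HWLOC_CPUBIND_PROCESS)) {
            hwloc_bitmap_list_asprintf(&str, bound);
        }
        hwloc_bitmap_free(bound);
        return str;  /* NULL if the binding could not be read */
    }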