1
1

Begin cleaning up debugger support

Debugger daemons do not count against available slots. Clean up some leftover errors from the upgrade to HWLOC 2 in the mappers. Properly flag debugger jobs that come in via PMIx.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-09-27 16:18:43 -05:00
родитель 98aee5407f
Коммит d5ce3c38e1
4 изменённых файлов: 26 добавлений и 13 удалений

Просмотреть файл

@@ -590,10 +590,14 @@ orte_proc_t* orte_rmaps_base_setup_proc(orte_job_t *jdata,
OBJ_RETAIN(node); /* maintain accounting on object */
proc->node = node;
/* if this is a debugger job, then it doesn't count against
* available slots - otherwise, it does */
if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
node->num_procs++;
if (node->slots_inuse < node->slots) {
++node->slots_inuse;
}
}
if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);

Просмотреть файл

@@ -276,6 +276,8 @@ static int ppr_mapper(orte_job_t *jdata)
/* add the node to the map, if needed */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node);
opal_pointer_array_add(jdata->map->nodes, node);
jdata->map->num_nodes++;
}
/* if we are mapping solely at the node level, just put
@@ -620,7 +622,7 @@ static int assign_locations(orte_job_t *jdata)
orte_node_t *node;
orte_proc_t *proc;
orte_app_context_t *app;
opal_hwloc_level_t level;
hwloc_obj_type_t level;
hwloc_obj_t obj;
unsigned int cache_level=0;
int ppr, cnt, nobjs, nprocs_mapped;
@@ -643,24 +645,24 @@ static int assign_locations(orte_job_t *jdata)
/* pickup the object level */
if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_NODE_LEVEL;
level = HWLOC_OBJ_MACHINE;
} else if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_HWTHREAD_LEVEL;
level = HWLOC_OBJ_PU;
} else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_CORE_LEVEL;
level = HWLOC_OBJ_CORE;
} else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_SOCKET_LEVEL;
level = HWLOC_OBJ_SOCKET;
} else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_L1CACHE_LEVEL;
level = HWLOC_OBJ_L1CACHE;
cache_level = 1;
} else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_L2CACHE_LEVEL;
level = HWLOC_OBJ_L2CACHE;
cache_level = 2;
} else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_L3CACHE_LEVEL;
level = HWLOC_OBJ_L3CACHE;
cache_level = 3;
} else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
level = OPAL_HWLOC_NUMA_LEVEL;
level = HWLOC_OBJ_NUMANODE;
} else {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_TAKE_NEXT_OPTION;
@@ -689,7 +691,7 @@ static int assign_locations(orte_job_t *jdata)
true, node->name);
return ORTE_ERR_SILENT;
}
if (OPAL_HWLOC_NODE_LEVEL == level) {
if (HWLOC_OBJ_MACHINE == level) {
obj = hwloc_get_root_obj(node->topology->topo);
for (j=0; j < node->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {

Просмотреть файл

@@ -95,6 +95,8 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
/* add this node to the map - do it only once */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node);
opal_pointer_array_add(jdata->map->nodes, node);
++(jdata->map->num_nodes);
}
if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {

Просмотреть файл

@@ -448,6 +448,11 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
orte_set_attribute(&jdata->attributes, ORTE_JOB_INDEX_ARGV,
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
/*** DEBUGGER DAEMONS ***/
} else if (0 == strcmp(info->key, OPAL_PMIX_DEBUGGER_DAEMONS)) {
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON);
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_DEBUGGER);
/*** DEFAULT - CACHE FOR INCLUSION WITH JOB INFO ***/
} else {
/* cache for inclusion with job info at registration */