
Merge pull request #3336 from rhc54/topic/launchmon

Update the debugger launch code to reflect the new backend mapping method.
This commit is contained in:
Ralph Castain 2017-04-12 15:10:23 -07:00 (committed by GitHub)
Parents: 5df9567a23 0500cc1c66
Commit: f1403ac3c2
8 changed files, 113 additions and 157 deletions
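The core of the change, spelled out in the rmaps and orterun hunks below, is that mpirun no longer hand-builds a map for the debugger daemons; it tags the debugger job with the new ORTE_MAPPING_DEBUGGER directive and lets the regular PPR mapper place one daemon per occupied node without charging slots against the allocation. The fragment below is a minimal, self-contained sketch of that bit-flag pattern; the constants and helpers are simplified stand-ins, not the actual ORTE macros (only the 0x8000 value mirrors the flag added in rmaps_types.h).

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    /* simplified stand-ins for the ORTE mapping-directive flags */
    #define MAPPING_GIVEN     0x4000
    #define MAPPING_DEBUGGER  0x8000   /* mapping a debugger job */

    /* set a directive bit in the mapping policy word */
    static void set_directive(uint16_t *policy, unsigned dir)
    {
        *policy |= (uint16_t)dir;
    }

    /* test whether a directive bit is set */
    static bool has_directive(uint16_t policy, unsigned dir)
    {
        return 0 != (policy & dir);
    }

    /* Toy version of the slot accounting in orte_rmaps_base_get_target_nodes():
     * a debugger job reports one slot per allocated node, while a regular job
     * sums the free slots on each node. */
    static int count_slots(uint16_t policy, int num_nodes, const int *free_slots)
    {
        if (has_directive(policy, MAPPING_DEBUGGER)) {
            return num_nodes;   /* one debugger daemon per node */
        }
        int total = 0;
        for (int i = 0; i < num_nodes; i++) {
            total += free_slots[i];
        }
        return total;
    }

    int main(void)
    {
        uint16_t policy = 0;
        int free_slots[] = { 4, 0, 2 };   /* per-node free slots for the app job */

        set_directive(&policy, MAPPING_GIVEN);
        set_directive(&policy, MAPPING_DEBUGGER);
        printf("debugger job slots: %d\n", count_slots(policy, 3, free_slots));  /* 3 */

        policy = (uint16_t)(policy & ~MAPPING_DEBUGGER);
        printf("regular job slots:  %d\n", count_slots(policy, 3, free_slots));  /* 6 */
        return 0;
    }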

View file

@@ -556,7 +556,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
     /* now pass the relay buffer to myself for processing - don't
      * inject it into the RML system via send as that will compete
      * with the relay messages down in the OOB. Instead, pass it
-     * directly to the orted command processor */
+     * directly to the RML message processor */
     if (ORTE_DAEMON_DVM_NIDMAP_CMD != command) {
         ORTE_RML_POST_MESSAGE(ORTE_PROC_MY_NAME, tag, 1,
                               relay->base_ptr, relay->bytes_used);

View file

@@ -694,7 +694,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
     }

     /* did the user request we display output in xterms? */
-    if (NULL != orte_xterm) {
+    if (NULL != orte_xterm && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
         opal_list_item_t *nmitem;
         orte_namelist_t *nm;
         /* see if this rank is one of those requested */
@@ -740,9 +740,6 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
         for (i=0; NULL != app->argv[i]; i++) {
             opal_argv_append_nosize(&cd->argv, app->argv[i]);
         }
-        /* the app exe name itself is in the argvsav array, so
-         * we can recover it from there later
-         */
         cd->cmd = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL);
         if (NULL == cd->cmd) {
             orte_show_help("help-orte-odls-base.txt",
@@ -766,7 +763,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
     }

     /* if we are indexing the argv by rank, do so now */
-    if (cd->index_argv) {
+    if (cd->index_argv && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
         char *param;
         asprintf(&param, "%s-%d", cd->argv[0], (int)child->name.vpid);
         free(cd->argv[0]);
@@ -1805,12 +1802,6 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child,
     opal_event_set_priority(&cd->ev, ORTE_MSG_PRI);
     opal_event_active(&cd->ev, OPAL_EV_WRITE, 1);

-    if (ORTE_SUCCESS != (rc = fork_local(cd))) {
-        orte_wait_cb_cancel(child);
-        child->exit_code = ORTE_ERR_SILENT; /* error message already output */
-        ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START);
-    }
 CLEANUP:
     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
                          "%s odls:restart of proc %s %s",

View file

@@ -92,7 +92,6 @@ typedef uint8_t orte_daemon_cmd_flag_t;
 /* tell DVM daemons to cleanup resources from job */
 #define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34

 /*
  * Struct written up the pipe from the child to the parent.
  */

View file

@@ -477,55 +477,60 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
                          (int)opal_list_get_size(allocated_nodes)));

 complete:
+    num_slots = 0;
     /* remove all nodes that are already at max usage, and
      * compute the total number of allocated slots while
-     * we do so */
-    num_slots = 0;
-    item = opal_list_get_first(allocated_nodes);
-    while (item != opal_list_get_end(allocated_nodes)) {
-        /** save the next pointer in case we remove this node */
-        next = opal_list_get_next(item);
-        /** check to see if this node is fully used - remove if so */
-        node = (orte_node_t*)item;
-        if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
-            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
-                                 "%s Removing node %s: max %d inuse %d",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                 node->name, node->slots_max, node->slots_inuse));
-            opal_list_remove_item(allocated_nodes, item);
-            OBJ_RELEASE(item);  /* "un-retain" it */
-        } else if (node->slots <= node->slots_inuse &&
-                   (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
-            /* remove the node as fully used */
-            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
-                                 "%s Removing node %s slots %d inuse %d",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                 node->name, node->slots, node->slots_inuse));
-            opal_list_remove_item(allocated_nodes, item);
-            OBJ_RELEASE(item);  /* "un-retain" it */
-        } else if (node->slots > node->slots_inuse) {
-            /* add the available slots */
-            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
-                                 "%s node %s has %d slots available",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                 node->name, node->slots - node->slots_inuse));
-            num_slots += node->slots - node->slots_inuse;
-        } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
-            /* nothing needed to do here - we don't add slots to the
-             * count as we don't have any available. Just let the mapper
-             * do what it needs to do to meet the request
-             */
-            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
-                                 "%s node %s is fully used, but available for oversubscrition",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                 node->name));
-        } else {
-            /* if we cannot use it, remove it from list */
-            opal_list_remove_item(allocated_nodes, item);
-            OBJ_RELEASE(item);  /* "un-retain" it */
+     * we do so - can ignore this if we are mapping debugger
+     * daemons as they do not count against the allocation */
+    if (ORTE_MAPPING_DEBUGGER & ORTE_GET_MAPPING_DIRECTIVE(policy)) {
+        num_slots = opal_list_get_size(allocated_nodes);  // tell the mapper there is one slot/node for debuggers
+    } else {
+        item = opal_list_get_first(allocated_nodes);
+        while (item != opal_list_get_end(allocated_nodes)) {
+            /** save the next pointer in case we remove this node */
+            next = opal_list_get_next(item);
+            /** check to see if this node is fully used - remove if so */
+            node = (orte_node_t*)item;
+            if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
+                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
+                                     "%s Removing node %s: max %d inuse %d",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                     node->name, node->slots_max, node->slots_inuse));
+                opal_list_remove_item(allocated_nodes, item);
+                OBJ_RELEASE(item);  /* "un-retain" it */
+            } else if (node->slots <= node->slots_inuse &&
+                       (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
+                /* remove the node as fully used */
+                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
+                                     "%s Removing node %s slots %d inuse %d",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                     node->name, node->slots, node->slots_inuse));
+                opal_list_remove_item(allocated_nodes, item);
+                OBJ_RELEASE(item);  /* "un-retain" it */
+            } else if (node->slots > node->slots_inuse) {
+                /* add the available slots */
+                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
+                                     "%s node %s has %d slots available",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                     node->name, node->slots - node->slots_inuse));
+                num_slots += node->slots - node->slots_inuse;
+            } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
+                /* nothing needed to do here - we don't add slots to the
+                 * count as we don't have any available. Just let the mapper
+                 * do what it needs to do to meet the request
+                 */
+                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
+                                     "%s node %s is fully used, but available for oversubscription",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                     node->name));
+            } else {
+                /* if we cannot use it, remove it from list */
+                opal_list_remove_item(allocated_nodes, item);
+                OBJ_RELEASE(item);  /* "un-retain" it */
+            }
+            /** go on to next item */
+            item = next;
         }
-        /** go on to next item */
-        item = next;
     }

     /* Sanity check to make sure we have resources available */

View file

@@ -327,47 +327,49 @@ static int ppr_mapper(orte_job_t *jdata)
         }
     }

-    /* set the total slots used */
-    if ((int)node->num_procs <= node->slots) {
-        node->slots_inuse = (int)node->num_procs;
-    } else {
-        node->slots_inuse = node->slots;
-    }
-
-    /* if no-oversubscribe was specified, check to see if
-     * we have violated the total slot specification - regardless,
-     * if slots_max was given, we are not allowed to violate it!
-     */
-    if ((node->slots < (int)node->num_procs) ||
-        (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
-        if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
-            orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
-                           true, node->num_procs, app->app);
-            ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
-            rc = ORTE_ERR_SILENT;
-            goto error;
-        }
-        /* flag the node as oversubscribed so that sched-yield gets
-         * properly set
-         */
-        ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
-        ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
-        /* check for permission */
-        if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
-            /* if we weren't given a directive either way, then we will error out
-             * as the #slots were specifically given, either by the host RM or
-             * via hostfile/dash-host */
-            if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
-                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
-                               true, app->num_procs, app->app);
-                ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
-                return ORTE_ERR_SILENT;
-            } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
-                /* if we were explicitly told not to oversubscribe, then don't */
-                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
-                               true, app->num_procs, app->app);
-                ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
-                return ORTE_ERR_SILENT;
+    if (!(ORTE_MAPPING_DEBUGGER & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
+        /* set the total slots used */
+        if ((int)node->num_procs <= node->slots) {
+            node->slots_inuse = (int)node->num_procs;
+        } else {
+            node->slots_inuse = node->slots;
+        }
+
+        /* if no-oversubscribe was specified, check to see if
+         * we have violated the total slot specification - regardless,
+         * if slots_max was given, we are not allowed to violate it!
+         */
+        if ((node->slots < (int)node->num_procs) ||
+            (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
+            if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
+                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
+                               true, node->num_procs, app->app);
+                ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
+                rc = ORTE_ERR_SILENT;
+                goto error;
+            }
+            /* flag the node as oversubscribed so that sched-yield gets
+             * properly set
+             */
+            ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
+            ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
+            /* check for permission */
+            if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
+                /* if we weren't given a directive either way, then we will error out
+                 * as the #slots were specifically given, either by the host RM or
+                 * via hostfile/dash-host */
+                if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
+                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
+                                   true, app->num_procs, app->app);
+                    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
+                    return ORTE_ERR_SILENT;
+                } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
+                    /* if we were explicitly told not to oversubscribe, then don't */
+                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
+                                   true, app->num_procs, app->app);
+                    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
+                    return ORTE_ERR_SILENT;
+                }
             }
         }
     }

View file

@@ -12,7 +12,7 @@
  * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
  * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
  *                         reserved.
- * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -91,6 +91,8 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_map_t);
 /* an error flag */
 #define ORTE_MAPPING_CONFLICTED 0x2000
 #define ORTE_MAPPING_GIVEN      0x4000
+/* mapping a debugger job */
+#define ORTE_MAPPING_DEBUGGER   0x8000
 #define ORTE_SET_MAPPING_DIRECTIVE(target, pol) \
     (target) |= (pol)
 #define ORTE_UNSET_MAPPING_DIRECTIVE(target, pol) \

View file

@@ -529,7 +529,6 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
         }
         break;

     /**** TERMINATE JOB COMMAND ****/
     case ORTE_DAEMON_TERMINATE_JOB_CMD:

View file

@@ -2383,15 +2383,13 @@ static void orte_debugger_dump(void)
                    "NULL" : (char*) MPIR_server_arguments);
 }

-static void setup_debugger_job(void)
+static void setup_debugger_job(orte_jobid_t jobid)
 {
     orte_job_t *debugger;
     orte_app_context_t *app;
-    orte_proc_t *proc;
-    int i, rc;
-    orte_node_t *node;
-    orte_vpid_t vpid=0;
+    int rc;
     char cwd[OPAL_PATH_MAX];
+    bool flag = true;

     /* setup debugger daemon job */
     debugger = OBJ_NEW(orte_job_t);
@@ -2427,68 +2425,28 @@
         return;
     }
     app->cwd = strdup(cwd);
-    orte_remove_attribute(&app->attributes, ORTE_APP_USER_CWD);
+    orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
     opal_argv_append_nosize(&app->argv, app->app);
     build_debugger_args(app);
     opal_pointer_array_add(debugger->apps, app);
     debugger->num_apps = 1;
-    /* create a job map */
+    /* create the map object and set the policy to 1ppn */
     debugger->map = OBJ_NEW(orte_job_map_t);
-    /* in building the map, we want to launch one debugger daemon
-     * on each node that *already has an application process on it*.
-     * We cannot just launch one debugger daemon on EVERY node because
-     * the original job may not have placed procs on every node. So
-     * we construct the map here by cycling across all nodes, adding
-     * only those nodes where num_procs > 0.
-     */
-    for (i=0; i < orte_node_pool->size; i++) {
-        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
-            continue;
-        }
-        /* if this node wasn't included in the vm, ignore it */
-        if (NULL == node->daemon) {
-            continue;
-        }
-        /* if the node doesn't have any app procs on it, ignore it */
-        if (node->num_procs < 1) {
-            continue;
-        }
-        /* this node has at least one proc, so add it to our map */
-        OBJ_RETAIN(node);
-        opal_pointer_array_add(debugger->map->nodes, node);
-        debugger->map->num_nodes++;
-        /* add a debugger daemon to the node - note that the
-         * debugger daemon does NOT count against our subscribed slots
-         */
-        proc = OBJ_NEW(orte_proc_t);
-        proc->name.jobid = debugger->jobid;
-        proc->name.vpid = vpid++;
-        /* point the proc at the local ORTE daemon as its parent */
-        proc->parent = node->daemon->name.vpid;
-        /* set the local/node ranks - we don't actually care
-         * what these are, but the odls needs them
-         */
-        proc->local_rank = 0;
-        proc->node_rank = 0;
-        proc->app_rank = proc->name.vpid;
-        /* flag the proc as ready for launch */
-        proc->state = ORTE_PROC_STATE_INIT;
-        proc->app_idx = 0;
-        OBJ_RETAIN(node);  /* maintain accounting on object */
-        proc->node = node;
-        /* add the proc to the job */
-        opal_pointer_array_set_item(debugger->procs, proc->name.vpid, proc);
-        debugger->num_procs++;
-        /* add the proc to the node's array */
-        OBJ_RETAIN(proc);
-        opal_pointer_array_add(node->procs, (void*)proc);
-        node->num_procs++;
+    ORTE_SET_MAPPING_POLICY(debugger->map->mapping, ORTE_MAPPING_PPR);
+    ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, ORTE_MAPPING_GIVEN);
+    ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, ORTE_MAPPING_DEBUGGER);
+    /* define the ppr */
+    debugger->map->ppr = strdup("1:node");
+    /* mark that we do not want the daemon bound */
+    if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&debugger->map->binding, "none"))) {
+        ORTE_ERROR_LOG(rc);
+        return;
+    }
+    /* spawn it */
+    rc = orte_plm.spawn(debugger);
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
     }
-    /* schedule it for launch */
-    debugger->state = ORTE_JOB_STATE_INIT;
-    ORTE_ACTIVATE_JOB_STATE(debugger, ORTE_JOB_STATE_LAUNCH_APPS);
 }

 /*
@@ -2644,7 +2602,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == orte_debugger_test_daemon) ?
                             MPIR_executable_path : orte_debugger_test_daemon);
-        setup_debugger_job();
+        setup_debugger_job(jdata->jobid);
     }
     /* we don't have anything else to do */
     OBJ_RELEASE(caddy);
@@ -2936,7 +2894,7 @@ static void attach_debugger(int fd, short event, void *arg)
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == orte_debugger_test_daemon) ?
                             MPIR_executable_path : orte_debugger_test_daemon);
-        setup_debugger_job();
+        setup_debugger_job(ORTE_JOBID_WILDCARD);
         did_once = true;
     }