1
1

The ability to add procs to a running job was inadvertently broken when we added detection of a proc exiting before calling init. Re-enable it here, ensuring that procs that are being restarted and/or added to a job do -not- call the barrier during orte_init.

This commit was SVN r22404.
Этот коммит содержится в:
Ralph Castain 2010-01-14 17:59:42 +00:00
родитель 370b1c75c4
Коммит cec840f6b9
8 изменённых файлов: 52 добавлений и 7 удалений

Просмотреть файл

@ -224,7 +224,7 @@ int orte_ess_base_app_setup(void)
* Cannot do this on a restart as the rest of the processes
* in the job won't be executing this step, so we would hang
*/
if (0 == orte_process_info.num_restarts && ORTE_PROC_IS_NON_MPI) {
if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
if (ORTE_SUCCESS != (ret = orte_grpcomm.barrier())) {
ORTE_ERROR_LOG(ret);
error = "orte barrier";

Просмотреть файл

@ -896,7 +896,7 @@ find_my_procs:
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:constructing child list - checking proc %s on daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(j),
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc),
ORTE_VPID_PRINT(host_daemon)));
/* does this proc belong to us? */
@ -904,7 +904,7 @@ find_my_procs:
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:constructing child list - found proc %s for me!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(j)));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc)));
add_child = true;
/* if this job is restarting procs, then we need to treat things
@ -927,6 +927,7 @@ find_my_procs:
(child->alive) ? "ALIVE" : "DEAD"));
add_child = false;
child->restarts = restarts[j];
child->do_not_barrier = true;
/* mark that this app_context is being used on this node */
jobdat->apps[app_idx[j]]->used_on_node = true;
break;
@ -936,6 +937,9 @@ find_my_procs:
/* if we need to add the child, do so */
if (add_child) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"adding proc %s to my local list",
ORTE_NAME_PRINT(&proc)));
/* keep tabs of the number of local procs */
jobdat->num_local_procs++;
/* add this proc to our child list */
@ -947,7 +951,11 @@ find_my_procs:
}
child->app_idx = app_idx[j]; /* save the index into the app_context objects */
child->restarts = restarts[j];
if (NULL != slot_str && NULL != slot_str[j]) {
/* if the job is in restart mode, the child must not barrier when launched */
if (ORTE_JOB_STATE_RESTART == jobdat->state) {
child->do_not_barrier = true;
}
if (NULL != slot_str && NULL != slot_str[j]) {
child->slot_list = strdup(slot_str[j]);
}
/* mark that this app_context is being used on this node */
@ -1556,6 +1564,11 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
item = opal_list_get_next(item)) {
child = (orte_odls_child_t*)item;
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch working child %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(child->name)));
/* does this child belong to this app? */
if (i != child->app_idx) {
continue;
@ -1789,6 +1802,17 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
free(param);
free(value);
/* if the proc should not barrier in orte_init, tell it */
if (child->do_not_barrier || 0 < child->restarts) {
if (NULL == (param = mca_base_param_environ_variable("orte","do_not","barrier"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
opal_setenv(param, "1", true, &app->env);
free(param);
}
/* if the proc isn't going to forward IO, then we need to flag that
* it has "completed" iof termination as otherwise it will never fire
*/

Просмотреть файл

@ -92,6 +92,7 @@ static void orte_odls_child_constructor(orte_odls_child_t *ptr)
ptr->slot_list = NULL;
ptr->waitpid_recvd = false;
ptr->iof_complete = false;
ptr->do_not_barrier = false;
}
static void orte_odls_child_destructor(orte_odls_child_t *ptr)
{

Просмотреть файл

@ -99,6 +99,7 @@ typedef struct {
bool waitpid_recvd; /* waitpid has detected proc termination */
bool iof_complete; /* IOF has noted proc terminating all channels */
struct timeval starttime; /* when the proc was started - for timing purposes only */
bool do_not_barrier; /* the proc should not barrier in orte_init */
} orte_odls_child_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_child_t);

Просмотреть файл

@ -174,6 +174,9 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
* so check to see if any nodes are in the map - this will be our
* indicator that this is the prior map for a failed job that
* needs to be re-mapped
*
* NOTE: if a proc is being ADDED to an existing job, then its
* node field will be NULL.
*/
if (0 < jdata->map->num_nodes) {
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
@ -197,7 +200,8 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: proc %s from node %s is to be restarted",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name), proc->node->name));
ORTE_NAME_PRINT(&proc->name),
(NULL == proc->node) ? "NULL" : proc->node->name));
/* if we have fault groups, flag all the fault groups that
* include this node so we don't reuse them
*/
@ -214,7 +218,7 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(&ftgrp->nodes, k))) {
continue;
}
if (0 == strcmp(node->name, proc->node->name)) {
if (NULL != proc->node && 0 == strcmp(node->name, proc->node->name)) {
/* yes - mark it to not be included */
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: node %s is in fault group %d, which will be excluded",
@ -315,7 +319,9 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
"%s rmaps:resilient: placing proc %s into fault group %d node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name), target->ftgrp, nd->name));
OBJ_RELEASE(proc->node); /* required to maintain bookkeeping */
if (NULL != proc->node) {
OBJ_RELEASE(proc->node); /* required to maintain bookkeeping */
}
/* put proc on the found node */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, nd, jdata->map->cpus_per_rank, proc->app_idx,
NULL, jdata->map->oversubscribe, false, &proc))) {

Просмотреть файл

@ -159,6 +159,9 @@ char *orte_report_events_uri = NULL;
/* report bindings */
bool orte_report_bindings = false;
/* barrier control */
bool orte_do_not_barrier = false;
#endif /* !ORTE_DISABLE_FULL_RTE */
int orte_debug_output = -1;

Просмотреть файл

@ -634,6 +634,9 @@ ORTE_DECLSPEC extern char *orte_report_events_uri;
/* report bindings */
ORTE_DECLSPEC extern bool orte_report_bindings;
/* barrier control */
ORTE_DECLSPEC extern bool orte_do_not_barrier;
#endif /* ORTE_DISABLE_FULL_SUPPORT */
END_C_DECLS

Просмотреть файл

@ -417,6 +417,13 @@ int orte_register_params(void)
orte_report_events = true;
}
/* barrier control */
mca_base_param_reg_int_name("orte", "do_not_barrier",
"Do not barrier in orte_init",
true, false,
(int) false, &value);
orte_do_not_barrier = OPAL_INT_TO_BOOL(value);
#endif /* ORTE_DISABLE_FULL_SUPPORT */
return ORTE_SUCCESS;