From 2dd2694f2557952291ad8cf3267019cda942d241 Mon Sep 17 00:00:00 2001
From: Ralph Castain
Date: Thu, 15 Dec 2011 18:04:48 +0000
Subject: [PATCH] Fix comm_spawn in oversubscribed conditions.

If oversubscription is allowed, let nodes flow into the mapper even if
they are oversubscribed, constrained by the slots_max absolute ceiling.
Clean up the error messages when comm_spawn fails so it correctly and
succinctly reports the error.

This commit was SVN r25659.
---
 orte/mca/plm/alps/plm_alps_module.c          | 13 ++++++++++---
 orte/mca/plm/base/plm_base_receive.c         |  3 ++-
 orte/mca/plm/ccp/plm_ccp_module.c            | 13 ++++++++++---
 orte/mca/plm/lsf/plm_lsf_module.c            | 13 ++++++++++---
 orte/mca/plm/rsh/plm_rsh_module.c            | 13 ++++++++++---
 orte/mca/plm/slurm/plm_slurm_module.c        | 13 ++++++++++---
 orte/mca/plm/tm/plm_tm_module.c              | 13 ++++++++++---
 orte/mca/rmaps/base/rmaps_base_map_job.c     |  8 ++++++++
 orte/mca/rmaps/base/rmaps_base_support_fns.c | 16 +++++++++++-----
 9 files changed, 81 insertions(+), 24 deletions(-)

diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c
index b2a4082025..19f38a5682 100644
--- a/orte/mca/plm/alps/plm_alps_module.c
+++ b/orte/mca/plm/alps/plm_alps_module.c
@@ -391,6 +391,7 @@ static int plm_alps_launch_job(orte_job_t *jdata)
     /* setup the job */
     if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
         ORTE_ERROR_LOG(rc);
+        failed_job = jdata->jobid;
         goto cleanup;
     }
     failed_job = jdata->jobid;
@@ -435,9 +436,15 @@ static int plm_alps_launch_job(orte_job_t *jdata)
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_errmgr.update_state(failed_job, job_state,
-                                 NULL, ORTE_PROC_STATE_UNDEF,
-                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        if (ORTE_ERR_SILENT == rc) {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_SILENT_ABORT,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        } else {
+            orte_errmgr.update_state(failed_job, job_state,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        }
     }
 
     return rc;
diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c
index 3660a121cb..37c3552ae1 100644
--- a/orte/mca/plm/base/plm_base_receive.c
+++ b/orte/mca/plm/base/plm_base_receive.c
@@ -255,7 +255,8 @@ static void process_msg(int fd, short event, void *data)
                 OPAL_RELEASE_THREAD(&lock, &cond, &processing);
                 if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {
                     ORTE_ERROR_LOG(rc);
-                    goto ANSWER_LAUNCH;
+                    OPAL_ACQUIRE_THREAD(&lock, &cond, &processing);
+                    goto DEPART;
                 }
 
                 OPAL_ACQUIRE_THREAD(&lock, &cond, &processing);
diff --git a/orte/mca/plm/ccp/plm_ccp_module.c b/orte/mca/plm/ccp/plm_ccp_module.c
index 4a8fff749c..bee6023ff2 100644
--- a/orte/mca/plm/ccp/plm_ccp_module.c
+++ b/orte/mca/plm/ccp/plm_ccp_module.c
@@ -516,6 +516,7 @@ static int plm_ccp_launch_job(orte_job_t *jdata)
     /* setup the job */
     if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
         ORTE_ERROR_LOG(rc);
+        failed_job = jdata->jobid;
         goto cleanup;
     }
     failed_job = jdata->jobid;
@@ -575,9 +576,15 @@ static int plm_ccp_launch_job(orte_job_t *jdata)
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_errmgr.update_state(failed_job, job_state,
-                                 NULL, ORTE_PROC_STATE_UNDEF,
-                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        if (ORTE_ERR_SILENT == rc) {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_SILENT_ABORT,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        } else {
+            orte_errmgr.update_state(failed_job, job_state,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        }
     }
 
     /* check for timing request - get stop time and process if so */
diff --git a/orte/mca/plm/lsf/plm_lsf_module.c b/orte/mca/plm/lsf/plm_lsf_module.c
index 0bef1620a5..91c299ff82 100644
--- a/orte/mca/plm/lsf/plm_lsf_module.c
+++ b/orte/mca/plm/lsf/plm_lsf_module.c
@@ -345,6 +345,7 @@ launch_apps:
     /* setup the job */
     if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
         ORTE_ERROR_LOG(rc);
+        failed_job = jdata->jobid;
         goto cleanup;
     }
     /* daemons succeeded - any failure now would be from apps */
@@ -388,9 +389,15 @@ cleanup:
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_errmgr.update_state(failed_job, job_state,
-                                 NULL, ORTE_PROC_STATE_UNDEF,
-                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        if (ORTE_ERR_SILENT == rc) {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_SILENT_ABORT,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        } else {
+            orte_errmgr.update_state(failed_job, job_state,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        }
     }
 
     return rc;
diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c
index 60474b230b..c2be326471 100644
--- a/orte/mca/plm/rsh/plm_rsh_module.c
+++ b/orte/mca/plm/rsh/plm_rsh_module.c
@@ -1162,6 +1162,7 @@ static int rsh_launch(orte_job_t *jdata)
     /* setup the job */
     if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
         ORTE_ERROR_LOG(rc);
+        failed_job = jdata->jobid;
         goto cleanup;
     }
     failed_job = jdata->jobid;
@@ -1205,9 +1206,15 @@ static int rsh_launch(orte_job_t *jdata)
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_errmgr.update_state(failed_job, job_state,
-                                 NULL, ORTE_PROC_STATE_UNDEF,
-                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        if (ORTE_ERR_SILENT == rc) {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_SILENT_ABORT,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        } else {
+            orte_errmgr.update_state(failed_job, job_state,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        }
     }
 
     return rc;
diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c
index 09ebe69f58..410242bee5 100644
--- a/orte/mca/plm/slurm/plm_slurm_module.c
+++ b/orte/mca/plm/slurm/plm_slurm_module.c
@@ -405,6 +405,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
     /* setup the job */
     if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
         ORTE_ERROR_LOG(rc);
+        failed_job = jdata->jobid;
         goto cleanup;
     }
     failed_job = jdata->jobid;
@@ -449,9 +450,15 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_FAILED_TO_START,
-                                 NULL, ORTE_PROC_STATE_UNDEF,
-                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        if (ORTE_ERR_SILENT == rc) {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_SILENT_ABORT,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        } else {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_FAILED_TO_START,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        }
     }
 
     return rc;
diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c
index ed09006e40..1e05320502 100644
--- a/orte/mca/plm/tm/plm_tm_module.c
+++ b/orte/mca/plm/tm/plm_tm_module.c
@@ -413,6 +413,7 @@ launch_apps:
     /* setup the job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
         ORTE_ERROR_LOG(rc);
+        failed_job = jdata->jobid;
         goto cleanup;
     }
     /* since the daemons have launched, any failures now will be for the
@@ -458,9 +459,15 @@ launch_apps:
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_FAILED_TO_START,
-                                 NULL, ORTE_PROC_STATE_UNDEF,
-                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        if (ORTE_ERR_SILENT == rc) {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_SILENT_ABORT,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        } else {
+            orte_errmgr.update_state(failed_job, ORTE_JOB_STATE_FAILED_TO_START,
+                                     NULL, ORTE_PROC_STATE_UNDEF,
+                                     0, ORTE_ERROR_DEFAULT_EXIT_CODE);
+        }
     }
 
     OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c
index c9dd685515..f6ba00ed9e 100644
--- a/orte/mca/rmaps/base/rmaps_base_map_job.c
+++ b/orte/mca/rmaps/base/rmaps_base_map_job.c
@@ -70,6 +70,10 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
         /* a map has not been defined yet for this job, so set one
          * up here
          */
+        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
+                            "mca:rmaps: creating new map for job %s",
+                            ORTE_JOBID_PRINT(jdata->jobid));
+
         /* create a map object where we will store the results */
         map = OBJ_NEW(orte_job_map_t);
         if (NULL == map) {
@@ -90,6 +94,10 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
         /* assign the map object to this job */
         jdata->map = map;
     } else {
+        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
+                            "mca:rmaps: setting mapping policies for job %s",
+                            ORTE_JOBID_PRINT(jdata->jobid));
+
         if (!jdata->map->display_map) {
             jdata->map->display_map = orte_rmaps_base.display_map;
         }
diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c
index 664500c42b..81b97adc45 100644
--- a/orte/mca/rmaps/base/rmaps_base_support_fns.c
+++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c
@@ -134,7 +134,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
     *total_num_slots = 0;
 
     /* if the hnp was allocated, include it unless flagged not to */
-    if (orte_hnp_is_allocated && !(policy & ORTE_MAPPING_NO_USE_LOCAL)) {
+    if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) {
         if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
             if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                 /* clear this for future use, but don't include it */
@@ -264,12 +264,18 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
         if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
             opal_list_remove_item(allocated_nodes, item);
             OBJ_RELEASE(item);  /* "un-retain" it */
-        } else { /** otherwise, add the slots for our job to the total */
-            if (0 == node->slots_alloc) {
+        } else if (node->slots_alloc <= node->slots_inuse &&
+                   (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
+            /* remove the node as fully used */
+            opal_list_remove_item(allocated_nodes, item);
+            OBJ_RELEASE(item);  /* "un-retain" it */
+        } else {
+            if (node->slots_alloc > node->slots_inuse) {
+                /* add the available slots */
+                num_slots += node->slots_alloc - node->slots_inuse;
+            } else {
                 /* always allocate at least one */
                 num_slots++;
-            } else if (node->slots_alloc > node->slots_inuse) {
-                num_slots += node->slots_alloc - node->slots_inuse;
             }
         }
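
The heart of the mapping change is the node-filtering order in
orte_rmaps_base_get_target_nodes(). Below is a minimal standalone sketch of
that decision logic; node_t, node_is_usable, and no_oversubscribe are
illustrative stand-ins (the real code walks an opal_list_t of orte_node_t
objects and reads the policy via ORTE_GET_MAPPING_DIRECTIVE), not the actual
ORTE API:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for orte_node_t - illustrative only */
typedef struct {
    int slots_max;    /* absolute ceiling; 0 means no ceiling */
    int slots_alloc;  /* slots allocated to this job */
    int slots_inuse;  /* slots already occupied */
} node_t;

/* Returns true if the node stays in the candidate list, and accumulates
 * its contribution into *num_slots; mirrors the decision order in the
 * rmaps_base_support_fns.c hunk above. */
static bool node_is_usable(const node_t *node, bool no_oversubscribe,
                           int *num_slots)
{
    if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
        return false;  /* hard ceiling exceeded - drop regardless of policy */
    }
    if (node->slots_alloc <= node->slots_inuse && no_oversubscribe) {
        return false;  /* fully used and oversubscription forbidden - drop */
    }
    if (node->slots_alloc > node->slots_inuse) {
        *num_slots += node->slots_alloc - node->slots_inuse;  /* free slots */
    } else {
        *num_slots += 1;  /* oversubscribed but allowed: count at least one */
    }
    return true;
}

int main(void)
{
    node_t node = { .slots_max = 0, .slots_alloc = 4, .slots_inuse = 6 };
    int num_slots = 0;

    /* oversubscribed node, policy allows it: kept, contributes one slot */
    printf("usable=%d slots=%d\n",
           node_is_usable(&node, false, &num_slots), num_slots);
    return 0;
}

Note the ordering: the slots_max test runs before the policy test, so even
when oversubscription is permitted a node can never flow past its absolute
ceiling, which is exactly the constraint the commit message describes.

The error-message cleanup follows one pattern across all seven PLM modules:
a lower layer that has already printed its own diagnostic returns
ORTE_ERR_SILENT, and the launcher then records a silent abort rather than
emitting a duplicate message. A compilable sketch of that idiom follows;
MY_ERR_SILENT, the job-state values, update_state(), and finish_launch()
are placeholders, not the real orte_errmgr definitions:

#include <stdio.h>

/* Placeholder constants - the real codes and states live in ORTE headers */
enum { MY_ERR_SILENT = -1 };
enum { JOB_STATE_FAILED_TO_START, JOB_STATE_SILENT_ABORT };

/* Stand-in for orte_errmgr.update_state() */
void update_state(int job_state)
{
    if (JOB_STATE_SILENT_ABORT == job_state) {
        return;  /* the lower layer already told the user why */
    }
    fprintf(stderr, "job failed to start\n");
}

/* The cleanup idiom each launch function now ends with */
void finish_launch(int rc, int failed_launch)
{
    if (failed_launch) {
        update_state(MY_ERR_SILENT == rc ? JOB_STATE_SILENT_ABORT
                                         : JOB_STATE_FAILED_TO_START);
    }
}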