From 510ade950301f91d887ee445b5f5d5f019126b73 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 1 Jul 2010 19:39:31 +0000 Subject: [PATCH] Do not use nodes that are flagged as down or do-not-use for this map. Modify error output to reflect possible reasons no nodes would be available. This commit was SVN r23333. --- orte/mca/rmaps/base/help-orte-rmaps-base.txt | 5 ++++- orte/mca/rmaps/base/rmaps_base_support_fns.c | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/orte/mca/rmaps/base/help-orte-rmaps-base.txt b/orte/mca/rmaps/base/help-orte-rmaps-base.txt index c23b063d9b..839998870a 100644 --- a/orte/mca/rmaps/base/help-orte-rmaps-base.txt +++ b/orte/mca/rmaps/base/help-orte-rmaps-base.txt @@ -49,7 +49,10 @@ no nodes were found or all the available nodes were already used. Note that since the -nolocal option was given no processes can be launched on the local node. [orte-rmaps-base:no-available-resources] -There are no nodes allocated to this job. +No nodes are available for this job, either due to a failure to +allocate nodes to the job, or allocated nodes being marked +as unavailable (e.g., down, rebooting, or a process attempting +to be relocated to another node when none are available). [orte-rmaps-base:all-available-resources-used] All nodes which are allocated for this job are already filled. 
# diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 110d523df9..5f994a6e0b 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -57,20 +57,31 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr /** set default answer */ *total_num_slots = 0; - /* if the hnp was allocated, include it */ + /* if the hnp was allocated, include it unless flagged not to */ if (orte_hnp_is_allocated) { node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); - OBJ_RETAIN(node); - opal_list_append(allocated_nodes, &node->super); + if (ORTE_NODE_STATE_UP == node->state) { + OBJ_RETAIN(node); + opal_list_append(allocated_nodes, &node->super); + } else if (ORTE_NODE_STATE_DO_NOT_USE == node->state) { + /* clear this for future use */ + node->state = ORTE_NODE_STATE_UP; + } } - /* add everything in the node pool */ + /* add everything in the node pool that can be used */ for (i=1; i < orte_node_pool->size; i++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { /* ignore nodes that are "down" */ if (ORTE_NODE_STATE_DOWN == node->state) { continue; } + /* ignore nodes that are marked as do-not-use for this mapping */ + if (ORTE_NODE_STATE_DO_NOT_USE == node->state) { + /* reset the state so it can be used another time */ + node->state = ORTE_NODE_STATE_UP; + continue; + } /* retain a copy for our use in case the item gets * destructed along the way */