diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index ebf8a276a1..4464ae53ba 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -341,11 +341,16 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr /* Sanity check to make sure we have resources available */ if (0 == num_slots) { - if (!silent) { + if (silent) { + /* let the caller know that the resources exist, + * but are currently busy + */ + return ORTE_ERR_RESOURCE_BUSY; + } else { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:all-available-resources-used", true); + return ORTE_ERR_SILENT; } - return ORTE_ERR_SILENT; } *total_num_slots = num_slots; diff --git a/orte/mca/rmaps/staged/rmaps_staged.c b/orte/mca/rmaps/staged/rmaps_staged.c index c51e382fd2..03117ac677 100644 --- a/orte/mca/rmaps/staged/rmaps_staged.c +++ b/orte/mca/rmaps/staged/rmaps_staged.c @@ -95,19 +95,32 @@ static int staged_mapper(orte_job_t *jdata) */ OBJ_CONSTRUCT(&node_list, opal_list_t); if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, - jdata->map->mapping, false, true)) && - ORTE_ERR_SILENT != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* if nothing is available, then move on */ - if (0 == num_slots || 0 == opal_list_get_size(&node_list)) { - opal_output_verbose(5, orte_rmaps_base.rmaps_output, - "%s mca:rmaps:staged: no nodes available for this app", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - OBJ_DESTRUCT(&node_list); - continue; + jdata->map->mapping, false, true))) { + if (ORTE_ERR_RESOURCE_BUSY == rc) { + /* if the return is "busy", then at least one of the + * specified resources must exist, but no slots are + * currently available. This means there is at least + * a hope of eventually being able to map this app + * within its specified constraints, so continue working + */ + opal_output_verbose(5, orte_rmaps_base.rmaps_output, + "%s mca:rmaps:staged: all nodes for this app are currently busy", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + OBJ_DESTRUCT(&node_list); + continue; + } else { + /* this indicates that there are no nodes that match + * the specified constraints, so there is no hope of + * ever being able to execute this app. This is an + * unrecoverable error - note that a return of + * "silent" means that the function already printed + * an error message, so the error_log will print nothing + */ + ORTE_ERROR_LOG(rc); + return rc; + } } + /* assign any unmapped procs to an available slot */ for (j=0; j < app->procs.size; j++) { if (NULL == (proc = opal_pointer_array_get_item(&app->procs, j))) {