diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c index 9c9b1aeb43..7efd0ed357 100644 --- a/orte/mca/ras/base/ras_base_allocate.c +++ b/orte/mca/ras/base/ras_base_allocate.c @@ -61,6 +61,12 @@ int orte_ras_base_allocate(orte_job_t *jdata) "%s ras:base:allocate allocation already read", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + /* loop through the global node pool and set the + * number of allocated slots to the difference + * between slots and slots_in_use. Note that + * oversubscription will still allow procs to + * be mapped up to slots_max + */ return ORTE_SUCCESS; } diff --git a/orte/mca/ras/base/ras_base_node.c b/orte/mca/ras/base/ras_base_node.c index f7e3bd2abf..39f1f729b9 100644 --- a/orte/mca/ras/base/ras_base_node.c +++ b/orte/mca/ras/base/ras_base_node.c @@ -113,8 +113,6 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata) /* use the local name for our node - don't trust what * we got from an RM */ - /* set the node to available for use */ - hnp_node->allocate = true; /* update the total slots in the job */ jdata->total_slots_alloc += hnp_node->slots; /* don't keep duplicate copy */ @@ -125,8 +123,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata) "%s ras:base:node_insert node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == node->name) ? "NULL" : node->name)); - /* set node to available for use */ - node->allocate = true; + /* allocate all the available slots */ + node->slots_alloc = node->slots; + /* insert it into the array */ node->index = opal_pointer_array_add(orte_node_pool, (void*)node); if (ORTE_SUCCESS > (rc = node->index)) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 1f76a5d579..985dfea1ba 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -60,13 +60,11 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr if (NULL == nodes[i]) { break; /* nodes are left aligned, so stop when we hit a null */ } - if (nodes[i]->allocate) { - /* retain a copy for our use in case the item gets - * destructed along the way - */ - OBJ_RETAIN(nodes[i]); - opal_list_append(allocated_nodes, &nodes[i]->super); - } + /* retain a copy for our use in case the item gets + * destructed along the way + */ + OBJ_RETAIN(nodes[i]); + opal_list_append(allocated_nodes, &nodes[i]->super); } /** check that anything is here */ @@ -139,11 +137,12 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr item != opal_list_get_end(allocated_nodes); item = opal_list_get_next(item) ) { node = (orte_node_t*)item; - /* by this time, we have adjusted all local node - * names to be our node name, so we don't need - * to keep checking for that condition + /* need to check ifislocal because the name in the + * hostfile may not have been FQDN, while name returned + * by gethostname may have been (or vice versa) */ - if (0 == strcmp(node->name, orte_process_info.nodename)) { + if (0 == strcmp(node->name, orte_process_info.nodename) || + opal_ifislocal(node->name)) { opal_list_remove_item(allocated_nodes, item); OBJ_RELEASE(item); /* "un-retain" it */ break; diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c b/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c index ad75016b82..217548110f 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c @@ -74,9 +74,10 @@ orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = { */ static int orte_rmaps_rank_file_open(void) { - mca_rmaps_rank_file_component.priority = 0; int index = 0; + mca_rmaps_rank_file_component.priority = 0; + mca_base_param_reg_string(&mca_rmaps_rank_file_component.super.base_version, "path", "The path to the rank mapping file", diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c index 05ba21b889..0873401a76 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -312,9 +312,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) if (NULL == nodes[i]) { break; /* nodes are left aligned, so stop when we hit a null */ } - if (nodes[i]->allocate) { - num_nodes++; - } + num_nodes++; } /* compute the balance */ res = ((float)ppn / num_nodes); @@ -449,14 +447,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) goto error; } } else if (0 == app->num_procs) { - /** set the num_procs to equal the number of slots on these mapped nodes - if - user has specified "-bynode", then set it to the number of nodes - */ - if (map->policy & ORTE_RMAPS_BYNODE) { - app->num_procs = num_nodes; - } else if (map->policy & ORTE_RMAPS_BYSLOT) { - app->num_procs = num_slots; - } else if (map->policy & ORTE_RMAPS_BYUSER) { + if (map->policy & ORTE_RMAPS_BYUSER) { /* we can't handle this - it should have been set when we got * the map info. If it wasn't, then we can only error out */ @@ -465,6 +456,8 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) rc = ORTE_ERR_SILENT; goto error; } + /** set the num_procs to equal the number of slots on these mapped nodes */ + app->num_procs = num_slots; } /** track the total number of processes we mapped */ diff --git a/orte/mca/rmaps/seq/rmaps_seq.c b/orte/mca/rmaps/seq/rmaps_seq.c index a6f464928d..13aa41199b 100644 --- a/orte/mca/rmaps/seq/rmaps_seq.c +++ b/orte/mca/rmaps/seq/rmaps_seq.c @@ -32,6 +32,7 @@ #include "opal/mca/base/mca_base_param.h" #include "opal/util/trace.h" #include "opal/util/argv.h" +#include "opal/util/if.h" #include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" @@ -106,6 +107,26 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) } else { node_list = default_node_list; } + + /* check for nolocal and remove the head node, if required */ + if (map->policy & ORTE_RMAPS_NO_USE_LOCAL) { + for (item = opal_list_get_first(node_list); + item != opal_list_get_end(node_list); + item = opal_list_get_next(item) ) { + node = (orte_node_t*)item; + /* need to check ifislocal because the name in the + * hostfile may not have been FQDN, while name returned + * by gethostname may have been (or vice versa) + */ + if (0 == strcmp(node->name, orte_process_info.nodename) || + opal_ifislocal(node->name)) { + opal_list_remove_item(node_list, item); + OBJ_RELEASE(item); /* "un-retain" it */ + break; + } + } + } + if (NULL == node_list || 0 == (num_nodes = (orte_std_cntr_t)opal_list_get_size(node_list))) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-available-resources", @@ -174,7 +195,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) if (NULL == nodes[j]) { break; /* nodes are left aligned, so stop when we hit a null */ } - if (nodes[j]->allocate && 0 == strcmp(nd->name, nodes[j]->name)) { + if (0 == strcmp(nd->name, nodes[j]->name)) { node = nodes[j]; break; } diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index c726c38e68..862bbff433 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -309,7 +309,7 @@ int orte_dt_pack_node(opal_buffer_t *buffer, const void *src, return rc; } - /* do not pack the allocate flag, daemon name, or launch id */ + /* do not pack the daemon name or launch id */ /* pack the number of procs on the node */ if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index 5893ea2ffc..65fa0a340f 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -292,8 +292,8 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_ return ORTE_SUCCESS; } - asprintf(&tmp, "\n%sData for node: Name: %s\tAllocate: %s\n%s\tLaunch id: %ld\tArch: %0x\tState: %0x", - pfx2, src->name, (src->allocate) ? "Yes" : "No", + asprintf(&tmp, "\n%sData for node: Name: %s\t%s\tLaunch id: %ld\tArch: %0x\tState: %0x", + pfx2, src->name, pfx2, (long)src->launch_id, src->arch, src->state); diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index 053437b9ac..6c4e56459c 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -331,7 +331,7 @@ int orte_dt_unpack_node(opal_buffer_t *buffer, void *dest, return rc; } - /* do not unpack the allocate flag, daemon name, or launch id */ + /* do not unpack the daemon name or launch id */ /* unpack the number of procs on the node */ n = 1; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 1b1f573f78..aa718900c9 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -127,8 +127,6 @@ typedef struct { orte_std_cntr_t index; /** String node name */ char *name; - /* whether or not this node is available for allocation */ - bool allocate; /* daemon on this node */ struct orte_proc_t *daemon; /* whether or not this daemon has been launched */ diff --git a/orte/runtime/orte_globals_class_instances.h b/orte/runtime/orte_globals_class_instances.h index c17145c4ff..ed2ddc22e7 100644 --- a/orte/runtime/orte_globals_class_instances.h +++ b/orte/runtime/orte_globals_class_instances.h @@ -180,7 +180,6 @@ OBJ_CLASS_INSTANCE(orte_job_t, static void orte_node_construct(orte_node_t* node) { node->name = NULL; - node->allocate = false; node->index = -1; node->daemon = NULL; node->daemon_launched = false;