1
1

Fix a breakage in the ranking system

Reversing the order of the assignment loops may be faster, but it also produces the wrong answer.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2018-03-22 20:50:47 -05:00
родитель c1c0c02f06
Коммит 322f6c5056
6 изменённых файлов: 111 добавлений и 42 удалений

Просмотреть файл

@@ -190,9 +190,17 @@ void orte_plm_base_allocation_complete(int fd, short args, void *cbdata)
ORTE_ACQUIRE_OBJECT(caddy);
/* move the state machine along */
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
/* if we don't want to launch, then we at least want
* to map so we can see where the procs would have
* gone - so skip to the mapping state */
if (orte_do_not_launch) {
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_MAP);
} else {
/* move the state machine along */
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
}
/* cleanup */
OBJ_RELEASE(caddy);

Просмотреть файл

@@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@@ -50,6 +50,8 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
bool hnp_alone = true, skiphnp = false;
orte_attribute_t *kv;
char **alias=NULL, **nalias;
orte_proc_t *daemon;
orte_job_t *djob;
/* get the number of nodes */
num_nodes = (orte_std_cntr_t)opal_list_get_size(nodes);
@@ -76,6 +78,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
return rc;
}
/* if we are not launching, get the daemon job */
djob = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
/* get the hnp node's info */
hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
@@ -189,6 +194,21 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
ORTE_ERROR_LOG(rc);
return rc;
}
if (orte_do_not_launch) {
/* create a daemon for this node since we won't be launching
* and the mapper needs to see a daemon - this is used solely
* for testing the mappers */
daemon = OBJ_NEW(orte_proc_t);
daemon->name.jobid = ORTE_PROC_MY_NAME->jobid;
daemon->name.vpid = node->index;
daemon->state = ORTE_PROC_STATE_RUNNING;
OBJ_RETAIN(node);
daemon->node = node;
opal_pointer_array_set_item(djob->procs, daemon->name.vpid, daemon);
djob->num_procs++;
OBJ_RETAIN(daemon);
node->daemon = daemon;
}
/* update the total slots in the job */
orte_ras_base.total_slots_alloc += node->slots;
/* check if we have fqdn names in the allocation */

Просмотреть файл

@@ -3,7 +3,7 @@
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@@ -23,6 +23,7 @@
#include "opal/mca/hwloc/hwloc-internal.h"
#include "opal/util/argv.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
@@ -179,6 +180,10 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
/* pass it thru the filter so we create the summaries required by the mappers */
if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
ORTE_ERROR_LOG(ORTE_ERROR);
}
/* add it to our array */
t = OBJ_NEW(orte_topology_t);
t->topo = topo;

Просмотреть файл

@@ -12,7 +12,7 @@
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@@ -246,7 +246,7 @@ static int bind_downwards(orte_job_t *jdata,
hwloc_obj_type_t target,
unsigned cache_level)
{
int j;
int j, rc;
orte_job_map_t *map;
orte_proc_t *proc;
hwloc_obj_t trg_obj, nxt_obj;
@@ -367,7 +367,10 @@ static int bind_downwards(orte_job_t *jdata,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name), node->name);
} else {
opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
rc = opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
if (OPAL_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
opal_output(orte_rmaps_base_framework.framework_output,
"%s BOUND PROC %s[%s] TO %s: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -841,7 +844,8 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
continue;
}
if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != node->index) {
if (!orte_no_vm && !orte_do_not_launch &&
(int)ORTE_PROC_MY_NAME->vpid != node->index) {
continue;
}
if (!orte_do_not_launch) {

Просмотреть файл

@@ -417,7 +417,33 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
}
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
if (orte_do_not_launch) {
/* compute the ranks and add the proc objects
* to the jdata->procs array */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
/* compute and save local ranks */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
/* compute and save location assignments */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
/* compute and save bindings */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
} else if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
/* compute and save location assignments */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
ORTE_ERROR_LOG(rc);
@@ -454,6 +480,11 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
}
if (orte_do_not_launch) {
/* display the devel map */
orte_rmaps_base_display_map(jdata);
}
/* set the job state to the next position */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);

Просмотреть файл

@@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@@ -379,35 +379,34 @@ static int rank_by(orte_job_t *jdata,
all_done = false;
while (!all_done && cnt < app->num_procs) {
all_done = true;
/* cycle across the objects */
for (i=0; i < num_objs && cnt < app->num_procs && all_done; i++) {
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);
/* find the next proc for this job and app_context */
for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
/* ignore procs from other jobs */
if (proc->name.jobid != jdata->jobid) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
ORTE_NAME_PRINT(&proc->name), num_ranked);
continue;
}
/* ignore procs that are already ranked */
if (ORTE_VPID_INVALID != proc->name.vpid) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
ORTE_NAME_PRINT(&proc->name), num_ranked);
continue;
}
/* ignore procs from other apps */
if (proc->app_idx != app->idx) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
ORTE_NAME_PRINT(&proc->name), num_ranked);
continue;
}
for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
/* ignore procs from other jobs */
if (proc->name.jobid != jdata->jobid) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
ORTE_NAME_PRINT(&proc->name), num_ranked);
continue;
}
/* ignore procs that are already ranked */
if (ORTE_VPID_INVALID != proc->name.vpid) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
ORTE_NAME_PRINT(&proc->name), num_ranked);
continue;
}
/* ignore procs from other apps */
if (proc->app_idx != app->idx) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
ORTE_NAME_PRINT(&proc->name), num_ranked);
continue;
}
/* cycle across the objects */
for (i=0; i < num_objs && cnt < app->num_procs && all_done; i++) {
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);
/* protect against bozo case */
locale = NULL;
if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
@@ -429,7 +428,8 @@ static int rank_by(orte_job_t *jdata,
}
cnt++;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
"mca:rmaps:rank_by: proc in position %d is on object %d assigned rank %s",
j, i, ORTE_VPID_PRINT(proc->name.vpid));
/* insert the proc into the jdata array */
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
OBJ_RELEASE(pptr);
@@ -440,7 +440,8 @@ static int rank_by(orte_job_t *jdata,
OBJ_DESTRUCT(&objs);
return rc;
}
/* flag that one was mapped */
num_ranked++;
/* flag that one was mapped */
all_done = false;
/* track where the highest vpid landed - this is our
* new bookmark