diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c b/orte/mca/rmaps/base/rmaps_base_ranking.c index aa75a2a144..3b4d084d97 100644 --- a/orte/mca/rmaps/base/rmaps_base_ranking.c +++ b/orte/mca/rmaps/base/rmaps_base_ranking.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Huawei Technologies Co., Ltd. All rights reserved. @@ -138,13 +138,18 @@ static int rank_span(orte_job_t *jdata, } /* protect against bozo case */ locale = NULL; - if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { + if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR) || + NULL == locale) { + /* all mappers are _required_ to set the locale where the proc + * has been mapped - it is therefore an error for this attribute + * not to be set. Likewise, only a programming error could allow + * the attribute to be set to a NULL value - however, we add that + * conditional here to silence any compiler warnings */ ORTE_ERROR_LOG(ORTE_ERROR); return ORTE_ERROR; } /* ignore procs not on this object */ - if (NULL == locale || - !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { + if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_span: proc at position %d is not on object %d", j, i); @@ -180,7 +185,7 @@ static int rank_span(orte_job_t *jdata, /* Are all the procs ranked? we don't want to crash on INVALID ranks */ if (cnt < app->num_procs) { - return ORTE_ERR_NOT_SUPPORTED; + return ORTE_ERR_FAILED_TO_MAP; } } @@ -265,13 +270,18 @@ static int rank_fill(orte_job_t *jdata, } /* protect against bozo case */ locale = NULL; - if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { + if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR) || + NULL == locale) { + /* all mappers are _required_ to set the locale where the proc + * has been mapped - it is therefore an error for this attribute + * not to be set. Likewise, only a programming error could allow + * the attribute to be set to a NULL value - however, we add that + * conditional here to silence any compiler warnings */ ORTE_ERROR_LOG(ORTE_ERROR); return ORTE_ERROR; } /* ignore procs not on this object */ - if (NULL == locale || - !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { + if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_fill: proc at position %d is not on object %d", j, i); @@ -304,7 +314,7 @@ static int rank_fill(orte_job_t *jdata, /* Are all the procs ranked? we don't want to crash on INVALID ranks */ if (cnt < app->num_procs) { - return ORTE_ERR_NOT_SUPPORTED; + return ORTE_ERR_FAILED_TO_MAP; } } @@ -321,11 +331,12 @@ static int rank_by(orte_job_t *jdata, orte_vpid_t num_ranked=0; orte_node_t *node; orte_proc_t *proc, *pptr; - orte_vpid_t vpid, np; + orte_vpid_t vpid; int cnt; opal_pointer_array_t objs; hwloc_obj_t locale; orte_app_idx_t napp; + bool noassign; if (ORTE_RANKING_SPAN & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) { return rank_span(jdata, target, cache_level); @@ -385,89 +396,97 @@ static int rank_by(orte_job_t *jdata, * of procs on the node can't be used to tell us when we * are done. Instead, we have to just keep going until all * procs are ranked - which means we have to make one extra - * pass thru the loop + * pass thru the loop. In addition, if we pass thru the entire + * loop without assigning anything then we are done * * Perhaps someday someone will come up with a more efficient * algorithm, but this works for now. */ - i = 0; - while (cnt < app->num_procs && - ((i < (int)node->num_procs) || (i < num_objs))) { - /* get the next object */ - obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i % num_objs); - if (NULL == obj) { + while (cnt < app->num_procs) { + noassign = true; + for (i=0; i < num_objs && cnt < app->num_procs; i++) { + /* get the next object */ + obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i); + if (NULL == obj) { + break; + } + /* scan across the procs and find the first unassigned one that includes this object */ + for (j=0; j < node->procs->size && cnt < app->num_procs; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { + continue; + } + /* ignore procs from other jobs */ + if (proc->name.jobid != jdata->jobid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d", + ORTE_NAME_PRINT(&proc->name), num_ranked); + continue; + } + /* ignore procs that are already ranked */ + if (ORTE_VPID_INVALID != proc->name.vpid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d", + ORTE_NAME_PRINT(&proc->name), num_ranked); + continue; + } + /* ignore procs from other apps - we will get to them */ + if (proc->app_idx != app->idx) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d", + ORTE_NAME_PRINT(&proc->name), num_ranked); + continue; + } + /* protect against bozo case */ + locale = NULL; + if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR) || + NULL == locale) { + /* all mappers are _required_ to set the locale where the proc + * has been mapped - it is therefore an error for this attribute + * not to be set. Likewise, only a programming error could allow + * the attribute to be set to a NULL value - however, we add that + * conditional here to silence any compiler warnings */ + ORTE_ERROR_LOG(ORTE_ERROR); + return ORTE_ERROR; + } + /* ignore procs not on this object */ + if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by: proc at position %d is not on object %d", + j, i); + continue; + } + /* assign the vpid */ + proc->name.vpid = vpid; + if (0 == cnt) { + app->first_rank = proc->name.vpid; + } + cnt++; + noassign = false; + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by: proc in position %d is on object %d assigned rank %s", + j, i, ORTE_VPID_PRINT(proc->name.vpid)); + /* insert the proc into the jdata array */ + if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) { + OBJ_RELEASE(pptr); + } + OBJ_RETAIN(proc); + if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&objs); + return rc; + } + num_ranked++; + /* track where the highest vpid landed - this is our + * new bookmark + */ + jdata->bookmark = node; + /* move to next object */ + break; + } + } + if (noassign) { break; } - /* scan across the procs and find the one that is on this object */ - np = 0; - for (j=0; np < node->num_procs && j < node->procs->size && cnt < app->num_procs; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { - continue; - } - np++; - /* ignore procs from other jobs */ - if (proc->name.jobid != jdata->jobid) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d", - ORTE_NAME_PRINT(&proc->name), num_ranked); - continue; - } - /* ignore procs that are already ranked */ - if (ORTE_VPID_INVALID != proc->name.vpid) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d", - ORTE_NAME_PRINT(&proc->name), num_ranked); - continue; - } - /* ignore procs from other apps */ - if (proc->app_idx != app->idx) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d", - ORTE_NAME_PRINT(&proc->name), num_ranked); - continue; - } - /* protect against bozo case */ - locale = NULL; - if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } - /* ignore procs not on this object */ - if (NULL == locale || - !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by: proc at position %d is not on object %d", - j, i % num_objs); - continue; - } - /* assign the vpid */ - proc->name.vpid = vpid++; - if (0 == cnt) { - app->first_rank = proc->name.vpid; - } - cnt++; - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by: proc in position %d is on object %d assigned rank %s", - j, i, ORTE_VPID_PRINT(proc->name.vpid)); - /* insert the proc into the jdata array */ - if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) { - OBJ_RELEASE(pptr); - } - OBJ_RETAIN(proc); - if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&objs); - return rc; - } - num_ranked++; - /* track where the highest vpid landed - this is our - * new bookmark - */ - jdata->bookmark = node; - /* move to next object */ - break; - } - i++; } } /* cleanup */ @@ -475,7 +494,7 @@ static int rank_by(orte_job_t *jdata, /* Are all the procs ranked? we don't want to crash on INVALID ranks */ if (cnt < app->num_procs) { - return ORTE_ERR_NOT_SUPPORTED; + return ORTE_ERR_FAILED_TO_MAP; } } return ORTE_SUCCESS;