1
1

rmaps/base: fix logic (crash, in some cases) when num_procs > num_objects

Signed-off-by: Alex Margolin <alex.margolin@huawei.com>
Этот коммит содержится в:
Alex Margolin 2020-04-17 16:11:34 +03:00
родитель 34c4f934e1
Коммит 1cd89c9d7b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 2C5A7F2E075E22B0

Просмотреть файл

@@ -13,6 +13,7 @@
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Huawei Technologies Co., Ltd. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -142,7 +143,8 @@ static int rank_span(orte_job_t *jdata,
return ORTE_ERROR;
}
/* ignore procs not on this object */
if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
if (NULL == locale ||
!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_span: proc at position %d is not on object %d",
j, i);
@@ -175,6 +177,11 @@ static int rank_span(orte_job_t *jdata,
}
}
}
/* Are all the procs ranked? we don't want to crash on INVALID ranks */
if (cnt < app->num_procs) {
return ORTE_ERR_NOT_SUPPORTED;
}
}
return ORTE_SUCCESS;
@@ -263,7 +270,8 @@ static int rank_fill(orte_job_t *jdata,
return ORTE_ERROR;
}
/* ignore procs not on this object */
if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
if (NULL == locale ||
!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_fill: proc at position %d is not on object %d",
j, i);
@@ -293,6 +301,11 @@ static int rank_fill(orte_job_t *jdata,
}
}
}
/* Are all the procs ranked? we don't want to crash on INVALID ranks */
if (cnt < app->num_procs) {
return ORTE_ERR_NOT_SUPPORTED;
}
}
return ORTE_SUCCESS;
@@ -378,7 +391,8 @@ static int rank_by(orte_job_t *jdata,
* algorithm, but this works for now.
*/
i = 0;
while (cnt < app->num_procs && i < (int)node->num_procs) {
while (cnt < app->num_procs &&
((i < (int)node->num_procs) || (i < num_objs))) {
/* get the next object */
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i % num_objs);
if (NULL == obj) {
@@ -423,7 +437,7 @@ static int rank_by(orte_job_t *jdata,
!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by: proc at position %d is not on object %d",
j, i);
j, i % num_objs);
continue;
}
/* assign the vpid */
@@ -458,6 +472,11 @@ static int rank_by(orte_job_t *jdata,
}
/* cleanup */
OBJ_DESTRUCT(&objs);
/* Are all the procs ranked? we don't want to crash on INVALID ranks */
if (cnt < app->num_procs) {
return ORTE_ERR_NOT_SUPPORTED;
}
}
return ORTE_SUCCESS;
}