rmaps/base: fix ranking logic (which crashed in some cases) when num_procs > num_objects
Signed-off-by: Alex Margolin <alex.margolin@huawei.com>
Этот коммит содержится в:
родитель
34c4f934e1
Коммит
1cd89c9d7b
@ -13,6 +13,7 @@
|
|||||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2017 Research Organization for Information Science
|
* Copyright (c) 2017 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
|
* Copyright (c) 2020 Huawei Technologies Co., Ltd. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -142,7 +143,8 @@ static int rank_span(orte_job_t *jdata,
|
|||||||
return ORTE_ERROR;
|
return ORTE_ERROR;
|
||||||
}
|
}
|
||||||
/* ignore procs not on this object */
|
/* ignore procs not on this object */
|
||||||
if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
|
if (NULL == locale ||
|
||||||
|
!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
|
||||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps:rank_span: proc at position %d is not on object %d",
|
"mca:rmaps:rank_span: proc at position %d is not on object %d",
|
||||||
j, i);
|
j, i);
|
||||||
@ -175,6 +177,11 @@ static int rank_span(orte_job_t *jdata,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Are all the procs ranked? we don't want to crash on INVALID ranks */
|
||||||
|
if (cnt < app->num_procs) {
|
||||||
|
return ORTE_ERR_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
@ -263,7 +270,8 @@ static int rank_fill(orte_job_t *jdata,
|
|||||||
return ORTE_ERROR;
|
return ORTE_ERROR;
|
||||||
}
|
}
|
||||||
/* ignore procs not on this object */
|
/* ignore procs not on this object */
|
||||||
if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
|
if (NULL == locale ||
|
||||||
|
!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
|
||||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps:rank_fill: proc at position %d is not on object %d",
|
"mca:rmaps:rank_fill: proc at position %d is not on object %d",
|
||||||
j, i);
|
j, i);
|
||||||
@ -293,6 +301,11 @@ static int rank_fill(orte_job_t *jdata,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Are all the procs ranked? we don't want to crash on INVALID ranks */
|
||||||
|
if (cnt < app->num_procs) {
|
||||||
|
return ORTE_ERR_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
@ -378,7 +391,8 @@ static int rank_by(orte_job_t *jdata,
|
|||||||
* algorithm, but this works for now.
|
* algorithm, but this works for now.
|
||||||
*/
|
*/
|
||||||
i = 0;
|
i = 0;
|
||||||
while (cnt < app->num_procs && i < (int)node->num_procs) {
|
while (cnt < app->num_procs &&
|
||||||
|
((i < (int)node->num_procs) || (i < num_objs))) {
|
||||||
/* get the next object */
|
/* get the next object */
|
||||||
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i % num_objs);
|
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i % num_objs);
|
||||||
if (NULL == obj) {
|
if (NULL == obj) {
|
||||||
@ -423,7 +437,7 @@ static int rank_by(orte_job_t *jdata,
|
|||||||
!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
|
!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
|
||||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps:rank_by: proc at position %d is not on object %d",
|
"mca:rmaps:rank_by: proc at position %d is not on object %d",
|
||||||
j, i);
|
j, i % num_objs);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* assign the vpid */
|
/* assign the vpid */
|
||||||
@ -458,6 +472,11 @@ static int rank_by(orte_job_t *jdata,
|
|||||||
}
|
}
|
||||||
/* cleanup */
|
/* cleanup */
|
||||||
OBJ_DESTRUCT(&objs);
|
OBJ_DESTRUCT(&objs);
|
||||||
|
|
||||||
|
/* Are all the procs ranked? we don't want to crash on INVALID ranks */
|
||||||
|
if (cnt < app->num_procs) {
|
||||||
|
return ORTE_ERR_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user