Add protection against a degenerate case where we could end up in an infinite loop while calculating ranks.
This commit was SVN r25491.
Этот коммит содержится в:
родитель
88d32312d6
Коммит
1e5e9bde77
@ -185,3 +185,9 @@ Unknown binding level:
|
|||||||
|
|
||||||
Target: %s
|
Target: %s
|
||||||
Cache level: %u
|
Cache level: %u
|
||||||
|
#
|
||||||
|
[ranking-error]
|
||||||
|
An infinite loop condition has been detected in the ranking
|
||||||
|
code when computing ranks %s. This indicates a problem
|
||||||
|
in the mapper. Please bring the problem to the attention
|
||||||
|
of the OMPI developers.
|
||||||
|
@ -381,7 +381,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
|||||||
orte_node_t *node;
|
orte_node_t *node;
|
||||||
orte_proc_t *proc, *ptr;
|
orte_proc_t *proc, *ptr;
|
||||||
int rc;
|
int rc;
|
||||||
|
bool added_one=false;
|
||||||
|
|
||||||
map = jdata->map;
|
map = jdata->map;
|
||||||
|
|
||||||
if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
|
if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
|
||||||
@ -395,6 +396,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
|||||||
cnt=0;
|
cnt=0;
|
||||||
vpid=0;
|
vpid=0;
|
||||||
while (cnt < jdata->num_procs) {
|
while (cnt < jdata->num_procs) {
|
||||||
|
added_one = false;
|
||||||
for (i=0; i < map->nodes->size; i++) {
|
for (i=0; i < map->nodes->size; i++) {
|
||||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||||
continue;
|
continue;
|
||||||
@ -408,8 +410,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (ORTE_VPID_INVALID != proc->name.vpid) {
|
if (ORTE_VPID_INVALID != proc->name.vpid) {
|
||||||
/* vpid was already assigned, probably by the
|
/* vpid was already assigned. Some mappers require that
|
||||||
* round-robin mapper. Some mappers require that
|
|
||||||
* we insert the proc into the jdata->procs
|
* we insert the proc into the jdata->procs
|
||||||
* array, while others will have already done it - so check and
|
* array, while others will have already done it - so check and
|
||||||
* do the operation if required
|
* do the operation if required
|
||||||
@ -423,6 +424,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
|||||||
* it in our loop - otherwise don't as we would be
|
* it in our loop - otherwise don't as we would be
|
||||||
* double counting
|
* double counting
|
||||||
*/
|
*/
|
||||||
|
added_one = true;
|
||||||
cnt++;
|
cnt++;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
@ -444,8 +446,17 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
cnt++;
|
cnt++;
|
||||||
|
added_one = true;
|
||||||
break; /* move on to next node */
|
break; /* move on to next node */
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
/* it should be impossible, but check to see if there was nothing
|
||||||
|
* added during this pass and error out if so
|
||||||
|
*/
|
||||||
|
if (!added_one) {
|
||||||
|
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:ranking-error",
|
||||||
|
true, "bynode");
|
||||||
|
return ORTE_ERR_SILENT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user