Add protection against a bozo case where we could end up in an infinite loop while calculating ranks
This commit was SVN r25491.
Этот коммит содержится в:
родитель
88d32312d6
Коммит
1e5e9bde77
@ -185,3 +185,9 @@ Unknown binding level:
|
||||
|
||||
Target: %s
|
||||
Cache level: %u
|
||||
#
|
||||
[ranking-error]
|
||||
An infinite loop condition has been detected in the ranking
|
||||
code when computing ranks %s. This indicates a problem
|
||||
in the mapper. Please bring the problem to the attention
|
||||
of the OMPI developers.
|
||||
|
@ -381,7 +381,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc, *ptr;
|
||||
int rc;
|
||||
|
||||
bool added_one=false;
|
||||
|
||||
map = jdata->map;
|
||||
|
||||
if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
|
||||
@ -395,6 +396,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
cnt=0;
|
||||
vpid=0;
|
||||
while (cnt < jdata->num_procs) {
|
||||
added_one = false;
|
||||
for (i=0; i < map->nodes->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
continue;
|
||||
@ -408,8 +410,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
continue;
|
||||
}
|
||||
if (ORTE_VPID_INVALID != proc->name.vpid) {
|
||||
/* vpid was already assigned, probably by the
|
||||
* round-robin mapper. Some mappers require that
|
||||
/* vpid was already assigned. Some mappers require that
|
||||
* we insert the proc into the jdata->procs
|
||||
* array, while others will have already done it - so check and
|
||||
* do the operation if required
|
||||
@ -423,6 +424,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
* it in our loop - otherwise don't as we would be
|
||||
* double counting
|
||||
*/
|
||||
added_one = true;
|
||||
cnt++;
|
||||
}
|
||||
continue;
|
||||
@ -444,8 +446,17 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
return rc;
|
||||
}
|
||||
cnt++;
|
||||
added_one = true;
|
||||
break; /* move on to next node */
|
||||
}
|
||||
}
|
||||
}
|
||||
/* it should be impossible, but check to see if there was nothing
|
||||
* added during this pass and error out if so
|
||||
*/
|
||||
if (!added_one) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:ranking-error",
|
||||
true, "bynode");
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user