1
1

Add protection against a bozo case where we could end up in an infinite loop while calculating ranks

This commit was SVN r25491.
Этот коммит содержится в:
Ralph Castain 2011-11-18 15:35:55 +00:00
родитель 88d32312d6
Коммит 1e5e9bde77
2 изменённых файлов: 21 добавлений и 4 удалений

Просмотреть файл

@ -185,3 +185,9 @@ Unknown binding level:
Target: %s Target: %s
Cache level: %u Cache level: %u
#
[orte-rmaps-base:ranking-error]
An infinite loop condition has been detected in the ranking
code when computing ranks %s. This indicates a problem
in the mapper. Please bring this problem to the attention
of the OMPI developers.

Просмотреть файл

@ -381,7 +381,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
orte_node_t *node; orte_node_t *node;
orte_proc_t *proc, *ptr; orte_proc_t *proc, *ptr;
int rc; int rc;
bool added_one=false;
map = jdata->map; map = jdata->map;
if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) || if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
@ -395,6 +396,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
cnt=0; cnt=0;
vpid=0; vpid=0;
while (cnt < jdata->num_procs) { while (cnt < jdata->num_procs) {
added_one = false;
for (i=0; i < map->nodes->size; i++) { for (i=0; i < map->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
continue; continue;
@ -408,8 +410,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
continue; continue;
} }
if (ORTE_VPID_INVALID != proc->name.vpid) { if (ORTE_VPID_INVALID != proc->name.vpid) {
/* vpid was already assigned, probably by the /* vpid was already assigned. Some mappers require that
* round-robin mapper. Some mappers require that
* we insert the proc into the jdata->procs * we insert the proc into the jdata->procs
* array, while others will have already done it - so check and * array, while others will have already done it - so check and
* do the operation if required * do the operation if required
@ -423,6 +424,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
* it in our loop - otherwise don't as we would be * it in our loop - otherwise don't as we would be
* double counting * double counting
*/ */
added_one = true;
cnt++; cnt++;
} }
continue; continue;
@ -444,8 +446,17 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
return rc; return rc;
} }
cnt++; cnt++;
added_one = true;
break; /* move on to next node */ break; /* move on to next node */
} }
}
/* it should be impossible, but check whether anything was added
* during this pass and error out if nothing was
*/
if (!added_one) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:ranking-error",
true, "bynode");
return ORTE_ERR_SILENT;
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;