Merge pull request #1250 from rhc54/topic/rf
Fix the default slot mapping in rank file mapper
Этот коммит содержится в:
Коммит
d9cd451a16
@ -196,10 +196,42 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
rank = vpid_start + k;
|
rank = vpid_start + k;
|
||||||
/* get the rankfile entry for this rank */
|
/* get the rankfile entry for this rank */
|
||||||
if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) {
|
if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) {
|
||||||
|
/* if we were given a default slot-list, then use it */
|
||||||
|
if (NULL != opal_hwloc_base_slot_list) {
|
||||||
|
slots = opal_hwloc_base_slot_list;
|
||||||
|
/* take the next node off of the available list */
|
||||||
|
node = NULL;
|
||||||
|
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
|
||||||
|
/* if adding one to this node would oversubscribe it, then try
|
||||||
|
* the next one */
|
||||||
|
if (nd->slots <= (int)nd->num_procs) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* take this one */
|
||||||
|
node = nd;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (NULL == node) {
|
||||||
|
/* all would be oversubscribed, so take the least loaded one */
|
||||||
|
k = UINT32_MAX;
|
||||||
|
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
|
||||||
|
if (nd->num_procs < k) {
|
||||||
|
k = nd->num_procs;
|
||||||
|
node = nd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* if we still have nothing, then something is very wrong */
|
||||||
|
if (NULL == node) {
|
||||||
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
/* all ranks must be specified */
|
/* all ranks must be specified */
|
||||||
orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile);
|
orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile);
|
||||||
rc = ORTE_ERR_SILENT;
|
rc = ORTE_ERR_SILENT;
|
||||||
goto error;
|
goto error;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (0 == strlen(rfmap->slot_list)) {
|
if (0 == strlen(rfmap->slot_list)) {
|
||||||
/* rank was specified but no slot list given - that's an error */
|
/* rank was specified but no slot list given - that's an error */
|
||||||
@ -208,8 +240,6 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
slots = rfmap->slot_list;
|
slots = rfmap->slot_list;
|
||||||
}
|
|
||||||
|
|
||||||
/* find the node where this proc was assigned */
|
/* find the node where this proc was assigned */
|
||||||
node = NULL;
|
node = NULL;
|
||||||
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
|
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
|
||||||
@ -235,7 +265,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
node = root_node;
|
node = root_node;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (NULL == node) {
|
if (NULL == node) {
|
||||||
orte_show_help("help-rmaps_rank_file.txt","bad-host", true, rfmap->node_name);
|
orte_show_help("help-rmaps_rank_file.txt","bad-host", true, rfmap->node_name);
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user