1
1

Merge pull request #1250 from rhc54/topic/rf

Fix the default slot mapping in rank file mapper
Этот коммит содержится в:
rhc54 2015-12-21 10:57:52 -08:00
родитель 38830e41b4 7cc5879bdd
Коммит d9cd451a16

Просмотреть файл

@ -196,10 +196,42 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
rank = vpid_start + k;
/* get the rankfile entry for this rank */
if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) {
/* all ranks must be specified */
orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile);
rc = ORTE_ERR_SILENT;
goto error;
/* if we were given a default slot-list, then use it */
if (NULL != opal_hwloc_base_slot_list) {
slots = opal_hwloc_base_slot_list;
/* take the next node off of the available list */
node = NULL;
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
/* if adding one to this node would oversubscribe it, then try
* the next one */
if (nd->slots <= (int)nd->num_procs) {
continue;
}
/* take this one */
node = nd;
break;
}
if (NULL == node) {
/* all would be oversubscribed, so take the least loaded one */
k = UINT32_MAX;
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
if (nd->num_procs < k) {
k = nd->num_procs;
node = nd;
}
}
}
/* if we still have nothing, then something is very wrong */
if (NULL == node) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto error;
}
} else {
/* all ranks must be specified */
orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile);
rc = ORTE_ERR_SILENT;
goto error;
}
} else {
if (0 == strlen(rfmap->slot_list)) {
/* rank was specified but no slot list given - that's an error */
@ -208,34 +240,32 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
goto error;
}
slots = rfmap->slot_list;
}
/* find the node where this proc was assigned */
node = NULL;
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
if (NULL != rfmap->node_name &&
0 == strcmp(nd->name, rfmap->node_name)) {
node = nd;
break;
} else if (NULL != rfmap->node_name &&
(('+' == rfmap->node_name[0]) &&
(('n' == rfmap->node_name[1]) ||
('N' == rfmap->node_name[1])))) {
/* find the node where this proc was assigned */
node = NULL;
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
if (NULL != rfmap->node_name &&
0 == strcmp(nd->name, rfmap->node_name)) {
node = nd;
break;
} else if (NULL != rfmap->node_name &&
(('+' == rfmap->node_name[0]) &&
(('n' == rfmap->node_name[1]) ||
('N' == rfmap->node_name[1])))) {
relative_index=atoi(strtok(rfmap->node_name,"+n"));
if ( relative_index >= (int)opal_list_get_size (&node_list) || ( 0 > relative_index)){
orte_show_help("help-rmaps_rank_file.txt","bad-index", true,rfmap->node_name);
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
relative_index=atoi(strtok(rfmap->node_name,"+n"));
if ( relative_index >= (int)opal_list_get_size (&node_list) || ( 0 > relative_index)){
orte_show_help("help-rmaps_rank_file.txt","bad-index", true,rfmap->node_name);
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
root_node = (orte_node_t*) opal_list_get_first(&node_list);
for(tmp_cnt=0; tmp_cnt<relative_index; tmp_cnt++) {
root_node = (orte_node_t*) opal_list_get_next(root_node);
}
node = root_node;
break;
}
root_node = (orte_node_t*) opal_list_get_first(&node_list);
for(tmp_cnt=0; tmp_cnt<relative_index; tmp_cnt++) {
root_node = (orte_node_t*) opal_list_get_next(root_node);
}
node = root_node;
break;
}
}
if (NULL == node) {
orte_show_help("help-rmaps_rank_file.txt","bad-host", true, rfmap->node_name);