1
1

Clean up an error in the mapper that caused "-hosts" to bomb.

Update the mapper so it correctly points to the next node to be used if we are mapping by slots. As it was, if we had an app_context that used only one slot on a node, the next app_context would start on the next node - leaving a blank slot in-between.

This commit was SVN r12193.
Этот коммит содержится в:
Ralph Castain 2006-10-19 18:57:29 +00:00
родитель 581a4b0a4e
Коммит 263f4379e8

Просмотреть файл

@ -219,12 +219,15 @@ static int map_app_by_slot(
/** if all the procs have been mapped OR we have fully used up this node, then
* break from the loop
*/
if(num_alloc >= app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
break;
}
}
cur_node_item = next;
/* if we still have procs to map, or this node is full, move to next node */
if (num_alloc < app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
cur_node_item = next;
}
}
@ -285,6 +288,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list;
opal_list_item_t *item, *item2;
orte_ras_node_t *node, *node2;
char *save_bookmark;
orte_vpid_t vpid_start, job_vpid_start=0;
orte_std_cntr_t num_procs = 0, total_num_slots, mapped_num_slots;
int rc;
@ -349,13 +353,18 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
}
/* see if we found it - if not, just start at the beginning */
if (NULL == cur_node_item) {
cur_node_item = opal_list_get_first(&master_node_list);
cur_node_item = opal_list_get_first(&master_node_list);
}
} else {
/* if no bookmark, then just start at the beginning of the list */
cur_node_item = opal_list_get_first(&master_node_list);
}
/* save the node name for the bookmark just in case we don't do anything
* useful down below
*/
save_bookmark = strdup(((orte_ras_node_t*)cur_node_item)->node_name);
/** construct the list to hold any nodes that get fully used during this
* mapping. We need to keep a record of these so we can update their
* information on the registry when we are done, but we want to remove
@ -463,6 +472,12 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
goto cleanup;
}
/* save the next node name bookmark as we will - in the case of mapped nodes -
* release the node information being pointed to by cur_node_item
*/
free(save_bookmark);
save_bookmark = strdup(((orte_ras_node_t*)cur_node_item)->node_name);
/** cleanup the mapped_node_list, if necessary */
if (0 < app->num_map) {
/** before we get rid of the mapped_node_list, we first need to update
@ -573,9 +588,8 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
/* save a bookmark indicating what node we finished with so that subsequent children (if any)
* can start at the right place
*/
node = (orte_ras_node_t*)cur_node_item;
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_BOOKMARK,
ORTE_STRING, node->node_name,
ORTE_STRING, save_bookmark,
ORTE_RMGR_ATTR_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
}
@ -590,6 +604,8 @@ cleanup:
OBJ_DESTRUCT(&fully_used_nodes);
OBJ_DESTRUCT(&mapped_node_list);
free(save_bookmark);
return rc;
}