A few mapping cleanups - mostly aimed to properly balancing loads so multi app-context comm_spawns don't dump everything on one node.
This commit was SVN r19519.
Этот коммит содержится в:
родитель
9b8473fdbf
Коммит
c0d7fbaf88
@ -321,12 +321,18 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Remove this node if it has reached its max number of allocatable slots OR it has
|
||||
* reached the soft limit AND we are in a "no oversubscribe" state
|
||||
/* If this node has reached its max number of allocatable slots OR it has
|
||||
* reached the soft limit AND we are in a "no oversubscribe" state, then
|
||||
* we need to return a flag telling the mapper this is the case so it
|
||||
* can move on to the next node
|
||||
*/
|
||||
if ((0 != current_node->slots_max &&
|
||||
current_node->slots_inuse >= current_node->slots_max) ||
|
||||
(!oversubscribe && current_node->slots_inuse >= current_node->slots)) {
|
||||
/* see if we are supposed to remove the node from the list - some
|
||||
* mappers want us to do so to avoid any chance of continuing to
|
||||
* add procs to it
|
||||
*/
|
||||
if (remove_from_list) {
|
||||
opal_list_remove_item(nodes, (opal_list_item_t*)current_node);
|
||||
/* release it - it was retained when we started, so this
|
||||
@ -334,7 +340,9 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
|
||||
*/
|
||||
OBJ_RELEASE(current_node);
|
||||
}
|
||||
/** now return the proper code so the caller knows we removed the node! */
|
||||
/* now return the proper code so the caller knows this node
|
||||
* is fully used
|
||||
*/
|
||||
return ORTE_ERR_NODE_FULLY_USED;
|
||||
}
|
||||
|
||||
@ -392,8 +400,8 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
if (NULL == psave && NULL == psave2) {
|
||||
/* we must have processed them all! */
|
||||
goto DONE;
|
||||
/* we must have processed them all for this node! */
|
||||
break;
|
||||
}
|
||||
if (NULL != psave) {
|
||||
psave->local_rank = local_rank;
|
||||
@ -406,7 +414,6 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
|
||||
DONE:
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -187,6 +187,7 @@ static int map_app_by_slot(
|
||||
* (b) if some of the slots are in-use, then we take the number of
|
||||
* remaining slots before hitting the soft limit (node_slots)
|
||||
* (c) if we are at or above the soft limit, we take a full node_slots
|
||||
* unless we are loadbalancing, in which case we only take one
|
||||
*
|
||||
* Note: if node_slots is zero, then we always just take 1 slot
|
||||
*
|
||||
@ -197,8 +198,13 @@ static int map_app_by_slot(
|
||||
* many processes as another before oversubscribing, it will continue
|
||||
* to do so after oversubscribing).
|
||||
*/
|
||||
if (0 == node->slots_inuse ||
|
||||
node->slots_inuse >= node->slots_alloc) {
|
||||
if (node->slots_inuse >= node->slots_alloc) {
|
||||
if (orte_rmaps_base.loadbalance) {
|
||||
num_slots_to_take = 1;
|
||||
} else {
|
||||
num_slots_to_take = (node->slots_alloc == 0) ? 1 : node->slots_alloc;
|
||||
}
|
||||
} else if (0 == node->slots_inuse) {
|
||||
num_slots_to_take = (node->slots_alloc == 0) ? 1 : node->slots_alloc;
|
||||
} else {
|
||||
num_slots_to_take = node->slots_alloc - node->slots_inuse;
|
||||
@ -269,11 +275,12 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
orte_std_cntr_t i;
|
||||
opal_list_t node_list;
|
||||
opal_list_item_t *item;
|
||||
orte_node_t *node, **nodes, *nd1;
|
||||
orte_node_t *node, **nodes, *nd1, *ndmin;
|
||||
orte_vpid_t vpid_start;
|
||||
orte_std_cntr_t num_nodes, num_slots;
|
||||
int rc;
|
||||
orte_std_cntr_t slots_per_node;
|
||||
int overload;
|
||||
|
||||
OPAL_TRACE(1);
|
||||
|
||||
@ -372,9 +379,14 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
cur_node_item = opal_list_get_first(&node_list);
|
||||
}
|
||||
|
||||
/* is this node oversubscribed? */
|
||||
/* is this node fully subscribed? If so, then the first
|
||||
* proc we assign will oversubscribe it, so let's look
|
||||
* for another candidate
|
||||
*/
|
||||
node = (orte_node_t*)cur_node_item;
|
||||
if (node->slots_inuse > node->slots) {
|
||||
ndmin = node;
|
||||
overload = ndmin->slots_inuse - ndmin->slots;
|
||||
if (node->slots_inuse >= node->slots) {
|
||||
/* work down the list - is there another node that
|
||||
* would not be oversubscribed?
|
||||
*/
|
||||
@ -390,6 +402,15 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
cur_node_item = item;
|
||||
goto proceed;
|
||||
}
|
||||
/* this one was also oversubscribed, keep track of the
|
||||
* node that has the least usage - if we can't
|
||||
* find anyone who isn't fully utilized, we will
|
||||
* start with the least used node
|
||||
*/
|
||||
if (overload >= (nd1->slots_inuse - nd1->slots)) {
|
||||
ndmin = nd1;
|
||||
overload = ndmin->slots_inuse - ndmin->slots;
|
||||
}
|
||||
if (item == opal_list_get_last(&node_list)) {
|
||||
item = opal_list_get_first(&node_list);
|
||||
} else {
|
||||
@ -397,9 +418,10 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
/* if we get here, then we cycled all the way around the
|
||||
* list without finding a better answer - just use what
|
||||
* we have
|
||||
* list without finding a better answer - just use the node
|
||||
* that is minimally overloaded
|
||||
*/
|
||||
cur_node_item = (opal_list_item_t*)ndmin;
|
||||
}
|
||||
|
||||
proceed:
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user