Correctly detect and handle oversubscription for comm_spawn
cmr=v1.7.4:reviewer=jsquyres:subject=Correctly detect and handle oversubscription for comm_spawn This commit was SVN r30186.
Этот коммит содержится в:
родитель
6e5fedeb04
Коммит
9fcb46d85a
@ -490,22 +490,13 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
node->name, node->slots, node->slots_inuse));
|
||||
opal_list_remove_item(allocated_nodes, item);
|
||||
OBJ_RELEASE(item); /* "un-retain" it */
|
||||
} else {
|
||||
if (node->slots > node->slots_inuse) {
|
||||
} else if (node->slots > node->slots_inuse) {
|
||||
/* add the available slots */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s node %s has %d slots available",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
node->name, node->slots));
|
||||
node->name, node->slots - node->slots_inuse));
|
||||
num_slots += node->slots - node->slots_inuse;
|
||||
} else {
|
||||
/* always allocate at least one */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s node %s has %d slots %d used",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
node->name, node->slots, node->slots_inuse));
|
||||
num_slots++;
|
||||
}
|
||||
}
|
||||
|
||||
/** go on to next item */
|
||||
|
@ -98,7 +98,8 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr:slot assigning %d procs to node %s",
|
||||
(int)num_procs_to_assign, node->name);
|
||||
for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
|
||||
|
||||
for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
|
||||
/* add this node to the map - do it only once */
|
||||
if (!node->mapped) {
|
||||
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
|
||||
@ -507,18 +508,8 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
true, node->name);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* add this node to the map, if reqd */
|
||||
if (!node->mapped) {
|
||||
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
|
||||
ORTE_ERROR_LOG(idx);
|
||||
return idx;
|
||||
}
|
||||
node->mapped = true;
|
||||
OBJ_RETAIN(node); /* maintain accounting on object */
|
||||
++(jdata->map->num_nodes);
|
||||
}
|
||||
|
||||
/* compute the number of procs to go on this node */
|
||||
/* compute the number of procs to go on this node */
|
||||
if (add_one) {
|
||||
if (0 == nxtra_nodes) {
|
||||
--extra_procs_to_assign;
|
||||
@ -538,6 +529,29 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
}
|
||||
}
|
||||
|
||||
/* if this would oversubscribe the node and the user hasn't permitted
|
||||
* oversubscription, then don't use it - since the total number of
|
||||
* slots is adequate for this app, there should be room somewhere else
|
||||
*/
|
||||
if (node->slots < (node->slots_inuse + num_procs_to_assign) &&
|
||||
ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: mapping no-span would oversubscribe node %s - ignoring it",
|
||||
node->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* add this node to the map, if reqd */
|
||||
if (!node->mapped) {
|
||||
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
|
||||
ORTE_ERROR_LOG(idx);
|
||||
return idx;
|
||||
}
|
||||
node->mapped = true;
|
||||
OBJ_RETAIN(node); /* maintain accounting on object */
|
||||
++(jdata->map->num_nodes);
|
||||
}
|
||||
|
||||
/* get the number of objects of this type on this node */
|
||||
nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE);
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
@ -551,7 +565,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
if (0 == nobjs) {
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
|
||||
true, hwloc_obj_type_string(target), node->name);
|
||||
true, hwloc_obj_type_string(target), node->name);
|
||||
return ORTE_ERR_SILENT;
|
||||
} else {
|
||||
/* this was the default mapping policy, so clear the map
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user