1
1

Add detection of oversubscription when binding has been requested: if binding to core or hardware thread (hwt) was requested, warn the user and do not bind, since binding while oversubscribed would hurt performance. Also, if no binding directive was given, turn off the default binding.

Refs trac:4317

This commit was SVN r30888.

The following Trac tickets were found above:
  Ticket 4317 --> https://svn.open-mpi.org/trac/ompi/ticket/4317
Этот коммит содержится в:
Ralph Castain 2014-02-28 16:08:52 +00:00
родитель 8500247c7b
Коммит 4a645f0342
2 изменённых файлов: 67 добавлений и 0 удалений

Просмотреть файл

@ -274,3 +274,14 @@ the required syntax of #:object
Pattern: %s
Please check your request and try again.
#
[orte-rmaps-base:oversubscribed]
The requested number of processors exceeds the allocated
number of slots:
#slots: %d
#processors: %d
This creates an oversubscribed condition that may adversely
impact performance when combined with the requested binding
operation. We will continue, but will not bind the processes.

Просмотреть файл

@ -62,6 +62,20 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
true, app->num_procs, app->app);
return ORTE_ERR_SILENT;
}
/* if we are going to oversubscribe (and are allowed to do so), and a
* binding policy was given, then we really should warn the user that
* we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
}
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
} else {
/* don't default to bound */
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
}
/* first pass: map the number of procs to each node until we
@ -229,6 +243,20 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
return ORTE_ERR_SILENT;
}
oversubscribed = true;
/* if we are going to oversubscribe (and are allowed to do so), and a
* binding policy was given, then we really should warn the user that
* we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
}
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
} else {
/* don't default to bound */
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
}
nnodes = opal_list_get_size(node_list);
@ -469,6 +497,20 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
true, app->num_procs, app->app);
return ORTE_ERR_SILENT;
}
/* if we are going to oversubscribe (and are allowed to do so), and a
* binding policy was given, then we really should warn the user that
* we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
}
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
} else {
/* don't default to bound */
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
}
/* we know we have enough slots, or that oversubscription is allowed, so
@ -599,6 +641,20 @@ static int byobj_span(orte_job_t *jdata,
true, app->num_procs, app->app);
return ORTE_ERR_SILENT;
}
/* if we are going to oversubscribe (and are allowed to do so), and a
* binding policy was given, then we really should warn the user that
* we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
}
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
} else {
/* don't default to bound */
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
}
/* we know we have enough slots, or that oversubscription is allowed, so