1
1

Per the ticket, resolve our handling of overload conditions to provide a more consistent response. If we are overloaded (i.e., attempting to bind more processes to a location than the number of cpus under that location), then we consider the following conditions:

(a) default binding policy is in effect. In this case, we will emit a
warning and default to not binding unless the user provided the
"oversubscribe" or "overload" modifier to the "bind-to" option.

(b) user-specified binding policy is in effect. In this case, we will
error out unless the user provided the "oversubscribe" or "overload"
modifier to the "bind-to" option as we cannot meet the directive.

Either "bind-to" modifier (oversubscribe or overload) will be accepted for
now - in 1.9, we will deprecate the "overload" term in favor of
"oversubscribe".

Also added the ability to accept a --bind-to modifier without specifying the binding policy itself so a user can specify overload-allowed with the default policy.

Closes trac:4345

cmr=v1.8.2:reviewer=rhc:subject=resolve handling of overload conditions

This commit was SVN r32005.

The following Trac tickets were found above:
  Ticket 4345 --> https://svn.open-mpi.org/trac/ompi/ticket/4345
Этот коммит содержится в:
Ralph Castain 2014-06-14 15:38:32 +00:00
родитель 811f3d0665
Коммит 3f04d50cb0
4 изменённых файлов: 80 добавлений и 130 удалений

Просмотреть файл

@ -517,8 +517,12 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NONE);
} else {
tmpvals = opal_argv_split(spec, ':');
if (1 < opal_argv_count(tmpvals)) {
quals = opal_argv_split(tmpvals[1], ',');
if (1 < opal_argv_count(tmpvals) || ':' == spec[0]) {
if (':' == spec[0]) {
quals = opal_argv_split(&spec[1], ',');
} else {
quals = opal_argv_split(tmpvals[1], ',');
}
for (i=0; NULL != quals[i]; i++) {
if (0 == strncasecmp(quals[i], "if-supported", strlen(quals[i]))) {
tmp |= OPAL_BIND_IF_SUPPORTED;
@ -533,7 +537,7 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
}
opal_argv_free(quals);
}
if (NULL == tmpvals[0]) {
if (NULL == tmpvals[0] || ':' == spec[0]) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
tmp &= ~OPAL_BIND_GIVEN;
} else {

Просмотреть файл

@ -123,10 +123,10 @@ be found on node %s.
A request was made to bind to that would result in binding more
processes than cpus on a resource:
Bind to: %s
Node: %s
Bind to: %s
Node: %s
#processes: %d
#cpus: %d
#cpus: %d
You can override this protection by adding the "overload-allowed"
option to your binding directive.
@ -312,13 +312,3 @@ directive.
Please specify a mapping level that has more than one cpu, or
else let us define a default mapping that will allow multiple
cpus-per-proc.
#
[unrecog-modifier]
A modifier was given to the --map-by directive that is not
recognized:
Modifier: %s
Please see "mpirun --help" for a description of supported
modifiers.

Просмотреть файл

@ -104,6 +104,20 @@ static void reset_usage(orte_node_t *node, orte_jobid_t jobid)
}
}
/*
 * Walk every slot of the job's proc array and strip the
 * ORTE_PROC_HWLOC_BOUND and ORTE_PROC_CPU_BITMAP attributes from each
 * proc that is present. Empty (NULL) slots in the array are skipped.
 */
static void unbind_procs(orte_job_t *jdata)
{
    int idx = 0;
    orte_proc_t *entry;

    while (idx < jdata->procs->size) {
        entry = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, idx);
        idx++;
        if (NULL != entry) {
            /* drop both binding-related attributes from this proc */
            orte_remove_attribute(&entry->attributes, ORTE_PROC_HWLOC_BOUND);
            orte_remove_attribute(&entry->attributes, ORTE_PROC_CPU_BITMAP);
        }
    }
}
static int bind_upwards(orte_job_t *jdata,
orte_node_t *node,
hwloc_obj_type_t target,
@ -176,12 +190,27 @@ static int bind_upwards(orte_job_t *jdata,
* and it wasn't a default binding policy (i.e., the user requested it)
*/
if (ncpus < data->num_bound &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
return ORTE_ERR_SILENT;
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
/* if the user specified a binding policy, then we cannot meet
* it since overload isn't allowed, so error out - have the
* message indicate that setting overload allowed will remove
* this restriction */
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
return ORTE_ERR_SILENT;
} else {
/* if we have the default binding policy, emit a warning
* that we won't be binding-by-default and include a statement
* that setting overload allowed will silence the warning */
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
unbind_procs(jdata);
return ORTE_SUCCESS;
}
}
/* bind it here */
cpus = opal_hwloc_base_get_available_cpus(node->topology, obj);
@ -298,20 +327,26 @@ static int bind_downwards(orte_job_t *jdata,
if (ncpus < data->num_bound &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
/* if the user specified a binding policy, then we cannot meet
* it since overload isn't allowed, so error out - have the
* message indicate that setting overload allowed will remove
* this restriction */
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
hwloc_bitmap_free(totalcpuset);
return ORTE_ERR_SILENT;
} else {
/* if this is the default binding policy, then just don't
* bind this proc
*/
data->num_bound--; // maintain count
/* show the proc as not bound */
orte_remove_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND);
/* if we have the default binding policy, emit a warning
* that we won't be binding-by-default and include a statement
* that setting overload allowed will silence the warning */
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
unbind_procs(jdata);
hwloc_bitmap_zero(totalcpuset);
break;
return ORTE_SUCCESS;
}
}
/* bind the proc here */
@ -501,15 +536,28 @@ static int bind_in_place(orte_job_t *jdata,
}
}
if (!found) {
/* no place to put this - see if overload is allowed and
* error out if adding a proc would cause overload and that wasn't allowed,
* and it wasn't a default binding policy (i.e., the user requested it)*/
if (!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
return ORTE_ERR_SILENT;
/* no place to put this - see if overload is allowed */
if (!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
/* if the user specified a binding policy, then we cannot meet
* it since overload isn't allowed, so error out - have the
* message indicate that setting overload allowed will remove
* this restriction */
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
return ORTE_ERR_SILENT;
} else {
/* if we have the default binding policy, emit a warning
* that we won't be binding-by-default and include a statement
* that setting overload allowed will silence the warning */
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus);
OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
unbind_procs(jdata);
return ORTE_SUCCESS;
}
}
}
}

Просмотреть файл

@ -62,30 +62,6 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
true, app->num_procs, app->app);
return ORTE_ERR_SILENT;
}
#if OPAL_HAVE_HWLOC
/* if we will and are allowed to oversubscribe, and binding was given, then
* we really should warn the user that we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
/* RHC: don't emit this warning at this time while we try to
* determine the best path forward. See
* https://svn.open-mpi.org/trac/ompi/ticket/4345
* for an explanation
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
*/
}
} else {
/* don't default to bound */
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: mapping by slot resetting binding policy to NONE as node is oversubscribed");
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
#endif
}
/* first pass: map the number of procs to each node until we
@ -253,30 +229,6 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
return ORTE_ERR_SILENT;
}
oversubscribed = true;
#if OPAL_HAVE_HWLOC
/* if we will and are allowed to oversubscribe, and binding was given, then
* we really should warn the user that we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
/* RHC: don't emit this warning at this time while we try to
* determine the best path forward. See
* https://svn.open-mpi.org/trac/ompi/ticket/4345
* for an explanation
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
*/
}
} else {
/* don't default to bound */
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: mapping by node resetting binding policy to NONE as node is oversubscribed");
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
#endif
}
nnodes = opal_list_get_size(node_list);
@ -517,28 +469,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
true, app->num_procs, app->app);
return ORTE_ERR_SILENT;
}
/* if we will and are allowed to oversubscribe, and binding was given, then
* we really should warn the user that we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
/* RHC: don't emit this warning at this time while we try to
* determine the best path forward. See
* https://svn.open-mpi.org/trac/ompi/ticket/4345
* for an explanation
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
*/
}
} else {
/* don't default to bound */
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: mapping no-span resetting binding policy to NONE as node is oversubscribed");
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
}
/* we know we have enough slots, or that oversubscription is allowed, so
@ -675,28 +605,6 @@ static int byobj_span(orte_job_t *jdata,
true, app->num_procs, app->app);
return ORTE_ERR_SILENT;
}
/* if we will and are allowed to oversubscribe, and binding was given, then
* we really should warn the user that we cannot bind
*/
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
/* RHC: don't emit this warning at this time while we try to
* determine the best path forward. See
* https://svn.open-mpi.org/trac/ompi/ticket/4345
* for an explanation
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
*/
}
} else {
/* don't default to bound */
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: mapping span resetting binding policy to NONE as node is oversubscribed");
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
}
/* we know we have enough slots, or that oversubscription is allowed, so