Per the ticket, resolve our handling of overload conditions to provide a more consistent response. If we are overloaded (i.e., attempting to bind more processes to a location than the number of cpus under that location), then we consider the following conditions:
(a) default binding policy is in effect. In this case, we will emit a warning and default to not binding unless the user provided the "oversubscribe" or "overload" modifier to the "bind-to" option. (b) user-specified binding policy is in effect. In this case, we will error out unless the user provided the "oversubscribe" or "overload" modifier to the "bind-to" option as we cannot meet the directive. Either "bind-to" modifier (oversubscribe or overload) will be accepted for now - in 1.9, we will deprecate the "overload" term in favor of "oversubscribe". Also added the ability to accept a --bind-to modifier without specifying the binding policy itself so a user can specify overload-allowed with the default policy. Closes trac:4345 cmr=v1.8.2:reviewer=rhc:subject=resolve handling of overload conditions This commit was SVN r32005. The following Trac tickets were found above: Ticket 4345 --> https://svn.open-mpi.org/trac/ompi/ticket/4345
Этот коммит содержится в:
родитель
811f3d0665
Коммит
3f04d50cb0
@ -517,8 +517,12 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
|
||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NONE);
|
||||
} else {
|
||||
tmpvals = opal_argv_split(spec, ':');
|
||||
if (1 < opal_argv_count(tmpvals)) {
|
||||
quals = opal_argv_split(tmpvals[1], ',');
|
||||
if (1 < opal_argv_count(tmpvals) || ':' == spec[0]) {
|
||||
if (':' == spec[0]) {
|
||||
quals = opal_argv_split(&spec[1], ',');
|
||||
} else {
|
||||
quals = opal_argv_split(tmpvals[1], ',');
|
||||
}
|
||||
for (i=0; NULL != quals[i]; i++) {
|
||||
if (0 == strncasecmp(quals[i], "if-supported", strlen(quals[i]))) {
|
||||
tmp |= OPAL_BIND_IF_SUPPORTED;
|
||||
@ -533,7 +537,7 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
|
||||
}
|
||||
opal_argv_free(quals);
|
||||
}
|
||||
if (NULL == tmpvals[0]) {
|
||||
if (NULL == tmpvals[0] || ':' == spec[0]) {
|
||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
|
||||
tmp &= ~OPAL_BIND_GIVEN;
|
||||
} else {
|
||||
|
@ -123,10 +123,10 @@ be found on node %s.
|
||||
A request was made to bind that would result in binding more
|
||||
processes than cpus on a resource:
|
||||
|
||||
Bind to: %s
|
||||
Node: %s
|
||||
Bind to: %s
|
||||
Node: %s
|
||||
#processes: %d
|
||||
#cpus: %d
|
||||
#cpus: %d
|
||||
|
||||
You can override this protection by adding the "overload-allowed"
|
||||
option to your binding directive.
|
||||
@ -312,13 +312,3 @@ directive.
|
||||
Please specify a mapping level that has more than one cpu, or
|
||||
else let us define a default mapping that will allow multiple
|
||||
cpus-per-proc.
|
||||
#
|
||||
[unrecog-modifier]
|
||||
A modifier was given to the --map-by directive that is not
|
||||
recognized:
|
||||
|
||||
Modifier: %s
|
||||
|
||||
Please see "mpirun --help" for a description of supported
|
||||
modifiers.
|
||||
|
||||
|
@ -104,6 +104,20 @@ static void reset_usage(orte_node_t *node, orte_jobid_t jobid)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Clear the recorded binding from every process of the given job.
 *
 * Walks all slots of jdata->procs and, for each populated slot, removes
 * the ORTE_PROC_HWLOC_BOUND and ORTE_PROC_CPU_BITMAP attributes from that
 * proc. The bind_* routines in this change call it on the default-policy
 * overload path, right after resetting the job's binding policy to
 * OPAL_BIND_TO_NONE.
 */
static void unbind_procs(orte_job_t *jdata)
|
||||
{
|
||||
int j;
|
||||
orte_proc_t *proc;
|
||||
|
||||
for (j=0; j < jdata->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
|
||||
/* empty slot in the pointer array - nothing to unbind here */
continue;
|
||||
}
|
||||
orte_remove_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND);
|
||||
orte_remove_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP);
|
||||
}
|
||||
}
|
||||
|
||||
static int bind_upwards(orte_job_t *jdata,
|
||||
orte_node_t *node,
|
||||
hwloc_obj_type_t target,
|
||||
@ -176,12 +190,27 @@ static int bind_upwards(orte_job_t *jdata,
|
||||
* and it wasn't a default binding policy (i.e., the user requested it)
|
||||
*/
|
||||
if (ncpus < data->num_bound &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
|
||||
OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
return ORTE_ERR_SILENT;
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
/* if the user specified a binding policy, then we cannot meet
|
||||
* it since overload isn't allowed, so error out - have the
|
||||
* message indicate that setting overload allowed will remove
|
||||
* this restriction */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
return ORTE_ERR_SILENT;
|
||||
} else {
|
||||
/* if we have the default binding policy, emit a warning
|
||||
* that we won't be binding-by-default and include a statement
|
||||
* that setting overload allowed will silence the warning */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
|
||||
unbind_procs(jdata);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
/* bind it here */
|
||||
cpus = opal_hwloc_base_get_available_cpus(node->topology, obj);
|
||||
@ -298,20 +327,26 @@ static int bind_downwards(orte_job_t *jdata,
|
||||
if (ncpus < data->num_bound &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
/* if the user specified a binding policy, then we cannot meet
|
||||
* it since overload isn't allowed, so error out - have the
|
||||
* message indicate that setting overload allowed will remove
|
||||
* this restriction */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
hwloc_bitmap_free(totalcpuset);
|
||||
return ORTE_ERR_SILENT;
|
||||
} else {
|
||||
/* if this is the default binding policy, then just don't
|
||||
* bind this proc
|
||||
*/
|
||||
data->num_bound--; // maintain count
|
||||
/* show the proc as not bound */
|
||||
orte_remove_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND);
|
||||
/* if we have the default binding policy, emit a warning
|
||||
* that we won't be binding-by-default and include a statement
|
||||
* that setting overload allowed will silence the warning */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
|
||||
unbind_procs(jdata);
|
||||
hwloc_bitmap_zero(totalcpuset);
|
||||
break;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
/* bind the proc here */
|
||||
@ -501,15 +536,28 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
/* no place to put this - see if overload is allowed and
|
||||
* error out if adding a proc would cause overload and that wasn't allowed,
|
||||
* and it wasn't a default binding policy (i.e., the user requested it)*/
|
||||
if (!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
|
||||
OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
return ORTE_ERR_SILENT;
|
||||
/* no place to put this - see if overload is allowed */
|
||||
if (!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
/* if the user specified a binding policy, then we cannot meet
|
||||
* it since overload isn't allowed, so error out - have the
|
||||
* message indicate that setting overload allowed will remove
|
||||
* this restriction */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
return ORTE_ERR_SILENT;
|
||||
} else {
|
||||
/* if we have the default binding policy, emit a warning
|
||||
* that we won't be binding-by-default and include a statement
|
||||
* that setting overload allowed will silence the warning */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
|
||||
unbind_procs(jdata);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -62,30 +62,6 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
|
||||
true, app->num_procs, app->app);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* if we will and are allowed to oversubscribe, and binding was given, then
|
||||
* we really should warn the user that we cannot bind
|
||||
*/
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
|
||||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
|
||||
/* RHC: don't emit this warning at this time while we try to
|
||||
* determine the best path forward. See
|
||||
* https://svn.open-mpi.org/trac/ompi/ticket/4345
|
||||
* for an explanation
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
|
||||
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
/* don't default to bound */
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: mapping by slot resetting binding policy to NONE as node is oversubscribed");
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* first pass: map the number of procs to each node until we
|
||||
@ -253,30 +229,6 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
oversubscribed = true;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* if we will and are allowed to oversubscribe, and binding was given, then
|
||||
* we really should warn the user that we cannot bind
|
||||
*/
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
|
||||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
|
||||
/* RHC: don't emit this warning at this time while we try to
|
||||
* determine the best path forward. See
|
||||
* https://svn.open-mpi.org/trac/ompi/ticket/4345
|
||||
* for an explanation
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
|
||||
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
/* don't default to bound */
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: mapping by node resetting binding policy to NONE as node is oversubscribed");
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
nnodes = opal_list_get_size(node_list);
|
||||
@ -517,28 +469,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
true, app->num_procs, app->app);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* if we will and are allowed to oversubscribe, and binding was given, then
|
||||
* we really should warn the user that we cannot bind
|
||||
*/
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
|
||||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
|
||||
/* RHC: don't emit this warning at this time while we try to
|
||||
* determine the best path forward. See
|
||||
* https://svn.open-mpi.org/trac/ompi/ticket/4345
|
||||
* for an explanation
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
|
||||
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
/* don't default to bound */
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: mapping no-span resetting binding policy to NONE as node is oversubscribed");
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
/* we know we have enough slots, or that oversubscription is allowed, so
|
||||
@ -675,28 +605,6 @@ static int byobj_span(orte_job_t *jdata,
|
||||
true, app->num_procs, app->app);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* if we will and are allowed to oversubscribe, and binding was given, then
|
||||
* we really should warn the user that we cannot bind
|
||||
*/
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
|
||||
OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)){
|
||||
/* RHC: don't emit this warning at this time while we try to
|
||||
* determine the best path forward. See
|
||||
* https://svn.open-mpi.org/trac/ompi/ticket/4345
|
||||
* for an explanation
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
|
||||
true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
/* don't default to bound */
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: mapping span resetting binding policy to NONE as node is oversubscribed");
|
||||
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
/* we know we have enough slots, or that oversubscription is allowed, so
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user