diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c index 412cf29b3b..143163c290 100644 --- a/opal/mca/hwloc/base/hwloc_base_frame.c +++ b/opal/mca/hwloc/base/hwloc_base_frame.c @@ -189,9 +189,8 @@ static int opal_hwloc_base_open(mca_base_open_flag_t flags) /* binding specification */ if (NULL == opal_hwloc_base_binding_policy) { + /* default to bind-to-core, and mark that no binding policy was specified */ opal_hwloc_binding_policy = OPAL_BIND_TO_CORE; - /* mark that no binding policy was specified */ - opal_hwloc_binding_policy &= ~OPAL_BIND_GIVEN; } else if (0 == strncasecmp(opal_hwloc_base_binding_policy, "none", strlen("none"))) { OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_NONE); } else { diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index beb86a5292..bf01ae47fa 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -15,6 +15,7 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -441,9 +442,12 @@ static int do_child(orte_app_context_t* context, if (NULL == msg) { msg = "failed to convert bitmap list to hwloc bitmap"; } - if (OPAL_BINDING_REQUIRED(jobdat->map->binding)) { - /* If binding is required, send an error up the pipe (which exits - -- it doesn't return). */ + if (OPAL_BINDING_REQUIRED(jobdat->map->binding) && + (OPAL_BIND_GIVEN & jobdat->map->binding)) { + /* If binding is required and a binding directive was explicitly + * given (i.e., we are not binding due to a default policy), + * send an error up the pipe (which exits -- it doesn't return). 
+ */ send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "binding generic error", orte_process_info.nodename, @@ -459,7 +463,8 @@ static int do_child(orte_app_context_t* context, } /* bind as specified */ rc = hwloc_set_cpubind(opal_hwloc_topology, cpuset, 0); - if (rc < 0) { + /* if we got an error and this wasn't a default binding policy, then report it */ + if (rc < 0 && (OPAL_BIND_GIVEN & jobdat->map->binding)) { char *tmp = NULL; if (errno == ENOSYS) { msg = "hwloc indicates cpu binding not supported"; @@ -514,8 +519,11 @@ static int do_child(orte_app_context_t* context, opal_unsetenv(param, &environ_copy); free(param); } - /* set memory affinity policy */ - if (ORTE_SUCCESS != opal_hwloc_base_set_process_membind_policy()) { + /* set memory affinity policy - if we get an error, don't report + * anything unless the user actually specified the binding policy + */ + rc = opal_hwloc_base_set_process_membind_policy(); + if (ORTE_SUCCESS != rc && (OPAL_BIND_GIVEN & jobdat->map->binding)) { if (errno == ENOSYS) { msg = "hwloc indicates memory binding not supported"; } else if (errno == EXDEV) { diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index 7e3eec83cb..54a2049ec8 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -134,10 +134,12 @@ static int bind_upwards(orte_job_t *jdata, * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. 
Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag - * to indicate binding capability + * to indicate binding capability - don't warn if the user didn't + * specifically request binding */ if (!support->membind->set_thisproc_membind && - !support->membind->set_thisthread_membind) { + !support->membind->set_thisthread_membind && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; @@ -148,13 +150,11 @@ static int bind_upwards(orte_job_t *jdata, } } - if (!orte_hetero_nodes) { - /* if the nodes are homogeneous, we share topologies in order - * to save space, so we need to reset the usage info to reflect - * our own current state - */ - reset_usage(node, jdata->jobid); - } + /* we share topologies in order + * to save space, so we need to reset the usage info to reflect + * our own current state + */ + reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { @@ -203,9 +203,12 @@ static int bind_upwards(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } - /* error out if adding a proc would cause overload and that wasn't allowed */ + /* error out if adding a proc would cause overload and that wasn't allowed, + * and it wasn't a default binding policy (i.e., the user requested it) + */ if (ncpus < data->num_bound && - !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { + !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); @@ -294,10 +297,12 @@ static int bind_downwards(orte_job_t *jdata, * expected. 
Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag - * to indicate binding capability + * to indicate binding capability - don't warn if the user didn't + * specifically request binding */ if (!support->membind->set_thisproc_membind && - !support->membind->set_thisthread_membind) { + !support->membind->set_thisthread_membind && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; @@ -309,13 +314,11 @@ static int bind_downwards(orte_job_t *jdata, } } - if (!orte_hetero_nodes) { - /* if the nodes are homogeneous, we share topologies in order - * to save space, so we need to reset the usage info to reflect - * our own current state - */ - reset_usage(node, jdata->jobid); - } + /* we share topologies in order + * to save space, so we need to reset the usage info to reflect + * our own current state + */ + reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { @@ -370,9 +373,12 @@ static int bind_downwards(orte_job_t *jdata, trg_obj->userdata = data; } data->num_bound++; - /* error out if adding a proc would cause overload and that wasn't allowed */ + /* error out if adding a proc would cause overload and that wasn't allowed, + * and it wasn't a default binding policy (i.e., the user requested it) + */ if (ncpus < data->num_bound && - !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { + !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); @@ -458,10 +464,12 @@ static int bind_in_place(orte_job_t *jdata, * 
expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag - * to indicate binding capability + * to indicate binding capability - don't warn if the user didn't + * specifically request binding */ if (!support->membind->set_thisproc_membind && - !support->membind->set_thisthread_membind) { + !support->membind->set_thisthread_membind && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; @@ -472,13 +480,11 @@ static int bind_in_place(orte_job_t *jdata, } } - if (!orte_hetero_nodes) { - /* if the nodes are homogeneous, we share topologies in order - * to save space, so we need to reset the usage info to reflect - * our own current state - */ - reset_usage(node, jdata->jobid); - } + /* we share topologies in order + * to save space, so we need to reset the usage info to reflect + * our own current state + */ + reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { @@ -511,9 +517,12 @@ static int bind_in_place(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } - /* error out if adding a proc would cause overload and that wasn't allowed */ + /* error out if adding a proc would cause overload and that wasn't allowed, + * and it wasn't a default binding policy (i.e., the user requested it) + */ if (ncpus < data->num_bound && - !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { + !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, 
ncpus);