From 55cd65b1491c34080391f699d8e0e5812b2dbe7d Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 19 Dec 2013 16:31:45 +0000 Subject: [PATCH] Don't warn about binding (process and/or memory) when the node cannot do it, or when binding would cause overload, if the binding wasn't specifically requested by the user (i.e., it is the result of the default policy). Instead, just don't bind and quietly move along. Reset topology usage for each node as we bind, since multiple nodes may be linked to the same topology object. This will need to be revisited for scale as it does take some non-zero time to reset the usage each iteration. However, storing individual topology objects for every node consumes memory, so it's a tradeoff. cmr=v1.7.4:reviewer=jsquyres:subject=Eliminate excessive binding/memory warnings This commit was SVN r29978. --- opal/mca/hwloc/base/hwloc_base_frame.c | 3 +- orte/mca/odls/default/odls_default_module.c | 20 ++++-- orte/mca/rmaps/base/rmaps_base_binding.c | 75 ++++++++++++--------- 3 files changed, 57 insertions(+), 41 deletions(-) diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c index 412cf29b3b..143163c290 100644 --- a/opal/mca/hwloc/base/hwloc_base_frame.c +++ b/opal/mca/hwloc/base/hwloc_base_frame.c @@ -189,9 +189,8 @@ static int opal_hwloc_base_open(mca_base_open_flag_t flags) /* binding specification */ if (NULL == opal_hwloc_base_binding_policy) { + /* default to bind-to core, and that no binding policy was specified */ opal_hwloc_binding_policy = OPAL_BIND_TO_CORE; - /* mark that no binding policy was specified */ - opal_hwloc_binding_policy &= ~OPAL_BIND_GIVEN; } else if (0 == strncasecmp(opal_hwloc_base_binding_policy, "none", strlen("none"))) { OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_NONE); } else { diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index beb86a5292..bf01ae47fa 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ 
b/orte/mca/odls/default/odls_default_module.c @@ -15,6 +15,7 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -441,9 +442,12 @@ static int do_child(orte_app_context_t* context, if (NULL == msg) { msg = "failed to convert bitmap list to hwloc bitmap"; } - if (OPAL_BINDING_REQUIRED(jobdat->map->binding)) { - /* If binding is required, send an error up the pipe (which exits - -- it doesn't return). */ + if (OPAL_BINDING_REQUIRED(jobdat->map->binding) && + (OPAL_BIND_GIVEN & jobdat->map->binding)) { + /* If binding is required and a binding directive was explicitly + * given (i.e., we are not binding due to a default policy), + * send an error up the pipe (which exits -- it doesn't return). + */ send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "binding generic error", orte_process_info.nodename, @@ -459,7 +463,8 @@ static int do_child(orte_app_context_t* context, } /* bind as specified */ rc = hwloc_set_cpubind(opal_hwloc_topology, cpuset, 0); - if (rc < 0) { + /* if we got an error and this wasn't a default binding policy, then report it */ + if (rc < 0 && (OPAL_BIND_GIVEN & jobdat->map->binding)) { char *tmp = NULL; if (errno == ENOSYS) { msg = "hwloc indicates cpu binding not supported"; @@ -514,8 +519,11 @@ static int do_child(orte_app_context_t* context, opal_unsetenv(param, &environ_copy); free(param); } - /* set memory affinity policy */ - if (ORTE_SUCCESS != opal_hwloc_base_set_process_membind_policy()) { + /* set memory affinity policy - if we get an error, don't report + * anything unless the user actually specified the binding policy + */ + rc = opal_hwloc_base_set_process_membind_policy(); + if (ORTE_SUCCESS != rc && (OPAL_BIND_GIVEN & jobdat->map->binding)) { if (errno == ENOSYS) { msg = "hwloc indicates memory binding not supported"; } else if (errno == EXDEV) { diff 
--git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index 7e3eec83cb..54a2049ec8 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -134,10 +134,12 @@ static int bind_upwards(orte_job_t *jdata, * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag - * to indicate binding capability + * to indicate binding capability - don't warn if the user didn't + * specifically request binding */ if (!support->membind->set_thisproc_membind && - !support->membind->set_thisthread_membind) { + !support->membind->set_thisthread_membind && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; @@ -148,13 +150,11 @@ static int bind_upwards(orte_job_t *jdata, } } - if (!orte_hetero_nodes) { - /* if the nodes are homogeneous, we share topologies in order - * to save space, so we need to reset the usage info to reflect - * our own current state - */ - reset_usage(node, jdata->jobid); - } + /* we share topologies in order + * to save space, so we need to reset the usage info to reflect + * our own current state + */ + reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { @@ -203,9 +203,12 @@ static int bind_upwards(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } - /* error out if adding a proc would cause overload and that wasn't allowed */ + /* error out if adding a proc would cause overload and that wasn't allowed, + * and it wasn't a default binding policy (i.e., the user requested it) + */ if (ncpus < data->num_bound && - 
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { + !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); @@ -294,10 +297,12 @@ static int bind_downwards(orte_job_t *jdata, * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag - * to indicate binding capability + * to indicate binding capability - don't warn if the user didn't + * specifically request binding */ if (!support->membind->set_thisproc_membind && - !support->membind->set_thisthread_membind) { + !support->membind->set_thisthread_membind && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; @@ -309,13 +314,11 @@ static int bind_downwards(orte_job_t *jdata, } } - if (!orte_hetero_nodes) { - /* if the nodes are homogeneous, we share topologies in order - * to save space, so we need to reset the usage info to reflect - * our own current state - */ - reset_usage(node, jdata->jobid); - } + /* we share topologies in order + * to save space, so we need to reset the usage info to reflect + * our own current state + */ + reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { @@ -370,9 +373,12 @@ static int bind_downwards(orte_job_t *jdata, trg_obj->userdata = data; } data->num_bound++; - /* error out if adding a proc would cause overload and that wasn't allowed */ + /* error out if adding a proc would cause overload and that wasn't allowed, + * and it wasn't a default binding policy (i.e., the user requested it) + */ if (ncpus < 
data->num_bound && - !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { + !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); @@ -458,10 +464,12 @@ static int bind_in_place(orte_job_t *jdata, * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag - * to indicate binding capability + * to indicate binding capability - don't warn if the user didn't + * specifically request binding */ if (!support->membind->set_thisproc_membind && - !support->membind->set_thisthread_membind) { + !support->membind->set_thisthread_membind && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; @@ -472,13 +480,11 @@ static int bind_in_place(orte_job_t *jdata, } } - if (!orte_hetero_nodes) { - /* if the nodes are homogeneous, we share topologies in order - * to save space, so we need to reset the usage info to reflect - * our own current state - */ - reset_usage(node, jdata->jobid); - } + /* we share topologies in order + * to save space, so we need to reset the usage info to reflect + * our own current state + */ + reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { @@ -511,9 +517,12 @@ static int bind_in_place(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } - /* error out if adding a proc would cause overload and that wasn't allowed */ + /* error out if adding a proc would cause overload and that wasn't allowed, + * and it wasn't 
a default binding policy (i.e., the user requested it) + */ if (ncpus < data->num_bound && - !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { + !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && + (OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus);