From 53cd00fe16c7d1dd077123cf89810091d9c1da2b Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 17 Dec 2013 14:50:10 +0000 Subject: [PATCH] By setting a default mapping/ranking/binding policy that wasn't "none", we introduced a problem for users of the Mac and any other machine where sockets aren't defined and/or binding is not supported. Fix that by checking to see if the user specified the failing policy - if not, then fall back to the old map/rank by slot and no binding. Refs trac:3977 This commit was SVN r29933. The following Trac tickets were found above: Ticket 3977 --> https://svn.open-mpi.org/trac/ompi/ticket/3977 --- orte/mca/rmaps/base/rmaps_base_binding.c | 9 +- orte/mca/rmaps/base/rmaps_base_ranking.c | 194 +++++++++++------- orte/mca/rmaps/round_robin/rmaps_rr.c | 56 +++++ orte/mca/rmaps/round_robin/rmaps_rr_mappers.c | 36 +++- 4 files changed, 217 insertions(+), 78 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index 2c72a85780..50663fffa8 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -121,7 +121,8 @@ static int bind_upwards(orte_job_t *jdata, */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { - if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) { + if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) || + !(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { /* we are not required to bind, so ignore this */ continue; } @@ -276,7 +277,8 @@ static int bind_downwards(orte_job_t *jdata, */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { - if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) { + if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) || + !(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { /* we are not required to bind, so ignore this */ continue; } @@ -440,7 +442,8 @@ static int bind_in_place(orte_job_t *jdata, */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { - if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) { + if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) || + !(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { /* we are not required to bind, so ignore this */ continue; } diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c b/orte/mca/rmaps/base/rmaps_base_ranking.c index 214e512d77..44602bfdb2 100644 --- a/orte/mca/rmaps/base/rmaps_base_ranking.c +++ b/orte/mca/rmaps/base/rmaps_base_ranking.c @@ -97,6 +97,9 @@ static int rank_span(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_span: found %d objects on node %s with %d procs", num_objs, node->name, (int)node->num_procs); + if (0 == num_objs) { + return ORTE_ERR_NOT_SUPPORTED; + } /* for each object */ for (i=0; i < num_objs && cnt < app->num_procs; i++) { @@ -205,6 +208,9 @@ static int rank_fill(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_fill: found %d objects on node %s with %d procs", num_objs, node->name, (int)node->num_procs); + if (0 == num_objs) { + return ORTE_ERR_NOT_SUPPORTED; + } /* for each object */ for (i=0; i < num_objs && cnt < app->num_procs; i++) { @@ -321,6 +327,9 @@ static int rank_by(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_by: found %d objects on node %s with %d procs", num_objs, node->name, (int)node->num_procs); + if (0 == num_objs) { + return ORTE_ERR_NOT_SUPPORTED; + } /* collect all the objects */ for (i=0; i < num_objs; i++) { obj = opal_hwloc_base_get_obj_by_type(node->topology, target, @@ -420,6 +429,118 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, map = jdata->map; + /* start with the rank-by object options - if the object isn't + * included in the topology, then we obviously cannot rank by it. + * However, if this was the default ranking policy (as opposed to + * something given by the user), then fall back to rank-by slot + */ +#if OPAL_HAVE_HWLOC + if (ORTE_RANK_BY_NUMA == ORTE_GET_RANKING_POLICY(map->ranking)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing ranks by NUMA for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_NODE, 0))) { + if (ORTE_ERR_NOT_SUPPORTED == rc && + !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { + ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); + goto rankbyslot; + } + ORTE_ERROR_LOG(rc); + } + return rc; + } + + if (ORTE_RANK_BY_SOCKET == ORTE_GET_RANKING_POLICY(map->ranking)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing ranks by socket for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_SOCKET, 0))) { + if (ORTE_ERR_NOT_SUPPORTED == rc && + !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { + ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); + goto rankbyslot; + } + ORTE_ERROR_LOG(rc); + } + return rc; + } + + if (ORTE_RANK_BY_L3CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing ranks by L3cache for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 3))) { + if (ORTE_ERR_NOT_SUPPORTED == rc && + !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { + ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); + goto rankbyslot; + } + ORTE_ERROR_LOG(rc); + } + return rc; + } + + if (ORTE_RANK_BY_L2CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing ranks by L2cache for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 2))) { + if (ORTE_ERR_NOT_SUPPORTED == rc && + !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { + ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); + goto rankbyslot; + } + ORTE_ERROR_LOG(rc); + } + return rc; + } + + if (ORTE_RANK_BY_L1CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing ranks by L1cache for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 1))) { + if (ORTE_ERR_NOT_SUPPORTED == rc && + !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { + ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); + goto rankbyslot; + } + ORTE_ERROR_LOG(rc); + } + return rc; + } + + if (ORTE_RANK_BY_CORE == ORTE_GET_RANKING_POLICY(map->ranking)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing ranks by core for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CORE, 0))) { + if (ORTE_ERR_NOT_SUPPORTED == rc && + !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { + ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); + goto rankbyslot; + } + ORTE_ERROR_LOG(rc); + } + return rc; + } + + if (ORTE_RANK_BY_HWTHREAD == ORTE_GET_RANKING_POLICY(map->ranking)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing ranks by hwthread for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_PU, 0))) { + if (ORTE_ERR_NOT_SUPPORTED == rc && + !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { + ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); + goto rankbyslot; + } + ORTE_ERROR_LOG(rc); + } + return rc; + } +#endif + if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) || ORTE_RANK_BY_BOARD == ORTE_GET_RANKING_POLICY(map->ranking)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, @@ -480,6 +601,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, return ORTE_SUCCESS; } + rankbyslot: if (ORTE_RANK_BY_SLOT == ORTE_GET_RANKING_POLICY(map->ranking)) { /* assign the ranks sequentially */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, @@ -523,78 +645,6 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, return ORTE_SUCCESS; } -#if OPAL_HAVE_HWLOC - if (ORTE_RANK_BY_NUMA == ORTE_GET_RANKING_POLICY(map->ranking)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: computing ranks by NUMA for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_NODE, 0))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - if (ORTE_RANK_BY_SOCKET == ORTE_GET_RANKING_POLICY(map->ranking)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: computing ranks by socket for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_SOCKET, 0))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - if (ORTE_RANK_BY_L3CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: computing ranks by L3cache for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 3))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - if (ORTE_RANK_BY_L2CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: computing ranks by L2cache for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 2))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - if (ORTE_RANK_BY_L1CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: computing ranks by L1cache for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 1))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - if (ORTE_RANK_BY_CORE == ORTE_GET_RANKING_POLICY(map->ranking)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: computing ranks by core for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CORE, 0))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } - - if (ORTE_RANK_BY_HWTHREAD == ORTE_GET_RANKING_POLICY(map->ranking)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: computing ranks by hwthread for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_PU, 0))) { - ORTE_ERROR_LOG(rc); - } - return rc; - } -#endif - return ORTE_ERR_NOT_IMPLEMENTED; } diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c index b0bc08c7e8..083b04e277 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -143,24 +143,80 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) } else if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_PU, 0); + if (ORTE_ERR_NOT_SUPPORTED == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, + app->num_procs); + } } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CORE, 0); + if (ORTE_ERR_NOT_SUPPORTED == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, + app->num_procs); + } } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CACHE, 1); + if (ORTE_ERR_NOT_SUPPORTED == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, + app->num_procs); + } } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CACHE, 2); + if (ORTE_ERR_NOT_SUPPORTED == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, + app->num_procs); + } } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CACHE, 3); + if (ORTE_ERR_NOT_SUPPORTED == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, + app->num_procs); + } } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_SOCKET, 0); + if (ORTE_ERR_NOT_SUPPORTED == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, + app->num_procs); + } } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_NODE, 0); + if (ORTE_ERR_NOT_SUPPORTED == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, + app->num_procs); + } #endif } else { /* unrecognized mapping directive */ diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index d614ebb26a..6b0ed24a07 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -539,12 +539,42 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, "mca:rmaps:rr:byobj: nprocs-to-assign %d for %d objs on node %s", num_procs_to_assign, nobjs, node->name); /* if there are no objects of this type, then report the error * and abort - this can happen, for example, on systems that - * don't report "sockets" as an independent object + * don't report "sockets" as an independent object. However, IF + * this object is the default one - i.e., not specified by the + * user - then we can fall back to mapping by slot */ if (0 == nobjs) { - orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", + if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { + orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", true, hwloc_obj_type_string(target), node->name); - return ORTE_ERR_SILENT; + return ORTE_ERR_SILENT; + } else { + /* this was the default mapping policy, so clear the map + * of any prior work and indicate that map-by slot is reqd + */ + for (i=0; i < jdata->map->nodes->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { + continue; + } + for (idx=0; idx < node->procs->size; idx++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, idx))) { + continue; + } + if (proc->name.jobid != jdata->jobid) { + continue; + } + --node->num_procs; + OBJ_RELEASE(proc); + opal_pointer_array_set_item(node->procs, idx, NULL); + } + if (0 == node->num_procs) { + node->mapped = false; + OBJ_RELEASE(node); + opal_pointer_array_set_item(jdata->map->nodes, i, NULL); + } + } + return ORTE_ERR_NOT_SUPPORTED; + } } /* compute the number of procs to go on each object */