diff --git a/opal/mca/hwloc/hwloc.h b/opal/mca/hwloc/hwloc.h index a074be86e0..c8ba54ef14 100644 --- a/opal/mca/hwloc/hwloc.h +++ b/opal/mca/hwloc/hwloc.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * @@ -191,7 +191,7 @@ typedef uint16_t opal_binding_policy_t; #define OPAL_GET_BINDING_POLICY(pol) \ ((pol) & 0x0fff) #define OPAL_SET_BINDING_POLICY(target, pol) \ - (target) = (pol) | (((target) & 0xf000) | OPAL_BIND_GIVEN) + (target) = (pol) | (((target) & 0x2000) | OPAL_BIND_GIVEN) #define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \ do { \ if (!OPAL_BINDING_POLICY_IS_SET((target))) { \ diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index c1b03e8890..f3e55b5f33 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -149,7 +149,7 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_bynode); /* #cpus/rank to use */ - orte_rmaps_base.cpus_per_rank = 1; + orte_rmaps_base.cpus_per_rank = 0; var_id = mca_base_var_register("orte", "rmaps", "base", "cpus_per_proc", "Number of cpus to use for each rank [1-2**15 (default=1)]", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -280,7 +280,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) return ORTE_ERR_SILENT; } } - if (1 < orte_rmaps_base.cpus_per_rank) { + if (0 < orte_rmaps_base.cpus_per_rank) { orte_show_help("help-orte-rmaps-base.txt", "deprecated", true, "--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank", "--map-by :PE=N, default =NUMA", @@ -376,8 +376,8 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN); } - if (1 < orte_rmaps_base.cpus_per_rank) { - /* if we were asked for multiple cpus/proc, then we have to + if (0 < orte_rmaps_base.cpus_per_rank) { + /* if we were asked for cpus/proc, then we have to * bind to those cpus - any other binding policy is an * error */ @@ -403,24 +403,27 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) if (opal_hwloc_use_hwthreads_as_cpus) { OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD); } else { + opal_output(0, "SETTING BINDING TO CORE"); OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE); } } - /* we also need to ensure we are mapping to a high-enough level to have - * multiple cpus beneath it - by default, we'll go to the NUMA level */ - if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { - if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYHWTHREAD || - (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYCORE && - !opal_hwloc_use_hwthreads_as_cpus)) { - orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true); - return ORTE_ERR_SILENT; + if (1 < orte_rmaps_base.cpus_per_rank) { + /* we need to ensure we are mapping to a high-enough level to have + * multiple cpus beneath it - by default, we'll go to the NUMA level */ + if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { + if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYHWTHREAD || + (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYCORE && + !opal_hwloc_use_hwthreads_as_cpus)) { + orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true); + return ORTE_ERR_SILENT; + } + } else { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "%s rmaps:base pe/rank set - setting mapping to BYNUMA", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA); + ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN); } - } else { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s rmaps:base pe/rank set - setting mapping to BYNUMA", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA); - ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN); } } diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index b7cfcd7cc7..baa04eb57b 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -50,8 +50,8 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) { orte_job_t *jdata; orte_node_t *node; - int rc, i; - bool did_map, given; + int rc, i, ppx; + bool did_map, given, pernode; orte_rmaps_base_selected_module_t *mod; orte_job_t *parent; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; @@ -71,6 +71,22 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) "mca:rmaps: mapping job %s", ORTE_JOBID_PRINT(jdata->jobid)); + if (NULL == jdata->map->ppr && NULL != orte_rmaps_base.ppr) { + jdata->map->ppr = strdup(orte_rmaps_base.ppr); + } + if (NULL != jdata->map->ppr) { + /* get the procs/object */ + ppx = strtoul(jdata->map->ppr, NULL, 10); + if (NULL != strstr(jdata->map->ppr, "node")) { + pernode = true; + } else { + pernode = false; + } + } + if (0 == jdata->map->cpus_per_rank) { + jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank; + } + /* compute the number of procs and check validity */ nprocs = 0; for (i=0; i < jdata->apps->size; i++) { @@ -80,34 +96,47 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) orte_std_cntr_t slots; OBJ_CONSTRUCT(&nodes, opal_list_t); orte_rmaps_base_get_target_nodes(&nodes, &slots, app, ORTE_MAPPING_BYNODE, true, true); - /* if we are in a managed allocation, then all is good - otherwise, - * we have to do a little more checking */ - if (!orte_managed_allocation) { - /* if all the nodes have their slots given, then we are okay */ - given = true; - OPAL_LIST_FOREACH(node, &nodes, orte_node_t) { - if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { - given = false; - break; + if (NULL != jdata->map->ppr) { + if (pernode) { + nprocs += ppx * opal_list_get_size(&nodes); + } else { + /* must be procs/socket, so add in #sockets for each node */ + slots = 0; + OPAL_LIST_FOREACH(node, &nodes, orte_node_t) { + slots += ppx * opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, + HWLOC_OBJ_SOCKET, 0, + OPAL_HWLOC_AVAILABLE); + } + nprocs += slots; + } + } else { + /* if we are in a managed allocation, then all is good - otherwise, + * we have to do a little more checking */ + if (!orte_managed_allocation) { + /* if all the nodes have their slots given, then we are okay */ + given = true; + OPAL_LIST_FOREACH(node, &nodes, orte_node_t) { + if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { + given = false; + break; + } + } + /* if -host or -hostfile was given, and the slots were not, + * then this is no longer allowed */ + if (!given && + (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, NULL, OPAL_STRING) || + orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING))) { + /* inform the user of the error */ + orte_show_help("help-orte-rmaps-base.txt", "num-procs-not-specified", true); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + OBJ_RELEASE(caddy); + OPAL_LIST_DESTRUCT(&nodes); + return; } } - /* if -host or -hostfile was given, and the slots were not, - * then this is no longer allowed */ - if (!given && - (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, NULL, OPAL_STRING) || - orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING))) { - /* inform the user of the error */ - orte_show_help("help-orte-rmaps-base.txt", "num-procs-not-specified", true); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - OBJ_RELEASE(caddy); - OPAL_LIST_DESTRUCT(&nodes); - return; - } - } - OPAL_LIST_DESTRUCT(&nodes); - if (ORTE_MAPPING_PPR != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { nprocs += slots; } + OPAL_LIST_DESTRUCT(&nodes); } else { nprocs += app->num_procs; } @@ -116,8 +145,8 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: setting mapping policies for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); + "mca:rmaps: setting mapping policies for job %s nprocs %d", + ORTE_JOBID_PRINT(jdata->jobid), (int)nprocs); if (!jdata->map->display_map) { jdata->map->display_map = orte_rmaps_base.display_map; @@ -187,13 +216,6 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) jdata->map->ranking = orte_rmaps_base.ranking; } - if (NULL == jdata->map->ppr && NULL != orte_rmaps_base.ppr) { - jdata->map->ppr = strdup(orte_rmaps_base.ppr); - } - if (0 == jdata->map->cpus_per_rank) { - jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank; - } - /* define the binding policy for this job - if the user specified one * already (e.g., during the call to comm_spawn), then we don't * override it */ @@ -205,7 +227,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps[%d] binding policy given", __LINE__); jdata->map->binding = opal_hwloc_binding_policy; - } else if (1 < jdata->map->cpus_per_rank) { + } else if (0 < jdata->map->cpus_per_rank) { /* bind to cpus */ if (opal_hwloc_use_hwthreads_as_cpus) { /* if we are using hwthread cpus, then bind to those */ diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 16b6a75376..29470035c4 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -833,7 +833,7 @@ static void orte_job_map_construct(orte_job_map_t* map) map->ranking = 0; map->binding = 0; map->ppr = NULL; - map->cpus_per_rank = 1; + map->cpus_per_rank = 0; map->display_map = false; map->num_new_daemons = 0; map->daemon_vpid_start = ORTE_VPID_INVALID;