diff --git a/orte/mca/rmaps/base/help-orte-rmaps-base.txt b/orte/mca/rmaps/base/help-orte-rmaps-base.txt index cf08ce7e0a..259cdffaf6 100644 --- a/orte/mca/rmaps/base/help-orte-rmaps-base.txt +++ b/orte/mca/rmaps/base/help-orte-rmaps-base.txt @@ -250,15 +250,15 @@ situation and try again. # [mapping-too-low] A request for multiple cpus-per-proc was given, but a directive -was also give to map to an object level that is unlikely to -have multiple cpus underneath it: +was also given to map to an object level that has fewer cpus than +requested: #cpus-per-proc: %d + number of cpus: %d map-by: %s -Please specify a mapping level that is no lower than socket, or -else let us define a default mapping that will allow multiple -cpus-per-proc. +Please specify a mapping level that has more cpus, or else let us +define a default mapping that will allow multiple cpus-per-proc. # [unrecognized-modifier] The mapping request contains an unrecognized modifier: diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index 1571a4820f..c0fe3dac37 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -379,15 +379,6 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) } if (1 < orte_rmaps_base.cpus_per_rank) { - /* check to see if we were told to map at too low a level */ - if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) && - ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) > ORTE_MAPPING_BYSOCKET && - ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) < ORTE_MAPPING_BYSLOT) { - orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, - orte_rmaps_base.cpus_per_rank, - orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); - return ORTE_ERR_SILENT; - } #if OPAL_HAVE_HWLOC /* if we were asked for multiple cpus/proc, then we have to * bind to those cpus - any other binding policy is an diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c 
b/orte/mca/rmaps/round_robin/rmaps_rr.c index acd70d8fb5..817dcb281c 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -150,6 +150,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) * it isn't available, but the error allows us to try * byslot, then do so */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs); } @@ -161,6 +162,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) * it isn't available, but the error allows us to try * byslot, then do so */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs); } @@ -172,6 +174,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) * it isn't available, but the error allows us to try * byslot, then do so */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs); } @@ -183,6 +186,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) * it isn't available, but the error allows us to try * byslot, then do so */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs); } @@ -194,6 +198,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) * it isn't available, but the error allows us to try * byslot, then do so */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs); } @@ -205,6 +210,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) * it isn't available, but the error allows us to try * byslot, then do so */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs); } @@ -216,6 +222,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) * 
it isn't available, but the error allows us to try * byslot, then do so */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs); } diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index 3aedbeb1b4..d49c88d475 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -601,6 +601,12 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } + if (orte_rmaps_base.cpus_per_rank > (orte_vpid_t)opal_hwloc_base_get_npus(node->topology, obj)) { + orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, + orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology, obj), + orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); + return ORTE_ERR_SILENT; + } if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) { return ORTE_ERR_OUT_OF_RESOURCE; } @@ -745,6 +751,12 @@ static int byobj_span(orte_job_t *jdata, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } + if (orte_rmaps_base.cpus_per_rank > (orte_vpid_t)opal_hwloc_base_get_npus(node->topology, obj)) { + orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, + orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology, obj), + orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); + return ORTE_ERR_SILENT; + } /* determine how many to map */ nprocs = navg; if (0 < nxtra_objs) {