1
1

Control inheritance of launch directives by child jobs

Child jobs do not inherit launch directives unless inheritance is explicitly requested. This affects the map-by, rank-by, bind-to, npernode, pernode, npersocket, persocket, and cpus-per-rank directives. Values provided in the spawn call always take precedence. If a particular value isn't specified in the spawn call, the ORTE defaults are used when inheritance is not requested, and the values specified by MCA parameters are used when inheritance is requested.

For now, always inherit the oversubscribe directive, since MTT will otherwise break.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2018-07-10 12:22:18 -07:00
родитель f92daa6494
Коммит 6b6e63a346
3 изменённых файлов: 40 добавлений и 23 удалений

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -71,6 +71,8 @@ typedef struct {
orte_ranking_policy_t ranking; orte_ranking_policy_t ranking;
/* device specification for min distance mapping */ /* device specification for min distance mapping */
char *device; char *device;
/* whether or not child jobs should inherit launch directives */
bool inherit;
} orte_rmaps_base_t; } orte_rmaps_base_t;
/** /**

Просмотреть файл

@ -69,6 +69,7 @@ static bool rmaps_base_display_devel_map = false;
static bool rmaps_base_display_diffable_map = false; static bool rmaps_base_display_diffable_map = false;
static char *rmaps_base_topo_file = NULL; static char *rmaps_base_topo_file = NULL;
static char *rmaps_dist_device = NULL; static char *rmaps_dist_device = NULL;
static bool rmaps_base_inherit = false;
static int orte_rmaps_base_register(mca_base_register_flag_t flags) static int orte_rmaps_base_register(mca_base_register_flag_t flags)
{ {
@ -223,6 +224,12 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_topo_file); MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_topo_file);
rmaps_base_inherit = false;
(void) mca_base_var_register("orte", "rmaps", "base", "inherit",
"Whether child jobs shall inherit launch directives",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_inherit);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -254,6 +261,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
orte_rmaps_base.mapping = 0; orte_rmaps_base.mapping = 0;
orte_rmaps_base.ranking = 0; orte_rmaps_base.ranking = 0;
orte_rmaps_base.device = NULL; orte_rmaps_base.device = NULL;
orte_rmaps_base.inherit = rmaps_base_inherit;
/* if a topology file was given, then set our topology /* if a topology file was given, then set our topology
* from it. Even though our actual topology may differ, * from it. Even though our actual topology may differ,

Просмотреть файл

@ -54,6 +54,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
orte_job_t *parent; orte_job_t *parent;
orte_vpid_t nprocs; orte_vpid_t nprocs;
orte_app_context_t *app; orte_app_context_t *app;
bool inherit = false;
ORTE_ACQUIRE_OBJECT(caddy); ORTE_ACQUIRE_OBJECT(caddy);
jdata = caddy->jdata; jdata = caddy->jdata;
@ -64,32 +65,36 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
"mca:rmaps: mapping job %s", "mca:rmaps: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
if (NULL == jdata->map->ppr && NULL != orte_rmaps_base.ppr) { /* if this is a dynamic job launch and they didn't explicitly
jdata->map->ppr = strdup(orte_rmaps_base.ppr); * request inheritance, then don't inherit the launch directives */
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, NULL, OPAL_NAME)) {
inherit = orte_rmaps_base.inherit;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps: dynamic job %s %s inherit launch directives",
ORTE_JOBID_PRINT(jdata->jobid),
inherit ? "will" : "will not");
} else {
/* initial launch always takes on MCA params */
inherit = true;
}
if (inherit) {
if (NULL == jdata->map->ppr && NULL != orte_rmaps_base.ppr) {
jdata->map->ppr = strdup(orte_rmaps_base.ppr);
}
if (0 == jdata->map->cpus_per_rank) {
jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
}
} }
if (NULL != jdata->map->ppr) { if (NULL != jdata->map->ppr) {
/* get the procs/object */ /* get the procs/object */
ppx = strtoul(jdata->map->ppr, NULL, 10); ppx = strtoul(jdata->map->ppr, NULL, 10);
if (NULL != strstr(jdata->map->ppr, "node")) { if (NULL != strstr(jdata->map->ppr, "node")) {
pernode = true; pernode = true;
} else { } else if (NULL != strstr(jdata->map->ppr, "socket")) {
pernode = false;
}
} else {
if (orte_rmaps_base_pernode) {
ppx = 1;
pernode = true;
} else if (0 < orte_rmaps_base_n_pernode) {
ppx = orte_rmaps_base_n_pernode;
pernode = true;
} else if (0 < orte_rmaps_base_n_persocket) {
ppx = orte_rmaps_base_n_persocket;
persocket = true; persocket = true;
} }
} }
if (0 == jdata->map->cpus_per_rank) {
jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
}
/* compute the number of procs and check validity */ /* compute the number of procs and check validity */
nprocs = 0; nprocs = 0;
@ -151,12 +156,13 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
"mca:rmaps: setting mapping policies for job %s nprocs %d", "mca:rmaps: setting mapping policies for job %s nprocs %d",
ORTE_JOBID_PRINT(jdata->jobid), (int)nprocs); ORTE_JOBID_PRINT(jdata->jobid), (int)nprocs);
if (!jdata->map->display_map) { if (inherit && !jdata->map->display_map) {
jdata->map->display_map = orte_rmaps_base.display_map; jdata->map->display_map = orte_rmaps_base.display_map;
} }
/* set the default mapping policy IFF it wasn't provided */ /* set the default mapping policy IFF it wasn't provided */
if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { if (inherit && (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output, opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps mapping given by MCA param"); "mca:rmaps mapping given by MCA param");
jdata->map->mapping = orte_rmaps_base.mapping; jdata->map->mapping = orte_rmaps_base.mapping;
@ -216,12 +222,13 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
/* check for no-use-local directive */ /* check for no-use-local directive */
if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) { if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { if (inherit && (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL); ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
} }
} }
/* ditto for rank policy */ /* we don't have logic to determine default rank policy, so
* just inherit it if they didn't give us one */
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
jdata->map->ranking = orte_rmaps_base.ranking; jdata->map->ranking = orte_rmaps_base.ranking;
} }
@ -230,7 +237,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
* already (e.g., during the call to comm_spawn), then we don't * already (e.g., during the call to comm_spawn), then we don't
* override it */ * override it */
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { if (inherit && OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
/* if the user specified a default binding policy via /* if the user specified a default binding policy via
* MCA param, then we use it - this can include a directive * MCA param, then we use it - this can include a directive
* to overload */ * to overload */