Ensure that we don't bind by default when any node in the job is oversubscribed
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
15b6eaf2d4
Коммит
2af677b1cf
@ -327,6 +327,16 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any node is oversubscribed, then check to see if a binding
|
||||
* directive was given - if not, then we want to clear the default
|
||||
* binding policy so we don't attempt to bind */
|
||||
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED)) {
|
||||
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
/* clear any default binding policy we might have set */
|
||||
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
/* compute and save local ranks */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -230,6 +230,7 @@ static int mindist_map(orte_job_t *jdata)
|
||||
goto error;
|
||||
} else {
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -349,6 +350,7 @@ static int mindist_map(orte_job_t *jdata)
|
||||
if (nprocs_mapped == app->num_procs)
|
||||
break;
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:mindist: second pass assigning %d extra procs to node %s",
|
||||
(int)num_procs_to_assign, node->name);
|
||||
|
@ -351,6 +351,7 @@ static int ppr_mapper(orte_job_t *jdata)
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
/* check for permission */
|
||||
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
|
@ -299,6 +299,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
}
|
||||
/* set the vpid */
|
||||
proc->name.vpid = rank;
|
||||
|
@ -840,6 +840,7 @@ static int map_to_ftgrps(orte_job_t *jdata)
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(nd, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
}
|
||||
|
||||
/* track number of procs mapped */
|
||||
|
@ -184,6 +184,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
/* check for permission */
|
||||
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
@ -368,6 +369,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
/* check for permission */
|
||||
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
@ -420,6 +422,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
}
|
||||
if (nprocs_mapped == app->num_procs) {
|
||||
/* we are done */
|
||||
@ -588,6 +591,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
/* check for permission */
|
||||
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
@ -745,6 +749,7 @@ static int byobj_span(orte_job_t *jdata,
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
}
|
||||
if (nprocs_mapped == app->num_procs) {
|
||||
/* we are done */
|
||||
|
@ -412,6 +412,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
||||
* properly set
|
||||
*/
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
|
||||
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
|
||||
/* check for permission */
|
||||
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
|
@ -88,6 +88,7 @@ typedef uint16_t orte_job_flags_t;
|
||||
#define ORTE_JOB_FLAG_RECOVERABLE 0x0100 // job is recoverable
|
||||
#define ORTE_JOB_FLAG_RESTART 0x0200 // NOTE(review): undocumented here - presumably marks a job that is being restarted; confirm against users of this flag
|
||||
#define ORTE_JOB_FLAG_PROCS_MIGRATING 0x0400 // some procs in job are migrating from one node to another
|
||||
#define ORTE_JOB_FLAG_OVERSUBSCRIBED 0x0800 // at least one node in the job is oversubscribed
|
||||
|
||||
|
||||
/*** JOB ATTRIBUTE KEYS ***/
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user