1
1

Ensure that we don't bind-by-default in an oversubscribed condition

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2016-12-15 07:58:52 -08:00
родитель 15b6eaf2d4
Коммит 2af677b1cf
8 изменённых файлов: 22 добавлений и 0 удалений

Просмотреть файл

@ -327,6 +327,16 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
return;
}
/* if any node is oversubscribed, then check to see if a binding
* directive was given - if not, then we want to clear the default
* binding policy so we don't attempt to bind */
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED)) {
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
/* clear any default binding policy we might have set */
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
}
}
/* compute and save local ranks */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -230,6 +230,7 @@ static int mindist_map(orte_job_t *jdata)
goto error;
} else {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
}
}
}
@ -349,6 +350,7 @@ static int mindist_map(orte_job_t *jdata)
if (nprocs_mapped == app->num_procs)
break;
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
"mca:rmaps:mindist: second pass assigning %d extra procs to node %s",
(int)num_procs_to_assign, node->name);

Просмотреть файл

@ -351,6 +351,7 @@ static int ppr_mapper(orte_job_t *jdata)
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
/* check for permission */
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
/* if we weren't given a directive either way, then we will error out

Просмотреть файл

@ -299,6 +299,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
}
/* set the vpid */
proc->name.vpid = rank;

Просмотреть файл

@ -840,6 +840,7 @@ static int map_to_ftgrps(orte_job_t *jdata)
* properly set
*/
ORTE_FLAG_SET(nd, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
}
/* track number of procs mapped */

Просмотреть файл

@ -184,6 +184,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
/* check for permission */
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
/* if we weren't given a directive either way, then we will error out
@ -368,6 +369,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
/* check for permission */
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
/* if we weren't given a directive either way, then we will error out
@ -420,6 +422,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
}
if (nprocs_mapped == app->num_procs) {
/* we are done */
@ -588,6 +591,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
/* check for permission */
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
/* if we weren't given a directive either way, then we will error out
@ -745,6 +749,7 @@ static int byobj_span(orte_job_t *jdata,
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
}
if (nprocs_mapped == app->num_procs) {
/* we are done */

Просмотреть файл

@ -412,6 +412,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
* properly set
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
/* check for permission */
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
/* if we weren't given a directive either way, then we will error out

Просмотреть файл

@ -88,6 +88,7 @@ typedef uint16_t orte_job_flags_t;
#define ORTE_JOB_FLAG_RECOVERABLE 0x0100 // job is recoverable
#define ORTE_JOB_FLAG_RESTART 0x0200 // NOTE(review): undocumented here; presumably marks a job that is being restarted - confirm
#define ORTE_JOB_FLAG_PROCS_MIGRATING 0x0400 // some procs in job are migrating from one node to another
#define ORTE_JOB_FLAG_OVERSUBSCRIBED 0x0800 // at least one node in the job is oversubscribed; set by the rmaps mappers alongside ORTE_NODE_FLAG_OVERSUBSCRIBED and tested in orte_rmaps_base_map_job to skip the default binding policy
/*** JOB ATTRIBUTE KEYS ***/