1
1

When doing a comm_spawn, track the last object we bound to and start the next job on the following object, so that we avoid overloading objects when it isn't necessary.

Этот коммит содержится в:
Ralph Castain 2015-06-17 09:20:08 -07:00
родитель 8ab2b11f88
Коммит 869b2891c4
4 изменённых файлов: 11 добавлений и 0 удалений

Просмотреть файл

@@ -224,6 +224,8 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
jdata->bookmark = parent->bookmark; jdata->bookmark = parent->bookmark;
} }
} }
/* provide the parent's last object */
jdata->bkmark_obj = parent->bkmark_obj;
/* launch it */ /* launch it */
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,

Просмотреть файл

@@ -500,6 +500,11 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
"mca:rmaps:rr: found %u %s objects on node %s", "mca:rmaps:rr: found %u %s objects on node %s",
nobjs, hwloc_obj_type_string(target), node->name); nobjs, hwloc_obj_type_string(target), node->name);
/* if this is a comm_spawn situation, start with the object
* where the parent left off and increment */
if (ORTE_JOBID_INVALID != jdata->originator.jobid) {
start = (jdata->bkmark_obj + 1) % nobjs;
}
/* compute the number of procs to go on this node */ /* compute the number of procs to go on this node */
nprocs = (node->slots - node->slots_inuse) / orte_rmaps_base.cpus_per_rank; nprocs = (node->slots - node->slots_inuse) / orte_rmaps_base.cpus_per_rank;
opal_output_verbose(2, orte_rmaps_base_framework.framework_output, opal_output_verbose(2, orte_rmaps_base_framework.framework_output,

Просмотреть файл

@@ -651,6 +651,7 @@ static void orte_job_construct(orte_job_t* job)
ORTE_GLOBAL_ARRAY_BLOCK_SIZE); ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
job->map = NULL; job->map = NULL;
job->bookmark = NULL; job->bookmark = NULL;
job->bkmark_obj = 0;
job->state = ORTE_JOB_STATE_UNDEF; job->state = ORTE_JOB_STATE_UNDEF;
job->num_mapped = 0; job->num_mapped = 0;

Просмотреть файл

@@ -333,6 +333,9 @@ typedef struct {
* indicates the node where we stopped * indicates the node where we stopped
*/ */
orte_node_t *bookmark; orte_node_t *bookmark;
/* if we are binding, bookmark the index of the
* last object we bound to */
unsigned int bkmark_obj;
/* state of the overall job */ /* state of the overall job */
orte_job_state_t state; orte_job_state_t state;
/* number of procs mapped */ /* number of procs mapped */