1
1

Clean up our handling of VMs to ensure daemons don't get mapped when operating with a VM.

Have each mapper flag that it did the map so we can later see which mapper did it.

Ensure procs are flagged as "ready to launch".

This commit was SVN r24406.
Этот коммит содержится в:
Ralph Castain 2011-02-16 23:01:57 +00:00
родитель 3f4d4886f2
Коммит 65ba6af44d
8 изменённых файлов: 44 добавлений и 21 удалений

Просмотреть файл

@ -120,8 +120,10 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
return rc; return rc;
} }
} }
/* if we get here without doing the map, then that's an error */ /* if we get here without doing the map, or with zero procs in
if (!did_map) { * the map, then that's an error
*/
if (!did_map || 0 == jdata->num_procs) {
orte_show_help("help-orte-rmaps-base.txt", "failed-map", true); orte_show_help("help-orte-rmaps-base.txt", "failed-map", true);
return ORTE_ERR_FAILED_TO_MAP; return ORTE_ERR_FAILED_TO_MAP;
} }

Просмотреть файл

@ -372,6 +372,8 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
} }
/* set the jobid */ /* set the jobid */
proc->name.jobid = jdata->jobid; proc->name.jobid = jdata->jobid;
/* flag the proc as ready for launch */
proc->state = ORTE_PROC_STATE_INIT;
/* we do not set the vpid here - this will be done /* we do not set the vpid here - this will be done
* during a second phase * during a second phase
*/ */
@ -673,8 +675,9 @@ retry_lr:
} }
int orte_rmaps_base_define_daemons(orte_job_map_t *map) int orte_rmaps_base_define_daemons(orte_job_t *jdata)
{ {
orte_job_map_t *map;
orte_node_t *node; orte_node_t *node;
orte_proc_t *proc; orte_proc_t *proc;
orte_job_t *daemons; orte_job_t *daemons;
@ -685,6 +688,13 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map)
"%s rmaps:base:define_daemons", "%s rmaps:base:define_daemons",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_MAPPING_USE_VM & jdata->map->policy) {
/* nothing for us to do - all daemons are
* defined by definition!
*/
return ORTE_SUCCESS;
}
/* get the daemon job data struct */ /* get the daemon job data struct */
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_HNP->jobid))) { if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_HNP->jobid))) {
/* bad news */ /* bad news */
@ -693,6 +703,7 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map)
} }
/* initialize the #new daemons */ /* initialize the #new daemons */
map = jdata->map;
map->num_new_daemons = 0; map->num_new_daemons = 0;
/* go through the nodes in the map, checking each one's daemon name /* go through the nodes in the map, checking each one's daemon name

Просмотреть файл

@ -88,7 +88,7 @@ ORTE_DECLSPEC void orte_rmaps_base_update_local_ranks(orte_job_t *jdata, orte_no
ORTE_DECLSPEC int orte_rmaps_base_rearrange_map(orte_app_context_t *app, orte_job_map_t *map, opal_list_t *procs); ORTE_DECLSPEC int orte_rmaps_base_rearrange_map(orte_app_context_t *app, orte_job_map_t *map, opal_list_t *procs);
ORTE_DECLSPEC int orte_rmaps_base_define_daemons(orte_job_map_t *map); ORTE_DECLSPEC int orte_rmaps_base_define_daemons(orte_job_t *jdata);
ORTE_DECLSPEC int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata); ORTE_DECLSPEC int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata);

Просмотреть файл

@ -71,10 +71,13 @@ static int switchyard(orte_job_t *jdata)
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION; return ORTE_ERR_TAKE_NEXT_OPTION;
} }
opal_output_verbose(5, orte_rmaps_base.rmaps_output, opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:loadbalance: mapping job %s", "mca:rmaps:loadbalance: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
jdata->map->mapper = ORTE_RMAPS_LOADBALANCE;
if (0 < orte_rmaps_base.npernode) { if (0 < orte_rmaps_base.npernode) {
rc = npernode(jdata); rc = npernode(jdata);
@ -97,7 +100,7 @@ static int switchyard(orte_job_t *jdata)
} }
/* define the daemons that we will use for this job */ /* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }

Просмотреть файл

@ -308,10 +308,13 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION; return ORTE_ERR_TAKE_NEXT_OPTION;
} }
opal_output_verbose(5, orte_rmaps_base.rmaps_output, opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rank_file: mapping job %s", "mca:rmaps:rank_file: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
jdata->map->mapper = ORTE_RMAPS_RF;
/* convenience def */ /* convenience def */
map = jdata->map; map = jdata->map;
@ -597,7 +600,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
} }
/* define the daemons that we will use for this job */ /* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(map))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }

Просмотреть файл

@ -86,6 +86,8 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
"mca:rmaps:resilient: mapping job %s", "mca:rmaps:resilient: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
jdata->map->mapper = ORTE_RMAPS_RESILIENT;
/* have we already constructed the fault group list? */ /* have we already constructed the fault group list? */
if (!have_ftgrps) { if (!have_ftgrps) {
@ -217,12 +219,10 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
*/ */
orte_rmaps_base_update_local_ranks(jdata, oldnode, nd, proc); orte_rmaps_base_update_local_ranks(jdata, oldnode, nd, proc);
} }
if (!(ORTE_MAPPING_USE_VM & jdata->map->policy)) { /* define the daemons that we will use for this job */
/* define the daemons that we will use for this job */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) { ORTE_ERROR_LOG(rc);
ORTE_ERROR_LOG(rc); return rc;
return rc;
}
} }
error: error:
@ -746,12 +746,10 @@ static int map_to_ftgrps(orte_job_t *jdata)
return rc; return rc;
} }
if (!(ORTE_MAPPING_USE_VM & jdata->map->policy)) { /* define the daemons that we will use for this job */
/* define the daemons that we will use for this job */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) { ORTE_ERROR_LOG(rc);
ORTE_ERROR_LOG(rc); return rc;
return rc;
}
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;

Просмотреть файл

@ -77,6 +77,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
"mca:rmaps:rr: mapping job %s", "mca:rmaps:rr: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
jdata->map->mapper = ORTE_RMAPS_RR;
/* start at the beginning... */ /* start at the beginning... */
jdata->num_procs = 0; jdata->num_procs = 0;
@ -155,7 +158,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
} }
/* define the daemons that we will use for this job */ /* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }

Просмотреть файл

@ -93,10 +93,13 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION; return ORTE_ERR_TAKE_NEXT_OPTION;
} }
opal_output_verbose(5, orte_rmaps_base.rmaps_output, opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:seq: mapping job %s", "mca:rmaps:seq: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
jdata->map->mapper = ORTE_RMAPS_SEQ;
/* conveniece def */ /* conveniece def */
map = jdata->map; map = jdata->map;
@ -247,7 +250,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
} }
/* define the daemons that we will use for this job */ /* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(map))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }