1
1

Ensure MCA params are registered before parsing the global command-line options in orterun, so that debugger options are properly captured and acted upon.

Ensure that routes to remote procs are set on the HNP before completing launch so that the debugger message can be sent. This solves a race condition that can exist in environments where the HNP does not have local procs.

This commit was SVN r18674.
Этот коммит содержится в:
Ralph Castain 2008-06-19 02:58:14 +00:00
родитель 955d117f5e
Коммит b56f8ced4f
2 изменённых файла: 23 добавления и 1 удаление

Просмотреть файл

@ -483,6 +483,7 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
pid_t pid;
orte_job_t *jdata;
orte_proc_t **procs;
orte_process_name_t proc;
int rc;
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
@ -517,6 +518,9 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
}
procs = (orte_proc_t**)(jdata->procs->addr);
/* setup the process name */
proc.jobid = jobid;
/* the daemon will report the vpid, state, and pid of each
* process it launches - we need the pid in particular so
* that any debuggers can attach to the process
@ -549,6 +553,23 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
goto CLEANUP;
}
/* it is possible for a race condition to exist when the HNP does not have
* local procs whereby the HNP will need to communicate to a remote
* proc before it decodes the launch message itself and sets all the routes.
* This has been seen in cases where no local procs are launched and
* a debugger needs to attach to the job.
* To support that situation, go ahead and update the route here
*/
proc.vpid = vpid;
/* if the sender is me, the route is direct to avoid infinite loops. We
* know the jobid is the same since the sender was another daemon
*/
if (mev->sender.vpid == ORTE_PROC_MY_NAME->vpid) {
orte_routed.update_route(&proc, &proc);
} else {
orte_routed.update_route(&proc, &mev->sender);
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:app_report_launched for proc %s from daemon %s: pid %lu state %0x exit %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -345,7 +345,8 @@ int orterun(int argc, char *argv[])
orte_process_info.hnp = true;
/* Setup MCA params */
orte_register_params();
/* Check for some "global" command line params */
parse_globals(argc, argv, &cmd_line);
OBJ_DESTRUCT(&cmd_line);