1
1

Allow debuggers to attach to a running mpirun by -always- setting up the MPIR_Proctable. Only wait for MPIR_Breakpoint and hold MPI procs if we are launching under a debugger.

This commit was SVN r19079.
Этот коммит содержится в:
Ralph Castain 2008-07-29 17:39:16 +00:00
родитель a4d905db4a
Коммит d45d728e8e
2 изменённых файлов: 25 добавлений и 24 удалений

Просмотреть файл

@@ -392,10 +392,11 @@ void orte_debugger_init_before_spawn(orte_job_t *jdata)
/**
* Initialization of data structures for running under a debugger
* using the MPICH/TotalView parallel debugger interface. This stage
* of initialization must occur after stage2 of spawn and is invoked
* via a callback.
* of initialization must occur after spawn
*
* @param jobid The jobid returned by spawn.
* NOTE: We -always- perform this step to ensure that any debugger
* that attaches to us post-launch of the application can get a
* completed proctable
*/
void orte_debugger_init_after_spawn(orte_job_t *jdata)
{
@@ -406,11 +407,6 @@ void orte_debugger_init_after_spawn(orte_job_t *jdata)
orte_process_name_t rank0;
int rc;
if (!MPIR_being_debugged) {
/* not being debugged */
return;
}
if (MPIR_proctable) {
/* already initialized */
return;
@@ -462,22 +458,27 @@ void orte_debugger_init_after_spawn(orte_job_t *jdata)
dump();
}
/* wait for all procs to have reported their contact info - this
* ensures that (a) they are all into mpi_init, and (b) the system
* has the contact info to successfully send a message to rank=0
/* if we are being launched under a debugger, then we must wait
* for it to be ready to go and do some things to start the job
*/
ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
(void) MPIR_Breakpoint();
/* send a message to rank=0 to release it */
OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
rank0.jobid = jdata->jobid;
rank0.vpid = 0;
if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
if (MPIR_being_debugged) {
/* wait for all procs to have reported their contact info - this
* ensures that (a) they are all into mpi_init, and (b) the system
* has the contact info to successfully send a message to rank=0
*/
ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
(void) MPIR_Breakpoint();
/* send a message to rank=0 to release it */
OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
rank0.jobid = jdata->jobid;
rank0.vpid = 0;
if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
}
OBJ_DESTRUCT(&buf);
}
OBJ_DESTRUCT(&buf);
}

Просмотреть файл

@@ -543,7 +543,7 @@ int orterun(int argc, char *argv[])
opal_signal_add(&sigusr2_handler, NULL);
#endif /* __WINDOWS__ */
/* setup for debugging, if we are doing so */
/* setup for debugging */
orte_debugger_init_before_spawn(jdata);
/* setup an event we can wait for that will tell
@@ -562,7 +562,7 @@ int orterun(int argc, char *argv[])
/* Spawn the job */
rc = orte_plm.spawn(jdata);
/* complete debugger interface, if we are debugging */
/* complete debugger interface */
orte_debugger_init_after_spawn(jdata);
/* now wait until the termination event fires */