Allow debuggers to attach to a running mpirun by -always- setting up the MPIR_Proctable. Only wait for MPIR_Breakpoint and hold MPI procs
if we are launching under a debugger. This commit was SVN r19079.
Этот коммит содержится в:
родитель
a4d905db4a
Коммит
d45d728e8e
@ -392,10 +392,11 @@ void orte_debugger_init_before_spawn(orte_job_t *jdata)
|
|||||||
/**
|
/**
|
||||||
* Initialization of data structures for running under a debugger
|
* Initialization of data structures for running under a debugger
|
||||||
* using the MPICH/TotalView parallel debugger interface. This stage
|
* using the MPICH/TotalView parallel debugger interface. This stage
|
||||||
* of initialization must occur after stage2 of spawn and is invoked
|
* of initialization must occur after spawn
|
||||||
* via a callback.
|
|
||||||
*
|
*
|
||||||
* @param jobid The jobid returned by spawn.
|
* NOTE: We -always- perform this step to ensure that any debugger
|
||||||
|
* that attaches to us post-launch of the application can get a
|
||||||
|
* completed proctable
|
||||||
*/
|
*/
|
||||||
void orte_debugger_init_after_spawn(orte_job_t *jdata)
|
void orte_debugger_init_after_spawn(orte_job_t *jdata)
|
||||||
{
|
{
|
||||||
@ -406,11 +407,6 @@ void orte_debugger_init_after_spawn(orte_job_t *jdata)
|
|||||||
orte_process_name_t rank0;
|
orte_process_name_t rank0;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
if (!MPIR_being_debugged) {
|
|
||||||
/* not being debugged */
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (MPIR_proctable) {
|
if (MPIR_proctable) {
|
||||||
/* already initialized */
|
/* already initialized */
|
||||||
return;
|
return;
|
||||||
@ -462,22 +458,27 @@ void orte_debugger_init_after_spawn(orte_job_t *jdata)
|
|||||||
dump();
|
dump();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* wait for all procs to have reported their contact info - this
|
/* if we are being launched under a debugger, then we must wait
|
||||||
* ensures that (a) they are all into mpi_init, and (b) the system
|
* for it to be ready to go and do some things to start the job
|
||||||
* has the contact info to successfully send a message to rank=0
|
|
||||||
*/
|
*/
|
||||||
ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
|
if (MPIR_being_debugged) {
|
||||||
|
/* wait for all procs to have reported their contact info - this
|
||||||
(void) MPIR_Breakpoint();
|
* ensures that (a) they are all into mpi_init, and (b) the system
|
||||||
|
* has the contact info to successfully send a message to rank=0
|
||||||
/* send a message to rank=0 to release it */
|
*/
|
||||||
OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
|
ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
|
||||||
rank0.jobid = jdata->jobid;
|
|
||||||
rank0.vpid = 0;
|
(void) MPIR_Breakpoint();
|
||||||
if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
|
|
||||||
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
/* send a message to rank=0 to release it */
|
||||||
|
OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
|
||||||
|
rank0.jobid = jdata->jobid;
|
||||||
|
rank0.vpid = 0;
|
||||||
|
if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
|
||||||
|
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
||||||
|
}
|
||||||
|
OBJ_DESTRUCT(&buf);
|
||||||
}
|
}
|
||||||
OBJ_DESTRUCT(&buf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -543,7 +543,7 @@ int orterun(int argc, char *argv[])
|
|||||||
opal_signal_add(&sigusr2_handler, NULL);
|
opal_signal_add(&sigusr2_handler, NULL);
|
||||||
#endif /* __WINDOWS__ */
|
#endif /* __WINDOWS__ */
|
||||||
|
|
||||||
/* setup for debugging, if we are doing so */
|
/* setup for debugging */
|
||||||
orte_debugger_init_before_spawn(jdata);
|
orte_debugger_init_before_spawn(jdata);
|
||||||
|
|
||||||
/* setup an event we can wait for that will tell
|
/* setup an event we can wait for that will tell
|
||||||
@ -562,7 +562,7 @@ int orterun(int argc, char *argv[])
|
|||||||
/* Spawn the job */
|
/* Spawn the job */
|
||||||
rc = orte_plm.spawn(jdata);
|
rc = orte_plm.spawn(jdata);
|
||||||
|
|
||||||
/* complete debugger interface, if we are debugging */
|
/* complete debugger interface */
|
||||||
orte_debugger_init_after_spawn(jdata);
|
orte_debugger_init_after_spawn(jdata);
|
||||||
|
|
||||||
/* now wait until the termination event fires */
|
/* now wait until the termination event fires */
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user