Update the debugger interface per the email thread with Jeff and Brian. Hand off to them for final testing and validation.
This commit was SVN r18670.
Этот коммит содержится в:
родитель
558e68088c
Коммит
282a220e7e
@ -65,6 +65,9 @@
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi/include/mpi.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
|
||||
#if defined(OMPI_MSGQ_DLL)
|
||||
/* This variable is old/deprecated -- the mpimsgq_dll_locations[]
|
||||
method is preferred because it's more flexible */
|
||||
@ -107,6 +110,8 @@ OMPI_DECLSPEC ompi_datatype_t* ompi_datatype_t_type_inclusion = NULL;
|
||||
|
||||
OMPI_DECLSPEC volatile int MPIR_debug_gate=0;
|
||||
|
||||
/* we don't believe we need MPIR_being_debugged here */
|
||||
|
||||
/* Check for a file in a few direct ways for portability */
|
||||
static void check(char *dir, char *file, char **locations)
|
||||
{
|
||||
@ -144,30 +149,33 @@ static void check(char *dir, char *file, char **locations)
|
||||
*/
|
||||
void ompi_wait_for_debugger(void)
|
||||
{
|
||||
int i, wait_for_debugger, wait_for_tv;
|
||||
int i, debugger, rc;
|
||||
char *a, *b, **dirs;
|
||||
opal_buffer_t buf;
|
||||
|
||||
/* Do we need to wait for a TotalView-like debugger? */
|
||||
/* are we being debugged by a TotalView-like debugger? */
|
||||
mca_base_param_reg_int_name("ompi",
|
||||
"mpi_wait_for_debugger",
|
||||
"mpi_being_debugged",
|
||||
"Whether the MPI application "
|
||||
"should wait for a debugger or not",
|
||||
"is being debugged (default: false)",
|
||||
false, false, (int) false,
|
||||
&wait_for_debugger);
|
||||
mca_base_param_reg_int_name("ompi",
|
||||
"mpi_wait_for_totalview",
|
||||
"Deprecated synonym for mpi_wait_for_debugger",
|
||||
false, false, (int) false,
|
||||
&wait_for_tv);
|
||||
wait_for_debugger |= wait_for_tv;
|
||||
|
||||
&debugger);
|
||||
|
||||
if (!debugger) {
|
||||
/* if not, just return */
|
||||
return;
|
||||
}
|
||||
|
||||
/* if we are being debugged, then we need to find
|
||||
* the correct plug-in
|
||||
*/
|
||||
a = strdup(opal_install_dirs.pkglibdir);
|
||||
mca_base_param_reg_string_name("ompi",
|
||||
"debugger_dll_path",
|
||||
"List of directories where MPI_INIT should search for debugger plugins",
|
||||
false, false, a, &b);
|
||||
free(a);
|
||||
|
||||
|
||||
/* Search the directory for MPI debugger DLLs */
|
||||
if (NULL != b) {
|
||||
dirs = opal_argv_split(b, ':');
|
||||
@ -176,23 +184,53 @@ void ompi_wait_for_debugger(void)
|
||||
check(dirs[i], OMPI_MSGQ_DLL_PREFIX, mpimsgq_dll_locations);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we're waiting for the debugger, then, well, wait for it. :-) */
|
||||
if (wait_for_debugger) {
|
||||
/* RHC: the following is a temporary hack until we figure
|
||||
* out how to resolve the problem of where to
|
||||
* instance the MPIR* variables so that multiple
|
||||
* launchers can access them
|
||||
|
||||
/* only the rank=0 proc waits for the debugger - everyone else will just
|
||||
* spin in the barrier in mpi_init until rank=0 joins them
|
||||
*/
|
||||
if (0 != ORTE_PROC_MY_NAME->vpid) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* we have to support at least two ways of completing the
|
||||
* debug attachment - either we will get a message from
|
||||
* the HNP telling us it is okay to release, or the debugger
|
||||
* itself will reach into us and set a gate.
|
||||
*
|
||||
* First, attempt to get a message-based release
|
||||
*/
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0);
|
||||
OBJ_DESTRUCT(&buf); /* don't care about contents of message */
|
||||
|
||||
if (rc > 0) {
|
||||
/* message received - we can go! */
|
||||
return;
|
||||
} else if (ORTE_ERR_NOT_SUPPORTED == rc) {
|
||||
/* if the recv isn't supported, then we fall back
|
||||
* to the alternative method for waiting
|
||||
*/
|
||||
while (MPIR_debug_gate == 0) {
|
||||
goto spin_wait;
|
||||
} else {
|
||||
/* if it failed for some other reason, then we are
|
||||
* in trouble - for now, just report the problem
|
||||
* and give up waiting
|
||||
*/
|
||||
opal_output(0, "Debugger_attach[rank=%ld]: could not wait for debugger - error %s!",
|
||||
(long)ORTE_PROC_MY_NAME->vpid, ORTE_ERROR_NAME(rc));
|
||||
return;
|
||||
}
|
||||
|
||||
spin_wait:
|
||||
/* spin until debugger attaches and releases us */
|
||||
while (MPIR_debug_gate == 0) {
|
||||
#if defined(__WINDOWS__)
|
||||
Sleep(100); /* milliseconds */
|
||||
Sleep(100); /* milliseconds */
|
||||
#elif defined(HAVE_USLEEP)
|
||||
usleep(100000); /* microseconds */
|
||||
usleep(100000); /* microseconds */
|
||||
#else
|
||||
sleep(1); /* seconds */
|
||||
sleep(1); /* seconds */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -592,12 +592,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
orte_process_info.nodename);
|
||||
}
|
||||
|
||||
/* wait for everyone to reach this point */
|
||||
if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) {
|
||||
error = "orte_grpcomm_barrier failed";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* wire up the oob interface, if requested. Do this here because
|
||||
it will go much faster before the event library is switched
|
||||
into non-blocking mode */
|
||||
@ -606,11 +600,31 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Do we need to wait for a debugger? */
|
||||
ompi_wait_for_debugger();
|
||||
|
||||
/* check for timing request - get stop time and report elapsed
|
||||
time if so, then start the clock again */
|
||||
if (timing && 0 == ORTE_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from modex thru complete oob wireup %ld usec",
|
||||
(long)ORTE_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
|
||||
/* wait for everyone to reach this point */
|
||||
if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) {
|
||||
error = "orte_grpcomm_barrier failed";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* check for timing request - get stop time and report elapsed
|
||||
time if so, then start the clock again */
|
||||
if (timing && 0 == ORTE_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from stage 2 cast to complete oob wireup %ld usec",
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time to execute barrier %ld usec",
|
||||
(long)ORTE_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
@ -772,13 +786,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
|
||||
ompi_mpi_initialized = true;
|
||||
|
||||
/* Do we need to wait for a debugger? */
|
||||
ompi_wait_for_debugger();
|
||||
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (timing && 0 == ORTE_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from oob wireup to complete mpi_init %ld usec",
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from barrier p to complete mpi_init %ld usec",
|
||||
(long)ORTE_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
|
@ -50,7 +50,6 @@
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/totalview.h"
|
||||
#include "orte/util/nidmap.h"
|
||||
|
||||
#include "orte/mca/plm/base/plm_private.h"
|
||||
@ -214,9 +213,6 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* init any debuggers */
|
||||
orte_totalview_init_after_spawn(job);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:launch completed for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
|
@ -103,6 +103,8 @@ BEGIN_C_DECLS
|
||||
/* show help */
|
||||
#define ORTE_RML_TAG_SHOW_HELP 31
|
||||
|
||||
/* debugger release */
|
||||
#define ORTE_RML_TAG_DEBUGGER_RELEASE 32
|
||||
|
||||
#define ORTE_RML_TAG_MAX 100
|
||||
|
||||
|
@ -39,6 +39,8 @@ endif # OMPI_INSTALL_BINARIES
|
||||
orterun_SOURCES = \
|
||||
main.c \
|
||||
orterun.c \
|
||||
orterun.h
|
||||
orterun.h \
|
||||
debuggers.h \
|
||||
debuggers.c
|
||||
|
||||
orterun_LDADD = $(top_builddir)/orte/libopen-rte.la
|
||||
|
@ -53,37 +53,34 @@
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/plm/plm_types.h"
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orte/util/totalview.h"
|
||||
#include "debuggers.h"
|
||||
|
||||
/* +++ begin MPICH/TotalView interface definitions */
|
||||
|
||||
#define MPIR_DEBUG_SPAWNED 1
|
||||
#define MPIR_DEBUG_ABORTING 2
|
||||
/* +++ begin MPICH/TotalView std debugger interface definitions */
|
||||
|
||||
struct MPIR_PROCDESC {
|
||||
char *host_name; /* something that can be passed to inet_addr */
|
||||
char *executable_name; /* name of binary */
|
||||
int pid; /* process pid */
|
||||
};
|
||||
|
||||
struct MPIR_PROCDESC *MPIR_proctable = NULL;
|
||||
int MPIR_proctable_size = 0;
|
||||
int MPIR_being_debugged = 0;
|
||||
int MPIR_force_to_main = 0;
|
||||
bool MPIR_being_debugged = false;
|
||||
volatile int MPIR_debug_state = 0;
|
||||
volatile int MPIR_i_am_starter = 0;
|
||||
volatile int MPIR_acquired_pre_main = 0;
|
||||
volatile int MPIR_partial_attach_ok = 1;
|
||||
|
||||
/* --- end MPICH/TotalView interface definitions */
|
||||
/* --- end MPICH/TotalView std debugger interface definitions */
|
||||
|
||||
|
||||
#define DUMP_INT(X) fprintf(stderr, " %s = %d\n", # X, X);
|
||||
@ -94,7 +91,7 @@ static void dump(void)
|
||||
|
||||
DUMP_INT(MPIR_being_debugged);
|
||||
DUMP_INT(MPIR_debug_state);
|
||||
DUMP_INT(MPIR_acquired_pre_main);
|
||||
DUMP_INT(MPIR_partial_attach_ok);
|
||||
DUMP_INT(MPIR_i_am_starter);
|
||||
DUMP_INT(MPIR_proctable_size);
|
||||
fprintf(stderr, " MPIR_proctable:\n");
|
||||
@ -365,31 +362,29 @@ void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
|
||||
* spawn we need to check if we are being run under a TotalView-like
|
||||
* debugger; if so then inform applications via an MCA parameter.
|
||||
*/
|
||||
void orte_totalview_init_before_spawn(void)
|
||||
void orte_debugger_init_before_spawn(orte_job_t *jdata)
|
||||
{
|
||||
if (MPIR_DEBUG_SPAWNED == MPIR_being_debugged) {
|
||||
char *s;
|
||||
orte_app_context_t **apps;
|
||||
orte_std_cntr_t i;
|
||||
|
||||
int value;
|
||||
char *s;
|
||||
|
||||
if (orte_debug_flag) {
|
||||
opal_output(0, "Info: Spawned by a debugger");
|
||||
}
|
||||
|
||||
if (mca_base_param_reg_int_name("ompi", "mpi_wait_for_totalview",
|
||||
"Whether the MPI application should wait for a debugger or not",
|
||||
false, false, (int)false, &value) < 0) {
|
||||
opal_output(0, "Error: mca_base_param_reg_int_name\n");
|
||||
}
|
||||
|
||||
/* push mca parameter into the environment (not done automatically?) */
|
||||
|
||||
s = mca_base_param_environ_variable("ompi", "mpi_wait_for_totalview", NULL);
|
||||
if (ORTE_SUCCESS != opal_setenv(s, "1", true, &environ)) {
|
||||
opal_output(0, "Error: Can't setenv %s\n", s);
|
||||
}
|
||||
free(s);
|
||||
if (!MPIR_being_debugged) {
|
||||
/* not being debugged */
|
||||
return;
|
||||
}
|
||||
|
||||
if (orte_debug_flag) {
|
||||
opal_output(0, "Info: Spawned by a debugger");
|
||||
}
|
||||
|
||||
apps = (orte_app_context_t**)jdata->apps->addr;
|
||||
/* tell the procs they are being debugged */
|
||||
s = mca_base_param_environ_variable("ompi", "mpi_being_debugged", NULL);
|
||||
|
||||
for (i=0; i < jdata->num_apps; i++) {
|
||||
opal_setenv(s, "1", true, &apps[i]->env);
|
||||
}
|
||||
free(s);
|
||||
}
|
||||
|
||||
|
||||
@ -401,80 +396,65 @@ void orte_totalview_init_before_spawn(void)
|
||||
*
|
||||
* @param jobid The jobid returned by spawn.
|
||||
*/
|
||||
void orte_totalview_init_after_spawn(orte_jobid_t jobid)
|
||||
void orte_debugger_init_after_spawn(orte_job_t *jdata)
|
||||
{
|
||||
orte_job_t *jdata;
|
||||
orte_proc_t **procs;
|
||||
orte_app_context_t *appctx, **apps;
|
||||
orte_vpid_t i, j;
|
||||
|
||||
opal_buffer_t buf;
|
||||
orte_process_name_t rank0;
|
||||
int rc;
|
||||
|
||||
if (!MPIR_being_debugged) {
|
||||
/* not being debugged */
|
||||
return;
|
||||
}
|
||||
|
||||
if (MPIR_proctable) {
|
||||
/* already initialized */
|
||||
return;
|
||||
}
|
||||
|
||||
if (0) { /* debugging daemons <<-- needs work */
|
||||
|
||||
if (orte_debug_flag) {
|
||||
opal_output(0, "Info: Setting up debugger process table for daemons\n");
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
/*
|
||||
* Debugging applications or not being debugged.
|
||||
*
|
||||
* Either way, fill in the proc table for the application
|
||||
* processes in case someone attaches later.
|
||||
*/
|
||||
|
||||
if (orte_debug_flag) {
|
||||
opal_output(0, "Info: Setting up debugger process table for applications\n");
|
||||
}
|
||||
|
||||
MPIR_debug_state = 1;
|
||||
|
||||
/* Get the job data for this job */
|
||||
if (NULL == (jdata = orte_get_job_data_object(jobid))) {
|
||||
opal_output(0, "Error: Can't get job data\n");
|
||||
return;
|
||||
/* fill in the proc table for the application processes */
|
||||
|
||||
if (orte_debug_flag) {
|
||||
opal_output(0, "Info: Setting up debugger process table for applications\n");
|
||||
}
|
||||
|
||||
MPIR_debug_state = 1;
|
||||
|
||||
/* set the total number of processes in the job */
|
||||
MPIR_proctable_size = jdata->num_procs;
|
||||
|
||||
/* allocate MPIR_proctable */
|
||||
MPIR_proctable = (struct MPIR_PROCDESC *) malloc(sizeof(struct MPIR_PROCDESC) *
|
||||
MPIR_proctable_size);
|
||||
if (MPIR_proctable == NULL) {
|
||||
opal_output(0, "Error: Out of memory\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* initialize MPIR_proctable */
|
||||
i=0;
|
||||
procs = (orte_proc_t**)jdata->procs->addr;
|
||||
apps = (orte_app_context_t**)jdata->apps->addr;
|
||||
for (j=0; j < jdata->num_procs; j++) {
|
||||
if (NULL == procs[j]) {
|
||||
opal_output(0, "Error: undefined proc at position %ld\n", (long)j);
|
||||
}
|
||||
|
||||
/* set the total number of processes in the job */
|
||||
|
||||
MPIR_proctable_size = jdata->num_procs;
|
||||
|
||||
/* allocate MPIR_proctable */
|
||||
|
||||
MPIR_proctable = (struct MPIR_PROCDESC *) malloc(sizeof(struct MPIR_PROCDESC) *
|
||||
MPIR_proctable_size);
|
||||
if (MPIR_proctable == NULL) {
|
||||
opal_output(0, "Error: Out of memory\n");
|
||||
}
|
||||
|
||||
/* initialize MPIR_proctable */
|
||||
|
||||
i=0;
|
||||
procs = (orte_proc_t**)jdata->procs->addr;
|
||||
apps = (orte_app_context_t**)jdata->apps->addr;
|
||||
for (j=0; j < jdata->num_procs; j++) {
|
||||
if (NULL == procs[j]) {
|
||||
opal_output(0, "Error: undefined proc at position %ld\n", (long)j);
|
||||
}
|
||||
|
||||
appctx = apps[procs[j]->app_idx];
|
||||
|
||||
MPIR_proctable[i].host_name = strdup(procs[j]->node->name);
|
||||
if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) {
|
||||
MPIR_proctable[i].executable_name =
|
||||
opal_os_path( false, appctx->app, NULL );
|
||||
} else {
|
||||
MPIR_proctable[i].executable_name =
|
||||
opal_os_path( false, appctx->cwd, appctx->app, NULL );
|
||||
}
|
||||
MPIR_proctable[i].pid = procs[j]->pid;
|
||||
i++;
|
||||
}
|
||||
appctx = apps[procs[j]->app_idx];
|
||||
|
||||
MPIR_proctable[i].host_name = strdup(procs[j]->node->name);
|
||||
if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) {
|
||||
MPIR_proctable[i].executable_name =
|
||||
opal_os_path( false, appctx->app, NULL );
|
||||
} else {
|
||||
MPIR_proctable[i].executable_name =
|
||||
opal_os_path( false, appctx->cwd, appctx->app, NULL );
|
||||
}
|
||||
MPIR_proctable[i].pid = procs[j]->pid;
|
||||
i++;
|
||||
}
|
||||
|
||||
if (orte_debug_flag) {
|
||||
@ -482,6 +462,15 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
|
||||
}
|
||||
|
||||
(void) MPIR_Breakpoint();
|
||||
|
||||
/* send a message to rank=0 to release it */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
|
||||
rank0.jobid = jdata->jobid;
|
||||
rank0.vpid = 0;
|
||||
if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
|
||||
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
}
|
||||
|
||||
|
||||
@ -489,10 +478,11 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
|
||||
* Release resources associated with data structures for running under
|
||||
* a debugger using the MPICH/TotalView parallel debugger interface.
|
||||
*/
|
||||
void orte_totalview_finalize(void)
|
||||
void orte_debugger_finalize(void)
|
||||
{
|
||||
if (MPIR_proctable) {
|
||||
free(MPIR_proctable);
|
||||
MPIR_proctable = NULL;
|
||||
}
|
||||
}
|
||||
|
@ -17,33 +17,23 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_TOTALVIEW_H
|
||||
#define ORTE_TOTALVIEW_H
|
||||
#ifndef ORTE_DEBUGGERS_H
|
||||
#define ORTE_DEBUGGERS_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_DECLSPEC void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
|
||||
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
|
||||
int argc, char *argv[], int num_procs) __opal_attribute_noreturn__;
|
||||
ORTE_DECLSPEC void orte_totalview_init_before_spawn(void);
|
||||
ORTE_DECLSPEC void orte_totalview_init_after_spawn(orte_jobid_t jobid);
|
||||
ORTE_DECLSPEC void orte_totalview_finalize(void);
|
||||
void orte_debugger_init_before_spawn(orte_job_t *jdata);
|
||||
void orte_debugger_init_after_spawn(orte_job_t *jdata);
|
||||
void orte_debugger_finalize(void);
|
||||
|
||||
ORTE_DECLSPEC extern void *MPIR_Breakpoint(void);
|
||||
|
||||
struct MPIR_PROCDESC {
|
||||
char *host_name; /* something that can be passed to inet_addr */
|
||||
char *executable_name; /* name of binary */
|
||||
int pid; /* process pid */
|
||||
};
|
||||
|
||||
ORTE_DECLSPEC extern struct MPIR_PROCDESC *MPIR_proctable;
|
||||
ORTE_DECLSPEC extern int MPIR_proctable_size;
|
||||
ORTE_DECLSPEC extern int MPIR_being_debugged;
|
||||
ORTE_DECLSPEC extern volatile int MPIR_debug_state;
|
||||
extern void *MPIR_Breakpoint(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_TOTALVIEW_H */
|
||||
#endif /* ORTE_DEBUGGERS_H */
|
@ -71,7 +71,6 @@
|
||||
#include "orte/util/pre_condition_transports.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/totalview.h"
|
||||
|
||||
#include "orte/mca/odls/odls.h"
|
||||
#include "orte/mca/plm/plm.h"
|
||||
@ -90,6 +89,7 @@
|
||||
/* ensure I can behave like a daemon */
|
||||
#include "orte/orted/orted.h"
|
||||
|
||||
#include "debuggers.h"
|
||||
#include "orterun.h"
|
||||
|
||||
/*
|
||||
@ -524,7 +524,9 @@ int orterun(int argc, char *argv[])
|
||||
signal_forward_callback, &sigusr2_handler);
|
||||
opal_signal_add(&sigusr2_handler, NULL);
|
||||
#endif /* __WINDOWS__ */
|
||||
orte_totalview_init_before_spawn();
|
||||
|
||||
/* setup for debugging, if we are doing so */
|
||||
orte_debugger_init_before_spawn(jdata);
|
||||
|
||||
/* setup an event we can wait for that will tell
|
||||
* us to terminate - both normal and abnormal
|
||||
@ -542,6 +544,9 @@ int orterun(int argc, char *argv[])
|
||||
/* Spawn the job */
|
||||
rc = orte_plm.spawn(jdata);
|
||||
|
||||
/* complete debugger interface, if we are debugging */
|
||||
orte_debugger_init_after_spawn(jdata);
|
||||
|
||||
/* now wait until the termination event fires */
|
||||
opal_event_dispatch();
|
||||
|
||||
@ -604,7 +609,7 @@ static void job_completed(int trigpipe, short event, void *arg)
|
||||
}
|
||||
|
||||
/* if the debuggers were run, clean up */
|
||||
orte_totalview_finalize();
|
||||
orte_debugger_finalize();
|
||||
|
||||
/* the job is complete - now setup an event that will
|
||||
* trigger when the orteds are gone and tell the orteds that it is
|
||||
|
@ -47,8 +47,7 @@ headers += \
|
||||
util/hostfile/hostfile_lex.h \
|
||||
util/dash_host/dash_host.h \
|
||||
util/comm/comm.h \
|
||||
util/nidmap.h \
|
||||
util/totalview.h
|
||||
util/nidmap.h
|
||||
|
||||
libopen_rte_la_SOURCES += \
|
||||
util/context_fns.c \
|
||||
@ -58,6 +57,5 @@ libopen_rte_la_SOURCES += \
|
||||
util/hostfile/hostfile.c \
|
||||
util/dash_host/dash_host.c \
|
||||
util/comm/comm.c \
|
||||
util/nidmap.c \
|
||||
util/totalview.c
|
||||
util/nidmap.c
|
||||
endif
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user