1
1
openmpi/orte/mca/debugger/base/debugger_base_fns.c
Ralph Castain 2c1a658232 Fix debugger attach
This commit was SVN r23921.
2010-10-22 20:07:24 +00:00

189 строки
5.8 KiB
C

/*
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/util/output.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <stdio.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif /* HAVE_STRINGS_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <ctype.h>
#include "opal/util/argv.h"
#include "opal/util/os_path.h"
#include "opal/util/path.h"
#include "opal/util/opal_environ.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/debugger/base/base.h"
#define DUMP_INT(X) fprintf(stderr, " %s = %d\n", # X, X);
#if !ORTE_DISABLE_FULL_SUPPORT
void orte_debugger_base_dump(void)
{
int i;
DUMP_INT(MPIR_being_debugged);
DUMP_INT(MPIR_debug_state);
DUMP_INT(MPIR_partial_attach_ok);
DUMP_INT(MPIR_i_am_starter);
DUMP_INT(MPIR_forward_output);
DUMP_INT(MPIR_proctable_size);
fprintf(stderr, " MPIR_proctable:\n");
for (i = 0; i < MPIR_proctable_size; i++) {
fprintf(stderr,
" (i, host, exe, pid) = (%d, %s, %s, %d)\n",
i,
MPIR_proctable[i].host_name,
MPIR_proctable[i].executable_name,
MPIR_proctable[i].pid);
}
fprintf(stderr, "MPIR_executable_path: %s\n",
('\0' == MPIR_executable_path[0]) ?
"NULL" : (char*) MPIR_executable_path);
fprintf(stderr, "MPIR_server_arguments: %s\n",
('\0' == MPIR_server_arguments[0]) ?
"NULL" : (char*) MPIR_server_arguments);
}
/*
* Initialization of data structures for running under a debugger
* using the MPICH/TotalView parallel debugger interface. This stage
* of initialization must occur after spawn
*
* NOTE: We -always- perform this step to ensure that any debugger
* that attaches to us post-launch of the application can get a
* completed proctable
*/
void orte_debugger_base_init_after_spawn(orte_job_t *jdata)
{
orte_proc_t *proc;
orte_app_context_t *appctx;
orte_vpid_t i, j;
opal_buffer_t buf;
orte_process_name_t rank0;
int rc;
if (MPIR_proctable) {
/* already initialized */
opal_output_verbose(5, orte_debugger_base.output,
"%s: debugger already initialized",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
return;
}
/* fill in the proc table for the application processes */
opal_output_verbose(5, orte_debugger_base.output,
"%s: Setting up debugger process table for applications",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
MPIR_debug_state = 1;
/* set the total number of processes in the job */
MPIR_proctable_size = jdata->num_procs;
/* allocate MPIR_proctable */
MPIR_proctable = (struct MPIR_PROCDESC *) malloc(sizeof(struct MPIR_PROCDESC) *
MPIR_proctable_size);
if (MPIR_proctable == NULL) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return;
}
if (orte_debugger_base.dump_proctable) {
opal_output(orte_clean_output, "MPIR Proctable for job %s", ORTE_JOBID_PRINT(jdata->jobid));
}
/* initialize MPIR_proctable */
for (j=0; j < jdata->num_procs; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
continue;
}
/* store this data in the location whose index
* corresponds to the proc's rank
*/
i = proc->name.vpid;
if (NULL == (appctx = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx))) {
continue;
}
MPIR_proctable[i].host_name = strdup(proc->node->name);
if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) {
MPIR_proctable[i].executable_name =
opal_os_path( false, appctx->app, NULL );
} else {
MPIR_proctable[i].executable_name =
opal_os_path( false, appctx->cwd, appctx->app, NULL );
}
MPIR_proctable[i].pid = proc->pid;
if (orte_debugger_base.dump_proctable) {
opal_output(orte_clean_output, "%s: Host %s Exe %s Pid %d",
ORTE_VPID_PRINT(i), MPIR_proctable[i].host_name,
MPIR_proctable[i].executable_name, MPIR_proctable[i].pid);
}
}
if (0 < opal_output_get_verbosity(orte_debugger_base.output)) {
orte_debugger_base_dump();
}
/* if we are being launched under a debugger, then we must wait
* for it to be ready to go and do some things to start the job
*/
if (MPIR_being_debugged) {
/* wait for all procs to have reported their contact info - this
* ensures that (a) they are all into mpi_init, and (b) the system
* has the contact info to successfully send a message to rank=0
*/
ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
(void) MPIR_Breakpoint();
/* send a message to rank=0 to release it */
OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
rank0.jobid = jdata->jobid;
rank0.vpid = 0;
if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
}
OBJ_DESTRUCT(&buf);
}
}
#endif
/*
* Breakpoint function for parallel debuggers
*/
void *MPIR_Breakpoint(void)
{
return NULL;
}