
Since the MPIR symbols are now included in the ORTE library, remove duplicate declarations in OMPI and replace them with extern references to their ORTE instantiations. This commit was SVN r23360.
195 строки
6.0 KiB
C
195 строки
6.0 KiB
C
/*
|
|
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/constants.h"
|
|
|
|
#include "opal/util/output.h"
|
|
|
|
#ifdef HAVE_STRING_H
|
|
#include <string.h>
|
|
#endif
|
|
#include <stdio.h>
|
|
#ifdef HAVE_STDLIB_H
|
|
#include <stdlib.h>
|
|
#endif /* HAVE_STDLIB_H */
|
|
#ifdef HAVE_STRINGS_H
|
|
#include <strings.h>
|
|
#endif /* HAVE_STRINGS_H */
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif /* HAVE_UNISTD_H */
|
|
#include <ctype.h>
|
|
|
|
#include "opal/util/argv.h"
|
|
#include "opal/util/os_path.h"
|
|
#include "opal/util/path.h"
|
|
#include "opal/util/opal_environ.h"
|
|
|
|
#include "orte/util/name_fns.h"
|
|
#include "orte/util/show_help.h"
|
|
#include "orte/mca/rml/rml.h"
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
#include "orte/runtime/orte_globals.h"
|
|
|
|
#include "orte/mca/debugger/base/base.h"
|
|
|
|
/* instance the standard MPIR interfaces */
|
|
struct MPIR_PROCDESC *MPIR_proctable = NULL;
|
|
int MPIR_proctable_size = 0;
|
|
volatile int MPIR_being_debugged = 0;
|
|
volatile int MPIR_debug_state = 0;
|
|
volatile int MPIR_i_am_starter = 0;
|
|
volatile int MPIR_partial_attach_ok = 1;
|
|
volatile char MPIR_executable_path[MPIR_MAX_PATH_LENGTH];
|
|
volatile char MPIR_server_arguments[MPIR_MAX_ARG_LENGTH];
|
|
volatile int MPIR_forward_output = 0;
|
|
volatile int MPIR_forward_comm = 0;
|
|
char MPIR_attach_fifo[MPIR_MAX_PATH_LENGTH];
|
|
|
|
#define DUMP_INT(X) fprintf(stderr, " %s = %d\n", # X, X);
|
|
|
|
void orte_debugger_base_dump(void)
|
|
{
|
|
int i;
|
|
|
|
DUMP_INT(MPIR_being_debugged);
|
|
DUMP_INT(MPIR_debug_state);
|
|
DUMP_INT(MPIR_partial_attach_ok);
|
|
DUMP_INT(MPIR_i_am_starter);
|
|
DUMP_INT(MPIR_forward_output);
|
|
DUMP_INT(MPIR_proctable_size);
|
|
fprintf(stderr, " MPIR_proctable:\n");
|
|
for (i = 0; i < MPIR_proctable_size; i++) {
|
|
fprintf(stderr,
|
|
" (i, host, exe, pid) = (%d, %s, %s, %d)\n",
|
|
i,
|
|
MPIR_proctable[i].host_name,
|
|
MPIR_proctable[i].executable_name,
|
|
MPIR_proctable[i].pid);
|
|
}
|
|
fprintf(stderr, "MPIR_executable_path: %s\n",
|
|
('\0' == MPIR_executable_path[0]) ?
|
|
"NULL" : (char*) MPIR_executable_path);
|
|
fprintf(stderr, "MPIR_server_arguments: %s\n",
|
|
('\0' == MPIR_server_arguments[0]) ?
|
|
"NULL" : (char*) MPIR_server_arguments);
|
|
}
|
|
|
|
/*
|
|
* Initialization of data structures for running under a debugger
|
|
* using the MPICH/TotalView parallel debugger interface. This stage
|
|
* of initialization must occur after spawn
|
|
*
|
|
* NOTE: We -always- perform this step to ensure that any debugger
|
|
* that attaches to us post-launch of the application can get a
|
|
* completed proctable
|
|
*/
|
|
void orte_debugger_base_init_after_spawn(orte_job_t *jdata)
|
|
{
|
|
orte_proc_t *proc;
|
|
orte_app_context_t *appctx;
|
|
orte_vpid_t i, j;
|
|
opal_buffer_t buf;
|
|
orte_process_name_t rank0;
|
|
int rc;
|
|
|
|
if (MPIR_proctable) {
|
|
/* already initialized */
|
|
return;
|
|
}
|
|
|
|
/* fill in the proc table for the application processes */
|
|
|
|
if (orte_debug_flag) {
|
|
opal_output(0, "Info: Setting up debugger process table for applications\n");
|
|
}
|
|
|
|
MPIR_debug_state = 1;
|
|
|
|
/* set the total number of processes in the job */
|
|
MPIR_proctable_size = jdata->num_procs;
|
|
|
|
/* allocate MPIR_proctable */
|
|
MPIR_proctable = (struct MPIR_PROCDESC *) malloc(sizeof(struct MPIR_PROCDESC) *
|
|
MPIR_proctable_size);
|
|
if (MPIR_proctable == NULL) {
|
|
opal_output(0, "Error: Out of memory\n");
|
|
return;
|
|
}
|
|
|
|
if (orte_output_debugger_proctable) {
|
|
opal_output(orte_clean_output, "MPIR Proctable for job %s", ORTE_JOBID_PRINT(jdata->jobid));
|
|
}
|
|
|
|
/* initialize MPIR_proctable */
|
|
for (j=0; j < jdata->num_procs; j++) {
|
|
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
|
|
continue;
|
|
}
|
|
/* store this data in the location whose index
|
|
* corresponds to the proc's rank
|
|
*/
|
|
i = proc->name.vpid;
|
|
if (NULL == (appctx = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx))) {
|
|
continue;
|
|
}
|
|
|
|
MPIR_proctable[i].host_name = strdup(proc->node->name);
|
|
if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) {
|
|
MPIR_proctable[i].executable_name =
|
|
opal_os_path( false, appctx->app, NULL );
|
|
} else {
|
|
MPIR_proctable[i].executable_name =
|
|
opal_os_path( false, appctx->cwd, appctx->app, NULL );
|
|
}
|
|
MPIR_proctable[i].pid = proc->pid;
|
|
if (orte_output_debugger_proctable) {
|
|
opal_output(orte_clean_output, "%s: Host %s Exe %s Pid %d",
|
|
ORTE_VPID_PRINT(i), MPIR_proctable[i].host_name,
|
|
MPIR_proctable[i].executable_name, MPIR_proctable[i].pid);
|
|
}
|
|
}
|
|
|
|
if (orte_debug_flag) {
|
|
orte_debugger_base_dump();
|
|
}
|
|
|
|
/* if we are being launched under a debugger, then we must wait
|
|
* for it to be ready to go and do some things to start the job
|
|
*/
|
|
if (MPIR_being_debugged) {
|
|
/* wait for all procs to have reported their contact info - this
|
|
* ensures that (a) they are all into mpi_init, and (b) the system
|
|
* has the contact info to successfully send a message to rank=0
|
|
*/
|
|
ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
|
|
|
|
(void) MPIR_Breakpoint();
|
|
|
|
/* send a message to rank=0 to release it */
|
|
OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
|
|
rank0.jobid = jdata->jobid;
|
|
rank0.vpid = 0;
|
|
if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
|
|
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
|
}
|
|
OBJ_DESTRUCT(&buf);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Breakpoint function for parallel debuggers
|
|
*/
|
|
void *MPIR_Breakpoint(void)
|
|
{
|
|
return NULL;
|
|
}
|