1
1

Add the standardized way to notify a debugger when the MPI job is
about to abort.  Fixes trac:1509.

This commit was SVN r19596.

The following Trac tickets were found above:
  Ticket 1509 --> https://svn.open-mpi.org/trac/ompi/ticket/1509
This commit is contained in:
Jeff Squyres 2008-09-20 11:34:37 +00:00
parent 16561fa297
commit 5fd742e769
4 changed files with 61 additions and 4 deletions

View File

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -35,6 +35,11 @@ BEGIN_C_DECLS
*/
OMPI_DECLSPEC void ompi_wait_for_debugger(void);
/**
 * Notify a debugger that we're about to abort.
 *
 * @param string Human-readable reason for the abort (the definition names
 *               this parameter "reason").  NOTE(review): the definition
 *               visible in this change stores the pointer as-is (no copy)
 *               and substitutes "Unknown" when it is NULL or empty, so the
 *               caller must keep the buffer valid at least until this call
 *               returns.
 */
OMPI_DECLSPEC void ompi_debugger_notify_abort(char *string);
END_C_DECLS
#endif /* OMPI_DEBUGGERS_H */

View File

@ -90,6 +90,13 @@ OMPI_DECLSPEC int MPIR_debug_typedefs_sizeof[] = {
sizeof(size_t)
};
/*
 * Values defined by the standardized interface; do not change these
 * values.
 *
 * MPIR_DEBUG_ABORTING is what ompi_debugger_notify_abort() stores into
 * MPIR_debug_state before calling MPIR_Breakpoint().
 * NOTE(review): MPIR_DEBUG_SPAWNED is presumably stored into the same
 * variable by the starter -- confirm against the interface spec.
 */
#define MPIR_DEBUG_SPAWNED 1
#define MPIR_DEBUG_ABORTING 2
/**
* There is an issue with the debugger running on different architectures
* compared with the debugged program. We need to know the sizes of the types
@ -114,6 +121,8 @@ OMPI_DECLSPEC ompi_datatype_t* ompi_datatype_t_type_inclusion = NULL;
OMPI_DECLSPEC volatile int MPIR_debug_gate = 0;
OMPI_DECLSPEC volatile int MPIR_being_debugged = 0;
OMPI_DECLSPEC volatile int MPIR_debug_state = 0;
/*
 * Reason text for an abort.  Starts out as the empty string;
 * ompi_debugger_notify_abort() points it at the caller's reason string
 * (or at "Unknown") just before calling MPIR_Breakpoint().  The pointer
 * is borrowed, not copied.
 */
OMPI_DECLSPEC char *MPIR_debug_abort_string = "";
/* Check for a file in a few direct ways for portability */
static void check(char *dir, char *file, char **locations)
@ -238,3 +247,30 @@ void ompi_wait_for_debugger(void)
}
}
}
/*
 * Breakpoint function for parallel debuggers. This function is also
 * defined in orterun for the starter. It should never conflict with
 * this one, but we'll make it static, just to be sure.
 *
 * The body is intentionally empty: the function does nothing except
 * return NULL.  ompi_debugger_notify_abort() calls it after updating
 * MPIR_debug_state and MPIR_debug_abort_string.
 *
 * NOTE(review): presumably an attached debugger plants a breakpoint on
 * this symbol; because it is static with an empty body, confirm that the
 * compiler does not inline or elide it in optimized builds.
 */
static void *MPIR_Breakpoint(void)
{
return NULL;
}
/*
 * Tell the debugger that we are about to abort.
 *
 * Marks the debug state as MPIR_DEBUG_ABORTING, publishes the abort
 * reason through MPIR_debug_abort_string, and then calls
 * MPIR_Breakpoint().
 *
 * reason: text describing why we are aborting.  A NULL or empty string
 *         is replaced by "Unknown".  The pointer itself is stored (the
 *         text is not copied), so it must stay valid for as long as
 *         MPIR_debug_abort_string may be read.
 */
void ompi_debugger_notify_abort(char *reason)
{
    /* Fallback text used when the caller gives us nothing usable */
    char *abort_text = "Unknown";

    if (NULL != reason && '\0' != reason[0]) {
        abort_text = reason;
    }

    /* Publish the state first, then the reason, then hit the breakpoint */
    MPIR_debug_state = MPIR_DEBUG_ABORTING;
    MPIR_debug_abort_string = abort_text;

    MPIR_Breakpoint();
}

View File

@ -10,6 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -213,5 +214,5 @@ static void backend_fatal(char *type, struct ompi_communicator_t *comm,
comm = &ompi_mpi_comm_self;
}
ompi_mpi_abort(comm, 1, false);
ompi_mpi_abort(comm, *error_code, false);
}

View File

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -42,6 +42,8 @@
#include "ompi/proc/proc.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#include "ompi/debuggers/debuggers.h"
#include "ompi/errhandler/errcode.h"
static bool have_been_invoked = false;
@ -51,7 +53,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
bool kill_remote_of_intercomm)
{
int count = 0, i;
char *host, hostname[MAXHOSTNAMELEN];
char *msg, *host, hostname[MAXHOSTNAMELEN];
pid_t pid = 0;
orte_process_name_t *abort_procs;
orte_std_cntr_t nabort_procs;
@ -94,6 +96,19 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
}
}
/* Notify the debugger that we're about to abort */
if (asprintf(&msg, "[%s:%d] aborting with MPI error %s%s",
host, (int) pid, ompi_mpi_errnum_get_string(errcode),
ompi_mpi_abort_print_stack ?
" (stack trace available on stderr)" : "") < 0) {
msg = NULL;
}
ompi_debugger_notify_abort(msg);
if (NULL != msg) {
free(msg);
}
/* Should we wait for a while before aborting? */
if (0 != ompi_mpi_abort_delay) {