2007-12-21 06:02:00 +00:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
2005-08-31 20:35:15 +00:00
|
|
|
/*
|
2005-11-05 19:57:48 +00:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2008-11-06 00:00:15 +00:00
|
|
|
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
2005-11-05 19:57:48 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2005-08-31 20:35:15 +00:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2011-12-15 18:40:25 +00:00
|
|
|
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
|
2013-01-27 23:25:10 +00:00
|
|
|
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
|
|
|
* All rights reserved.
|
2005-08-31 20:35:15 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2008-07-31 22:11:46 +00:00
|
|
|
/*
|
2008-03-05 12:22:34 +00:00
|
|
|
* MPI portion of debugger support: initially based on the
|
|
|
|
* TotalView/Etnus API for debuggers to attach to MPI jobs.
|
2008-07-31 22:11:46 +00:00
|
|
|
*
|
|
|
|
* There is a lengthy explanation of how OMPI handles parallel
|
|
|
|
* debuggers attaching to MPI jobs in orte/tools/orterun/debuggers.c.
|
2005-08-31 20:35:15 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
|
2005-12-10 22:36:40 +00:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2005-08-31 20:35:15 +00:00
|
|
|
#include <unistd.h>
|
2005-12-10 22:36:40 +00:00
|
|
|
#endif /* HAVE_UNISTD_H */
|
2008-03-05 12:22:34 +00:00
|
|
|
#ifdef HAVE_DIRENT_H
|
|
|
|
#include <dirent.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_TYPES_H
|
|
|
|
#include <sys/types.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_STAT_H
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
2005-08-31 20:35:15 +00:00
|
|
|
|
|
|
|
#include "opal/mca/base/base.h"
|
2008-03-05 12:22:34 +00:00
|
|
|
#include "opal/util/argv.h"
|
|
|
|
#include "opal/mca/installdirs/installdirs.h"
|
2005-08-31 20:35:15 +00:00
|
|
|
#include "debuggers.h"
|
2013-01-27 23:25:10 +00:00
|
|
|
#include "ompi/mca/rte/rte.h"
|
2007-10-25 16:47:54 +00:00
|
|
|
/**
|
2008-11-06 00:00:15 +00:00
|
|
|
* BEWARE: The following headers are required by optimized builds in order
|
|
|
|
* to get access to the type information. Some compilers remove all type
|
|
|
|
* information on optimized build, and as a result we are unable to access
|
|
|
|
* the structure fields (i.e. to get their displacements). This file is
|
|
|
|
* included in the optimized build just to provide us with this missing
|
|
|
|
* information. Therefore, it always has to be compiled with the -g flag,
|
|
|
|
* otherwise the type information will be missing and the parallel
|
|
|
|
* debuggers will be unable to initialize the Open MPI debug library.
|
2007-10-25 16:47:54 +00:00
|
|
|
*/
|
|
|
|
#include "opal/class/opal_list.h"
|
|
|
|
#include "ompi/class/ompi_free_list.h"
|
|
|
|
#include "ompi/request/request.h"
|
|
|
|
#include "ompi/mca/pml/base/pml_base_request.h"
|
|
|
|
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
|
|
|
#include "ompi/mca/pml/base/pml_base_recvreq.h"
|
2007-12-21 06:02:00 +00:00
|
|
|
#include "opal/class/opal_pointer_array.h"
|
2007-10-25 16:47:54 +00:00
|
|
|
#include "ompi/communicator/communicator.h"
|
2009-10-28 19:12:20 +00:00
|
|
|
#include "ompi/mca/topo/topo.h"
|
2007-10-25 16:47:54 +00:00
|
|
|
#include "ompi/group/group.h"
|
2009-10-28 19:12:20 +00:00
|
|
|
#include "opal/datatype/opal_datatype.h"
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar)
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, committed)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 04:56:31 +00:00
|
|
|
#include "ompi/datatype/ompi_datatype.h"
|
2007-10-25 16:47:54 +00:00
|
|
|
#include "ompi/include/mpi.h"
|
2005-08-31 20:35:15 +00:00
|
|
|
|
2008-03-05 12:22:34 +00:00
|
|
|
#if defined(OMPI_MSGQ_DLL)
|
|
|
|
/* This variable is old/deprecated -- the mpimsgq_dll_locations[]
|
|
|
|
method is preferred because it's more flexible */
|
|
|
|
OMPI_DECLSPEC char MPIR_dll_name[] = OMPI_MSGQ_DLL;
|
|
|
|
#endif /* defined(OMPI_MSGQ_DLL) */
|
|
|
|
OMPI_DECLSPEC char **mpidbg_dll_locations = NULL;
|
|
|
|
OMPI_DECLSPEC char **mpimsgq_dll_locations = NULL;
|
2006-09-18 17:32:04 +00:00
|
|
|
|
2006-10-04 20:01:33 +00:00
|
|
|
/*
 * Sizes of the basic C types as seen by this library's compiler, in
 * the fixed order: short, int, long, long long, void *, bool, size_t.
 * Do not reorder the entries; readers index this table by position.
 */
OMPI_DECLSPEC int MPIR_debug_typedefs_sizeof[] = {
    sizeof(short), sizeof(int), sizeof(long), sizeof(long long),
    sizeof(void *), sizeof(bool), sizeof(size_t)
};
|
|
|
|
|
2008-09-20 11:34:37 +00:00
|
|
|
/*
 * State values fixed by the standardized debugger interface.
 * These numbers must never be changed.
 */
#define MPIR_DEBUG_SPAWNED   1
#define MPIR_DEBUG_ABORTING  2
|
|
|
|
|
2007-10-25 16:47:54 +00:00
|
|
|
/**
|
2008-11-06 00:00:15 +00:00
|
|
|
* BEWARE: Try to outsmart some compilers. In some cases, when variables
|
|
|
|
* are defined but not used, some compilers will optimized them out from
|
|
|
|
* the build. As we need to be able to access the structure sizes from the
|
|
|
|
* debugged program (in the case where the MPI library and the application
|
|
|
|
* is compiled with a different architecture flag than the parallel
|
|
|
|
* debugger, 32 vs. 64 bits), we have to have these variables defined.
|
2007-10-25 16:47:54 +00:00
|
|
|
*/
|
2008-11-06 00:00:15 +00:00
|
|
|
OMPI_DECLSPEC opal_list_item_t* opal_list_item_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC opal_list_t* opal_list_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC ompi_free_list_item_t* ompi_free_list_item_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC ompi_free_list_t* ompi_free_list_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC ompi_request_t* ompi_request_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC mca_pml_base_request_t* mca_pml_base_request_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC mca_pml_base_send_request_t* mca_pml_base_send_request_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC mca_pml_base_recv_request_t* mca_pml_base_recv_request_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC opal_pointer_array_t* opal_pointer_array_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC ompi_communicator_t* ompi_communicator_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC ompi_group_t* ompi_group_t_type_force_inclusion = NULL;
|
|
|
|
OMPI_DECLSPEC ompi_status_public_t* ompi_status_public_t_type_force_inclusion = NULL;
|
2009-10-28 19:12:20 +00:00
|
|
|
OMPI_DECLSPEC opal_datatype_t* opal_datatype_t_type_force_inclusion = NULL;
|
2008-11-06 00:00:15 +00:00
|
|
|
OMPI_DECLSPEC ompi_datatype_t* ompi_datatype_t_type_force_inclusion = NULL;
|
2007-10-25 16:47:54 +00:00
|
|
|
|
2008-07-31 22:11:46 +00:00
|
|
|
OMPI_DECLSPEC volatile int MPIR_debug_gate = 0;
|
2012-01-11 15:53:09 +00:00
|
|
|
OMPI_DECLSPEC volatile int MPIR_being_debugged = 0;
|
|
|
|
OMPI_DECLSPEC volatile int MPIR_debug_state = 0;
|
2008-09-20 11:34:37 +00:00
|
|
|
OMPI_DECLSPEC char *MPIR_debug_abort_string = "";
|
2008-06-18 03:15:56 +00:00
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
static char *ompi_debugger_dll_path = NULL;
|
|
|
|
|
2008-06-18 21:33:08 +00:00
|
|
|
/* Check for a file in few direct ways for portability */
|
2008-03-05 12:22:34 +00:00
|
|
|
static void check(char *dir, char *file, char **locations)
|
|
|
|
{
|
|
|
|
char *str;
|
|
|
|
|
|
|
|
asprintf(&str, "%s/%s.so", dir, file);
|
|
|
|
|
|
|
|
#if defined(HAVE_SYS_STAT_H)
|
|
|
|
{
|
|
|
|
struct stat buf;
|
|
|
|
|
|
|
|
/* Use stat() */
|
|
|
|
if (0 == stat(str, &buf)) {
|
|
|
|
opal_argv_append_nosize(&locations, file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
{
|
|
|
|
FILE *fp;
|
|
|
|
|
|
|
|
/* Just try to open the file */
|
|
|
|
if (NULL != (fp = fopen(str, "r"))) {
|
|
|
|
fclose(fp);
|
|
|
|
opal_argv_append_nosize(&locations, file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* defined(HAVE_SYS_STAT_H) */
|
|
|
|
|
|
|
|
free(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-01-27 23:25:10 +00:00
|
|
|
extern void
|
|
|
|
ompi_debugger_setup_dlls(void)
|
2005-08-31 20:35:15 +00:00
|
|
|
{
|
2013-01-27 23:25:10 +00:00
|
|
|
int i;
|
2013-03-27 21:09:41 +00:00
|
|
|
char **dirs, **tmp1 = NULL, **tmp2 = NULL;
|
2005-08-31 20:35:15 +00:00
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
ompi_debugger_dll_path = opal_install_dirs.pkglibdir;
|
|
|
|
(void) mca_base_var_register("ompi", "ompi", "debugger", "dll_path",
|
|
|
|
"List of directories where MPI_INIT should search for debugger plugins",
|
|
|
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
|
|
|
OPAL_INFO_LVL_9,
|
|
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
|
|
&ompi_debugger_dll_path);
|
2008-06-18 15:28:46 +00:00
|
|
|
|
2008-03-05 12:22:34 +00:00
|
|
|
/* Search the directory for MPI debugger DLLs */
|
2013-03-27 21:09:41 +00:00
|
|
|
if (NULL != ompi_debugger_dll_path) {
|
|
|
|
dirs = opal_argv_split(ompi_debugger_dll_path, ':');
|
2008-03-05 12:22:34 +00:00
|
|
|
for (i = 0; dirs[i] != NULL; ++i) {
|
2009-03-02 21:29:52 +00:00
|
|
|
check(dirs[i], OMPI_MPIHANDLES_DLL_PREFIX, tmp1);
|
|
|
|
check(dirs[i], OMPI_MSGQ_DLL_PREFIX, tmp2);
|
2008-03-05 12:22:34 +00:00
|
|
|
}
|
2013-03-15 15:04:35 +00:00
|
|
|
opal_argv_free(dirs);
|
2008-03-05 12:22:34 +00:00
|
|
|
}
|
2008-06-18 21:33:08 +00:00
|
|
|
|
2009-03-02 21:29:52 +00:00
|
|
|
/* Now that we have a full list of directories, assign the argv
|
|
|
|
arrays to the global variables (since the debugger may read the
|
|
|
|
global variables at any time, we want to ensure that they have
|
|
|
|
non-NULL values only when the entire array is ready). */
|
|
|
|
mpimsgq_dll_locations = tmp1;
|
|
|
|
mpidbg_dll_locations = tmp2;
|
2013-01-27 23:25:10 +00:00
|
|
|
}
|
2009-03-02 21:29:52 +00:00
|
|
|
|
2008-09-20 11:34:37 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Tell the debugger that we are about to abort
|
|
|
|
*/
|
|
|
|
void ompi_debugger_notify_abort(char *reason)
|
|
|
|
{
|
|
|
|
MPIR_debug_state = MPIR_DEBUG_ABORTING;
|
|
|
|
|
|
|
|
if (NULL != reason && strlen(reason) > 0) {
|
|
|
|
MPIR_debug_abort_string = reason;
|
|
|
|
} else {
|
|
|
|
MPIR_debug_abort_string = "Unknown";
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now tell the debugger */
|
|
|
|
MPIR_Breakpoint();
|
|
|
|
}
|
2012-01-11 15:53:09 +00:00
|
|
|
|
|
|
|
/*
 * Breakpoint hook for parallel debuggers.  The body intentionally does
 * nothing except return NULL.  A function of the same name is also
 * defined in orterun for the starter; the two should never conflict.
 */
void *MPIR_Breakpoint(void)
{
    void *nothing = NULL;

    return nothing;
}
|