/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2007 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007-2008 Cisco Systems, Inc.  All rights resereved.
 * $COPYRIGHT$
 * 
 * Additional copyrights may follow
 * 
 * $HEADER$
 */

/*
 * MPI portion of debugger support: initially based on the
 * TotalView/Etnus API for debuggers to attach to MPI jobs.
 *
 * There is a lengthy explanation of how OMPI handles parallel
 * debuggers attaching to MPI jobs in orte/tools/orterun/debuggers.c.
 */

#include "ompi_config.h"

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif  /* HAVE_UNISTD_H */
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "opal/mca/base/base.h"
#include "opal/util/argv.h"
#include "opal/mca/installdirs/installdirs.h"
#include "debuggers.h"
/**
 * A lot of include files that are required by al optimized builds in order
 * to get access to the type information. Beware, this file have to always
 * be compiled with the -g flag, otherwise the type information will be
 * missing and the parallel debuggers will be unable to initialize the
 * Open MPI debug library.
 */
#include "opal/class/opal_list.h"

#include "ompi/class/ompi_free_list.h"
#include "ompi/request/request.h"
#include "ompi/mca/pml/base/pml_base_request.h"
#include "ompi/mca/pml/base/pml_base_sendreq.h"
#include "ompi/mca/pml/base/pml_base_recvreq.h"
#include "opal/class/opal_pointer_array.h"
#include "ompi/communicator/communicator.h"
#include "ompi/group/group.h"
#include "ompi/datatype/datatype.h"
#include "ompi/include/mpi.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/runtime/orte_globals.h"

#if defined(OMPI_MSGQ_DLL)
/* This variable is old/deprecated -- the mpimsgq_dll_locations[]
   method is preferred because it's more flexible */
OMPI_DECLSPEC char MPIR_dll_name[] = OMPI_MSGQ_DLL;
#endif  /* defined(OMPI_MSGQ_DLL) */
OMPI_DECLSPEC char **mpidbg_dll_locations = NULL;
OMPI_DECLSPEC char **mpimsgq_dll_locations = NULL;

OMPI_DECLSPEC int MPIR_debug_typedefs_sizeof[] = {
    sizeof(short),
    sizeof(int),
    sizeof(long),
    sizeof(long long),
    sizeof(void*),
    sizeof(bool),
    sizeof(size_t)
};
    
/*
 * Values defined by the standardized interface; do not change these
 * values
 */
#define MPIR_DEBUG_SPAWNED   1
#define MPIR_DEBUG_ABORTING  2

/**
 * There is an issue with the debugger running on different architectures
 * compared with the debugged program. We need to know the sizes of the types
 * on the debugged program. The problem is that the size depend on the
 * compilation options (32 or 64 bits) and on the compiler. Therefore,
 * the simplest and more accurate way is to export these sizes directly from
 * the debugged program.
 */
OMPI_DECLSPEC opal_list_item_t* opal_list_item_t_type_inclusion = NULL;
OMPI_DECLSPEC opal_list_t* opal_list_t_type_inclusion = NULL;
OMPI_DECLSPEC ompi_free_list_item_t* ompi_free_list_item_t_type_inclusion = NULL;
OMPI_DECLSPEC ompi_free_list_t* ompi_free_list_t_type_inclusion = NULL;
OMPI_DECLSPEC ompi_request_t* ompi_request_t_type_inclusion = NULL;
OMPI_DECLSPEC mca_pml_base_request_t* mca_pml_base_request_t_type_inclusion = NULL;
OMPI_DECLSPEC mca_pml_base_send_request_t* mca_pml_base_send_request_t_type_inclusion = NULL;
OMPI_DECLSPEC mca_pml_base_recv_request_t* mca_pml_base_recv_request_t_type_inclusion = NULL;
OMPI_DECLSPEC opal_pointer_array_t* opal_pointer_array_t_type_inclusion = NULL;
OMPI_DECLSPEC ompi_communicator_t* ompi_communicator_t_type_inclusion = NULL;
OMPI_DECLSPEC ompi_group_t* ompi_group_t_type_inclusion = NULL;
OMPI_DECLSPEC ompi_status_public_t* ompi_status_public_t_type_inclusion = NULL;
OMPI_DECLSPEC ompi_datatype_t* ompi_datatype_t_type_inclusion = NULL;

OMPI_DECLSPEC volatile int MPIR_debug_gate = 0;
OMPI_DECLSPEC volatile int MPIR_being_debugged = 0;
OMPI_DECLSPEC volatile int MPIR_debug_state = 0;
OMPI_DECLSPEC char *MPIR_debug_abort_string = "";

/* Check for a file in few direct ways for portability */
static void check(char *dir, char *file, char **locations) 
{
    char *str;

    asprintf(&str, "%s/%s.so", dir, file);
    
#if defined(HAVE_SYS_STAT_H)
    {
        struct stat buf;
        
        /* Use stat() */
        if (0 == stat(str, &buf)) {
            opal_argv_append_nosize(&locations, file);
        }
    }
#else
    {
        FILE *fp;
        
        /* Just try to open the file */
        if (NULL != (fp = fopen(str, "r"))) {
            fclose(fp);
            opal_argv_append_nosize(&locations, file);
        }
    }
#endif /* defined(HAVE_SYS_STAT_H) */

    free(str);
}


/*
 * Wait for a debugger if asked.  We support two ways of waiting for
 * attaching debuggers -- see big comment in
 * orte/tools/orterun/debuggers.c explaning the two scenarios.
 */
void ompi_wait_for_debugger(void)
{
    int i, debugger, rc;
    char *a, *b, **dirs;
    opal_buffer_t buf;


    /* See lengthy comment in orte/tools/orterun/debuggers.c about
       orte_in_parallel_debugger */
#if ORTE_DISABLE_FULL_SUPPORT
    debugger = 0;
#else
    debugger = orte_in_parallel_debugger;
#endif

    /* Add in environment variables for other launchers, such as yod,
       srun, ...etc. */
    if (1 == MPIR_being_debugged) {
        debugger = 1;
    } else if (NULL != getenv("yod_you_are_being_debugged")) {
        debugger = 1;
    }
    if (1 == MPIR_being_debugged) {
        debugger = 1;
    }
    
    if (!debugger) {
        /* if not, just return */
        return;
    }
    
    /* if we are being debugged, then we need to find
     * the correct plug-ins
     */
    a = strdup(opal_install_dirs.pkglibdir);
    mca_base_param_reg_string_name("ompi",
                                   "debugger_dll_path",
                                   "List of directories where MPI_INIT should search for debugger plugins",
                                   false, false, a, &b);
    free(a);
    
    /* Search the directory for MPI debugger DLLs */
    if (NULL != b) {
        dirs = opal_argv_split(b, ':');
        for (i = 0; dirs[i] != NULL; ++i) {
            check(dirs[i], OMPI_MPIHANDLES_DLL_PREFIX, mpidbg_dll_locations);
            check(dirs[i], OMPI_MSGQ_DLL_PREFIX, mpimsgq_dll_locations);
        }
    }

    if (ORTE_DISABLE_FULL_SUPPORT) {
        /* spin until debugger attaches and releases us */
        while (MPIR_debug_gate == 0) {
#if defined(__WINDOWS__)
            Sleep(100);     /* milliseconds */
#elif defined(HAVE_USLEEP)
            usleep(100000); /* microseconds */
#else
            sleep(1);       /* seconds */
#endif
        }
    } else {
    
        /* only the rank=0 proc waits for either a message from the
         * HNP or for the debugger to attach - everyone else will just
         * spin in * the grpcomm barrier in ompi_mpi_init until rank=0
         * joins them.
         */
        if (0 != ORTE_PROC_MY_NAME->vpid) {
            return;
        }
    
        /* VPID 0 waits for a message from the HNP */
        OBJ_CONSTRUCT(&buf, opal_buffer_t);
        rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, 
                                  ORTE_RML_TAG_DEBUGGER_RELEASE, 0);
        OBJ_DESTRUCT(&buf);  /* don't care about contents of message */
        if (rc < 0) {
            /* if it failed for some reason, then we are in trouble -
             * for now, just report the problem and give up waiting
             */
            opal_output(0, "Debugger_attach[rank=%ld]: could not wait for debugger - error %s!",
                        (long)ORTE_PROC_MY_NAME->vpid, ORTE_ERROR_NAME(rc));
        }
    }
}    

/*
 * Breakpoint function for parallel debuggers.  This function is also
 * defined in orterun for the starter.  It should never conflict with
 * this one, but we'll make it static, just to be sure.
 */
static void *MPIR_Breakpoint(void)
{
    return NULL;
}

/*
 * Tell the debugger that we are about to abort
 */
void ompi_debugger_notify_abort(char *reason)
{
    MPIR_debug_state = MPIR_DEBUG_ABORTING;

    if (NULL != reason && strlen(reason) > 0) {
        MPIR_debug_abort_string = reason;
    } else {
        MPIR_debug_abort_string = "Unknown";
    }

    /* Now tell the debugger */
    MPIR_Breakpoint();
}