1
1

Add support for the DDT parallel debugger, which required several things:

 * Making some symbols and types global (vs. static) in orterun
 * Adding a "ddt" entry to the default value of the
   orte_base_user_debugger MCA parameter
 * Adding support for the @executable@, @executable_argv@, and
   @single_app@ tokens in the orte_base_user_debugger MCA parameter
 * Adding various error checks and corresponding help messages after
   finding a debugger in the PATH

Fixes trac:1081

This commit was SVN r15323.

The following Trac tickets were found above:
  Ticket 1081 --> https://svn.open-mpi.org/trac/ompi/ticket/1081
Этот коммит содержится в:
Jeff Squyres 2007-07-10 12:53:48 +00:00
родитель a1bf04f39e
Коммит 64083570f5
6 изменённых файлов: 185 добавлений и 43 удалений

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -74,7 +75,7 @@ int orte_register_params(bool infrastructure)
mca_base_param_reg_string_name("orte", "base_user_debugger",
"Sequence of user-level debuggers to search for in orterun",
false, false, "totalview @mpirun@ -a @mpirun_args@ : fxp @mpirun@ -a @mpirun_args@", NULL);
false, false, "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@", NULL);
mca_base_param_reg_int_name("orte", "abort_timeout",

Просмотреть файл

@ -233,4 +233,31 @@ when attempting to start process rank %lu.
[orterun:proc-failed-to-start-no-status]
%s was unable to start the specified application as it encountered an error
on node %s. More information may be available above.
#
[debugger requires -np]
The number of MPI processes to launch was not specified on the command
line.
The %s debugger requires that you specify a number of MPI processes to
launch on the command line via the "-np" command line parameter. For
example:
%s -np 4 %s
Skipping the %s debugger for now.
#
[debugger requires executable]
The %s debugger requires that you specify an executable on the %s
command line; you cannot specify application context files when
launching this job in the %s debugger. For example:
%s -np 4 my_mpi_executable
Skipping the %s debugger for now.
#
[debugger only accepts single app]
The %s debugger only accepts SPMD-style launching; specifying an
MPMD-style launch (with multiple applications separated via ':') is
not permitted.
Skipping the %s debugger for now.

Просмотреть файл

@ -109,34 +109,10 @@ static orte_std_cntr_t total_num_apps = 0;
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
/*
* setup globals for catching orterun command line options
* Globals
*/
struct globals_t {
bool help;
bool version;
bool verbose;
bool quiet;
bool exit;
bool no_wait_for_job_completion;
bool by_node;
bool by_slot;
bool do_not_launch;
bool debugger;
int num_procs;
int exit_status;
char *hostfile;
char *env_val;
char *appfile;
char *wdir;
char *path;
bool preload_binary;
char* preload_files;
char* preload_files_dest_dir;
opal_mutex_t lock;
opal_condition_t cond;
} orterun_globals;
static bool globals_init = false;
struct globals_t orterun_globals;
bool globals_init = false;
opal_cmd_line_init_t cmd_line_init[] = {
/* Various "obvious" options */
@ -1069,13 +1045,12 @@ static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line)
/* Do we want a user-level debugger? */
if (orterun_globals.debugger) {
orte_run_debugger(orterun_basename, argc, argv);
orte_run_debugger(orterun_basename, cmd_line, argc, argv);
}
/* Allocate and map by node or by slot? Shortcut for setting an
MCA param. */
/* JMS To be changed post-beta to LAM's C/N command line notation */
/* Don't initialize the MCA parameter here unless we have to,
* since it really should be initialized in rmaps_base_open */
if (orterun_globals.by_node || orterun_globals.by_slot) {

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,6 +22,58 @@
#include "orte_config.h"
#include "opal/threads/condition.h"
BEGIN_C_DECLS
/**
* Main body of orterun functionality
*/
int orterun(int argc, char *argv[]);
/**
 * Global struct for catching orterun command line options.
 *
 * NOTE(review): most fields appear to mirror a same-named orterun
 * command line option; only the fields commented below have uses that
 * are visible next to this declaration -- confirm the rest against
 * cmd_line_init[].
 */
struct globals_t {
bool help;
bool version;
bool verbose;
bool quiet;
bool exit;
bool no_wait_for_job_completion;
/* by_node / by_slot are tested together when deciding whether to
   allocate and map by node or by slot */
bool by_node;
bool by_slot;
bool do_not_launch;
/* When true, orterun calls orte_run_debugger() to start a user-level
   debugger */
bool debugger;
/* Value substituted for the @np@ debugger token; a value of 0 is
   treated as "-np was not specified" by the debugger checks */
int num_procs;
int exit_status;
char *hostfile;
char *env_val;
char *appfile;
char *wdir;
char *path;
bool preload_binary;
char *preload_files;
char *preload_files_dest_dir;
/* NOTE(review): the purpose of lock / cond is not visible here --
   presumably used to wait on job state; confirm */
opal_mutex_t lock;
opal_condition_t cond;
};
/**
* Struct holding values gleaned from the orterun command line
*/
ORTE_DECLSPEC extern struct globals_t orterun_globals;
/**
* Whether orterun_globals has been initialized yet or not
*/
ORTE_DECLSPEC extern bool globals_init;
/**
* Struct holding list of allowable command line parameters
*/
ORTE_DECLSPEC extern opal_cmd_line_init_t cmd_line_init[];
END_C_DECLS
#endif /* ORTERUN_ORTERUN_H */

Просмотреть файл

@ -13,6 +13,7 @@
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -67,6 +68,7 @@
#include "orte/runtime/runtime.h"
#include "orte/runtime/params.h"
#include "orterun.h"
#include "totalview.h"
/* +++ begin MPICH/TotalView interface definitions */
@ -128,13 +130,16 @@ static void dump(void)
* look for that debugger in the path. If we find it, fill in
* new_argv.
*/
static int process(char *orig_line, char *basename, int argc, char **argv,
char ***new_argv)
static int process(char *orig_line, char *basename, opal_cmd_line_t *cmd_line,
int argc, char **argv, char ***new_argv)
{
int i;
char *line, *full_line = strdup(orig_line);
char *user_argv, *tmp, **tmp_argv;
char *user_argv, *tmp, *tmp2, **tmp_argv, **executable;
char cwd[PATH_MAX];
bool used_num_procs = false;
bool single_app = false;
bool fail_needed_executable = false;
line = full_line;
if (NULL == line) {
@ -153,6 +158,11 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
return ORTE_ERROR;
}
/* Get the tail of the command line (i.e., the user executable /
argv) */
opal_cmd_line_get_tail(cmd_line, &i, &executable);
/* Remove --debug, --debugger, and -tv from the user command line
params */
@ -201,6 +211,44 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
} else if (0 == strncmp(line + i, "@orterun_args@", 14)) {
line[i] = '\0';
asprintf(&tmp, "%s%s%s", line, user_argv, line + i + 14);
} else if (0 == strncmp(line + i, "@np@", 4)) {
line[i] = '\0';
asprintf(&tmp, "%s%d%s", line, orterun_globals.num_procs,
line + i + 4);
used_num_procs = true;
} else if (0 == strncmp(line + i, "@single_app@", 12)) {
line[i] = '\0';
/* This token is only a flag; it is not replaced with any
alternate text */
asprintf(&tmp, "%s%s", line, line + i + 12);
single_app = true;
} else if (0 == strncmp(line + i, "@executable@", 12)) {
line[i] = '\0';
/* If we found the executable, paste it in. Otherwise,
this is a possible error. */
if (NULL != executable) {
asprintf(&tmp, "%s%s%s", line, executable[0], line + i + 12);
} else {
fail_needed_executable = true;
}
} else if (0 == strncmp(line + i, "@executable_argv@", 17)) {
line[i] = '\0';
/* If we found the tail, paste in the argv. Otherwise,
this is a possible error. */
if (NULL != executable) {
if (NULL != executable[1]) {
/* Put in the argv */
tmp2 = opal_argv_join(executable + 1, ' ');
asprintf(&tmp, "%s%s%s", line, tmp2, line + i + 17);
free(tmp2);
} else {
/* There is no argv; just paste the front and back
together, removing the @token@ */
asprintf(&tmp, "%s%s", line, line + i + 17);
}
} else {
fail_needed_executable = true;
}
}
if (NULL != tmp) {
@ -221,7 +269,46 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
tmp = opal_path_findv((*new_argv)[0], X_OK, environ, cwd);
if (NULL != tmp) {
free(tmp);
return ORTE_SUCCESS;
/* Ok, we found a good debugger. Check for some error
conditions. */
tmp = opal_argv_join(argv, ' ');
/* We do not support launching a debugger that requires the
-np value if the user did not specify -np on the command
line. */
if (used_num_procs && 0 == orterun_globals.num_procs) {
opal_show_help("help-orterun.txt", "debugger requires -np",
true, (*new_argv)[0], argv[0], user_argv,
(*new_argv)[0]);
/* Fall through to free / fail, below */
}
/* Some debuggers do not support launching MPMD */
else if (single_app && NULL != strchr(tmp, ':')) {
opal_show_help("help-orterun.txt",
"debugger only accepts single app", true,
(*new_argv)[0], (*new_argv)[0]);
/* Fall through to free / fail, below */
}
/* Some debuggers do not use orterun/mpirun, and therefore
must have an executable to run (e.g., cannot use mpirun's
app context file feature). */
else if (fail_needed_executable) {
opal_show_help("help-orterun.txt",
"debugger requires executable", true,
(*new_argv)[0], argv[0], (*new_argv)[0], argv[0],
(*new_argv)[0]);
/* Fall through to free / fail, below */
}
/* Otherwise, we succeeded. Return happiness. */
else {
free(tmp);
return ORTE_SUCCESS;
}
free(tmp);
}
/* All done -- didn't find it */
@ -234,7 +321,8 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
/**
* Run a user-level debugger
*/
void orte_run_debugger(char *basename, int argc, char *argv[])
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
int argc, char *argv[])
{
int i, id;
char **new_argv = NULL;
@ -262,7 +350,7 @@ void orte_run_debugger(char *basename, int argc, char *argv[])
lines = opal_argv_split(value, ':');
free(value);
for (i = 0; NULL != lines[i]; ++i) {
if (ORTE_SUCCESS == process(lines[i], basename, argc, argv,
if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv,
&new_argv)) {
break;
}

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,19 +22,16 @@
#include "orte_config.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
void orte_run_debugger(char *basename, int argc, char *argv[]);
/**
 * Run a user-level debugger.
 *
 * @param basename  Program name forwarded to the debugger search
 *                  (orterun passes orterun_basename)
 * @param cmd_line  Command line handle; its tail is read to obtain the
 *                  user executable and that executable's argv
 * @param argc      Argument count forwarded from the caller
 *                  (presumably the number of entries in argv)
 * @param argv      Argument vector forwarded from the caller; it is
 *                  joined into a single string when checking for a
 *                  ':'-separated multi-application launch
 */
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
int argc, char *argv[]);
void orte_totalview_init_before_spawn(void);
void orte_totalview_init_after_spawn(orte_jobid_t jobid);
void orte_totalview_finalize(void);
extern void *MPIR_Breakpoint(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS
#endif /* ORTERUN_TOTALVIEW_H */