Add support for the DDT parallel debugger, which required several things:

* Making some symbols and types global (vs. static) in orterun
* Adding a "ddt" entry to the default value of the orte_base_user_debugger MCA parameter
* Adding support for the @executable@, @executable_argv@, and @single_app@ tokens in the orte_base_user_debugger MCA parameter
* Adding various error checks and corresponding help messages after finding a debugger in the PATH

Fixes trac:1081

This commit was SVN r15323.

The following Trac tickets were found above:
  Ticket 1081 --> https://svn.open-mpi.org/trac/ompi/ticket/1081
Этот коммит содержится в:
родитель
a1bf04f39e
Коммит
64083570f5
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -74,7 +75,7 @@ int orte_register_params(bool infrastructure)
|
||||
|
||||
mca_base_param_reg_string_name("orte", "base_user_debugger",
|
||||
"Sequence of user-level debuggers to search for in orterun",
|
||||
false, false, "totalview @mpirun@ -a @mpirun_args@ : fxp @mpirun@ -a @mpirun_args@", NULL);
|
||||
false, false, "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@", NULL);
|
||||
|
||||
|
||||
mca_base_param_reg_int_name("orte", "abort_timeout",
|
||||
|
@ -233,4 +233,31 @@ when attempting to start process rank %lu.
|
||||
[orterun:proc-failed-to-start-no-status]
|
||||
%s was unable to start the specified application as it encountered an error
|
||||
on node %s. More information may be available above.
|
||||
#
|
||||
[debugger requires -np]
|
||||
The number of MPI processes to launch was not specified on the command
|
||||
line.
|
||||
|
||||
The %s debugger requires that you specify a number of MPI processes to
|
||||
launch on the command line via the "-np" command line parameter. For
|
||||
example:
|
||||
|
||||
%s -np 4 %s
|
||||
|
||||
Skipping the %s debugger for now.
|
||||
#
|
||||
[debugger requires executable]
|
||||
The %s debugger requires that you specify an executable on the %s
|
||||
command line; you cannot specify application context files when
|
||||
launching this job in the %s debugger. For example:
|
||||
|
||||
%s -np 4 my_mpi_executable
|
||||
|
||||
Skipping the %s debugger for now.
|
||||
#
|
||||
[debugger only accepts single app]
|
||||
The %s debugger only accepts SPMD-style launching; specifying an
|
||||
MPMD-style launch (with multiple applications separated via ':') is
|
||||
not permitted.
|
||||
|
||||
Skipping the %s debugger for now.
|
||||
|
@ -109,34 +109,10 @@ static orte_std_cntr_t total_num_apps = 0;
|
||||
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
|
||||
|
||||
/*
|
||||
* setup globals for catching orterun command line options
|
||||
* Globals
|
||||
*/
|
||||
struct globals_t {
|
||||
bool help;
|
||||
bool version;
|
||||
bool verbose;
|
||||
bool quiet;
|
||||
bool exit;
|
||||
bool no_wait_for_job_completion;
|
||||
bool by_node;
|
||||
bool by_slot;
|
||||
bool do_not_launch;
|
||||
bool debugger;
|
||||
int num_procs;
|
||||
int exit_status;
|
||||
char *hostfile;
|
||||
char *env_val;
|
||||
char *appfile;
|
||||
char *wdir;
|
||||
char *path;
|
||||
bool preload_binary;
|
||||
char* preload_files;
|
||||
char* preload_files_dest_dir;
|
||||
opal_mutex_t lock;
|
||||
opal_condition_t cond;
|
||||
} orterun_globals;
|
||||
static bool globals_init = false;
|
||||
|
||||
struct globals_t orterun_globals;
|
||||
bool globals_init = false;
|
||||
|
||||
opal_cmd_line_init_t cmd_line_init[] = {
|
||||
/* Various "obvious" options */
|
||||
@ -1069,13 +1045,12 @@ static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line)
|
||||
/* Do we want a user-level debugger? */
|
||||
|
||||
if (orterun_globals.debugger) {
|
||||
orte_run_debugger(orterun_basename, argc, argv);
|
||||
orte_run_debugger(orterun_basename, cmd_line, argc, argv);
|
||||
}
|
||||
|
||||
/* Allocate and map by node or by slot? Shortcut for setting an
|
||||
MCA param. */
|
||||
|
||||
/* JMS To be changed post-beta to LAM's C/N command line notation */
|
||||
/* Don't initialize the MCA parameter here unless we have to,
|
||||
* since it really should be initialized in rmaps_base_open */
|
||||
if (orterun_globals.by_node || orterun_globals.by_slot) {
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,6 +22,58 @@
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/threads/condition.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* Main body of orterun functionality
|
||||
*/
|
||||
int orterun(int argc, char *argv[]);
|
||||
|
||||
/**
|
||||
* Global struct for catching orterun command line options.
|
||||
*/
|
||||
struct globals_t {
|
||||
bool help;
|
||||
bool version;
|
||||
bool verbose;
|
||||
bool quiet;
|
||||
bool exit;
|
||||
bool no_wait_for_job_completion;
|
||||
bool by_node;
|
||||
bool by_slot;
|
||||
bool do_not_launch;
|
||||
bool debugger;
|
||||
int num_procs;
|
||||
int exit_status;
|
||||
char *hostfile;
|
||||
char *env_val;
|
||||
char *appfile;
|
||||
char *wdir;
|
||||
char *path;
|
||||
bool preload_binary;
|
||||
char *preload_files;
|
||||
char *preload_files_dest_dir;
|
||||
opal_mutex_t lock;
|
||||
opal_condition_t cond;
|
||||
};
|
||||
|
||||
/**
|
||||
* Struct holding values gleaned from the orterun command line
|
||||
*/
|
||||
ORTE_DECLSPEC extern struct globals_t orterun_globals;
|
||||
|
||||
/**
|
||||
* Whether orterun_globals has been initialized yet or not
|
||||
*/
|
||||
ORTE_DECLSPEC extern bool globals_init;
|
||||
|
||||
/**
|
||||
* Struct holding list of allowable command line parameters
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_cmd_line_init_t cmd_line_init[];
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTERUN_ORTERUN_H */
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -67,6 +68,7 @@
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/params.h"
|
||||
|
||||
#include "orterun.h"
|
||||
#include "totalview.h"
|
||||
|
||||
/* +++ begin MPICH/TotalView interface definitions */
|
||||
@ -128,13 +130,16 @@ static void dump(void)
|
||||
* look for that debugger in the path. If we find it, fill in
|
||||
* new_argv.
|
||||
*/
|
||||
static int process(char *orig_line, char *basename, int argc, char **argv,
|
||||
char ***new_argv)
|
||||
static int process(char *orig_line, char *basename, opal_cmd_line_t *cmd_line,
|
||||
int argc, char **argv, char ***new_argv)
|
||||
{
|
||||
int i;
|
||||
char *line, *full_line = strdup(orig_line);
|
||||
char *user_argv, *tmp, **tmp_argv;
|
||||
char *user_argv, *tmp, *tmp2, **tmp_argv, **executable;
|
||||
char cwd[PATH_MAX];
|
||||
bool used_num_procs = false;
|
||||
bool single_app = false;
|
||||
bool fail_needed_executable = false;
|
||||
|
||||
line = full_line;
|
||||
if (NULL == line) {
|
||||
@ -153,6 +158,11 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* Get the tail of the command line (i.e., the user executable /
|
||||
argv) */
|
||||
|
||||
opal_cmd_line_get_tail(cmd_line, &i, &executable);
|
||||
|
||||
/* Remove --debug, --debugger, and -tv from the user command line
|
||||
params */
|
||||
|
||||
@ -201,6 +211,44 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
|
||||
} else if (0 == strncmp(line + i, "@orterun_args@", 14)) {
|
||||
line[i] = '\0';
|
||||
asprintf(&tmp, "%s%s%s", line, user_argv, line + i + 14);
|
||||
} else if (0 == strncmp(line + i, "@np@", 4)) {
|
||||
line[i] = '\0';
|
||||
asprintf(&tmp, "%s%d%s", line, orterun_globals.num_procs,
|
||||
line + i + 4);
|
||||
used_num_procs = true;
|
||||
} else if (0 == strncmp(line + i, "@single_app@", 12)) {
|
||||
line[i] = '\0';
|
||||
/* This token is only a flag; it is not replaced with any
|
||||
alternate text */
|
||||
asprintf(&tmp, "%s%s", line, line + i + 12);
|
||||
single_app = true;
|
||||
} else if (0 == strncmp(line + i, "@executable@", 12)) {
|
||||
line[i] = '\0';
|
||||
/* If we found the executable, paste it in. Otherwise,
|
||||
this is a possible error. */
|
||||
if (NULL != executable) {
|
||||
asprintf(&tmp, "%s%s%s", line, executable[0], line + i + 12);
|
||||
} else {
|
||||
fail_needed_executable = true;
|
||||
}
|
||||
} else if (0 == strncmp(line + i, "@executable_argv@", 17)) {
|
||||
line[i] = '\0';
|
||||
/* If we found the tail, paste in the argv. Otherwise,
|
||||
this is a possible error. */
|
||||
if (NULL != executable) {
|
||||
if (NULL != executable[1]) {
|
||||
/* Put in the argv */
|
||||
tmp2 = opal_argv_join(executable + 1, ' ');
|
||||
asprintf(&tmp, "%s%s%s", line, tmp2, line + i + 17);
|
||||
free(tmp2);
|
||||
} else {
|
||||
/* There is no argv; just paste the front and back
|
||||
together, removing the @token@ */
|
||||
asprintf(&tmp, "%s%s", line, line + i + 17);
|
||||
}
|
||||
} else {
|
||||
fail_needed_executable = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != tmp) {
|
||||
@ -221,7 +269,46 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
|
||||
tmp = opal_path_findv((*new_argv)[0], X_OK, environ, cwd);
|
||||
if (NULL != tmp) {
|
||||
free(tmp);
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
/* Ok, we found a good debugger. Check for some error
|
||||
conditions. */
|
||||
tmp = opal_argv_join(argv, ' ');
|
||||
|
||||
/* We do not support launching a debugger that requires the
|
||||
-np value if the user did not specify -np on the command
|
||||
line. */
|
||||
if (used_num_procs && 0 == orterun_globals.num_procs) {
|
||||
opal_show_help("help-orterun.txt", "debugger requires -np",
|
||||
true, (*new_argv)[0], argv[0], user_argv,
|
||||
(*new_argv)[0]);
|
||||
/* Fall through to free / fail, below */
|
||||
}
|
||||
|
||||
/* Some debuggers do not support launching MPMD */
|
||||
else if (single_app && NULL != strchr(tmp, ':')) {
|
||||
opal_show_help("help-orterun.txt",
|
||||
"debugger only accepts single app", true,
|
||||
(*new_argv)[0], (*new_argv)[0]);
|
||||
/* Fall through to free / fail, below */
|
||||
}
|
||||
|
||||
/* Some debuggers do not use orterun/mpirun, and therefore
|
||||
must have an executable to run (e.g., cannot use mpirun's
|
||||
app context file feature). */
|
||||
else if (fail_needed_executable) {
|
||||
opal_show_help("help-orterun.txt",
|
||||
"debugger requires executable", true,
|
||||
(*new_argv)[0], argv[0], (*new_argv)[0], argv[0],
|
||||
(*new_argv)[0]);
|
||||
/* Fall through to free / fail, below */
|
||||
}
|
||||
|
||||
/* Otherwise, we succeeded. Return happiness. */
|
||||
else {
|
||||
free(tmp);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
/* All done -- didn't find it */
|
||||
@ -234,7 +321,8 @@ static int process(char *orig_line, char *basename, int argc, char **argv,
|
||||
/**
|
||||
* Run a user-level debugger
|
||||
*/
|
||||
void orte_run_debugger(char *basename, int argc, char *argv[])
|
||||
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
|
||||
int argc, char *argv[])
|
||||
{
|
||||
int i, id;
|
||||
char **new_argv = NULL;
|
||||
@ -262,7 +350,7 @@ void orte_run_debugger(char *basename, int argc, char *argv[])
|
||||
lines = opal_argv_split(value, ':');
|
||||
free(value);
|
||||
for (i = 0; NULL != lines[i]; ++i) {
|
||||
if (ORTE_SUCCESS == process(lines[i], basename, argc, argv,
|
||||
if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv,
|
||||
&new_argv)) {
|
||||
break;
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,19 +22,16 @@
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
BEGIN_C_DECLS
|
||||
|
||||
void orte_run_debugger(char *basename, int argc, char *argv[]);
|
||||
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
|
||||
int argc, char *argv[]);
|
||||
void orte_totalview_init_before_spawn(void);
|
||||
void orte_totalview_init_after_spawn(orte_jobid_t jobid);
|
||||
void orte_totalview_finalize(void);
|
||||
|
||||
extern void *MPIR_Breakpoint(void);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTERUN_TOTALVIEW_H */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user