Good weekend brainless activity -- implement the orterun command line
debugger scheme described in http://www.open-mpi.org/community/lists/users/2005/10/0214.php. This makes our user-level debugger scheme much more vendor-independent (although the "-tv" option will still work for backwards compatibility -- it'll just be a synonym of "--debug"). This commit was SVN r8206.
Этот коммит содержится в:
родитель
20cea60b82
Коммит
8d96c21311
@ -25,12 +25,17 @@
|
||||
|
||||
int orte_register_params(bool infrastructure)
|
||||
{
|
||||
int id;
|
||||
mca_base_param_reg_int_name("orte", "infrastructure",
|
||||
"Whether we are ORTE infrastructure or an ORTE application",
|
||||
true, true, (int)infrastructure, NULL);
|
||||
|
||||
id = mca_base_param_reg_int_name("orte", "infrastructure",
|
||||
"Whether we are ORTE infrastructure or an ORTE application",
|
||||
true, true, (int)infrastructure, NULL);
|
||||
/* User-level debugger info string */
|
||||
|
||||
mca_base_param_reg_string_name("orte", "base_user_debugger",
|
||||
"Sequence of user-level debuggers to search for in orterun",
|
||||
false, false, "totalview @mpirun@ -a @mpirun_args@ : fx2 @mpirun@ -a @mpirun_args@", NULL);
|
||||
|
||||
/* All done */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -91,9 +91,30 @@ that there are no runaway processes still executing.
|
||||
A prefix was supplied to %s that only contained slashes.
|
||||
|
||||
This is a fatal error; %s will now abort. No processes were launched.
|
||||
[totalview-exec-failed]
|
||||
%s was unable to launch the totalview debugger. Things to check:
|
||||
#
|
||||
[debugger-mca-param-not-found]
|
||||
Internal error -- the orte_base_user_debugger MCA parameter was not able to
|
||||
be found. Please contact the Open RTE developers; this should not
|
||||
happen.
|
||||
#
|
||||
[debugger-orte_base_user_debugger-empty]
|
||||
The MCA parameter "orte_base_user_debugger" was empty, indicating that
|
||||
no user-level debuggers have been defined. Please set this MCA
|
||||
parameter to a value and try again.
|
||||
#
|
||||
[debugger-not-found]
|
||||
A suitable debugger could not be found in your PATH. Check the values
|
||||
specified in the orte_base_user_debugger MCA parameter for the list of
|
||||
debuggers that was searched.
|
||||
#
|
||||
[debugger-exec-failed]
|
||||
%s was unable to launch the specified debugger. This is what was
|
||||
launched:
|
||||
|
||||
- Ensure that TotalView is installed properly
|
||||
- Ensure that the "totalview" executable is in your path
|
||||
- Ensure that valid licenses are available to run the TotalView debugger
|
||||
%s
|
||||
|
||||
Things to check:
|
||||
|
||||
- Ensure that the debugger is installed properly
|
||||
- Ensure that the "%s" executable is in your path
|
||||
- Ensure that any required licenses are available to run the debugger
|
||||
|
@ -94,6 +94,7 @@ struct globals_t {
|
||||
bool no_wait_for_job_completion;
|
||||
bool by_node;
|
||||
bool by_slot;
|
||||
bool debugger;
|
||||
size_t num_procs;
|
||||
int exit_status;
|
||||
char *hostfile;
|
||||
@ -191,8 +192,20 @@ opal_cmd_line_init_t cmd_line_init[] = {
|
||||
{ NULL, NULL, NULL, 'H', "host", "host", 1,
|
||||
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"List of hosts to invoke processes on" },
|
||||
|
||||
/* User-level debugger arguments */
|
||||
{ NULL, NULL, NULL, '\0', "tv", "tv", 0,
|
||||
&orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Deprecated backwards compatibility flag; synonym for \"--debug\"" },
|
||||
{ NULL, NULL, NULL, '\0', "debug", "debug", 0,
|
||||
&orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter" },
|
||||
{ "orte", "base", "user_debugger", '\0', "debugger", "debugger", 1,
|
||||
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Sequence of debuggers to search for when \"--debug\" is used" },
|
||||
|
||||
/* OpenRTE arguments */
|
||||
{ "orte", "debug", NULL, 'd', NULL, "debug", 0,
|
||||
{ "orte", "debug", NULL, 'd', NULL, "debug-devel", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Enable debugging of OpenRTE" },
|
||||
{ "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0,
|
||||
@ -237,6 +250,11 @@ int orterun(int argc, char *argv[])
|
||||
int rc, i, num_apps, array_size, j;
|
||||
int id, iparam;
|
||||
|
||||
/* Setup MCA params */
|
||||
|
||||
mca_base_param_init();
|
||||
orte_register_params(false);
|
||||
|
||||
/* Setup the abort message (for use in the signal handler) */
|
||||
|
||||
orterun_basename = opal_basename(argv[0]);
|
||||
@ -652,6 +670,7 @@ static int init_globals(void)
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
@ -689,7 +708,6 @@ static int parse_globals(int argc, char* argv[])
|
||||
init_globals();
|
||||
opal_cmd_line_create(&cmd_line, cmd_line_init);
|
||||
mca_base_cmd_line_setup(&cmd_line);
|
||||
orte_totalview_cmd_line_setup(&cmd_line);
|
||||
if (OMPI_SUCCESS != (ret = opal_cmd_line_parse(&cmd_line, true,
|
||||
argc, argv)) ) {
|
||||
return ret;
|
||||
@ -708,9 +726,11 @@ static int parse_globals(int argc, char* argv[])
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Do we want totalview? */
|
||||
/* Do we want a user-level debugger? */
|
||||
|
||||
orte_totalview_cmd_line_process(&cmd_line, orterun_basename, argc, argv);
|
||||
if (orterun_globals.debugger) {
|
||||
orte_run_debugger(orterun_basename, argc, argv);
|
||||
}
|
||||
|
||||
/* Allocate and map by node or by slot? Shortcut for setting an
|
||||
MCA param. */
|
||||
|
@ -43,6 +43,7 @@
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
/*
|
||||
* The environment
|
||||
@ -53,6 +54,7 @@ extern char **environ;
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "mca/base/base.h"
|
||||
#include "mca/errmgr/errmgr.h"
|
||||
@ -117,39 +119,164 @@ static void dump(void)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add "-tv" to the command line parsing options
|
||||
/*
|
||||
* Process one line from the orte_base_user_debugger MCA param and
|
||||
* look for that debugger in the path. If we find it, fill in
|
||||
* new_argv.
|
||||
*/
|
||||
void orte_totalview_cmd_line_setup(opal_cmd_line_t *cmd)
|
||||
static int process(char *orig_line, char *basename, int argc, char **argv,
|
||||
char ***new_argv)
|
||||
{
|
||||
opal_cmd_line_make_opt3(cmd, '\0', "tv", "tv", 0,
|
||||
"Convenience option to re-exec under the TotalView debugger");
|
||||
int i;
|
||||
char *line, *full_line = strdup(orig_line);
|
||||
char *user_argv, *tmp, **tmp_argv;
|
||||
char cwd[PATH_MAX];
|
||||
|
||||
line = full_line;
|
||||
if (NULL == line) {
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Trim off whitespace at the beginning and ending of line */
|
||||
|
||||
for (i = 0; '\0' != line[i] && isspace(line[i]); ++line) {
|
||||
continue;
|
||||
}
|
||||
for (i = strlen(line) - 2; i > 0 && isspace(line[i]); ++i) {
|
||||
line[i] = '\0';
|
||||
}
|
||||
if (strlen(line) <= 0) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* Remove --debug and --debugger from the user command line
|
||||
params */
|
||||
|
||||
if (1 == argc) {
|
||||
user_argv = strdup("");
|
||||
} else {
|
||||
tmp_argv = opal_argv_copy(argv);
|
||||
for (i = 0; NULL != tmp_argv[i]; ++i) {
|
||||
if (0 == strcmp(tmp_argv[i], "-debug") ||
|
||||
0 == strcmp(tmp_argv[i], "--debug")) {
|
||||
free(tmp_argv[i]);
|
||||
tmp_argv[i] = strdup("");
|
||||
} else if (0 == strcmp(tmp_argv[i], "--debugger") ||
|
||||
0 == strcmp(tmp_argv[i], "-debugger")) {
|
||||
free(tmp_argv[i]);
|
||||
tmp_argv[i] = strdup("");
|
||||
if (NULL != tmp_argv[i + 1]) {
|
||||
++i;
|
||||
free(tmp_argv[i]);
|
||||
tmp_argv[i] = strdup("");
|
||||
}
|
||||
}
|
||||
}
|
||||
user_argv = opal_argv_join(tmp_argv + 1, ' ');
|
||||
opal_argv_free(tmp_argv);
|
||||
}
|
||||
|
||||
/* Replace @@ tokens */
|
||||
|
||||
for (i = 0; i < strlen(line); ++i) {
|
||||
tmp = NULL;
|
||||
if (0 == strncmp(line + i, "@mpirun@", 8)) {
|
||||
line[i] = '\0';
|
||||
asprintf(&tmp, "%s%s%s", line, argv[0], line + i + 8);
|
||||
} else if (0 == strncmp(line + i, "@orterun@", 9)) {
|
||||
line[i] = '\0';
|
||||
asprintf(&tmp, "%s%s%s", line, argv[0], line + i + 9);
|
||||
} else if (0 == strncmp(line + i, "@mpirun_args@", 13)) {
|
||||
line[i] = '\0';
|
||||
asprintf(&tmp, "%s%s%s", line, user_argv, line + i + 13);
|
||||
} else if (0 == strncmp(line + i, "@orterun_args@", 14)) {
|
||||
line[i] = '\0';
|
||||
asprintf(&tmp, "%s%s%s", line, user_argv, line + i + 14);
|
||||
}
|
||||
|
||||
if (NULL != tmp) {
|
||||
free(full_line);
|
||||
full_line = line = tmp;
|
||||
--i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Split up into argv */
|
||||
|
||||
*new_argv = opal_argv_split(line, ' ');
|
||||
free(line);
|
||||
|
||||
/* Can we find argv[0] in the path? */
|
||||
|
||||
getcwd(cwd, PATH_MAX);
|
||||
tmp = opal_path_findv((*new_argv)[0], 0, environ, cwd);
|
||||
if (NULL != tmp) {
|
||||
free(tmp);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* All done -- didn't find it */
|
||||
|
||||
opal_argv_free(*new_argv);
|
||||
*new_argv = NULL;
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/**
|
||||
* If -tv was given, re-exec under totalview
|
||||
* Run a user-level debugger
|
||||
*/
|
||||
void orte_totalview_cmd_line_process(opal_cmd_line_t *cmd, char *basename,
|
||||
int argc, char *argv[])
|
||||
void orte_run_debugger(char *basename, int argc, char *argv[])
|
||||
{
|
||||
if (opal_cmd_line_is_taken(cmd, "tv")) {
|
||||
int i;
|
||||
char **new_argv = NULL;
|
||||
printf("found -tv\n");
|
||||
int i, id;
|
||||
char **new_argv = NULL;
|
||||
char *value, **lines;
|
||||
|
||||
opal_argv_append_nosize(&new_argv, "totalview");
|
||||
opal_argv_append_nosize(&new_argv, argv[0]);
|
||||
opal_argv_append_nosize(&new_argv, "-a");
|
||||
for (i = 1; i < argc; ++i) {
|
||||
opal_argv_append_nosize(&new_argv, argv[i]);
|
||||
}
|
||||
|
||||
execvp(new_argv[0], new_argv);
|
||||
opal_show_help("help-orterun.txt", "totalview-exec-failed",
|
||||
true, basename);
|
||||
/* Get the orte_base_user_debugger MCA parameter and search for a debugger
|
||||
that can run */
|
||||
|
||||
id = mca_base_param_find("orte", NULL, "base_user_debugger");
|
||||
if (id < 0) {
|
||||
opal_show_help("help-orterun.txt", "debugger-mca-param-not-found",
|
||||
true);
|
||||
exit(1);
|
||||
}
|
||||
value = NULL;
|
||||
mca_base_param_lookup_string(id, &value);
|
||||
if (NULL == value) {
|
||||
opal_show_help("help-orterun.txt", "debugger-orte_base_user_debugger-empty",
|
||||
true);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Look through all the values in the MCA param */
|
||||
|
||||
lines = opal_argv_split(value, ':');
|
||||
free(value);
|
||||
for (i = 0; NULL != lines[i]; ++i) {
|
||||
if (ORTE_SUCCESS == process(lines[i], basename, argc, argv,
|
||||
&new_argv)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't find one, abort */
|
||||
|
||||
if (NULL == lines[i]) {
|
||||
opal_show_help("help-orterun.txt", "debugger-not-found", true);
|
||||
free(str);
|
||||
exit(1);
|
||||
}
|
||||
opal_argv_free(lines);
|
||||
|
||||
/* We found one */
|
||||
|
||||
execvp(new_argv[0], new_argv);
|
||||
value = opal_argv_join(new_argv, ' ');
|
||||
opal_show_help("help-orterun.txt", "debugger-exec-failed",
|
||||
true, basename, value, new_argv[0]);
|
||||
free(value);
|
||||
opal_argv_free(new_argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -21,9 +21,7 @@
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
void orte_totalview_cmd_line_setup(opal_cmd_line_t *cmd);
|
||||
void orte_totalview_cmd_line_process(opal_cmd_line_t *cmd, char *basename,
|
||||
int argc, char *argv[]);
|
||||
void orte_run_debugger(char *basename, int argc, char *argv[]);
|
||||
void orte_totalview_init_before_spawn(void);
|
||||
void orte_totalview_init_after_spawn(orte_jobid_t jobid);
|
||||
void orte_totalview_finalize(void);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user