1
1

Good weekend brainless activity -- implement the orterun command line

debugger scheme described in
http://www.open-mpi.org/community/lists/users/2005/10/0214.php.  This
makes our user-level debugger scheme much more vendor-independent
(although the "-tv" option will still work for backwards compatibility
-- it'll just be a synonym of "--debug").

This commit was SVN r8206.
Этот коммит содержится в:
Jeff Squyres 2005-11-20 16:06:53 +00:00
родитель 20cea60b82
Коммит 8d96c21311
5 изменённых файлов: 210 добавлений и 39 удалений

Просмотреть файл

@ -25,12 +25,17 @@
/*
 * Register the ORTE MCA parameters that orterun and ORTE applications
 * rely on.
 *
 * @param infrastructure  Default value for the "orte_infrastructure"
 *                        parameter (stored as an int).
 *
 * @return ORTE_SUCCESS (registration results are not inspected).
 */
int orte_register_params(bool infrastructure)
{
    /* Register "orte_infrastructure" exactly once; the duplicated
       registration and the never-read "id" local were removed. */
    mca_base_param_reg_int_name("orte", "infrastructure",
                                "Whether we are ORTE infrastructure or an ORTE application",
                                true, true, (int)infrastructure, NULL);

    /* User-level debugger info string: a ':'-separated list of command
       templates that orterun searches when asked to start a debugger */
    mca_base_param_reg_string_name("orte", "base_user_debugger",
                                   "Sequence of user-level debuggers to search for in orterun",
                                   false, false, "totalview @mpirun@ -a @mpirun_args@ : fx2 @mpirun@ -a @mpirun_args@", NULL);

    /* All done */
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -91,9 +91,30 @@ that there are no runaway processes still executing.
A prefix was supplied to %s that only contained slashes.
This is a fatal error; %s will now abort. No processes were launched.
[totalview-exec-failed]
%s was unable to launch the totalview debugger. Things to check:
#
[debugger-mca-param-not-found]
Internal error -- the orte_base_user_debugger MCA parameter was not able to
be found. Please contact the Open RTE developers; this should not
happen.
#
[debugger-orte_base_user_debugger-empty]
The MCA parameter "orte_base_user_debugger" was empty, indicating that
no user-level debuggers have been defined. Please set this MCA
parameter to a value and try again.
#
[debugger-not-found]
A suitable debugger could not be found in your PATH. Check the values
specified in the orte_base_user_debugger MCA parameter for the list of
debuggers that was searched.
#
[debugger-exec-failed]
%s was unable to launch the specified debugger. This is what was
launched:
- Ensure that TotalView is installed properly
- Ensure that the "totalview" executable is in your path
- Ensure that valid licenses are available to run the TotalView debugger
%s
Things to check:
- Ensure that the debugger is installed properly
- Ensure that the "%s" executable is in your path
- Ensure that any required licenses are available to run the debugger

Просмотреть файл

@ -94,6 +94,7 @@ struct globals_t {
bool no_wait_for_job_completion;
bool by_node;
bool by_slot;
bool debugger;
size_t num_procs;
int exit_status;
char *hostfile;
@ -191,8 +192,20 @@ opal_cmd_line_init_t cmd_line_init[] = {
{ NULL, NULL, NULL, 'H', "host", "host", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"List of hosts to invoke processes on" },
/* User-level debugger arguments */
{ NULL, NULL, NULL, '\0', "tv", "tv", 0,
&orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL,
"Deprecated backwards compatibility flag; synonym for \"--debug\"" },
{ NULL, NULL, NULL, '\0', "debug", "debug", 0,
&orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL,
"Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter" },
{ "orte", "base", "user_debugger", '\0', "debugger", "debugger", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Sequence of debuggers to search for when \"--debug\" is used" },
/* OpenRTE arguments */
{ "orte", "debug", NULL, 'd', NULL, "debug", 0,
{ "orte", "debug", NULL, 'd', NULL, "debug-devel", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Enable debugging of OpenRTE" },
{ "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0,
@ -237,6 +250,11 @@ int orterun(int argc, char *argv[])
int rc, i, num_apps, array_size, j;
int id, iparam;
/* Setup MCA params */
mca_base_param_init();
orte_register_params(false);
/* Setup the abort message (for use in the signal handler) */
orterun_basename = opal_basename(argv[0]);
@ -652,6 +670,7 @@ static int init_globals(void)
false,
false,
false,
false,
0,
0,
NULL,
@ -689,7 +708,6 @@ static int parse_globals(int argc, char* argv[])
init_globals();
opal_cmd_line_create(&cmd_line, cmd_line_init);
mca_base_cmd_line_setup(&cmd_line);
orte_totalview_cmd_line_setup(&cmd_line);
if (OMPI_SUCCESS != (ret = opal_cmd_line_parse(&cmd_line, true,
argc, argv)) ) {
return ret;
@ -708,9 +726,11 @@ static int parse_globals(int argc, char* argv[])
exit(0);
}
/* Do we want totalview? */
/* Do we want a user-level debugger? */
orte_totalview_cmd_line_process(&cmd_line, orterun_basename, argc, argv);
if (orterun_globals.debugger) {
orte_run_debugger(orterun_basename, argc, argv);
}
/* Allocate and map by node or by slot? Shortcut for setting an
MCA param. */

Просмотреть файл

@ -43,6 +43,7 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
/*
* The environment
@ -53,6 +54,7 @@ extern char **environ;
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/show_help.h"
#include "opal/util/path.h"
#include "opal/class/opal_list.h"
#include "mca/base/base.h"
#include "mca/errmgr/errmgr.h"
@ -117,39 +119,164 @@ static void dump(void)
}
}
/**
* Add "-tv" to the command line parsing options
/*
* Process one line from the orte_base_user_debugger MCA param and
* look for that debugger in the path. If we find it, fill in
* new_argv.
*/
void orte_totalview_cmd_line_setup(opal_cmd_line_t *cmd)
static int process(char *orig_line, char *basename, int argc, char **argv,
char ***new_argv)
{
opal_cmd_line_make_opt3(cmd, '\0', "tv", "tv", 0,
"Convenience option to re-exec under the TotalView debugger");
int i;
char *line, *full_line = strdup(orig_line);
char *user_argv, *tmp, **tmp_argv;
char cwd[PATH_MAX];
line = full_line;
if (NULL == line) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* Trim off whitespace at the beginning and ending of line */
for (i = 0; '\0' != line[i] && isspace(line[i]); ++line) {
continue;
}
for (i = strlen(line) - 2; i > 0 && isspace(line[i]); ++i) {
line[i] = '\0';
}
if (strlen(line) <= 0) {
return ORTE_ERROR;
}
/* Remove --debug and --debugger from the user command line
params */
if (1 == argc) {
user_argv = strdup("");
} else {
tmp_argv = opal_argv_copy(argv);
for (i = 0; NULL != tmp_argv[i]; ++i) {
if (0 == strcmp(tmp_argv[i], "-debug") ||
0 == strcmp(tmp_argv[i], "--debug")) {
free(tmp_argv[i]);
tmp_argv[i] = strdup("");
} else if (0 == strcmp(tmp_argv[i], "--debugger") ||
0 == strcmp(tmp_argv[i], "-debugger")) {
free(tmp_argv[i]);
tmp_argv[i] = strdup("");
if (NULL != tmp_argv[i + 1]) {
++i;
free(tmp_argv[i]);
tmp_argv[i] = strdup("");
}
}
}
user_argv = opal_argv_join(tmp_argv + 1, ' ');
opal_argv_free(tmp_argv);
}
/* Replace @@ tokens */
for (i = 0; i < strlen(line); ++i) {
tmp = NULL;
if (0 == strncmp(line + i, "@mpirun@", 8)) {
line[i] = '\0';
asprintf(&tmp, "%s%s%s", line, argv[0], line + i + 8);
} else if (0 == strncmp(line + i, "@orterun@", 9)) {
line[i] = '\0';
asprintf(&tmp, "%s%s%s", line, argv[0], line + i + 9);
} else if (0 == strncmp(line + i, "@mpirun_args@", 13)) {
line[i] = '\0';
asprintf(&tmp, "%s%s%s", line, user_argv, line + i + 13);
} else if (0 == strncmp(line + i, "@orterun_args@", 14)) {
line[i] = '\0';
asprintf(&tmp, "%s%s%s", line, user_argv, line + i + 14);
}
if (NULL != tmp) {
free(full_line);
full_line = line = tmp;
--i;
}
}
/* Split up into argv */
*new_argv = opal_argv_split(line, ' ');
free(line);
/* Can we find argv[0] in the path? */
getcwd(cwd, PATH_MAX);
tmp = opal_path_findv((*new_argv)[0], 0, environ, cwd);
if (NULL != tmp) {
free(tmp);
return ORTE_SUCCESS;
}
/* All done -- didn't find it */
opal_argv_free(*new_argv);
*new_argv = NULL;
return ORTE_ERR_NOT_FOUND;
}
/**
* If -tv was given, re-exec under totalview
* Run a user-level debugger
*/
void orte_totalview_cmd_line_process(opal_cmd_line_t *cmd, char *basename,
int argc, char *argv[])
void orte_run_debugger(char *basename, int argc, char *argv[])
{
if (opal_cmd_line_is_taken(cmd, "tv")) {
int i;
char **new_argv = NULL;
printf("found -tv\n");
int i, id;
char **new_argv = NULL;
char *value, **lines;
opal_argv_append_nosize(&new_argv, "totalview");
opal_argv_append_nosize(&new_argv, argv[0]);
opal_argv_append_nosize(&new_argv, "-a");
for (i = 1; i < argc; ++i) {
opal_argv_append_nosize(&new_argv, argv[i]);
}
execvp(new_argv[0], new_argv);
opal_show_help("help-orterun.txt", "totalview-exec-failed",
true, basename);
/* Get the orte_base_debug MCA parameter and search for a debugger
that can run */
id = mca_base_param_find("orte", NULL, "base_user_debugger");
if (id < 0) {
opal_show_help("help-orterun.txt", "debugger-mca-param-not-found",
true);
exit(1);
}
value = NULL;
mca_base_param_lookup_string(id, &value);
if (NULL == value) {
opal_show_help("help-orterun.txt", "debugger-orte_base_user_debugger-empty",
true);
exit(1);
}
/* Look through all the values in the MCA param */
lines = opal_argv_split(value, ':');
free(value);
for (i = 0; NULL != lines[i]; ++i) {
if (ORTE_SUCCESS == process(lines[i], basename, argc, argv,
&new_argv)) {
break;
}
}
/* If we didn't find one, abort */
if (NULL == lines[i]) {
opal_show_help("help-orterun.txt", "debugger-not-found", true);
free(str);
exit(1);
}
opal_argv_free(lines);
/* We found one */
execvp(new_argv[0], new_argv);
value = opal_argv_join(new_argv, ' ');
opal_show_help("help-orterun.txt", "debugger-exec-failed",
true, basename, value, new_argv[0]);
free(value);
opal_argv_free(new_argv);
exit(1);
}

Просмотреть файл

@ -21,9 +21,7 @@
#include "orte_config.h"
void orte_totalview_cmd_line_setup(opal_cmd_line_t *cmd);
void orte_totalview_cmd_line_process(opal_cmd_line_t *cmd, char *basename,
int argc, char *argv[]);
/**
 * Run a user-level debugger: search the ':'-separated command templates
 * in the "orte_base_user_debugger" MCA parameter and exec the first one
 * whose debugger can be found.  Does not return; on any failure a help
 * message is shown and the process exits with status 1.
 *
 * @param basename  Program name used in the exec-failure help message.
 * @param argc      Number of entries in argv.
 * @param argv      The command line orterun was started with.
 */
void orte_run_debugger(char *basename, int argc, char *argv[]);
void orte_totalview_init_before_spawn(void);
void orte_totalview_init_after_spawn(orte_jobid_t jobid);
void orte_totalview_finalize(void);