1
1

Clean up debug output by converting it to show_help, plus a little more work to clean up local vs. remote ops when no preload is specified

This commit was SVN r20506.
Этот коммит содержится в:
Ralph Castain 2009-02-10 19:11:24 +00:00
родитель 03e61efe1f
Коммит 6a7fa79a09
2 изменённых файлов: 79 добавлений и 24 удалений

Просмотреть файл

@ -87,3 +87,42 @@ target, or by mistyping the desired rank. Remember that MPI ranks begin
with 0, not 1.
Please correct the cmd line and try again.
#
[too-many-hosts]
A call was made to launch a local slave process, but more than one
target host was provided. Currently, each launch of a local slave
can only target a single host. To launch slaves on multiple hosts,
you must issue one call per host.
Num hosts specified: %d
#
[abs-path-reqd]
A call was made to launch a local slave process that requested the
binaries be pre-positioned on the remote host. However, the path to
the target directory was either not specified, or was given as a
relative path.
Path provided: %s
The path to the target directory must be given as an absolute path. The
target directory does NOT need to exist - the path to the target will
be created, if required.
#
[exec-not-found]
A call was made to launch a local slave process, but the specified
executable could not be found:
Exec: %s
Please either specify an absolute path to the executable, or check
that the executable is in your PATH.
#
[cp-not-found]
A call was made to launch a local slave process that requested the
binaries be pre-positioned on the remote host. However, we could not
find the %s command in your PATH.
Please check that %s is in your PATH and try again.

Просмотреть файл

@ -170,7 +170,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
return rc; return rc;
} }
if (1 < opal_list_get_size(&hosts)) { if (1 < opal_list_get_size(&hosts)) {
opal_output(0, "too many hosts: %d", (int)opal_list_get_size(&hosts)); orte_show_help("help-plm-base.txt", "too-many-hosts", true, (int)opal_list_get_size(&hosts));
return ORTE_ERROR; return ORTE_ERROR;
} }
node = (orte_node_t*)opal_list_remove_first(&hosts); node = (orte_node_t*)opal_list_remove_first(&hosts);
@ -179,7 +179,6 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
OBJ_DESTRUCT(&hosts); OBJ_DESTRUCT(&hosts);
/* is this a local operation? */ /* is this a local operation? */
opal_output(0, "local: %s node: %s", orte_process_info.nodename, nodename);
if (0 == strcmp(orte_process_info.nodename, nodename)) { if (0 == strcmp(orte_process_info.nodename, nodename)) {
local_op = true; local_op = true;
} }
@ -192,8 +191,8 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
/* the target location -must- be an absolute path */ /* the target location -must- be an absolute path */
if (NULL == app->preload_files_dest_dir || if (NULL == app->preload_files_dest_dir ||
!opal_path_is_absolute(app->preload_files_dest_dir)) { !opal_path_is_absolute(app->preload_files_dest_dir)) {
opal_output(0, "target location must be given and an absolute path: %s", orte_show_help("help-plm-base.txt", "abs-path-reqd", true,
(NULL == app->preload_files_dest_dir) ? "NULL" : app->preload_files_dest_dir); (NULL == app->preload_files_dest_dir) ? "NULL" : app->preload_files_dest_dir);
return ORTE_ERROR; return ORTE_ERROR;
} }
/* if the binary is not given in absolute path form, /* if the binary is not given in absolute path form,
@ -202,7 +201,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
if (!opal_path_is_absolute(app->app)) { if (!opal_path_is_absolute(app->app)) {
exefile = opal_find_absolute_path(app->app); exefile = opal_find_absolute_path(app->app);
if (NULL == exefile) { if (NULL == exefile) {
opal_output(0, "could not find executable %s", app->app); orte_show_help("help-plm-base.txt", "exec-not-found", true, app->app);
return ORTE_ERROR; return ORTE_ERROR;
} }
} else { } else {
@ -220,7 +219,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
if (local_op) { if (local_op) {
scp = opal_find_absolute_path("cp"); scp = opal_find_absolute_path("cp");
if (NULL == scp) { if (NULL == scp) {
opal_output(0, "could not find cp"); orte_show_help("help-plm-base.txt", "cp-not-found", true, "cp", "cp");
return ORTE_ERROR; return ORTE_ERROR;
} }
/* form and execute the cp commands */ /* form and execute the cp commands */
@ -239,7 +238,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
/* find the scp command */ /* find the scp command */
scp = opal_find_absolute_path("scp"); scp = opal_find_absolute_path("scp");
if (NULL == scp) { if (NULL == scp) {
opal_output(0, "could not find scp"); orte_show_help("help-plm-base.txt", "cp-not-found", true, "scp", "scp");
return ORTE_ERROR; return ORTE_ERROR;
} }
/* form and execute the scp commands */ /* form and execute the scp commands */
@ -262,19 +261,34 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
/* if we are not preloading the binaries, just setup /* if we are not preloading the binaries, just setup
* the path to the bootproxy script * the path to the bootproxy script
*/ */
/* set the exec path to the agent path */ if (local_op) {
exec_path = strdup(orte_plm_globals.rsh_agent_path); /* if this is a local operation, then just set
/* Start the argv with the rsh/ssh command */ * the exec_path to be the bootproxy
argv = opal_argv_copy(orte_plm_globals.rsh_agent_argv); */
/* add the hostname */ argv = NULL;
opal_argv_append_nosize(&argv, nodename); if (NULL != app->prefix_dir) {
/* add the bootproxy cmd */ asprintf(&cmd, "%s/bin/%s", app->prefix_dir, "orte-bootproxy.sh");
if (NULL != app->prefix_dir) { opal_argv_append_nosize(&argv, cmd);
asprintf(&cmd, "%s/bin/%s", app->prefix_dir, "orte-bootproxy.sh"); free(cmd);
opal_argv_append_nosize(&argv, cmd); } else {
free(cmd); opal_argv_append_nosize(&argv, "orte-bootproxy.sh");
}
exec_path = strdup(argv[0]);
} else { } else {
opal_argv_append_nosize(&argv, "orte-bootproxy.sh"); /* for remote execution, set the exec path to the agent path */
exec_path = strdup(orte_plm_globals.rsh_agent_path);
/* Start the argv with the rsh/ssh command */
argv = opal_argv_copy(orte_plm_globals.rsh_agent_argv);
/* add the hostname */
opal_argv_append_nosize(&argv, nodename);
/* add the bootproxy cmd */
if (NULL != app->prefix_dir) {
asprintf(&cmd, "%s/bin/%s", app->prefix_dir, "orte-bootproxy.sh");
opal_argv_append_nosize(&argv, cmd);
free(cmd);
} else {
opal_argv_append_nosize(&argv, "orte-bootproxy.sh");
}
} }
} }
@ -410,11 +424,13 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
opal_argv_append_nosize(&argv, app->argv[i]); opal_argv_append_nosize(&argv, app->argv[i]);
} }
param = opal_argv_join(argv, ' '); if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
opal_output(0, "%s plm:rsh: final bootproxy cmd:\n\t%s", param = opal_argv_join(argv, ' ');
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), opal_output(0, "%s plm:rsh: final bootproxy cmd:\n\t%s",
(NULL == param) ? "NULL" : param); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
if (NULL != param) free(param); (NULL == param) ? "NULL" : param);
if (NULL != param) free(param);
}
/* fork a child to exec the rsh/ssh session */ /* fork a child to exec the rsh/ssh session */
pid = fork(); pid = fork();