Since this has come up a number of times, have the rsh launcher add MCA params from the environment by default. If it finds that the cmd line is too long, error out with a message directing the user to set a param to ignore the environmental MCA params.
This commit was SVN r25581.
Этот коммит содержится в:
родитель
7510339725
Коммит
3e7ab1212a
@ -581,7 +581,8 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
|
||||
sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \
|
||||
time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \
|
||||
ifaddrs.h sys/sysctl.h crt_externs.h regex.h signal.h \
|
||||
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h])
|
||||
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h \
|
||||
sys/syslimits.h])
|
||||
|
||||
# Needed to work around Darwin requiring sys/socket.h for
|
||||
# net/if.h
|
||||
|
@ -75,3 +75,13 @@ The prefix we were given are:
|
||||
|
||||
opal_prefix: %s
|
||||
prefix_dir: %s
|
||||
#
|
||||
[cmd-line-too-long]
|
||||
The cmd line to launch remote daemons is too long:
|
||||
|
||||
Length: %d
|
||||
Max length: %d
|
||||
|
||||
Consider setting -mca plm_rsh_pass_environ_mca_params 0 to
|
||||
avoid including any environmentally set MCA parameters on the
|
||||
command line.
|
||||
|
@ -61,6 +61,7 @@ struct orte_plm_rsh_component_t {
|
||||
opal_condition_t cond;
|
||||
char *agent;
|
||||
bool assume_same_shell;
|
||||
bool pass_environ_mca_params;
|
||||
};
|
||||
typedef struct orte_plm_rsh_component_t orte_plm_rsh_component_t;
|
||||
|
||||
|
@ -187,6 +187,11 @@ static int rsh_component_open(void)
|
||||
mca_base_param_lookup_int(tmp, &value);
|
||||
mca_plm_rsh_component.assume_same_shell = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
mca_base_param_reg_int(c, "pass_environ_mca_params",
|
||||
"If set to 0, do not include mca params from the environment on the orted cmd line",
|
||||
false, false, 1, &tmp);
|
||||
mca_plm_rsh_component.pass_environ_mca_params = OPAL_INT_TO_BOOL(tmp);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -62,6 +62,12 @@
|
||||
#ifdef HAVE_PWD_H
|
||||
#include <pwd.h>
|
||||
#endif
|
||||
#if HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#endif
|
||||
#if HAVE_SYS_SYSLIMITS_H
|
||||
#include <sys/syslimits.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
@ -287,7 +293,7 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
||||
{
|
||||
int argc;
|
||||
char **argv;
|
||||
char *param;
|
||||
char *param, *value;
|
||||
orte_plm_rsh_shell_t remote_shell, local_shell;
|
||||
char *lib_base, *bin_base;
|
||||
int orted_argc;
|
||||
@ -295,34 +301,35 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
||||
char *orted_cmd, *orted_prefix, *final_cmd;
|
||||
int orted_index;
|
||||
int rc;
|
||||
|
||||
int cnt, i, j;
|
||||
bool found;
|
||||
|
||||
/* Figure out the basenames for the libdir and bindir. This
|
||||
requires some explanation:
|
||||
requires some explanation:
|
||||
|
||||
- Use opal_install_dirs.libdir and opal_install_dirs.bindir.
|
||||
- Use opal_install_dirs.libdir and opal_install_dirs.bindir.
|
||||
|
||||
- After a discussion on the devel-core mailing list, the
|
||||
developers decided that we should use the local directory
|
||||
basenames as the basis for the prefix on the remote node.
|
||||
This does not handle a few notable cases (e.g., if the
|
||||
libdir/bindir is not simply a subdir under the prefix, if the
|
||||
libdir/bindir basename is not the same on the remote node as
|
||||
it is here on the local node, etc.), but we decided that
|
||||
--prefix was meant to handle "the common case". If you need
|
||||
something more complex than this, a) edit your shell startup
|
||||
files to set PATH/LD_LIBRARY_PATH properly on the remote
|
||||
node, or b) use some new/to-be-defined options that
|
||||
explicitly allow setting the bindir/libdir on the remote
|
||||
node. We decided to implement these options (e.g.,
|
||||
--remote-bindir and --remote-libdir) to orterun when it
|
||||
actually becomes a problem for someone (vs. a hypothetical
|
||||
situation).
|
||||
- After a discussion on the devel-core mailing list, the
|
||||
developers decided that we should use the local directory
|
||||
basenames as the basis for the prefix on the remote node.
|
||||
This does not handle a few notable cases (e.g., if the
|
||||
libdir/bindir is not simply a subdir under the prefix, if the
|
||||
libdir/bindir basename is not the same on the remote node as
|
||||
it is here on the local node, etc.), but we decided that
|
||||
--prefix was meant to handle "the common case". If you need
|
||||
something more complex than this, a) edit your shell startup
|
||||
files to set PATH/LD_LIBRARY_PATH properly on the remote
|
||||
node, or b) use some new/to-be-defined options that
|
||||
explicitly allow setting the bindir/libdir on the remote
|
||||
node. We decided to implement these options (e.g.,
|
||||
--remote-bindir and --remote-libdir) to orterun when it
|
||||
actually becomes a problem for someone (vs. a hypothetical
|
||||
situation).
|
||||
|
||||
Hence, for now, we simply take the basename of this install's
|
||||
libdir and bindir and use it to append this install's prefix
|
||||
and use that on the remote node.
|
||||
*/
|
||||
Hence, for now, we simply take the basename of this install's
|
||||
libdir and bindir and use it to append this install's prefix
|
||||
and use that on the remote node.
|
||||
*/
|
||||
|
||||
lib_base = opal_basename(opal_install_dirs.libdir);
|
||||
bin_base = opal_basename(opal_install_dirs.bindir);
|
||||
@ -429,13 +436,13 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
||||
} else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell ||
|
||||
ORTE_PLM_RSH_SHELL_CSH == remote_shell) {
|
||||
/* [t]csh is a bit more challenging -- we
|
||||
have to check whether LD_LIBRARY_PATH
|
||||
is already set before we try to set it.
|
||||
Must be very careful about obeying
|
||||
[t]csh's order of evaluation and not
|
||||
using a variable before it is defined.
|
||||
See this thread for more details:
|
||||
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
|
||||
have to check whether LD_LIBRARY_PATH
|
||||
is already set before we try to set it.
|
||||
Must be very careful about obeying
|
||||
[t]csh's order of evaluation and not
|
||||
using a variable before it is defined.
|
||||
See this thread for more details:
|
||||
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
|
||||
/* if there is nothing preceding orted, then we can just
|
||||
* assemble the cmd with the orted_cmd at the end. Otherwise,
|
||||
* we have to insert the orted_prefix in the right place
|
||||
@ -489,9 +496,9 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
||||
/* Daemonize when not using qrsh. Or, if using qrsh, only
|
||||
* daemonize if told to by user with daemonize_qrsh flag. */
|
||||
((!mca_plm_rsh_component.using_qrsh) ||
|
||||
(mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) &&
|
||||
(mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) &&
|
||||
((!mca_plm_rsh_component.using_llspawn) ||
|
||||
(mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
|
||||
(mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
|
||||
opal_argv_append(&argc, &argv, "--daemonize");
|
||||
}
|
||||
|
||||
@ -513,25 +520,66 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
||||
* by enclosing them in quotes. Check for any multi-word
|
||||
* mca params passed to mpirun and include them
|
||||
*/
|
||||
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
|
||||
int cnt, i;
|
||||
cnt = opal_argv_count(orted_cmd_line);
|
||||
for (i=0; i < cnt; i+=3) {
|
||||
/* check if the specified option is more than one word - all
|
||||
* others have already been passed
|
||||
*/
|
||||
if (NULL != strchr(orted_cmd_line[i+2], ' ')) {
|
||||
/* must add quotes around it */
|
||||
asprintf(&param, "\"%s\"", orted_cmd_line[i+2]);
|
||||
/* now pass it along */
|
||||
opal_argv_append(&argc, &argv, orted_cmd_line[i]);
|
||||
opal_argv_append(&argc, &argv, orted_cmd_line[i+1]);
|
||||
opal_argv_append(&argc, &argv, param);
|
||||
cnt = opal_argv_count(orted_cmd_line);
|
||||
for (i=0; i < cnt; i+=3) {
|
||||
/* check if the specified option is more than one word - all
|
||||
* others have already been passed
|
||||
*/
|
||||
if (NULL != strchr(orted_cmd_line[i+2], ' ')) {
|
||||
/* must add quotes around it */
|
||||
asprintf(&param, "\"%s\"", orted_cmd_line[i+2]);
|
||||
/* now pass it along */
|
||||
opal_argv_append(&argc, &argv, orted_cmd_line[i]);
|
||||
opal_argv_append(&argc, &argv, orted_cmd_line[i+1]);
|
||||
opal_argv_append(&argc, &argv, param);
|
||||
free(param);
|
||||
}
|
||||
}
|
||||
|
||||
/* unless told otherwise... */
|
||||
if (mca_plm_rsh_component.pass_environ_mca_params) {
|
||||
/* now check our local environment for MCA params - add them
|
||||
* only if they aren't already present
|
||||
*/
|
||||
for (i = 0; NULL != environ[i]; ++i) {
|
||||
if (0 == strncmp("OMPI_", environ[i], 5)) {
|
||||
/* check for duplicate in app->env - this
|
||||
* would have been placed there by the
|
||||
* cmd line processor. By convention, we
|
||||
* always let the cmd line override the
|
||||
* environment
|
||||
*/
|
||||
param = strdup(&environ[i][9]);
|
||||
value = strchr(param, '=');
|
||||
*value = '\0';
|
||||
value++;
|
||||
/* see if this param exists on the cmd line */
|
||||
for (j=0; NULL != argv[j]; j++) {
|
||||
if (0 == strcmp(param, argv[j])) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
/* add it */
|
||||
opal_argv_append(&argc, &argv, "-mca");
|
||||
opal_argv_append(&argc, &argv, param);
|
||||
opal_argv_append(&argc, &argv, value);
|
||||
}
|
||||
free(param);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
value = opal_argv_join(argv, ' ');
|
||||
if (ARG_MAX < strlen(value)) {
|
||||
orte_show_help("help-plm-rsh.txt", "cmd-line-too-long",
|
||||
true, strlen(value), ARG_MAX);
|
||||
free(value);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
free(value);
|
||||
|
||||
if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
|
||||
ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
|
||||
opal_argv_append(&argc, &argv, ")");
|
||||
@ -938,7 +986,7 @@ static int rsh_launch(orte_job_t *jdata)
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
|
||||
/* if we are tree launching, find our children and create the launch cmd */
|
||||
if (mca_plm_rsh_component.tree_spawn) {
|
||||
orte_daemon_cmd_flag_t command = ORTE_DAEMON_TREE_SPAWN;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user