1
1

Since this has come up a number of times, have the rsh launcher add MCA params found in the environment to the orted command line by default. If the resulting command line is too long, error out with a message directing the user to set the plm_rsh_pass_environ_mca_params param to 0 so that the environmental MCA params are ignored.

This commit was SVN r25581.
Этот коммит содержится в:
Ralph Castain 2011-12-07 01:24:36 +00:00
родитель 7510339725
Коммит 3e7ab1212a
5 изменённых файлов: 114 добавлений и 49 удалений

Просмотреть файл

@ -581,7 +581,8 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \ sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \
time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \ time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \
ifaddrs.h sys/sysctl.h crt_externs.h regex.h signal.h \ ifaddrs.h sys/sysctl.h crt_externs.h regex.h signal.h \
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h]) ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h \
sys/syslimits.h])
# Needed to work around Darwin requiring sys/socket.h for # Needed to work around Darwin requiring sys/socket.h for
# net/if.h # net/if.h

Просмотреть файл

@ -75,3 +75,13 @@ The prefix we were given are:
opal_prefix: %s opal_prefix: %s
prefix_dir: %s prefix_dir: %s
#
[cmd-line-too-long]
The cmd line to launch remote daemons is too long:
Length: %d
Max length: %d
Consider setting -mca plm_rsh_pass_environ_mca_params 0 to
avoid including any environmentally set MCA parameters on the
command line.

Просмотреть файл

@ -61,6 +61,7 @@ struct orte_plm_rsh_component_t {
opal_condition_t cond; opal_condition_t cond;
char *agent; char *agent;
bool assume_same_shell; bool assume_same_shell;
bool pass_environ_mca_params;
}; };
typedef struct orte_plm_rsh_component_t orte_plm_rsh_component_t; typedef struct orte_plm_rsh_component_t orte_plm_rsh_component_t;

Просмотреть файл

@ -187,6 +187,11 @@ static int rsh_component_open(void)
mca_base_param_lookup_int(tmp, &value); mca_base_param_lookup_int(tmp, &value);
mca_plm_rsh_component.assume_same_shell = OPAL_INT_TO_BOOL(value); mca_plm_rsh_component.assume_same_shell = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int(c, "pass_environ_mca_params",
"If set to 0, do not include mca params from the environment on the orted cmd line",
false, false, 1, &tmp);
mca_plm_rsh_component.pass_environ_mca_params = OPAL_INT_TO_BOOL(tmp);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -62,6 +62,12 @@
#ifdef HAVE_PWD_H #ifdef HAVE_PWD_H
#include <pwd.h> #include <pwd.h>
#endif #endif
#if HAVE_LIMITS_H
#include <limits.h>
#endif
#if HAVE_SYS_SYSLIMITS_H
#include <sys/syslimits.h>
#endif
#include "opal/mca/installdirs/installdirs.h" #include "opal/mca/installdirs/installdirs.h"
#include "opal/mca/base/mca_base_param.h" #include "opal/mca/base/mca_base_param.h"
@ -287,7 +293,7 @@ static int setup_launch(int *argcptr, char ***argvptr,
{ {
int argc; int argc;
char **argv; char **argv;
char *param; char *param, *value;
orte_plm_rsh_shell_t remote_shell, local_shell; orte_plm_rsh_shell_t remote_shell, local_shell;
char *lib_base, *bin_base; char *lib_base, *bin_base;
int orted_argc; int orted_argc;
@ -295,34 +301,35 @@ static int setup_launch(int *argcptr, char ***argvptr,
char *orted_cmd, *orted_prefix, *final_cmd; char *orted_cmd, *orted_prefix, *final_cmd;
int orted_index; int orted_index;
int rc; int rc;
int cnt, i, j;
bool found;
/* Figure out the basenames for the libdir and bindir. This /* Figure out the basenames for the libdir and bindir. This
requires some explanation: requires some explanation:
- Use opal_install_dirs.libdir and opal_install_dirs.bindir. - Use opal_install_dirs.libdir and opal_install_dirs.bindir.
- After a discussion on the devel-core mailing list, the - After a discussion on the devel-core mailing list, the
developers decided that we should use the local directory developers decided that we should use the local directory
basenames as the basis for the prefix on the remote node. basenames as the basis for the prefix on the remote node.
This does not handle a few notable cases (e.g., if the This does not handle a few notable cases (e.g., if the
libdir/bindir is not simply a subdir under the prefix, if the libdir/bindir is not simply a subdir under the prefix, if the
libdir/bindir basename is not the same on the remote node as libdir/bindir basename is not the same on the remote node as
it is here on the local node, etc.), but we decided that it is here on the local node, etc.), but we decided that
--prefix was meant to handle "the common case". If you need --prefix was meant to handle "the common case". If you need
something more complex than this, a) edit your shell startup something more complex than this, a) edit your shell startup
files to set PATH/LD_LIBRARY_PATH properly on the remote files to set PATH/LD_LIBRARY_PATH properly on the remote
node, or b) use some new/to-be-defined options that node, or b) use some new/to-be-defined options that
explicitly allow setting the bindir/libdir on the remote explicitly allow setting the bindir/libdir on the remote
node. We decided to implement these options (e.g., node. We decided to implement these options (e.g.,
--remote-bindir and --remote-libdir) to orterun when it --remote-bindir and --remote-libdir) to orterun when it
actually becomes a problem for someone (vs. a hypothetical actually becomes a problem for someone (vs. a hypothetical
situation). situation).
Hence, for now, we simply take the basename of this install's Hence, for now, we simply take the basename of this install's
libdir and bindir and use it to append this install's prefix libdir and bindir and use it to append this install's prefix
and use that on the remote node. and use that on the remote node.
*/ */
lib_base = opal_basename(opal_install_dirs.libdir); lib_base = opal_basename(opal_install_dirs.libdir);
bin_base = opal_basename(opal_install_dirs.bindir); bin_base = opal_basename(opal_install_dirs.bindir);
@ -429,13 +436,13 @@ static int setup_launch(int *argcptr, char ***argvptr,
} else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell || } else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell ||
ORTE_PLM_RSH_SHELL_CSH == remote_shell) { ORTE_PLM_RSH_SHELL_CSH == remote_shell) {
/* [t]csh is a bit more challenging -- we /* [t]csh is a bit more challenging -- we
have to check whether LD_LIBRARY_PATH have to check whether LD_LIBRARY_PATH
is already set before we try to set it. is already set before we try to set it.
Must be very careful about obeying Must be very careful about obeying
[t]csh's order of evaluation and not [t]csh's order of evaluation and not
using a variable before it is defined. using a variable before it is defined.
See this thread for more details: See this thread for more details:
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */ http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
/* if there is nothing preceding orted, then we can just /* if there is nothing preceding orted, then we can just
* assemble the cmd with the orted_cmd at the end. Otherwise, * assemble the cmd with the orted_cmd at the end. Otherwise,
* we have to insert the orted_prefix in the right place * we have to insert the orted_prefix in the right place
@ -489,9 +496,9 @@ static int setup_launch(int *argcptr, char ***argvptr,
/* Daemonize when not using qrsh. Or, if using qrsh, only /* Daemonize when not using qrsh. Or, if using qrsh, only
* daemonize if told to by user with daemonize_qrsh flag. */ * daemonize if told to by user with daemonize_qrsh flag. */
((!mca_plm_rsh_component.using_qrsh) || ((!mca_plm_rsh_component.using_qrsh) ||
(mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) && (mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) &&
((!mca_plm_rsh_component.using_llspawn) || ((!mca_plm_rsh_component.using_llspawn) ||
(mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) { (mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
opal_argv_append(&argc, &argv, "--daemonize"); opal_argv_append(&argc, &argv, "--daemonize");
} }
@ -513,25 +520,66 @@ static int setup_launch(int *argcptr, char ***argvptr,
* by enclosing them in quotes. Check for any multi-word * by enclosing them in quotes. Check for any multi-word
* mca params passed to mpirun and include them * mca params passed to mpirun and include them
*/ */
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { cnt = opal_argv_count(orted_cmd_line);
int cnt, i; for (i=0; i < cnt; i+=3) {
cnt = opal_argv_count(orted_cmd_line); /* check if the specified option is more than one word - all
for (i=0; i < cnt; i+=3) { * others have already been passed
/* check if the specified option is more than one word - all */
* others have already been passed if (NULL != strchr(orted_cmd_line[i+2], ' ')) {
*/ /* must add quotes around it */
if (NULL != strchr(orted_cmd_line[i+2], ' ')) { asprintf(&param, "\"%s\"", orted_cmd_line[i+2]);
/* must add quotes around it */ /* now pass it along */
asprintf(&param, "\"%s\"", orted_cmd_line[i+2]); opal_argv_append(&argc, &argv, orted_cmd_line[i]);
/* now pass it along */ opal_argv_append(&argc, &argv, orted_cmd_line[i+1]);
opal_argv_append(&argc, &argv, orted_cmd_line[i]); opal_argv_append(&argc, &argv, param);
opal_argv_append(&argc, &argv, orted_cmd_line[i+1]); free(param);
opal_argv_append(&argc, &argv, param); }
}
/* unless told otherwise... */
if (mca_plm_rsh_component.pass_environ_mca_params) {
/* now check our local environment for MCA params - add them
* only if they aren't already present
*/
for (i = 0; NULL != environ[i]; ++i) {
if (0 == strncmp("OMPI_", environ[i], 5)) {
/* check for duplicate in app->env - this
* would have been placed there by the
* cmd line processor. By convention, we
* always let the cmd line override the
* environment
*/
param = strdup(&environ[i][9]);
value = strchr(param, '=');
*value = '\0';
value++;
/* see if this param exists on the cmd line */
for (j=0; NULL != argv[j]; j++) {
if (0 == strcmp(param, argv[j])) {
found = true;
break;
}
}
if (!found) {
/* add it */
opal_argv_append(&argc, &argv, "-mca");
opal_argv_append(&argc, &argv, param);
opal_argv_append(&argc, &argv, value);
}
free(param); free(param);
} }
} }
} }
value = opal_argv_join(argv, ' ');
if (ARG_MAX < strlen(value)) {
orte_show_help("help-plm-rsh.txt", "cmd-line-too-long",
true, strlen(value), ARG_MAX);
free(value);
return ORTE_ERR_SILENT;
}
free(value);
if (ORTE_PLM_RSH_SHELL_SH == remote_shell || if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
ORTE_PLM_RSH_SHELL_KSH == remote_shell) { ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
opal_argv_append(&argc, &argv, ")"); opal_argv_append(&argc, &argv, ")");