Since this has come up a number of times, have the rsh launcher add MCA params from the environment by default. If it finds that the cmd line is too long, error out with a message directing the user to set a param to ignore the environmental MCA params.
This commit was SVN r25581.
Этот коммит содержится в:
родитель
7510339725
Коммит
3e7ab1212a
@ -581,7 +581,8 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
|
|||||||
sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \
|
sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \
|
||||||
time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \
|
time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \
|
||||||
ifaddrs.h sys/sysctl.h crt_externs.h regex.h signal.h \
|
ifaddrs.h sys/sysctl.h crt_externs.h regex.h signal.h \
|
||||||
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h])
|
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h \
|
||||||
|
sys/syslimits.h])
|
||||||
|
|
||||||
# Needed to work around Darwin requiring sys/socket.h for
|
# Needed to work around Darwin requiring sys/socket.h for
|
||||||
# net/if.h
|
# net/if.h
|
||||||
|
@ -75,3 +75,13 @@ The prefix we were given are:
|
|||||||
|
|
||||||
opal_prefix: %s
|
opal_prefix: %s
|
||||||
prefix_dir: %s
|
prefix_dir: %s
|
||||||
|
#
|
||||||
|
[cmd-line-too-long]
|
||||||
|
The cmd line to launch remote daemons is too long:
|
||||||
|
|
||||||
|
Length: %d
|
||||||
|
Max length: %d
|
||||||
|
|
||||||
|
Consider setting -mca plm_rsh_pass_environ_mca_params 0 to
|
||||||
|
avoid including any environmentally set MCA parameters on the
|
||||||
|
command line.
|
||||||
|
@ -61,6 +61,7 @@ struct orte_plm_rsh_component_t {
|
|||||||
opal_condition_t cond;
|
opal_condition_t cond;
|
||||||
char *agent;
|
char *agent;
|
||||||
bool assume_same_shell;
|
bool assume_same_shell;
|
||||||
|
bool pass_environ_mca_params;
|
||||||
};
|
};
|
||||||
typedef struct orte_plm_rsh_component_t orte_plm_rsh_component_t;
|
typedef struct orte_plm_rsh_component_t orte_plm_rsh_component_t;
|
||||||
|
|
||||||
|
@ -187,6 +187,11 @@ static int rsh_component_open(void)
|
|||||||
mca_base_param_lookup_int(tmp, &value);
|
mca_base_param_lookup_int(tmp, &value);
|
||||||
mca_plm_rsh_component.assume_same_shell = OPAL_INT_TO_BOOL(value);
|
mca_plm_rsh_component.assume_same_shell = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
mca_base_param_reg_int(c, "pass_environ_mca_params",
|
||||||
|
"If set to 0, do not include mca params from the environment on the orted cmd line",
|
||||||
|
false, false, 1, &tmp);
|
||||||
|
mca_plm_rsh_component.pass_environ_mca_params = OPAL_INT_TO_BOOL(tmp);
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,6 +62,12 @@
|
|||||||
#ifdef HAVE_PWD_H
|
#ifdef HAVE_PWD_H
|
||||||
#include <pwd.h>
|
#include <pwd.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if HAVE_LIMITS_H
|
||||||
|
#include <limits.h>
|
||||||
|
#endif
|
||||||
|
#if HAVE_SYS_SYSLIMITS_H
|
||||||
|
#include <sys/syslimits.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "opal/mca/installdirs/installdirs.h"
|
#include "opal/mca/installdirs/installdirs.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
@ -287,7 +293,7 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
|||||||
{
|
{
|
||||||
int argc;
|
int argc;
|
||||||
char **argv;
|
char **argv;
|
||||||
char *param;
|
char *param, *value;
|
||||||
orte_plm_rsh_shell_t remote_shell, local_shell;
|
orte_plm_rsh_shell_t remote_shell, local_shell;
|
||||||
char *lib_base, *bin_base;
|
char *lib_base, *bin_base;
|
||||||
int orted_argc;
|
int orted_argc;
|
||||||
@ -295,34 +301,35 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
|||||||
char *orted_cmd, *orted_prefix, *final_cmd;
|
char *orted_cmd, *orted_prefix, *final_cmd;
|
||||||
int orted_index;
|
int orted_index;
|
||||||
int rc;
|
int rc;
|
||||||
|
int cnt, i, j;
|
||||||
|
bool found;
|
||||||
|
|
||||||
/* Figure out the basenames for the libdir and bindir. This
|
/* Figure out the basenames for the libdir and bindir. This
|
||||||
requires some explanation:
|
requires some explanation:
|
||||||
|
|
||||||
- Use opal_install_dirs.libdir and opal_install_dirs.bindir.
|
- Use opal_install_dirs.libdir and opal_install_dirs.bindir.
|
||||||
|
|
||||||
- After a discussion on the devel-core mailing list, the
|
- After a discussion on the devel-core mailing list, the
|
||||||
developers decided that we should use the local directory
|
developers decided that we should use the local directory
|
||||||
basenames as the basis for the prefix on the remote node.
|
basenames as the basis for the prefix on the remote node.
|
||||||
This does not handle a few notable cases (e.g., if the
|
This does not handle a few notable cases (e.g., if the
|
||||||
libdir/bindir is not simply a subdir under the prefix, if the
|
libdir/bindir is not simply a subdir under the prefix, if the
|
||||||
libdir/bindir basename is not the same on the remote node as
|
libdir/bindir basename is not the same on the remote node as
|
||||||
it is here on the local node, etc.), but we decided that
|
it is here on the local node, etc.), but we decided that
|
||||||
--prefix was meant to handle "the common case". If you need
|
--prefix was meant to handle "the common case". If you need
|
||||||
something more complex than this, a) edit your shell startup
|
something more complex than this, a) edit your shell startup
|
||||||
files to set PATH/LD_LIBRARY_PATH properly on the remote
|
files to set PATH/LD_LIBRARY_PATH properly on the remote
|
||||||
node, or b) use some new/to-be-defined options that
|
node, or b) use some new/to-be-defined options that
|
||||||
explicitly allow setting the bindir/libdir on the remote
|
explicitly allow setting the bindir/libdir on the remote
|
||||||
node. We decided to implement these options (e.g.,
|
node. We decided to implement these options (e.g.,
|
||||||
--remote-bindir and --remote-libdir) to orterun when it
|
--remote-bindir and --remote-libdir) to orterun when it
|
||||||
actually becomes a problem for someone (vs. a hypothetical
|
actually becomes a problem for someone (vs. a hypothetical
|
||||||
situation).
|
situation).
|
||||||
|
|
||||||
Hence, for now, we simply take the basename of this install's
|
Hence, for now, we simply take the basename of this install's
|
||||||
libdir and bindir and use it to append this install's prefix
|
libdir and bindir and use it to append this install's prefix
|
||||||
and use that on the remote node.
|
and use that on the remote node.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
lib_base = opal_basename(opal_install_dirs.libdir);
|
lib_base = opal_basename(opal_install_dirs.libdir);
|
||||||
bin_base = opal_basename(opal_install_dirs.bindir);
|
bin_base = opal_basename(opal_install_dirs.bindir);
|
||||||
@ -429,13 +436,13 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
|||||||
} else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell ||
|
} else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell ||
|
||||||
ORTE_PLM_RSH_SHELL_CSH == remote_shell) {
|
ORTE_PLM_RSH_SHELL_CSH == remote_shell) {
|
||||||
/* [t]csh is a bit more challenging -- we
|
/* [t]csh is a bit more challenging -- we
|
||||||
have to check whether LD_LIBRARY_PATH
|
have to check whether LD_LIBRARY_PATH
|
||||||
is already set before we try to set it.
|
is already set before we try to set it.
|
||||||
Must be very careful about obeying
|
Must be very careful about obeying
|
||||||
[t]csh's order of evaluation and not
|
[t]csh's order of evaluation and not
|
||||||
using a variable before it is defined.
|
using a variable before it is defined.
|
||||||
See this thread for more details:
|
See this thread for more details:
|
||||||
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
|
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
|
||||||
/* if there is nothing preceding orted, then we can just
|
/* if there is nothing preceding orted, then we can just
|
||||||
* assemble the cmd with the orted_cmd at the end. Otherwise,
|
* assemble the cmd with the orted_cmd at the end. Otherwise,
|
||||||
* we have to insert the orted_prefix in the right place
|
* we have to insert the orted_prefix in the right place
|
||||||
@ -489,9 +496,9 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
|||||||
/* Daemonize when not using qrsh. Or, if using qrsh, only
|
/* Daemonize when not using qrsh. Or, if using qrsh, only
|
||||||
* daemonize if told to by user with daemonize_qrsh flag. */
|
* daemonize if told to by user with daemonize_qrsh flag. */
|
||||||
((!mca_plm_rsh_component.using_qrsh) ||
|
((!mca_plm_rsh_component.using_qrsh) ||
|
||||||
(mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) &&
|
(mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) &&
|
||||||
((!mca_plm_rsh_component.using_llspawn) ||
|
((!mca_plm_rsh_component.using_llspawn) ||
|
||||||
(mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
|
(mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
|
||||||
opal_argv_append(&argc, &argv, "--daemonize");
|
opal_argv_append(&argc, &argv, "--daemonize");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -513,25 +520,66 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
|||||||
* by enclosing them in quotes. Check for any multi-word
|
* by enclosing them in quotes. Check for any multi-word
|
||||||
* mca params passed to mpirun and include them
|
* mca params passed to mpirun and include them
|
||||||
*/
|
*/
|
||||||
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
|
cnt = opal_argv_count(orted_cmd_line);
|
||||||
int cnt, i;
|
for (i=0; i < cnt; i+=3) {
|
||||||
cnt = opal_argv_count(orted_cmd_line);
|
/* check if the specified option is more than one word - all
|
||||||
for (i=0; i < cnt; i+=3) {
|
* others have already been passed
|
||||||
/* check if the specified option is more than one word - all
|
*/
|
||||||
* others have already been passed
|
if (NULL != strchr(orted_cmd_line[i+2], ' ')) {
|
||||||
*/
|
/* must add quotes around it */
|
||||||
if (NULL != strchr(orted_cmd_line[i+2], ' ')) {
|
asprintf(&param, "\"%s\"", orted_cmd_line[i+2]);
|
||||||
/* must add quotes around it */
|
/* now pass it along */
|
||||||
asprintf(&param, "\"%s\"", orted_cmd_line[i+2]);
|
opal_argv_append(&argc, &argv, orted_cmd_line[i]);
|
||||||
/* now pass it along */
|
opal_argv_append(&argc, &argv, orted_cmd_line[i+1]);
|
||||||
opal_argv_append(&argc, &argv, orted_cmd_line[i]);
|
opal_argv_append(&argc, &argv, param);
|
||||||
opal_argv_append(&argc, &argv, orted_cmd_line[i+1]);
|
free(param);
|
||||||
opal_argv_append(&argc, &argv, param);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* unless told otherwise... */
|
||||||
|
if (mca_plm_rsh_component.pass_environ_mca_params) {
|
||||||
|
/* now check our local environment for MCA params - add them
|
||||||
|
* only if they aren't already present
|
||||||
|
*/
|
||||||
|
for (i = 0; NULL != environ[i]; ++i) {
|
||||||
|
if (0 == strncmp("OMPI_", environ[i], 5)) {
|
||||||
|
/* check for duplicate in app->env - this
|
||||||
|
* would have been placed there by the
|
||||||
|
* cmd line processor. By convention, we
|
||||||
|
* always let the cmd line override the
|
||||||
|
* environment
|
||||||
|
*/
|
||||||
|
param = strdup(&environ[i][9]);
|
||||||
|
value = strchr(param, '=');
|
||||||
|
*value = '\0';
|
||||||
|
value++;
|
||||||
|
/* see if this param exists on the cmd line */
|
||||||
|
for (j=0; NULL != argv[j]; j++) {
|
||||||
|
if (0 == strcmp(param, argv[j])) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
/* add it */
|
||||||
|
opal_argv_append(&argc, &argv, "-mca");
|
||||||
|
opal_argv_append(&argc, &argv, param);
|
||||||
|
opal_argv_append(&argc, &argv, value);
|
||||||
|
}
|
||||||
free(param);
|
free(param);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
value = opal_argv_join(argv, ' ');
|
||||||
|
if (ARG_MAX < strlen(value)) {
|
||||||
|
orte_show_help("help-plm-rsh.txt", "cmd-line-too-long",
|
||||||
|
true, strlen(value), ARG_MAX);
|
||||||
|
free(value);
|
||||||
|
return ORTE_ERR_SILENT;
|
||||||
|
}
|
||||||
|
free(value);
|
||||||
|
|
||||||
if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
|
if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
|
||||||
ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
|
ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
|
||||||
opal_argv_append(&argc, &argv, ")");
|
opal_argv_append(&argc, &argv, ")");
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user