1
1

Expand the coverage a little when looking at remote shells for rsh. Prior patch (r25758) works only if both ends of the rsh/ssh connection are Mac. What we really want is to use the Mac version of ld_library_path when the remote end is Mac, regardless of the OS where mpirun is executing. So add a test for system type to the remote_shell test, and set the ld_library_path name to match the remote system type.

This commit was SVN r25762.

The following SVN revision numbers were found above:
  r25758 --> open-mpi/ompi@1afb77e603
Этот коммит содержится в:
Ralph Castain 2012-01-21 23:48:42 +00:00
родитель 6c6d19f5f5
Коммит 469e40ace2

Просмотреть файл

@ -143,9 +143,11 @@ static void ssh_child(int argc, char **argv,
orte_vpid_t vpid, int proc_vpid_index)
__opal_attribute_noreturn__;
static int rsh_probe(char *nodename,
orte_plm_rsh_shell_t *shell);
orte_plm_rsh_shell_t *shell,
bool *remote_darwin);
static int setup_shell(orte_plm_rsh_shell_t *rshell,
orte_plm_rsh_shell_t *lshell,
bool *remote_darwin,
char *nodename, int *argc, char ***argv);
/* local global storage of timing variables */
@ -296,7 +298,9 @@ static int setup_launch(int *argcptr, char ***argvptr,
int rc;
int cnt, i, j;
bool found;
bool remote_darwin;
char *lib_path;
/* Figure out the basenames for the libdir and bindir. This
requires some explanation:
@ -337,11 +341,24 @@ static int setup_launch(int *argcptr, char ***argvptr,
/* setup the correct shell info */
if (ORTE_SUCCESS != (rc = setup_shell(&remote_shell, &local_shell,
&remote_darwin,
nodename, &argc, &argv))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (remote_darwin) {
/* the remote machine is a Mac, so the lib_path we need
* to set is actually DYLD_LIBRARY_PATH
*/
lib_path = "DYLD_LIBRARY_PATH";
} else {
/* the remote machine is Linux, or at least not a Mac, so
* the lib_path we need to set is LD_LIBRARY_PATH
*/
lib_path = "LD_LIBRARY_PATH";
}
/* now get the orted cmd - as specified by user - into our tmp array.
* The function returns the location where the actual orted command is
* located - usually in the final spot, but someone could
@ -417,17 +434,13 @@ static int setup_launch(int *argcptr, char ***argvptr,
*/
asprintf (&final_cmd,
"%s%s%s PATH=%s/%s:$PATH ; export PATH ; "
#if defined(__APPLE__)
"DYLD_LIBRARY_PATH=%s/%s:$DYLD_LIBRARY_PATH ; export DYLD_LIBRARY_PATH ; "
#else
"LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
#endif
"%s=%s/%s:$%s ; export DYLD_LIBRARY_PATH ; "
"%s %s",
(opal_prefix != NULL ? "OPAL_PREFIX=" : " "),
(opal_prefix != NULL ? opal_prefix : " "),
(opal_prefix != NULL ? " ; export OPAL_PREFIX;" : " "),
prefix_dir, bin_base,
prefix_dir, lib_base,
lib_path, prefix_dir, lib_base, lib_path,
(orted_prefix != NULL ? orted_prefix : " "),
(full_orted_cmd != NULL ? full_orted_cmd : " "));
} else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell ||
@ -446,28 +459,20 @@ static int setup_launch(int *argcptr, char ***argvptr,
*/
asprintf (&final_cmd,
"%s%s%s set path = ( %s/%s $path ) ; "
#if defined(__APPLE__)
"if ( $?DYLD_LIBRARY_PATH == 1 ) "
"if ( $?%s == 1 ) "
"set OMPI_have_llp ; "
"if ( $?DYLD_LIBRARY_PATH == 0 ) "
"setenv DYLD_LIBRARY_PATH %s/%s ; "
"if ( $?%s == 0 ) "
"setenv %s %s/%s ; "
"if ( $?OMPI_have_llp == 1 ) "
"setenv DYLD_LIBRARY_PATH %s/%s:$DYLD_LIBRARY_PATH ; "
#else
"if ( $?LD_LIBRARY_PATH == 1 ) "
"set OMPI_have_llp ; "
"if ( $?LD_LIBRARY_PATH == 0 ) "
"setenv LD_LIBRARY_PATH %s/%s ; "
"if ( $?OMPI_have_llp == 1 ) "
"setenv LD_LIBRARY_PATH %s/%s:$LD_LIBRARY_PATH ; "
#endif
"setenv %s %s/%s:$%s ; "
"%s %s",
(opal_prefix != NULL ? "setenv OPAL_PREFIX " : " "),
(opal_prefix != NULL ? opal_prefix : " "),
(opal_prefix != NULL ? " ;" : " "),
prefix_dir, bin_base,
prefix_dir, lib_base,
prefix_dir, lib_base,
lib_path, lib_path, lib_path,
prefix_dir, lib_base, lib_path,
prefix_dir, lib_base, lib_path,
(orted_prefix != NULL ? orted_prefix : " "),
(full_orted_cmd != NULL ? full_orted_cmd : " "));
} else {
@ -1347,7 +1352,7 @@ static orte_plm_rsh_shell_t find_shell(char *shell)
++sh_name;
for (i = 0; i < (int)(sizeof (orte_plm_rsh_shell_name) /
sizeof(orte_plm_rsh_shell_name[0])); ++i) {
if (0 == strcmp(sh_name, orte_plm_rsh_shell_name[i])) {
if (NULL != strstr(sh_name, orte_plm_rsh_shell_name[i])) {
return (orte_plm_rsh_shell_t)i;
}
}
@ -1414,10 +1419,11 @@ static int launch_agent_setup(const char *agent, char *path)
}
/**
* Check the Shell variable on the specified node
* Check the Shell variable and system type on the specified node
*/
static int rsh_probe(char *nodename,
orte_plm_rsh_shell_t *shell)
orte_plm_rsh_shell_t *shell,
bool *remote_darwin)
{
char ** argv;
int argc, rc = ORTE_SUCCESS, i;
@ -1457,7 +1463,7 @@ static int rsh_probe(char *nodename,
argv = opal_argv_copy(rsh_agent_argv);
argc = opal_argv_count(rsh_agent_argv);
opal_argv_append(&argc, &argv, nodename);
opal_argv_append(&argc, &argv, "echo $SHELL");
opal_argv_append(&argc, &argv, "echo $SHELL; uname");
execvp(argv[0], argv);
exit(errno);
@ -1500,15 +1506,10 @@ static int rsh_probe(char *nodename,
char *sh_name = rindex(outbuf, '/');
if( NULL != sh_name ) {
sh_name++; /* skip '/' */
/* We cannot use "echo -n $SHELL" because -n is not portable. Therefore
* we have to remove the "\n" */
if ( sh_name[strlen(sh_name)-1] == '\n' ) {
sh_name[strlen(sh_name)-1] = '\0';
}
/* Search for the substring of known shell-names */
for (i = 0; i < (int)(sizeof (orte_plm_rsh_shell_name)/
sizeof(orte_plm_rsh_shell_name[0])); i++) {
if ( 0 == strcmp(sh_name, orte_plm_rsh_shell_name[i]) ) {
if ( NULL != strstr(sh_name, orte_plm_rsh_shell_name[i]) ) {
*shell = (orte_plm_rsh_shell_t)i;
break;
}
@ -1516,17 +1517,26 @@ static int rsh_probe(char *nodename,
}
}
/* check the system type */
if (NULL != strstr(outbuf, "Darwin")) {
*remote_darwin = true;
} else {
*remote_darwin = false;
}
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:rsh: node %s has SHELL: %s",
"%s plm:rsh: node %s has SHELL: %s machine type: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nodename,
(ORTE_PLM_RSH_SHELL_UNKNOWN == *shell) ? "UNHANDLED" : (char*)orte_plm_rsh_shell_name[*shell]));
(ORTE_PLM_RSH_SHELL_UNKNOWN == *shell) ? "UNHANDLED" : (char*)orte_plm_rsh_shell_name[*shell],
(*remote_darwin) ? "Mac" : "Linux"));
return rc;
}
static int setup_shell(orte_plm_rsh_shell_t *rshell,
orte_plm_rsh_shell_t *lshell,
bool *remote_darwin,
char *nodename, int *argc, char ***argv)
{
orte_plm_rsh_shell_t remote_shell, local_shell;
@ -1573,7 +1583,7 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
"%s plm:rsh: assuming same remote shell as local shell",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
} else {
rc = rsh_probe(nodename, &remote_shell);
rc = rsh_probe(nodename, &remote_shell, remote_darwin);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);