1
1

Take the first small step towards rationalizing rsh support. Create a new "rshbase" component that contains a simple rsh module - no tree spawn, uses all the base functions for launch support. Extend the base rsh support functions to include those functions in common across all rsh modules.

Only a minor change made to the current rsh module to avoid a naming conflict. Otherwise, left it alone to avoid creating conflicts with other external work. The current rsh module remains the default for rsh/ssh support, and continues to contain the support for SGE and Loadleveler.

This commit was SVN r24593.
Этот коммит содержится в:
Ralph Castain 2011-03-30 01:15:07 +00:00
родитель 866ae8b43a
Коммит 30fb002524
17 изменённых файлов: 1418 добавлений и 105 удалений

Просмотреть файл

@ -28,7 +28,8 @@ if !ORTE_DISABLE_FULL_SUPPORT
dist_pkgdata_DATA += base/help-plm-base.txt
headers += \
base/plm_private.h
base/plm_private.h \
base/plm_base_rsh_support.h
libmca_plm_la_SOURCES += \
base/plm_base_close.c \

Просмотреть файл

@ -30,6 +30,7 @@
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
int orte_plm_base_finalize(void)
{

Просмотреть файл

@ -37,6 +37,7 @@
#ifdef HAVE_PWD_H
#include <pwd.h>
#endif
#include <fcntl.h>
#include "opal/mca/installdirs/installdirs.h"
#include "opal/util/os_path.h"
@ -61,6 +62,19 @@
#include "orte/util/dash_host/dash_host.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
/* Printable names of the shells known to the rsh support code.
 *
 * These strings *must* follow the same order as the enum
 * ORTE_PLM_RSH_SHELL_*: the lookup code converts a matching array index
 * straight into an orte_plm_rsh_shell_t value, and the enum value is used
 * as an index when printing the shell name.
 */
const char *orte_plm_rsh_shell_name[7] = {
"bash",
"zsh",
"tcsh", /* tcsh has to be first otherwise strstr finds csh.
         * NOTE(review): the lookups visible in this file compare with
         * strcmp(), so this constraint may be historical - confirm before
         * relying on it. */
"csh",
"ksh",
"sh",
"unknown"
};
#ifndef __WINDOWS__
static char **search(const char* agent_list, const char *path);
@ -224,9 +238,9 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
jdata->jobid = orte_plm_globals.local_slaves;
/* setup the launch */
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_rsh_launch(nodename, app,
"orte-bootproxy.sh",
&argv, &exec_path))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_slave_launch(nodename, app,
"orte-bootproxy.sh",
&argv, &exec_path))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -465,61 +479,40 @@ void orte_plm_base_local_slave_finalize(void)
}
}
typedef enum {
ORTE_PLM_SHELL_BASH = 0,
ORTE_PLM_SHELL_ZSH,
ORTE_PLM_SHELL_TCSH,
ORTE_PLM_SHELL_CSH,
ORTE_PLM_SHELL_KSH,
ORTE_PLM_SHELL_SH,
ORTE_PLM_SHELL_UNKNOWN
} orte_plm_shell_t;
/* These strings *must* follow the same order as the enum ORTE_PLM_SHELL_* */
static const char * orte_plm_shell_name[] = {
"bash",
"zsh",
"tcsh", /* tcsh has to be first otherwise strstr finds csh */
"csh",
"ksh",
"sh",
"unknown"
};
static orte_plm_shell_t find_shell(char *shell)
static orte_plm_rsh_shell_t find_shell(char *shell)
{
int i = 0;
char *sh_name = NULL;
if( (NULL == shell) || (strlen(shell) == 1) ) {
/* Malformed shell */
return ORTE_PLM_SHELL_UNKNOWN;
return ORTE_PLM_RSH_SHELL_UNKNOWN;
}
sh_name = rindex(shell, '/');
if( NULL == sh_name ) {
/* Malformed shell */
return ORTE_PLM_SHELL_UNKNOWN;
return ORTE_PLM_RSH_SHELL_UNKNOWN;
}
/* skip the '/' */
++sh_name;
for (i = 0; i < (int)(sizeof (orte_plm_shell_name) /
sizeof(orte_plm_shell_name[0])); ++i) {
if (0 == strcmp(sh_name, orte_plm_shell_name[i])) {
return (orte_plm_shell_t)i;
for (i = 0; i < (int)(sizeof (orte_plm_rsh_shell_name) /
sizeof(orte_plm_rsh_shell_name[0])); ++i) {
if (0 == strcmp(sh_name, orte_plm_rsh_shell_name[i])) {
return (orte_plm_rsh_shell_t)i;
}
}
/* We didn't find it */
return ORTE_PLM_SHELL_UNKNOWN;
return ORTE_PLM_RSH_SHELL_UNKNOWN;
}
/**
* Check the Shell variable on the specified node
*/
static int shell_probe(char *nodename, orte_plm_shell_t *shell)
int orte_plm_base_rsh_shell_probe(char *nodename, orte_plm_rsh_shell_t *shell)
{
char ** argv;
int argc, rc = ORTE_SUCCESS, i;
@ -532,7 +525,7 @@ static int shell_probe(char *nodename, orte_plm_shell_t *shell)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nodename));
*shell = ORTE_PLM_SHELL_UNKNOWN;
*shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
if (pipe(fd)) {
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:rsh: pipe failed with errno=%d",
@ -608,10 +601,10 @@ static int shell_probe(char *nodename, orte_plm_shell_t *shell)
sh_name[strlen(sh_name)-1] = '\0';
}
/* Search for the substring of known shell-names */
for (i = 0; i < (int)(sizeof (orte_plm_shell_name)/
sizeof(orte_plm_shell_name[0])); i++) {
if ( 0 == strcmp(sh_name, orte_plm_shell_name[i]) ) {
*shell = (orte_plm_shell_t)i;
for (i = 0; i < (int)(sizeof (orte_plm_rsh_shell_name)/
sizeof(orte_plm_rsh_shell_name[0])); i++) {
if ( 0 == strcmp(sh_name, orte_plm_rsh_shell_name[i]) ) {
*shell = (orte_plm_rsh_shell_t)i;
break;
}
}
@ -622,22 +615,22 @@ static int shell_probe(char *nodename, orte_plm_shell_t *shell)
"%s plm:base: node %s has SHELL: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nodename,
(ORTE_PLM_SHELL_UNKNOWN == *shell) ? "UNHANDLED" : (char*)orte_plm_shell_name[*shell]));
(ORTE_PLM_RSH_SHELL_UNKNOWN == *shell) ? "UNHANDLED" : (char*)orte_plm_rsh_shell_name[*shell]));
return rc;
}
static int setup_shell(orte_plm_shell_t *rshell,
orte_plm_shell_t *lshell,
char *nodename, char ***argv)
int orte_plm_base_rsh_setup_shell(orte_plm_rsh_shell_t *rshell,
orte_plm_rsh_shell_t *lshell,
char *nodename, char ***argv)
{
orte_plm_shell_t remote_shell, local_shell;
orte_plm_rsh_shell_t remote_shell, local_shell;
struct passwd *p;
char *param;
int rc;
/* What is our local shell? */
local_shell = ORTE_PLM_SHELL_UNKNOWN;
local_shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
p = getpwuid(getuid());
if( NULL == p ) {
/* This user is unknown to the system. Therefore, there is no reason we
@ -652,21 +645,21 @@ static int setup_shell(orte_plm_shell_t *rshell,
/* If we didn't find it in getpwuid(), try looking at the $SHELL
environment variable (see https://svn.open-mpi.org/trac/ompi/ticket/1060)
*/
if (ORTE_PLM_SHELL_UNKNOWN == local_shell &&
if (ORTE_PLM_RSH_SHELL_UNKNOWN == local_shell &&
NULL != (param = getenv("SHELL"))) {
local_shell = find_shell(param);
}
if (ORTE_PLM_SHELL_UNKNOWN == local_shell) {
if (ORTE_PLM_RSH_SHELL_UNKNOWN == local_shell) {
opal_output(0, "WARNING: local probe returned unhandled shell:%s assuming bash\n",
(NULL != param) ? param : "unknown");
local_shell = ORTE_PLM_SHELL_BASH;
local_shell = ORTE_PLM_RSH_SHELL_BASH;
}
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:base: local shell: %d (%s)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
local_shell, orte_plm_shell_name[local_shell]));
local_shell, orte_plm_rsh_shell_name[local_shell]));
/* What is our remote shell? */
if (orte_assume_same_shell) {
@ -675,23 +668,23 @@ static int setup_shell(orte_plm_shell_t *rshell,
"%s plm:base: assuming same remote shell as local shell",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
} else {
rc = shell_probe(nodename, &remote_shell);
rc = orte_plm_base_rsh_shell_probe(nodename, &remote_shell);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_PLM_SHELL_UNKNOWN == remote_shell) {
if (ORTE_PLM_RSH_SHELL_UNKNOWN == remote_shell) {
opal_output(0, "WARNING: shell probe returned unhandled shell; assuming bash\n");
remote_shell = ORTE_PLM_SHELL_BASH;
remote_shell = ORTE_PLM_RSH_SHELL_BASH;
}
}
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:base: remote shell: %d (%s)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
remote_shell, orte_plm_shell_name[remote_shell]));
remote_shell, orte_plm_rsh_shell_name[remote_shell]));
/* Do we need to source .profile on the remote side?
- sh: yes (see bash(1))
@ -701,8 +694,8 @@ static int setup_shell(orte_plm_shell_t *rshell,
- zsh: no (see http://zsh.sourceforge.net/FAQ/zshfaq03.html#l19)
*/
if (ORTE_PLM_SHELL_SH == remote_shell ||
ORTE_PLM_SHELL_KSH == remote_shell) {
if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
int i;
char **tmp;
tmp = opal_argv_split("( test ! -r ./.profile || . ./.profile;", ' ');
@ -722,7 +715,7 @@ static int setup_shell(orte_plm_shell_t *rshell,
return ORTE_SUCCESS;
}
int orte_plm_base_setup_rsh_launch(char *nodename, orte_app_context_t *app,
int orte_plm_base_setup_slave_launch(char *nodename, orte_app_context_t *app,
char *rcmd, char ***argv, char **exec_path)
{
orte_slave_files_t *slave_node, *tst_node;
@ -734,7 +727,7 @@ int orte_plm_base_setup_rsh_launch(char *nodename, orte_app_context_t *app,
char cwd[OPAL_PATH_MAX];
int rc, i, j;
char *lib_base, *bin_base;
orte_plm_shell_t rshell, lshell;
orte_plm_rsh_shell_t rshell, lshell;
char **tmpargv=NULL;
char *opal_prefix;
@ -814,8 +807,8 @@ int orte_plm_base_setup_rsh_launch(char *nodename, orte_app_context_t *app,
"%s plm:base:local:slave: node %s is remote",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename));
/* setup the correct shell info */
if (ORTE_SUCCESS != (rc = setup_shell(&rshell, &lshell,
nodename, &tmpargv))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_rsh_setup_shell(&rshell, &lshell,
nodename, &tmpargv))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(slave_node);
return rc;
@ -828,10 +821,10 @@ int orte_plm_base_setup_rsh_launch(char *nodename, orte_app_context_t *app,
* LD_LIBRARY_PATH on the remote node, and prepend the eventual cmd
* with the prefix directory
*/
if (ORTE_PLM_SHELL_SH == rshell ||
ORTE_PLM_SHELL_KSH == rshell ||
ORTE_PLM_SHELL_ZSH == rshell ||
ORTE_PLM_SHELL_BASH == rshell) {
if (ORTE_PLM_RSH_SHELL_SH == rshell ||
ORTE_PLM_RSH_SHELL_KSH == rshell ||
ORTE_PLM_RSH_SHELL_ZSH == rshell ||
ORTE_PLM_RSH_SHELL_BASH == rshell) {
asprintf (&slave_node->prefix,
"%s%s%s PATH=%s/%s:$PATH ; export PATH ; "
"LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
@ -842,8 +835,8 @@ int orte_plm_base_setup_rsh_launch(char *nodename, orte_app_context_t *app,
app->prefix_dir, bin_base,
app->prefix_dir, lib_base,
app->prefix_dir, bin_base);
} else if (ORTE_PLM_SHELL_TCSH == rshell ||
ORTE_PLM_SHELL_CSH == rshell) {
} else if (ORTE_PLM_RSH_SHELL_TCSH == rshell ||
ORTE_PLM_RSH_SHELL_CSH == rshell) {
/* [t]csh is a bit more challenging -- we
have to check whether LD_LIBRARY_PATH
is already set before we try to set it.
@ -1174,6 +1167,356 @@ PRELOAD_FILES:
return ORTE_SUCCESS;
}
/**
 * Assemble the template argv used to start an orted on a remote node
 * through the rsh/ssh agent.
 *
 * The array is built in this order: a copy of the rsh agent argv, a
 * "<template>" placeholder where the node name goes, whatever
 * orte_plm_base_rsh_setup_shell() adds for the remote shell, one string
 * holding the (optionally prefix-qualified) orted command, and finally
 * the orted options.
 *
 * @param argcptr          (OUT) number of entries in the returned argv
 * @param argvptr          (OUT) the assembled argv; written only on success,
 *                         at which point the array is handed to the caller
 * @param nodename         node passed to orte_plm_base_rsh_setup_shell()
 * @param node_name_index1 (OUT) index of the "<template>" placeholder
 * @param proc_vpid_index  (OUT) passed through to
 *                         orte_plm_base_orted_append_basic_args()
 * @param prefix_dir       installation prefix to use on the remote node,
 *                         or NULL when no prefix is in use
 *
 * @retval ORTE_SUCCESS    the argv was built
 * @retval ORTE_ERR_SILENT a prefix was given but the remote shell is not
 *                         one we know how to generate commands for (a help
 *                         message has already been shown)
 * @retval other           the error returned by
 *                         orte_plm_base_rsh_setup_shell()
 *
 * On any failure every intermediate allocation, including the partially
 * built argv, is released and the output parameters argcptr/argvptr are
 * left untouched.
 */
int orte_plm_base_rsh_setup_launch(int *argcptr, char ***argvptr,
                                   char *nodename,
                                   int *node_name_index1,
                                   int *proc_vpid_index, char *prefix_dir)
{
    int argc;
    char **argv = NULL;
    char *param;
    orte_plm_rsh_shell_t remote_shell, local_shell;
    char *lib_base = NULL, *bin_base = NULL;
    int orted_argc;
    char **orted_argv;
    char *orted_cmd = NULL, *orted_prefix = NULL, *final_cmd = NULL;
    char *full_orted_cmd = NULL;
    int orted_index;
    int rc;

    /* Figure out the basenames for the libdir and bindir.  This
       requires some explanation:

       - Use opal_install_dirs.libdir and opal_install_dirs.bindir.

       - After a discussion on the devel-core mailing list, the
       developers decided that we should use the local directory
       basenames as the basis for the prefix on the remote node.
       This does not handle a few notable cases (e.g., if the
       libdir/bindir is not simply a subdir under the prefix, if the
       libdir/bindir basename is not the same on the remote node as
       it is here on the local node, etc.), but we decided that
       --prefix was meant to handle "the common case".  If you need
       something more complex than this, a) edit your shell startup
       files to set PATH/LD_LIBRARY_PATH properly on the remote
       node, or b) use some new/to-be-defined options that
       explicitly allow setting the bindir/libdir on the remote
       node.  We decided to implement these options (e.g.,
       --remote-bindir and --remote-libdir) to orterun when it
       actually becomes a problem for someone (vs. a hypothetical
       situation).

       Hence, for now, we simply take the basename of this install's
       libdir and bindir and use it to append this install's prefix
       and use that on the remote node.

       opal_basename() returns a newly allocated string (see its OPAL
       documentation), so both results are released in the cleanup
       section at the end of this function.
    */
    lib_base = opal_basename(opal_install_dirs.libdir);
    bin_base = opal_basename(opal_install_dirs.bindir);

    /*
     * Build argv array
     */
    argv = opal_argv_copy(orte_plm_globals.rsh_agent_argv);
    argc = opal_argv_count(orte_plm_globals.rsh_agent_argv);
    /* remember where the node name placeholder lives */
    *node_name_index1 = argc;
    opal_argv_append(&argc, &argv, "<template>");

    /* setup the correct shell info */
    if (ORTE_SUCCESS != (rc = orte_plm_base_rsh_setup_shell(&remote_shell, &local_shell,
                                                            nodename, &argv))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* now get the orted cmd - as specified by user - into our tmp array.
     * The function returns the location where the actual orted command is
     * located - usually in the final spot, but someone could
     * have added options. For example, it should be legal for them to use
     * "orted --debug-devel" so they get debug output from the orteds, but
     * not from mpirun. Also, they may have a customized version of orted
     * that takes arguments in addition to the std ones we already support
     */
    orted_argc = 0;
    orted_argv = NULL;
    orted_index = orte_plm_base_setup_orted_cmd(&orted_argc, &orted_argv);

    /* look at the returned orted cmd argv to check several cases:
     *
     * - only "orted" was given. This is the default and thus most common
     *   case. In this situation, there is nothing we need to do
     *
     * - something was given that doesn't include "orted" - i.e., someone
     *   has substituted their own daemon. There isn't anything we can
     *   do here, so we want to avoid adding prefixes to the cmd
     *
     * - something was given that precedes "orted". For example, someone
     *   may have specified "valgrind [options] orted". In this case, we
     *   need to separate out that "orted_prefix" section so it can be
     *   treated separately below
     *
     * - something was given that follows "orted". An example was given above.
     *   In this case, we need to construct the effective "orted_cmd" so it
     *   can be treated properly below
     *
     * Obviously, the latter two cases can be combined - just to make it
     * even more interesting! Gotta love rsh/ssh...
     */
    if (0 == orted_index) {
        /* single word cmd - this is the default scenario, but there could
         * be options specified so we need to account for that possibility.
         * However, we don't need/want a prefix as nothing precedes the orted
         * cmd itself
         */
        orted_cmd = opal_argv_join(orted_argv, ' ');
        orted_prefix = NULL;
    } else {
        /* okay, so the "orted" cmd is somewhere in this array, with
         * something preceding it and perhaps things following it.
         */
        orted_prefix = opal_argv_join_range(orted_argv, 0, orted_index, ' ');
        orted_cmd = opal_argv_join_range(orted_argv, orted_index, opal_argv_count(orted_argv), ' ');
    }
    opal_argv_free(orted_argv);  /* done with this */

    /* we now need to assemble the actual cmd that will be executed - this depends
     * upon whether or not a prefix directory is being used
     */
    if (NULL != prefix_dir) {
        /* if we have a prefix directory, we need to set the PATH and
         * LD_LIBRARY_PATH on the remote node, and prepend just the orted_cmd
         * with the prefix directory
         */
        char *opal_prefix = getenv("OPAL_PREFIX");

        if (NULL != orted_cmd) {
            asprintf(&full_orted_cmd, "%s/%s/%s", prefix_dir, bin_base, orted_cmd);
        }

        if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
            ORTE_PLM_RSH_SHELL_KSH == remote_shell ||
            ORTE_PLM_RSH_SHELL_ZSH == remote_shell ||
            ORTE_PLM_RSH_SHELL_BASH == remote_shell) {
            /* if there is nothing preceding orted, then we can just
             * assemble the cmd with the orted_cmd at the end. Otherwise,
             * we have to insert the orted_prefix in the right place
             */
            asprintf (&final_cmd,
                      "%s%s%s PATH=%s/%s:$PATH ; export PATH ; "
                      "LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
                      "%s %s",
                      (opal_prefix != NULL ? "OPAL_PREFIX=" : " "),
                      (opal_prefix != NULL ? opal_prefix : " "),
                      (opal_prefix != NULL ? " ; export OPAL_PREFIX;" : " "),
                      prefix_dir, bin_base,
                      prefix_dir, lib_base,
                      (orted_prefix != NULL ? orted_prefix : " "),
                      (full_orted_cmd != NULL ? full_orted_cmd : " "));
        } else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell ||
                   ORTE_PLM_RSH_SHELL_CSH == remote_shell) {
            /* [t]csh is a bit more challenging -- we
               have to check whether LD_LIBRARY_PATH
               is already set before we try to set it.
               Must be very careful about obeying
               [t]csh's order of evaluation and not
               using a variable before it is defined.
               See this thread for more details:
               http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
            /* if there is nothing preceding orted, then we can just
             * assemble the cmd with the orted_cmd at the end. Otherwise,
             * we have to insert the orted_prefix in the right place
             */
            asprintf (&final_cmd,
                      "%s%s%s set path = ( %s/%s $path ) ; "
                      "if ( $?LD_LIBRARY_PATH == 1 ) "
                      "set OMPI_have_llp ; "
                      "if ( $?LD_LIBRARY_PATH == 0 ) "
                      "setenv LD_LIBRARY_PATH %s/%s ; "
                      "if ( $?OMPI_have_llp == 1 ) "
                      "setenv LD_LIBRARY_PATH %s/%s:$LD_LIBRARY_PATH ; "
                      "%s %s",
                      (opal_prefix != NULL ? "setenv OPAL_PREFIX " : " "),
                      (opal_prefix != NULL ? opal_prefix : " "),
                      (opal_prefix != NULL ? " ;" : " "),
                      prefix_dir, bin_base,
                      prefix_dir, lib_base,
                      prefix_dir, lib_base,
                      (orted_prefix != NULL ? orted_prefix : " "),
                      (full_orted_cmd != NULL ? full_orted_cmd : " "));
        } else {
            /* we have no recipe for setting the environment in this shell */
            orte_show_help("help-plm-rsh.txt", "cannot-resolve-shell-with-prefix", true,
                           (NULL == opal_prefix) ? "NULL" : opal_prefix,
                           prefix_dir);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    } else {
        /* no prefix directory, so just aggregate the result */
        asprintf(&final_cmd, "%s %s",
                 (orted_prefix != NULL ? orted_prefix : ""),
                 (orted_cmd != NULL ? orted_cmd : ""));
    }

    /* now add the final cmd to the argv array (the array keeps its own
     * copy; ours is released in cleanup) */
    opal_argv_append(&argc, &argv, final_cmd);

    /* if we are not debugging, tell the daemon
     * to daemonize so we can launch the next group
     */
    if (!orte_debug_flag &&
        !orte_debug_daemons_flag &&
        !orte_debug_daemons_file_flag &&
        !orte_leave_session_attached) {
        opal_argv_append(&argc, &argv, "--daemonize");
    }

    /*
     * Add the basic arguments to the orted command line, including
     * all debug options
     */
    orte_plm_base_orted_append_basic_args(&argc, &argv,
                                          "env",
                                          proc_vpid_index,
                                          NULL);

    /* ensure that only the ssh plm is selected on the remote daemon */
    opal_argv_append_nosize(&argv, "-mca");
    opal_argv_append_nosize(&argv, "plm");
    opal_argv_append_nosize(&argv, "rsh");

    /* in the rsh environment, we can append multi-word arguments
     * by enclosing them in quotes. Check for any multi-word
     * mca params passed to mpirun and include them
     */
    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
        int cnt, i;
        cnt = opal_argv_count(orted_cmd_line);
        /* entries are consumed three at a time; only look at complete
         * triplets so that [i+2] never reads past the end of the array */
        for (i = 0; i + 2 < cnt; i += 3) {
            /* check if the specified option is more than one word - all
             * others have already been passed
             */
            if (NULL != strchr(orted_cmd_line[i+2], ' ')) {
                /* must add quotes around it */
                asprintf(&param, "\"%s\"", orted_cmd_line[i+2]);
                /* now pass it along */
                opal_argv_append(&argc, &argv, orted_cmd_line[i]);
                opal_argv_append(&argc, &argv, orted_cmd_line[i+1]);
                opal_argv_append(&argc, &argv, param);
                free(param);
            }
        }
    }

    /* for sh and ksh a closing ")" is appended to end the command group */
    if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
        ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
        opal_argv_append(&argc, &argv, ")");
    }

    if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
        param = opal_argv_join(argv, ' ');
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: final template argv:\n\t%s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == param) ? "NULL" : param));
        if (NULL != param) free(param);
    }

    /* all done - recount rather than trusting argc, which is not updated
     * by the opal_argv_append_nosize() calls above */
    *argcptr = opal_argv_count(argv);
    *argvptr = argv;
    argv = NULL;  /* ownership has moved to the caller */
    rc = ORTE_SUCCESS;

cleanup:
    free(final_cmd);
    free(full_orted_cmd);
    free(orted_prefix);
    free(orted_cmd);
    free(lib_base);
    free(bin_base);
    /* non-NULL only when we are bailing out with an error */
    opal_argv_free(argv);
    return rc;
}
/**
 * Replace the current process with the rsh/ssh agent that will start a
 * daemon on a remote node.
 *
 * The function patches the daemon's vpid into the argv template, points
 * stdin at /dev/null, closes every descriptor above stderr, restores
 * default signal handling and then calls execve() on the configured rsh
 * agent.  It never returns: it either becomes the agent or calls exit(-1).
 *
 * @param argc             number of entries in argv (not used here)
 * @param argv             launch argv; argv[proc_vpid_index] is freed and
 *                         replaced with the string form of vpid
 * @param vpid             vpid of the daemon being launched
 * @param proc_vpid_index  index in argv of the vpid placeholder
 */
void orte_plm_base_ssh_child(int argc, char **argv,
                             orte_vpid_t vpid, int proc_vpid_index)
{
    char **env;
    char *var;
    long fd, fdmax = sysconf(_SC_OPEN_MAX);
    int rc;
    char *exec_path;
    char **exec_argv;
    int fdin;
    sigset_t sigs;

    /* setup environment */
    env = opal_argv_copy(orte_launch_environ);

    /* We don't need to sense an oversubscribed condition and set the sched_yield
     * for the node as we are only launching the daemons at this time. The daemons
     * are now smart enough to set the oversubscribed condition themselves when
     * they launch the local procs.
     */

    /* We cannot launch locally as this would cause multiple daemons to
     * exist on a node (HNP counts as a daemon). This is taken care of
     * by the earlier check for daemon_preexists, so we only have to worry
     * about remote launches here
     */
    exec_argv = argv;
    exec_path = strdup(orte_plm_globals.rsh_agent_path);

    /* pass the vpid */
    rc = orte_util_convert_vpid_to_string(&var, vpid);
    if (ORTE_SUCCESS != rc) {
        opal_output(0, "orte_plm_rsh: unable to get daemon vpid as string");
        exit(-1);
    }
    free(argv[proc_vpid_index]);
    argv[proc_vpid_index] = strdup(var);
    free(var);

    /* Don't let ssh slurp all of our stdin!
     *
     * Only shuffle descriptors when open() gave us something other than
     * fd 0.  If it failed (-1) there is nothing to duplicate, and if it
     * returned 0 then stdin had been closed and is now already attached
     * to /dev/null - closing it again would leave the agent without any
     * stdin at all.
     */
    fdin = open("/dev/null", O_RDWR);
    if (0 < fdin) {
        dup2(fdin, 0);
        close(fdin);
    }

    /* close all file descriptors w/ exception of stdin/stdout/stderr */
    for (fd = 3; fd < fdmax; fd++) {
        close(fd);
    }

    /* Set signal handlers back to the default.  Do this close
       to the execve() because the event library may (and likely
       will) reset them.  If we don't do this, the event
       library may have left some set that, at least on some
       OS's, don't get reset via fork() or exec().  Hence, the
       orted could be unkillable (for example). */
    set_handler_default(SIGTERM);
    set_handler_default(SIGINT);
    set_handler_default(SIGHUP);
    set_handler_default(SIGPIPE);
    set_handler_default(SIGCHLD);

    /* Unblock all signals, for many of the same reasons that
       we set the default handlers, above.  This is noticeable
       on Linux where the event library blocks SIGTERM, but we
       don't want that blocked by the orted (or, more
       specifically, we don't want it to be blocked by the
       orted and then inherited by the ORTE processes that it
       forks, making them unkillable by SIGTERM). */
    /* first call only reads the current mask, second unblocks it */
    sigprocmask(0, 0, &sigs);
    sigprocmask(SIG_UNBLOCK, &sigs, 0);

    /* exec the daemon */
    var = opal_argv_join(argv, ' ');
    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rsh: executing: (%s) [%s]",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         exec_path, (NULL == var) ? "NULL" : var));
    if (NULL != var) free(var);

    execve(exec_path, exec_argv, env);

    /* only reached when execve() itself failed */
    opal_output(0, "plm:rsh: execv of %s failed with errno=%s(%d)\n",
                exec_path, strerror(errno), errno);
    exit(-1);
}
int orte_plm_base_append_bootproxy_args(orte_app_context_t *app, char ***argv,
orte_jobid_t jobid, orte_vpid_t vpid,
int num_nodes, orte_vpid_t num_procs,

88
orte/mca/plm/base/plm_base_rsh_support.h Обычный файл
Просмотреть файл

@ -0,0 +1,88 @@
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef MCA_PLM_RSH_SUPPORT_H
#define MCA_PLM_RSH_SUPPORT_H
/*
* includes
*/
#include "orte_config.h"
#include "orte/types.h"
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif /* HAVE_SYS_TIME_H */
#include "opal/class/opal_list.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/threads/condition.h"
#include "opal/dss/dss_types.h"
#include "orte/mca/plm/plm_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/odls/odls_types.h"
#include "orte/runtime/orte_globals.h"
BEGIN_C_DECLS
/**
 * Shells that the rsh launch support knows how to generate commands for.
 *
 * The numeric values double as indices into orte_plm_rsh_shell_name[],
 * so the enumerators must stay in the same order as the entries of that
 * table.
 */
typedef enum {
ORTE_PLM_RSH_SHELL_BASH = 0,
ORTE_PLM_RSH_SHELL_ZSH,
ORTE_PLM_RSH_SHELL_TCSH,
ORTE_PLM_RSH_SHELL_CSH,
ORTE_PLM_RSH_SHELL_KSH,
ORTE_PLM_RSH_SHELL_SH,
ORTE_PLM_RSH_SHELL_UNKNOWN
} orte_plm_rsh_shell_t;
/* Printable shell names, one entry per orte_plm_rsh_shell_t value */
ORTE_DECLSPEC extern const char *orte_plm_rsh_shell_name[7];
/* rsh launch support */
/* NOTE(review): the two launch-agent functions are not documented here;
 * going by their names they set up / look up the rsh agent from a list of
 * candidates - confirm against their definitions. */
ORTE_DECLSPEC int orte_plm_base_rsh_launch_agent_setup(const char *agent_list, char *path);
ORTE_DECLSPEC int orte_plm_base_rsh_launch_agent_lookup(const char *agent_list, char *path);
/* Check the shell in use on the specified node; *shell is set to
 * ORTE_PLM_RSH_SHELL_UNKNOWN when no known shell name matches. */
ORTE_DECLSPEC int orte_plm_base_rsh_shell_probe(char *nodename, orte_plm_rsh_shell_t *shell);
/* Determine the local and remote shells (falling back to bash when one
 * cannot be identified). */
ORTE_DECLSPEC int orte_plm_base_rsh_setup_shell(orte_plm_rsh_shell_t *rshell,
orte_plm_rsh_shell_t *lshell,
char *nodename, char ***argv);
/* Build the template argv for launching an orted through the rsh agent;
 * *node_name_index1 receives the index of the node name placeholder. */
ORTE_DECLSPEC int orte_plm_base_rsh_setup_launch(int *argcptr, char ***argvptr,
char *nodename,
int *node_name_index1,
int *proc_vpid_index, char *prefix_dir);
/* Store the vpid in argv[proc_vpid_index] and exec the rsh agent; does
 * not return. */
ORTE_DECLSPEC void orte_plm_base_ssh_child(int argc, char **argv,
orte_vpid_t vpid, int proc_vpid_index);
/**
 * Local slave launch
 */
ORTE_DECLSPEC int orte_plm_base_local_slave_launch(orte_job_t *jdata);
ORTE_DECLSPEC void orte_plm_base_local_slave_finalize(void);
ORTE_DECLSPEC int orte_plm_base_setup_slave_launch(char *nodename, orte_app_context_t *app,
char *rcmd, char ***argv, char **exec_path);
ORTE_DECLSPEC int orte_plm_base_append_bootproxy_args(orte_app_context_t *app, char ***argv,
orte_jobid_t jobid, orte_vpid_t vpid,
int num_nodes, orte_vpid_t num_procs,
orte_node_rank_t nrank, orte_local_rank_t lrank,
orte_vpid_t nlocal, int nslots, bool overwrite);
END_C_DECLS
#endif /* MCA_PLM_RSH_SUPPORT_H */

Просмотреть файл

@ -117,21 +117,6 @@ ORTE_DECLSPEC void orte_plm_base_reset_job(orte_job_t *jdata);
ORTE_DECLSPEC int orte_plm_base_setup_orted_cmd(int *argc, char ***argv);
/**
* Local slave launch
*/
ORTE_DECLSPEC int orte_plm_base_local_slave_launch(orte_job_t *jdata);
ORTE_DECLSPEC int orte_plm_base_rsh_launch_agent_setup(const char *agent_list, char *path);
ORTE_DECLSPEC int orte_plm_base_rsh_launch_agent_lookup(const char *agent_list, char *path);
ORTE_DECLSPEC void orte_plm_base_local_slave_finalize(void);
ORTE_DECLSPEC int orte_plm_base_setup_rsh_launch(char *nodename, orte_app_context_t *app,
char *rcmd, char ***argv, char **exec_path);
ORTE_DECLSPEC int orte_plm_base_append_bootproxy_args(orte_app_context_t *app, char ***argv,
orte_jobid_t jobid, orte_vpid_t vpid,
int num_nodes, orte_vpid_t num_procs,
orte_node_rank_t nrank, orte_local_rank_t lrank,
orte_vpid_t nlocal, int nslots, bool overwrite);
/**
* Utilities for plm components that use proxy daemons
*/

Просмотреть файл

@ -50,6 +50,7 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "orte/mca/plm/rsh/plm_rsh.h"

Просмотреть файл

@ -90,6 +90,7 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "orte/mca/plm/rsh/plm_rsh.h"
static void ssh_child(int argc, char **argv,
@ -110,28 +111,6 @@ orte_plm_base_module_t orte_plm_rsh_module = {
orte_plm_rsh_finalize
};
typedef enum {
ORTE_PLM_RSH_SHELL_BASH = 0,
ORTE_PLM_RSH_SHELL_ZSH,
ORTE_PLM_RSH_SHELL_TCSH,
ORTE_PLM_RSH_SHELL_CSH,
ORTE_PLM_RSH_SHELL_KSH,
ORTE_PLM_RSH_SHELL_SH,
ORTE_PLM_RSH_SHELL_UNKNOWN
} orte_plm_rsh_shell_t;
/* These strings *must* follow the same order as the enum
ORTE_PLM_RSH_SHELL_* */
static const char * orte_plm_rsh_shell_name[] = {
"bash",
"zsh",
"tcsh", /* tcsh has to be first otherwise strstr finds csh */
"csh",
"ksh",
"sh",
"unknown"
};
/*
* Local functions
*/

46
orte/mca/plm/rshbase/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,46 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help text shown by this component; distributed and installed as package data.
dist_pkgdata_DATA = help-plm-rshbase.txt
# Files that make up the component, shared by the DSO and static variants.
sources = \
plm_rshbase.h \
plm_rshbase_component.c \
plm_rshbase_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty,
# depending on whether this component is being built as a DSO.
if MCA_BUILD_orte_plm_rshbase_DSO
component_noinst =
component_install = mca_plm_rshbase.la
else
component_noinst = libmca_plm_rshbase.la
component_install =
endif
# DSO variant: installed into $(pkglibdir).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_plm_rshbase_la_SOURCES = $(sources)
mca_plm_rshbase_la_LDFLAGS = -module -avoid-version
# Static variant: built here but not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_plm_rshbase_la_SOURCES =$(sources)
libmca_plm_rshbase_la_LDFLAGS = -module -avoid-version

27
orte/mca/plm/rshbase/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_orte_plm_rshbase_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Registers the component Makefile for generation, then runs
# action-if-found when fork() is available and action-if-not-found
# otherwise.
AC_DEFUN([MCA_orte_plm_rshbase_CONFIG],[
AC_CONFIG_FILES([orte/mca/plm/rshbase/Makefile])
AC_CHECK_FUNC([fork], [$1], [$2])
])dnl

77
orte/mca/plm/rshbase/help-plm-rshbase.txt Обычный файл
Просмотреть файл

@ -0,0 +1,77 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open RTE's rshbase PLM component.
#
[no-local-orted]
The rsh PLM component was not able to find the executable "orted" in
your PATH or in the directory where Open MPI/OpenRTE was initially installed,
and therefore cannot continue.
For reference, your current PATH is:
%s
We also looked for orted in the following directory:
%s
[multiple-prefixes]
Multiple application contexts were specified using different
settings for --prefix. Care should be taken that the corresponding
processes are mapped to different nodes; having multiple prefixes
per node is not allowed.
The previously set prefix was
%s
the prefix to be set overriding:
%s
[concurrency-less-than-zero]
The value of the MCA parameter "pls_rsh_num_concurrent" is less than
or equal to zero (%d). This parameter is used to determine how many
remote agents (typically rsh or ssh) to invoke concurrently while
launching parallel jobs.
This value has automatically been reset to 1; processing will continue.
[deadlock-params]
The rsh launcher has been told to launch at most %d daemons concurrently
and the debug-daemons option is set. However, the total number of
daemons to launch (%d) is greater than this value. This is a scenario that
will cause the system to deadlock.
To avoid deadlock, either increase the number of concurrent daemons, or
remove the debug-daemons flag.
[unknown-user]
The user (%d) is unknown to the system (i.e. there is no corresponding
entry in the password file). Please contact your system administrator
for a fix.
#
[cannot-resolve-shell-with-prefix]
The rsh launcher has been given a prefix to use, but could not determine
the type of remote shell being used on the remote node. This is a fatal
error as we cannot determine how to construct the cmd line to set your
remote LD_LIBRARY_PATH and PATH environmental variables.
The prefixes we were given are:
opal_prefix: %s
prefix_dir: %s

58
orte/mca/plm/rshbase/plm_rshbase.h Обычный файл
Просмотреть файл

@ -0,0 +1,58 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file:
* Part of the rshbase launcher. See plm_rshbase.h for an overview of how it works.
*/
#ifndef ORTE_PLM_RSHBASE_EXPORT_H
#define ORTE_PLM_RSHBASE_EXPORT_H
#include "orte_config.h"
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "opal/threads/condition.h"
#include "opal/mca/mca.h"
#include "orte/mca/plm/plm.h"
BEGIN_C_DECLS
/**
* PLM component descriptor for the rshbase launcher.
*
* Extends the generic PLM component structure with the settings and
* synchronization objects used by the rshbase module. Everything except
* "super" is filled in when the component is opened.
*/
struct orte_plm_rshbase_component_t {
/* generic PLM component data (version info, open/close/query hooks, metadata) */
orte_plm_base_component_t super;
/* value of the "force_rsh" MCA parameter ("Force the launcher to always use rsh") */
bool force_rsh;
/* selection priority reported by the component query function ("priority" MCA parameter) */
int priority;
/* maximum number of launch agents invoked concurrently ("num_concurrent" MCA parameter) */
orte_std_cntr_t num_concurrent;
/* lock taken around the launch-throttling wait and signal on "cond" */
opal_mutex_t lock;
/* condition signalled from the daemon-exit callback to release a waiting launcher */
opal_condition_t cond;
};
typedef struct orte_plm_rshbase_component_t orte_plm_rshbase_component_t;
/* the single component instance, defined in plm_rshbase_component.c */
ORTE_MODULE_DECLSPEC extern orte_plm_rshbase_component_t mca_plm_rshbase_component;
/* the module function table, defined in plm_rshbase_module.c */
extern orte_plm_base_module_t orte_plm_rshbase_module;
END_C_DECLS
#endif /* ORTE_PLM_RSHBASE_EXPORT_H */

162
orte/mca/plm/rshbase/plm_rshbase_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,162 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights
* reserved.
* Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/path.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/show_help.h"
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "orte/mca/plm/rshbase/plm_rshbase.h"
/*
* Public string showing the plm rshbase component version number
*/
const char *mca_plm_rshbase_component_version_string =
"Open MPI rshbase plm MCA component version " ORTE_VERSION;
/* Component lifecycle hooks; referenced from the component structure below */
static int rshbase_component_open(void);
static int rshbase_component_query(mca_base_module_t **module, int *priority);
static int rshbase_component_close(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it.
*
* Only the embedded base component ("super") is initialized here; the
* rshbase-specific fields (force_rsh, priority, num_concurrent, lock,
* cond) are set up in rshbase_component_open().
*/
orte_plm_rshbase_component_t mca_plm_rshbase_component = {
{
/* First, the mca_component_t struct containing meta information
about the component itself */
{
ORTE_PLM_BASE_VERSION_2_0_0,
/* Component name and version */
"rshbase",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
rshbase_component_open,
rshbase_component_close,
/* Component query function, used to decide whether we can be selected */
rshbase_component_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
static int rshbase_component_open(void)
{
int tmp;
mca_base_component_t *c = &mca_plm_rshbase_component.super.base_version;
/* initialize globals */
OBJ_CONSTRUCT(&mca_plm_rshbase_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_plm_rshbase_component.cond, opal_condition_t);
/* lookup parameters */
mca_base_param_reg_int(c, "num_concurrent",
"How many plm_rsh_agent instances to invoke concurrently (must be > 0)",
false, false, 128, &tmp);
if (tmp <= 0) {
orte_show_help("help-plm-rshbase.txt", "concurrency-less-than-zero",
true, tmp);
tmp = 1;
}
mca_plm_rshbase_component.num_concurrent = tmp;
mca_base_param_reg_int(c, "force_rsh",
"Force the launcher to always use rsh",
false, false, false, &tmp);
mca_plm_rshbase_component.force_rsh = OPAL_INT_TO_BOOL(tmp);
mca_base_param_reg_int(c, "priority",
"Priority of the rshbase plm component",
false, false, 5,
&mca_plm_rshbase_component.priority);
return ORTE_SUCCESS;
}
/*
 * Component query hook.
 *
 * The component offers itself only when the base rsh support can locate
 * the launch agent. On success *priority and *module are filled in and
 * ORTE_SUCCESS is returned; otherwise *module is set to NULL, *priority
 * is left untouched and ORTE_ERROR is returned.
 */
static int rshbase_component_query(mca_base_module_t **module, int *priority)
{
    /* see if MCA-specified agent (default: ssh:rsh) is available */
    int lookup_rc = orte_plm_base_rsh_launch_agent_lookup(NULL, NULL);

    if (ORTE_SUCCESS == lookup_rc) {
        /* agent found - offer our module at the configured priority */
        *priority = mca_plm_rshbase_component.priority;
        *module = (mca_base_module_t *) &orte_plm_rshbase_module;
        return ORTE_SUCCESS;
    }

    /* not an error as such: we simply cannot be selected */
    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rshbase: unable to be used: cannot find path "
                         "for launching agent \"%s\"\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         orte_rsh_agent));
    *module = NULL;
    return ORTE_ERROR;
}
static int rshbase_component_close(void)
{
/* cleanup state */
OBJ_DESTRUCT(&mca_plm_rshbase_component.lock);
OBJ_DESTRUCT(&mca_plm_rshbase_component.cond);
return ORTE_SUCCESS;
}

541
orte/mca/plm/rshbase/plm_rshbase_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,541 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#include <fcntl.h>
#include <signal.h>
#ifdef HAVE_PWD_H
#include <pwd.h>
#endif
#include "opal/mca/installdirs/installdirs.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/opal_sos.h"
#include "opal/mca/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/basename.h"
#include "opal/util/bit_ops.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/proc_info.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "orte/mca/plm/rshbase/plm_rshbase.h"
/* Module functions implemented in this file */
static int init(void);
static int spawn(orte_job_t *jdata);
static int terminate_orteds(void);
static int signal_job(orte_jobid_t jobid, int32_t signal);
static int finalize(void);
/*
* The rshbase module function table. The initializer is positional, so
* the order of the entries must match the field order of
* orte_plm_base_module_t (declared in orte/mca/plm/plm.h, not shown here).
* Entries named orte_plm_base_* are shared implementations from the PLM
* base; the remaining ones are the static functions declared above.
*/
orte_plm_base_module_t orte_plm_rshbase_module = {
init,
orte_plm_base_set_hnp_name,
spawn,
/* slot left unimplemented - NOTE(review): presumably the remote/tree
* spawn entry, which this simple module does not provide; confirm
* against orte_plm_base_module_t */
NULL,
orte_plm_base_orted_terminate_job,
terminate_orteds,
orte_plm_base_orted_kill_local_procs,
signal_job,
finalize
};
/* local global storage of timing variables: joblaunchstart is recorded at
* the beginning of spawn() and joblaunchstop once the launch has completed;
* both are only used when orte_timing is set */
static struct timeval joblaunchstart, joblaunchstop;
/* local global storage: number of launch agents currently in flight.
* Incremented in spawn() after each fork and decremented in the
* daemon-exit callback; compared against the component's num_concurrent
* limit to throttle launches */
static int num_in_progress=0;
/**
 * Module init, called once this module has been selected.
 *
 * Sets up the rsh launch agent through the PLM base and then starts the
 * PLM base communications. Returns ORTE_SUCCESS, or the error code of
 * the first step that failed (which is also logged).
 */
static int init(void)
{
    /* we were selected, so setup the launch agent */
    int rc = orte_plm_base_rsh_launch_agent_setup(NULL, NULL);

    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* agent is ready - bring up the PLM base communications */
    rc = orte_plm_base_comm_start();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/**
 * Callback invoked when the child process that ran the launch agent
 * (rsh/ssh) for a daemon exits.
 *
 * @param pid     pid of the exited child (unused)
 * @param status  wait status of the child
 * @param cbdata  the orte_proc_t of the daemon that was being launched;
 *                this is what spawn() registers with orte_wait_cb()
 *
 * On an abnormal exit (killed by a signal or non-zero exit code) the
 * failure is either reported to the HNP (when we are not the HNP) or
 * recorded locally on the daemon/job objects (when we are). In every
 * case one launch slot is released and a launcher that may be blocked
 * on the concurrency limit is woken up.
 */
static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
{
    orte_std_cntr_t cnt=1;
    uint8_t flag;
    orte_job_t *jdata;
    /* spawn() always registers this callback with the daemon's proc
     * object, so cbdata must be interpreted as such in both branches */
    orte_proc_t *daemon = (orte_proc_t*)cbdata;

    if (!WIFEXITED(status) || 0 != WEXITSTATUS(status)) { /* if abnormal exit */
        /* if we are not the HNP, send a message to the HNP alerting it
         * to the failure
         */
        if (!ORTE_PROC_IS_HNP) {
            opal_buffer_t buf;
            /* local copy so we can hand its address to the packer */
            orte_vpid_t vpid = daemon->name.vpid;

            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s daemon %d failed with status %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (int)vpid, WEXITSTATUS(status)));
            /* message layout: count, failure flag, vpid of failed daemon */
            OBJ_CONSTRUCT(&buf, opal_buffer_t);
            opal_dss.pack(&buf, &cnt, 1, ORTE_STD_CNTR);
            flag = 1;
            opal_dss.pack(&buf, &flag, 1, OPAL_UINT8);
            opal_dss.pack(&buf, &vpid, 1, ORTE_VPID);
            orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buf, ORTE_RML_TAG_REPORT_REMOTE_LAUNCH, 0);
            OBJ_DESTRUCT(&buf);
        } else {
            jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s daemon %d failed with status %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (int)daemon->name.vpid, WEXITSTATUS(status)));
            /* note that this daemon failed */
            daemon->state = ORTE_PROC_STATE_FAILED_TO_START;
            /* increment the #daemons terminated so we will exit properly */
            jdata->num_terminated++;
#if 0
            /* report that the daemon has failed so we can exit */
            orte_errmgr.update_state(ORTE_PROC_MY_NAME->jobid, ORTE_JOB_STATE_FAILED_TO_START,
                                     NULL, ORTE_PROC_STATE_UNDEF, status);
#else
            /* JJH: Look into a better way of doing this. If we let the daemon
             * know, then it kills the job when we are trying to restart.. */
            opal_output(0, "%s daemon %s failed. SKIPPING orte_plm_base_launch_failed()",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&daemon->name));
#endif
        }
    }

    /* release any waiting threads. The counter is updated under the same
     * lock that spawn() holds when it increments and tests it, so the
     * two sides always see a consistent value.
     */
    OPAL_THREAD_LOCK(&mca_plm_rshbase_component.lock);
    num_in_progress--;
    if (num_in_progress <= mca_plm_rshbase_component.num_concurrent) {
        opal_condition_signal(&mca_plm_rshbase_component.cond);
    }
    OPAL_THREAD_UNLOCK(&mca_plm_rshbase_component.lock);
}
/**
 * Launch a daemon (bootproxy) on each node. The daemon will be responsible
 * for launching the application.
 *
 * @param jdata  the job to be launched
 * @return ORTE_SUCCESS once the daemons and the application have been
 *         launched, otherwise the ORTE error code of the step that failed
 *
 * The function serializes concurrent spawns via orte_plm_globals, forks
 * one launch agent per node that still needs a daemon (throttled by the
 * component's num_concurrent limit), waits for the daemons to call back
 * and finally launches the application processes.
 *
 * When working in this function, ALWAYS jump to "cleanup" if
 * you encounter an error so that orterun will be woken up and
 * the job can cleanly terminate. The only exception is the local-slave
 * path, which does its own bookkeeping before returning.
 */
static int spawn(orte_job_t *jdata)
{
    int rc;
    orte_job_map_t *map;
    orte_app_context_t *app;
    orte_node_t *node;
    int nnode;
    int argc;
    char **argv=NULL;
    char *prefix_dir;
    int node_name_index1;
    int proc_vpid_index;
    pid_t pid;
    bool failed_launch = true;
    /* state reported to the errmgr if we end up in cleanup with a failed
     * launch; must have a defined value on every path that reaches it */
    orte_job_state_t job_state = ORTE_JOB_STATE_FAILED_TO_START;
    orte_jobid_t active_job, failed_job;

    /* wait for any spawn that is already in progress to complete */
    OPAL_THREAD_LOCK(&orte_plm_globals.spawn_lock);
    while (orte_plm_globals.spawn_in_progress) {
        opal_condition_wait(&orte_plm_globals.spawn_in_progress_cond, &orte_plm_globals.spawn_lock);
    }
    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "released to spawn"));
    orte_plm_globals.spawn_in_progress = true;
    orte_plm_globals.spawn_status = ORTE_ERR_FATAL;
    OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);

    if (jdata->controls & ORTE_JOB_CONTROL_LOCAL_SLAVE) {
        /* if this is a request to launch a local slave,
         * then we will not be launching an orted - we will
         * directly ssh the slave process itself. No mapping
         * is performed to support this - the caller must
         * provide all the info required to launch the job,
         * including the target hosts
         */
        rc = orte_plm_base_local_slave_launch(jdata);
        OPAL_THREAD_LOCK(&orte_plm_globals.spawn_lock);
        orte_plm_globals.spawn_in_progress = false;
        OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);
        return rc;
    }

    /* default to declaring the daemon launch as having failed */
    failed_job = ORTE_PROC_MY_NAME->jobid;

    /* if we are timing, record the start time */
    if (orte_timing) {
        gettimeofday(&orte_plm_globals.daemonlaunchstart, NULL);
        joblaunchstart = orte_plm_globals.daemonlaunchstart;
    }

    /* setup the job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
        ORTE_ERROR_LOG(rc);
        /* go through cleanup so the failure is reported to the errmgr */
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rsh: launching job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* set the active jobid */
    active_job = jdata->jobid;

    /* Get the map for this job */
    if (NULL == (map = orte_rmaps.get_job_map(jdata->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    if (0 == map->num_new_daemons) {
        /* have all the daemons we need - launch app */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: no new daemons to launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto launch_apps;
    }

    if ((0 < opal_output_get_verbosity(orte_plm_globals.output) ||
         orte_leave_session_attached) &&
        mca_plm_rshbase_component.num_concurrent < map->num_new_daemons) {
        /**
         * If we are in '--debug-daemons' we keep the ssh connection
         * alive for the span of the run. If we use this option
         * AND we launch on more than "num_concurrent" machines
         * then we will deadlock. No connections are terminated
         * until the job is complete, no job is started
         * since all the orteds are waiting for all the others
         * to come online, and the others are not launched because
         * we are waiting on those that have started to terminate
         * their ssh tunnels. :(
         * As we cannot run in this situation, pretty print the error
         * and return an error code.
         */
        orte_show_help("help-plm-rsh.txt", "deadlock-params",
                       true, mca_plm_rshbase_component.num_concurrent, map->num_new_daemons);
        rc = ORTE_ERR_FATAL;
        goto cleanup;
    }

    /*
     * After a discussion between Ralph & Jeff, we concluded that we
     * really are handling the prefix dir option incorrectly. It currently
     * is associated with an app_context, yet it really refers to the
     * location where OpenRTE/Open MPI is installed on a NODE. Fixing
     * this right now would involve significant change to orterun as well
     * as elsewhere, so we will intentionally leave this incorrect at this
     * point. The error, however, is identical to that seen in all prior
     * releases of OpenRTE/Open MPI, so our behavior is no worse than before.
     *
     * A note to fix this, along with ideas on how to do so, has been filed
     * on the project's Trac system under "feature enhancement".
     *
     * For now, default to the prefix_dir provided in the first app_context.
     * Since there always MUST be at least one app_context, we are safe in
     * doing this.
     */
    app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);

    /* we also need at least one node name so we can check what shell is
     * being used, if we have to
     */
    node = NULL;
    for (nnode = 0; nnode < map->nodes->size; nnode++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
            break;
        }
    }
    if (NULL == node) {
        /* well, if there isn't even one node in the map, then we are hammered */
        rc = ORTE_ERR_FATAL;
        goto cleanup;
    }
    prefix_dir = app->prefix_dir;

    /* setup the launch: builds the agent command line and tells us which
     * argv slots hold the node name and the daemon vpid */
    if (ORTE_SUCCESS != (rc = orte_plm_base_rsh_setup_launch(&argc, &argv, node->name, &node_name_index1,
                                                             &proc_vpid_index, prefix_dir))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Iterate through each of the nodes
     */
    for (nnode=0; nnode < map->nodes->size; nnode++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
            continue;
        }

        /* if this daemon already exists, don't launch it! */
        if (node->daemon_launched) {
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:rsh:launch daemon already exists on node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
            continue;
        }

        /* if the node's daemon has not been defined, then we
         * have an error!
         */
        if (NULL == node->daemon) {
            ORTE_ERROR_LOG(ORTE_ERR_FATAL);
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:rsh:launch daemon failed to be defined on node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
            rc = ORTE_ERR_FATAL;
            goto cleanup;
        }

        /* setup node name: replace the node slot of the shared argv */
        free(argv[node_name_index1]);
        if (NULL != node->username &&
            0 != strlen (node->username)) {
            asprintf (&argv[node_name_index1], "%s@%s",
                      node->username, node->name);
        } else {
            argv[node_name_index1] = strdup(node->name);
        }

        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: launching on node %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             node->name));

        /* fork a child to exec the rsh/ssh session */
        pid = fork();
        if (pid < 0) {
            ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
            rc = ORTE_ERR_SYS_LIMITS_CHILDREN;
            goto cleanup;
        }

        /* child */
        if (pid == 0) {
            /* do the ssh launch - this will exit if it fails */
            orte_plm_base_ssh_child(argc, argv, node->daemon->name.vpid, proc_vpid_index);
        } else { /* father */
            /* indicate this daemon has been launched */
            node->daemon->state = ORTE_PROC_STATE_LAUNCHED;
            /* record the pid */
            node->daemon->pid = pid;

            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:rsh: recording launch of daemon %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&node->daemon->name)));

            /* setup callback on sigchild - wait until setup above is complete
             * as the callback can occur in the call to orte_wait_cb
             */
            orte_wait_cb(pid, orte_plm_rsh_wait_daemon, (void*)node->daemon);

            OPAL_THREAD_LOCK(&mca_plm_rshbase_component.lock);
            /* This situation can lead to a deadlock if '--debug-daemons' is set.
             * However, the deadlock condition is tested at the beginning of this
             * function, so we're quite confident it should not happen here.
             */
            if (num_in_progress++ >= mca_plm_rshbase_component.num_concurrent) {
                opal_condition_wait(&mca_plm_rshbase_component.cond, &mca_plm_rshbase_component.lock);
            }
            OPAL_THREAD_UNLOCK(&mca_plm_rshbase_component.lock);
        }
    }

    /* wait for daemons to callback */
    if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rshbase: daemon launch failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

launch_apps:
    /* if we get here, then the daemons succeeded, so any failure would now be
     * for the application job
     */
    failed_job = active_job;
    if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: launch of apps failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

    /* wait for the launch to complete */
    OPAL_THREAD_LOCK(&orte_plm_globals.spawn_lock);
    while (!orte_plm_globals.spawn_complete) {
        opal_condition_wait(&orte_plm_globals.spawn_cond, &orte_plm_globals.spawn_lock);
    }
    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "completed spawn for job %s", ORTE_JOBID_PRINT(jdata->jobid)));
    /* let the next queued spawn (if any) proceed */
    orte_plm_globals.spawn_in_progress = false;
    opal_condition_broadcast(&orte_plm_globals.spawn_in_progress_cond);
    OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);

    /* get here if launch went okay */
    failed_launch = false;

    if (orte_timing ) {
        if (0 != gettimeofday(&joblaunchstop, NULL)) {
            opal_output(0, "plm_rsh: could not obtain job launch stop time");
        } else {
            opal_output(0, "plm_rsh: total job launch time is %ld usec",
                        (joblaunchstop.tv_sec - joblaunchstart.tv_sec)*1000000 +
                        (joblaunchstop.tv_usec - joblaunchstart.tv_usec));
        }
    }

cleanup:
    if (NULL != argv) {
        opal_argv_free(argv);
    }

    /* check for failed launch - if so, force terminate */
    if (failed_launch) {
        orte_errmgr.update_state(failed_job, job_state,
                                 NULL, ORTE_PROC_STATE_UNDEF,
                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
    }

    return rc;
}
/**
 * Terminate the orteds.
 *
 * Sends the daemons the ORTE_DAEMON_EXIT_CMD command through the PLM
 * base. Returns the result of that request; failures are logged.
 */
static int terminate_orteds(void)
{
    /* now tell them to die - we need them to "phone home", though,
     * so we can know that they have exited
     */
    int rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD);

    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/**
 * Deliver a signal to the processes of a job.
 *
 * @param jobid   job whose processes are to be signalled
 * @param signal  signal number to pass along
 * @return result of the PLM base request; failures are logged
 */
static int signal_job(orte_jobid_t jobid, int32_t signal)
{
    /* order them to pass this signal to their local procs */
    int rc = orte_plm_base_orted_signal_local_procs(jobid, signal);

    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/**
 * Module finalize: stops the PLM base communications that init()
 * started. Returns the result of that call; failures are logged.
 */
static int finalize(void)
{
    /* cleanup any pending recvs */
    int rc = orte_plm_base_comm_stop();

    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}

Просмотреть файл

@ -47,6 +47,7 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "orte/mca/plm/rshd/plm_rshd.h"

Просмотреть файл

@ -88,6 +88,7 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "orte/mca/plm/rshd/plm_rshd.h"
static void ssh_child(char *cmd, char **argv) __opal_attribute_noreturn__;
@ -296,9 +297,9 @@ int orte_plm_rshd_launch(orte_job_t *jdata)
}
node = (orte_node_t*)proc->node;
/* setup the launch */
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_rsh_launch(proc->nodename, app,
"orte-bootproxy.sh",
&argv, &cmd))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_slave_launch(proc->nodename, app,
"orte-bootproxy.sh",
&argv, &cmd))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

Просмотреть файл

@ -75,6 +75,7 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "plm_slurm.h"

Просмотреть файл

@ -73,6 +73,7 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "plm_tm.h"