1
1

Merge pull request #936 from rhc54/topic/rsh

Now that we have an "isolated" PLM component, we cannot just let rsh …
Этот коммит содержится в:
rhc54 2015-09-24 13:15:22 -07:00
родитель 8bac351a9a 0140ff048d
Коммит 7d3321b66e
8 изменённых файлов: 130 добавлений и 201 удалений

Просмотреть файл

@ -4,6 +4,7 @@
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -38,6 +39,7 @@ int mca_base_select(const char *type_name, int output_id,
mca_base_component_t *component = NULL; mca_base_component_t *component = NULL;
mca_base_module_t *module = NULL; mca_base_module_t *module = NULL;
int priority = 0, best_priority = INT32_MIN; int priority = 0, best_priority = INT32_MIN;
int rc;
*best_module = NULL; *best_module = NULL;
*best_component = NULL; *best_component = NULL;
@ -70,7 +72,18 @@ int mca_base_select(const char *type_name, int output_id,
"mca:base:select:(%5s) Querying component [%s]", "mca:base:select:(%5s) Querying component [%s]",
type_name, component->mca_component_name); type_name, component->mca_component_name);
component->mca_query_component(&module, &priority); rc = component->mca_query_component(&module, &priority);
if (OPAL_ERR_FATAL == rc) {
/* a fatal error was detected by this component - e.g., the
* user specified a required element and the component could
* not find it. In this case, we must not continue as we might
* find some other component that could run, causing us to do
* something the user didn't want */
return rc;
} else if (OPAL_SUCCESS != rc) {
/* silently skip this component */
continue;
}
/* /*
* If no module was returned, then skip component * If no module was returned, then skip component

Просмотреть файл

@ -224,31 +224,26 @@ static int rte_init(void)
* process stats if requested * process stats if requested
*/ */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) { if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "opal_pstat_base_open"; error = "opal_pstat_base_open";
goto error; goto error;
} }
if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) { if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) {
ORTE_ERROR_LOG(ret);
error = "opal_pstat_base_select"; error = "opal_pstat_base_select";
goto error; goto error;
} }
/* open and setup the state machine */ /* open and setup the state machine */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_state_base_open"; error = "orte_state_base_open";
goto error; goto error;
} }
if (ORTE_SUCCESS != (ret = orte_state_base_select())) { if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_state_base_select"; error = "orte_state_base_select";
goto error; goto error;
} }
/* open the errmgr */ /* open the errmgr */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_errmgr_base_open"; error = "orte_errmgr_base_open";
goto error; goto error;
} }
@ -259,26 +254,26 @@ static int rte_init(void)
* first and select that component. * first and select that component.
*/ */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) { if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_plm_base_open"; error = "orte_plm_base_open";
goto error; goto error;
} }
if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { if (ORTE_SUCCESS != (ret = orte_plm_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_plm_base_select"; error = "orte_plm_base_select";
if (ORTE_ERR_FATAL == ret) {
/* we already output a show_help - so keep down the verbiage */
ret = ORTE_ERR_SILENT;
}
goto error; goto error;
} }
/* if we were spawned by a singleton, our jobid was given to us */ /* if we were spawned by a singleton, our jobid was given to us */
if (NULL != orte_ess_base_jobid) { if (NULL != orte_ess_base_jobid) {
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&ORTE_PROC_MY_NAME->jobid, orte_ess_base_jobid))) { if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&ORTE_PROC_MY_NAME->jobid, orte_ess_base_jobid))) {
ORTE_ERROR_LOG(ret);
error = "convert_string_to_jobid"; error = "convert_string_to_jobid";
goto error; goto error;
} }
ORTE_PROC_MY_NAME->vpid = 0; ORTE_PROC_MY_NAME->vpid = 0;
} else { } else {
if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) { if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) {
ORTE_ERROR_LOG(ret);
error = "orte_plm_set_hnp_name"; error = "orte_plm_set_hnp_name";
goto error; goto error;
} }
@ -304,7 +299,6 @@ static int rte_init(void)
orte_process_info.tmpdir_base, orte_process_info.tmpdir_base,
orte_process_info.nodename, NULL, orte_process_info.nodename, NULL,
ORTE_PROC_MY_NAME))) { ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret);
error = "orte_session_dir define"; error = "orte_session_dir define";
goto error; goto error;
} }
@ -318,7 +312,6 @@ static int rte_init(void)
orte_process_info.tmpdir_base, orte_process_info.tmpdir_base,
orte_process_info.nodename, NULL, orte_process_info.nodename, NULL,
ORTE_PROC_MY_NAME))) { ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret);
error = "orte_session_dir"; error = "orte_session_dir";
goto error; goto error;
} }
@ -329,12 +322,10 @@ static int rte_init(void)
* OOB Layer * OOB Layer
*/ */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_oob_base_open"; error = "orte_oob_base_open";
goto error; goto error;
} }
if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_oob_base_select"; error = "orte_oob_base_select";
goto error; goto error;
} }
@ -343,30 +334,25 @@ static int rte_init(void)
* Runtime Messaging Layer * Runtime Messaging Layer
*/ */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_rml_base_open"; error = "orte_rml_base_open";
goto error; goto error;
} }
if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rml_base_select"; error = "orte_rml_base_select";
goto error; goto error;
} }
/* Messaging QoS Layer */ /* Messaging QoS Layer */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_qos_base_open"; error = "orte_qos_base_open";
goto error; goto error;
} }
if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { if (ORTE_SUCCESS != (ret = orte_qos_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_qos_base_select"; error = "orte_qos_base_select";
goto error; goto error;
} }
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_errmgr_base_select"; error = "orte_errmgr_base_select";
goto error; goto error;
} }

Просмотреть файл

@ -11,6 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -32,43 +33,26 @@
/** /**
* Function for selecting one component from all those that are * Function for selecting one component from all those that are
* available. * available.
*/ */
int orte_plm_base_select(void) int orte_plm_base_select(void)
{ {
int exit_status = ORTE_SUCCESS; int rc;
orte_plm_base_component_t *best_component = NULL; orte_plm_base_component_t *best_component = NULL;
orte_plm_base_module_t *best_module = NULL; orte_plm_base_module_t *best_module = NULL;
/* /*
* Select the best component * Select the best component
*/ */
if( OPAL_SUCCESS != mca_base_select("plm", orte_plm_base_framework.framework_output, if (OPAL_SUCCESS == (rc = mca_base_select("plm", orte_plm_base_framework.framework_output,
&orte_plm_base_framework.framework_components, &orte_plm_base_framework.framework_components,
(mca_base_module_t **) &best_module, (mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component) ) { (mca_base_component_t **) &best_component))) {
/* This will only happen if no component was selected /* Save the winner */
* orte_plm = *best_module;
* If we didn't find one, and we are a daemon, then default to retaining the proxy.
* Otherwise, if we didn't find one to select, that is unacceptable.
*/
if (ORTE_PROC_IS_DAEMON) {
/* don't record a selected component or flag selected
* so we finalize correctly - just leave the plm alone
* as it defaults to pointing at the proxy
*/
goto cleanup;
} else {
exit_status = ORTE_ERR_NOT_FOUND;
goto cleanup;
}
} }
/* Save the winner */ return rc;
orte_plm = *best_module;
cleanup:
return exit_status;
} }

Просмотреть файл

@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved. # University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2015 Intel, Inc. All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -18,6 +19,14 @@
# #
# This is the US/English general help file for Open RTE's orterun. # This is the US/English general help file for Open RTE's orterun.
# #
[agent-not-found]
The value of the MCA parameter "plm_rsh_agent" was set to a path
that could not be found:
plm_rsh_agent: %s
Please either unset the parameter, or check that the path is correct.
#
[no-local-orted] [no-local-orted]
The rsh PLS component was not able to find the executable "orted" in The rsh PLS component was not able to find the executable "orted" in
your PATH or in the directory where Open MPI/OpenRTE was initially installed, your PATH or in the directory where Open MPI/OpenRTE was initially installed,

Просмотреть файл

@ -59,6 +59,8 @@ struct orte_plm_rsh_component_t {
bool no_tree_spawn; bool no_tree_spawn;
int num_concurrent; int num_concurrent;
char *agent; char *agent;
char *agent_path;
char **agent_argv;
bool assume_same_shell; bool assume_same_shell;
bool pass_environ_mca_params; bool pass_environ_mca_params;
char *ssh_args; char *ssh_args;

Просмотреть файл

@ -43,8 +43,10 @@
#include "opal/util/opal_environ.h" #include "opal/util/opal_environ.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/util/argv.h" #include "opal/util/argv.h"
#include "opal/util/basename.h"
#include "opal/util/path.h" #include "opal/util/path.h"
#include "orte/mca/state/state.h"
#include "orte/util/name_fns.h" #include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_globals.h"
#include "orte/util/show_help.h" #include "orte/util/show_help.h"
@ -228,6 +230,7 @@ static int rsh_component_open(void)
/* initialize globals */ /* initialize globals */
mca_plm_rsh_component.using_qrsh = false; mca_plm_rsh_component.using_qrsh = false;
mca_plm_rsh_component.using_llspawn = false; mca_plm_rsh_component.using_llspawn = false;
mca_plm_rsh_component.agent_argv = NULL;
/* lookup parameters */ /* lookup parameters */
if (mca_plm_rsh_component.num_concurrent <= 0) { if (mca_plm_rsh_component.num_concurrent <= 0) {
@ -256,48 +259,59 @@ static int rsh_component_query(mca_base_module_t **module, int *priority)
/* Check if we are under Grid Engine parallel environment by looking at several /* Check if we are under Grid Engine parallel environment by looking at several
* environment variables. If so, setup the path and argv[0]. */ * environment variables. If so, setup the path and argv[0]. */
if (!mca_plm_rsh_component.disable_qrsh && if (NULL == mca_plm_rsh_component.agent) {
NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") && if (!mca_plm_rsh_component.disable_qrsh &&
NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) { NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") &&
/* setup the search path for qrsh */ NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) {
asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC")); /* setup the search path for qrsh */
/* see if the agent is available */ asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC"));
if (ORTE_SUCCESS != rsh_launch_agent_lookup("qrsh", tmp)) { /* see if the agent is available */
/* can't be SGE */ if (ORTE_SUCCESS != rsh_launch_agent_lookup("qrsh", tmp)) {
opal_output_verbose(1, orte_plm_base_framework.framework_output, /* can't be SGE */
"%s plm:rsh: unable to be used: SGE indicated but cannot find path " opal_output_verbose(1, orte_plm_base_framework.framework_output,
"or execution permissions not set for launching agent qrsh", "%s plm:rsh: unable to be used: SGE indicated but cannot find path "
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); "or execution permissions not set for launching agent qrsh",
free(tmp); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
*module = NULL; free(tmp);
return ORTE_ERROR; *module = NULL;
return ORTE_ERROR;
}
mca_plm_rsh_component.agent = tmp;
mca_plm_rsh_component.using_qrsh = true;
/* no tree spawn allowed under qrsh */
mca_plm_rsh_component.no_tree_spawn = true;
goto success;
} else if (!mca_plm_rsh_component.disable_llspawn &&
NULL != getenv("LOADL_STEP_ID")) {
/* We are running as a LOADLEVELER job.
* Search for llspawn in the user's PATH */
if (ORTE_SUCCESS != rsh_launch_agent_lookup("llspawn", NULL)) {
opal_output_verbose(1, orte_plm_base_framework.framework_output,
"%s plm:rsh: unable to be used: LoadLeveler "
"indicated but cannot find path or execution "
"permissions not set for launching agent llspawn",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
*module = NULL;
return ORTE_ERROR;
}
mca_plm_rsh_component.agent = strdup("llspawn");
mca_plm_rsh_component.using_llspawn = true;
goto success;
} }
free(tmp);
mca_plm_rsh_component.using_qrsh = true;
/* no tree spawn allowed under qrsh */
mca_plm_rsh_component.no_tree_spawn = true;
goto success;
} else if (!mca_plm_rsh_component.disable_llspawn &&
NULL != getenv("LOADL_STEP_ID")) {
/* We are running as a LOADLEVELER job.
Search for llspawn in the user's PATH */
if (ORTE_SUCCESS != rsh_launch_agent_lookup("llspawn", NULL)) {
opal_output_verbose(1, orte_plm_base_framework.framework_output,
"%s plm:rsh: unable to be used: LoadLeveler "
"indicated but cannot find path or execution "
"permissions not set for launching agent llspawn",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
*module = NULL;
return ORTE_ERROR;
}
mca_plm_rsh_component.using_llspawn = true;
goto success;
} }
/* if this isn't a Grid Engine or LoadLeveler environment, /* if this isn't a Grid Engine or LoadLeveler environment, or
see if MCA-specified agent (default: ssh:rsh) is available */ * if the user specified a launch agent, look for it */
if (ORTE_SUCCESS != rsh_launch_agent_lookup(NULL, NULL)) { if (ORTE_SUCCESS != rsh_launch_agent_lookup(NULL, NULL)) {
/* if the user specified an agent and we couldn't find it,
* then we want to error out and not continue */
if (NULL != mca_plm_rsh_component.agent) {
orte_show_help("help-plm-rsh.txt", "agent-not-found", true,
mca_plm_rsh_component.agent);
ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_FATAL;
}
/* this isn't an error - we just cannot be selected */ /* this isn't an error - we just cannot be selected */
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:rsh: unable to be used: cannot find path " "%s plm:rsh: unable to be used: cannot find path "
@ -380,19 +394,48 @@ char **orte_plm_rsh_search(const char* agent_list, const char *path)
static int rsh_launch_agent_lookup(const char *agent_list, char *path) static int rsh_launch_agent_lookup(const char *agent_list, char *path)
{ {
char **tmp; char *bname;
int i;
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:rsh_lookup on agent %s path %s", "%s plm:rsh_lookup on agent %s path %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == agent_list) ? mca_plm_rsh_component.agent : agent_list, (NULL == agent_list) ? mca_plm_rsh_component.agent : agent_list,
(NULL == path) ? "NULL" : path)); (NULL == path) ? "NULL" : path));
if (NULL == (tmp = orte_plm_rsh_search(agent_list, path))) { if (NULL == (mca_plm_rsh_component.agent_argv = orte_plm_rsh_search(agent_list, path))) {
return ORTE_ERR_NOT_FOUND; return ORTE_ERR_NOT_FOUND;
} }
/* if we got here, then one of the given agents could be found */ /* if we got here, then one of the given agents could be found - the
opal_argv_free(tmp); * complete path is in the argv[0] position */
mca_plm_rsh_component.agent_path = strdup(mca_plm_rsh_component.agent_argv[0]);
bname = opal_basename(mca_plm_rsh_component.agent_argv[0]);
if (NULL == bname) {
return ORTE_SUCCESS;
}
/* replace the initial position with the basename */
free(mca_plm_rsh_component.agent_argv[0]);
mca_plm_rsh_component.agent_argv[0] = bname;
/* see if we need to add an xterm argument */
if (0 == strcmp(bname, "ssh")) {
/* if xterm option was given, add '-X', ensuring we don't do it twice */
if (NULL != orte_xterm) {
opal_argv_append_unique_nosize(&mca_plm_rsh_component.agent_argv, "-X", false);
} else if (0 >= opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
/* if debug was not specified, and the user didn't explicitly
* specify X11 forwarding/non-forwarding, add "-x" if it
* isn't already there (check either case)
*/
for (i = 1; NULL != mca_plm_rsh_component.agent_argv[i]; ++i) {
if (0 == strcasecmp("-x", mca_plm_rsh_component.agent_argv[i])) {
break;
}
}
if (NULL == mca_plm_rsh_component.agent_argv[i]) {
opal_argv_append_nosize(&mca_plm_rsh_component.agent_argv, "-x");
}
}
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -164,7 +164,6 @@ static const char *orte_plm_rsh_shell_name[7] = {
*/ */
static void set_handler_default(int sig); static void set_handler_default(int sig);
static orte_plm_rsh_shell_t find_shell(char *shell); static orte_plm_rsh_shell_t find_shell(char *shell);
static int launch_agent_setup(const char *agent, char *path);
static void ssh_child(int argc, char **argv) __opal_attribute_noreturn__; static void ssh_child(int argc, char **argv) __opal_attribute_noreturn__;
static int rsh_probe(char *nodename, static int rsh_probe(char *nodename,
orte_plm_rsh_shell_t *shell); orte_plm_rsh_shell_t *shell);
@ -175,8 +174,6 @@ static void launch_daemons(int fd, short args, void *cbdata);
static void process_launch_list(int fd, short args, void *cbdata); static void process_launch_list(int fd, short args, void *cbdata);
/* local global storage */ /* local global storage */
static char *rsh_agent_path=NULL;
static char **rsh_agent_argv=NULL;
static int num_in_progress=0; static int num_in_progress=0;
static opal_list_t launch_list; static opal_list_t launch_list;
static opal_event_t launch_event; static opal_event_t launch_event;
@ -186,51 +183,8 @@ static opal_event_t launch_event;
*/ */
static int rsh_init(void) static int rsh_init(void)
{ {
char *tmp;
int rc; int rc;
/* we were selected, so setup the launch agent */
if (mca_plm_rsh_component.using_qrsh) {
/* perform base setup for qrsh */
(void)asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC"));
if (ORTE_SUCCESS != (rc = launch_agent_setup("qrsh", tmp))) {
ORTE_ERROR_LOG(rc);
free(tmp);
return rc;
}
free(tmp);
/* automatically add -inherit and grid engine PE related flags */
opal_argv_append_nosize(&rsh_agent_argv, "-inherit");
/* Don't use the "-noshell" flag as qrsh would have a problem
* swallowing a long command */
opal_argv_append_nosize(&rsh_agent_argv, "-nostdin");
opal_argv_append_nosize(&rsh_agent_argv, "-V");
if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
opal_argv_append_nosize(&rsh_agent_argv, "-verbose");
tmp = opal_argv_join(rsh_agent_argv, ' ');
opal_output_verbose(1, orte_plm_base_framework.framework_output,
"%s plm:rsh: using \"%s\" for launching\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
free(tmp);
}
} else if(mca_plm_rsh_component.using_llspawn) {
/* perform base setup for llspawn */
if (ORTE_SUCCESS != (rc = launch_agent_setup("llspawn", NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
opal_output_verbose(1, orte_plm_base_framework.framework_output,
"%s plm:rsh: using \"%s\" for launching\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
rsh_agent_path);
} else {
/* not using qrsh or llspawn - use MCA-specified agent */
if (ORTE_SUCCESS != (rc = launch_agent_setup(mca_plm_rsh_component.agent, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* point to our launch command */ /* point to our launch command */
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_LAUNCH_DAEMONS, if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_LAUNCH_DAEMONS,
launch_daemons, ORTE_SYS_PRI))) { launch_daemons, ORTE_SYS_PRI))) {
@ -371,8 +325,8 @@ static int setup_launch(int *argcptr, char ***argvptr,
/* /*
* Build argv array * Build argv array
*/ */
argv = opal_argv_copy(rsh_agent_argv); argv = opal_argv_copy(mca_plm_rsh_component.agent_argv);
argc = opal_argv_count(rsh_agent_argv); argc = opal_argv_count(mca_plm_rsh_component.agent_argv);
/* if any ssh args were provided, now is the time to add them */ /* if any ssh args were provided, now is the time to add them */
if (NULL != mca_plm_rsh_component.ssh_args) { if (NULL != mca_plm_rsh_component.ssh_args) {
char **ssh_argv; char **ssh_argv;
@ -722,7 +676,7 @@ static void ssh_child(int argc, char **argv)
* about remote launches here * about remote launches here
*/ */
exec_argv = argv; exec_argv = argv;
exec_path = strdup(rsh_agent_path); exec_path = mca_plm_rsh_component.agent_path;
/* Don't let ssh slurp all of our stdin! */ /* Don't let ssh slurp all of our stdin! */
fdin = open("/dev/null", O_RDWR); fdin = open("/dev/null", O_RDWR);
@ -1377,67 +1331,6 @@ static orte_plm_rsh_shell_t find_shell(char *shell)
return ORTE_PLM_RSH_SHELL_UNKNOWN; return ORTE_PLM_RSH_SHELL_UNKNOWN;
} }
static int launch_agent_setup(const char *agent, char *path)
{
char *bname;
int i;
/* if no agent was provided, then report not found */
if (NULL == mca_plm_rsh_component.agent && NULL == agent) {
return ORTE_ERR_NOT_FOUND;
}
/* search for the argv */
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:rsh_setup on agent %s path %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == agent) ? mca_plm_rsh_component.agent : agent,
(NULL == path) ? "NULL" : path));
rsh_agent_argv = orte_plm_rsh_search(agent, path);
if (0 == opal_argv_count(rsh_agent_argv)) {
/* nothing was found */
return ORTE_ERR_NOT_FOUND;
}
/* see if we can find the agent in the path */
rsh_agent_path = opal_path_findv(rsh_agent_argv[0], X_OK, environ, path);
if (NULL == rsh_agent_path) {
/* not an error - just report not found */
opal_argv_free(rsh_agent_argv);
return ORTE_ERR_NOT_FOUND;
}
bname = opal_basename(rsh_agent_argv[0]);
if (NULL == bname) {
return ORTE_SUCCESS;
}
if (0 == strcmp(bname, "ssh")) {
/* if xterm option was given, add '-X', ensuring we don't do it twice */
if (NULL != orte_xterm) {
opal_argv_append_unique_nosize(&rsh_agent_argv, "-X", false);
} else if (0 >= opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
/* if debug was not specified, and the user didn't explicitly
* specify X11 forwarding/non-forwarding, add "-x" if it
* isn't already there (check either case)
*/
for (i = 1; NULL != rsh_agent_argv[i]; ++i) {
if (0 == strcasecmp("-x", rsh_agent_argv[i])) {
break;
}
}
if (NULL == rsh_agent_argv[i]) {
opal_argv_append_nosize(&rsh_agent_argv, "-x");
}
}
}
free(bname);
/* the caller can append any additional argv's they desire */
return ORTE_SUCCESS;
}
/** /**
* Check the Shell variable and system type on the specified node * Check the Shell variable and system type on the specified node
*/ */
@ -1479,8 +1372,8 @@ static int rsh_probe(char *nodename,
exit(01); exit(01);
} }
/* Build argv array */ /* Build argv array */
argv = opal_argv_copy(rsh_agent_argv); argv = opal_argv_copy(mca_plm_rsh_component.agent_argv);
argc = opal_argv_count(rsh_agent_argv); argc = opal_argv_count(mca_plm_rsh_component.agent_argv);
opal_argv_append(&argc, &argv, nodename); opal_argv_append(&argc, &argv, nodename);
opal_argv_append(&argc, &argv, "echo $SHELL"); opal_argv_append(&argc, &argv, "echo $SHELL");

Просмотреть файл

@ -556,11 +556,10 @@ orte_session_dir_finalize(orte_process_name_t *proc)
NULL == orte_process_info.top_session_dir) { NULL == orte_process_info.top_session_dir) {
/* this should never happen - it means we are calling /* this should never happen - it means we are calling
* cleanup *before* properly setting up the session * cleanup *before* properly setting up the session
* dir system. This leaves open the possibility of * dir system. Protect against the possibility of
* accidentally removing directories we shouldn't * accidentally removing directories we shouldn't
* touch * touch by returning
*/ */
ORTE_ERROR_LOG(ORTE_ERR_NOT_INITIALIZED);
return ORTE_ERR_NOT_INITIALIZED; return ORTE_ERR_NOT_INITIALIZED;
} }