diff --git a/opal/util/argv.c b/opal/util/argv.c index a5a024a5e1..817ab7cad1 100644 --- a/opal/util/argv.c +++ b/opal/util/argv.c @@ -269,6 +269,62 @@ char *opal_argv_join(char **argv, int delimiter) } +/* + * Join all the elements of an argv array from within a + * specified range into a single newly-allocated string. + */ +char *opal_argv_join_range(char **argv, size_t start, size_t end, int delimiter) +{ + char **p; + char *pp; + char *str; + size_t str_len = 0; + size_t i; + + /* Bozo case */ + + if (NULL == argv || NULL == argv[0] || (int)start > opal_argv_count(argv)) { + return strdup(""); + } + + /* Find the total string length in argv including delimiters. The + last delimiter is replaced by the NULL character. */ + + for (p = &argv[start], i=start; *p && i < end; ++p, ++i) { + str_len += strlen(*p) + 1; + } + + /* Allocate the string. */ + + if (NULL == (str = (char*) malloc(str_len))) + return NULL; + + /* Loop filling in the string. */ + + str[--str_len] = '\0'; + p = &argv[start]; + pp = *p; + + for (i = 0; i < str_len; ++i) { + if ('\0' == *pp) { + + /* End of a string, fill in a delimiter and go to the next + string. */ + + str[i] = (char) delimiter; + ++p; + pp = *p; + } else { + str[i] = *pp++; + } + } + + /* All done */ + + return str; +} + + /* * Return the number of bytes consumed by an argv array. */ diff --git a/opal/util/argv.h b/opal/util/argv.h index 899bc88f0c..3737d00bb6 100644 --- a/opal/util/argv.h +++ b/opal/util/argv.h @@ -35,9 +35,8 @@ #include #endif -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS + /** * Append a string (by value) to an new or existing NULL-terminated * argv array. @@ -165,6 +164,8 @@ OPAL_DECLSPEC int opal_argv_count(char **argv); */ OPAL_DECLSPEC char *opal_argv_join(char **argv, int delimiter) __opal_attribute_malloc__; +OPAL_DECLSPEC char *opal_argv_join_range(char **argv, size_t start, size_t end, int delimiter) __opal_attribute_malloc__; + /** * Return the number of bytes consumed by an argv array. * @@ -240,8 +241,7 @@ OPAL_DECLSPEC int opal_argv_delete(int *argc, char ***argv, * target). */ OPAL_DECLSPEC int opal_argv_insert(char ***target, int start, char **source); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif + +END_C_DECLS #endif /* OPAL_ARGV_H */ diff --git a/orte/mca/plm/alps/plm_alps.h b/orte/mca/plm/alps/plm_alps.h index eea32f6600..d42b4f9920 100644 --- a/orte/mca/plm/alps/plm_alps.h +++ b/orte/mca/plm/alps/plm_alps.h @@ -31,7 +31,6 @@ struct orte_plm_alps_component_t { int priority; int debug; bool timing; - char *orted; char *custom_args; }; typedef struct orte_plm_alps_component_t orte_plm_alps_component_t; diff --git a/orte/mca/plm/alps/plm_alps_component.c b/orte/mca/plm/alps/plm_alps_component.c index 405433e74d..3b3e2b63cb 100644 --- a/orte/mca/plm/alps/plm_alps_component.c +++ b/orte/mca/plm/alps/plm_alps_component.c @@ -103,11 +103,6 @@ static int plm_alps_open(void) false, false, 75, &mca_plm_alps_component.priority); - mca_base_param_reg_string(comp, "orted", - "Command to use to start proxy orted", - false, false, "orted", - &mca_plm_alps_component.orted); - mca_plm_alps_component.timing = orte_timing; mca_base_param_reg_string(comp, "args", @@ -129,10 +124,6 @@ static int orte_plm_alps_component_query(mca_base_module_t **module, int *priori static int plm_alps_close(void) { - if (NULL != mca_plm_alps_component.orted) { - free(mca_plm_alps_component.orted); - } - if (NULL != mca_plm_alps_component.custom_args) { free(mca_plm_alps_component.custom_args); } diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index c832347191..84eefcad30 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -266,7 +266,7 @@ static int plm_alps_launch_job(orte_job_t *jdata) */ /* add the daemon command (as specified by user) */ - opal_argv_append(&argc, &argv, mca_plm_alps_component.orted); + orte_plm_base_setup_orted_cmd(&argc, &argv); /* Add basic orted command line options, including debug flags */ orte_plm_base_orted_append_basic_args(&argc, &argv, diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 863a073c95..e28f718379 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -700,6 +700,27 @@ static int orte_plm_base_report_launched(orte_jobid_t job) return ORTE_SUCCESS; } +int orte_plm_base_setup_orted_cmd(int *argc, char ***argv) +{ + int i, loc; + char **tmpv; + + /* set default location */ + loc = -1; + /* split the command apart in case it is multi-word */ + tmpv = opal_argv_split(orte_launch_agent, ' '); + for (i = 0; NULL != tmpv && NULL != tmpv[i]; ++i) { + if (0 == strcmp(tmpv[i], "orted")) { + loc = i; + } + opal_argv_append(argc, argv, tmpv[i]); + } + opal_argv_free(tmpv); + + return loc; +} + + int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, char *ess, int *proc_vpid_index, diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index f544b6ac3a..59a6bfe748 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@ -75,6 +75,8 @@ ORTE_DECLSPEC int orte_plm_base_set_hnp_name(void); ORTE_DECLSPEC int orte_plm_base_create_jobid(orte_jobid_t *jobid); +ORTE_DECLSPEC int orte_plm_base_setup_orted_cmd(int *argc, char ***argv); + /** * Heartbeat support */ diff --git a/orte/mca/plm/ccp/plm_ccp.h b/orte/mca/plm/ccp/plm_ccp.h index 490eed02a2..4d8e29aa29 100644 --- a/orte/mca/plm/ccp/plm_ccp.h +++ b/orte/mca/plm/ccp/plm_ccp.h @@ -29,7 +29,6 @@ struct orte_plm_ccp_component_t { int debug; int verbose; bool want_path_check; - char *orted; char **checked_paths; bool timing; }; diff --git a/orte/mca/plm/ccp/plm_ccp_component.c b/orte/mca/plm/ccp/plm_ccp_component.c index 37df23b4ab..1628ef3794 100644 --- a/orte/mca/plm/ccp/plm_ccp_component.c +++ b/orte/mca/plm/ccp/plm_ccp_component.c @@ -97,10 +97,6 @@ static int plm_ccp_open(void) mca_base_param_reg_int(comp, "priority", "Default selection priority", false, false, 75, &mca_plm_ccp_component.priority); - mca_base_param_reg_string(comp, "orted", - "Command to use to start proxy orted", - false, false, "orted", - &mca_plm_ccp_component.orted); mca_base_param_reg_int(comp, "want_path_check", "Whether the launching process should check for the plm_ccp_orted executable in the PATH before launching (the CCP API does not give an indication of failure; this is a somewhat-lame workaround; non-zero values enable this check)", false, false, (int) true, &tmp); diff --git a/orte/mca/plm/ccp/plm_ccp_module.c b/orte/mca/plm/ccp/plm_ccp_module.c index bc49f90a96..674a24a4b5 100644 --- a/orte/mca/plm/ccp/plm_ccp_module.c +++ b/orte/mca/plm/ccp/plm_ccp_module.c @@ -95,11 +95,11 @@ orte_plm_base_module_t orte_plm_ccp_module = { plm_ccp_finalize }; - -/* - * Local variables - */ -static orte_jobid_t active_job = ORTE_JOBID_INVALID; + +/* + * Local variables + */ +static orte_jobid_t active_job = ORTE_JOBID_INVALID; /** @@ -209,8 +209,9 @@ GETMAP: } /* add the daemon command (as specified by user) */ - argv = opal_argv_split(mca_plm_ccp_component.orted, ' '); - argc = opal_argv_count(argv); + argc = 0; + argv = NULL; + orte_plm_base_setup_orted_cmd(&argc, &argv); opal_argv_append(&argc, &argv, "--no-daemonize"); diff --git a/orte/mca/plm/lsf/plm_lsf.h b/orte/mca/plm/lsf/plm_lsf.h index 6d83e0e959..d4791d19dd 100644 --- a/orte/mca/plm/lsf/plm_lsf.h +++ b/orte/mca/plm/lsf/plm_lsf.h @@ -30,9 +30,7 @@ BEGIN_C_DECLS struct orte_plm_lsf_component_t { orte_plm_base_component_t super; - int priority; bool timing; - char *orted; }; typedef struct orte_plm_lsf_component_t orte_plm_lsf_component_t; diff --git a/orte/mca/plm/lsf/plm_lsf_component.c b/orte/mca/plm/lsf/plm_lsf_component.c index d8a79c7dba..1dbcb5dbab 100644 --- a/orte/mca/plm/lsf/plm_lsf_component.c +++ b/orte/mca/plm/lsf/plm_lsf_component.c @@ -93,16 +93,6 @@ orte_plm_lsf_component_t mca_plm_lsf_component = { static int plm_lsf_open(void) { - mca_base_component_t *comp = &mca_plm_lsf_component.super.base_version; - - mca_base_param_reg_int(comp, "priority", "Default selection priority", - false, false, 75, &mca_plm_lsf_component.priority); - - mca_base_param_reg_string(comp, "orted", - "Command to use to start proxy orted", - false, false, "orted", - &mca_plm_lsf_component.orted); - return ORTE_SUCCESS; } @@ -125,7 +115,7 @@ static int orte_plm_lsf_component_query(mca_base_module_t **module, int *priorit return ORTE_ERROR; } - *priority = mca_plm_lsf_component.priority; + *priority = 75; *module = (mca_base_module_t *) &orte_plm_lsf_module; return ORTE_SUCCESS; } diff --git a/orte/mca/plm/lsf/plm_lsf_module.c b/orte/mca/plm/lsf/plm_lsf_module.c index 2b10239509..348544001d 100644 --- a/orte/mca/plm/lsf/plm_lsf_module.c +++ b/orte/mca/plm/lsf/plm_lsf_module.c @@ -222,7 +222,7 @@ static int plm_lsf_launch_job(orte_job_t *jdata) */ /* add the daemon command (as specified by user) */ - opal_argv_append(&argc, &argv, mca_plm_lsf_component.orted); + orte_plm_base_setup_orted_cmd(&argc, &argv); /* Add basic orted command line options */ orte_plm_base_orted_append_basic_args(&argc, &argv, diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index 6ccf3fe7e2..3887bfbd1d 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -343,8 +343,7 @@ static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata) static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node_name_index1, int *node_name_index2, - int *local_exec_index, - int *proc_vpid_index, char **lib_base, char **bin_base, + int *proc_vpid_index, char *prefix_dir, bool *remote_sh, bool *remote_csh) { struct passwd *p; @@ -353,6 +352,11 @@ static int setup_launch(int *argcptr, char ***argvptr, char *param; orte_plm_rsh_shell_t shell; bool local_sh = false, local_csh = false; + char *lib_base, *bin_base; + int orted_argc; + char **orted_argv; + char *orted_cmd, *orted_prefix, *final_cmd; + int orted_index; int rc; /* What is our local shell? */ @@ -428,6 +432,36 @@ static int setup_launch(int *argcptr, char ***argvptr, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *remote_csh, *remote_sh)); + /* Figure out the basenames for the libdir and bindir. This + requires some explanation: + + - Use opal_install_dirs.libdir and opal_install_dirs.bindir. + + - After a discussion on the devel-core mailing list, the + developers decided that we should use the local directory + basenames as the basis for the prefix on the remote note. + This does not handle a few notable cases (e.g., if the + libdir/bindir is not simply a subdir under the prefix, if the + libdir/bindir basename is not the same on the remote node as + it is here on the local node, etc.), but we decided that + --prefix was meant to handle "the common case". If you need + something more complex than this, a) edit your shell startup + files to set PATH/LD_LIBRARY_PATH properly on the remove + node, or b) use some new/to-be-defined options that + explicitly allow setting the bindir/libdir on the remote + node. We decided to implement these options (e.g., + --remote-bindir and --remote-libdir) to orterun when it + actually becomes a problem for someone (vs. a hypothetical + situation). + + Hence, for now, we simply take the basename of this install's + libdir and bindir and use it to append this install's prefix + and use that on the remote node. + */ + + lib_base = opal_basename(opal_install_dirs.libdir); + bin_base = opal_basename(opal_install_dirs.bindir); + /* * Build argv array */ @@ -436,9 +470,127 @@ static int setup_launch(int *argcptr, char ***argvptr, *node_name_index1 = argc; opal_argv_append(&argc, &argv, "