Orterun creates a "clean" copy of its environment for use in launching procs. This includes properly setting LD_LIBRARY_PATH and PATH, among other things. Unfortunately, our PLM modules were using the local environ instead of the saved copy, thus missing a number of things that really should have been included. From what I see, we got away with the error because the PLMs were duplicating all that setup logic themselves - I'll clean this up over the next few days.
Meantime, correct the PLMs so they use the correct environ for launching. This commit was SVN r18713.
This commit is contained in:
parent
f70b7e51ce
Commit
f799ea225f
@ -328,7 +328,7 @@ static int plm_alps_launch_job(orte_job_t *jdata)
|
||||
}
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
env = opal_argv_copy(orte_launch_environ);
|
||||
|
||||
/* add the nodelist */
|
||||
var = mca_base_param_environ_variable("orte", "alps", "nodelist");
|
||||
|
@ -255,7 +255,7 @@ GETMAP:
|
||||
bin_base = opal_basename(opal_install_dirs.bindir);
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
env = opal_argv_copy(orte_launch_environ);
|
||||
|
||||
/* add our umask -- see big note in orted.c */
|
||||
current_umask = umask(0);
|
||||
|
@ -282,7 +282,7 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
|
||||
}
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
env = opal_argv_copy(orte_launch_environ);
|
||||
|
||||
if (mca_plm_lsf_component.timing) {
|
||||
if (0 != gettimeofday(&launchstart, NULL)) {
|
||||
|
@ -780,7 +780,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
set_handler_default(SIGCHLD);
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
env = opal_argv_copy(orte_launch_environ);
|
||||
|
||||
/* exec the daemon */
|
||||
if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
|
||||
|
@ -545,7 +545,7 @@ static void ssh_child(int argc, char **argv,
|
||||
sigset_t sigs;
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
env = opal_argv_copy(orte_launch_environ);
|
||||
|
||||
/* ensure that only the ssh plm is selected on the remote daemon */
|
||||
var = mca_base_param_environ_variable("plm", NULL, NULL);
|
||||
|
@ -340,7 +340,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
|
||||
}
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
env = opal_argv_copy(orte_launch_environ);
|
||||
|
||||
/* add the nodelist */
|
||||
var = mca_base_param_environ_variable("orte", "slurm", "nodelist");
|
||||
|
@ -230,7 +230,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
bin_base = opal_basename(opal_install_dirs.bindir);
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
env = opal_argv_copy(orte_launch_environ);
|
||||
|
||||
/* add our umask -- see big note in orted.c */
|
||||
current_umask = umask(0);
|
||||
|
Loading…
x
Reference in a new issue
Block a user