1
1

Additions to the tm, slurm, and rsh pls modules to handle the --prefix

option as discussed on the devel-core mailing list.  The Big
Difference is that instead of hard-coding the strings "/lib" and
"/bin" to append to the prefix, we append the basename of the local
libdir and bindir.  Hence, if your libdir is $prefix/lib64, we'll
append /lib64 to construct the remote node's LD_LIBRARY_PATH (etc.).

Also amended the orterun.1 man page to include a description of
--prefix, how it is constructed, what it handles / what it does not,
etc.

This commit was SVN r9930.
Этот коммит содержится в:
Jeff Squyres 2006-05-16 14:14:12 +00:00
родитель 4080646606
Коммит 1d6902296c
4 изменённых файлов: 190 добавлений и 21 удалений

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -64,6 +65,7 @@
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/util/basename.h"
#include "orte/orte_constants.h"
#include "orte/util/univ_info.h"
#include "orte/util/session_dir.h"
@ -396,6 +398,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
struct passwd *p;
bool remote_bash = false, remote_csh = false;
bool local_bash = false, local_csh = false;
char *lib_base = NULL, *bin_base = NULL;
/* Query the list of nodes allocated and mapped to this job.
* We need the entire mapping for a couple of reasons:
@ -583,6 +586,39 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
}
}
/* Figure out the basenames for the libdir and bindir. This
requires some explanation:
- Use OPAL_LIBDIR and OPAL_BINDIR instead of -D'ing some macros
in this directory's Makefile.am because it makes all the
dependencies work out correctly. These are defined in
opal/install_dirs.h.
- After a discussion on the devel-core mailing list, the
developers decided that we should use the local directory
basenames as the basis for the prefix on the remote node.
This does not handle a few notable cases (e.g., if the
libdir/bindir is not simply a subdir under the prefix, if the
libdir/bindir basename is not the same on the remote node as
it is here in the local node, etc.), but we decided that
--prefix was meant to handle "the common case". If you need
something more complex than this, a) edit your shell startup
files to set PATH/LD_LIBRARY_PATH properly on the remote
node, or b) use some new/to-be-defined options that
explicitly allow setting the bindir/libdir on the remote
node. We decided to implement these options (e.g.,
--remote-bindir and --remote-libdir) to orterun when it
actually becomes a problem for someone (vs. a hypothetical
situation).
Hence, for now, we simply take the basenames of this install's
libdir and bindir, append them to the prefix, and use the
resulting paths on the remote node.
*/
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/*
* Iterate through each of the contexts
*/
@ -727,7 +763,8 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
}
} else {
if (NULL != prefix_dir) {
asprintf(&exec_path, "%s/bin/orted", prefix_dir);
asprintf(&exec_path, "%s/%s/orted",
prefix_dir, bin_base);
}
/* If we yet did not fill up the execpath, do so now */
if (NULL == exec_path) {
@ -748,9 +785,10 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
/* Reset PATH */
oldenv = getenv("PATH");
if (NULL != oldenv) {
asprintf(&newenv, "%s/bin:%s", prefix_dir, oldenv);
asprintf(&newenv, "%s/%s:%s", prefix_dir,
bin_base, oldenv);
} else {
asprintf(&newenv, "%s/bin", prefix_dir);
asprintf(&newenv, "%s/%s", prefix_dir, bin_base);
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_rsh_component.debug) {
@ -761,9 +799,10 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
/* Reset LD_LIBRARY_PATH */
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
asprintf(&newenv, "%s/lib:%s", prefix_dir, oldenv);
asprintf(&newenv, "%s/%s:%s", prefix_dir,
lib_base, oldenv);
} else {
asprintf(&newenv, "%s/lib", prefix_dir);
asprintf(&newenv, "%s/%s", prefix_dir, lib_base);
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_rsh_component.debug) {
@ -954,6 +993,13 @@ cleanup:
}
OBJ_DESTRUCT(&mapping);
if (NULL != lib_base) {
free(lib_base);
}
if (NULL != bin_base) {
free(bin_base);
}
free(jobid_string); /* done with this variable */
opal_argv_free(argv);

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -42,11 +43,13 @@
#include <fcntl.h>
#endif
#include "opal/install_dirs.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"
#include "opal/util/path.h"
#include "opal/util/show_help.h"
#include "opal/util/basename.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/runtime/runtime.h"
#include "orte/orte_constants.h"
@ -461,7 +464,16 @@ static int pls_slurm_start_proc(int argc, char **argv, char **env,
"pls:slurm:start_proc: fork failed");
return ORTE_ERR_IN_ERRNO;
} else if (0 == srun_pid) {
char *bin_base = NULL, *lib_base = NULL;
/* Figure out the basenames for the libdir and bindir. There
is a lengthy comment about this in pls_rsh_module.c
explaining all the rationale for how / why we're doing
this. */
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're already in
the child process, so it's ok to modify environ. */
@ -471,9 +483,9 @@ static int pls_slurm_start_proc(int argc, char **argv, char **env,
/* Reset PATH */
oldenv = getenv("PATH");
if (NULL != oldenv) {
asprintf(&newenv, "%s/bin:%s", prefix, oldenv);
asprintf(&newenv, "%s/%s:%s", prefix, bin_base, oldenv);
} else {
asprintf(&newenv, "%s/bin", prefix);
asprintf(&newenv, "%s/%s", prefix, bin_base);
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_slurm_component.debug) {
@ -484,9 +496,9 @@ static int pls_slurm_start_proc(int argc, char **argv, char **env,
/* Reset LD_LIBRARY_PATH */
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
asprintf(&newenv, "%s/lib:%s", prefix, oldenv);
asprintf(&newenv, "%s/%s:%s", prefix, lib_base, oldenv);
} else {
asprintf(&newenv, "%s/lib", prefix);
asprintf(&newenv, "%s/%s", prefix, lib_base);
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_slurm_component.debug) {

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -36,11 +37,13 @@
#include <errno.h>
#include <tm.h>
#include "opal/install_dirs.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"
#include "opal/util/show_help.h"
#include "opal/util/path.h"
#include "opal/util/basename.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/runtime/opal_progress.h"
#include "orte/orte_constants.h"
@ -105,6 +108,7 @@ pls_tm_launch(orte_jobid_t jobid)
bool connected = false;
opal_list_t map;
char *cur_prefix;
char *bin_base = NULL, *lib_base = NULL;
/* Query the list of nodes allocated and mapped to this job.
* We need the entire mapping for a couple of reasons:
@ -210,6 +214,13 @@ pls_tm_launch(orte_jobid_t jobid)
}
connected = true;
/* Figure out the basenames for the libdir and bindir. There is a
lengthy comment about this in pls_rsh_module.c explaining all
the rationale for how / why we're doing this. */
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/*
* Iterate through each of the nodes and spin
* up a daemon.
@ -311,8 +322,8 @@ pls_tm_launch(orte_jobid_t jobid)
for (i = 0; NULL != env && NULL != env[i]; ++i) {
/* Reset PATH */
if (0 == strncmp("PATH=", env[i], 5)) {
asprintf(&newenv, "%s/bin:%s",
cur_prefix, env[i] + 5);
asprintf(&newenv, "%s/%s:%s",
cur_prefix, bin_base, env[i] + 5);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting PATH: %s",
newenv);
@ -323,8 +334,8 @@ pls_tm_launch(orte_jobid_t jobid)
/* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
asprintf(&newenv, "%s/lib:%s",
cur_prefix, env[i] + 16);
asprintf(&newenv, "%s/%s:%s",
cur_prefix, lib_base, env[i] + 16);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
newenv);
@ -408,6 +419,13 @@ cleanup:
}
OBJ_DESTRUCT(&mapping_list);
if (NULL != lib_base) {
free(lib_base);
}
if (NULL != bin_base) {
free(bin_base);
}
return rc;
}

Просмотреть файл

@ -26,6 +26,7 @@ Single Process Multiple Data (SPMD) Model:
.R [ options ]
.B <program>
.R [ <args> ]
.
Multiple Instruction Multiple Data (MIMD) Model:
@ -41,6 +42,19 @@ Multiple Instruction Multiple Data (MIMD) Model:
[ local_optionsN ]
.B <programN>
.R [ <argsN> ]
.P
Note that in both models, invoking \fImpirun\fR via an absolute path
name is equivalent to specifying the \fI--prefix\fR option with a
\fI<dir>\fR value equivalent to the directory where \fImpirun\fR
resides, minus its last subdirectory. For example:
\fBshell$\fP /usr/local/bin/mpirun ...
is equivalent to
\fBshell$\fP mpirun --prefix /usr/local
.
.\" **************************
.\" Quick Summary Section
@ -185,6 +199,13 @@ complete as soon as successful launch occurs.
.
.
.TP
.B --prefix \fR<dir>\fP
Prefix directory that will be used to set the \fIPATH\fR and
\fILD_LIBRARY_PATH\fR on the remote node before invoking Open MPI or
the target process. See the "Remote Execution" section, below.
.
.
.TP
.B --tmpdir \fR<dir>\fP
Set the root for the session directory tree for mpirun only.
.
@ -477,14 +498,85 @@ necessary (and safest) for the user to only clean up non-MPI state.
.SS Process Environment
.
Processes in the MPI application inherit their environment from the
Open RTE daemon upon the node on which they are running. The environment
is typically inherited from the user's shell. On remote nodes, the exact
environment is determined by the boot MCA module used. The rsh boot module,
for example, uses either rsh/ssh to launch the Open RTE daemon on remote nodes, and
typically executes one or more of the user's shell-setup files before launching
the Open RTE daemon. When running dynamically linked applications which
require the LD_LIBRARY_PATH environment variable to be set, care must be taken
to ensure that it is correctly set when booting Open MPI.
Open RTE daemon upon the node on which they are running. The
environment is typically inherited from the user's shell. On remote
nodes, the exact environment is determined by the boot MCA module
used. The \fIrsh\fR launch module, for example, uses either
\fIrsh\fR/\fIssh\fR to launch the Open RTE daemon on remote nodes, and
typically executes one or more of the user's shell-setup files before
launching the Open RTE daemon. When running dynamically linked
applications which require the \fILD_LIBRARY_PATH\fR environment
variable to be set, care must be taken to ensure that it is correctly
set when booting Open MPI.
.PP
See the "Remote Execution" section for more details.
.
.
.SS Remote Execution
.
Open MPI requires that the \fIPATH\fR environment variable be set to
find executables on remote nodes (this is typically only necessary in
\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled
environments typically copy the current environment to the execution
of remote jobs, so if the current environment has \fIPATH\fR and/or
\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it
set properly). If Open MPI was compiled with shared library support,
it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment
variable set on remote nodes as well (especially to find the shared
libraries required to run user MPI applications).
.PP
However, it is not always desirable or possible to edit shell
startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The
\fI--prefix\fR option is provided for some simple configurations where
this is not possible.
.PP
The \fI--prefix\fR option takes a single argument: the base directory
on the remote node where Open MPI is installed. Open MPI will use
this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR
before executing any Open MPI or user applications. This allows
running Open MPI jobs without having pre-configured the \fIPATH\fR and
\fILD_LIBRARY_PATH\fR on the remote nodes.
.PP
Open MPI adds the basename of the current
node's "bindir" (the directory where Open MPI's executables are
installed) to the prefix and uses that to set the \fIPATH\fR on the
remote node. Similarly, Open MPI adds the basename of the current
node's "libdir" (the directory where Open MPI's libraries are
installed) to the prefix and uses that to set the
\fILD_LIBRARY_PATH\fR on the remote node. For example:
.TP 15
Local bindir:
/local/node/directory/bin
.TP
Local libdir:
/local/node/directory/lib64
.PP
If the following command line is used:
\fBshell$\fP mpirun --prefix /remote/node/directory
Open MPI will add "/remote/node/directory/bin" to the \fIPATH\fR
and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the
remote node before attempting to execute anything.
.PP
Note that \fI--prefix\fR can be set on a per-context basis, allowing
for different values for different nodes.
.PP
The \fI--prefix\fR option is not sufficient if the installation paths
on the remote node are different than the local node (e.g., if "/lib"
is used on the local node, but "/lib64" is used on the remote node),
or if the installation paths are something other than a subdirectory
under a common prefix.
.PP
Note that executing \fImpirun\fR via an absolute pathname is
equivalent to specifying \fI--prefix\fR without the last subdirectory
in the absolute pathname to \fImpirun\fR. For example:
\fBshell$\fP /usr/local/bin/mpirun ...
is equivalent to
\fBshell$\fP mpirun --prefix /usr/local
.
.
.
@ -502,6 +594,7 @@ environment and use \fI\-x\fP to export them; not to define them.
.
.
.SS MCA (Modular Component Architecture)
.
The \fI-mca\fP switch allows the passing of parameters to various MCA modules.
.\" Open MPI's MCA modules are described in detail in ompimca(7).
MCA modules have direct impact on MPI programs because they allow tunable