Update orte-submit manpage, add the ompi-* versions of orte-dvm and orte-submit manpages
Этот коммит содержится в:
родитель
e303a9b1d6
Коммит
4dba298e6e
2
.gitignore
поставляемый
2
.gitignore
поставляемый
@ -478,6 +478,7 @@ orte/tools/orte-clean/orte-clean
|
||||
orte/tools/orte-clean/orte-clean.1
|
||||
orte/tools/orte-dvm/orte-dvm
|
||||
orte/tools/orte-dvm/orte-dvm.1
|
||||
ompi/mca/rte/orte/ompi-dvm.1
|
||||
orte/tools/orte-info/orte-info
|
||||
orte/tools/orte-info/orte-info.1
|
||||
orte/tools/orte-migrate/orte-migrate
|
||||
@ -492,6 +493,7 @@ orte/tools/orte-server/orte-server
|
||||
orte/tools/orte-server/orte-server.1
|
||||
orte/tools/orte-submit/orte-submit
|
||||
orte/tools/orte-submit/orte-submit.1
|
||||
ompi/mca/rte/orte/ompi-submit.1
|
||||
orte/tools/orte-top/orte-top
|
||||
orte/tools/orte-top/orte-top.1
|
||||
orte/tools/orted/orted
|
||||
|
@ -27,7 +27,7 @@ libmca_rte_orte_la_SOURCES =$(sources) $(headers)
|
||||
libmca_rte_orte_la_LDFLAGS = -module -avoid-version
|
||||
libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la
|
||||
|
||||
man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1
|
||||
man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 ompi-dvm.1 ompi-submit.1
|
||||
|
||||
if WANT_FT
|
||||
man_pages += ompi-checkpoint.1 ompi-restart.1
|
||||
@ -43,6 +43,8 @@ install-exec-hook:
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-clean$(EXEEXT); $(LN_S) orte-clean$(EXEEXT) ompi-clean$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-dvm$(EXEEXT); $(LN_S) orte-dvm$(EXEEXT) ompi-dvm$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-submit$(EXEEXT); $(LN_S) orte-submit$(EXEEXT) ompi-submit$(EXEEXT))
|
||||
if WANT_FT
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-checkpoint$(EXEEXT); $(LN_S) orte-checkpoint$(EXEEXT) ompi-checkpoint$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-restart$(EXEEXT); $(LN_S) orte-restart$(EXEEXT) ompi-restart$(EXEEXT))
|
||||
@ -55,7 +57,9 @@ uninstall-local:
|
||||
$(DESTDIR)$(bindir)/ompi-ps$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-clean$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-server$(EXEEXT)
|
||||
$(DESTDIR)$(bindir)/ompi-server$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-submit$(EXEEXT)
|
||||
if WANT_FT
|
||||
rm -f $(DESTDIR)$(bindir)/ompi-checkpoint$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-restart$(EXEEXT) \
|
||||
@ -115,5 +119,11 @@ $(top_builddir)/orte/tools/orte-server/orte-server.1:
|
||||
ompi-server.1: $(top_builddir)/orte/tools/orte-server/orte-server.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-server/orte-server.1 ompi-server.1
|
||||
|
||||
ompi-dvm.1: $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 ompi-dvm.1
|
||||
|
||||
ompi-submit.1: $(top_builddir)/orte/tools/orte-submit/orte-submit.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-submit/orte-submit.1 ompi-submit.1
|
||||
|
||||
clean-local:
|
||||
rm -f $(man_pages)
|
||||
|
@ -1,22 +1,23 @@
|
||||
.\" -*- nroff -*-
|
||||
.\" Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
|
||||
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
.\" Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
.\" $COPYRIGHT$
|
||||
.\"
|
||||
.\" Man page for ORTE's orterun command
|
||||
.\" Man page for ORTE's orte-submit command
|
||||
.\"
|
||||
.\" .TH name section center-footer left-footer center-header
|
||||
.TH MPIRUN 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
|
||||
.TH ORTE-SUBMIT 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
|
||||
.\" **************************
|
||||
.\" Name Section
|
||||
.\" **************************
|
||||
.SH NAME
|
||||
.
|
||||
orterun, mpirun, mpiexec \- Execute serial and parallel jobs in Open MPI.
|
||||
orte-submit, ompi-submit \- Execute serial and parallel jobs in Open MPI using a DVM.
|
||||
|
||||
.B Note:
|
||||
\fImpirun\fP, \fImpiexec\fP, and \fIorterun\fP are all synonyms for each
|
||||
other. Using any of the names will produce the same behavior.
|
||||
\fIompi-submit\fP and \fIorte-submit\fP are synonyms for each
|
||||
other. Using either of the names will produce the same behavior.
|
||||
.
|
||||
.\" **************************
|
||||
.\" Synopsis Section
|
||||
@ -26,7 +27,7 @@ other. Using any of the names will produce the same behavior.
|
||||
.PP
|
||||
Single Process Multiple Data (SPMD) Model:
|
||||
|
||||
.B mpirun
|
||||
.B ompi-submit
|
||||
[ options ]
|
||||
.B <program>
|
||||
[ <args> ]
|
||||
@ -34,7 +35,7 @@ Single Process Multiple Data (SPMD) Model:
|
||||
|
||||
Multiple Instruction Multiple Data (MIMD) Model:
|
||||
|
||||
.B mpirun
|
||||
.B ompi-submit
|
||||
[ global_options ]
|
||||
[ local_options1 ]
|
||||
.B <program1>
|
||||
@ -48,16 +49,16 @@ Multiple Instruction Multiple Data (MIMD) Model:
|
||||
[ <argsN> ]
|
||||
.P
|
||||
|
||||
Note that in both models, invoking \fImpirun\fP via an absolute path
|
||||
Note that in both models, invoking \fIompi-submit\fP via an absolute path
|
||||
name is equivalent to specifying the \fI--prefix\fP option with a
|
||||
\fI<dir>\fR value equivalent to the directory where \fImpirun\fR
|
||||
\fI<dir>\fR value equivalent to the directory where \fIompi-submit\fR
|
||||
resides, minus its last subdirectory. For example:
|
||||
|
||||
\fB%\fP /usr/local/bin/mpirun ...
|
||||
\fB%\fP /usr/local/bin/ompi-submit ...
|
||||
|
||||
is equivalent to
|
||||
|
||||
\fB%\fP mpirun --prefix /usr/local
|
||||
\fB%\fP ompi-submit --prefix /usr/local
|
||||
|
||||
.
|
||||
.\" **************************
|
||||
@ -65,20 +66,24 @@ is equivalent to
|
||||
.\" **************************
|
||||
.SH QUICK SUMMARY
|
||||
.
|
||||
.B
|
||||
Use of \fIorte-submit\fP requires that you first start the Distributed Virtual
|
||||
Machine (DVM) using \fIorte-dvm\fP.
|
||||
.P
|
||||
If you are simply looking for how to run an MPI application, you
|
||||
probably want to use a command line of the following form:
|
||||
|
||||
\fB%\fP mpirun [ -np X ] [ --hostfile <filename> ] <program>
|
||||
\fB%\fP ompi-submit [ -np X ] [ --hostfile <filename> ] <program>
|
||||
|
||||
This will run X copies of \fI<program>\fR in your current run-time
|
||||
environment (if running under a supported resource manager, Open MPI's
|
||||
\fImpirun\fR will usually automatically use the corresponding resource manager
|
||||
\fIompi-submit\fR will usually automatically use the corresponding resource manager
|
||||
process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR,
|
||||
which require the use of a hostfile, or will default to running all X
|
||||
copies on the localhost), scheduling (by default) in a round-robin fashion by
|
||||
CPU slot. See the rest of this page for more details.
|
||||
.P
|
||||
Please note that mpirun automatically binds processes as of the start of the
|
||||
Please note that ompi-submit automatically binds processes as of the start of the
|
||||
v1.8 series. Two binding patterns are used in the absence of any further directives:
|
||||
.TP 18
|
||||
.B Bind to core:
|
||||
@ -101,7 +106,7 @@ application process.
|
||||
.\" **************************
|
||||
.SH OPTIONS
|
||||
.
|
||||
.I mpirun
|
||||
.I ompi-submit
|
||||
will send the name of the directory where it was invoked on the local
|
||||
node to each of the remote nodes, and attempt to change to that
|
||||
directory. See the "Current Working Directory" section below for further
|
||||
@ -112,13 +117,13 @@ details.
|
||||
.TP 10
|
||||
.B <program>
|
||||
The program executable. This is identified as the first non-recognized argument
|
||||
to mpirun.
|
||||
to ompi-submit.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B <args>
|
||||
Pass these run-time arguments to every new process. These must always
|
||||
be the last arguments to \fImpirun\fP. If an app context file is used,
|
||||
be the last arguments to \fIompi-submit\fP. If an app context file is used,
|
||||
\fI<args>\fP will be ignored.
|
||||
.
|
||||
.
|
||||
@ -129,7 +134,7 @@ Display help for this command
|
||||
.
|
||||
.TP
|
||||
.B -q\fR,\fP --quiet
|
||||
Suppress informative messages from orterun during application execution.
|
||||
Suppress informative messages from orte-submit during application execution.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
@ -140,20 +145,14 @@ Be verbose
|
||||
.TP
|
||||
.B -V\fR,\fP --version
|
||||
Print version number. If no other arguments are given, this will also
|
||||
cause orterun to exit.
|
||||
cause orte-submit to exit.
|
||||
.
|
||||
.
|
||||
.
|
||||
.
|
||||
.P
|
||||
Use one of the following options to specify which hosts (nodes) of the cluster to run on. Note
|
||||
that as of the start of the v1.8 release, mpirun will launch a daemon onto each host in the
|
||||
allocation (as modified by the following options) at the very beginning of execution, regardless
|
||||
of whether or not application processes will eventually be mapped to execute there. This is
|
||||
done to allow collection of hardware topology information from the remote nodes, thus allowing
|
||||
us to map processes against known topology. However, it is a change from the behavior in prior releases
|
||||
where daemons were only launched \fRafter\fP mapping was complete, and thus only occurred on
|
||||
nodes where application processes would actually be executing.
|
||||
Use one of the following options to specify which hosts (nodes) of the DVM to run on.
|
||||
Specifying hosts outside the DVM will result in an error.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
@ -247,7 +246,7 @@ Map processes by socket (deprecated in favor of --map-by socket)
|
||||
.TP
|
||||
.B -nolocal\fR,\fP --nolocal
|
||||
Do not run any copies of the launched application on the same node as
|
||||
orterun is running. This option will override listing the localhost
|
||||
orte-submit is running. This option will override listing the localhost
|
||||
with \fB--host\fR or any other host-specifying mechanism.
|
||||
.
|
||||
.TP
|
||||
@ -386,7 +385,7 @@ Each xterm window will subsequently need to be manually closed.
|
||||
In some environments, xterm may require that the executable be in the user's
|
||||
path, or be specified in absolute or relative terms. Thus, it may be necessary
|
||||
to specify a local executable as "./foo" instead of just "foo". If xterm fails to
|
||||
find the executable, mpirun will hang, but still respond correctly to a ctrl-c.
|
||||
find the executable, ompi-submit will hang, but still respond correctly to a ctrl-c.
|
||||
If this happens, please check that the executable is being specified correctly
|
||||
and try again.
|
||||
.
|
||||
@ -430,11 +429,6 @@ directory. By default, the absolute and relative paths provided by --preload-fil
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --tmpdir \fR<dir>\fP
|
||||
Set the root for the session directory tree for mpirun only.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -wd \fR<dir>\fP
|
||||
Synonym for \fI-wdir\fP.
|
||||
.
|
||||
@ -458,7 +452,7 @@ executing the program. Only one environment variable can be specified
|
||||
per \fI-x\fP option. Existing environment variables can be specified
|
||||
or new variable names specified with corresponding values. For
|
||||
example:
|
||||
\fB%\fP mpirun -x DISPLAY -x OFILE=/tmp/out ...
|
||||
\fB%\fP ompi-submit -x DISPLAY -x OFILE=/tmp/out ...
|
||||
|
||||
The parser for the \fI-x\fP option is not very sophisticated; it does
|
||||
not even understand quoted values. Users are advised to set variables
|
||||
@ -515,9 +509,9 @@ There are also other options:
|
||||
.TP
|
||||
.B --allow-run-as-root
|
||||
Allow
|
||||
.I mpirun
|
||||
.I ompi-submit
|
||||
to run when executed by the root user
|
||||
.RI ( mpirun
|
||||
.RI ( ompi-submit
|
||||
defaults to aborting when launched as the root user).
|
||||
.
|
||||
.
|
||||
@ -542,54 +536,16 @@ Indicates that multiple app_contexts are being provided that are a mix of 32/64-
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -leave-session-attached\fR,\fP --leave-session-attached
|
||||
Do not detach OmpiRTE daemons used by this application. This allows error messages from the daemons
|
||||
as well as the underlying environment (e.g., when failing to launch a daemon) to be output.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -ompi-server\fR,\fP --ompi-server <uri or file>
|
||||
Specify the URI of the Open MPI server (or the mpirun to be used as the server)
|
||||
Specify the URI of the Open MPI server (or the ompi-submit to be used as the server)
|
||||
, the name
|
||||
of the file (specified as file:filename) that
|
||||
contains that info, or the PID (specified as pid:#) of the mpirun to be used as
|
||||
contains that info, or the PID (specified as pid:#) of the ompi-submit to be used as
|
||||
the server.
|
||||
The Open MPI server is used to support multi-application data exchange via
|
||||
the MPI-2 MPI_Publish_name and MPI_Lookup_name functions.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -report-pid\fR,\fP --report-pid <channel>
|
||||
Print out mpirun's PID during startup. The channel must be either a '-' to indi
|
||||
cate that
|
||||
the pid is to be output to stdout, a '+' to indicate that the pid is to be outp
|
||||
ut to stderr,
|
||||
or a filename to which the pid is to be written.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -report-uri\fR,\fP --report-uri <channel>
|
||||
Print out mpirun's URI during startup. The channel must be either a '-' to indi
|
||||
cate that
|
||||
the URI is to be output to stdout, a '+' to indicate that the URI is to be outp
|
||||
ut to stderr,
|
||||
or a filename to which the URI is to be written.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -wait-for-server\fR,\fP --wait-for-server
|
||||
Pause mpirun before launching the job until ompi-server is detected. This
|
||||
is useful in scripts where ompi-server may be started in the background, followed immediately by
|
||||
an \fImpirun\fP command that wishes to connect to it. Mpirun will pause until either the specified
|
||||
ompi-server is contacted or the server-wait-time is exceeded.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -server-wait-time\fR,\fP --server-wait-time <secs>
|
||||
The max amount of time (in seconds) mpirun should wait for the ompi-server to start. The default
|
||||
is 10 seconds.
|
||||
.
|
||||
.
|
||||
.
|
||||
.
|
||||
.P
|
||||
@ -602,33 +558,9 @@ Enable debugging of the OmpiRTE (the run-time layer in Open MPI).
|
||||
This is not generally useful for most users.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --debug-daemons
|
||||
Enable debugging of any OmpiRTE daemons used by this application.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --debug-daemons-file
|
||||
Enable debugging of any OmpiRTE daemons used by this application, storing
|
||||
output in files.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -launch-agent\fR,\fP --launch-agent
|
||||
Name of the executable that is to be used to start processes on the remote nodes. The default
|
||||
is "orted". This option can be used to test new daemon concepts, or to pass options back to the
|
||||
daemons without having mpirun itself see them. For example, specifying a launch agent of
|
||||
\fRorted -mca odls_base_verbose 5\fR allows the developer to ask the orted for debugging output
|
||||
without clutter from mpirun itself.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --noprefix
|
||||
Disable the automatic --prefix behavior
|
||||
.
|
||||
.
|
||||
.P
|
||||
There may be other options listed with \fImpirun --help\fP.
|
||||
There may be other options listed with \fIompi-submit --help\fP.
|
||||
.
|
||||
.
|
||||
.SS Environment Variables
|
||||
@ -636,10 +568,10 @@ There may be other options listed with \fImpirun --help\fP.
|
||||
.TP
|
||||
.B MPIEXEC_TIMEOUT
|
||||
The maximum number of seconds that
|
||||
.I mpirun
|
||||
.I ompi-submit
|
||||
.RI ( mpiexec )
|
||||
will run. After this many seconds,
|
||||
.I mpirun
|
||||
.I ompi-submit
|
||||
will abort the launched job and exit.
|
||||
.
|
||||
.
|
||||
@ -648,9 +580,9 @@ will abort the launched job and exit.
|
||||
.\" **************************
|
||||
.SH DESCRIPTION
|
||||
.
|
||||
One invocation of \fImpirun\fP starts an MPI application running under Open
|
||||
One invocation of \fIompi-submit\fP starts an MPI application running under Open
|
||||
MPI. If the application is single process multiple data (SPMD), the application
|
||||
can be specified on the \fImpirun\fP command line.
|
||||
can be specified on the \fIompi-submit\fP command line.
|
||||
|
||||
If the application is multiple instruction multiple data (MIMD), comprising
|
||||
multiple programs, the set of programs and arguments can be specified in one of
|
||||
@ -659,7 +591,7 @@ two ways: Extended Command Line Arguments, and Application Context.
|
||||
An application context describes the MIMD program set including all arguments
|
||||
in a separate file.
|
||||
.\" See appcontext(5) for a description of the application context syntax.
|
||||
This file essentially contains multiple \fImpirun\fP command lines, less the
|
||||
This file essentially contains multiple \fIompi-submit\fP command lines, less the
|
||||
command name itself. The ability to specify different options for different
|
||||
instantiations of a program is another reason to use an application context.
|
||||
.PP
|
||||
@ -673,14 +605,14 @@ programs (e.g. --hostfile), while others are specific to a single program
|
||||
.
|
||||
.SS Specifying Host Nodes
|
||||
.
|
||||
Host nodes can be identified on the \fImpirun\fP command line with the \fI-host\fP
|
||||
Host nodes can be identified on the \fIompi-submit\fP command line with the \fI-host\fP
|
||||
option or in a hostfile.
|
||||
.
|
||||
.PP
|
||||
For example,
|
||||
.
|
||||
.TP 4
|
||||
mpirun -H aa,aa,bb ./a.out
|
||||
ompi-submit -H aa,aa,bb ./a.out
|
||||
launches two processes on node aa and one on bb.
|
||||
.
|
||||
.PP
|
||||
@ -694,27 +626,22 @@ Or, consider the hostfile
|
||||
|
||||
.
|
||||
.PP
|
||||
Here, we list both the host names (aa, bb, and cc) but also how many "slots"
|
||||
there are for each. Slots indicate how many processes can potentially execute
|
||||
on a node. For best performance, the number of slots may be chosen to be the
|
||||
number of cores on the node or the number of processor sockets. If the hostfile
|
||||
does not provide slots information, a default of 1 is assumed.
|
||||
When running under resource managers (e.g., SLURM, Torque, etc.),
|
||||
Open MPI will obtain both the hostnames and the number of slots directly
|
||||
from the resource manager.
|
||||
Since the DVM was started with \fIorte-dvm\fP, \fIorte-submit\fP
|
||||
will ignore any slots arguments in the hostfile. Values provided
|
||||
via hostfile to \fIorte-dvm\fP will control the behavior.
|
||||
.
|
||||
.PP
|
||||
.
|
||||
.TP 4
|
||||
mpirun -hostfile myhostfile ./a.out
|
||||
ompi-submit -hostfile myhostfile ./a.out
|
||||
will launch two processes on each of the three nodes.
|
||||
.
|
||||
.TP 4
|
||||
mpirun -hostfile myhostfile -host aa ./a.out
|
||||
ompi-submit -hostfile myhostfile -host aa ./a.out
|
||||
will launch two processes, both on node aa.
|
||||
.
|
||||
.TP 4
|
||||
mpirun -hostfile myhostfile -host dd ./a.out
|
||||
ompi-submit -hostfile myhostfile -host dd ./a.out
|
||||
will find no hosts to run on and abort with an error.
|
||||
That is, the specified host dd is not in the specified hostfile.
|
||||
.
|
||||
@ -728,22 +655,22 @@ The number of processes launched can be specified as a multiple of the
|
||||
number of nodes or processor sockets available. For example,
|
||||
.
|
||||
.TP 4
|
||||
mpirun -H aa,bb -npersocket 2 ./a.out
|
||||
ompi-submit -H aa,bb -npersocket 2 ./a.out
|
||||
launches processes 0-3 on node aa and processes 4-7 on node bb,
|
||||
where aa and bb are both dual-socket nodes.
|
||||
The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option,
|
||||
which is discussed in a later section.
|
||||
.
|
||||
.TP 4
|
||||
mpirun -H aa,bb -npernode 2 ./a.out
|
||||
ompi-submit -H aa,bb -npernode 2 ./a.out
|
||||
launches processes 0-1 on node aa and processes 2-3 on node bb.
|
||||
.
|
||||
.TP 4
|
||||
mpirun -H aa,bb -npernode 1 ./a.out
|
||||
ompi-submit -H aa,bb -npernode 1 ./a.out
|
||||
launches one process per host node.
|
||||
.
|
||||
.TP 4
|
||||
mpirun -H aa,bb -pernode ./a.out
|
||||
ompi-submit -H aa,bb -pernode ./a.out
|
||||
is the same as \fI-npernode\fP 1.
|
||||
.
|
||||
.
|
||||
@ -762,7 +689,7 @@ Another alternative is to specify the number of processes with the
|
||||
Now,
|
||||
.
|
||||
.TP 4
|
||||
mpirun -hostfile myhostfile -np 6 ./a.out
|
||||
ompi-submit -hostfile myhostfile -np 6 ./a.out
|
||||
will launch processes 0-3 on node aa and processes 4-5 on node bb. The remaining
|
||||
slots in the hostfile will not be used since the \fI-np\fP option indicated
|
||||
that only 6 processes should be launched.
|
||||
@ -771,7 +698,7 @@ that only 6 processes should be launched.
|
||||
.
|
||||
The examples above illustrate the default mapping of processes
|
||||
to nodes. This mapping can also be controlled with various
|
||||
\fImpirun\fP options that describe mapping policies.
|
||||
\fIompi-submit\fP options that describe mapping policies.
|
||||
.
|
||||
.
|
||||
.PP
|
||||
@ -780,11 +707,11 @@ Consider the same hostfile as above, again with \fI-np\fP 6:
|
||||
|
||||
node aa node bb node cc
|
||||
|
||||
mpirun 0 1 2 3 4 5
|
||||
ompi-submit 0 1 2 3 4 5
|
||||
|
||||
mpirun --map-by node 0 3 1 4 2 5
|
||||
ompi-submit --map-by node 0 3 1 4 2 5
|
||||
|
||||
mpirun -nolocal 0 1 2 3 4 5
|
||||
ompi-submit -nolocal 0 1 2 3 4 5
|
||||
.
|
||||
.PP
|
||||
The \fI--map-by node\fP option will load balance the processes across
|
||||
@ -792,9 +719,9 @@ the available nodes, numbering each process in a round-robin fashion.
|
||||
.
|
||||
.PP
|
||||
The \fI-nolocal\fP option prevents any processes from being mapped onto the
|
||||
local host (in this case node aa). While \fImpirun\fP typically consumes
|
||||
local host (in this case node aa). While \fIompi-submit\fP typically consumes
|
||||
few system resources, \fI-nolocal\fP can be helpful for launching very
|
||||
large jobs where \fImpirun\fP may actually need to use noticeable amounts
|
||||
large jobs where \fIompi-submit\fP may actually need to use noticeable amounts
|
||||
of memory and/or processing time.
|
||||
.
|
||||
.PP
|
||||
@ -802,7 +729,7 @@ Just as \fI-np\fP can specify fewer processes than there are slots, it can
|
||||
also oversubscribe the slots. For example, with the same hostfile:
|
||||
.
|
||||
.TP 4
|
||||
mpirun -hostfile myhostfile -np 14 ./a.out
|
||||
ompi-submit -hostfile myhostfile -np 14 ./a.out
|
||||
will launch processes 0-3 on node aa, 4-7 on bb, and 8-11 on cc. It will
|
||||
then add the remaining two processes to whichever nodes it chooses.
|
||||
.
|
||||
@ -811,7 +738,7 @@ One can also specify limits to oversubscription. For example, with the same
|
||||
hostfile:
|
||||
.
|
||||
.TP 4
|
||||
mpirun -hostfile myhostfile -np 14 -nooversubscribe ./a.out
|
||||
ompi-submit -hostfile myhostfile -np 14 -nooversubscribe ./a.out
|
||||
will produce an error since \fI-nooversubscribe\fP prevents oversubscription.
|
||||
.
|
||||
.PP
|
||||
@ -827,7 +754,7 @@ The \fImax_slots\fP field specifies such a limit. When it does, the
|
||||
\fIslots\fP value defaults to the limit. Now:
|
||||
.
|
||||
.TP 4
|
||||
mpirun -hostfile myhostfile -np 14 ./a.out
|
||||
ompi-submit -hostfile myhostfile -np 14 ./a.out
|
||||
causes the first 12 processes to be launched as before, but the remaining
|
||||
two processes will be forced onto node cc. The other two nodes are
|
||||
protected by the hostfile against oversubscription by this job.
|
||||
@ -841,7 +768,7 @@ Of course, \fI-np\fP can also be used with the \fI-H\fP or \fI-host\fP
|
||||
option. For example,
|
||||
.
|
||||
.TP 4
|
||||
mpirun -H aa,bb -np 8 ./a.out
|
||||
ompi-submit -H aa,bb -np 8 ./a.out
|
||||
launches 8 processes. Since only two hosts are specified, after the first
|
||||
two processes are mapped, one to aa and one to bb, the remaining processes
|
||||
oversubscribe the specified hosts.
|
||||
@ -850,7 +777,7 @@ oversubscribe the specified hosts.
|
||||
And here is a MIMD example:
|
||||
.
|
||||
.TP 4
|
||||
mpirun -H aa -np 1 hostname : -H bb,cc -np 2 uptime
|
||||
ompi-submit -H aa -np 1 hostname : -H bb,cc -np 2 uptime
|
||||
will launch process 0 running \fIhostname\fP on node aa and processes 1 and 2
|
||||
each running \fIuptime\fP on nodes bb and cc, respectively.
|
||||
.
|
||||
@ -891,7 +818,7 @@ to nodes in whatever order the hostfile specifies. Use the
|
||||
as before:
|
||||
.
|
||||
.PP
|
||||
mpirun -hostfile myhostfile -mca rmaps seq ./a.out
|
||||
ompi-submit -hostfile myhostfile -mca rmaps seq ./a.out
|
||||
.
|
||||
.PP
|
||||
will launch three processes, one on each of nodes aa, bb, and cc, respectively.
|
||||
@ -967,29 +894,29 @@ Finally, \fI--report-bindings\fP can be used to report bindings.
|
||||
.
|
||||
.PP
|
||||
As an example, consider a node with two processor sockets, each comprising
|
||||
four cores. We run \fImpirun\fP with \fI-np 4 --report-bindings\fP and
|
||||
four cores. We run \fIompi-submit\fP with \fI-np 4 --report-bindings\fP and
|
||||
the following additional options:
|
||||
.
|
||||
|
||||
% mpirun ... --map-by core --bind-to core
|
||||
% ompi-submit ... --map-by core --bind-to core
|
||||
[...] ... binding child [...,0] to cpus 0001
|
||||
[...] ... binding child [...,1] to cpus 0002
|
||||
[...] ... binding child [...,2] to cpus 0004
|
||||
[...] ... binding child [...,3] to cpus 0008
|
||||
|
||||
% mpirun ... --map-by socket --bind-to socket
|
||||
% ompi-submit ... --map-by socket --bind-to socket
|
||||
[...] ... binding child [...,0] to socket 0 cpus 000f
|
||||
[...] ... binding child [...,1] to socket 1 cpus 00f0
|
||||
[...] ... binding child [...,2] to socket 0 cpus 000f
|
||||
[...] ... binding child [...,3] to socket 1 cpus 00f0
|
||||
|
||||
% mpirun ... --map-by core:PE=2 -bind-to core
|
||||
% ompi-submit ... --map-by core:PE=2 -bind-to core
|
||||
[...] ... binding child [...,0] to cpus 0003
|
||||
[...] ... binding child [...,1] to cpus 000c
|
||||
[...] ... binding child [...,2] to cpus 0030
|
||||
[...] ... binding child [...,3] to cpus 00c0
|
||||
|
||||
% mpirun ... --bind-to none
|
||||
% ompi-submit ... --bind-to none
|
||||
.
|
||||
.PP
|
||||
Here, \fI--report-bindings\fP shows the binding of each process as a mask.
|
||||
@ -1008,14 +935,14 @@ on every system.
|
||||
.
|
||||
.PP
|
||||
Process binding can also be set with MCA parameters.
|
||||
Their usage is less convenient than that of \fImpirun\fP options.
|
||||
On the other hand, MCA parameters can be set not only on the \fImpirun\fP
|
||||
Their usage is less convenient than that of \fIompi-submit\fP options.
|
||||
On the other hand, MCA parameters can be set not only on the \fIompi-submit\fP
|
||||
command line, but alternatively in a system or user mca-params.conf file
|
||||
or as environment variables, as described in the MCA section below.
|
||||
Some examples include:
|
||||
.
|
||||
.PP
|
||||
mpirun option MCA parameter key value
|
||||
ompi-submit option MCA parameter key value
|
||||
|
||||
--map-by core rmaps_base_mapping_policy core
|
||||
--map-by socket rmaps_base_mapping_policy socket
|
||||
@ -1045,7 +972,7 @@ For example:
|
||||
rank 0=aa slot=1:0-2
|
||||
rank 1=bb slot=0:0,1
|
||||
rank 2=cc slot=1-2
|
||||
$ mpirun -H aa,bb,cc,dd -rf myrankfile ./a.out
|
||||
$ ompi-submit -H aa,bb,cc,dd -rf myrankfile ./a.out
|
||||
.
|
||||
.PP
|
||||
Means that
|
||||
@ -1086,7 +1013,7 @@ is to be considered as \fIphysical\fP.
|
||||
The hostnames listed above are "absolute," meaning that actual
|
||||
resolvable hostnames are specified. However, hostnames can also be
|
||||
specified as "relative," meaning that they are specified in relation
|
||||
to an externally-specified list of hostnames (e.g., by mpirun's --host
|
||||
to an externally-specified list of hostnames (e.g., by ompi-submit's --host
|
||||
argument, a hostfile, or a job scheduler).
|
||||
.
|
||||
.PP
|
||||
@ -1099,7 +1026,7 @@ hostnames, indexed from 0. For example:
|
||||
rank 0=+n0 slot=1:0-2
|
||||
rank 1=+n1 slot=0:0,1
|
||||
rank 2=+n2 slot=1-2
|
||||
$ mpirun -H aa,bb,cc,dd -rf myrankfile ./a.out
|
||||
$ ompi-submit -H aa,bb,cc,dd -rf myrankfile ./a.out
|
||||
.
|
||||
.PP
|
||||
Starting with Open MPI v1.7, all socket/core slot locations are
|
||||
@ -1113,7 +1040,7 @@ logical indexes of socket and cores.
|
||||
.
|
||||
.SS Application Context or Executable Program?
|
||||
.
|
||||
To distinguish the two different forms, \fImpirun\fP
|
||||
To distinguish the two different forms, \fIompi-submit\fP
|
||||
looks on the command line for \fI--app\fP option. If
|
||||
it is specified, then the file named on the command line is
|
||||
assumed to be an application context. If it is not
|
||||
@ -1134,13 +1061,13 @@ If a relative directory is specified, it must be relative to the initial
|
||||
working directory determined by the specific starter used. For example when
|
||||
using the rsh or ssh starters, the initial directory is $HOME by default. Other
|
||||
starters may set the initial directory to the current working directory from
|
||||
the invocation of \fImpirun\fP.
|
||||
the invocation of \fIompi-submit\fP.
|
||||
.
|
||||
.
|
||||
.
|
||||
.SS Current Working Directory
|
||||
.
|
||||
The \fI\-wdir\fP mpirun option (and its synonym, \fI\-wd\fP) allows
|
||||
The \fI\-wdir\fP ompi-submit option (and its synonym, \fI\-wd\fP) allows
|
||||
the user to change to an arbitrary directory before the program is
|
||||
invoked. It can also be used in application context files to specify
|
||||
working directories on specific nodes and/or for specific
|
||||
@ -1152,10 +1079,10 @@ line value.
|
||||
.PP
|
||||
If the \fI-wdir\fP option is specified, Open MPI will attempt to
|
||||
change to the specified directory on all of the remote nodes. If this
|
||||
fails, \fImpirun\fP will abort.
|
||||
fails, \fIompi-submit\fP will abort.
|
||||
.PP
|
||||
If the \fI-wdir\fP option is \fBnot\fP specified, Open MPI will send
|
||||
the directory name where \fImpirun\fP was invoked to each of the
|
||||
the directory name where \fIompi-submit\fP was invoked to each of the
|
||||
remote nodes. The remote nodes will try to change to that
|
||||
directory. If they are unable (e.g., if the directory does not exist on
|
||||
that node), then Open MPI will use the default directory determined by
|
||||
@ -1170,22 +1097,22 @@ does not wait until \fIMPI_INIT\fP is called.
|
||||
.
|
||||
Open MPI directs UNIX standard input to /dev/null on all processes
|
||||
except the MPI_COMM_WORLD rank 0 process. The MPI_COMM_WORLD rank 0 process
|
||||
inherits standard input from \fImpirun\fP.
|
||||
inherits standard input from \fIompi-submit\fP.
|
||||
.B Note:
|
||||
The node that invoked \fImpirun\fP need not be the same as the node where the
|
||||
The node that invoked \fIompi-submit\fP need not be the same as the node where the
|
||||
MPI_COMM_WORLD rank 0 process resides. Open MPI handles the redirection of
|
||||
\fImpirun\fP's standard input to the rank 0 process.
|
||||
\fIompi-submit\fP's standard input to the rank 0 process.
|
||||
.PP
|
||||
Open MPI directs UNIX standard output and error from remote nodes to the node
|
||||
that invoked \fImpirun\fP and prints it on the standard output/error of
|
||||
\fImpirun\fP.
|
||||
Local processes inherit the standard output/error of \fImpirun\fP and transfer
|
||||
that invoked \fIompi-submit\fP and prints it on the standard output/error of
|
||||
\fIompi-submit\fP.
|
||||
Local processes inherit the standard output/error of \fIompi-submit\fP and transfer
|
||||
to it directly.
|
||||
.PP
|
||||
Thus it is possible to redirect standard I/O for Open MPI applications by
|
||||
using the typical shell redirection procedure on \fImpirun\fP.
|
||||
using the typical shell redirection procedure on \fIompi-submit\fP.
|
||||
|
||||
\fB%\fP mpirun -np 2 my_app < my_input > my_output
|
||||
\fB%\fP ompi-submit -np 2 my_app < my_input > my_output
|
||||
|
||||
Note that in this example \fIonly\fP the MPI_COMM_WORLD rank 0 process will
|
||||
receive the stream from \fImy_input\fP on stdin. The stdin on all the other
|
||||
@ -1196,32 +1123,32 @@ be collected into the \fImy_output\fP file.
|
||||
.
|
||||
.SS Signal Propagation
|
||||
.
|
||||
When orterun receives a SIGTERM or SIGINT, it will attempt to kill
|
||||
When orte-submit receives a SIGTERM or SIGINT, it will attempt to kill
|
||||
the entire job by sending all processes in the job a SIGTERM, waiting
|
||||
a small number of seconds, then sending all processes in the job a
|
||||
SIGKILL.
|
||||
.
|
||||
.PP
|
||||
SIGUSR1 and SIGUSR2 signals received by orterun are propagated to
|
||||
SIGUSR1 and SIGUSR2 signals received by orte-submit are propagated to
|
||||
all processes in the job.
|
||||
.
|
||||
.PP
|
||||
One can turn on forwarding of SIGSTOP and SIGCONT to the program executed
|
||||
by mpirun by setting the MCA parameter orte_forward_job_control to 1.
|
||||
A SIGTSTP signal to mpirun will then cause a SIGSTOP signal to be sent
|
||||
to all of the programs started by mpirun and likewise a SIGCONT signal
|
||||
to mpirun will cause a SIGCONT to be sent.
|
||||
by ompi-submit by setting the MCA parameter orte_forward_job_control to 1.
|
||||
A SIGTSTP signal to ompi-submit will then cause a SIGSTOP signal to be sent
|
||||
to all of the programs started by ompi-submit and likewise a SIGCONT signal
|
||||
to ompi-submit will cause a SIGCONT to be sent.
|
||||
.
|
||||
.PP
|
||||
Other signals are not currently propagated
|
||||
by orterun.
|
||||
by orte-submit.
|
||||
.
|
||||
.
|
||||
.SS Process Termination / Signal Handling
|
||||
.
|
||||
During the run of an MPI application, if any process dies abnormally
|
||||
(either exiting before invoking \fIMPI_FINALIZE\fP, or dying as the result of a
|
||||
signal), \fImpirun\fP will print out an error message and kill the rest of the
|
||||
signal), \fIompi-submit\fP will print out an error message and kill the rest of the
|
||||
MPI application.
|
||||
.PP
|
||||
User signal handlers should probably avoid trying to cleanup MPI state
|
||||
@ -1232,7 +1159,7 @@ and thread safety). For example, if a segmentation fault occurs in
|
||||
\fIMPI_SEND\fP (perhaps because a bad buffer was passed in) and a user
|
||||
signal handler is invoked, if this user handler attempts to invoke
|
||||
\fIMPI_FINALIZE\fP, Bad Things could happen since Open MPI was already
|
||||
"in" MPI when the error occurred. Since \fImpirun\fP will notice that
|
||||
"in" MPI when the error occurred. Since \fIompi-submit\fP will notice that
|
||||
the process died due to a signal, it is probably not necessary to clean
|
||||
up MPI state; it is safest for the user to clean up only non-MPI state.
|
||||
.
|
||||
@ -1296,7 +1223,7 @@ Local libdir:
|
||||
.PP
|
||||
If the following command line is used:
|
||||
|
||||
\fB%\fP mpirun --prefix /remote/node/directory
|
||||
\fB%\fP ompi-submit --prefix /remote/node/directory
|
||||
|
||||
Open MPI will add "/remote/node/directory/bin" to the \fIPATH\fR
|
||||
and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the
|
||||
@ -1308,15 +1235,15 @@ is used on the local node, but "/lib64" is used on the remote node),
|
||||
or if the installation paths are something other than a subdirectory
|
||||
under a common prefix.
|
||||
.PP
|
||||
Note that executing \fImpirun\fR via an absolute pathname is
|
||||
Note that executing \fIompi-submit\fR via an absolute pathname is
|
||||
equivalent to specifying \fI--prefix\fR without the last subdirectory
|
||||
in the absolute pathname to \fImpirun\fR. For example:
|
||||
in the absolute pathname to \fIompi-submit\fR. For example:
|
||||
|
||||
\fB%\fP /usr/local/bin/mpirun ...
|
||||
\fB%\fP /usr/local/bin/ompi-submit ...
|
||||
|
||||
is equivalent to
|
||||
|
||||
\fB%\fP mpirun --prefix /usr/local
|
||||
\fB%\fP ompi-submit --prefix /usr/local
|
||||
.
|
||||
.
|
||||
.
|
||||
@ -1325,7 +1252,7 @@ is equivalent to
|
||||
All environment variables that are named in the form OMPI_* will automatically
|
||||
be exported to new processes on the local and remote nodes. Environmental
|
||||
parameters can also be set/forwarded to the new processes using the MCA
|
||||
parameter \fImca_base_env_list\fP. The \fI\-x\fP option to \fImpirun\fP has
|
||||
parameter \fImca_base_env_list\fP. The \fI\-x\fP option to \fIompi-submit\fP has
|
||||
been deprecated, but the syntax of the MCA param follows that prior
|
||||
example. While the syntax of the \fI\-x\fP option and MCA param
|
||||
allows the definition of new variables, note that the parser
|
||||
@ -1352,12 +1279,12 @@ passed.
|
||||
For example:
|
||||
.
|
||||
.TP 4
|
||||
mpirun -mca btl tcp,self -np 1 foo
|
||||
ompi-submit -mca btl tcp,self -np 1 foo
|
||||
Tells Open MPI to use the "tcp" and "self" BTLs, and to run a single copy of
|
||||
"foo" on an allocated node.
|
||||
.
|
||||
.TP
|
||||
mpirun -mca btl self -np 1 foo
|
||||
ompi-submit -mca btl self -np 1 foo
|
||||
Tells Open MPI to use the "self" BTL, and to run a single copy of "foo" on an
|
||||
allocated node.
|
||||
.\" And so on. Open MPI's BTL MCA modules are described in ompimca_btl(7).
|
||||
@ -1369,7 +1296,7 @@ specified more than once, the \fI<value>\fPs are concatenated with a comma
|
||||
.PP
|
||||
Note that the \fI-mca\fP switch is simply a shortcut for setting environment variables.
|
||||
The same effect may be accomplished by setting corresponding environment
|
||||
variables before running \fImpirun\fP.
|
||||
variables before running \fIompi-submit\fP.
|
||||
The form of the environment variables that Open MPI sets is:
|
||||
|
||||
OMPI_MCA_<key>=<value>
|
||||
@ -1382,7 +1309,7 @@ file.
|
||||
.
|
||||
.PP
|
||||
Unknown \fI<key>\fP arguments are still set as
|
||||
environment variables -- they are not checked (by \fImpirun\fP) for correctness.
|
||||
environment variables -- they are not checked (by \fIompi-submit\fP) for correctness.
|
||||
Illegal or incorrect \fI<value>\fP arguments may or may not be reported -- it
|
||||
depends on the specific MCA module.
|
||||
.PP
|
||||
@ -1393,24 +1320,24 @@ See the \fIompi_info(1)\fP man page for detailed information on the command.
|
||||
.SS Running as root
|
||||
.
|
||||
The Open MPI team strongly advises against executing
|
||||
.I mpirun
|
||||
.I ompi-submit
|
||||
as the root user. MPI applications should be run as regular
|
||||
(non-root) users.
|
||||
.
|
||||
.PP
|
||||
Reflecting this advice, mpirun will refuse to run as root by default.
|
||||
Reflecting this advice, ompi-submit will refuse to run as root by default.
|
||||
To override this default, you can add the
|
||||
.I --allow-run-as-root
|
||||
option to the
|
||||
.I mpirun
|
||||
.I ompi-submit
|
||||
command line.
|
||||
.
|
||||
.SS Exit status
|
||||
.
|
||||
There is no standard definition for what \fImpirun\fP should return as an exit
|
||||
There is no standard definition for what \fIompi-submit\fP should return as an exit
|
||||
status. After considerable discussion, we settled on the following method for
|
||||
assigning the \fImpirun\fP exit status (note: in the following description,
|
||||
the "primary" job is the initial application started by mpirun - all jobs that
|
||||
assigning the \fIompi-submit\fP exit status (note: in the following description,
|
||||
the "primary" job is the initial application started by ompi-submit - all jobs that
|
||||
are spawned by that job are designated "secondary" jobs):
|
||||
.
|
||||
.IP \[bu] 2
|
||||
@ -1456,13 +1383,13 @@ with the first process to so exit identified along with its exit status.
|
||||
Be sure also to see the examples throughout the sections above.
|
||||
.
|
||||
.TP 4
|
||||
mpirun -np 4 -mca btl ib,tcp,self prog1
|
||||
ompi-submit -np 4 -mca btl ib,tcp,self prog1
|
||||
Run 4 copies of prog1 using the "ib", "tcp", and "self" BTL's for the
|
||||
transport of MPI messages.
|
||||
.
|
||||
.
|
||||
.TP 4
|
||||
mpirun -np 4 -mca btl tcp,sm,self
|
||||
ompi-submit -np 4 -mca btl tcp,sm,self
|
||||
.br
|
||||
--mca btl_tcp_if_include eth0 prog1
|
||||
.br
|
||||
@ -1486,12 +1413,12 @@ parameters.
|
||||
.
|
||||
.SH RETURN VALUE
|
||||
.
|
||||
\fImpirun\fP returns 0 if all processes started by \fImpirun\fP exit after calling
|
||||
\fIompi-submit\fP returns 0 if all processes started by \fIompi-submit\fP exit after calling
|
||||
MPI_FINALIZE. A non-zero value is returned if an internal error occurred in
|
||||
mpirun, or one or more processes exited before calling MPI_FINALIZE. If an
|
||||
internal error occurred in mpirun, the corresponding error code is returned.
|
||||
ompi-submit, or one or more processes exited before calling MPI_FINALIZE. If an
|
||||
internal error occurred in ompi-submit, the corresponding error code is returned.
|
||||
In the event that one or more processes exit before calling MPI_FINALIZE, the
|
||||
return value of the MPI_COMM_WORLD rank of the process that \fImpirun\fP first notices died
|
||||
return value of the MPI_COMM_WORLD rank of the process that \fIompi-submit\fP first notices died
|
||||
before calling MPI_FINALIZE will be returned. Note that, in general, this will
|
||||
be the first process that died but is not guaranteed to be so.
|
||||
.
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user