Merge pull request #1727 from jsquyres/pr/mpirun-timeout-and-friends
mpirun.1in: add descriptions of new options
Этот коммит содержится в:
Коммит
873cebb4c0
@ -158,6 +158,7 @@ _mpirun() {
|
||||
'(-do-not-launch --do-not-launch)'{-do-not-launch,--do-not-launch}'[Perform all necessary operations to prepare to launch the application, but do not actually launch it]' \
|
||||
'(-do-not-resolve --do-not-resolve)'{-do-not-resolve,--do-not-resolve}'[Do not attempt to resolve interfaces]' \
|
||||
'(-enable-recovery --enable-recovery)'{-enable-recovery,--enable-recovery}'[Enable recovery from process failure (default: disabled)]' \
|
||||
'(-get-stack-traces --get-stack-traces)'{-get-stack-traces,--get-stack-traces}'[Upon timeout, obtain stack traces from all still-alive MPI processes (default: disabled)]' \
|
||||
'*'{-gmca,--gmca}'[Pass global MCA parameters that are applicable to all contexts (arg0 is the parameter name; arg1 is the parameter value)]:mca variable name:->mca_variable_name:mca variable value:->mca_variable_value' \
|
||||
'(- *)'{-h,--help}'[Help message]' \
|
||||
'*'{-H,-host,--host}'[List of hosts to invoke processes on]:hostnames:' \
|
||||
@ -192,6 +193,7 @@ _mpirun() {
|
||||
'(-report-child-jobs-separately --report-child-jobs-separately)'{-report-child-jobs-separately,--report-child-jobs-separately}'[Return the exit status of the primary job only]' \
|
||||
'(-report-events --report-events)'{-report-events,--report-events}'[Report events to a tool listening at the specified URI]:URI:' \
|
||||
'(-report-pid --report-pid)'{-report-pid,--report-pid}'[Printout pid on stdout (-), stderr (+), or a file (anything else)]:report file:_report_file' \
|
||||
'(-report-state-on-timeout --report-state-on-timeout)'{-report-state-on-timeout,--report-state-on-timeout}'[Upon timeout, print run-time status of each process]' \
|
||||
'(-report-uri --report-uri)'{-report-uri,--report-uri}'[Printout URI on stdout (-), stderr (+), or a file (anything else)]:report file:_report_file' \
|
||||
'(-rf --rankfile)'{-rf,--rankfile}'[Provide a rankfile file]:rank file:_files' \
|
||||
'(-s --preload-binary)'{-s,--preload-binary}'[Preload the binary on the remote machine before starting the remote process.]' \
|
||||
@ -202,6 +204,7 @@ _mpirun() {
|
||||
'(-staged --staged)'{-staged,--staged}'[Used staged execution if inadequate resources are present (cannot support MPI jobs)]' \
|
||||
'(-stdin --stdin)'{-stdin,--stdin}'[Specify procs to receive stdin \[rank, all, none\] (default: 0, indicating rank 0)]:rank list:' \
|
||||
'(-tag-output --tag-output)'{-tag-output,--tag-output}'[Tag all output with \[job,rank\]]' \
|
||||
'(-timeout --timeout)'{-timeout,--timeout}'[Timeout, in seconds, for the entire job]' \
|
||||
'(-timestamp-output --timestamp-output)'{-timestamp-output,--timestamp-output}'[Timestamp all application process output]' \
|
||||
'(-use-hwthread-cpus --use-hwthread-cpus)'{-use-hwthread-cpus,--use-hwthread-cpus}'[Use hardware threads as independent cpus]' \
|
||||
'(-use-regexp --use-regexp)'{-use-regexp,--use-regexp}'[Use regular expressions for launch]' \
|
||||
|
@ -642,14 +642,13 @@ number of processes to run:
|
||||
Please correct this value and try again.
|
||||
#
|
||||
[orterun:timeout]
|
||||
The user-provided time limit for job execution has been
|
||||
reached:
|
||||
The user-provided time limit for job execution has been reached:
|
||||
|
||||
MPIEXEC_TIMEOUT: %d seconds
|
||||
Timeout: %d seconds
|
||||
|
||||
The job will now be aborted. Please check your code and/or
|
||||
adjust/remove the job execution time limit (as specified by
|
||||
MPIEXEC_TIMEOUT in your environment or --timeout on the command line).
|
||||
adjust/remove the job execution time limit (as specified by --timeout
|
||||
command line option or MPIEXEC_TIMEOUT environment variable).
|
||||
#
|
||||
[orterun:conflict-env-set]
|
||||
ERROR: You have attempted to pass environment variables to Open MPI
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" -*- nroff -*-
|
||||
.\" Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
|
||||
.\" Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
|
||||
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
.\" $COPYRIGHT$
|
||||
.\"
|
||||
@ -529,12 +529,41 @@ MCA parameter.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --get-stack-traces
|
||||
When paired with the
|
||||
.B --timeout
|
||||
option,
|
||||
.I mpirun
|
||||
will obtain and print out stack traces from all launched processes
|
||||
that are still alive when the timeout expires. Note that obtaining
|
||||
stack traces can take a little time and produce a lot of output,
|
||||
especially for large process-count jobs.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -debugger\fR,\fP --debugger
|
||||
Sequence of debuggers to search for when \fI--debug\fP is used (i.e.
|
||||
a synonym for \fIorte_base_user_debugger\fP MCA parameter).
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --timeout \fR<seconds>
|
||||
The maximum number of seconds that
|
||||
.I mpirun
|
||||
(also known as
|
||||
.I mpiexec\fR,\fI oshrun\fR,\fI orterun\fR,\fI
|
||||
etc.)
|
||||
will run. After this many seconds,
|
||||
.I mpirun
|
||||
will abort the launched job and exit with a non-zero exit status.
|
||||
Using
|
||||
.B --timeout
|
||||
can also be useful when combined with the
|
||||
.B --get-stack-traces
|
||||
option.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -tv\fR,\fP --tv
|
||||
Launch processes under the TotalView debugger.
|
||||
Deprecated backwards compatibility flag. Synonym for \fI--debug\fP.
|
||||
@ -661,6 +690,14 @@ without clutter from mpirun itself.
|
||||
Disable the automatic --prefix behavior
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --report-state-on-timeout
|
||||
When paired with the
|
||||
.B --timeout
|
||||
command line option, report the run-time subsystem state of each
|
||||
process when the timeout expires.
|
||||
.
|
||||
.
|
||||
.P
|
||||
There may be other options listed with \fImpirun --help\fP.
|
||||
.
|
||||
@ -669,12 +706,9 @@ There may be other options listed with \fImpirun --help\fP.
|
||||
.
|
||||
.TP
|
||||
.B MPIEXEC_TIMEOUT
|
||||
The maximum number of seconds that
|
||||
.I mpirun
|
||||
.RI ( mpiexec )
|
||||
will run. After this many seconds,
|
||||
.I mpirun
|
||||
will abort the launched job and exit.
|
||||
Synonym for the
|
||||
.B --timeout
|
||||
command line option.
|
||||
.
|
||||
.
|
||||
.\" **************************
|
||||
@ -1541,6 +1575,19 @@ In the event that one or more processes exit before calling MPI_FINALIZE, the
|
||||
return value of the MPI_COMM_WORLD rank of the process that \fImpirun\fP first notices died
|
||||
before calling MPI_FINALIZE will be returned. Note that, in general, this will
|
||||
be the first process that died but is not guaranteed to be so.
|
||||
.
|
||||
.PP
|
||||
If the
|
||||
.B --timeout
|
||||
command line option is used and the timeout expires before the job
|
||||
completes (thereby forcing
|
||||
.I mpirun
|
||||
to kill the job),
|
||||
.I mpirun
|
||||
will return an exit status equivalent to the value of
|
||||
.B ETIMEDOUT
|
||||
(which is typically 110 on Linux and 60 on OS X systems).
|
||||
|
||||
.
|
||||
.\" **************************
|
||||
.\" See Also Section
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user