Merge pull request #1727 from jsquyres/pr/mpirun-timeout-and-friends
mpirun.1in: add descriptions of new options
Этот коммит содержится в:
Коммит
873cebb4c0
@ -158,6 +158,7 @@ _mpirun() {
|
|||||||
'(-do-not-launch --do-not-launch)'{-do-not-launch,--do-not-launch}'[Perform all necessary operations to prepare to launch the application, but do not actually launch it]' \
|
'(-do-not-launch --do-not-launch)'{-do-not-launch,--do-not-launch}'[Perform all necessary operations to prepare to launch the application, but do not actually launch it]' \
|
||||||
'(-do-not-resolve --do-not-resolve)'{-do-not-resolve,--do-not-resolve}'[Do not attempt to resolve interfaces]' \
|
'(-do-not-resolve --do-not-resolve)'{-do-not-resolve,--do-not-resolve}'[Do not attempt to resolve interfaces]' \
|
||||||
'(-enable-recovery --enable-recovery)'{-enable-recovery,--enable-recovery}'[Enable recovery from process failure (default: disabled)]' \
|
'(-enable-recovery --enable-recovery)'{-enable-recovery,--enable-recovery}'[Enable recovery from process failure (default: disabled)]' \
|
||||||
|
'(-get-stack-traces --get-stack-traces)'{-get-stack-traces,--get-stack-traces}'[Upon timeout, obtain stack traces from all still-alive MPI processes (default: disabled)]' \
|
||||||
'*'{-gmca,--gmca}'[Pass global MCA parameters that are applicable to all contexts (arg0 is the parameter name; arg1 is the parameter value)]:mca variable name:->mca_variable_name:mca variable value:->mca_variable_value' \
|
'*'{-gmca,--gmca}'[Pass global MCA parameters that are applicable to all contexts (arg0 is the parameter name; arg1 is the parameter value)]:mca variable name:->mca_variable_name:mca variable value:->mca_variable_value' \
|
||||||
'(- *)'{-h,--help}'[Help message]' \
|
'(- *)'{-h,--help}'[Help message]' \
|
||||||
'*'{-H,-host,--host}'[List of hosts to invoke processes on]:hostnames:' \
|
'*'{-H,-host,--host}'[List of hosts to invoke processes on]:hostnames:' \
|
||||||
@ -192,6 +193,7 @@ _mpirun() {
|
|||||||
'(-report-child-jobs-separately --report-child-jobs-separately)'{-report-child-jobs-separately,--report-child-jobs-separately}'[Return the exit status of the primary job only]' \
|
'(-report-child-jobs-separately --report-child-jobs-separately)'{-report-child-jobs-separately,--report-child-jobs-separately}'[Return the exit status of the primary job only]' \
|
||||||
'(-report-events --report-events)'{-report-events,--report-events}'[Report events to a tool listening at the specified URI]:URI:' \
|
'(-report-events --report-events)'{-report-events,--report-events}'[Report events to a tool listening at the specified URI]:URI:' \
|
||||||
'(-report-pid --report-pid)'{-report-pid,--report-pid}'[Printout pid on stdout (-), stderr (+), or a file (anything else)]:report file:_report_file' \
|
'(-report-pid --report-pid)'{-report-pid,--report-pid}'[Printout pid on stdout (-), stderr (+), or a file (anything else)]:report file:_report_file' \
|
||||||
|
'(-report-state-on-timeout --report-state-on-timeout)'{-report-state-on-timeout,--report-state-on-timeout}'[Upon timeout, print run-time status of each process]' \
|
||||||
'(-report-uri --report-uri)'{-report-uri,--report-uri}'[Printout URI on stdout (-), stderr (+), or a file (anything else)]:report file:_report_file' \
|
'(-report-uri --report-uri)'{-report-uri,--report-uri}'[Printout URI on stdout (-), stderr (+), or a file (anything else)]:report file:_report_file' \
|
||||||
'(-rf --rankfile)'{-rf,--rankfile}'[Provide a rankfile file]:rank file:_files' \
|
'(-rf --rankfile)'{-rf,--rankfile}'[Provide a rankfile file]:rank file:_files' \
|
||||||
'(-s --preload-binary)'{-s,--preload-binary}'[Preload the binary on the remote machine before starting the remote process.]' \
|
'(-s --preload-binary)'{-s,--preload-binary}'[Preload the binary on the remote machine before starting the remote process.]' \
|
||||||
@ -202,6 +204,7 @@ _mpirun() {
|
|||||||
'(-staged --staged)'{-staged,--staged}'[Used staged execution if inadequate resources are present (cannot support MPI jobs)]' \
|
'(-staged --staged)'{-staged,--staged}'[Used staged execution if inadequate resources are present (cannot support MPI jobs)]' \
|
||||||
'(-stdin --stdin)'{-stdin,--stdin}'[Specify procs to receive stdin \[rank, all, none\] (default: 0, indicating rank 0)]:rank list:' \
|
'(-stdin --stdin)'{-stdin,--stdin}'[Specify procs to receive stdin \[rank, all, none\] (default: 0, indicating rank 0)]:rank list:' \
|
||||||
'(-tag-output --tag-output)'{-tag-output,--tag-output}'[Tag all output with \[job,rank\]]' \
|
'(-tag-output --tag-output)'{-tag-output,--tag-output}'[Tag all output with \[job,rank\]]' \
|
||||||
|
'(-timeout --timeout)'{-timeout,--timeout}'[Timeout, in seconds, for the entire job]' \
|
||||||
'(-timestamp-output --timestamp-output)'{-timestamp-output,--timestamp-output}'[Timestamp all application process output]' \
|
'(-timestamp-output --timestamp-output)'{-timestamp-output,--timestamp-output}'[Timestamp all application process output]' \
|
||||||
'(-use-hwthread-cpus --use-hwthread-cpus)'{-use-hwthread-cpus,--use-hwthread-cpus}'[Use hardware threads as independent cpus]' \
|
'(-use-hwthread-cpus --use-hwthread-cpus)'{-use-hwthread-cpus,--use-hwthread-cpus}'[Use hardware threads as independent cpus]' \
|
||||||
'(-use-regexp --use-regexp)'{-use-regexp,--use-regexp}'[Use regular expressions for launch]' \
|
'(-use-regexp --use-regexp)'{-use-regexp,--use-regexp}'[Use regular expressions for launch]' \
|
||||||
|
@ -642,14 +642,13 @@ number of processes to run:
|
|||||||
Please correct this value and try again.
|
Please correct this value and try again.
|
||||||
#
|
#
|
||||||
[orterun:timeout]
|
[orterun:timeout]
|
||||||
The user-provided time limit for job execution has been
|
The user-provided time limit for job execution has been reached:
|
||||||
reached:
|
|
||||||
|
|
||||||
MPIEXEC_TIMEOUT: %d seconds
|
Timeout: %d seconds
|
||||||
|
|
||||||
The job will now be aborted. Please check your code and/or
|
The job will now be aborted. Please check your code and/or
|
||||||
adjust/remove the job execution time limit (as specified by
|
adjust/remove the job execution time limit (as specified by --timeout
|
||||||
MPIEXEC_TIMEOUT in your environment or --timeout on the command line).
|
command line option or MPIEXEC_TIMEOUT environment variable).
|
||||||
#
|
#
|
||||||
[orterun:conflict-env-set]
|
[orterun:conflict-env-set]
|
||||||
ERROR: You have attempted to pass environment variables to Open MPI
|
ERROR: You have attempted to pass environment variables to Open MPI
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
.\" -*- nroff -*-
|
.\" -*- nroff -*-
|
||||||
.\" Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
|
.\" Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
|
||||||
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||||
.\" $COPYRIGHT$
|
.\" $COPYRIGHT$
|
||||||
.\"
|
.\"
|
||||||
@ -529,12 +529,41 @@ MCA parameter.
|
|||||||
.
|
.
|
||||||
.
|
.
|
||||||
.TP
|
.TP
|
||||||
|
.B --get-stack-traces
|
||||||
|
When paired with the
|
||||||
|
.B --timeout
|
||||||
|
option,
|
||||||
|
.I mpirun
|
||||||
|
will obtain and print out stack traces from all launched processes
|
||||||
|
that are still alive when the timeout expires. Note that obtaining
|
||||||
|
stack traces can take a little time and produce a lot of output,
|
||||||
|
especially for large process-count jobs.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.TP
|
||||||
.B -debugger\fR,\fP --debugger
|
.B -debugger\fR,\fP --debugger
|
||||||
Sequence of debuggers to search for when \fI--debug\fP is used (i.e.
|
Sequence of debuggers to search for when \fI--debug\fP is used (i.e.
|
||||||
a synonym for \fIorte_base_user_debugger\fP MCA parameter).
|
a synonym for \fIorte_base_user_debugger\fP MCA parameter).
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.TP
|
.TP
|
||||||
|
.B --timeout \fR<seconds>
|
||||||
|
The maximum number of seconds that
|
||||||
|
.I mpirun
|
||||||
|
(also known as
|
||||||
|
.I mpiexec\fR,\fI oshrun\fR,\fI orterun\fR,\fI
|
||||||
|
etc.)
|
||||||
|
will run. After this many seconds,
|
||||||
|
.I mpirun
|
||||||
|
will abort the launched job and exit with a non-zero exit status.
|
||||||
|
Using
|
||||||
|
.B --timeout
|
||||||
|
can also be useful when combined with the
|
||||||
|
.B --get-stack-traces
|
||||||
|
option.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.TP
|
||||||
.B -tv\fR,\fP --tv
|
.B -tv\fR,\fP --tv
|
||||||
Launch processes under the TotalView debugger.
|
Launch processes under the TotalView debugger.
|
||||||
Deprecated backwards compatibility flag. Synonym for \fI--debug\fP.
|
Deprecated backwards compatibility flag. Synonym for \fI--debug\fP.
|
||||||
@ -661,6 +690,14 @@ without clutter from mpirun itself.
|
|||||||
Disable the automatic --prefix behavior
|
Disable the automatic --prefix behavior
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.TP
|
||||||
|
.B --report-state-on-timeout
|
||||||
|
When paired with the
|
||||||
|
.B --timeout
|
||||||
|
command line option, report the run-time subsystem state of each
|
||||||
|
process when the timeout expires.
|
||||||
|
.
|
||||||
|
.
|
||||||
.P
|
.P
|
||||||
There may be other options listed with \fImpirun --help\fP.
|
There may be other options listed with \fImpirun --help\fP.
|
||||||
.
|
.
|
||||||
@ -669,12 +706,9 @@ There may be other options listed with \fImpirun --help\fP.
|
|||||||
.
|
.
|
||||||
.TP
|
.TP
|
||||||
.B MPIEXEC_TIMEOUT
|
.B MPIEXEC_TIMEOUT
|
||||||
The maximum number of seconds that
|
Synonym for the
|
||||||
.I mpirun
|
.B --timeout
|
||||||
.RI ( mpiexec )
|
command line option.
|
||||||
will run. After this many seconds,
|
|
||||||
.I mpirun
|
|
||||||
will abort the launched job and exit.
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" **************************
|
.\" **************************
|
||||||
@ -1541,6 +1575,19 @@ In the event that one or more processes exit before calling MPI_FINALIZE, the
|
|||||||
return value of the MPI_COMM_WORLD rank of the process that \fImpirun\fP first notices died
|
return value of the MPI_COMM_WORLD rank of the process that \fImpirun\fP first notices died
|
||||||
before calling MPI_FINALIZE will be returned. Note that, in general, this will
|
before calling MPI_FINALIZE will be returned. Note that, in general, this will
|
||||||
be the first process that died but is not guaranteed to be so.
|
be the first process that died but is not guaranteed to be so.
|
||||||
|
.
|
||||||
|
.PP
|
||||||
|
If the
|
||||||
|
.B --timeout
|
||||||
|
command line option is used and the timeout expires before the job
|
||||||
|
completes (thereby forcing
|
||||||
|
.I mpirun
|
||||||
|
to kill the job),
|
||||||
|
.I mpirun
|
||||||
|
will return an exit status equivalent to the value of
|
||||||
|
.B ETIMEDOUT
|
||||||
|
(which is typically 110 on Linux and 60 on OS X systems).
|
||||||
|
|
||||||
.
|
.
|
||||||
.\" **************************
|
.\" **************************
|
||||||
.\" See Also Section
|
.\" See Also Section
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user