diff --git a/configure.ac b/configure.ac index 1a61ea52a2..4a7378e1d8 100644 --- a/configure.ac +++ b/configure.ac @@ -1125,7 +1125,10 @@ AC_CONFIG_FILES([ orte/etc/Makefile orte/tools/console/Makefile + orte/tools/orteboot/Makefile orte/tools/orted/Makefile + orte/tools/ortehalt/Makefile + orte/tools/ortekill/Makefile orte/tools/orteprobe/Makefile orte/tools/orterun/Makefile orte/tools/wrappers/Makefile diff --git a/orte/mca/odls/base/odls_base_open.c b/orte/mca/odls/base/odls_base_open.c index d483391af5..0505bb6418 100644 --- a/orte/mca/odls/base/odls_base_open.c +++ b/orte/mca/odls/base/odls_base_open.c @@ -94,6 +94,10 @@ int orte_odls_base_open(void) orte_odls_globals.output = -1; } + mca_base_param_reg_int_name("odls_base", "sigkill_timeout", + "Time to wait for a process to die after issuing a kill signal to it", + false, false, 1, &orte_odls_globals.timeout_before_sigkill); + /* register the daemon cmd data type */ tmp = ORTE_DAEMON_CMD; if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_odls_pack_daemon_cmd, diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index f2c8c35217..bb2dd81e1b 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -52,6 +52,7 @@ int orte_rmaps_base_map_job(orte_jobid_t job, opal_list_t *attributes) opal_list_t working_attrs; opal_list_item_t *item; orte_jobid_t *jptr, parent_job=ORTE_JOBID_INVALID; + orte_job_map_t *map; int rc; /* check the attributes to see if anything in the environment @@ -200,6 +201,13 @@ int orte_rmaps_base_map_job(orte_jobid_t job, opal_list_t *attributes) } } + /* if we wanted to display the map, now is the time to do it */ + if (NULL != orte_rmgr.find_attribute(attributes, ORTE_RMAPS_DISPLAY_AFTER_MAP)) { + orte_rmaps.get_job_map(&map, job); + orte_dss.dump(0, map, ORTE_JOB_MAP); + } + + return ORTE_SUCCESS; } diff --git a/orte/mca/rmgr/proxy/rmgr_proxy.c b/orte/mca/rmgr/proxy/rmgr_proxy.c index 
0e1186c828..f3840a1168 100644 --- a/orte/mca/rmgr/proxy/rmgr_proxy.c +++ b/orte/mca/rmgr/proxy/rmgr_proxy.c @@ -416,10 +416,6 @@ static int orte_rmgr_proxy_spawn_job( ORTE_ERROR_LOG(rc); return rc; } - if (NULL != orte_rmgr.find_attribute(attributes, ORTE_RMAPS_DISPLAY_AFTER_MAP)) { - orte_rmaps.get_job_map(&map, *jobid); - orte_dss.dump(0, map, ORTE_JOB_MAP); - } } if (flags & ORTE_RMGR_SETUP_TRIGS) { diff --git a/orte/mca/rmgr/urm/rmgr_urm.c b/orte/mca/rmgr/urm/rmgr_urm.c index 1c0fc59465..bb59c9bde0 100644 --- a/orte/mca/rmgr/urm/rmgr_urm.c +++ b/orte/mca/rmgr/urm/rmgr_urm.c @@ -372,10 +372,6 @@ static int orte_rmgr_urm_spawn_job( ORTE_ERROR_LOG(rc); return rc; } - if (NULL != orte_rmgr.find_attribute(attributes, ORTE_RMAPS_DISPLAY_AFTER_MAP)) { - orte_rmaps.get_job_map(&map, *jobid); - orte_dss.dump(0, map, ORTE_JOB_MAP); - } } if (flags & ORTE_RMGR_SETUP_TRIGS) { diff --git a/orte/tools/Makefile.am b/orte/tools/Makefile.am index fd36d3fdd7..c716d764f8 100644 --- a/orte/tools/Makefile.am +++ b/orte/tools/Makefile.am @@ -22,7 +22,10 @@ SUBDIRS += \ tools/console \ + tools/orteboot \ tools/orted \ + tools/ortehalt \ + tools/ortekill \ tools/orteprobe \ tools/orterun \ tools/wrappers \ @@ -31,7 +34,10 @@ SUBDIRS += \ DIST_SUBDIRS += \ tools/console \ + tools/orteboot \ tools/orted \ + tools/ortehalt \ + tools/ortekill \ tools/orteprobe \ tools/orterun \ tools/wrappers \ diff --git a/orte/tools/orteboot/.ompi_ignore b/orte/tools/orteboot/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/orte/tools/orteboot/.ompi_unignore b/orte/tools/orteboot/.ompi_unignore new file mode 100644 index 0000000000..97b20ffb20 --- /dev/null +++ b/orte/tools/orteboot/.ompi_unignore @@ -0,0 +1 @@ +rhc diff --git a/orte/tools/orteboot/Makefile.am b/orte/tools/orteboot/Makefile.am new file mode 100644 index 0000000000..d55b1aa1d4 --- /dev/null +++ b/orte/tools/orteboot/Makefile.am @@ -0,0 +1,39 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University 
and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +libs = \ + $(top_builddir)/orte/liborte.la + +orteboot_SOURCES = \ + orteboot.c + +orteboot_LDADD = $(libs) +orteboot_DEPENDENCIES = $(libs) + +if OMPI_INSTALL_BINARIES + +bin_PROGRAMS = orteboot + +dist_pkgdata_DATA = help-orteboot.txt + +# AM 1.9.6 seems to have a bug in its dependencies for install-man if +#dist_ and nodist_ are used, so explicitly add to EXTRA_DIST... +man_MANS = orteboot.1 +EXTRA_DIST = orteboot.1 + +endif diff --git a/orte/tools/orteboot/help-orteboot.txt b/orte/tools/orteboot/help-orteboot.txt new file mode 100644 index 0000000000..59d08d3418 --- /dev/null +++ b/orte/tools/orteboot/help-orteboot.txt @@ -0,0 +1,130 @@ +# -*- text -*- +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open RTE's orterun. +# +[orterun:init-failure] +Open RTE was unable to initialize properly. The error occurred while +attempting to %s. Returned value %d instead of ORTE_SUCCESS.
+[orterun:usage] +%s (%s) %s + +Usage: %s [OPTION]... [PROGRAM]... +Start the given program using Open RTE + +%s + +Report bugs to %s +[orterun:version] +%s (%s) %s + +Report bugs to %s +[orterun:allocate-resources] +%s was unable to allocate enough resources to start your application. +This might be a transient error (too many nodes in the cluster were +unavailable at the time of the request) or a permanent error (you +requested more nodes than exist in your cluster). + +While probably only useful to Open RTE developers, the error returned +was %d. +[orterun:error-spawning] +%s was unable to start the specified application. An attempt has been +made to clean up all processes that did start. The error returned was +%d. +[orterun:appfile-not-found] +Unable to open the appfile: + + %s + +Double check that this file exists and is readable. +[orterun:executable-not-specified] +No executable was specified on the %s command line. + +Aborting. +[orterun:multi-apps-and-zero-np] +%s found multiple applications specified on the command line, with +at least one that failed to specify the number of processes to execute. +When specifying multiple applications, you must specify how many processes +of each to launch via the -np argument. +[orterun:nothing-to-do] +%s could not find anything to do. + +It is possible that you forgot to specify how many processes to run +via the "-np" argument. +[orterun:call-failed] +%s encountered a %s call failure. This should not happen, and +usually indicates an error within the operating system itself. +Specifically, the following error occurred: + + %s + +The only other available information that may be helpful is the errno +that was returned: %d. +[orterun:environ] +%s was unable to set + %s = %s +in the environment. Returned value %d instead of ORTE_SUCCESS. +[orterun:precondition] +%s was unable to precondition transports +Returned value %d instead of ORTE_SUCCESS.
+[orterun:attr-failed] +%s was unable to define an attribute +Returned value %d instead of ORTE_SUCCESS. +[orterun:proc-aborted] +%s noticed that job rank %lu with PID %lu on node %s exited on signal %d. +[orterun:abnormal-exit] +WARNING: %s encountered an abnormal exit. + +This means that %s exited before it received notification that all +started processes had terminated. You should double check and ensure +that there are no runaway processes still executing. +[orterun:empty-prefix] +A prefix was supplied to %s that only contained slashes. + +This is a fatal error; %s will now abort. No processes were launched. +# +[debugger-mca-param-not-found] +Internal error -- the orte_base_debugger MCA parameter was not able to +be found. Please contact the Open RTE developers; this should not +happen. +# +[debugger-orte_base_user_debugger-empty] +The MCA parameter "orte_base_user_debugger" was empty, indicating that +no user-level debuggers have been defined. Please set this MCA +parameter to a value and try again. +# +[debugger-not-found] +A suitable debugger could not be found in your PATH. Check the values +specified in the orte_base_user_debugger MCA parameter for the list of +debuggers that was searched. +# +[debugger-exec-failed] +%s was unable to launch the specified debugger. This is what was +launched: + + %s + +Things to check: + + - Ensure that the debugger is installed properly + - Ensure that the "%s" executable is in your path + - Ensure that any required licenses are available to run the debugger +# +[orterun:daemon-die] +%s was unable to cleanly terminate the daemons for this job. Returned value %d instead of ORTE_SUCCESS. 
+ diff --git a/orte/tools/orteboot/orteboot.1 b/orte/tools/orteboot/orteboot.1 new file mode 100644 index 0000000000..24463ad2e4 --- /dev/null +++ b/orte/tools/orteboot/orteboot.1 @@ -0,0 +1,851 @@ +.\" +.\" Man page for ORTE's orterun command +.\" +.\" .TH name section center-footer left-footer center-header +.TH MPIRUN 1 "March 2006" "Open MPI" "OPEN MPI COMMANDS" +.\" ************************** +.\" Name Section +.\" ************************** +.SH NAME +. +orterun, mpirun, mpiexec \- Execute serial and parallel jobs in Open MPI. + +.B Note: +\fImpirun\fP, \fImpiexec\fP, and \fIorterun\fP are all exact synonyms for each +other. Using any of the names will result in exactly identical behavior. +. +.\" ************************** +.\" Synopsis Section +.\" ************************** +.SH SYNOPSIS +. +.PP +Single Process Multiple Data (SPMD) Model: + +.B mpirun +.R [ options ] +.B +.R [ ] +. + +Multiple Instruction Multiple Data (MIMD) Model: + +.B mpirun +.R [ global_options ] + [ local_options1 ] +.B +.R [ ] : + [ local_options2 ] +.B +.R [ ] : + ... : + [ local_optionsN ] +.B +.R [ ] +.P + +Note that in both models, invoking \fImpirun\fR via an absolute path +name is equivalent to specifying the \fI--prefix\fR option with a +\fI\fR value equivalent to the directory where \fImpirun\fR +resides, minus its last subdirectory. For example: + + \fBshell$\fP /usr/local/bin/mpirun ... + +is equivalent to + + \fBshell$\fP mpirun --prefix /usr/local + +. +.\" ************************** +.\" Quick Summary Section +.\" ************************** +.SH QUICK SUMMARY +. 
+If you are simply looking for how to run an MPI application, you +probably want to use a command line of the following form: + + \fBshell$\fP mpirun [ -np X ] [ --hostfile ] + +This will run X copies of \fI\fR in your current run-time +environment (if running under a supported resource manager, Open MPI's +\fImpirun\fR will usually automatically use the corresponding resource manager +process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR, +which require the use of a hostfile, or will default to running all X +copies on the localhost), scheduling (by default) in a round-robin fashion by +CPU slot. See the rest of this page for more details. +. +.\" ************************** +.\" Options Section +.\" ************************** +.SH OPTIONS +. +.I mpirun +will send the name of the directory where it was invoked on the local +node to each of the remote nodes, and attempt to change to that +directory. See the "Current Working Directory" section below for further +details. +.\" +.\" Start options listing +.\" Indent 10 chacters from start of first column to start of second column +.TP 10 +.B +Pass these run-time arguments to every new process. These must always +be the last arguments to \fImpirun\fP. If an app context file is used, +\fI\fP will be ignored. +. +. +.TP +.B +The program executable. This is identified as the first non-recognized argument +to mpirun. +. +. +.TP +.B -aborted\fR,\fP --aborted \fR<#>\fP +Set the maximum number of aborted processes to display. +. +. +.TP +.B --app \fR\fP +Provide an appfile, ignoring all other command line options. +. +. +.TP +.B -bynode\fR,\fP --bynode +Allocate (map) the processes by node in a round-robin scheme. +. +. +.TP +.B -byslot\fR,\fP --byslot +Allocate (map) the processes by slot in a round-robin scheme. This is the +default. +. +. +.TP +.B -c \fR<#>\fP +Synonym for \fI-np\fP. +. +. 
+.TP +.B -debug\fR,\fP --debug +Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP +MCA parameter. +. +. +.TP +.B -debugger\fR,\fP --debugger +Sequence of debuggers to search for when \fI--debug\fP is used (i.e. +a synonym for \fIorte_base_user_debugger\fP MCA parameter). +. +. +.TP +.B -gmca\fR,\fP --gmca \fR \fP +Pass global MCA parameters that are applicable to all contexts. \fI\fP is +the parameter name; \fI\fP is the parameter value. +. +. +.TP +.B -h\fR,\fP --help +Display help for this command +. +. +.TP +.B -H \fR\fP +Synonym for \fI-host\fP. +. +. +.TP +.B -host\fR,\fP --host \fR\fP +List of hosts on which to invoke processes. +. +. +.TP +.B -hostfile\fR,\fP --hostfile \fR\fP +Provide a hostfile to use. +.\" JJH - Should have man page for how to format a hostfile properly. +. +. +.TP +.B -machinefile\fR,\fP --machinefile \fR\fP +Synonym for \fI-hostfile\fP. +. +. +.TP +.B -mca\fR,\fP --mca +Send arguments to various MCA modules. See the "MCA" section, below. +. +. +.TP +.B -n\fR,\fP --n \fR<#>\fP +Synonym for \fI-np\fP. +. +. +.TP +.B -nolocal\fR,\fP --nolocal +Do not run any copies of the launched application on the same node as +orterun is running. This option will override listing the localhost +with \fB--host\fR or any other host-specifying mechanism. +. +. +.TP +.B -nooversubscribe\fR,\fP --nooversubscribe +Do not oversubscribe any nodes; error (without starting any processes) +if the requested number of processes would cause oversubscription. +This option implicitly sets "max_slots" equal to the "slots" value for +each node. +. +. +.TP +.B -np \fR<#>\fP +Run this many copies of the program on the given nodes. This option +indicates that the specified file is an executable program and not an +application context. 
If no value is provided for the number of copies to +execute (i.e., neither the "-np" nor its synonyms are provided on the command +line), Open MPI will automatically execute a copy of the program on +each process slot (see below for description of a "process slot"). This +feature, however, can only be used in the SPMD model and will return an +error (without beginning execution of the application) otherwise. +. +. +.TP +.B -nw\fR,\fP --nw +Launch the processes and do not wait for their completion. mpirun will +complete as soon as successful launch occurs. +. +. +.TP +.B -path\fR,\fP --path \fR\fP + that will be used when attempting to locate requested executables. +. +. +.TP +.B --prefix \fR\fP +Prefix directory that will be used to set the \fIPATH\fR and +\fILD_LIBRARY_PATH\fR on the remote node before invoking Open MPI or +the target process. See the "Remote Execution" section, below. +. +. +.TP +.B -q\fR,\fP --quiet +Suppress informative messages from orterun during application execution. +. +. +.TP +.B --tmpdir \fR\fP +Set the root for the session directory tree for mpirun only. +. +. +.TP +.B -tv\fR,\fP --tv +Launch processes under the TotalView debugger. +Deprecated backwards compatibility flag. Synonym for \fI--debug\fP. +. +. +.TP +.B --universe \fR\fP +For this application, set the universe name as: + username@hostname:universe_name +. +. +.TP +.B -v\fR,\fP --verbose +Be verbose +.TP +.B -V\fR,\fP --version +Print version number. If no other arguments are given, this will also +cause orterun to exit. +. +. +.TP +.B -wd \fR\fP +Change to the directory before the user's program executes. +See the "Current Working Directory" section for notes on relative paths. +.B Note: +If the \fI-wd\fP option appears both on the command line and in an +application context, the context will take precedence over the command line. +. +. +.TP +.B -x \fR\fP +Export the specified environment variables to the remote nodes before +executing the program. 
Existing environment variables can be +specified (see the Examples section, below), or new variable names +specified with corresponding values. The parser for the \fI-x\fP +option is not very sophisticated; it does not even understand quoted +values. Users are advised to set variables in the environment, and +then use \fI-x\fP to export (not define) them. +. +. +.P +The following options are useful for developers; they are not generally +useful to most ORTE and/or MPI users: +. +.TP +.B -d\fR,\fP --debug-devel +Enable debugging of the OpenRTE (the run-time layer in Open MPI). +This is not generally useful for most users. +. +. +.TP +.B --debug-daemons +Enable debugging of any OpenRTE daemons used by this application. +. +. +.TP +.B --debug-daemons-file +Enable debugging of any OpenRTE daemons used by this application, storing +output in files. +. +. +.TP +.B --no-daemonize +Do not detach OpenRTE daemons used by this application. +. +. +.\" ************************** +.\" Description Section +.\" ************************** +.SH DESCRIPTION +. +One invocation of \fImpirun\fP starts an MPI application running under Open +MPI. If the application is single process multiple data (SPMD), the application +can be specified on the \fImpirun\fP command line. + +If the application is multiple instruction multiple data (MIMD), comprising of +multiple programs, the set of programs and argument can be specified in one of +two ways: Extended Command Line Arguments, and Application Context. +.PP +An application context describes the MIMD program set including all arguments +in a separate file. +.\"See appcontext(5) for a description of the application context syntax. +This file essentially contains multiple \fImpirun\fP command lines, less the +command name itself. The ability to specify different options for different +instantiations of a program is another reason to use an application context. 
+.PP +Extended command line arguments allow for the description of the application +layout on the command line using colons (\fI:\fP) to separate the specification +of programs and arguments. Some options are globally set across all specified +programs (e.g. --hostfile), while others are specific to a single program +(e.g. -np). +. +. +. +.SS Process Slots +. +Open MPI uses "slots" to represent a potential location for a process. +Hence, a node with 2 slots means that 2 processes can be launched on +that node. For performance, the community typically equates a "slot" +with a physical CPU, thus ensuring that any process assigned to that +slot has a dedicated processor. This is not, however, a requirement for +the operation of Open MPI. +.PP +Slots can be specified in hostfiles after the hostname. For example: +. +.TP 4 +host1.example.com slots=4 +Indicates that there are 4 process slots on host1. +. +.PP +If no slots value is specified, then Open MPI will automatically assign +a default value of "slots=1" to that host. +. +.PP +When running under resource managers (e.g., SLURM, Torque, etc.), Open +MPI will obtain both the hostnames and the number of slots directly +from the resource manager. For example, if running under a SLURM job, +Open MPI will automatically receive the hosts that SLURM has allocated +to the job as well as how many slots on each node that SLURM says +are usable - in most high-performance environments, the slots will +equate to the number of processors on the node. +. +.PP +When deciding where to launch processes, Open MPI will first fill up +all available slots before oversubscribing (see "Location +Nomenclature", below, for more details on the scheduling algorithms +available). Unless told otherwise, Open MPI will arbitrarily +oversubscribe nodes.
For example, if the only node available is the +localhost, Open MPI will run as many processes as specified by the +-n (or one of its variants) command line option on the +localhost (although they may run quite slowly, since they'll all be +competing for CPU and other resources). +. +.PP +Limits can be placed on oversubscription with the "max_slots" +attribute in the hostfile. For example: +. +.TP 4 +host2.example.com slots=4 max_slots=6 +Indicates that there are 4 process slots on host2. Further, Open MPI +is limited to launching a maximum of 6 processes on host2. +. +.TP +host3.example.com slots=2 max_slots=2 +Indicates that there are 2 process slots on host3 and that no +oversubscription is allowed (similar to the \fI--nooversubscribe\fR +option). +. +.TP +host4.example.com max_slots=2 +Shorthand; same as listing "slots=2 max_slots=2". +. +. +.PP +Note that Open MPI's support for resource managers does not currently +set the "max_slots" values for hosts. If you wish to prevent +oversubscription in such scenarios, use the \fI--nooversubscribe\fR +option. +. +.PP +In scenarios where the user wishes to launch an application across +all available slots by not providing a "-n" option on the mpirun +command line, Open MPI will launch a process on each process slot +for each host within the provided environment. For example, if a +hostfile has been provided, then Open MPI will spawn processes +on each identified host up to the "slots=x" limit if oversubscription +is not allowed. If oversubscription is allowed (the default), then +Open MPI will spawn processes on each host up to the "max_slots=y" limit +if that value is provided. In all cases, the "-bynode" and "-byslot" +mapping directives will be enforced to ensure proper placement of +process ranks. +. +. +. +.SS Location Nomenclature +. +As described above, \fImpirun\fP can specify arbitrary locations in +the current Open MPI universe. Locations can be specified either by +CPU or by node. 
+ +.B Note: +This nomenclature does not force Open MPI to bind processes to CPUs -- +specifying a location "by CPU" is really a convenience mechanism for +SMPs that ultimately maps down to a specific node. +.PP +Specifying locations by node will launch one copy of an executable per +specified node. +Using the \fI--bynode\fP option tells Open MPI to use all available nodes. +Using the \fI--byslot\fP option tells Open MPI to use all slots on an available +node before allocating resources on the next available node. +For example: +. +.TP 4 +mpirun --bynode -np 4 a.out +Runs one copy of the executable +.I a.out +on all available nodes in the Open MPI universe. MPI_COMM_WORLD rank 0 +will be on node0, rank 1 will be on node1, etc. Regardless of how many slots +are available on each of the nodes. +. +. +.TP +mpirun --byslot -np 4 a.out +Runs one copy of the executable +.I a.out +on each slot on a given node before running the executable on other available +nodes. +. +. +. +.SS Specifying Hosts +. +Hosts can be specified in a number of ways. The most common of which is in a +'hostfile' or 'machinefile'. If our hostfile contains the following information: +. +. + + \fBshell$\fP cat my-hostfile + node00 slots=2 + node01 slots=2 + node02 slots=2 + +. +. +.TP +mpirun --hostfile my-hostfile -np 3 a.out +This will run one copy of the executable +.I a.out +on hosts node00,node01, and node02. +. +. +.PP +Another method for specifying hosts is directly on the command line. Here one +can include and exclude hosts from the set of hosts to run on. For example: +. +. +.TP +mpirun -np 3 --host a a.out +Runs three copies of the executable +.I a.out +on host a. +. +. +.TP +mpirun -np 3 --host a,b,c a.out +Runs one copy of the executable +.I a.out +on hosts a, b, and c. +. +. +.TP +mpirun -np 3 --hostfile my-hostfile --host node00 a.out +Runs three copies of the executable +.I a.out +on host node00. +. +.
+.TP +mpirun -np 3 --hostfile my-hostfile --host node10 a.out +This will prompt an error since node10 is not in my-hostfile; mpirun will +abort. +. +. +.TP +shell$ mpirun -np 1 --host a hostname : -np 2 --host b,c uptime +Runs one copy of the executable +.I hostname +on host a. And runs one copy of the executable +.I uptime +on hosts b and c. +. +. +. +.SS No Local Launch +. +Using the \fB--nolocal\fR option to orterun tells the system to not +launch any of the application processes on the same node that orterun +is running. While orterun typically blocks and consumes few system +resources, this option can be helpful for launching very large jobs +where orterun may actually need to use noticeable amounts of memory +and/or processing time. \fB--nolocal\fR allows orterun to run without +sharing the local node with the launched applications, and likewise +allows the launched applications to run unhindered by orterun's system +usage. +.PP +Note that \fB--nolocal\fR will override any other specification to +launch the application on the local node. It will disqualify the +localhost from being capable of running any processes in the +application. +. +. +.TP +shell$ mpirun -np 1 --host localhost --nolocal hostname +This example will result in an error because orterun will not find +anywhere to launch the application. +. +. +. +.SS No Oversubscription +. +Using the \fI--nooversubscribe\fR option causes Open MPI to implicitly +set the "max_slots" value to be the same as the "slots" value for each +node. This can be especially helpful when running jobs under a +resource manager because Open MPI currently only sets the "slots" +value for each node that it obtains from the resource manager. +. +. +. +.SS Application Context or Executable Program? +. +To distinguish the two different forms, \fImpirun\fP +looks on the command line for \fI--app\fP option. If +it is specified, then the file named on the command line is +assumed to be an application context.
If it is not +specified, then the file is assumed to be an executable program. +. +. +. +.SS Locating Files +. +If \fIno\fP relative or absolute path is specified for a file, Open MPI +will look for files by searching the directories in the user's PATH environment +variable as defined on the source node(s). +.PP +If a relative directory is specified, it must be relative to the initial +working directory determined by the specific starter used. For example when +using the rsh or ssh starters, the initial directory is $HOME by default. Other +starters may set the initial directory to the current working directory from +the invocation of \fImpirun\fP. +. +. +. +.SS Current Working Directory +. +The \fI\-wd\fP mpirun option allows the user to change to an arbitrary +directory before their program is invoked. It can also be used in application +context files to specify working directories on specific nodes and/or +for specific applications. +.PP +If the \fI\-wd\fP option appears both in a context file and on the command line, +the context file directory will override the command line value. +.PP +If the \fI-wd\fP option is specified, Open MPI will attempt to change to the +specified directory on all of the remote nodes. If this fails, \fImpirun\fP +will abort. +.PP +If the \fI-wd\fP option is \fBnot\fP specified, Open MPI will send the +directory name where \fImpirun\fP was invoked to each of the remote nodes. The +remote nodes will try to change to that directory. If they are unable (e.g., if +the directory does not exist on that node), then Open MPI will use the default +directory determined by the starter. +.PP +All directory changing occurs before the user's program is invoked; it +does not wait until \fIMPI_INIT\fP is called. +. +. +. +.SS Standard I/O +. +Open MPI directs UNIX standard input to /dev/null on all processes +except the MPI_COMM_WORLD rank 0 process. The MPI_COMM_WORLD rank 0 process +inherits standard input from \fImpirun\fP.
+.B Note: +The node that invoked \fImpirun\fP need not be the same as the node where the +MPI_COMM_WORLD rank 0 process resides. Open MPI handles the redirection of +\fImpirun\fP's standard input to the rank 0 process. +.PP +Open MPI directs UNIX standard output and error from remote nodes to the node +that invoked \fImpirun\fP and prints it on the standard output/error of +\fImpirun\fP. +Local processes inherit the standard output/error of \fImpirun\fP and transfer +to it directly. +.PP +Thus it is possible to redirect standard I/O for Open MPI applications by +using the typical shell redirection procedure on \fImpirun\fP. + + \fBshell$\fP mpirun -np 2 my_app < my_input > my_output + +Note that in this example \fIonly\fP the MPI_COMM_WORLD rank 0 process will +receive the stream from \fImy_input\fP on stdin. The stdin on all the other +nodes will be tied to /dev/null. However, the stdout from all nodes will +be collected into the \fImy_output\fP file. +. +. +. +.SS Signal Propagation +. +When orterun receives a SIGTERM or SIGINT, it will attempt to kill +the entire job by sending all processes in the job a SIGTERM, waiting +a small number of seconds, then sending all processes in the job a +SIGKILL. +. +SIGUSR1 and SIGUSR2 signals received by orterun are propagated to +all processes in the job. Other signals are not currently propagated +by orterun. +. +. +.SS Process Termination / Signal Handling +. +During the run of an MPI application, if any rank dies abnormally +(either exiting before invoking \fIMPI_FINALIZE\fP, or dying as the result of a +signal), \fImpirun\fP will print out an error message and kill the rest of the +MPI application. +.PP +User signal handlers should probably avoid trying to cleanup MPI state +(Open MPI is, currently, neither thread-safe nor async-signal-safe).
+For example, if a segmentation fault occurs in \fIMPI_SEND\fP (perhaps because +a bad buffer was passed in) and a user signal handler is invoked, if this user +handler attempts to invoke \fIMPI_FINALIZE\fP, Bad Things could happen since +Open MPI was already "in" MPI when the error occurred. Since \fImpirun\fP +will notice that the process died due to a signal, it is probably not +necessary (and safest) for the user to only clean up non-MPI state. +. +. +. +.SS Process Environment +. +Processes in the MPI application inherit their environment from the +Open RTE daemon upon the node on which they are running. The +environment is typically inherited from the user's shell. On remote +nodes, the exact environment is determined by the boot MCA module +used. The \fIrsh\fR launch module, for example, uses either +\fIrsh\fR/\fIssh\fR to launch the Open RTE daemon on remote nodes, and +typically executes one or more of the user's shell-setup files before +launching the Open RTE daemon. When running dynamically linked +applications which require the \fILD_LIBRARY_PATH\fR environment +variable to be set, care must be taken to ensure that it is correctly +set when booting Open MPI. +.PP +See the "Remote Execution" section for more details. +. +. +.SS Remote Execution +. +Open MPI requires that the \fIPATH\fR environment variable be set to +find executables on remote nodes (this is typically only necessary in +\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled +environments typically copy the current environment to the execution +of remote jobs, so if the current environment has \fIPATH\fR and/or +\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it +set properly). If Open MPI was compiled with shared library support, +it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment +variable set on remote nodes as well (especially to find the shared +libraries required to run user MPI applications). 
+.PP +However, it is not always desirable or possible to edit shell +startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The +\fI--prefix\fR option is provided for some simple configurations where +this is not possible. +.PP +The \fI--prefix\fR option takes a single argument: the base directory +on the remote node where Open MPI is installed. Open MPI will use +this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR +before executing any Open MPI or user applications. This allows +running Open MPI jobs without having pre-configured the \fIPATH\fR and +\fILD_LIBRARY_PATH\fR on the remote nodes. +.PP +Open MPI adds the basename of the current +node's "bindir" (the directory where Open MPI's executables are +installed) to the prefix and uses that to set the \fIPATH\fR on the +remote node. Similarly, Open MPI adds the basename of the current +node's "libdir" (the directory where Open MPI's libraries are +installed) to the prefix and uses that to set the +\fILD_LIBRARY_PATH\fR on the remote node. For example: +.TP 15 +Local bindir: +/local/node/directory/bin +.TP +Local libdir: +/local/node/directory/lib64 +.PP +If the following command line is used: + + \fBshell$\fP mpirun --prefix /remote/node/directory + +Open MPI will add "/remote/node/directory/bin" to the \fIPATH\fR +and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the +remote node before attempting to execute anything. +.PP +Note that \fI--prefix\fR can be set on a per-context basis, allowing +for different values for different nodes. +.PP +The \fI--prefix\fR option is not sufficient if the installation paths +on the remote node are different than the local node (e.g., if "/lib" +is used on the local node, but "/lib64" is used on the remote node), +or if the installation paths are something other than a subdirectory +under a common prefix.
+.PP +Note that executing \fImpirun\fR via an absolute pathname is +equivalent to specifying \fI--prefix\fR without the last subdirectory +in the absolute pathname to \fImpirun\fR. For example: + + \fBshell$\fP /usr/local/bin/mpirun ... + +is equivalent to + + \fBshell$\fP mpirun --prefix /usr/local +. +. +. +.SS Exported Environment Variables +. +All environment variables that are named in the form OMPI_* will automatically +be exported to new processes on the local and remote nodes. +The \fI\-x\fP option to \fImpirun\fP can be used to export specific environment +variables to the new processes. While the syntax of the \fI\-x\fP +option allows the definition of new variables, note that the parser +for this option is currently not very sophisticated - it does not even +understand quoted values. Users are advised to set variables in the +environment and use \fI\-x\fP to export them; not to define them. +. +. +. +.SS MCA (Modular Component Architecture) +. +The \fI-mca\fP switch allows the passing of parameters to various MCA modules. +.\" Open MPI's MCA modules are described in detail in ompimca(7). +MCA modules have direct impact on MPI programs because they allow tunable +parameters to be set at run time (such as which BTL communication device driver +to use, what parameters to pass to that BTL, etc.). +.PP +The \fI-mca\fP switch takes two arguments: \fI<key>\fP and \fI<value>\fP. +The \fI<key>\fP argument generally specifies which MCA module will receive the value. +For example, the \fI<key>\fP "btl" is used to select which BTL to be used for +transporting MPI messages. The \fI<value>\fP argument is the value that is +passed. +For example: +. +.TP 4 +mpirun -mca btl tcp,self -np 1 foo +Tells Open MPI to use the "tcp" and "self" BTLs, and to run a single copy of +"foo" on an allocated node. +. +.TP +mpirun -mca btl self -np 1 foo +Tells Open MPI to use the "self" BTL, and to run a single copy of "foo" on an +allocated node. +.\" And so on. Open MPI's BTL MCA modules are described in ompimca_btl(7).
+.PP +The \fI-mca\fP switch can be used multiple times to specify different +\fI<key>\fP and/or \fI<value>\fP arguments. If the same \fI<key>\fP is +specified more than once, the \fI<value>\fPs are concatenated with a comma +(",") separating them. +.PP +.B Note: +The \fI-mca\fP switch is simply a shortcut for setting environment variables. +The same effect may be accomplished by setting corresponding environment +variables before running \fImpirun\fP. +The form of the environment variables that Open MPI sets is: + + OMPI_<key>=<value> +.PP +Note that the \fI-mca\fP switch overrides any previously set environment +variables. Also note that unknown \fI<key>\fP arguments are still set as +environment variables -- they are not checked (by \fImpirun\fP) for correctness. +Illegal or incorrect \fI<value>\fP arguments may or may not be reported -- it +depends on the specific MCA module. +. +.\" ************************** +.\" Examples Section +.\" ************************** +.SH EXAMPLES +Be sure to also see the examples in the "Location Nomenclature" section, above. +. +.TP 4 +mpirun -np 1 prog1 +Load and execute prog1 on one node. Search the user's $PATH for the +executable file on each node. +. +. +.TP +mpirun -np 8 --byslot prog1 +Run 8 copies of prog1 wherever Open MPI wants to run them. +. +. +.TP +mpirun -np 4 -mca btl ib,tcp,self prog1 +Run 4 copies of prog1 using the "ib", "tcp", and "self" BTLs for the transport +of MPI messages. +. +.\" ************************** +.\" Diagnostics Section +.\" ************************** +. +.\" .SH DIAGNOSTICS +.\".TP 4 +.\"Error Msg: +.\"Description +. +.\" ************************** +.\" Return Value Section +.\" ************************** +. +.SH RETURN VALUE +. +\fImpirun\fP returns 0 if all ranks started by \fImpirun\fP exit after calling +MPI_FINALIZE. A non-zero value is returned if an internal error occurred in +mpirun, or one or more ranks exited before calling MPI_FINALIZE. If an +internal error occurred in mpirun, the corresponding error code is returned.
+In the event that one or more ranks exit before calling MPI_FINALIZE, the +return value of the rank of the process that \fImpirun\fP first notices died +before calling MPI_FINALIZE will be returned. Note that, in general, this will +be the first rank that died but is not guaranteed to be so. +.PP +However, note that if the \fI-nw\fP switch is used, the return value from +mpirun does not indicate the exit status of the ranks. +. +.\" ************************** +.\" See Also Section +.\" ************************** +. +.\" .SH SEE ALSO +.\" orted(1) diff --git a/orte/tools/orteboot/orteboot.c b/orte/tools/orteboot/orteboot.c new file mode 100644 index 0000000000..f4bb442959 --- /dev/null +++ b/orte/tools/orteboot/orteboot.c @@ -0,0 +1,348 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#include +#include +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ +#ifdef HAVE_SYS_WAIT_H +#include +#endif /* HAVE_SYS_WAIT_H */ +#ifdef HAVE_LIBGEN_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/install_dirs.h" +#include "opal/mca/base/base.h" +#include "opal/threads/condition.h" +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/cmd_line.h" +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/util/trace.h" +#include "opal/version.h" + +#include "orte/orte_constants.h" + +#include "orte/class/orte_pointer_array.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/universe_setup_file_io.h" +#include "orte/util/pre_condition_transports.h" + +#include "orte/mca/ns/ns.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/pls/pls.h" +#include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/rmgr/rmgr.h" +#include "orte/mca/schema/schema.h" +#include "orte/mca/smr/smr.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_wait.h" + +/* + * Globals + */ +static orte_jobid_t jobid = ORTE_JOBID_INVALID; +static char *orteboot_basename = NULL; + +/* + * Storage for the parsed orteboot command line options; several entries + * in cmd_line_init below store their values directly into these fields + */ +struct globals_t { + bool help; + bool version; + bool verbose; + bool quiet; + bool exit; + char *hostfile; + char *wdir; + opal_mutex_t lock; + opal_condition_t cond; +} orteboot_globals; + + +/* + * Table of the command line options orteboot recognizes; the list is + * terminated by the OPAL_CMD_LINE_TYPE_NULL entry + */ +opal_cmd_line_init_t cmd_line_init[] = { + /* Various "obvious" options */ + { NULL, NULL, NULL, 'h', NULL, "help", 0, + &orteboot_globals.help, OPAL_CMD_LINE_TYPE_BOOL, + "This help message" }, + { NULL, NULL, NULL, 'V', NULL,
"version", 0, + &orteboot_globals.version, OPAL_CMD_LINE_TYPE_BOOL, + "Print version and exit" }, + { NULL, NULL, NULL, 'v', NULL, "verbose", 0, + &orteboot_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, + "Be verbose" }, + { NULL, NULL, NULL, 'q', NULL, "quiet", 0, + &orteboot_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL, + "Suppress helpful messages" }, + + /* Set a hostfile */ + { "rds", "hostfile", "path", '\0', "hostfile", "hostfile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + { "rds", "hostfile", "path", '\0', "machinefile", "machinefile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + + /* mpiexec-like arguments */ + { NULL, NULL, NULL, '\0', "wdir", "wdir", 1, + &orteboot_globals.wdir, OPAL_CMD_LINE_TYPE_STRING, + "Set the working directory of the started processes" }, + + /* These arguments can be specified multiple times */ + { NULL, NULL, NULL, 'H', "host", "host", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "List of hosts to invoke processes on" }, + + /* OpenRTE arguments */ + { "orte", "debug", NULL, 'd', NULL, "debug-devel", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Enable debugging of OpenRTE" }, + + { "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0, + NULL, OPAL_CMD_LINE_TYPE_INT, + "Enable debugging of any OpenRTE daemons used by this application" }, + + { "orte", "debug", "daemons_file", '\0', NULL, "debug-daemons-file", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Enable debugging of any OpenRTE daemons used by this application, storing output in files" }, + + { "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Do not detach OpenRTE daemons used by this application" }, + + { "universe", NULL, NULL, '\0', NULL, "universe", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Set the universe name as username@hostname:universe_name for this application" }, + + { NULL, NULL, NULL, '\0', NULL, "tmpdir", 1, + &orte_process_info.tmpdir_base, OPAL_CMD_LINE_TYPE_STRING, + "Set the root for
the session directory tree for orteboot ONLY" }, + + { NULL, NULL, NULL, '\0', NULL, "prefix", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Prefix where Open MPI is installed on remote nodes" }, + /* NOTE(review): noprefix takes 0 parameters yet is declared as + * OPAL_CMD_LINE_TYPE_STRING - confirm whether BOOL was intended + */ + { NULL, NULL, NULL, '\0', NULL, "noprefix", 0, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Disable automatic --prefix behavior" }, + + /* End of list */ + { NULL, NULL, NULL, '\0', NULL, NULL, 0, + NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } +}; + +#if !defined(__WINDOWS__) +extern char** environ; +#endif /* !defined(__WINDOWS__) */ +/* + * Local functions + */ + +/* + * Entry point for orteboot. Parses the command line, handles the + * version and help requests, pushes the daemon debug switches into + * the environment, calls orte_init and then calls orte_rmgr.spawn_job + * with a single empty app context and the ORTE_RMAPS_PERNODE attribute. + * + * Returns the status of the spawn request, or the error code of the + * first earlier step that failed. The help path and the version-only + * path call exit(0) directly; a failed app allocation calls exit(1). + */ +int main(int argc, char *argv[]) +{ + orte_app_context_t *app; + int rc, ret; + int id, iparam; + opal_list_t attributes; + opal_cmd_line_t cmd_line; + + OBJ_CONSTRUCT(&orteboot_globals.lock, opal_mutex_t); + OBJ_CONSTRUCT(&orteboot_globals.cond, opal_condition_t); + orteboot_globals.hostfile = NULL; + orteboot_globals.wdir = NULL; + orteboot_globals.help = false; + orteboot_globals.version = false; + orteboot_globals.verbose = false; + orteboot_globals.exit = false; + + /* Setup MCA params */ + mca_base_param_init(); + + /* find our basename (the name of the executable) so that we can + * use it in pretty-print error messages + */ + orteboot_basename = opal_basename(argv[0]); + + /* Setup and parse the command line */ + opal_cmd_line_create(&cmd_line, cmd_line_init); + mca_base_cmd_line_setup(&cmd_line); + if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(&cmd_line, true, + argc, argv))) { + char *args = NULL; + args = opal_cmd_line_get_usage_msg(&cmd_line); + /* NOTE(review): only two arguments are passed for the usage topic + * here while the help branch below passes six - confirm against + * help-orteboot.txt + */ + opal_show_help("help-orteboot.txt", "orteboot:usage", false, + argv[0], args); + free(args); + return ret; + } + + /* print version if requested. Do this before check for help so + that --version --help works as one might expect.
*/ + if (orteboot_globals.version && + !(1 == argc || orteboot_globals.help)) { + char *project_name = NULL; + if (0 == strcmp(orteboot_basename, "ompiboot")) { + project_name = "Open MPI"; + } else { + project_name = "OpenRTE"; + } + opal_show_help("help-orteboot.txt", "orteboot:version", false, + orteboot_basename, project_name, OPAL_VERSION, + PACKAGE_BUGREPORT); + /* if we were the only argument, exit */ + if (2 == argc) exit(0); + } + + /* Check for help request */ + if (1 == argc || orteboot_globals.help) { + char *args = NULL; + char *project_name = NULL; + if (0 == strcmp(orteboot_basename, "ompiboot")) { + project_name = "Open MPI"; + } else { + project_name = "OpenRTE"; + } + args = opal_cmd_line_get_usage_msg(&cmd_line); + opal_show_help("help-orteboot.txt", "orteboot:usage", false, + orteboot_basename, project_name, OPAL_VERSION, + orteboot_basename, args, + PACKAGE_BUGREPORT); + free(args); + + /* If someone asks for help, that should be all we do */ + exit(0); + } + + /* check for daemon flags and push them into the environment + * since this isn't being automatically done + */ + id = mca_base_param_reg_int_name("orte_debug", "daemons", + "Whether to debug the ORTE daemons or not", + false, false, (int)false, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", "debug", "daemons"); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orteboot.txt", "orteboot:environ", false, + orteboot_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + id = mca_base_param_reg_int_name("orte", "debug", + "Top-level ORTE debug switch", + false, false, 0, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", NULL, "debug"); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orteboot.txt", "orteboot:environ", false, + orteboot_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + id =
mca_base_param_reg_int_name("orte_debug", "daemons_file", + "Whether want stdout/stderr of daemons to go to a file or not", + false, false, 0, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", "debug", + "daemons_file"); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orteboot.txt", "orteboot:environ", false, + orteboot_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + id = mca_base_param_reg_int_name("orte", "no_daemonize", + "Whether to properly daemonize the ORTE daemons or not", + false, false, 0, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", "no_daemonize", NULL); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orteboot.txt", "orteboot:environ", false, + orteboot_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + + /* Initialize our Open RTE environment */ + /* Set the flag telling orte_init that I am NOT a + * singleton, but am "infrastructure" - prevents setting + * up incorrect infrastructure that only a singleton would + * require + */ + if (ORTE_SUCCESS != (rc = orte_init(true))) { + opal_show_help("help-orteboot.txt", "orteboot:init-failure", true, + "orte_init()", rc); + return rc; + } + + /* Prep to start the virtual machine */ + /* construct the list of attributes */ + OBJ_CONSTRUCT(&attributes, opal_list_t); + + orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_PERNODE, ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_NO_OVERRIDE); + + /* Create the app - in this case, that's just a no_op to get the daemons launched */ + app = OBJ_NEW(orte_app_context_t); + if (NULL == app) { + opal_show_help("help-orteboot.txt", "orteboot:call-failed", + true, orteboot_basename, "system", "malloc returned NULL", errno); + exit(1); + } + + + /* Spawn the job */ + + rc = orte_rmgr.spawn_job(&app, 1, &jobid, 0, NULL, NULL, 0, &attributes); + if (ORTE_SUCCESS != rc) { + /* JMS show_help */ +
opal_output(0, "%s: spawn failed with errno=%d\n", orteboot_basename, rc); + } + OBJ_DESTRUCT(&attributes); + + /* NOTE(review): app was created with OBJ_NEW and is not released + * before returning - confirm whether an OBJ_RELEASE is needed + */ + + orte_finalize(); + free(orteboot_basename); + return rc; +} + diff --git a/orte/tools/ortehalt/.ompi_ignore b/orte/tools/ortehalt/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/orte/tools/ortehalt/.ompi_unignore b/orte/tools/ortehalt/.ompi_unignore new file mode 100644 index 0000000000..97b20ffb20 --- /dev/null +++ b/orte/tools/ortehalt/.ompi_unignore @@ -0,0 +1 @@ +rhc diff --git a/orte/tools/ortehalt/Makefile.am b/orte/tools/ortehalt/Makefile.am new file mode 100644 index 0000000000..78366c89d3 --- /dev/null +++ b/orte/tools/ortehalt/Makefile.am @@ -0,0 +1,39 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +libs = \ + $(top_builddir)/orte/liborte.la + +ortehalt_SOURCES = \ + ortehalt.c + +ortehalt_LDADD = $(libs) +ortehalt_DEPENDENCIES = $(libs) + +if OMPI_INSTALL_BINARIES + +bin_PROGRAMS = ortehalt + +dist_pkgdata_DATA = help-ortehalt.txt + +# AM 1.9.6 seems to have a bug in it's dependencies for install-man if +#dist_ and nodist_ are used, so explicitly add to EXTRA_DIST...
+man_MANS = ortehalt.1 +EXTRA_DIST = ortehalt.1 + +endif diff --git a/orte/tools/ortehalt/help-ortehalt.txt b/orte/tools/ortehalt/help-ortehalt.txt new file mode 100644 index 0000000000..59d08d3418 --- /dev/null +++ b/orte/tools/ortehalt/help-ortehalt.txt @@ -0,0 +1,130 @@ +# -*- text -*- +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open RTE's orterun. +# +[orterun:init-failure] +Open RTE was unable to initialize properly. The error occurred while +attempting to %s. Returned value %d instead of ORTE_SUCCESS. +[orterun:usage] +%s (%s) %s + +Usage: %s [OPTION]... [PROGRAM]... +Start the given program using Open RTE + +%s + +Report bugs to %s +[orterun:version] +%s (%s) %s + +Report bugs to %s +[orterun:allocate-resources] +%s was unable to allocate enough resources to start your application. +This might be a transient error (too many nodes in the cluster were +unavailable at the time of the request) or a permanent error (you +requested more nodes than exist in your cluster). + +While probably only useful to Open RTE developers, the error returned +was %d. +[orterun:error-spawning] +%s was unable to start the specified application. An attempt has been +made to clean up all processes that did start. The error returned was +%d. +[orterun:appfile-not-found] +Unable to open the appfile: + + %s + +Double check that this file exists and is readable.
+[orterun:executable-not-specified] +No executable was specified on the %s command line. + +Aborting. +[orterun:multi-apps-and-zero-np] +%s found multiple applications specified on the command line, with +at least one that failed to specify the number of processes to execute. +When specifying multiple applications, you must specify how many processes +of each to launch via the -np argument. +[orterun:nothing-to-do] +%s could not find anything to do. + +It is possible that you forgot to specify how many processes to run +via the "-np" argument. +[orterun:call-failed] +%s encountered a %s call failure. This should not happen, and +usually indicates an error within the operating system itself. +Specifically, the following error occurred: + + %s + +The only other available information that may be helpful is the errno +that was returned: %d. +[orterun:environ] +%s was unable to set + %s = %s +in the environment. Returned value %d instead of ORTE_SUCCESS. +[orterun:precondition] +%s was unable to precondition transports +Returned value %d instead of ORTE_SUCCESS. +[orterun:attr-failed] +%s was unable to define an attribute +Returned value %d instead of ORTE_SUCCESS. +[orterun:proc-aborted] +%s noticed that job rank %lu with PID %lu on node %s exited on signal %d. +[orterun:abnormal-exit] +WARNING: %s encountered an abnormal exit. + +This means that %s exited before it received notification that all +started processes had terminated. You should double check and ensure +that there are no runaway processes still executing. +[orterun:empty-prefix] +A prefix was supplied to %s that only contained slashes. + +This is a fatal error; %s will now abort. No processes were launched. +# +[debugger-mca-param-not-found] +Internal error -- the orte_base_debugger MCA parameter was not able to +be found. Please contact the Open RTE developers; this should not +happen. 
+# +[debugger-orte_base_user_debugger-empty] +The MCA parameter "orte_base_user_debugger" was empty, indicating that +no user-level debuggers have been defined. Please set this MCA +parameter to a value and try again. +# +[debugger-not-found] +A suitable debugger could not be found in your PATH. Check the values +specified in the orte_base_user_debugger MCA parameter for the list of +debuggers that was searched. +# +[debugger-exec-failed] +%s was unable to launch the specified debugger. This is what was +launched: + + %s + +Things to check: + + - Ensure that the debugger is installed properly + - Ensure that the "%s" executable is in your path + - Ensure that any required licenses are available to run the debugger +# +[orterun:daemon-die] +%s was unable to cleanly terminate the daemons for this job. Returned value %d instead of ORTE_SUCCESS. + diff --git a/orte/tools/ortehalt/ortehalt.1 b/orte/tools/ortehalt/ortehalt.1 new file mode 100644 index 0000000000..24463ad2e4 --- /dev/null +++ b/orte/tools/ortehalt/ortehalt.1 @@ -0,0 +1,851 @@ +.\" +.\" Man page for ORTE's orterun command +.\" +.\" .TH name section center-footer left-footer center-header +.TH MPIRUN 1 "March 2006" "Open MPI" "OPEN MPI COMMANDS" +.\" ************************** +.\" Name Section +.\" ************************** +.SH NAME +. +orterun, mpirun, mpiexec \- Execute serial and parallel jobs in Open MPI. + +.B Note: +\fImpirun\fP, \fImpiexec\fP, and \fIorterun\fP are all exact synonyms for each +other. Using any of the names will result in exactly identical behavior. +. +.\" ************************** +.\" Synopsis Section +.\" ************************** +.SH SYNOPSIS +. +.PP +Single Process Multiple Data (SPMD) Model: + +.B mpirun +.R [ options ] +.B +.R [ ] +. + +Multiple Instruction Multiple Data (MIMD) Model: + +.B mpirun +.R [ global_options ] + [ local_options1 ] +.B +.R [ ] : + [ local_options2 ] +.B +.R [ ] : + ... 
: + [ local_optionsN ] +.B +.R [ ] +.P + +Note that in both models, invoking \fImpirun\fR via an absolute path +name is equivalent to specifying the \fI--prefix\fR option with a +\fI\fR value equivalent to the directory where \fImpirun\fR +resides, minus its last subdirectory. For example: + + \fBshell$\fP /usr/local/bin/mpirun ... + +is equivalent to + + \fBshell$\fP mpirun --prefix /usr/local + +. +.\" ************************** +.\" Quick Summary Section +.\" ************************** +.SH QUICK SUMMARY +. +If you are simply looking for how to run an MPI application, you +probably want to use a command line of the following form: + + \fBshell$\fP mpirun [ -np X ] [ --hostfile ] + +This will run X copies of \fI\fR in your current run-time +environment (if running under a supported resource manager, Open MPI's +\fImpirun\fR will usually automatically use the corresponding resource manager +process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR, +which require the use of a hostfile, or will default to running all X +copies on the localhost), scheduling (by default) in a round-robin fashion by +CPU slot. See the rest of this page for more details. +. +.\" ************************** +.\" Options Section +.\" ************************** +.SH OPTIONS +. +.I mpirun +will send the name of the directory where it was invoked on the local +node to each of the remote nodes, and attempt to change to that +directory. See the "Current Working Directory" section below for further +details. +.\" +.\" Start options listing +.\" Indent 10 chacters from start of first column to start of second column +.TP 10 +.B +Pass these run-time arguments to every new process. These must always +be the last arguments to \fImpirun\fP. If an app context file is used, +\fI\fP will be ignored. +. +. +.TP +.B +The program executable. This is identified as the first non-recognized argument +to mpirun. +. +. 
+.TP +.B -aborted\fR,\fP --aborted \fR<#>\fP +Set the maximum number of aborted processes to display. +. +. +.TP +.B --app \fR\fP +Provide an appfile, ignoring all other command line options. +. +. +.TP +.B -bynode\fR,\fP --bynode +Allocate (map) the processes by node in a round-robin scheme. +. +. +.TP +.B -byslot\fR,\fP --byslot +Allocate (map) the processes by slot in a round-robin scheme. This is the +default. +. +. +.TP +.B -c \fR<#>\fP +Synonym for \fI-np\fP. +. +. +.TP +.B -debug\fR,\fP --debug +Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP +MCA parameter. +. +. +.TP +.B -debugger\fR,\fP --debugger +Sequence of debuggers to search for when \fI--debug\fP is used (i.e. +a synonym for \fIorte_base_user_debugger\fP MCA parameter). +. +. +.TP +.B -gmca\fR,\fP --gmca \fR \fP +Pass global MCA parameters that are applicable to all contexts. \fI\fP is +the parameter name; \fI\fP is the parameter value. +. +. +.TP +.B -h\fR,\fP --help +Display help for this command +. +. +.TP +.B -H \fR\fP +Synonym for \fI-host\fP. +. +. +.TP +.B -host\fR,\fP --host \fR\fP +List of hosts on which to invoke processes. +. +. +.TP +.B -hostfile\fR,\fP --hostfile \fR\fP +Provide a hostfile to use. +.\" JJH - Should have man page for how to format a hostfile properly. +. +. +.TP +.B -machinefile\fR,\fP --machinefile \fR\fP +Synonym for \fI-hostfile\fP. +. +. +.TP +.B -mca\fR,\fP --mca +Send arguments to various MCA modules. See the "MCA" section, below. +. +. +.TP +.B -n\fR,\fP --n \fR<#>\fP +Synonym for \fI-np\fP. +. +. +.TP +.B -nolocal\fR,\fP --nolocal +Do not run any copies of the launched application on the same node as +orterun is running. This option will override listing the localhost +with \fB--host\fR or any other host-specifying mechanism. +. +. +.TP +.B -nooversubscribe\fR,\fP --nooversubscribe +Do not oversubscribe any nodes; error (without starting any processes) +if the requested number of processes would cause oversubscription. 
+This option implicitly sets "max_slots" equal to the "slots" value for +each node. +. +. +.TP +.B -np \fR<#>\fP +Run this many copies of the program on the given nodes. This option +indicates that the specified file is an executable program and not an +application context. If no value is provided for the number of copies to +execute (i.e., neither the "-np" nor its synonyms are provided on the command +line), Open MPI will automatically execute a copy of the program on +each process slot (see below for description of a "process slot"). This +feature, however, can only be used in the SPMD model and will return an +error (without beginning execution of the application) otherwise. +. +. +.TP +.B -nw\fR,\fP --nw +Launch the processes and do not wait for their completion. mpirun will +complete as soon as successful launch occurs. +. +. +.TP +.B -path\fR,\fP --path \fR\fP + that will be used when attempting to locate requested executables. +. +. +.TP +.B --prefix \fR\fP +Prefix directory that will be used to set the \fIPATH\fR and +\fILD_LIBRARY_PATH\fR on the remote node before invoking Open MPI or +the target process. See the "Remote Execution" section, below. +. +. +.TP +.B -q\fR,\fP --quiet +Suppress informative messages from orterun during application execution. +. +. +.TP +.B --tmpdir \fR\fP +Set the root for the session directory tree for mpirun only. +. +. +.TP +.B -tv\fR,\fP --tv +Launch processes under the TotalView debugger. +Deprecated backwards compatibility flag. Synonym for \fI--debug\fP. +. +. +.TP +.B --universe \fR\fP +For this application, set the universe name as: + username@hostname:universe_name +. +. +.TP +.B -v\fR,\fP --verbose +Be verbose +.TP +.B -V\fR,\fP --version +Print version number. If no other arguments are given, this will also +cause orterun to exit. +. +. +.TP +.B -wd \fR\fP +Change to the directory before the user's program executes. +See the "Current Working Directory" section for notes on relative paths. 
+.B Note: +If the \fI-wd\fP option appears both on the command line and in an +application context, the context will take precedence over the command line. +. +. +.TP +.B -x \fR\fP +Export the specified environment variables to the remote nodes before +executing the program. Existing environment variables can be +specified (see the Examples section, below), or new variable names +specified with corresponding values. The parser for the \fI-x\fP +option is not very sophisticated; it does not even understand quoted +values. Users are advised to set variables in the environment, and +then use \fI-x\fP to export (not define) them. +. +. +.P +The following options are useful for developers; they are not generally +useful to most ORTE and/or MPI users: +. +.TP +.B -d\fR,\fP --debug-devel +Enable debugging of the OpenRTE (the run-time layer in Open MPI). +This is not generally useful for most users. +. +. +.TP +.B --debug-daemons +Enable debugging of any OpenRTE daemons used by this application. +. +. +.TP +.B --debug-daemons-file +Enable debugging of any OpenRTE daemons used by this application, storing +output in files. +. +. +.TP +.B --no-daemonize +Do not detach OpenRTE daemons used by this application. +. +. +.\" ************************** +.\" Description Section +.\" ************************** +.SH DESCRIPTION +. +One invocation of \fImpirun\fP starts an MPI application running under Open +MPI. If the application is single process multiple data (SPMD), the application +can be specified on the \fImpirun\fP command line. + +If the application is multiple instruction multiple data (MIMD), comprising of +multiple programs, the set of programs and argument can be specified in one of +two ways: Extended Command Line Arguments, and Application Context. +.PP +An application context describes the MIMD program set including all arguments +in a separate file. +.\"See appcontext(5) for a description of the application context syntax. 
+This file essentially contains multiple \fImpirun\fP command lines, less the +command name itself. The ability to specify different options for different +instantiations of a program is another reason to use an application context. +.PP +Extended command line arguments allow for the description of the application +layout on the command line using colons (\fI:\fP) to separate the specification +of programs and arguments. Some options are globally set across all specified +programs (e.g. --hostfile), while others are specific to a single program +(e.g. -np). +. +. +. +.SS Process Slots +. +Open MPI uses "slots" to represent a potential location for a process. +Hence, a node with 2 slots means that 2 processes can be launched on +that node. For performance, the community typically equates a "slot" +with a physical CPU, thus ensuring that any process assigned to that +slot has a dedicated processor. This is not, however, a requirement for +the operation of Open MPI. +.PP +Slots can be specified in hostfiles after the hostname. For example: +. +.TP 4 +host1.example.com slots=4 +Indicates that there are 4 process slots on host1. +. +.PP +If no slots value is specified, then Open MPI will automatically assign +a default value of "slots=1" to that host. +. +.PP +When running under resource managers (e.g., SLURM, Torque, etc.), Open +MPI will obtain both the hostnames and the number of slots directly +from the resource manager. For example, if running under a SLURM job, +Open MPI will automatically receive the hosts that SLURM has allocated +to the job as well as how many slots on each node that SLURM says +are usable - in most high-performance environments, the slots will +equate to the number of processors on the node. +. +.PP +When deciding where to launch processes, Open MPI will first fill up +all available slots before oversubscribing (see "Location +Nomenclature", below, for more details on the scheduling algorithms +available).
Unless told otherwise, Open MPI will arbitrarily +oversubscribe nodes. For example, if the only node available is the +localhost, Open MPI will run as many processes as specified by the +-n (or one of its variants) command line option on the +localhost (although they may run quite slowly, since they'll all be +competing for CPU and other resources). +. +.PP +Limits can be placed on oversubscription with the "max_slots" +attribute in the hostfile. For example: +. +.TP 4 +host2.example.com slots=4 max_slots=6 +Indicates that there are 4 process slots on host2. Further, Open MPI +is limited to launching a maximum of 6 processes on host2. +. +.TP +host3.example.com slots=2 max_slots=2 +Indicates that there are 2 process slots on host3 and that no +oversubscription is allowed (similar to the \fI--nooversubscribe\fR +option). +. +.TP +host4.example.com max_slots=2 +Shorthand; same as listing "slots=2 max_slots=2". +. +. +.PP +Note that Open MPI's support for resource managers does not currently +set the "max_slots" values for hosts. If you wish to prevent +oversubscription in such scenarios, use the \fI--nooversubscribe\fR +option. +. +.PP +In scenarios where the user wishes to launch an application across +all available slots by not providing a "-n" option on the mpirun +command line, Open MPI will launch a process on each process slot +for each host within the provided environment. For example, if a +hostfile has been provided, then Open MPI will spawn processes +on each identified host up to the "slots=x" limit if oversubscription +is not allowed. If oversubscription is allowed (the default), then +Open MPI will spawn processes on each host up to the "max_slots=y" limit +if that value is provided. In all cases, the "-bynode" and "-byslot" +mapping directives will be enforced to ensure proper placement of +process ranks. +. +. +. +.SS Location Nomenclature +. +As described above, \fImpirun\fP can specify arbitrary locations in +the current Open MPI universe. 
Locations can be specified either by +CPU or by node. + +.B Note: +This nomenclature does not force Open MPI to bind processes to CPUs -- +specifying a location "by CPU" is really a convenience mechanism for +SMPs that ultimately maps down to a specific node. +.PP +Specifying locations by node will launch one copy of an executable per +specified node. +Using the \fI--bynode\fP option tells Open MPI to use all available nodes. +Using the \fI--byslot\fP option tells Open MPI to use all slots on an available +node before allocating resources on the next available node. +For example: +. +.TP 4 +mpirun --bynode -np 4 a.out +Runs one copy of the executable +.I a.out +on all available nodes in the Open MPI universe. MPI_COMM_WORLD rank 0 +will be on node0, rank 1 will be on node1, etc., regardless of how many slots +are available on each of the nodes. +. +. +.TP +mpirun --byslot -np 4 a.out +Runs one copy of the executable +.I a.out +on each slot on a given node before running the executable on other available +nodes. +. +. +. +.SS Specifying Hosts +. +Hosts can be specified in a number of ways. The most common of which is in a +'hostfile' or 'machinefile'. If our hostfile contains the following information: +. +. + + \fBshell$\fP cat my-hostfile + node00 slots=2 + node01 slots=2 + node02 slots=2 + +. +. +.TP +mpirun --hostfile my-hostfile -np 3 a.out +This will run one copy of the executable +.I a.out +on hosts node00, node01, and node02. +. +. +.PP +Another method for specifying hosts is directly on the command line. Here one +can include and exclude hosts from the set of hosts to run on. For example: +. +. +.TP +mpirun -np 3 --host a a.out +Runs three copies of the executable +.I a.out +on host a. +. +. +.TP +mpirun -np 3 --host a,b,c a.out +Runs one copy of the executable +.I a.out +on hosts a, b, and c. +. +. +.TP +mpirun -np 3 --hostfile my-hostfile --host node00 a.out +Runs three copies of the executable +.I a.out +on host node00. +. +.
+.TP +mpirun -np 3 --hostfile my-hostfile --host node10 a.out +This will prompt an error since node10 is not in my-hostfile; mpirun will +abort. +. +. +.TP +shell$ mpirun -np 1 --host a hostname : -np 2 --host b,c uptime +Runs one copy of the executable +.I hostname +on host a. And runs one copy of the executable +.I uptime +on hosts b and c. +. +. +. +.SS No Local Launch +. +Using the \fB--nolocal\fR option to orterun tells the system to not +launch any of the application processes on the same node that orterun +is running. While orterun typically blocks and consumes few system +resources, this option can be helpful for launching very large jobs +where orterun may actually need to use noticeable amounts of memory +and/or processing time. \fB--nolocal\fR allows orterun to run without +sharing the local node with the launched applications, and likewise +allows the launched applications to run unhindered by orterun's system +usage. +.PP +Note that \fB--nolocal\fR will override any other specification to +launch the application on the local node. It will disqualify the +localhost from being capable of running any processes in the +application. +. +. +.TP +shell$ mpirun -np 1 --host localhost --nolocal hostname +This example will result in an error because orterun will not find +anywhere to launch the application. +. +. +. +.SS No Oversubscription +. +Using the \fI--nooversubscribe\fR option causes Open MPI to implicitly +set the "max_slots" value to be the same as the "slots" value for each +node. This can be especially helpful when running jobs under a +resource manager because Open MPI currently only sets the "slots" +value for each node that it obtains from the resource manager. +. +. +. +.SS Application Context or Executable Program? +. +To distinguish the two different forms, \fImpirun\fP +looks on the command line for the \fI--app\fP option. If +it is specified, then the file named on the command line is +assumed to be an application context.
If it is not +specified, then the file is assumed to be an executable program. +. +. +. +.SS Locating Files +. +If \fIno\fP relative or absolute path is specified for a file, Open MPI +will look for files by searching the directories in the user's PATH environment +variable as defined on the source node(s). +.PP +If a relative directory is specified, it must be relative to the initial +working directory determined by the specific starter used. For example, when +using the rsh or ssh starters, the initial directory is $HOME by default. Other +starters may set the initial directory to the current working directory from +the invocation of \fImpirun\fP. +. +. +. +.SS Current Working Directory +. +The \fI\-wd\fP mpirun option allows the user to change to an arbitrary +directory before their program is invoked. It can also be used in application +context files to specify working directories on specific nodes and/or +for specific applications. +.PP +If the \fI\-wd\fP option appears both in a context file and on the command line, +the context file directory will override the command line value. +.PP +If the \fI-wd\fP option is specified, Open MPI will attempt to change to the +specified directory on all of the remote nodes. If this fails, \fImpirun\fP +will abort. +.PP +If the \fI-wd\fP option is \fBnot\fP specified, Open MPI will send the +directory name where \fImpirun\fP was invoked to each of the remote nodes. The +remote nodes will try to change to that directory. If they are unable (e.g., if +the directory does not exist on that node), then Open MPI will use the default +directory determined by the starter. +.PP +All directory changing occurs before the user's program is invoked; it +does not wait until \fIMPI_INIT\fP is called. +. +. +. +.SS Standard I/O +. +Open MPI directs UNIX standard input to /dev/null on all processes +except the MPI_COMM_WORLD rank 0 process. The MPI_COMM_WORLD rank 0 process +inherits standard input from \fImpirun\fP.
+.B Note: +The node that invoked \fImpirun\fP need not be the same as the node where the +MPI_COMM_WORLD rank 0 process resides. Open MPI handles the redirection of +\fImpirun\fP's standard input to the rank 0 process. +.PP +Open MPI directs UNIX standard output and error from remote nodes to the node +that invoked \fImpirun\fP and prints it on the standard output/error of +\fImpirun\fP. +Local processes inherit the standard output/error of \fImpirun\fP and transfer +to it directly. +.PP +Thus it is possible to redirect standard I/O for Open MPI applications by +using the typical shell redirection procedure on \fImpirun\fP. + + \fBshell$\fP mpirun -np 2 my_app < my_input > my_output + +Note that in this example \fIonly\fP the MPI_COMM_WORLD rank 0 process will +receive the stream from \fImy_input\fP on stdin. The stdin on all the other +nodes will be tied to /dev/null. However, the stdout from all nodes will +be collected into the \fImy_output\fP file. +. +. +. +.SS Signal Propagation +. +When orterun receives a SIGTERM or SIGINT, it will attempt to kill +the entire job by sending all processes in the job a SIGTERM, waiting +a small number of seconds, then sending all processes in the job a +SIGKILL. +. +SIGUSR1 and SIGUSR2 signals received by orterun are propagated to +all processes in the job. Other signals are not currently propagated +by orterun. +. +. +.SS Process Termination / Signal Handling +. +During the run of an MPI application, if any rank dies abnormally +(either exiting before invoking \fIMPI_FINALIZE\fP, or dying as the result of a +signal), \fImpirun\fP will print out an error message and kill the rest of the +MPI application. +.PP +User signal handlers should probably avoid trying to clean up MPI state +(Open MPI is, currently, neither thread-safe nor async-signal-safe).
+For example, if a segmentation fault occurs in \fIMPI_SEND\fP (perhaps because +a bad buffer was passed in) and a user signal handler is invoked, if this user +handler attempts to invoke \fIMPI_FINALIZE\fP, Bad Things could happen since +Open MPI was already "in" MPI when the error occurred. Since \fImpirun\fP +will notice that the process died due to a signal, it is probably not +necessary (and safest) for the user to only clean up non-MPI state. +. +. +. +.SS Process Environment +. +Processes in the MPI application inherit their environment from the +Open RTE daemon upon the node on which they are running. The +environment is typically inherited from the user's shell. On remote +nodes, the exact environment is determined by the boot MCA module +used. The \fIrsh\fR launch module, for example, uses either +\fIrsh\fR/\fIssh\fR to launch the Open RTE daemon on remote nodes, and +typically executes one or more of the user's shell-setup files before +launching the Open RTE daemon. When running dynamically linked +applications which require the \fILD_LIBRARY_PATH\fR environment +variable to be set, care must be taken to ensure that it is correctly +set when booting Open MPI. +.PP +See the "Remote Execution" section for more details. +. +. +.SS Remote Execution +. +Open MPI requires that the \fIPATH\fR environment variable be set to +find executables on remote nodes (this is typically only necessary in +\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled +environments typically copy the current environment to the execution +of remote jobs, so if the current environment has \fIPATH\fR and/or +\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it +set properly). If Open MPI was compiled with shared library support, +it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment +variable set on remote nodes as well (especially to find the shared +libraries required to run user MPI applications). 
+.PP +However, it is not always desirable or possible to edit shell +startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The +\fI--prefix\fR option is provided for some simple configurations where +this is not possible. +.PP +The \fI--prefix\fR option takes a single argument: the base directory +on the remote node where Open MPI is installed. Open MPI will use +this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR +before executing any Open MPI or user applications. This allows +running Open MPI jobs without having pre-configured the \fIPATH\fR and +\fILD_LIBRARY_PATH\fR on the remote nodes. +.PP +Open MPI adds the basename of the current +node's "bindir" (the directory where Open MPI's executables are +installed) to the prefix and uses that to set the \fIPATH\fR on the +remote node. Similarly, Open MPI adds the basename of the current +node's "libdir" (the directory where Open MPI's libraries are +installed) to the prefix and uses that to set the +\fILD_LIBRARY_PATH\fR on the remote node. For example: +.TP 15 +Local bindir: +/local/node/directory/bin +.TP +Local libdir: +/local/node/directory/lib64 +.PP +If the following command line is used: + + \fBshell$\fP mpirun --prefix /remote/node/directory + +Open MPI will add "/remote/node/directory/bin" to the \fIPATH\fR +and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the +remote node before attempting to execute anything. +.PP +Note that \fI--prefix\fR can be set on a per-context basis, allowing +for different values for different nodes. +.PP +The \fI--prefix\fR option is not sufficient if the installation paths +on the remote node are different than the local node (e.g., if "/lib" +is used on the local node, but "/lib64" is used on the remote node), +or if the installation paths are something other than a subdirectory +under a common prefix.
+.PP +Note that executing \fImpirun\fR via an absolute pathname is +equivalent to specifying \fI--prefix\fR without the last subdirectory +in the absolute pathname to \fImpirun\fR. For example: + + \fBshell$\fP /usr/local/bin/mpirun ... + +is equivalent to + + \fBshell$\fP mpirun --prefix /usr/local +. +. +. +.SS Exported Environment Variables +. +All environment variables that are named in the form OMPI_* will automatically +be exported to new processes on the local and remote nodes. +The \fI\-x\fP option to \fImpirun\fP can be used to export specific environment +variables to the new processes. While the syntax of the \fI\-x\fP +option allows the definition of new variables, note that the parser +for this option is currently not very sophisticated - it does not even +understand quoted values. Users are advised to set variables in the +environment and use \fI\-x\fP to export them; not to define them. +. +. +. +.SS MCA (Modular Component Architecture) +. +The \fI-mca\fP switch allows the passing of parameters to various MCA modules. +.\" Open MPI's MCA modules are described in detail in ompimca(7). +MCA modules have direct impact on MPI programs because they allow tunable +parameters to be set at run time (such as which BTL communication device driver +to use, what parameters to pass to that BTL, etc.). +.PP +The \fI-mca\fP switch takes two arguments: \fI<key>\fP and \fI<value>\fP. +The \fI<key>\fP argument generally specifies which MCA module will receive the value. +For example, the \fI<key>\fP "btl" is used to select which BTL to be used for +transporting MPI messages. The \fI<value>\fP argument is the value that is +passed. +For example: +. +.TP 4 +mpirun -mca btl tcp,self -np 1 foo +Tells Open MPI to use the "tcp" and "self" BTLs, and to run a single copy of +"foo" on an allocated node. +. +.TP +mpirun -mca btl self -np 1 foo +Tells Open MPI to use the "self" BTL, and to run a single copy of "foo" on an +allocated node. +.\" And so on. Open MPI's BTL MCA modules are described in ompimca_btl(7).
+.PP +The \fI-mca\fP switch can be used multiple times to specify different +\fI\fP and/or \fI\fP arguments. If the same \fI\fP is +specified more than once, the \fI\fPs are concatenated with a comma +(",") separating them. +.PP +.B Note: +The \fI-mca\fP switch is simply a shortcut for setting environment variables. +The same effect may be accomplished by setting corresponding environment +variables before running \fImpirun\fP. +The form of the environment variables that Open MPI sets are: + + OMPI_= +.PP +Note that the \fI-mca\fP switch overrides any previously set environment +variables. Also note that unknown \fI\fP arguments are still set as +environment variable -- they are not checked (by \fImpirun\fP) for correctness. +Illegal or incorrect \fI\fP arguments may or may not be reported -- it +depends on the specific MCA module. +. +.\" ************************** +.\" Examples Section +.\" ************************** +.SH EXAMPLES +Be sure to also see the examples in the "Location Nomenclature" section, above. +. +.TP 4 +mpirun -np 1 prog1 +Load and execute prog1 on one node. Search the user's $PATH for the +executable file on each node. +. +. +.TP +mpirun -np 8 --byslot prog1 +Run 8 copies of prog1 wherever Open MPI wants to run them. +. +. +.TP +mpirun -np 4 -mca btl ib,tcp,self prog1 +Run 4 copies of prog1 using the "ib", "tcp", and "self" BTL's for the transport +of MPI messages. +. +.\" ************************** +.\" Diagnostics Section +.\" ************************** +. +.\" .SH DIAGNOSTICS +.\".TP 4 +.\"Error Msg: +.\"Description +. +.\" ************************** +.\" Return Value Section +.\" ************************** +. +.SH RETURN VALUE +. +\fImpirun\fP returns 0 if all ranks started by \fImpirun\fP exit after calling +MPI_FINALIZE. A non-zero value is returned if an internal error occurred in +mpirun, or one or more ranks exited before calling MPI_FINALIZE. If an +internal error occurred in mpirun, the corresponding error code is returned. 
+In the event that one or more ranks exit before calling MPI_FINALIZE, the +return value of the rank of the process that \fImpirun\fP first notices died +before calling MPI_FINALIZE will be returned. Note that, in general, this will +be the first rank that died but is not guaranteed to be so. +.PP +However, note that if the \fI-nw\fP switch is used, the return value from +mpirun does not indicate the exit status of the ranks. +. +.\" ************************** +.\" See Also Section +.\" ************************** +. +.\" .SH SEE ALSO +.\" orted(1) diff --git a/orte/tools/ortehalt/ortehalt.c b/orte/tools/ortehalt/ortehalt.c new file mode 100644 index 0000000000..b41c7519d0 --- /dev/null +++ b/orte/tools/ortehalt/ortehalt.c @@ -0,0 +1,177 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#include +#include +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ +#ifdef HAVE_SYS_WAIT_H +#include +#endif /* HAVE_SYS_WAIT_H */ +#ifdef HAVE_LIBGEN_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/install_dirs.h" +#include "opal/mca/base/base.h" +#include "opal/threads/condition.h" +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/cmd_line.h" +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/util/trace.h" +#include "opal/version.h" + +#include "orte/class/orte_pointer_array.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/universe_setup_file_io.h" + +#include "orte/mca/ns/ns.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/pls/pls.h" +#include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/rmgr/rmgr.h" +#include "orte/mca/schema/schema.h" +#include "orte/mca/smr/smr.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_wait.h" + +static char *orte_basename = NULL; + +/* + * Globals for ortehalt's command line options; the cmd_line_init table below points its help/version/verbose/quiet entries at these fields + */ +struct globals_t { + bool help; + bool version; + bool verbose; + bool quiet; + bool exit; + int exit_status; + char *wdir; + char *path; + opal_mutex_t lock; + opal_condition_t cond; +} ortehalt_globals; + + +opal_cmd_line_init_t cmd_line_init[] = { + /* Various "obvious" options */ + { NULL, NULL, NULL, 'h', NULL, "help", 0, + &ortehalt_globals.help, OPAL_CMD_LINE_TYPE_BOOL, + "This help message" }, + { NULL, NULL, NULL, 'V', NULL, "version", 0, + &ortehalt_globals.version, OPAL_CMD_LINE_TYPE_BOOL, + "Print version and exit" }, + { NULL, NULL, NULL,
'v', NULL, "verbose", 0, + &ortehalt_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, + "Be verbose" }, + { NULL, NULL, NULL, 'q', NULL, "quiet", 0, + &ortehalt_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL, + "Suppress helpful messages" }, + + /* OpenRTE arguments */ + { "orte", "debug", NULL, 'd', NULL, "debug-devel", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Enable debugging of OpenRTE" }, + + { "universe", NULL, NULL, '\0', NULL, "universe", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Set the universe name as username@hostname:universe_name for this application" }, + + { NULL, NULL, NULL, '\0', NULL, "tmpdir", 1, + &orte_process_info.tmpdir_base, OPAL_CMD_LINE_TYPE_STRING, + "Set the root for the session directory tree for orterun ONLY" }, + + /* End of list */ + { NULL, NULL, NULL, '\0', NULL, NULL, 0, + NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } +}; + +#if !defined(__WINDOWS__) +extern char** environ; +#endif /* !defined(__WINDOWS__) */ +/* Entry point: calls mca_base_param_init() and orte_register_params(), exports the "orte debug" parameter into the environment when it is non-zero, then calls orte_init() followed by orte_finalize(); cmd_line_init is not referenced in this function. Returns the last rc obtained. */ +int main(int argc, char *argv[]) +{ + int rc; + int id, iparam; + + /* Setup MCA params */ + + mca_base_param_init(); + orte_register_params(false); + + /* find our basename (the name of the executable) so that we can + use it in pretty-print error messages */ + orte_basename = opal_basename(argv[0]); + + /* check for daemon flags and push them into the environment + * since this isn't being automatically done + */ + id = mca_base_param_reg_int_name("orte", "debug", + "Top-level ORTE debug switch", + false, false, 0, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", NULL, "debug"); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-ortehalt.txt", "ortehalt:environ", false, + orte_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + + /* Initialize our Open RTE environment; NOTE(review): the failure message below is looked up in help-orterun.txt rather than help-ortehalt.txt - confirm this is intended */ + /* Set the flag telling orte_init that I am NOT a + * singleton, but am "infrastructure" - prevents setting + * up incorrect infrastructure that only a singleton would + * require + */
+ if (ORTE_SUCCESS != (rc = orte_init(true))) { + opal_show_help("help-orterun.txt", "orterun:init-failure", true, + "orte_init()", rc); + return rc; + } + + /* NOTE(review): nothing runs between orte_init() and orte_finalize() - presumably the halt logic is still to be added; confirm */ + orte_finalize(); + free(orte_basename); + return rc; +} diff --git a/orte/tools/ortekill/.ompi_ignore b/orte/tools/ortekill/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/orte/tools/ortekill/.ompi_unignore b/orte/tools/ortekill/.ompi_unignore new file mode 100644 index 0000000000..97b20ffb20 --- /dev/null +++ b/orte/tools/ortekill/.ompi_unignore @@ -0,0 +1 @@ +rhc diff --git a/orte/tools/ortekill/Makefile.am b/orte/tools/ortekill/Makefile.am new file mode 100644 index 0000000000..30a59b964c --- /dev/null +++ b/orte/tools/ortekill/Makefile.am @@ -0,0 +1,39 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +libs = \ + $(top_builddir)/orte/liborte.la + +ortekill_SOURCES = \ + ortekill.c + +ortekill_LDADD = $(libs) +ortekill_DEPENDENCIES = $(libs) + +if OMPI_INSTALL_BINARIES + +bin_PROGRAMS = ortekill + +dist_pkgdata_DATA = help-ortekill.txt + +# AM 1.9.6 seems to have a bug in it's dependencies for install-man if +#dist_ and nodist_ are used, so explicitly add to EXTRA_DIST...
+man_MANS = ortekill.1 +EXTRA_DIST = ortekill.1 + +endif diff --git a/orte/tools/ortekill/help-ortekill.txt b/orte/tools/ortekill/help-ortekill.txt new file mode 100644 index 0000000000..59d08d3418 --- /dev/null +++ b/orte/tools/ortekill/help-ortekill.txt @@ -0,0 +1,130 @@ +# -*- text -*- +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open RTE's orterun. +# +[orterun:init-failure] +Open RTE was unable to initialize properly. The error occurred while +attempting to %s. Returned value %d instead of ORTE_SUCCESS. +[orterun:usage] +%s (%s) %s + +Usage: %s [OPTION]... [PROGRAM]... +Start the given program using Open RTE + +%s + +Report bugs to %s +[orterun:version] +%s (%s) %s + +Report bugs to %s +[orterun:allocate-resources] +%s was unable to allocate enough resources to start your application. +This might be a transient error (too many nodes in the cluster were +unavailable at the time of the request) or a permanent error (you +requested more nodes than exist in your cluster). + +While probably only useful to Open RTE developers, the error returned +was %d. +[orterun:error-spawning] +%s was unable to start the specified application. An attempt has been +made to clean up all processes that did start. The error returned was +%d. +[orterun:appfile-not-found] +Unable to open the appfile: + + %s + +Double check that this file exists and is readable.
+[orterun:executable-not-specified] +No executable was specified on the %s command line. + +Aborting. +[orterun:multi-apps-and-zero-np] +%s found multiple applications specified on the command line, with +at least one that failed to specify the number of processes to execute. +When specifying multiple applications, you must specify how many processes +of each to launch via the -np argument. +[orterun:nothing-to-do] +%s could not find anything to do. + +It is possible that you forgot to specify how many processes to run +via the "-np" argument. +[orterun:call-failed] +%s encountered a %s call failure. This should not happen, and +usually indicates an error within the operating system itself. +Specifically, the following error occurred: + + %s + +The only other available information that may be helpful is the errno +that was returned: %d. +[orterun:environ] +%s was unable to set + %s = %s +in the environment. Returned value %d instead of ORTE_SUCCESS. +[orterun:precondition] +%s was unable to precondition transports +Returned value %d instead of ORTE_SUCCESS. +[orterun:attr-failed] +%s was unable to define an attribute +Returned value %d instead of ORTE_SUCCESS. +[orterun:proc-aborted] +%s noticed that job rank %lu with PID %lu on node %s exited on signal %d. +[orterun:abnormal-exit] +WARNING: %s encountered an abnormal exit. + +This means that %s exited before it received notification that all +started processes had terminated. You should double check and ensure +that there are no runaway processes still executing. +[orterun:empty-prefix] +A prefix was supplied to %s that only contained slashes. + +This is a fatal error; %s will now abort. No processes were launched. +# +[debugger-mca-param-not-found] +Internal error -- the orte_base_debugger MCA parameter was not able to +be found. Please contact the Open RTE developers; this should not +happen. 
+# +[debugger-orte_base_user_debugger-empty] +The MCA parameter "orte_base_user_debugger" was empty, indicating that +no user-level debuggers have been defined. Please set this MCA +parameter to a value and try again. +# +[debugger-not-found] +A suitable debugger could not be found in your PATH. Check the values +specified in the orte_base_user_debugger MCA parameter for the list of +debuggers that was searched. +# +[debugger-exec-failed] +%s was unable to launch the specified debugger. This is what was +launched: + + %s + +Things to check: + + - Ensure that the debugger is installed properly + - Ensure that the "%s" executable is in your path + - Ensure that any required licenses are available to run the debugger +# +[orterun:daemon-die] +%s was unable to cleanly terminate the daemons for this job. Returned value %d instead of ORTE_SUCCESS. + diff --git a/orte/tools/ortekill/ortekill.1 b/orte/tools/ortekill/ortekill.1 new file mode 100644 index 0000000000..24463ad2e4 --- /dev/null +++ b/orte/tools/ortekill/ortekill.1 @@ -0,0 +1,851 @@ +.\" +.\" Man page for ORTE's orterun command +.\" +.\" .TH name section center-footer left-footer center-header +.TH MPIRUN 1 "March 2006" "Open MPI" "OPEN MPI COMMANDS" +.\" ************************** +.\" Name Section +.\" ************************** +.SH NAME +. +orterun, mpirun, mpiexec \- Execute serial and parallel jobs in Open MPI. + +.B Note: +\fImpirun\fP, \fImpiexec\fP, and \fIorterun\fP are all exact synonyms for each +other. Using any of the names will result in exactly identical behavior. +. +.\" ************************** +.\" Synopsis Section +.\" ************************** +.SH SYNOPSIS +. +.PP +Single Process Multiple Data (SPMD) Model: + +.B mpirun +.R [ options ] +.B +.R [ ] +. + +Multiple Instruction Multiple Data (MIMD) Model: + +.B mpirun +.R [ global_options ] + [ local_options1 ] +.B +.R [ ] : + [ local_options2 ] +.B +.R [ ] : + ... 
: + [ local_optionsN ] +.B +.R [ ] +.P + +Note that in both models, invoking \fImpirun\fR via an absolute path +name is equivalent to specifying the \fI--prefix\fR option with a +\fI\fR value equivalent to the directory where \fImpirun\fR +resides, minus its last subdirectory. For example: + + \fBshell$\fP /usr/local/bin/mpirun ... + +is equivalent to + + \fBshell$\fP mpirun --prefix /usr/local + +. +.\" ************************** +.\" Quick Summary Section +.\" ************************** +.SH QUICK SUMMARY +. +If you are simply looking for how to run an MPI application, you +probably want to use a command line of the following form: + + \fBshell$\fP mpirun [ -np X ] [ --hostfile ] + +This will run X copies of \fI\fR in your current run-time +environment (if running under a supported resource manager, Open MPI's +\fImpirun\fR will usually automatically use the corresponding resource manager +process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR, +which require the use of a hostfile, or will default to running all X +copies on the localhost), scheduling (by default) in a round-robin fashion by +CPU slot. See the rest of this page for more details. +. +.\" ************************** +.\" Options Section +.\" ************************** +.SH OPTIONS +. +.I mpirun +will send the name of the directory where it was invoked on the local +node to each of the remote nodes, and attempt to change to that +directory. See the "Current Working Directory" section below for further +details. +.\" +.\" Start options listing +.\" Indent 10 chacters from start of first column to start of second column +.TP 10 +.B +Pass these run-time arguments to every new process. These must always +be the last arguments to \fImpirun\fP. If an app context file is used, +\fI\fP will be ignored. +. +. +.TP +.B +The program executable. This is identified as the first non-recognized argument +to mpirun. +. +. 
+.TP +.B -aborted\fR,\fP --aborted \fR<#>\fP +Set the maximum number of aborted processes to display. +. +. +.TP +.B --app \fR\fP +Provide an appfile, ignoring all other command line options. +. +. +.TP +.B -bynode\fR,\fP --bynode +Allocate (map) the processes by node in a round-robin scheme. +. +. +.TP +.B -byslot\fR,\fP --byslot +Allocate (map) the processes by slot in a round-robin scheme. This is the +default. +. +. +.TP +.B -c \fR<#>\fP +Synonym for \fI-np\fP. +. +. +.TP +.B -debug\fR,\fP --debug +Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP +MCA parameter. +. +. +.TP +.B -debugger\fR,\fP --debugger +Sequence of debuggers to search for when \fI--debug\fP is used (i.e. +a synonym for \fIorte_base_user_debugger\fP MCA parameter). +. +. +.TP +.B -gmca\fR,\fP --gmca \fR \fP +Pass global MCA parameters that are applicable to all contexts. \fI\fP is +the parameter name; \fI\fP is the parameter value. +. +. +.TP +.B -h\fR,\fP --help +Display help for this command +. +. +.TP +.B -H \fR\fP +Synonym for \fI-host\fP. +. +. +.TP +.B -host\fR,\fP --host \fR\fP +List of hosts on which to invoke processes. +. +. +.TP +.B -hostfile\fR,\fP --hostfile \fR\fP +Provide a hostfile to use. +.\" JJH - Should have man page for how to format a hostfile properly. +. +. +.TP +.B -machinefile\fR,\fP --machinefile \fR\fP +Synonym for \fI-hostfile\fP. +. +. +.TP +.B -mca\fR,\fP --mca +Send arguments to various MCA modules. See the "MCA" section, below. +. +. +.TP +.B -n\fR,\fP --n \fR<#>\fP +Synonym for \fI-np\fP. +. +. +.TP +.B -nolocal\fR,\fP --nolocal +Do not run any copies of the launched application on the same node as +orterun is running. This option will override listing the localhost +with \fB--host\fR or any other host-specifying mechanism. +. +. +.TP +.B -nooversubscribe\fR,\fP --nooversubscribe +Do not oversubscribe any nodes; error (without starting any processes) +if the requested number of processes would cause oversubscription. 
+This option implicitly sets "max_slots" equal to the "slots" value for +each node. +. +. +.TP +.B -np \fR<#>\fP +Run this many copies of the program on the given nodes. This option +indicates that the specified file is an executable program and not an +application context. If no value is provided for the number of copies to +execute (i.e., neither the "-np" nor its synonyms are provided on the command +line), Open MPI will automatically execute a copy of the program on +each process slot (see below for description of a "process slot"). This +feature, however, can only be used in the SPMD model and will return an +error (without beginning execution of the application) otherwise. +. +. +.TP +.B -nw\fR,\fP --nw +Launch the processes and do not wait for their completion. mpirun will +complete as soon as successful launch occurs. +. +. +.TP +.B -path\fR,\fP --path \fR\fP + that will be used when attempting to locate requested executables. +. +. +.TP +.B --prefix \fR\fP +Prefix directory that will be used to set the \fIPATH\fR and +\fILD_LIBRARY_PATH\fR on the remote node before invoking Open MPI or +the target process. See the "Remote Execution" section, below. +. +. +.TP +.B -q\fR,\fP --quiet +Suppress informative messages from orterun during application execution. +. +. +.TP +.B --tmpdir \fR\fP +Set the root for the session directory tree for mpirun only. +. +. +.TP +.B -tv\fR,\fP --tv +Launch processes under the TotalView debugger. +Deprecated backwards compatibility flag. Synonym for \fI--debug\fP. +. +. +.TP +.B --universe \fR\fP +For this application, set the universe name as: + username@hostname:universe_name +. +. +.TP +.B -v\fR,\fP --verbose +Be verbose +.TP +.B -V\fR,\fP --version +Print version number. If no other arguments are given, this will also +cause orterun to exit. +. +. +.TP +.B -wd \fR\fP +Change to the directory before the user's program executes. +See the "Current Working Directory" section for notes on relative paths. 
+.B Note: +If the \fI-wd\fP option appears both on the command line and in an +application context, the context will take precedence over the command line. +. +. +.TP +.B -x \fR\fP +Export the specified environment variables to the remote nodes before +executing the program. Existing environment variables can be +specified (see the Examples section, below), or new variable names +specified with corresponding values. The parser for the \fI-x\fP +option is not very sophisticated; it does not even understand quoted +values. Users are advised to set variables in the environment, and +then use \fI-x\fP to export (not define) them. +. +. +.P +The following options are useful for developers; they are not generally +useful to most ORTE and/or MPI users: +. +.TP +.B -d\fR,\fP --debug-devel +Enable debugging of the OpenRTE (the run-time layer in Open MPI). +This is not generally useful for most users. +. +. +.TP +.B --debug-daemons +Enable debugging of any OpenRTE daemons used by this application. +. +. +.TP +.B --debug-daemons-file +Enable debugging of any OpenRTE daemons used by this application, storing +output in files. +. +. +.TP +.B --no-daemonize +Do not detach OpenRTE daemons used by this application. +. +. +.\" ************************** +.\" Description Section +.\" ************************** +.SH DESCRIPTION +. +One invocation of \fImpirun\fP starts an MPI application running under Open +MPI. If the application is single process multiple data (SPMD), the application +can be specified on the \fImpirun\fP command line. + +If the application is multiple instruction multiple data (MIMD), comprising of +multiple programs, the set of programs and argument can be specified in one of +two ways: Extended Command Line Arguments, and Application Context. +.PP +An application context describes the MIMD program set including all arguments +in a separate file. +.\"See appcontext(5) for a description of the application context syntax. 
+This file essentially contains multiple \fImpirun\fP command lines, less the +command name itself. The ability to specify different options for different +instantiations of a program is another reason to use an application context. +.PP +Extended command line arguments allow for the description of the application +layout on the command line using colons (\fI:\fP) to separate the specification +of programs and arguments. Some options are globally set across all specified +programs (e.g. --hostfile), while others are specific to a single program +(e.g. -np). +. +. +. +.SS Process Slots +. +Open MPI uses "slots" to represent a potential location for a process. +Hence, a node with 2 slots means that 2 processes can be launched on +that node. For performance, the community typically equates a "slot" +with a physical CPU, thus ensuring that any process assigned to that +slot has a dedicated processor. This is not, however, a requirement for +the operation of Open MPI. +.PP +Slots can be specified in hostfiles after the hostname. For example: +. +.TP 4 +host1.example.com slots=4 +Indicates that there are 4 process slots on host1. +. +.PP +If no slots value is specified, then Open MPI will automatically assign +a default value of "slots=1" to that host. +. +.PP +When running under resource managers (e.g., SLURM, Torque, etc.), Open +MPI will obtain both the hostnames and the number of slots directly +from the resource manager. For example, if running under a SLURM job, +Open MPI will automatically receive the hosts that SLURM has allocated +to the job as well as how many slots on each node that SLURM says +are usable - in most high-performance environments, the slots will +equate to the number of processors on the node. +. +.PP +When deciding where to launch processes, Open MPI will first fill up +all available slots before oversubscribing (see "Location +Nomenclature", below, for more details on the scheduling algorithms +available).
Unless told otherwise, Open MPI will arbitrarily +oversubscribe nodes. For example, if the only node available is the +localhost, Open MPI will run as many processes as specified by the +-n (or one of its variants) command line option on the +localhost (although they may run quite slowly, since they'll all be +competing for CPU and other resources). +. +.PP +Limits can be placed on oversubscription with the "max_slots" +attribute in the hostfile. For example: +. +.TP 4 +host2.example.com slots=4 max_slots=6 +Indicates that there are 4 process slots on host2. Further, Open MPI +is limited to launching a maximum of 6 processes on host2. +. +.TP +host3.example.com slots=2 max_slots=2 +Indicates that there are 2 process slots on host3 and that no +oversubscription is allowed (similar to the \fI--nooversubscribe\fR +option). +. +.TP +host4.example.com max_slots=2 +Shorthand; same as listing "slots=2 max_slots=2". +. +. +.PP +Note that Open MPI's support for resource managers does not currently +set the "max_slots" values for hosts. If you wish to prevent +oversubscription in such scenarios, use the \fI--nooversubscribe\fR +option. +. +.PP +In scenarios where the user wishes to launch an application across +all available slots by not providing a "-n" option on the mpirun +command line, Open MPI will launch a process on each process slot +for each host within the provided environment. For example, if a +hostfile has been provided, then Open MPI will spawn processes +on each identified host up to the "slots=x" limit if oversubscription +is not allowed. If oversubscription is allowed (the default), then +Open MPI will spawn processes on each host up to the "max_slots=y" limit +if that value is provided. In all cases, the "-bynode" and "-byslot" +mapping directives will be enforced to ensure proper placement of +process ranks. +. +. +. +.SS Location Nomenclature +. +As described above, \fImpirun\fP can specify arbitrary locations in +the current Open MPI universe. 
Locations can be specified either by +CPU or by node. + +.B Note: +This nomenclature does not force Open MPI to bind processes to CPUs -- +specifying a location "by CPU" is really a convenience mechanism for +SMPs that ultimately maps down to a specific node. +.PP +Specifying locations by node will launch one copy of an executable per +specified node. +Using the \fI--bynode\fP option tells Open MPI to use all available nodes. +Using the \fI--byslot\fP option tells Open MPI to use all slots on an available +node before allocating resources on the next available node. +For example: +. +.TP 4 +mpirun --bynode -np 4 a.out +Runs one copy of the executable +.I a.out +on all available nodes in the Open MPI universe. MPI_COMM_WORLD rank 0 +will be on node0, rank 1 will be on node1, etc. Regardless of how many slots +are available on each of the nodes. +. +. +.TP +mpirun --byslot -np 4 a.out +Runs one copy of the executable +.I a.out +on each slot on a given node before running the executable on other available +nodes. +. +. +. +.SS Specifying Hosts +. +Hosts can be specified in a number of ways. The most common is in a +\&'hostfile' or 'machinefile'. If our hostfile contains the following information: +. +. + + \fBshell$\fP cat my-hostfile + node00 slots=2 + node01 slots=2 + node02 slots=2 + +. +. +.TP +mpirun --hostfile my-hostfile -np 3 a.out +This will run one copy of the executable +.I a.out +on hosts node00, node01, and node02. +. +. +.PP +Another method for specifying hosts is directly on the command line. Here one +can include and exclude hosts from the set of hosts to run on. For example: +. +. +.TP +mpirun -np 3 --host a a.out +Runs three copies of the executable +.I a.out +on host a. +. +. +.TP +mpirun -np 3 --host a,b,c a.out +Runs one copy of the executable +.I a.out +on hosts a, b, and c. +. +. +.TP +mpirun -np 3 --hostfile my-hostfile --host node00 a.out +Runs three copies of the executable +.I a.out +on host node00. +. +.
+.TP +mpirun -np 3 --hostfile my-hostfile --host node10 a.out +This will prompt an error since node10 is not in my-hostfile; mpirun will +abort. +. +. +.TP +shell$ mpirun -np 1 --host a hostname : -np 2 --host b,c uptime +Runs one copy of the executable +.I hostname +on host a. And runs one copy of the executable +.I uptime +on hosts b and c. +. +. +. +.SS No Local Launch +. +Using the \fB--nolocal\fR option to orterun tells the system to not +launch any of the application processes on the same node that orterun +is running. While orterun typically blocks and consumes few system +resources, this option can be helpful for launching very large jobs +where orterun may actually need to use noticeable amounts of memory +and/or processing time. \fB--nolocal\fR allows orterun to run without +sharing the local node with the launched applications, and likewise +allows the launched applications to run unhindered by orterun's system +usage. +.PP +Note that \fB--nolocal\fR will override any other specification to +launch the application on the local node. It will disqualify the +localhost from being capable of running any processes in the +application. +. +. +.TP +shell$ mpirun -np 1 --host localhost --nolocal hostname +This example will result in an error because orterun will not find +anywhere to launch the application. +. +. +. +.SS No Oversubscription +. +Using the \fI--nooversubscribe\fR option causes Open MPI to implicitly +set the "max_slots" value to be the same as the "slots" value for each +node. This can be especially helpful when running jobs under a +resource manager because Open MPI currently only sets the "slots" +value for each node that it obtains from the resource manager. +. +. +. +.SS Application Context or Executable Program? +. +To distinguish the two different forms, \fImpirun\fP +looks on the command line for the \fI--app\fP option. If +it is specified, then the file named on the command line is +assumed to be an application context.
If it is not +specified, then the file is assumed to be an executable program. +. +. +. +.SS Locating Files +. +If \fIno\fP relative or absolute path is specified for a file, Open MPI +will look for files by searching the directories in the user's PATH environment +variable as defined on the source node(s). +.PP +If a relative directory is specified, it must be relative to the initial +working directory determined by the specific starter used. For example, when +using the rsh or ssh starters, the initial directory is $HOME by default. Other +starters may set the initial directory to the current working directory from +the invocation of \fImpirun\fP. +. +. +. +.SS Current Working Directory +. +The \fI\-wd\fP mpirun option allows the user to change to an arbitrary +directory before their program is invoked. It can also be used in application +context files to specify working directories on specific nodes and/or +for specific applications. +.PP +If the \fI\-wd\fP option appears both in a context file and on the command line, +the context file directory will override the command line value. +.PP +If the \fI-wd\fP option is specified, Open MPI will attempt to change to the +specified directory on all of the remote nodes. If this fails, \fImpirun\fP +will abort. +.PP +If the \fI-wd\fP option is \fBnot\fP specified, Open MPI will send the +directory name where \fImpirun\fP was invoked to each of the remote nodes. The +remote nodes will try to change to that directory. If they are unable (e.g., if +the directory does not exist on that node), then Open MPI will use the default +directory determined by the starter. +.PP +All directory changing occurs before the user's program is invoked; it +does not wait until \fIMPI_INIT\fP is called. +. +. +. +.SS Standard I/O +. +Open MPI directs UNIX standard input to /dev/null on all processes +except the MPI_COMM_WORLD rank 0 process. The MPI_COMM_WORLD rank 0 process +inherits standard input from \fImpirun\fP.
+.B Note: +The node that invoked \fImpirun\fP need not be the same as the node where the +MPI_COMM_WORLD rank 0 process resides. Open MPI handles the redirection of +\fImpirun\fP's standard input to the rank 0 process. +.PP +Open MPI directs UNIX standard output and error from remote nodes to the node +that invoked \fImpirun\fP and prints it on the standard output/error of +\fImpirun\fP. +Local processes inherit the standard output/error of \fImpirun\fP and transfer +to it directly. +.PP +Thus it is possible to redirect standard I/O for Open MPI applications by +using the typical shell redirection procedure on \fImpirun\fP. + + \fBshell$\fP mpirun -np 2 my_app < my_input > my_output + +Note that in this example \fIonly\fP the MPI_COMM_WORLD rank 0 process will +receive the stream from \fImy_input\fP on stdin. The stdin on all the other +nodes will be tied to /dev/null. However, the stdout from all nodes will +be collected into the \fImy_output\fP file. +. +. +. +.SS Signal Propagation +. +When orterun receives a SIGTERM and SIGINT, it will attempt to kill +the entire job by sending all processes in the job a SIGTERM, waiting +a small number of seconds, then sending all processes in the job a +SIGKILL. +. +SIGUSR1 and SIGUSR2 signals received by orterun are propagated to +all processes in the job. Other signals are not currently propagated +by orterun. +. +. +.SS Process Termination / Signal Handling +. +During the run of an MPI application, if any rank dies abnormally +(either exiting before invoking \fIMPI_FINALIZE\fP, or dying as the result of a +signal), \fImpirun\fP will print out an error message and kill the rest of the +MPI application. +.PP +User signal handlers should probably avoid trying to cleanup MPI state +(Open MPI is, currently, neither thread-safe nor async-signal-safe). 
+For example, if a segmentation fault occurs in \fIMPI_SEND\fP (perhaps because +a bad buffer was passed in) and a user signal handler is invoked, if this user +handler attempts to invoke \fIMPI_FINALIZE\fP, Bad Things could happen since +Open MPI was already "in" MPI when the error occurred. Since \fImpirun\fP +will notice that the process died due to a signal, it is probably not +necessary (and safest) for the user to only clean up non-MPI state. +. +. +. +.SS Process Environment +. +Processes in the MPI application inherit their environment from the +Open RTE daemon upon the node on which they are running. The +environment is typically inherited from the user's shell. On remote +nodes, the exact environment is determined by the boot MCA module +used. The \fIrsh\fR launch module, for example, uses either +\fIrsh\fR/\fIssh\fR to launch the Open RTE daemon on remote nodes, and +typically executes one or more of the user's shell-setup files before +launching the Open RTE daemon. When running dynamically linked +applications which require the \fILD_LIBRARY_PATH\fR environment +variable to be set, care must be taken to ensure that it is correctly +set when booting Open MPI. +.PP +See the "Remote Execution" section for more details. +. +. +.SS Remote Execution +. +Open MPI requires that the \fIPATH\fR environment variable be set to +find executables on remote nodes (this is typically only necessary in +\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled +environments typically copy the current environment to the execution +of remote jobs, so if the current environment has \fIPATH\fR and/or +\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it +set properly). If Open MPI was compiled with shared library support, +it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment +variable set on remote nodes as well (especially to find the shared +libraries required to run user MPI applications). 
+.PP +However, it is not always desirable or possible to edit shell +startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The +\fI--prefix\fR option is provided for some simple configurations where +this is not possible. +.PP +The \fI--prefix\fR option takes a single argument: the base directory +on the remote node where Open MPI is installed. Open MPI will use +this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR +before executing any Open MPI or user applications. This allows +running Open MPI jobs without having pre-configured the \fIPATH\fR and +\fILD_LIBRARY_PATH\fR on the remote nodes. +.PP +Open MPI adds the basename of the current +node's "bindir" (the directory where Open MPI's executables are +installed) to the prefix and uses that to set the \fIPATH\fR on the +remote node. Similarly, Open MPI adds the basename of the current +node's "libdir" (the directory where Open MPI's libraries are +installed) to the prefix and uses that to set the +\fILD_LIBRARY_PATH\fR on the remote node. For example: +.TP 15 +Local bindir: +/local/node/directory/bin +.TP +Local libdir: +/local/node/directory/lib64 +.PP +If the following command line is used: + + \fBshell$\fP mpirun --prefix /remote/node/directory + +Open MPI will add "/remote/node/directory/bin" to the \fIPATH\fR +and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the +remote node before attempting to execute anything. +.PP +Note that \fI--prefix\fR can be set on a per-context basis, allowing +for different values for different nodes. +.PP +The \fI--prefix\fR option is not sufficient if the installation paths +on the remote node are different than the local node (e.g., if "/lib" +is used on the local node, but "/lib64" is used on the remote node), +or if the installation paths are something other than a subdirectory +under a common prefix.
+.PP +Note that executing \fImpirun\fR via an absolute pathname is +equivalent to specifying \fI--prefix\fR without the last subdirectory +in the absolute pathname to \fImpirun\fR. For example: + + \fBshell$\fP /usr/local/bin/mpirun ... + +is equivalent to + + \fBshell$\fP mpirun --prefix /usr/local +. +. +. +.SS Exported Environment Variables +. +All environment variables that are named in the form OMPI_* will automatically +be exported to new processes on the local and remote nodes. +The \fI\-x\fP option to \fImpirun\fP can be used to export specific environment +variables to the new processes. While the syntax of the \fI\-x\fP +option allows the definition of new variables, note that the parser +for this option is currently not very sophisticated - it does not even +understand quoted values. Users are advised to set variables in the +environment and use \fI\-x\fP to export them; not to define them. +. +. +. +.SS MCA (Modular Component Architecture) +. +The \fI-mca\fP switch allows the passing of parameters to various MCA modules. +.\" Open MPI's MCA modules are described in detail in ompimca(7). +MCA modules have direct impact on MPI programs because they allow tunable +parameters to be set at run time (such as which BTL communication device driver +to use, what parameters to pass to that BTL, etc.). +.PP +The \fI-mca\fP switch takes two arguments: \fI<key>\fP and \fI<value>\fP. +The \fI<key>\fP argument generally specifies which MCA module will receive the value. +For example, the \fI<key>\fP "btl" is used to select which BTL to be used for +transporting MPI messages. The \fI<value>\fP argument is the value that is +passed. +For example: +. +.TP 4 +mpirun -mca btl tcp,self -np 1 foo +Tells Open MPI to use the "tcp" and "self" BTLs, and to run a single copy of +"foo" on an allocated node. +. +.TP +mpirun -mca btl self -np 1 foo +Tells Open MPI to use the "self" BTL, and to run a single copy of "foo" on an +allocated node. +.\" And so on. Open MPI's BTL MCA modules are described in ompimca_btl(7).
+.PP +The \fI-mca\fP switch can be used multiple times to specify different +\fI\fP and/or \fI\fP arguments. If the same \fI\fP is +specified more than once, the \fI\fPs are concatenated with a comma +(",") separating them. +.PP +.B Note: +The \fI-mca\fP switch is simply a shortcut for setting environment variables. +The same effect may be accomplished by setting corresponding environment +variables before running \fImpirun\fP. +The form of the environment variables that Open MPI sets are: + + OMPI_= +.PP +Note that the \fI-mca\fP switch overrides any previously set environment +variables. Also note that unknown \fI\fP arguments are still set as +environment variable -- they are not checked (by \fImpirun\fP) for correctness. +Illegal or incorrect \fI\fP arguments may or may not be reported -- it +depends on the specific MCA module. +. +.\" ************************** +.\" Examples Section +.\" ************************** +.SH EXAMPLES +Be sure to also see the examples in the "Location Nomenclature" section, above. +. +.TP 4 +mpirun -np 1 prog1 +Load and execute prog1 on one node. Search the user's $PATH for the +executable file on each node. +. +. +.TP +mpirun -np 8 --byslot prog1 +Run 8 copies of prog1 wherever Open MPI wants to run them. +. +. +.TP +mpirun -np 4 -mca btl ib,tcp,self prog1 +Run 4 copies of prog1 using the "ib", "tcp", and "self" BTL's for the transport +of MPI messages. +. +.\" ************************** +.\" Diagnostics Section +.\" ************************** +. +.\" .SH DIAGNOSTICS +.\".TP 4 +.\"Error Msg: +.\"Description +. +.\" ************************** +.\" Return Value Section +.\" ************************** +. +.SH RETURN VALUE +. +\fImpirun\fP returns 0 if all ranks started by \fImpirun\fP exit after calling +MPI_FINALIZE. A non-zero value is returned if an internal error occurred in +mpirun, or one or more ranks exited before calling MPI_FINALIZE. If an +internal error occurred in mpirun, the corresponding error code is returned. 
+In the event that one or more ranks exit before calling MPI_FINALIZE, the +return value of the rank of the process that \fImpirun\fP first notices died +before calling MPI_FINALIZE will be returned. Note that, in general, this will +be the first rank that died but is not guaranteed to be so. +.PP +However, note that if the \fI-nw\fP switch is used, the return value from +mpirun does not indicate the exit status of the ranks. +. +.\" ************************** +.\" See Also Section +.\" ************************** +. +.\" .SH SEE ALSO +.\" orted(1) diff --git a/orte/tools/ortekill/ortekill.c b/orte/tools/ortekill/ortekill.c new file mode 100644 index 0000000000..8545251ebc --- /dev/null +++ b/orte/tools/ortekill/ortekill.c @@ -0,0 +1,332 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#include +#include +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ +#ifdef HAVE_SYS_WAIT_H +#include +#endif /* HAVE_SYS_WAIT_H */ +#ifdef HAVE_LIBGEN_H +#include +#endif + +#include "opal/event/event.h" +#include "opal/install_dirs.h" +#include "opal/mca/base/base.h" +#include "opal/threads/condition.h" +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/cmd_line.h" +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/util/trace.h" +#include "opal/version.h" + +#include "orte/orte_constants.h" + +#include "orte/class/orte_pointer_array.h" +#include "orte/util/proc_info.h" +#include "orte/util/sys_info.h" +#include "orte/util/universe_setup_file_io.h" +#include "orte/util/pre_condition_transports.h" + +#include "orte/mca/ns/ns.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/pls/pls.h" +#include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/rmgr/rmgr.h" +#include "orte/mca/schema/schema.h" +#include "orte/mca/smr/smr.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_wait.h" + +/* + * Globals + */ +static orte_jobid_t jobid = ORTE_JOBID_INVALID; +static char *orterun_basename = NULL; + +/* + * setup globals for catching orterun command line options + */ +struct globals_t { + bool help; + bool version; + bool verbose; + bool quiet; + bool exit; + bool no_wait_for_job_completion; + bool by_node; + bool by_slot; + bool per_node; + bool no_oversubscribe; + bool debugger; + bool no_local_schedule; + bool displaymapatlaunch; + int num_procs; + int exit_status; + char *hostfile; + char *env_val; + char *appfile; + char *wdir; + char *path; + opal_mutex_t lock; + opal_condition_t cond; +} 
orterun_globals; +static bool globals_init = false; + + +opal_cmd_line_init_t cmd_line_init[] = { + /* Various "obvious" options */ + { NULL, NULL, NULL, 'h', NULL, "help", 0, + &orterun_globals.help, OPAL_CMD_LINE_TYPE_BOOL, + "This help message" }, + { NULL, NULL, NULL, 'V', NULL, "version", 0, + &orterun_globals.version, OPAL_CMD_LINE_TYPE_BOOL, + "Print version and exit" }, + { NULL, NULL, NULL, 'v', NULL, "verbose", 0, + &orterun_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, + "Be verbose" }, + { NULL, NULL, NULL, 'q', NULL, "quiet", 0, + &orterun_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL, + "Suppress helpful messages" }, + + /* Use an appfile */ + { NULL, NULL, NULL, '\0', NULL, "app", 1, + &orterun_globals.appfile, OPAL_CMD_LINE_TYPE_STRING, + "Provide an appfile; ignore all other command line options" }, + + /* Number of processes; -c, -n, --n, -np, and --np are all + synonyms */ + { NULL, NULL, NULL, 'c', "np", "np", 1, + &orterun_globals.num_procs, OPAL_CMD_LINE_TYPE_INT, + "Number of processes to run" }, + { NULL, NULL, NULL, '\0', "n", "n", 1, + &orterun_globals.num_procs, OPAL_CMD_LINE_TYPE_INT, + "Number of processes to run" }, + + /* Set a hostfile */ + { "rds", "hostfile", "path", '\0', "hostfile", "hostfile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + { "rds", "hostfile", "path", '\0', "machinefile", "machinefile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + + /* Don't wait for the process to finish before exiting */ + { NULL, NULL, NULL, '\0', "nw", "nw", 0, + &orterun_globals.no_wait_for_job_completion, OPAL_CMD_LINE_TYPE_BOOL, + "Launch the processes and do not wait for their completion (i.e., let orterun complete as soon a successful launch occurs)" }, + + /* Export environment variables; potentially used multiple times, + so it does not make sense to set into a variable */ + { NULL, NULL, NULL, 'x', NULL, NULL, 1, + NULL, OPAL_CMD_LINE_TYPE_NULL, + "Export an environment variable, optionally specifying a 
value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)" }, + + /* Specific mapping (C, cX, N, nX) */ +#if 0 + /* JJH --map is not currently implemented so don't advertise it until it is */ + { NULL, NULL, NULL, '\0', NULL, "map", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Mapping of processes to nodes / CPUs" }, +#endif + { NULL, NULL, NULL, '\0', "bynode", "bynode", 0, + &orterun_globals.by_node, OPAL_CMD_LINE_TYPE_BOOL, + "Whether to allocate/map processes round-robin by node" }, + { NULL, NULL, NULL, '\0', "byslot", "byslot", 0, + &orterun_globals.by_slot, OPAL_CMD_LINE_TYPE_BOOL, + "Whether to allocate/map processes round-robin by slot (the default)" }, + { NULL, NULL, NULL, '\0', "pernode", "pernode", 0, + &orterun_globals.per_node, OPAL_CMD_LINE_TYPE_BOOL, + "If no number of process is specified, this will cause one process per available node to be executed" }, + { NULL, NULL, NULL, '\0', "nooversubscribe", "nooversubscribe", 0, + &orterun_globals.no_oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, + "Nodes are not to be oversubscribed, even if the system supports such operation"}, + { NULL, NULL, NULL, '\0', "display-map-at-launch", "display-map-at-launch", 0, + &orterun_globals.displaymapatlaunch, OPAL_CMD_LINE_TYPE_BOOL, + "Display the process map just before launch"}, + + /* mpiexec-like arguments */ + { NULL, NULL, NULL, '\0', "wdir", "wdir", 1, + &orterun_globals.wdir, OPAL_CMD_LINE_TYPE_STRING, + "Set the working directory of the started processes" }, + { NULL, NULL, NULL, '\0', "path", "path", 1, + &orterun_globals.path, OPAL_CMD_LINE_TYPE_STRING, + "PATH to be used to look for executables to start processes" }, + /* These arguments can be specified multiple times */ +#if 0 + /* JMS: Removed because it's not really implemented */ + { NULL, NULL, NULL, '\0', "arch", "arch", 1, + NULL, 
OPAL_CMD_LINE_TYPE_STRING, + "Architecture to start processes on" }, +#endif + { NULL, NULL, NULL, 'H', "host", "host", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "List of hosts to invoke processes on" }, + + /* OSC mpiexec-like arguments */ + { NULL, NULL, NULL, '\0', "nolocal", "nolocal", 0, + &orterun_globals.no_local_schedule, OPAL_CMD_LINE_TYPE_BOOL, + "Do not run any MPI applications on the local node" }, + + /* User-level debugger arguments */ + { NULL, NULL, NULL, '\0', "tv", "tv", 0, + &orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL, + "Deprecated backwards compatibility flag; synonym for \"--debug\"" }, + { NULL, NULL, NULL, '\0', "debug", "debug", 0, + &orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL, + "Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter" }, + { "orte", "base", "user_debugger", '\0', "debugger", "debugger", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Sequence of debuggers to search for when \"--debug\" is used" }, + + /* OpenRTE arguments */ + { "orte", "debug", NULL, 'd', NULL, "debug-devel", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Enable debugging of OpenRTE" }, + + { "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0, + NULL, OPAL_CMD_LINE_TYPE_INT, + "Enable debugging of any OpenRTE daemons used by this application" }, + + { "orte", "debug", "daemons_file", '\0', NULL, "debug-daemons-file", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Enable debugging of any OpenRTE daemons used by this application, storing output in files" }, + + { "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Do not detach OpenRTE daemons used by this application" }, + + { "universe", NULL, NULL, '\0', NULL, "universe", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Set the universe name as username@hostname:universe_name for this application" }, + + { NULL, NULL, NULL, '\0', NULL, "tmpdir", 1, + &orte_process_info.tmpdir_base, OPAL_CMD_LINE_TYPE_STRING, + "Set the root for the session 
directory tree for orterun ONLY" }, + + { NULL, NULL, NULL, '\0', NULL, "prefix", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Prefix where Open MPI is installed on remote nodes" }, + { NULL, NULL, NULL, '\0', NULL, "noprefix", 0, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Disable automatic --prefix behavior" }, + + /* End of list */ + { NULL, NULL, NULL, '\0', NULL, NULL, 0, + NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } +}; + +#if !defined(__WINDOWS__) +extern char** environ; +#endif /* !defined(__WINDOWS__) */ + + +int main(int argc, char *argv[]) +{ + int rc; + int id, iparam; + + /* Setup MCA params */ + + mca_base_param_init(); + orte_register_params(false); + + /* find our basename (the name of the executable) so that we can + use it in pretty-print error messages */ + orterun_basename = opal_basename(argv[0]); + + /* Intialize our Open RTE environment */ + /* Set the flag telling orte_init that I am NOT a + * singleton, but am "infrastructure" - prevents setting + * up incorrect infrastructure that only a singleton would + * require + */ + if (ORTE_SUCCESS != (rc = orte_init(true))) { + opal_show_help("help-orterun.txt", "orterun:init-failure", true, + "orte_init()", rc); + return rc; + } + + /* check for daemon flags and push them into the environment + * since this isn't being automatically done + */ + id = mca_base_param_reg_int_name("orte_debug", "daemons", + "Whether to debug the ORTE daemons or not", + false, false, (int)false, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", "debug", "daemons"); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orterun.txt", "orterun:environ", false, + orterun_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + id = mca_base_param_reg_int_name("orte", "debug", + "Top-level ORTE debug switch", + false, false, 0, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", NULL, "debug"); + if (ORTE_SUCCESS != (rc = 
opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orterun.txt", "orterun:environ", false, + orterun_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + id = mca_base_param_reg_int_name("orte_debug", "daemons_file", + "Whether want stdout/stderr of daemons to go to a file or not", + false, false, 0, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("orte", "debug", + "daemons_file"); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orterun.txt", "orterun:environ", false, + orterun_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + + orte_finalize(); + free(orterun_basename); + return rc; +} diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 4a84809d6e..f9bcfb39a7 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -113,7 +113,6 @@ struct globals_t { bool no_oversubscribe; bool debugger; bool no_local_schedule; - bool displaymapatlaunch; bool reuse_daemons; int num_procs; int exit_status; @@ -201,7 +200,7 @@ opal_cmd_line_init_t cmd_line_init[] = { &orterun_globals.no_oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, "Nodes are not to be oversubscribed, even if the system supports such operation"}, { NULL, NULL, NULL, '\0', "display-map-at-launch", "display-map-at-launch", 0, - &orterun_globals.displaymapatlaunch, OPAL_CMD_LINE_TYPE_BOOL, + NULL, OPAL_CMD_LINE_TYPE_BOOL, "Display the process map just before launch"}, /* mpiexec-like arguments */ @@ -419,6 +418,20 @@ int orterun(int argc, char *argv[]) free(tmp); } + id = mca_base_param_reg_int_name("rmaps_base", "display_map", + "Whether to display the process map after it is computed", + false, false, (int)false, &iparam); + if (iparam) { + char *tmp = mca_base_param_environ_variable("rmaps", "base", "display_map"); + if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) { + opal_show_help("help-orterun.txt", "orterun:environ", false, 
+ orterun_basename, tmp, "1", rc); + free(tmp); + return rc; + } + free(tmp); + } + /* pre-condition any network transports that require it */ if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(apps, num_apps))) { ORTE_ERROR_LOG(rc); @@ -432,14 +445,6 @@ int orterun(int argc, char *argv[]) /* construct the list of attributes */ OBJ_CONSTRUCT(&attributes, opal_list_t); - if (orterun_globals.displaymapatlaunch) { - if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_DISPLAY_AFTER_MAP, - ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_OVERRIDE))) { - opal_show_help("help-orterun.txt", "orterun:attr-failed", false, - orterun_basename, NULL, NULL, rc); - } - } - /** setup callbacks for abort signals */ opal_signal_set(&term_handler, SIGTERM, abort_signal_callback, &term_handler); @@ -858,7 +863,6 @@ static int init_globals(void) orterun_globals.no_oversubscribe = false; orterun_globals.debugger = false; orterun_globals.no_local_schedule = false; - orterun_globals.displaymapatlaunch = false; orterun_globals.num_procs = 0; orterun_globals.exit_status = 0; if( NULL != orterun_globals.hostfile )