2005-03-14 23:57:21 +03:00
/* -*- C -*-
 *
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
# include "orte_config.h"
# include <stdio.h>
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
# ifdef HAVE_SYS_PARAM_H
# include <sys/param.h>
# endif
# include <errno.h>
# include <signal.h>
# include <ctype.h>
2005-12-18 01:05:10 +03:00
# ifdef HAVE_SYS_TYPES_H
2005-04-01 04:30:37 +04:00
# include <sys/types.h>
2005-12-18 01:05:10 +03:00
# endif /* HAVE_SYS_TYPES_H */
# ifdef HAVE_SYS_WAIT_H
2005-04-01 04:30:37 +04:00
# include <sys/wait.h>
2005-12-18 01:05:10 +03:00
# endif /* HAVE_SYS_WAIT_H */
2006-03-23 19:53:11 +03:00
# ifdef HAVE_LIBGEN_H
2006-02-28 14:52:12 +03:00
# include <libgen.h>
# endif
2005-03-14 23:57:21 +03:00
2005-07-04 03:09:55 +04:00
# include "opal/event/event.h"
2006-09-15 06:52:08 +04:00
# include "opal/install_dirs.h"
2005-09-19 21:20:01 +04:00
# include "opal/mca/base/base.h"
# include "opal/threads/condition.h"
2005-07-04 04:13:44 +04:00
# include "opal/util/argv.h"
2005-09-19 21:20:01 +04:00
# include "opal/util/basename.h"
2005-07-04 04:13:44 +04:00
# include "opal/util/cmd_line.h"
2005-09-19 21:20:01 +04:00
# include "opal/util/opal_environ.h"
2005-07-04 03:31:27 +04:00
# include "opal/util/output.h"
2005-07-04 06:38:44 +04:00
# include "opal/util/show_help.h"
2005-09-19 21:20:01 +04:00
# include "opal/util/trace.h"
2006-06-09 21:21:23 +04:00
# include "opal/version.h"
2005-09-19 21:20:01 +04:00
2006-02-12 04:33:29 +03:00
# include "orte/orte_constants.h"
2005-09-19 21:20:01 +04:00
# include "orte/class/orte_pointer_array.h"
# include "orte/util/proc_info.h"
# include "orte/util/sys_info.h"
# include "orte/util/universe_setup_file_io.h"
2006-09-14 19:27:17 +04:00
# include "orte/util/pre_condition_transports.h"
2005-03-14 23:57:21 +03:00
2005-09-19 21:20:01 +04:00
# include "orte/mca/ns/ns.h"
# include "orte/mca/gpr/gpr.h"
2006-09-15 01:29:51 +04:00
# include "orte/mca/pls/pls.h"
2006-11-01 01:16:51 +03:00
# include "orte/mca/rmaps/rmaps_types.h"
2005-09-19 21:20:01 +04:00
# include "orte/mca/rmgr/rmgr.h"
# include "orte/mca/schema/schema.h"
2006-09-15 01:29:51 +04:00
# include "orte/mca/smr/smr.h"
2005-09-19 21:20:01 +04:00
# include "orte/mca/errmgr/errmgr.h"
2005-03-14 23:57:21 +03:00
2005-09-19 21:20:01 +04:00
# include "orte/runtime/runtime.h"
# include "orte/runtime/orte_wait.h"
2005-03-14 23:57:21 +03:00
2005-08-31 20:15:59 +04:00
# include "orterun.h"
2005-08-31 04:47:52 +04:00
# include "totalview.h"
2005-03-14 23:57:21 +03:00
/*
* Globals
*/
2005-07-04 03:09:55 +04:00
static struct opal_event term_handler ;
static struct opal_event int_handler ;
2006-07-11 09:24:08 +04:00
# ifndef __WINDOWS__
2006-06-08 22:27:17 +04:00
static struct opal_event sigusr1_handler ;
static struct opal_event sigusr2_handler ;
2006-07-11 09:24:08 +04:00
# endif /* __WINDOWS__ */
2006-10-02 04:46:31 +04:00
static orte_jobid_t jobid = ORTE_JOBID_INVALID ;
2005-07-03 08:02:01 +04:00
static orte_pointer_array_t * apps_pa ;
2005-03-14 23:57:21 +03:00
static bool wait_for_job_completion = true ;
2005-04-12 20:01:30 +04:00
static char * orterun_basename = NULL ;
2005-04-16 01:52:58 +04:00
static int max_display_aborted = 1 ;
static int num_aborted = 0 ;
static int num_killed = 0 ;
2005-08-08 20:42:28 +04:00
static char * * global_mca_env = NULL ;
2006-07-11 01:25:33 +04:00
static bool have_zero_np = false ;
2006-08-15 23:54:10 +04:00
static orte_std_cntr_t total_num_apps = 0 ;
2006-09-15 06:52:08 +04:00
static bool want_prefix_by_default = ( bool ) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT ;
2005-03-14 23:57:21 +03:00
/*
* setup globals for catching orterun command line options
*/
struct globals_t {
bool help ;
2006-06-09 21:21:23 +04:00
bool version ;
2005-03-14 23:57:21 +03:00
bool verbose ;
2006-06-26 22:21:45 +04:00
bool quiet ;
2005-03-14 23:57:21 +03:00
bool exit ;
bool no_wait_for_job_completion ;
(copied from a mail that has a lengthy description of this commit)
I spoke with Tim about this the other day -- he gave me the green
light to go ahead with this, but it turned into a bigger job than I
thought it would be. I revamped how the default RAS scheduling and
round_robin RMAPS mapping occurs. The previous algorithms were pretty
brain dead, and ignored the "slots" and "max_slots" tokens in
hostfiles. I considered this a big enough problem to fix it for the
beta (because there is currently no way to control where processes are
launched on SMPs).
There's still some more bells and whistles that I'd like to implement,
but there's no hurry, and they can go on the trunk at any time. My
patches below are for what I considered "essential", and do the
following:
- honor the "slots" and "max-slots" tokens in the hostfile (and all
their synonyms), meaning that we allocate/map until we fill slots,
and if there are still more processes to allocate/map, we keep going
until we fill max-slots (i.e., only oversubscribe a node if we have
to).
- offer two different algorithms, currently supported by two new
options to orterun. Remember that there are two parts here -- slot
allocation and process mapping. Slot allocation controls how many
processes we'll be running on a node. After that decision has been
made, process mapping effectively controls where the ranks of
MPI_COMM_WORLD (MCW) are placed. Some of the examples given below
don't make sense unless you remember that there is a difference
between the two (which makes total sense, but you have to think
about it in terms of both things):
1. "-bynode": allocates/maps one process per node in a round-robin
fashion until all slots on the node are taken. If we still have more
processes after all slots are taken, then keep going until all
max-slots are taken. Examples:
- The hostfile:
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -bynode -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 2
vogon: MCW ranks 1, 3, 4, 5
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4
vogon: MCW ranks 1, 3, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until each
node's max_slots is hit, of course)
- orterun -bynode -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4, 6
vogon: MCW ranks 1, 3, 5, 7, 8, 9, 10, 11
2. "-byslot" (this is the default if you don't specify -bynode):
greedily takes all available slots on a node for a job before moving
on to the next node. If we still have processes to allocate/schedule,
then oversubscribe all nodes equally (i.e., go round robin on all
nodes until each node's max_slots is hit). Examples:
- The hostfile
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 1
vogon: MCW ranks 2, 3, 4, 5
- orterun -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2
vogon: MCW ranks 3, 4, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until max_slots
is hit)
- orterun -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2, 3
vogon: MCW ranks 4, 5, 6, 7, 8, 9, 10, 11
The above examples are fairly contrived, and it's not clear from them
that you can get different allocation answers in all cases (the
mapping differences are obvious). Consider the following allocation
example:
- The hostfile
eddie count=4
vogon count=4
earth count=4
deep-thought count=4
- orterun -np 8 -hostfile hostfile a.out
eddie: 4 slots will be allocated
vogon: 4 slots will be allocated
earth: no slots allocated
deep-thought: no slots allocated
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: 2 slots will be allocated
vogon: 2 slots will be allocated
earth: 2 slots will be allocated
deep-thought: 2 slots will be allocated
This commit was SVN r5894.
2005-05-31 20:36:53 +04:00
bool by_node ;
bool by_slot ;
2006-12-13 07:51:38 +03:00
bool do_not_launch ;
2005-11-20 19:06:53 +03:00
bool debugger ;
2006-09-25 23:41:54 +04:00
int num_procs ;
2005-04-29 04:36:07 +04:00
int exit_status ;
2005-03-14 23:57:21 +03:00
char * hostfile ;
char * env_val ;
char * appfile ;
char * wdir ;
char * path ;
2005-07-04 02:45:48 +04:00
opal_mutex_t lock ;
opal_condition_t cond ;
2005-03-14 23:57:21 +03:00
} orterun_globals ;
2005-03-19 02:58:36 +03:00
static bool globals_init = false ;
2005-03-14 23:57:21 +03:00
2005-07-04 04:13:44 +04:00
opal_cmd_line_init_t cmd_line_init [ ] = {
2005-03-14 23:57:21 +03:00
/* Various "obvious" options */
2005-09-05 00:54:19 +04:00
{ NULL , NULL , NULL , ' h ' , NULL , " help " , 0 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . help , OPAL_CMD_LINE_TYPE_BOOL ,
2005-03-14 23:57:21 +03:00
" This help message " } ,
2006-06-09 21:21:23 +04:00
{ NULL , NULL , NULL , ' V ' , NULL , " version " , 0 ,
& orterun_globals . version , OPAL_CMD_LINE_TYPE_BOOL ,
" Print version and exit " } ,
2005-03-14 23:57:21 +03:00
{ NULL , NULL , NULL , ' v ' , NULL , " verbose " , 0 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . verbose , OPAL_CMD_LINE_TYPE_BOOL ,
2005-03-14 23:57:21 +03:00
" Be verbose " } ,
2006-06-26 22:21:45 +04:00
{ NULL , NULL , NULL , ' q ' , NULL , " quiet " , 0 ,
& orterun_globals . quiet , OPAL_CMD_LINE_TYPE_BOOL ,
" Suppress helpful messages " } ,
2005-03-14 23:57:21 +03:00
/* Use an appfile */
{ NULL , NULL , NULL , ' \0 ' , NULL , " app " , 1 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . appfile , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-14 23:57:21 +03:00
" Provide an appfile; ignore all other command line options " } ,
/* Number of processes; -c, -n, --n, -np, and --np are all
synonyms */
{ NULL , NULL , NULL , ' c ' , " np " , " np " , 1 ,
2006-09-25 23:41:54 +04:00
& orterun_globals . num_procs , OPAL_CMD_LINE_TYPE_INT ,
2005-03-14 23:57:21 +03:00
" Number of processes to run " } ,
{ NULL , NULL , NULL , ' \0 ' , " n " , " n " , 1 ,
2006-09-25 23:41:54 +04:00
& orterun_globals . num_procs , OPAL_CMD_LINE_TYPE_INT ,
2005-03-14 23:57:21 +03:00
" Number of processes to run " } ,
2006-07-11 01:25:33 +04:00
2005-03-14 23:57:21 +03:00
/* Set a hostfile */
2005-03-19 02:40:08 +03:00
{ " rds " , " hostfile " , " path " , ' \0 ' , " hostfile " , " hostfile " , 1 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-19 02:40:08 +03:00
" Provide a hostfile " } ,
{ " rds " , " hostfile " , " path " , ' \0 ' , " machinefile " , " machinefile " , 1 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-14 23:57:21 +03:00
" Provide a hostfile " } ,
/* Don't wait for the process to finish before exiting */
2007-01-16 19:10:31 +03:00
#if 0
2005-03-14 23:57:21 +03:00
{ NULL , NULL , NULL , ' \0 ' , " nw " , " nw " , 0 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . no_wait_for_job_completion , OPAL_CMD_LINE_TYPE_BOOL ,
2005-03-14 23:57:21 +03:00
" Launch the processes and do not wait for their completion (i.e., let orterun complete as soon a successful launch occurs) " } ,
2007-01-16 19:10:31 +03:00
# endif
2005-04-16 01:52:58 +04:00
/* Set the max number of aborted processes to show */
{ NULL , NULL , NULL , ' \0 ' , " aborted " , " aborted " , 1 ,
2005-07-04 04:13:44 +04:00
& max_display_aborted , OPAL_CMD_LINE_TYPE_INT ,
2005-04-16 01:52:58 +04:00
" The maximum number of aborted processes to display " } ,
2005-03-14 23:57:21 +03:00
/* Export environment variables; potentially used multiple times,
so it does not make sense to set into a variable */
{ NULL , NULL , NULL , ' x ' , NULL , NULL , 1 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_NULL ,
2005-03-14 23:57:21 +03:00
" Export an environment variable, optionally specifying a value (e.g., \" -x foo \" exports the environment variable foo and takes its value from the current environment; \" -x foo=bar \" exports the environment variable name foo and sets its value to \" bar \" in the started processes) " } ,
/* Specific mapping (C, cX, N, nX) */
2005-10-01 19:51:20 +04:00
#if 0
/* JJH --map is not currently implemented so don't advertise it until it is */
2005-03-14 23:57:21 +03:00
{ NULL , NULL , NULL , ' \0 ' , NULL , " map " , 1 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-14 23:57:21 +03:00
" Mapping of processes to nodes / CPUs " } ,
2005-10-01 19:51:20 +04:00
# endif
(copied from a mail that has a lengthy description of this commit)
I spoke with Tim about this the other day -- he gave me the green
light to go ahead with this, but it turned into a bigger job than I
thought it would be. I revamped how the default RAS scheduling and
round_robin RMAPS mapping occurs. The previous algorithms were pretty
brain dead, and ignored the "slots" and "max_slots" tokens in
hostfiles. I considered this a big enough problem to fix it for the
beta (because there is currently no way to control where processes are
launched on SMPs).
There's still some more bells and whistles that I'd like to implement,
but there's no hurry, and they can go on the trunk at any time. My
patches below are for what I considered "essential", and do the
following:
- honor the "slots" and "max-slots" tokens in the hostfile (and all
their synonyms), meaning that we allocate/map until we fill slots,
and if there are still more processes to allocate/map, we keep going
until we fill max-slots (i.e., only oversubscribe a node if we have
to).
- offer two different algorithms, currently supported by two new
options to orterun. Remember that there are two parts here -- slot
allocation and process mapping. Slot allocation controls how many
processes we'll be running on a node. After that decision has been
made, process mapping effectively controls where the ranks of
MPI_COMM_WORLD (MCW) are placed. Some of the examples given below
don't make sense unless you remember that there is a difference
between the two (which makes total sense, but you have to think
about it in terms of both things):
1. "-bynode": allocates/maps one process per node in a round-robin
fashion until all slots on the node are taken. If we still have more
processes after all slots are taken, then keep going until all
max-slots are taken. Examples:
- The hostfile:
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -bynode -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 2
vogon: MCW ranks 1, 3, 4, 5
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4
vogon: MCW ranks 1, 3, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until each
node's max_slots is hit, of course)
- orterun -bynode -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4, 6
vogon: MCW ranks 1, 3, 5, 7, 8, 9, 10, 11
2. "-byslot" (this is the default if you don't specify -bynode):
greedily takes all available slots on a node for a job before moving
on to the next node. If we still have processes to allocate/schedule,
then oversubscribe all nodes equally (i.e., go round robin on all
nodes until each node's max_slots is hit). Examples:
- The hostfile
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 1
vogon: MCW ranks 2, 3, 4, 5
- orterun -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2
vogon: MCW ranks 3, 4, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until max_slots
is hit)
- orterun -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2, 3
vogon: MCW ranks 4, 5, 6, 7, 8, 9, 10, 11
The above examples are fairly contrived, and it's not clear from them
that you can get different allocation answers in all cases (the
mapping differences are obvious). Consider the following allocation
example:
- The hostfile
eddie count=4
vogon count=4
earth count=4
deep-thought count=4
- orterun -np 8 -hostfile hostfile a.out
eddie: 4 slots will be allocated
vogon: 4 slots will be allocated
earth: no slots allocated
deep-thought: no slots allocated
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: 2 slots will be allocated
vogon: 2 slots will be allocated
earth: 2 slots will be allocated
deep-thought: 2 slots will be allocated
This commit was SVN r5894.
2005-05-31 20:36:53 +04:00
{ NULL , NULL , NULL , ' \0 ' , " bynode " , " bynode " , 0 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . by_node , OPAL_CMD_LINE_TYPE_BOOL ,
(copied from a mail that has a lengthy description of this commit)
I spoke with Tim about this the other day -- he gave me the green
light to go ahead with this, but it turned into a bigger job than I
thought it would be. I revamped how the default RAS scheduling and
round_robin RMAPS mapping occurs. The previous algorithms were pretty
brain dead, and ignored the "slots" and "max_slots" tokens in
hostfiles. I considered this a big enough problem to fix it for the
beta (because there is currently no way to control where processes are
launched on SMPs).
There's still some more bells and whistles that I'd like to implement,
but there's no hurry, and they can go on the trunk at any time. My
patches below are for what I considered "essential", and do the
following:
- honor the "slots" and "max-slots" tokens in the hostfile (and all
their synonyms), meaning that we allocate/map until we fill slots,
and if there are still more processes to allocate/map, we keep going
until we fill max-slots (i.e., only oversubscribe a node if we have
to).
- offer two different algorithms, currently supported by two new
options to orterun. Remember that there are two parts here -- slot
allocation and process mapping. Slot allocation controls how many
processes we'll be running on a node. After that decision has been
made, process mapping effectively controls where the ranks of
MPI_COMM_WORLD (MCW) are placed. Some of the examples given below
don't make sense unless you remember that there is a difference
between the two (which makes total sense, but you have to think
about it in terms of both things):
1. "-bynode": allocates/maps one process per node in a round-robin
fashion until all slots on the node are taken. If we still have more
processes after all slots are taken, then keep going until all
max-slots are taken. Examples:
- The hostfile:
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -bynode -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 2
vogon: MCW ranks 1, 3, 4, 5
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4
vogon: MCW ranks 1, 3, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until each
node's max_slots is hit, of course)
- orterun -bynode -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4, 6
vogon: MCW ranks 1, 3, 5, 7, 8, 9, 10, 11
2. "-byslot" (this is the default if you don't specify -bynode):
greedily takes all available slots on a node for a job before moving
on to the next node. If we still have processes to allocate/schedule,
then oversubscribe all nodes equally (i.e., go round robin on all
nodes until each node's max_slots is hit). Examples:
- The hostfile
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 1
vogon: MCW ranks 2, 3, 4, 5
- orterun -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2
vogon: MCW ranks 3, 4, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until max_slots
is hit)
- orterun -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2, 3
vogon: MCW ranks 4, 5, 6, 7, 8, 9, 10, 11
The above examples are fairly contrived, and it's not clear from them
that you can get different allocation answers in all cases (the
mapping differences are obvious). Consider the following allocation
example:
- The hostfile
eddie count=4
vogon count=4
earth count=4
deep-thought count=4
- orterun -np 8 -hostfile hostfile a.out
eddie: 4 slots will be allocated
vogon: 4 slots will be allocated
earth: no slots allocated
deep-thought: no slots allocated
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: 2 slots will be allocated
vogon: 2 slots will be allocated
earth: 2 slots will be allocated
deep-thought: 2 slots will be allocated
This commit was SVN r5894.
2005-05-31 20:36:53 +04:00
" Whether to allocate/map processes round-robin by node " } ,
{ NULL , NULL , NULL , ' \0 ' , " byslot " , " byslot " , 0 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . by_slot , OPAL_CMD_LINE_TYPE_BOOL ,
(copied from a mail that has a lengthy description of this commit)
I spoke with Tim about this the other day -- he gave me the green
light to go ahead with this, but it turned into a bigger job than I
thought it would be. I revamped how the default RAS scheduling and
round_robin RMAPS mapping occurs. The previous algorithms were pretty
brain dead, and ignored the "slots" and "max_slots" tokens in
hostfiles. I considered this a big enough problem to fix it for the
beta (because there is currently no way to control where processes are
launched on SMPs).
There's still some more bells and whistles that I'd like to implement,
but there's no hurry, and they can go on the trunk at any time. My
patches below are for what I considered "essential", and do the
following:
- honor the "slots" and "max-slots" tokens in the hostfile (and all
their synonyms), meaning that we allocate/map until we fill slots,
and if there are still more processes to allocate/map, we keep going
until we fill max-slots (i.e., only oversubscribe a node if we have
to).
- offer two different algorithms, currently supported by two new
options to orterun. Remember that there are two parts here -- slot
allocation and process mapping. Slot allocation controls how many
processes we'll be running on a node. After that decision has been
made, process mapping effectively controls where the ranks of
MPI_COMM_WORLD (MCW) are placed. Some of the examples given below
don't make sense unless you remember that there is a difference
between the two (which makes total sense, but you have to think
about it in terms of both things):
1. "-bynode": allocates/maps one process per node in a round-robin
fashion until all slots on the node are taken. If we still have more
processes after all slots are taken, then keep going until all
max-slots are taken. Examples:
- The hostfile:
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -bynode -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 2
vogon: MCW ranks 1, 3, 4, 5
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4
vogon: MCW ranks 1, 3, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until each
node's max_slots is hit, of course)
- orterun -bynode -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4, 6
vogon: MCW ranks 1, 3, 5, 7, 8, 9, 10, 11
2. "-byslot" (this is the default if you don't specify -bynode):
greedily takes all available slots on a node for a job before moving
on to the next node. If we still have processes to allocate/schedule,
then oversubscribe all nodes equally (i.e., go round robin on all
nodes until each node's max_slots is hit). Examples:
- The hostfile
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 1
vogon: MCW ranks 2, 3, 4, 5
- orterun -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2
vogon: MCW ranks 3, 4, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until max_slots
is hit)
- orterun -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2, 3
vogon: MCW ranks 4, 5, 6, 7, 8, 9, 10, 11
The above examples are fairly contrived, and it's not clear from them
that you can get different allocation answers in all cases (the
mapping differences are obvious). Consider the following allocation
example:
- The hostfile
eddie count=4
vogon count=4
earth count=4
deep-thought count=4
- orterun -np 8 -hostfile hostfile a.out
eddie: 4 slots will be allocated
vogon: 4 slots will be allocated
earth: no slots allocated
deep-thought: no slots allocated
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: 2 slots will be allocated
vogon: 2 slots will be allocated
earth: 2 slots will be allocated
deep-thought: 2 slots will be allocated
This commit was SVN r5894.
2005-05-31 20:36:53 +04:00
" Whether to allocate/map processes round-robin by slot (the default) " } ,
2007-01-17 17:56:22 +03:00
{ " rmaps " , " base " , " pernode " , ' \0 ' , " pernode " , " pernode " , 0 ,
2006-12-13 07:51:38 +03:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
2006-12-12 03:54:05 +03:00
" Launch one process per available node on the specified number of nodes [no -np => use all allocated nodes] " } ,
2007-01-17 17:56:22 +03:00
{ " rmaps " , " base " , " n_pernode " , ' \0 ' , " npernode " , " npernode " , 1 ,
2006-12-13 07:51:38 +03:00
NULL , OPAL_CMD_LINE_TYPE_INT ,
2006-12-12 03:54:05 +03:00
" Launch n processes per node on all allocated nodes " } ,
2007-01-17 17:56:22 +03:00
{ " rmaps " , " base " , " no_oversubscribe " , ' \0 ' , " nooversubscribe " , " nooversubscribe " , 0 ,
2006-12-13 07:51:38 +03:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
2006-07-11 01:25:33 +04:00
" Nodes are not to be oversubscribed, even if the system supports such operation " } ,
2006-12-13 16:49:15 +03:00
{ " rmaps " , " base " , " display_map " , ' \0 ' , " display-map " , " display-map " , 0 ,
2006-12-03 16:59:23 +03:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
" Display the process map just before launch " } ,
2006-11-01 01:16:51 +03:00
2005-03-14 23:57:21 +03:00
/* mpiexec-like arguments */
{ NULL , NULL , NULL , ' \0 ' , " wdir " , " wdir " , 1 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . wdir , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-14 23:57:21 +03:00
" Set the working directory of the started processes " } ,
{ NULL , NULL , NULL , ' \0 ' , " path " , " path " , 1 ,
2005-07-04 04:13:44 +04:00
& orterun_globals . path , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-14 23:57:21 +03:00
" PATH to be used to look for executables to start processes " } ,
/* These arguments can be specified multiple times */
2005-09-20 12:56:02 +04:00
#if 0
/* JMS: Removed because it's not really implemented */
2005-03-14 23:57:21 +03:00
{ NULL , NULL , NULL , ' \0 ' , " arch " , " arch " , 1 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-14 23:57:21 +03:00
" Architecture to start processes on " } ,
2005-09-20 12:56:02 +04:00
# endif
2005-03-14 23:57:21 +03:00
{ NULL , NULL , NULL , ' H ' , " host " , " host " , 1 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_STRING ,
2005-03-14 23:57:21 +03:00
" List of hosts to invoke processes on " } ,
2005-11-20 19:06:53 +03:00
2006-07-05 00:12:35 +04:00
/* OSC mpiexec-like arguments */
{ NULL , NULL , NULL , ' \0 ' , " nolocal " , " nolocal " , 0 ,
2006-12-13 07:51:38 +03:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
2006-07-05 00:12:35 +04:00
" Do not run any MPI applications on the local node " } ,
2005-11-20 19:06:53 +03:00
/* User-level debugger arguments */
{ NULL , NULL , NULL , ' \0 ' , " tv " , " tv " , 0 ,
& orterun_globals . debugger , OPAL_CMD_LINE_TYPE_BOOL ,
" Deprecated backwards compatibility flag; synonym for \" --debug \" " } ,
{ NULL , NULL , NULL , ' \0 ' , " debug " , " debug " , 0 ,
& orterun_globals . debugger , OPAL_CMD_LINE_TYPE_BOOL ,
" Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter " } ,
{ " orte " , " base " , " user_debugger " , ' \0 ' , " debugger " , " debugger " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Sequence of debuggers to search for when \" --debug \" is used " } ,
2005-05-13 01:44:23 +04:00
/* OpenRTE arguments */
2005-11-20 19:06:53 +03:00
{ " orte " , " debug " , NULL , ' d ' , NULL , " debug-devel " , 0 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
2005-05-13 01:44:23 +04:00
" Enable debugging of OpenRTE " } ,
2006-10-11 19:18:57 +04:00
2005-05-13 01:44:23 +04:00
{ " orte " , " debug " , " daemons " , ' \0 ' , NULL , " debug-daemons " , 0 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_INT ,
2005-05-13 01:44:23 +04:00
" Enable debugging of any OpenRTE daemons used by this application " } ,
2006-10-11 19:18:57 +04:00
2005-05-13 01:44:23 +04:00
{ " orte " , " debug " , " daemons_file " , ' \0 ' , NULL , " debug-daemons-file " , 0 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
2005-05-13 01:44:23 +04:00
" Enable debugging of any OpenRTE daemons used by this application, storing output in files " } ,
2006-10-11 19:18:57 +04:00
2005-05-24 19:02:50 +04:00
{ " orte " , " no_daemonize " , NULL , ' \0 ' , NULL , " no-daemonize " , 0 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
2005-05-24 19:02:50 +04:00
" Do not detach OpenRTE daemons used by this application " } ,
2006-10-11 19:18:57 +04:00
2005-05-13 01:44:23 +04:00
{ " universe " , NULL , NULL , ' \0 ' , NULL , " universe " , 1 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_STRING ,
2005-05-13 01:44:23 +04:00
" Set the universe name as username@hostname:universe_name for this application " } ,
2006-10-11 19:18:57 +04:00
2005-05-13 01:44:23 +04:00
{ NULL , NULL , NULL , ' \0 ' , NULL , " tmpdir " , 1 ,
2005-07-04 04:13:44 +04:00
& orte_process_info . tmpdir_base , OPAL_CMD_LINE_TYPE_STRING ,
2005-05-13 01:44:23 +04:00
" Set the root for the session directory tree for orterun ONLY " } ,
2006-12-13 07:51:38 +03:00
{ NULL , NULL , NULL , ' \0 ' , NULL , " do-not-launch " , 0 ,
& orterun_globals . do_not_launch , OPAL_CMD_LINE_TYPE_BOOL ,
" Perform all necessary operations to prepare to launch the application, but do not actually launch it " } ,
2006-11-16 00:12:27 +03:00
{ NULL , NULL , NULL , ' \0 ' , " reuse-daemons " , " reuse-daemons " , 0 ,
2006-12-13 07:51:38 +03:00
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
2006-12-03 16:59:23 +03:00
" If set, reuse daemons to launch dynamically spawned processes " } ,
2006-11-16 00:12:27 +03:00
2006-02-28 14:52:12 +03:00
{ NULL , NULL , NULL , ' \0 ' , NULL , " prefix " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Prefix where Open MPI is installed on remote nodes " } ,
2006-10-06 17:02:56 +04:00
{ NULL , NULL , NULL , ' \0 ' , NULL , " noprefix " , 0 ,
2006-09-15 06:52:08 +04:00
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Disable automatic --prefix behavior " } ,
2006-03-23 19:53:11 +03:00
2005-03-14 23:57:21 +03:00
/* End of list */
{ NULL , NULL , NULL , ' \0 ' , NULL , NULL , 0 ,
2005-07-04 04:13:44 +04:00
NULL , OPAL_CMD_LINE_TYPE_NULL , NULL }
2005-03-14 23:57:21 +03:00
} ;
2006-08-23 06:35:00 +04:00
# if !defined(__WINDOWS__)
extern char * * environ ;
# endif /* !defined(__WINDOWS__) */
2005-03-14 23:57:21 +03:00
/*
* Local functions
*/
static void exit_callback ( int fd , short event , void * arg ) ;
2006-06-26 19:12:52 +04:00
static void abort_signal_callback ( int fd , short event , void * arg ) ;
static void signal_forward_callback ( int fd , short event , void * arg ) ;
2005-03-14 23:57:21 +03:00
static int create_app ( int argc , char * argv [ ] , orte_app_context_t * * app ,
2005-08-08 20:42:28 +04:00
bool * made_app , char * * * app_env ) ;
2005-03-14 23:57:21 +03:00
static int init_globals ( void ) ;
static int parse_globals ( int argc , char * argv [ ] ) ;
static int parse_locals ( int argc , char * argv [ ] ) ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and an app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
static int parse_appfile ( char * filename , char * * * env ) ;
2005-03-14 23:57:21 +03:00
static void job_state_callback ( orte_jobid_t jobid , orte_proc_state_t state ) ;
2006-09-15 01:29:51 +04:00
static void dump_aborted_procs ( orte_jobid_t jobid ) ;
2005-03-14 23:57:21 +03:00
2005-08-31 20:15:59 +04:00
int orterun ( int argc , char * argv [ ] )
2005-03-14 23:57:21 +03:00
{
orte_app_context_t * * apps ;
2006-10-09 05:04:00 +04:00
int rc , ret , i , num_apps , array_size ;
2006-02-08 20:40:11 +03:00
orte_proc_state_t cb_states ;
2006-09-15 01:29:51 +04:00
orte_job_state_t exit_state ;
2006-10-17 20:06:17 +04:00
opal_list_t attributes ;
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
opal_list_item_t * item ;
2006-12-13 07:51:38 +03:00
uint8_t flow ;
2005-03-14 23:57:21 +03:00
2005-11-20 19:06:53 +03:00
/* Setup MCA params */
mca_base_param_init ( ) ;
orte_register_params ( false ) ;
2006-06-26 19:12:52 +04:00
/* find our basename (the name of the executable) so that we can
use it in pretty - print error messages */
2005-07-04 04:13:44 +04:00
orterun_basename = opal_basename ( argv [ 0 ] ) ;
2006-06-08 22:27:17 +04:00
2005-03-14 23:57:21 +03:00
/* Check for some "global" command line params */
parse_globals ( argc , argv ) ;
/* If we're still here, parse each app */
parse_locals ( argc , argv ) ;
/* Convert the list of apps to an array of orte_app_context_t
pointers */
2005-07-08 22:48:25 +04:00
array_size = orte_pointer_array_get_size ( apps_pa ) ;
2006-08-23 06:35:00 +04:00
apps = ( orte_app_context_t * * ) malloc ( sizeof ( orte_app_context_t * ) * array_size ) ;
2005-03-14 23:57:21 +03:00
if ( NULL = = apps ) {
2006-02-16 23:40:23 +03:00
opal_show_help ( " help-orterun.txt " , " orterun:call-failed " ,
true , orterun_basename , " system " , " malloc returned NULL " , errno ) ;
2005-03-14 23:57:21 +03:00
exit ( 1 ) ;
}
2005-07-08 22:48:25 +04:00
num_apps = 0 ;
2006-10-02 19:03:43 +04:00
for ( i = 0 ; i < array_size ; + + i ) {
2005-09-05 00:54:19 +04:00
apps [ num_apps ] = ( orte_app_context_t * )
2005-07-03 08:02:01 +04:00
orte_pointer_array_get_item ( apps_pa , i ) ;
2005-08-08 20:42:28 +04:00
if ( NULL ! = apps [ num_apps ] ) {
2005-07-08 22:48:25 +04:00
num_apps + + ;
}
}
if ( 0 = = num_apps ) {
/* This should never happen -- this case should be caught in
create_app ( ) , but let ' s just double check . . . */
2005-09-05 00:54:19 +04:00
opal_show_help ( " help-orterun.txt " , " orterun:nothing-to-do " ,
2005-07-08 22:48:25 +04:00
true , orterun_basename ) ;
exit ( 1 ) ;
2005-04-16 01:52:58 +04:00
}
2005-03-14 23:57:21 +03:00
/* Intialize our Open RTE environment */
2005-08-27 00:13:35 +04:00
/* Set the flag telling orte_init that I am NOT a
2005-06-24 20:59:37 +04:00
* singleton , but am " infrastructure " - prevents setting
* up incorrect infrastructure that only a singleton would
* require
*/
2005-08-27 00:13:35 +04:00
if ( ORTE_SUCCESS ! = ( rc = orte_init ( true ) ) ) {
2005-07-04 06:38:44 +04:00
opal_show_help ( " help-orterun.txt " , " orterun:init-failure " , true ,
2005-03-14 23:57:21 +03:00
" orte_init() " , rc ) ;
return rc ;
}
2006-09-14 19:27:17 +04:00
/* pre-condition any network transports that require it */
if ( ORTE_SUCCESS ! = ( rc = orte_pre_condition_transports ( apps , num_apps ) ) ) {
ORTE_ERROR_LOG ( rc ) ;
opal_show_help ( " help-orterun.txt " , " orterun:precondition " , false ,
orterun_basename , NULL , NULL , rc ) ;
return rc ;
}
2005-09-05 00:54:19 +04:00
/* Prep to start the application */
2006-10-17 20:06:17 +04:00
/* construct the list of attributes */
OBJ_CONSTRUCT ( & attributes , opal_list_t ) ;
2006-12-13 07:51:38 +03:00
if ( orterun_globals . do_not_launch ) {
flow = ORTE_RMGR_SETUP | ORTE_RMGR_RES_DISC | ORTE_RMGR_ALLOC | ORTE_RMGR_MAP | ORTE_RMGR_SETUP_TRIGS ;
orte_rmgr . add_attribute ( & attributes , ORTE_RMGR_SPAWN_FLOW , ORTE_UINT8 , & flow , ORTE_RMGR_ATTR_OVERRIDE ) ;
}
2005-03-14 23:57:21 +03:00
2006-06-08 22:27:17 +04:00
/** setup callbacks for abort signals */
2005-09-20 21:09:11 +04:00
opal_signal_set ( & term_handler , SIGTERM ,
2006-06-26 19:12:52 +04:00
abort_signal_callback , & term_handler ) ;
2005-09-11 03:22:37 +04:00
opal_signal_add ( & term_handler , NULL ) ;
2005-09-20 21:09:11 +04:00
opal_signal_set ( & int_handler , SIGINT ,
2006-06-26 19:12:52 +04:00
abort_signal_callback , & int_handler ) ;
2005-09-11 03:22:37 +04:00
opal_signal_add ( & int_handler , NULL ) ;
2005-03-14 23:57:21 +03:00
2006-07-11 09:24:08 +04:00
# ifndef __WINDOWS__
2006-06-26 19:12:52 +04:00
/** setup callbacks for signals we should foward */
2006-06-08 22:27:17 +04:00
opal_signal_set ( & sigusr1_handler , SIGUSR1 ,
2006-06-26 19:12:52 +04:00
signal_forward_callback , & sigusr1_handler ) ;
2006-06-08 22:27:17 +04:00
opal_signal_add ( & sigusr1_handler , NULL ) ;
opal_signal_set ( & sigusr2_handler , SIGUSR2 ,
2006-06-26 19:12:52 +04:00
signal_forward_callback , & sigusr2_handler ) ;
2006-06-08 22:27:17 +04:00
opal_signal_add ( & sigusr2_handler , NULL ) ;
2006-07-11 09:24:08 +04:00
# endif /* __WINDOWS__ */
2005-08-30 21:29:43 +04:00
orte_totalview_init_before_spawn ( ) ;
2005-03-14 23:57:21 +03:00
/* Spawn the job */
2005-09-05 00:54:19 +04:00
2006-09-15 01:29:51 +04:00
cb_states = ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_AT_STG1 ;
2006-10-17 20:06:17 +04:00
rc = orte_rmgr . spawn_job ( apps , num_apps , & jobid , 0 , NULL , job_state_callback , cb_states , & attributes ) ;
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
while ( NULL ! = ( item = opal_list_remove_first ( & attributes ) ) ) OBJ_RELEASE ( item ) ;
OBJ_DESTRUCT ( & attributes ) ;
2005-03-14 23:57:21 +03:00
if ( ORTE_SUCCESS ! = rc ) {
2007-01-18 20:15:19 +03:00
/* JMS show_help unless it is ERR_SILENT */
if ( ORTE_ERR_SILENT ! = rc ) {
opal_output ( 0 , " %s: spawn failed with errno=%d \n " , orterun_basename , rc ) ;
}
2005-03-14 23:57:21 +03:00
} else {
2005-08-30 21:29:43 +04:00
2006-12-13 07:51:38 +03:00
if ( orterun_globals . do_not_launch ) {
/* we are done! */
goto DONE ;
}
2005-03-14 23:57:21 +03:00
/* Wait for the app to complete */
if ( wait_for_job_completion ) {
2005-07-04 02:45:48 +04:00
OPAL_THREAD_LOCK ( & orterun_globals . lock ) ;
2005-03-14 23:57:21 +03:00
while ( ! orterun_globals . exit ) {
2005-09-05 00:54:19 +04:00
opal_condition_wait ( & orterun_globals . cond ,
2005-03-14 23:57:21 +03:00
& orterun_globals . lock ) ;
}
2006-09-15 01:29:51 +04:00
/* check to see if the job was aborted */
if ( ORTE_SUCCESS ! = ( rc = orte_smr . get_job_state ( & exit_state , jobid ) ) ) {
ORTE_ERROR_LOG ( rc ) ;
/* define the exit state as abnormal by default */
exit_state = ORTE_JOB_STATE_ABORTED ;
}
if ( ORTE_JOB_STATE_TERMINATED ! = exit_state ) {
/* abnormal termination of some kind */
dump_aborted_procs ( jobid ) ;
/* If we showed more abort messages than were allowed,
show a followup message here */
if ( num_aborted > max_display_aborted ) {
i = num_aborted - max_display_aborted ;
printf ( " %d additional process%s aborted (not shown) \n " ,
i , ( ( i > 1 ) ? " es " : " " ) ) ;
}
if ( num_killed > 0 ) {
printf ( " %d process%s killed (possibly by Open MPI) \n " ,
num_killed , ( ( num_killed > 1 ) ? " es " : " " ) ) ;
}
}
2005-04-07 19:57:34 +04:00
/* Make sure we propagate the exit code */
2005-04-29 04:36:07 +04:00
if ( WIFEXITED ( orterun_globals . exit_status ) ) {
rc = WEXITSTATUS ( orterun_globals . exit_status ) ;
} else {
2006-02-16 01:41:29 +03:00
/* If a process was killed by a signal, then make the
* exit code of orterun be " signo + 128 " so that " prog "
* and " orterun prog " will both set the same status
* value for the shell */
rc = WTERMSIG ( orterun_globals . exit_status ) + 128 ;
2005-04-29 04:36:07 +04:00
}
2006-09-15 01:29:51 +04:00
/* the job is complete - now tell the orteds that it is
* okay to finalize and exit , we are done with them
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
* be sure to include any descendants so nothing is
* left hanging
2006-09-15 01:29:51 +04:00
*/
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
OBJ_CONSTRUCT ( & attributes , opal_list_t ) ;
orte_rmgr . add_attribute ( & attributes , ORTE_NS_INCLUDE_DESCENDANTS , ORTE_UNDEF , NULL , ORTE_RMGR_ATTR_OVERRIDE ) ;
if ( ORTE_SUCCESS ! = ( ret = orte_pls . terminate_orteds ( jobid , & attributes ) ) ) {
2006-09-15 01:29:51 +04:00
opal_show_help ( " help-orterun.txt " , " orterun:daemon-die " , false ,
2006-10-09 05:04:00 +04:00
orterun_basename , NULL , NULL , ret ) ;
2006-09-15 01:29:51 +04:00
}
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
while ( NULL ! = ( item = opal_list_remove_first ( & attributes ) ) ) OBJ_RELEASE ( item ) ;
OBJ_DESTRUCT ( & attributes ) ;
2005-07-04 02:45:48 +04:00
OPAL_THREAD_UNLOCK ( & orterun_globals . lock ) ;
2005-04-16 01:52:58 +04:00
2005-03-14 23:57:21 +03:00
}
}
2006-12-13 07:51:38 +03:00
DONE :
2005-03-14 23:57:21 +03:00
for ( i = 0 ; i < num_apps ; + + i ) {
OBJ_RELEASE ( apps [ i ] ) ;
}
free ( apps ) ;
2005-07-03 08:02:01 +04:00
OBJ_RELEASE ( apps_pa ) ;
2006-10-17 20:06:17 +04:00
2005-03-14 23:57:21 +03:00
orte_finalize ( ) ;
2005-04-13 19:26:33 +04:00
free ( orterun_basename ) ;
2005-03-14 23:57:21 +03:00
return rc ;
}
2005-03-31 23:39:02 +04:00
/*
 * On abnormal termination, report the processes of the given job that
 * were terminated by a signal.
 *
 * The job's segment is queried on the registry for each process'
 * name, pid, rank, exit code and node name.  For every process with a
 * rank entry whose exit status indicates death by signal:
 *   - signal 9 only increments num_killed;
 *   - any other signal increments num_aborted, and a help message is
 *     shown for it while fewer than max_display_aborted aborts have
 *     been counted.
 * The first exit status found while orterun_globals.exit_status is
 * still zero is stored there so it can be returned on exit.
 *
 * Registry errors are logged and end the function early.
 */
static void dump_aborted_procs(orte_jobid_t jobid)
{
    char *segment;
    orte_gpr_value_t **values = NULL;
    orte_std_cntr_t v, kv, num_values = 0;
    int rc;
    int32_t exit_status = 0;
    bool exit_status_set;
    char *keys[] = {
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_PID_KEY,
        ORTE_PROC_RANK_KEY,
        ORTE_PROC_EXIT_CODE_KEY,
        ORTE_NODE_NAME_KEY,
        NULL
    };

    OPAL_TRACE_ARG1(1, jobid);

    /* Look up the job's segment, then fetch the keys of interest */
    rc = orte_schema.get_job_segment_name(&segment, jobid);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    rc = orte_gpr.get(ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
                      segment, NULL, keys, &num_values, &values);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        free(segment);
        return;
    }

    for (v = 0; v < num_values; v++) {
        orte_gpr_value_t *value = values[v];
        orte_process_name_t name, *nptr;
        pid_t pid = 0, *pidptr;
        orte_std_cntr_t rank = 0, *sptr;
        bool rank_found = false;
        char *node_name = NULL;
        orte_exit_code_t *ecptr;

        exit_status = 0;
        exit_status_set = false;

        /* Pull the fields out of this value's key/value pairs.  A
           failed unpack is logged and that pair is skipped. */
        for (kv = 0; kv < value->cnt; kv++) {
            orte_gpr_keyval_t *keyval = value->keyvals[kv];

            if (0 == strcmp(keyval->key, ORTE_PROC_NAME_KEY)) {
                rc = orte_dss.get((void **) &nptr, keyval->value, ORTE_NAME);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                } else {
                    name = *nptr;
                }
            } else if (0 == strcmp(keyval->key, ORTE_PROC_PID_KEY)) {
                rc = orte_dss.get((void **) &pidptr, keyval->value, ORTE_PID);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                } else {
                    pid = *pidptr;
                }
            } else if (0 == strcmp(keyval->key, ORTE_PROC_RANK_KEY)) {
                rc = orte_dss.get((void **) &sptr, keyval->value, ORTE_STD_CNTR);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                } else {
                    rank_found = true;
                    rank = *sptr;
                }
            } else if (0 == strcmp(keyval->key, ORTE_PROC_EXIT_CODE_KEY)) {
                rc = orte_dss.get((void **) &ecptr, keyval->value, ORTE_EXIT_CODE);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                } else {
                    exit_status = *ecptr;
                    exit_status_set = true;
                }
            } else if (0 == strcmp(keyval->key, ORTE_NODE_NAME_KEY)) {
                node_name = (char *) (keyval->value->data);
            }
        }

        if (rank_found && WIFSIGNALED(exit_status)) {
            if (9 == WTERMSIG(exit_status)) {
                /* signal 9: counted, but not reported individually */
                ++num_killed;
            } else {
                if (num_aborted < max_display_aborted) {
#ifdef HAVE_STRSIGNAL
                    opal_show_help(" help-orterun.txt ", " orterun:proc-aborted-strsignal ", false,
                                   orterun_basename, (unsigned long) rank, (unsigned long) pid,
                                   node_name, WTERMSIG(exit_status),
                                   strsignal(WTERMSIG(exit_status)));
#else
                    opal_show_help(" help-orterun.txt ", " orterun:proc-aborted ", false,
                                   orterun_basename, (unsigned long) rank, (unsigned long) pid,
                                   node_name, WTERMSIG(exit_status));
#endif
                }
                ++num_aborted;
            }
        }

        /* Hold on to the first non-zero exit status so it can be
           returned when exiting; if every status is zero, zero is
           returned.  The visible caller, orterun(), already holds
           orterun_globals.lock, so it is not taken again here. */
        if (exit_status_set && 0 == orterun_globals.exit_status) {
            orterun_globals.exit_status = exit_status;
        }

        OBJ_RELEASE(value);
    }

    if (NULL != values) {
        free(values);
    }
    free(segment);
}
2005-03-14 23:57:21 +03:00
/*
 * Job state callback: signals the main thread when the application
 * completes.
 *
 * jobid - job the notification is about
 * state - process state that triggered the callback
 *
 * Runs with orterun_globals.lock held for its whole body.
 */
static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state)
{
    OPAL_TRACE_ARG2(1, jobid, state);

    OPAL_THREAD_LOCK(&orterun_globals.lock);

    /* There are only two states that we're interested in here:

       TERMINATED: which means that all the processes in the job have
          completed (normally and/or abnormally).

       AT_STG1: which means that everyone has hit stage gate 1, so we
          can do the parallel debugger startup stuff.

       Remember that the rmgr itself will also be called for the
       ABORTED state and call the pls.terminate_job, which will result
       in killing all the other processes. */

    if (orte_debug_flag) {
        /* Cast explicitly so the arguments match the conversion
           specifiers whatever the widths of the ORTE typedefs are */
        opal_output(0, " spawn: in job_state_callback(jobid = %ld, state = 0x%x) \n ",
                    (long) jobid, (unsigned int) state);
    }

    switch (state) {
    case ORTE_PROC_STATE_TERMINATED:
        /* set the exit status to indicate normal termination, then
           wake up whoever is waiting on the condition */
        orterun_globals.exit_status = 0;
        orterun_globals.exit = true;
        opal_condition_signal(&orterun_globals.cond);
        break;

    case ORTE_PROC_STATE_AT_STG1:
        orte_totalview_init_after_spawn(jobid);
        break;

    default:
        opal_output(0, " orterun: job state callback in unexpected state - jobid %lu, state 0x%04x \n ",
                    (unsigned long) jobid, (unsigned int) state);
        break;
    }

    OPAL_THREAD_UNLOCK(&orterun_globals.lock);
}
2005-03-31 23:39:02 +04:00
/*
2005-09-05 00:54:19 +04:00
* Fail - safe in the event the job hangs and doesn ' t
2005-03-31 23:39:02 +04:00
* cleanup correctly .
*/
2005-03-14 23:57:21 +03:00
static void exit_callback ( int fd , short event , void * arg )
{
2005-09-19 21:20:01 +04:00
OPAL_TRACE ( 1 ) ;
2005-09-20 21:09:11 +04:00
2005-07-29 01:17:48 +04:00
opal_show_help ( " help-orterun.txt " , " orterun:abnormal-exit " ,
2005-08-27 17:32:25 +04:00
true , orterun_basename , orterun_basename ) ;
2005-08-27 00:36:11 +04:00
2005-09-11 03:22:37 +04:00
/* Remove the TERM and INT signal handlers */
opal_signal_del ( & term_handler ) ;
opal_signal_del ( & int_handler ) ;
2006-07-11 09:24:08 +04:00
# ifndef __WINDOWS__
2006-06-08 22:27:17 +04:00
/** Remove the USR signal handlers */
opal_signal_del ( & sigusr1_handler ) ;
opal_signal_del ( & sigusr2_handler ) ;
2006-07-11 09:24:08 +04:00
# endif /* __WINDOWS__ */
2006-06-08 22:27:17 +04:00
2005-08-27 00:36:11 +04:00
/* Trigger the normal exit conditions */
orterun_globals . exit = true ;
orterun_globals . exit_status = 1 ;
opal_condition_signal ( & orterun_globals . cond ) ;
2005-03-14 23:57:21 +03:00
}
2005-03-31 23:39:02 +04:00
/*
* Attempt to terminate the job and wait for callback indicating
2005-09-05 00:54:19 +04:00
* the job has been aborted .
2005-03-31 23:39:02 +04:00
*/
2007-01-08 23:25:26 +03:00
/* Progress of abort-signal handling.  The first enumerator is zero, so
   a zero-initialized static of this type starts out as "first". */
typedef enum {
    ABORT_SIGNAL_FIRST      = 0,  /* no abort signal handled yet */
    ABORT_SIGNAL_PROCESSING = 1,  /* first abort signal is being handled */
    ABORT_SIGNAL_WARNED     = 2,  /* a further signal arrived; warning shown */
    ABORT_SIGNAL_DONE       = 3   /* handling finished or exit scheduled */
} abort_signal_state_t;
2006-06-08 22:27:17 +04:00
static void abort_signal_callback ( int fd , short flags , void * arg )
2005-03-14 23:57:21 +03:00
{
int ret ;
2006-11-11 07:03:45 +03:00
struct timeval tv = { 1 , 0 } ;
2005-07-04 03:09:55 +04:00
opal_event_t * event ;
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
opal_list_t attrs ;
opal_list_item_t * item ;
2007-01-08 23:25:26 +03:00
static abort_signal_state_t state ;
static struct timeval invoked , now ;
double a , b ;
2005-09-20 21:09:11 +04:00
2005-09-19 21:20:01 +04:00
OPAL_TRACE ( 1 ) ;
2005-09-20 21:09:11 +04:00
2007-01-08 23:25:26 +03:00
/* If this whole process has already completed, then bail */
switch ( state ) {
case ABORT_SIGNAL_FIRST :
/* This is the first time through */
state = ABORT_SIGNAL_PROCESSING ;
break ;
case ABORT_SIGNAL_WARNED :
gettimeofday ( & now , NULL ) ;
a = invoked . tv_sec * 1000000 + invoked . tv_usec ;
b = now . tv_sec * 1000000 + invoked . tv_usec ;
if ( b - a < = 1000000 ) {
/* We are in an event handler; exit_callback() will delete
the handler that is currently running ( which is a Bad
Thing ) , so we can ' t call it directly . Instead , we have
to exit this handler and setup to call exit_handler ( )
after this . */
if ( NULL ! = ( event = ( opal_event_t * )
malloc ( sizeof ( opal_event_t ) ) ) ) {
opal_evtimer_set ( event , exit_callback , NULL ) ;
now . tv_sec = 0 ;
now . tv_usec = 0 ;
opal_evtimer_add ( event , & now ) ;
state = ABORT_SIGNAL_DONE ;
}
return ;
}
/* Otherwise fall through to PROCESSING and warn again */
case ABORT_SIGNAL_PROCESSING :
opal_show_help ( " help-orterun.txt " , " orterun:sigint-while-processing " ,
true , orterun_basename , orterun_basename ,
orterun_basename ) ;
gettimeofday ( & invoked , NULL ) ;
state = ABORT_SIGNAL_WARNED ;
return ;
case ABORT_SIGNAL_DONE :
/* Nothing to do -- return */
return ;
2005-03-14 23:57:21 +03:00
}
2007-01-08 23:25:26 +03:00
2006-06-26 22:21:45 +04:00
if ( ! orterun_globals . quiet ) {
2006-09-15 01:29:51 +04:00
fprintf ( stderr , " %s: killing job... \n \n " , orterun_basename ) ;
2006-06-26 22:21:45 +04:00
}
2005-03-14 23:57:21 +03:00
2006-09-15 01:29:51 +04:00
/* terminate the job - this will also wakeup orterun so
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
* it can kill all the orteds . Be sure to kill all the job ' s
* descendants , if any , so nothing is left hanging
2006-09-15 01:29:51 +04:00
*/
2006-10-02 04:46:31 +04:00
if ( jobid ! = ORTE_JOBID_INVALID ) {
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
OBJ_CONSTRUCT ( & attrs , opal_list_t ) ;
orte_rmgr . add_attribute ( & attrs , ORTE_NS_INCLUDE_DESCENDANTS , ORTE_UNDEF , NULL , ORTE_RMGR_ATTR_OVERRIDE ) ;
ret = orte_pls . terminate_job ( jobid , & attrs ) ;
while ( NULL ! = ( item = opal_list_remove_first ( & attrs ) ) ) OBJ_RELEASE ( item ) ;
OBJ_DESTRUCT ( & attrs ) ;
2005-03-14 23:57:21 +03:00
if ( ORTE_SUCCESS ! = ret ) {
2006-10-02 04:46:31 +04:00
jobid = ORTE_JOBID_INVALID ;
2005-03-14 23:57:21 +03:00
}
}
2006-09-15 01:29:51 +04:00
2006-11-17 22:06:10 +03:00
/* setup a delay to give the orteds time to complete their departure */
2005-07-04 03:09:55 +04:00
if ( NULL ! = ( event = ( opal_event_t * ) malloc ( sizeof ( opal_event_t ) ) ) ) {
opal_evtimer_set ( event , exit_callback , NULL ) ;
opal_evtimer_add ( event , & tv ) ;
2005-03-14 23:57:21 +03:00
}
2006-11-11 07:03:45 +03:00
2007-01-08 23:25:26 +03:00
state = ABORT_SIGNAL_DONE ;
2005-03-14 23:57:21 +03:00
}
2006-06-08 22:27:17 +04:00
/**
* Pass user signals to the remote application processes
*/
2006-06-26 19:12:52 +04:00
static void signal_forward_callback ( int fd , short event , void * arg )
2006-06-08 22:27:17 +04:00
{
2006-08-23 06:35:00 +04:00
struct opal_event * signal = ( struct opal_event * ) arg ;
2006-06-26 19:12:52 +04:00
int signum , ret ;
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
opal_list_t attrs ;
opal_list_item_t * item ;
2006-06-08 22:27:17 +04:00
OPAL_TRACE ( 1 ) ;
2006-06-26 19:12:52 +04:00
signum = OPAL_EVENT_SIGNAL ( signal ) ;
2006-06-26 22:21:45 +04:00
if ( ! orterun_globals . quiet ) {
fprintf ( stderr , " %s: Forwarding signal %d to job " ,
2006-06-26 19:12:52 +04:00
orterun_basename , signum ) ;
2006-06-26 22:21:45 +04:00
}
2006-06-08 22:27:17 +04:00
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
/** send the signal out to the processes, including any descendants */
OBJ_CONSTRUCT ( & attrs , opal_list_t ) ;
orte_rmgr . add_attribute ( & attrs , ORTE_NS_INCLUDE_DESCENDANTS , ORTE_UNDEF , NULL , ORTE_RMGR_ATTR_OVERRIDE ) ;
if ( ORTE_SUCCESS ! = ( ret = orte_pls . signal_job ( jobid , signum , & attrs ) ) ) {
2006-06-26 19:12:52 +04:00
fprintf ( stderr , " Signal %d could not be sent to the job (returned %d) " ,
signum , ret ) ;
2006-06-08 22:27:17 +04:00
}
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
while ( NULL ! = ( item = opal_list_remove_first ( & attrs ) ) ) OBJ_RELEASE ( item ) ;
OBJ_DESTRUCT ( & attrs ) ;
2006-06-08 22:27:17 +04:00
}
2005-09-05 00:54:19 +04:00
static int init_globals ( void )
2005-03-14 23:57:21 +03:00
{
2005-03-19 02:58:36 +03:00
/* Only CONSTRUCT things once */
if ( ! globals_init ) {
2005-07-04 02:45:48 +04:00
OBJ_CONSTRUCT ( & orterun_globals . lock , opal_mutex_t ) ;
OBJ_CONSTRUCT ( & orterun_globals . cond , opal_condition_t ) ;
2006-10-23 07:34:08 +04:00
orterun_globals . hostfile = NULL ;
orterun_globals . env_val = NULL ;
orterun_globals . appfile = NULL ;
orterun_globals . wdir = NULL ;
orterun_globals . path = NULL ;
2005-03-19 02:58:36 +03:00
}
2006-07-11 01:25:33 +04:00
/* Reset the other fields every time */
2005-03-19 02:58:36 +03:00
2006-10-23 07:34:08 +04:00
orterun_globals . help = false ;
orterun_globals . version = false ;
orterun_globals . verbose = false ;
orterun_globals . quiet = false ;
orterun_globals . exit = false ;
orterun_globals . no_wait_for_job_completion = false ;
orterun_globals . by_node = false ;
orterun_globals . by_slot = false ;
orterun_globals . debugger = false ;
2006-12-13 07:51:38 +03:00
orterun_globals . do_not_launch = false ;
2006-12-12 03:54:05 +03:00
orterun_globals . num_procs = 0 ;
orterun_globals . exit_status = 0 ;
2006-11-16 01:59:01 +03:00
if ( NULL ! = orterun_globals . hostfile )
2006-10-23 07:34:08 +04:00
free ( orterun_globals . hostfile ) ;
orterun_globals . hostfile = NULL ;
2006-11-16 01:59:01 +03:00
if ( NULL ! = orterun_globals . env_val )
2006-10-23 07:34:08 +04:00
free ( orterun_globals . env_val ) ;
orterun_globals . env_val = NULL ;
2006-11-16 01:59:01 +03:00
if ( NULL ! = orterun_globals . appfile )
2006-10-23 07:34:08 +04:00
free ( orterun_globals . appfile ) ;
orterun_globals . appfile = NULL ;
2006-11-16 01:59:01 +03:00
if ( NULL ! = orterun_globals . wdir )
2006-10-23 07:34:08 +04:00
free ( orterun_globals . wdir ) ;
orterun_globals . wdir = NULL ;
if ( NULL ! = orterun_globals . path )
free ( orterun_globals . path ) ;
orterun_globals . path = NULL ;
2005-03-19 02:58:36 +03:00
/* All done */
globals_init = true ;
2005-03-14 23:57:21 +03:00
return ORTE_SUCCESS ;
}
static int parse_globals ( int argc , char * argv [ ] )
{
2005-07-04 04:13:44 +04:00
opal_cmd_line_t cmd_line ;
2005-08-08 20:42:28 +04:00
int id , ret ;
2005-03-14 23:57:21 +03:00
/* Setup and parse the command line */
init_globals ( ) ;
2005-07-04 04:13:44 +04:00
opal_cmd_line_create ( & cmd_line , cmd_line_init ) ;
2005-08-02 22:52:12 +04:00
mca_base_cmd_line_setup ( & cmd_line ) ;
2006-02-12 04:33:29 +03:00
if ( ORTE_SUCCESS ! = ( ret = opal_cmd_line_parse ( & cmd_line , true ,
2005-07-29 01:17:48 +04:00
argc , argv ) ) ) {
return ret ;
}
2005-09-05 00:54:19 +04:00
2006-06-09 21:21:23 +04:00
/* print version if requested. Do this before check for help so
that - - version - - help works as one might expect . */
2006-06-22 23:48:27 +04:00
if ( orterun_globals . version & &
! ( 1 = = argc | | orterun_globals . help ) ) {
2006-06-09 21:21:23 +04:00
char * project_name = NULL ;
if ( 0 = = strcmp ( orterun_basename , " mpirun " ) ) {
project_name = " Open MPI " ;
} else {
project_name = " OpenRTE " ;
}
opal_show_help ( " help-orterun.txt " , " orterun:version " , false ,
2006-06-22 23:48:27 +04:00
orterun_basename , project_name , OPAL_VERSION ,
PACKAGE_BUGREPORT ) ;
2006-06-09 21:21:23 +04:00
/* if we were the only argument, exit */
if ( 2 = = argc ) exit ( 0 ) ;
}
2005-07-29 01:17:48 +04:00
/* Check for help request */
2005-09-05 00:54:19 +04:00
2005-04-12 20:01:30 +04:00
if ( 1 = = argc | | orterun_globals . help ) {
2005-03-14 23:57:21 +03:00
char * args = NULL ;
2006-06-22 23:48:27 +04:00
char * project_name = NULL ;
if ( 0 = = strcmp ( orterun_basename , " mpirun " ) ) {
project_name = " Open MPI " ;
} else {
project_name = " OpenRTE " ;
}
2005-07-04 04:13:44 +04:00
args = opal_cmd_line_get_usage_msg ( & cmd_line ) ;
2005-07-04 06:38:44 +04:00
opal_show_help ( " help-orterun.txt " , " orterun:usage " , false ,
2006-06-22 23:48:27 +04:00
orterun_basename , project_name , OPAL_VERSION ,
orterun_basename , args ,
PACKAGE_BUGREPORT ) ;
2005-03-14 23:57:21 +03:00
free ( args ) ;
2005-09-05 00:54:19 +04:00
2005-03-14 23:57:21 +03:00
/* If someone asks for help, that should be all we do */
exit ( 0 ) ;
}
2005-11-20 19:06:53 +03:00
/* Do we want a user-level debugger? */
2005-10-05 14:24:34 +04:00
2005-11-20 19:06:53 +03:00
if ( orterun_globals . debugger ) {
orte_run_debugger ( orterun_basename , argc , argv ) ;
}
2005-10-05 14:24:34 +04:00
2005-06-06 17:43:20 +04:00
/* Allocate and map by node or by slot? Shortcut for setting an
MCA param . */
(copied from a mail that has a lengthy description of this commit)
I spoke with Tim about this the other day -- he gave me the green
light to go ahead with this, but it turned into a bigger job than I
thought it would be. I revamped how the default RAS scheduling and
round_robin RMAPS mapping occurs. The previous algorithms were pretty
brain dead, and ignored the "slots" and "max_slots" tokens in
hostfiles. I considered this a big enough problem to fix it for the
beta (because there is currently no way to control where processes are
launched on SMPs).
There's still some more bells and whistles that I'd like to implement,
but there's no hurry, and they can go on the trunk at any time. My
patches below are for what I considered "essential", and do the
following:
- honor the "slots" and "max-slots" tokens in the hostfile (and all
their synonyms), meaning that we allocate/map until we fill slots,
and if there are still more processes to allocate/map, we keep going
until we fill max-slots (i.e., only oversubscribe a node if we have
to).
- offer two different algorithms, currently supported by two new
options to orterun. Remember that there are two parts here -- slot
allocation and process mapping. Slot allocation controls how many
processes we'll be running on a node. After that decision has been
made, process mapping effectively controls where the ranks of
MPI_COMM_WORLD (MCW) are placed. Some of the examples given below
don't make sense unless you remember that there is a difference
between the two (which makes total sense, but you have to think
about it in terms of both things):
1. "-bynode": allocates/maps one process per node in a round-robin
fashion until all slots on the node are taken. If we still have more
processes after all slots are taken, then keep going until all
max-slots are taken. Examples:
- The hostfile:
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -bynode -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 2
vogon: MCW ranks 1, 3, 4, 5
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4
vogon: MCW ranks 1, 3, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until each
node's max_slots is hit, of course)
- orterun -bynode -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4, 6
vogon: MCW ranks 1, 3, 5, 7, 8, 9, 10, 11
2. "-byslot" (this is the default if you don't specify -bynode):
greedily takes all available slots on a node for a job before moving
on to the next node. If we still have processes to allocate/schedule,
then oversubscribe all nodes equally (i.e., go round robin on all
nodes until each node's max_slots is hit). Examples:
- The hostfile
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 1
vogon: MCW ranks 2, 3, 4, 5
- orterun -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2
vogon: MCW ranks 3, 4, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until max_slots
is hit)
- orterun -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2, 3
vogon: MCW ranks 4, 5, 6, 7, 8, 9, 10, 11
The above examples are fairly contrived, and it's not clear from them
that you can get different allocation answers in all cases (the
mapping differences are obvious). Consider the following allocation
example:
- The hostfile
eddie count=4
vogon count=4
earth count=4
deep-thought count=4
- orterun -np 8 -hostfile hostfile a.out
eddie: 4 slots will be allocated
vogon: 4 slots will be allocated
earth: no slots allocated
deep-thought: no slots allocated
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: 2 slots will be allocated
vogon: 2 slots will be allocated
earth: 2 slots will be allocated
deep-thought: 2 slots will be allocated
This commit was SVN r5894.
2005-05-31 20:36:53 +04:00
2005-06-06 17:43:20 +04:00
/* JMS To be changed post-beta to LAM's C/N command line notation */
2005-09-27 06:54:15 +04:00
/* Don't initialize the MCA parameter here unless we have to,
2005-10-08 02:24:52 +04:00
* since it really should be initialized in rmaps_base_open */
2005-09-27 06:54:15 +04:00
if ( orterun_globals . by_node | | orterun_globals . by_slot ) {
char * policy = NULL ;
2006-07-05 00:12:35 +04:00
id = mca_base_param_reg_string_name ( " rmaps " , " base_schedule_policy " ,
2005-10-08 02:24:52 +04:00
" Scheduling policy for RMAPS. [slot | node] " ,
2005-09-27 06:54:15 +04:00
false , false , " slot " , & policy ) ;
if ( orterun_globals . by_node ) {
orterun_globals . by_slot = false ;
mca_base_param_set_string ( id , " node " ) ;
} else {
orterun_globals . by_slot = true ;
mca_base_param_set_string ( id , " slot " ) ;
}
free ( policy ) ;
}
else {
/* Default */
(copied from a mail that has a lengthy description of this commit)
I spoke with Tim about this the other day -- he gave me the green
light to go ahead with this, but it turned into a bigger job than I
thought it would be. I revamped how the default RAS scheduling and
round_robin RMAPS mapping occurs. The previous algorithms were pretty
brain dead, and ignored the "slots" and "max_slots" tokens in
hostfiles. I considered this a big enough problem to fix it for the
beta (because there is currently no way to control where processes are
launched on SMPs).
There's still some more bells and whistles that I'd like to implement,
but there's no hurry, and they can go on the trunk at any time. My
patches below are for what I considered "essential", and do the
following:
- honor the "slots" and "max-slots" tokens in the hostfile (and all
their synonyms), meaning that we allocate/map until we fill slots,
and if there are still more processes to allocate/map, we keep going
until we fill max-slots (i.e., only oversubscribe a node if we have
to).
- offer two different algorithms, currently supported by two new
options to orterun. Remember that there are two parts here -- slot
allocation and process mapping. Slot allocation controls how many
processes we'll be running on a node. After that decision has been
made, process mapping effectively controls where the ranks of
MPI_COMM_WORLD (MCW) are placed. Some of the examples given below
don't make sense unless you remember that there is a difference
between the two (which makes total sense, but you have to think
about it in terms of both things):
1. "-bynode": allocates/maps one process per node in a round-robin
fashion until all slots on the node are taken. If we still have more
processes after all slots are taken, then keep going until all
max-slots are taken. Examples:
- The hostfile:
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -bynode -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 2
vogon: MCW ranks 1, 3, 4, 5
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4
vogon: MCW ranks 1, 3, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until each
node's max_slots is hit, of course)
- orterun -bynode -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 2, 4, 6
vogon: MCW ranks 1, 3, 5, 7, 8, 9, 10, 11
2. "-byslot" (this is the default if you don't specify -bynode):
greedily takes all available slots on a node for a job before moving
on to the next node. If we still have processes to allocate/schedule,
then oversubscribe all nodes equally (i.e., go round robin on all
nodes until each node's max_slots is hit). Examples:
- The hostfile
eddie slots=2 max-slots=4
vogon slots=4 max-slots=8
- orterun -np 6 -hostfile hostfile a.out
eddie: MCW ranks 0, 1
vogon: MCW ranks 2, 3, 4, 5
- orterun -np 8 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2
vogon: MCW ranks 3, 4, 5, 6, 7
-> the algorithm oversubscribes all nodes "equally" (until max_slots
is hit)
- orterun -np 12 -hostfile hostfile a.out
eddie: MCW ranks 0, 1, 2, 3
vogon: MCW ranks 4, 5, 6, 7, 8, 9, 10, 11
The above examples are fairly contrived, and it's not clear from them
that you can get different allocation answers in all cases (the
mapping differences are obvious). Consider the following allocation
example:
- The hostfile
eddie count=4
vogon count=4
earth count=4
deep-thought count=4
- orterun -np 8 -hostfile hostfile a.out
eddie: 4 slots will be allocated
vogon: 4 slots will be allocated
earth: no slots allocated
deep-thought: no slots allocated
- orterun -bynode -np 8 -hostfile hostfile a.out
eddie: 2 slots will be allocated
vogon: 2 slots will be allocated
earth: 2 slots will be allocated
deep-thought: 2 slots will be allocated
This commit was SVN r5894.
2005-05-31 20:36:53 +04:00
orterun_globals . by_slot = true ;
}
2006-10-07 23:50:12 +04:00
2006-12-13 07:51:38 +03:00
/* If we don't want to wait, we don't want to wait */
2005-03-14 23:57:21 +03:00
if ( orterun_globals . no_wait_for_job_completion ) {
wait_for_job_completion = false ;
}
OBJ_DESTRUCT ( & cmd_line ) ;
return ORTE_SUCCESS ;
}
static int parse_locals ( int argc , char * argv [ ] )
{
int i , rc , app_num ;
int temp_argc ;
2005-08-08 20:42:28 +04:00
char * * temp_argv , * * env ;
2005-03-14 23:57:21 +03:00
orte_app_context_t * app ;
bool made_app ;
2006-08-15 23:54:10 +04:00
orte_std_cntr_t j , size1 ;
2005-03-14 23:57:21 +03:00
/* Make the apps */
temp_argc = 0 ;
temp_argv = NULL ;
2005-07-04 04:13:44 +04:00
opal_argv_append ( & temp_argc , & temp_argv , argv [ 0 ] ) ;
2006-03-24 18:39:09 +03:00
/* Make the max size of the array be INT_MAX because we may be
parsing an app file , in which case we don ' t know how many
entries there will be . The max size of an orte_pointer_array
is only a safety net ; it only initially allocates block_size
entries ( 2 , in this case ) */
2006-03-23 20:55:25 +03:00
orte_pointer_array_init ( & apps_pa , 1 , INT_MAX , 2 ) ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
2005-08-08 20:42:28 +04:00
/* NOTE: This bogus env variable is necessary in the calls to
create_app ( ) , below . See comment immediately before the
create_app ( ) function for an explanation . */
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
env = NULL ;
2005-03-14 23:57:21 +03:00
for ( app_num = 0 , i = 1 ; i < argc ; + + i ) {
if ( 0 = = strcmp ( argv [ i ] , " : " ) ) {
/* Make an app with this argv */
2005-07-04 04:13:44 +04:00
if ( opal_argv_count ( temp_argv ) > 1 ) {
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
if ( NULL ! = env ) {
2005-07-04 04:13:44 +04:00
opal_argv_free ( env ) ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
env = NULL ;
}
2006-03-24 18:28:42 +03:00
app = NULL ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
rc = create_app ( temp_argc , temp_argv , & app , & made_app , & env ) ;
2006-03-23 19:53:11 +03:00
/** keep track of the number of apps - point this app_context to that index */
2005-03-14 23:57:21 +03:00
if ( ORTE_SUCCESS ! = rc ) {
/* Assume that the error message has already been
printed ; no need to cleanup - - we can just
exit */
exit ( 1 ) ;
}
if ( made_app ) {
2006-08-15 23:54:10 +04:00
orte_std_cntr_t dummy ;
2006-03-24 18:28:42 +03:00
app - > idx = app_num ;
+ + app_num ;
2005-07-03 08:02:01 +04:00
orte_pointer_array_add ( & dummy , apps_pa , app ) ;
2005-03-14 23:57:21 +03:00
}
2005-09-05 00:54:19 +04:00
2005-03-14 23:57:21 +03:00
/* Reset the temps */
2005-09-05 00:54:19 +04:00
2005-03-14 23:57:21 +03:00
temp_argc = 0 ;
temp_argv = NULL ;
2005-07-04 04:13:44 +04:00
opal_argv_append ( & temp_argc , & temp_argv , argv [ 0 ] ) ;
2005-03-14 23:57:21 +03:00
}
} else {
2005-07-04 04:13:44 +04:00
opal_argv_append ( & temp_argc , & temp_argv , argv [ i ] ) ;
2005-03-14 23:57:21 +03:00
}
}
2005-07-04 04:13:44 +04:00
if ( opal_argv_count ( temp_argv ) > 1 ) {
2006-03-24 18:28:42 +03:00
app = NULL ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
rc = create_app ( temp_argc , temp_argv , & app , & made_app , & env ) ;
2005-03-14 23:57:21 +03:00
if ( ORTE_SUCCESS ! = rc ) {
/* Assume that the error message has already been printed;
no need to cleanup - - we can just exit */
exit ( 1 ) ;
}
if ( made_app ) {
2006-08-15 23:54:10 +04:00
orte_std_cntr_t dummy ;
2006-03-24 18:28:42 +03:00
app - > idx = app_num ;
+ + app_num ;
2005-07-03 08:02:01 +04:00
orte_pointer_array_add ( & dummy , apps_pa , app ) ;
2005-03-14 23:57:21 +03:00
}
}
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
if ( NULL ! = env ) {
2005-07-04 04:13:44 +04:00
opal_argv_free ( env ) ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
}
2005-07-04 04:13:44 +04:00
opal_argv_free ( temp_argv ) ;
2005-03-14 23:57:21 +03:00
2005-08-08 20:42:28 +04:00
/* Once we've created all the apps, add the global MCA params to
each app ' s environment ( checking for duplicates , of
course - - yay opal_environ_merge ( ) ) . */
if ( NULL ! = global_mca_env ) {
size1 = orte_pointer_array_get_size ( apps_pa ) ;
/* Iterate through all the apps */
for ( j = 0 ; j < size1 ; + + j ) {
2005-09-05 00:54:19 +04:00
app = ( orte_app_context_t * )
2005-08-08 20:42:28 +04:00
orte_pointer_array_get_item ( apps_pa , j ) ;
if ( NULL ! = app ) {
/* Use handy utility function */
env = opal_environ_merge ( global_mca_env , app - > env ) ;
opal_argv_free ( app - > env ) ;
app - > env = env ;
}
}
}
/* Now take a subset of the MCA params and set them as MCA
overrides here in orterun ( so that when we orte_init ( ) later ,
all the components see these MCA params ) . Here ' s how we decide
which subset of the MCA params we set here in orterun :
1. If any global MCA params were set , use those
2. If no global MCA params were set and there was only one app ,
then use its app MCA params
3. Otherwise , don ' t set any
*/
env = NULL ;
if ( NULL ! = global_mca_env ) {
env = global_mca_env ;
} else {
if ( orte_pointer_array_get_size ( apps_pa ) > = 1 ) {
/* Remember that pointer_array's can be padded with NULL
entries ; so only use the app ' s env if there is exactly
1 non - NULL entry */
2005-09-05 00:54:19 +04:00
app = ( orte_app_context_t * )
2005-08-08 20:42:28 +04:00
orte_pointer_array_get_item ( apps_pa , 0 ) ;
if ( NULL ! = app ) {
env = app - > env ;
for ( j = 1 ; j < orte_pointer_array_get_size ( apps_pa ) ; + + j ) {
if ( NULL ! = orte_pointer_array_get_item ( apps_pa , j ) ) {
env = NULL ;
break ;
}
}
}
}
}
2005-09-05 00:54:19 +04:00
2005-08-08 20:42:28 +04:00
if ( NULL ! = env ) {
size1 = opal_argv_count ( env ) ;
for ( j = 0 ; j < size1 ; + + j ) {
putenv ( env [ j ] ) ;
}
}
2005-03-14 23:57:21 +03:00
/* All done */
return ORTE_SUCCESS ;
}
2005-08-08 20:42:28 +04:00
/*
 * This function takes a "char ***app_env" parameter to handle the
 * specific case:
 *
 *   orterun --mca foo bar -app appfile
 *
 * That is, we'll need to keep foo=bar, but the presence of the app
 * file will cause an invocation of parse_appfile(), which will cause
 * one or more recursive calls back to create_app().  Since the
 * foo=bar value applies globally to all apps in the appfile, we need
 * to pass in the "base" environment (that contains the foo=bar value)
 * when we parse each line in the appfile.
 *
 * This is really just a special case -- when we have a simple case like:
 *
 *   orterun --mca foo bar -np 4 hostname
 *
 * Then the upper-level function (parse_locals()) calls create_app()
 * with a NULL value for app_env, meaning that there is no "base"
 * environment that the app needs to be created from.
 */
2005-03-14 23:57:21 +03:00
static int create_app ( int argc , char * argv [ ] , orte_app_context_t * * app_ptr ,
2005-08-08 20:42:28 +04:00
bool * made_app , char * * * app_env )
2005-03-14 23:57:21 +03:00
{
2005-07-04 04:13:44 +04:00
opal_cmd_line_t cmd_line ;
2005-03-14 23:57:21 +03:00
char cwd [ OMPI_PATH_MAX ] ;
2006-02-07 06:32:36 +03:00
int i , j , count , rc ;
2005-03-14 23:57:21 +03:00
char * param , * value , * value2 ;
orte_app_context_t * app = NULL ;
2005-11-03 21:15:47 +03:00
#if 0 /* Used only in the C/N notion case, remove to silence compiler warnings */
2006-08-15 23:54:10 +04:00
orte_std_cntr_t l , len ;
2005-11-03 21:15:47 +03:00
# endif
2005-04-19 09:45:25 +04:00
bool map_data = false , save_arg , cmd_line_made = false ;
2005-03-14 23:57:21 +03:00
int new_argc = 0 ;
char * * new_argv = NULL ;
* made_app = false ;
/* Pre-process the command line:
2005-09-05 00:54:19 +04:00
2005-03-14 23:57:21 +03:00
- convert C , cX , N , nX arguments to " -rawmap <id> <arg> " so
that the parser can pick it up nicely .
- convert - host to - rawmap < id > < arg >
- convert - arch to - rawmap < id > < arg >
Converting these to the same argument type will a ) simplify the
logic down below , and b ) allow us to preserve the ordering of
these arguments as the user specified them on the command
line . */
for ( i = 0 ; i < argc ; + + i ) {
map_data = false ;
save_arg = true ;
2005-11-03 21:15:47 +03:00
/* JJH To fix in the future
* Currently C / N notation is not supported so don ' t execute this check
2006-02-07 06:32:36 +03:00
* Bug : Make this context sensitive since it will not behave properly
2005-11-03 21:15:47 +03:00
* with the following argument set :
* $ orterun - np 2 - host c2 , c3 , c12 hostname
* Since it will see the hosts c2 , c3 , and c12 as C options instead
* of hostnames .
*/
if ( false ) { ; } /* Wrapper to preserve logic continuation while the below
is commented out */
#if 0
2005-03-14 23:57:21 +03:00
if ( 0 = = strcmp ( argv [ i ] , " C " ) | |
0 = = strcmp ( argv [ i ] , " N " ) ) {
map_data = true ;
2005-09-05 00:54:19 +04:00
}
2005-03-14 23:57:21 +03:00
2005-09-05 00:54:19 +04:00
/* Heuristic: if the string fits "[cn][0-9]+" or "[cn][0-9],",
2005-03-14 23:57:21 +03:00
then accept it as mapping data */
else if ( ' c ' = = argv [ i ] [ 0 ] | | ' n ' = = argv [ i ] [ 0 ] ) {
len = strlen ( argv [ i ] ) ;
if ( len > 1 ) {
for ( l = 1 ; l < len ; + + l ) {
if ( ' , ' = = argv [ i ] [ l ] ) {
map_data = true ;
break ;
} else if ( ! isdigit ( argv [ i ] [ l ] ) ) {
break ;
}
}
if ( l > = len ) {
map_data = true ;
}
}
}
2005-11-03 21:15:47 +03:00
# endif
2005-03-14 23:57:21 +03:00
2005-09-20 12:56:02 +04:00
#if 0
/* JMS commented out because we don't handle this in any
mapper */
2005-03-14 23:57:21 +03:00
/* Save -arch args */
else if ( 0 = = strcmp ( " -arch " , argv [ i ] ) ) {
char str [ 2 ] = { ' 0 ' + ORTE_APP_CONTEXT_MAP_ARCH , ' \0 ' } ;
2005-07-04 04:13:44 +04:00
opal_argv_append ( & new_argc , & new_argv , " -rawmap " ) ;
opal_argv_append ( & new_argc , & new_argv , str ) ;
2005-03-14 23:57:21 +03:00
save_arg = false ;
}
2005-09-20 12:56:02 +04:00
# endif
2005-03-14 23:57:21 +03:00
/* Save -host args */
2006-02-07 06:32:36 +03:00
else if ( 0 = = strcmp ( " --host " , argv [ i ] ) | |
0 = = strcmp ( " -host " , argv [ i ] ) | |
2005-09-22 20:08:40 +04:00
0 = = strcmp ( " -H " , argv [ i ] ) ) {
2005-03-14 23:57:21 +03:00
char str [ 2 ] = { ' 0 ' + ORTE_APP_CONTEXT_MAP_HOSTNAME , ' \0 ' } ;
2005-07-04 04:13:44 +04:00
opal_argv_append ( & new_argc , & new_argv , " -rawmap " ) ;
opal_argv_append ( & new_argc , & new_argv , str ) ;
2005-03-14 23:57:21 +03:00
save_arg = false ;
}
/* If this token was C/N map data, save it */
if ( map_data ) {
char str [ 2 ] = { ' 0 ' + ORTE_APP_CONTEXT_MAP_CN , ' \0 ' } ;
2005-07-04 04:13:44 +04:00
opal_argv_append ( & new_argc , & new_argv , " -rawmap " ) ;
opal_argv_append ( & new_argc , & new_argv , str ) ;
2005-03-14 23:57:21 +03:00
}
if ( save_arg ) {
2005-07-04 04:13:44 +04:00
opal_argv_append ( & new_argc , & new_argv , argv [ i ] ) ;
2005-03-14 23:57:21 +03:00
}
}
/* Parse application command line options. Add the -rawmap option
separately so that the user doesn ' t see it in the - - help
message . */
init_globals ( ) ;
2005-07-04 04:13:44 +04:00
opal_cmd_line_create ( & cmd_line , cmd_line_init ) ;
2005-03-18 06:43:59 +03:00
mca_base_cmd_line_setup ( & cmd_line ) ;
2005-03-14 23:57:21 +03:00
cmd_line_made = true ;
2005-07-04 04:13:44 +04:00
opal_cmd_line_make_opt3 ( & cmd_line , ' \0 ' , NULL , " rawmap " , 2 ,
2005-03-14 23:57:21 +03:00
" Hidden / internal parameter -- users should not use this! " ) ;
2005-07-04 04:13:44 +04:00
rc = opal_cmd_line_parse ( & cmd_line , true , new_argc , new_argv ) ;
opal_argv_free ( new_argv ) ;
2005-03-14 23:57:21 +03:00
new_argv = NULL ;
2006-02-12 04:33:29 +03:00
if ( ORTE_SUCCESS ! = rc ) {
2005-03-14 23:57:21 +03:00
goto cleanup ;
}
2005-08-08 20:42:28 +04:00
mca_base_cmd_line_process_args ( & cmd_line , app_env , & global_mca_env ) ;
2005-03-14 23:57:21 +03:00
/* Is there an appfile in here? */
if ( NULL ! = orterun_globals . appfile ) {
OBJ_DESTRUCT ( & cmd_line ) ;
2005-08-08 20:42:28 +04:00
return parse_appfile ( strdup ( orterun_globals . appfile ) , app_env ) ;
2005-03-14 23:57:21 +03:00
}
/* Setup application context */
app = OBJ_NEW ( orte_app_context_t ) ;
2006-02-07 06:32:36 +03:00
opal_cmd_line_get_tail ( & cmd_line , & count , & app - > argv ) ;
2005-03-14 23:57:21 +03:00
/* See if we have anything left */
2006-02-07 06:32:36 +03:00
if ( 0 = = count ) {
2005-07-04 06:38:44 +04:00
opal_show_help ( " help-orterun.txt " , " orterun:executable-not-specified " ,
2005-04-12 20:01:30 +04:00
true , orterun_basename , orterun_basename ) ;
2005-03-14 23:57:21 +03:00
rc = ORTE_ERR_NOT_FOUND ;
goto cleanup ;
}
2005-04-09 05:26:17 +04:00
/* Grab all OMPI_* environment variables */
2005-03-14 23:57:21 +03:00
2005-08-08 20:42:28 +04:00
app - > env = opal_argv_copy ( * app_env ) ;
2005-03-14 23:57:21 +03:00
for ( i = 0 ; NULL ! = environ [ i ] ; + + i ) {
2005-04-06 05:58:30 +04:00
if ( 0 = = strncmp ( " OMPI_ " , environ [ i ] , 5 ) ) {
2005-07-04 04:13:44 +04:00
opal_argv_append_nosize ( & app - > env , environ [ i ] ) ;
2005-03-14 23:57:21 +03:00
}
}
/* Did the user request to export any environment variables? */
2005-07-04 04:13:44 +04:00
if ( opal_cmd_line_is_taken ( & cmd_line , " x " ) ) {
j = opal_cmd_line_get_ninsts ( & cmd_line , " x " ) ;
2005-03-14 23:57:21 +03:00
for ( i = 0 ; i < j ; + + i ) {
2005-07-04 04:13:44 +04:00
param = opal_cmd_line_get_param ( & cmd_line , " x " , i , 0 ) ;
2005-03-14 23:57:21 +03:00
if ( NULL ! = strchr ( param , ' = ' ) ) {
2005-07-04 04:13:44 +04:00
opal_argv_append_nosize ( & app - > env , param ) ;
2005-03-14 23:57:21 +03:00
} else {
value = getenv ( param ) ;
if ( NULL ! = value ) {
if ( NULL ! = strchr ( value , ' = ' ) ) {
2005-07-04 04:13:44 +04:00
opal_argv_append_nosize ( & app - > env , value ) ;
2005-03-14 23:57:21 +03:00
} else {
asprintf ( & value2 , " %s=%s " , param , value ) ;
2005-07-04 04:13:44 +04:00
opal_argv_append_nosize ( & app - > env , value2 ) ;
2005-05-13 01:44:23 +04:00
free ( value2 ) ;
2005-03-14 23:57:21 +03:00
}
} else {
2005-07-04 03:31:27 +04:00
opal_output ( 0 , " Warning: could not find environment variable \" %s \" \n " , param ) ;
2005-03-14 23:57:21 +03:00
}
}
}
}
/* Did the user request a specific path? */
if ( NULL ! = orterun_globals . path ) {
asprintf ( & value , " PATH=%s " , orterun_globals . path ) ;
2005-07-04 04:13:44 +04:00
opal_argv_append_nosize ( & app - > env , value ) ;
2005-03-14 23:57:21 +03:00
free ( value ) ;
}
/* Did the user request a specific wdir? */
if ( NULL ! = orterun_globals . wdir ) {
app - > cwd = strdup ( orterun_globals . wdir ) ;
2006-02-16 23:40:23 +03:00
app - > user_specified_cwd = true ;
2005-03-14 23:57:21 +03:00
} else {
getcwd ( cwd , sizeof ( cwd ) ) ;
app - > cwd = strdup ( cwd ) ;
2006-02-16 23:40:23 +03:00
app - > user_specified_cwd = false ;
2005-03-14 23:57:21 +03:00
}
2006-09-15 06:52:08 +04:00
/* Check to see if the user explicitly wanted to disable automatic
- - prefix behavior */
if ( opal_cmd_line_is_taken ( & cmd_line , " noprefix " ) ) {
want_prefix_by_default = false ;
}
2006-02-28 14:52:12 +03:00
/* Did the user specify a specific prefix for this app_context_t
or provide an absolute path name to argv [ 0 ] ? */
if ( opal_cmd_line_is_taken ( & cmd_line , " prefix " ) | |
2006-09-15 06:52:08 +04:00
' / ' = = argv [ 0 ] [ 0 ] | | want_prefix_by_default ) {
2005-09-06 20:10:05 +04:00
size_t param_len ;
2006-02-28 17:44:40 +03:00
/* The --prefix option takes precedence over /path/to/orterun */
if ( opal_cmd_line_is_taken ( & cmd_line , " prefix " ) ) {
param = opal_cmd_line_get_param ( & cmd_line , " prefix " , 0 , 0 ) ;
2006-09-15 06:52:08 +04:00
}
/* /path/to/orterun */
else if ( ' / ' = = argv [ 0 ] [ 0 ] ) {
2006-08-23 06:35:00 +04:00
char * tmp_basename = NULL ;
2006-02-28 17:44:40 +03:00
/* If they specified an absolute path, strip off the
/ bin / < exec_name > " and leave just the prefix */
2006-08-23 06:35:00 +04:00
param = opal_dirname ( argv [ 0 ] ) ;
2006-02-28 14:52:12 +03:00
/* Quick sanity check to ensure we got
something / bin / < exec_name > and that the installation
tree is at least more or less what we expect it to
be */
2006-08-23 06:35:00 +04:00
tmp_basename = opal_basename ( param ) ;
if ( 0 = = strcmp ( " bin " , tmp_basename ) ) {
char * tmp = param ;
param = opal_dirname ( tmp ) ;
free ( tmp ) ;
2006-02-28 14:52:12 +03:00
} else {
free ( param ) ;
param = NULL ;
2005-09-06 20:10:05 +04:00
}
2006-08-23 06:35:00 +04:00
free ( tmp_basename ) ;
2005-09-06 20:10:05 +04:00
}
2006-09-15 06:52:08 +04:00
/* --enable-orterun-prefix-default was given to orterun */
else {
param = strdup ( OPAL_PREFIX ) ;
}
2005-09-06 20:10:05 +04:00
2006-02-28 14:52:12 +03:00
if ( NULL ! = param ) {
2006-08-24 20:18:42 +04:00
/* "Parse" the param, aka remove superfluous path_sep. */
2006-02-28 14:52:12 +03:00
param_len = strlen ( param ) ;
2006-08-22 01:55:41 +04:00
while ( 0 = = strcmp ( OPAL_PATH_SEP , & ( param [ param_len - 1 ] ) ) ) {
2006-02-28 14:52:12 +03:00
param [ param_len - 1 ] = ' \0 ' ;
param_len - - ;
if ( 0 = = param_len ) {
opal_show_help ( " help-orterun.txt " , " orterun:empty-prefix " ,
true , orterun_basename , orterun_basename ) ;
return ORTE_ERR_FATAL ;
}
}
app - > prefix_dir = strdup ( param ) ;
}
2005-09-06 20:10:05 +04:00
}
2005-03-14 23:57:21 +03:00
/* Did the user request any mappings? They were all converted to
- - rawmap items , above . */
2005-07-04 04:13:44 +04:00
if ( opal_cmd_line_is_taken ( & cmd_line , " rawmap " ) ) {
j = opal_cmd_line_get_ninsts ( & cmd_line , " rawmap " ) ;
2006-08-23 06:35:00 +04:00
app - > map_data = ( orte_app_context_map_t * * ) malloc ( sizeof ( orte_app_context_map_t * ) * j ) ;
2005-03-14 23:57:21 +03:00
if ( NULL = = app - > map_data ) {
rc = ORTE_ERR_OUT_OF_RESOURCE ;
goto cleanup ;
}
app - > num_map = j ;
for ( i = 0 ; i < j ; + + i ) {
app - > map_data [ i ] = NULL ;
}
for ( i = 0 ; i < j ; + + i ) {
2005-07-04 04:13:44 +04:00
value = opal_cmd_line_get_param ( & cmd_line , " rawmap " , i , 0 ) ;
value2 = opal_cmd_line_get_param ( & cmd_line , " rawmap " , i , 1 ) ;
2005-03-14 23:57:21 +03:00
app - > map_data [ i ] = OBJ_NEW ( orte_app_context_map_t ) ;
if ( NULL = = app - > map_data [ i ] ) {
rc = ORTE_ERR_OUT_OF_RESOURCE ;
goto cleanup ;
}
app - > map_data [ i ] - > map_type = value [ 0 ] - ' 0 ' ;
app - > map_data [ i ] - > map_data = strdup ( value2 ) ;
2006-02-07 06:32:36 +03:00
/* map_data = true;
2005-10-08 02:24:52 +04:00
* JJH - This activates the C / N mapping stuff ,
* or at least allows us to pass the ' num_procs ' check below .
* since it is not implemented yet , leave commented . */
2005-03-14 23:57:21 +03:00
}
}
/* Get the numprocs */
2006-09-25 23:41:54 +04:00
app - > num_procs = ( orte_std_cntr_t ) orterun_globals . num_procs ;
2005-04-09 05:26:17 +04:00
2006-07-11 01:25:33 +04:00
/* If the user didn't specify the number of processes to run, then we
default to launching an app process using every slot . We can ' t do
anything about that here - we leave it to the RMAPS framework ' s
components to note this and deal with it later .
HOWEVER , we ONLY support this mode of operation if the number of
app_contexts is equal to ONE . If the user provides multiple applications ,
we simply must have more information - in this case , generate an
error .
*/
if ( app - > num_procs = = 0 ) {
have_zero_np = true ; /** flag that we have a zero_np situation */
2005-03-14 23:57:21 +03:00
}
2006-07-11 01:25:33 +04:00
if ( 0 < total_num_apps & & have_zero_np ) {
/** we have more than one app and a zero_np - that's no good.
* note that we have to do this as a two step logic check since
* the user may fail to specify num_procs for the first app , but
* then give us another application .
*/
opal_show_help ( " help-orterun.txt " , " orterun:multi-apps-and-zero-np " ,
true , orterun_basename , NULL ) ;
return ORTE_ERR_FATAL ;
}
total_num_apps + + ;
2006-02-16 23:40:23 +03:00
/* Do not try to find argv[0] here -- the starter is responsible
for that because it may not be relevant to try to find it on
the node where orterun is executing . So just strdup ( ) argv [ 0 ]
into app . */
2005-03-14 23:57:21 +03:00
2006-02-16 23:40:23 +03:00
app - > app = strdup ( app - > argv [ 0 ] ) ;
2005-03-14 23:57:21 +03:00
if ( NULL = = app - > app ) {
2006-02-16 23:40:23 +03:00
opal_show_help ( " help-orterun.txt " , " orterun:call-failed " ,
true , orterun_basename , " library " , " strdup returned NULL " , errno ) ;
2005-03-14 23:57:21 +03:00
rc = ORTE_ERR_NOT_FOUND ;
goto cleanup ;
}
* app_ptr = app ;
app = NULL ;
* made_app = true ;
/* All done */
cleanup :
if ( NULL ! = app ) {
OBJ_RELEASE ( app ) ;
}
if ( NULL ! = new_argv ) {
2005-07-04 04:13:44 +04:00
opal_argv_free ( new_argv ) ;
2005-03-14 23:57:21 +03:00
}
if ( cmd_line_made ) {
OBJ_DESTRUCT ( & cmd_line ) ;
}
return rc ;
}
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and an app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
static int parse_appfile ( char * filename , char * * * env )
2005-03-14 23:57:21 +03:00
{
size_t i , len ;
FILE * fp ;
char line [ BUFSIZ ] ;
2006-03-23 20:55:25 +03:00
int rc , argc , app_num ;
2005-03-14 23:57:21 +03:00
char * * argv ;
orte_app_context_t * app ;
bool blank , made_app ;
char bogus [ ] = " bogus " ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
char * * tmp_env ;
2005-03-14 23:57:21 +03:00
/* Try to open the file */
fp = fopen ( filename , " r " ) ;
if ( NULL = = fp ) {
2005-07-04 06:38:44 +04:00
opal_show_help ( " help-orterun.txt " , " orterun:appfile-not-found " , true ,
2005-03-14 23:57:21 +03:00
filename ) ;
return ORTE_ERR_NOT_FOUND ;
}
/* Read in line by line */
line [ sizeof ( line ) - 1 ] = ' \0 ' ;
2006-03-23 20:55:25 +03:00
app_num = 0 ;
2005-03-14 23:57:21 +03:00
do {
/* We need a bogus argv[0] (because when argv comes in from
the command line , argv [ 0 ] is " orterun " , so the parsing
logic ignores it ) . So create one here rather than making
an argv and then pre - pending a new argv [ 0 ] ( which would be
rather inefficient ) . */
line [ 0 ] = ' \0 ' ;
strcat ( line , bogus ) ;
2005-09-05 00:54:19 +04:00
if ( NULL = = fgets ( line + sizeof ( bogus ) - 1 ,
2005-03-14 23:57:21 +03:00
sizeof ( line ) - sizeof ( bogus ) - 1 , fp ) ) {
break ;
}
2005-04-12 22:42:34 +04:00
/* Remove a trailing newline */
2005-03-14 23:57:21 +03:00
len = strlen ( line ) ;
2005-04-12 22:42:34 +04:00
if ( len > 0 & & ' \n ' = = line [ len - 1 ] ) {
line [ len - 1 ] = ' \0 ' ;
if ( len > 0 ) {
- - len ;
}
}
/* Remove comments */
2005-03-14 23:57:21 +03:00
for ( i = 0 ; i < len ; + + i ) {
if ( ' # ' = = line [ i ] ) {
line [ i ] = ' \0 ' ;
break ;
} else if ( i + 1 < len & & ' / ' = = line [ i ] & & ' / ' = = line [ i + 1 ] ) {
line [ i ] = ' \0 ' ;
break ;
}
}
/* Is this a blank line? */
len = strlen ( line ) ;
for ( blank = true , i = sizeof ( bogus ) ; i < len ; + + i ) {
if ( ! isspace ( line [ i ] ) ) {
blank = false ;
break ;
}
}
if ( blank ) {
continue ;
}
/* We got a line with *something* on it. So process it */
2005-07-04 04:13:44 +04:00
argv = opal_argv_split ( line , ' ' ) ;
argc = opal_argv_count ( argv ) ;
2005-03-14 23:57:21 +03:00
if ( argc > 0 ) {
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
2005-08-08 20:42:28 +04:00
/* Create a temporary env to use in the recursive call --
that is : don ' t disturb the original env so that we can
have a consistent global env . This allows for the
case :
2005-09-05 00:54:19 +04:00
orterun - - mca foo bar - - appfile file
2005-08-08 20:42:28 +04:00
where the " file " contains multiple apps . In this case ,
each app in " file " will get * only * foo = bar as the base
environment from which its specific environment is
constructed . */
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
if ( NULL ! = * env ) {
2005-07-04 04:13:44 +04:00
tmp_env = opal_argv_copy ( * env ) ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
if ( NULL = = tmp_env ) {
return ORTE_ERR_OUT_OF_RESOURCE ;
}
} else {
tmp_env = NULL ;
}
rc = create_app ( argc , argv , & app , & made_app , & tmp_env ) ;
2005-03-14 23:57:21 +03:00
if ( ORTE_SUCCESS ! = rc ) {
/* Assume that the error message has already been
printed ; no need to cleanup - - we can just exit */
exit ( 1 ) ;
}
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
if ( NULL ! = tmp_env ) {
2005-07-04 04:13:44 +04:00
opal_argv_free ( tmp_env ) ;
While waiting for fortran compiles...
Fixes for orterun in handling different MCA params for different
processes (reviewed by Brian):
- By design, if you run the following:
mpirun --mca foo aaa --mca foo bbb a.out
a.out will get a single MCA param for foo with value "aaa,bbb".
- However, if you specify multiple apps with different values for the
same MCA param, you should expect to get the different values for
each app. For example:
mpirun --mca foo aaa a.out : --mca foo bbb b.out
Should yield a.out with a "foo" param with value "aaa" and b.out
with a "foo" param with a value "bbb".
- This did not work -- both a.out and b.out would get a "foo" with
"aaa,bbb".
- This commit fixes this behavior -- now a.out will get aaa and b.out
will get bbb.
- Additionally, if you mix --mca and and app file, you can have
"global" params and per-line-in-the-appfile params. For example:
mpirun --mca foo zzzz --app appfile
where "appfile" contains:
-np 1 --mca bar aaa a.out
-np 1 --mca bar bbb b.out
In this case, a.out will get foo=zzzz and bar=aaa, and b.out will
get foo=zzzz and bar=bbb.
Spiffy.
Ok, fortran build is done... back to Fortran... sigh...
This commit was SVN r5710.
2005-05-13 18:36:36 +04:00
}
2005-03-14 23:57:21 +03:00
if ( made_app ) {
2006-08-15 23:54:10 +04:00
orte_std_cntr_t dummy ;
2006-03-24 18:28:42 +03:00
app - > idx = app_num ;
+ + app_num ;
2005-07-03 08:02:01 +04:00
orte_pointer_array_add ( & dummy , apps_pa , app ) ;
2005-03-14 23:57:21 +03:00
}
}
} while ( ! feof ( fp ) ) ;
fclose ( fp ) ;
/* All done */
free ( filename ) ;
return ORTE_SUCCESS ;
}