/* -*- C -*-
 *
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
# include "orte_config.h"
# include <stdio.h>
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
# ifdef HAVE_SYS_PARAM_H
# include <sys/param.h>
# endif
# include <errno.h>
# include <signal.h>
# include <ctype.h>
# ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
# endif /* HAVE_SYS_TYPES_H */
# ifdef HAVE_SYS_WAIT_H
# include <sys/wait.h>
# endif /* HAVE_SYS_WAIT_H */
# include "opal/event/event.h"
# include "opal/mca/base/base.h"
# include "opal/threads/condition.h"
# include "opal/util/argv.h"
# include "opal/util/basename.h"
# include "opal/util/cmd_line.h"
# include "opal/util/opal_environ.h"
# include "opal/util/output.h"
# include "opal/util/show_help.h"
# include "opal/util/trace.h"
# include "opal/version.h"
# include "orte/orte_constants.h"
# include "orte/class/orte_pointer_array.h"
# include "orte/util/proc_info.h"
# include "orte/util/sys_info.h"
# include "orte/util/universe_setup_file_io.h"
# include "orte/util/pre_condition_transports.h"
# include "orte/mca/ns/ns.h"
# include "orte/mca/gpr/gpr.h"
# include "orte/mca/pls/pls.h"
# include "orte/mca/rmaps/rmaps_types.h"
# include "orte/mca/rmgr/rmgr.h"
# include "orte/mca/schema/schema.h"
# include "orte/mca/smr/smr.h"
# include "orte/mca/errmgr/errmgr.h"
# include "orte/runtime/runtime.h"
# include "orte/runtime/orte_wait.h"
/*
 * Globals
 */
/* Basename of this executable.  main() sets it from argv[0], passes it to
 * opal_show_help() when reporting errors, and frees it before returning
 * normally. */
static char *ortekill_basename = NULL;
/*
 * Set up globals for catching orterun command line options
 */
/*
 * Destination storage for parsed command line options.  The cmd_line_init
 * table in this file stores the address of most of these fields as the
 * target for the option named in each comment.  Fields marked "not
 * referenced" are not used by any code visible in this file.
 */
struct globals_t {
    bool help;                        /* -h / --help */
    bool version;                     /* -V / --version */
    bool verbose;                     /* -v / --verbose */
    bool quiet;                       /* -q / --quiet */
    bool exit;                        /* not referenced in this file */
    bool no_wait_for_job_completion;  /* -nw / --nw */
    bool by_node;                     /* -bynode / --bynode */
    bool by_slot;                     /* -byslot / --byslot */
    bool per_node;                    /* -pernode / --pernode */
    bool no_oversubscribe;            /* -nooversubscribe */
    bool debugger;                    /* -tv and -debug share this flag */
    bool no_local_schedule;           /* -nolocal / --nolocal */
    bool displaymapatlaunch;          /* -display-map-at-launch */
    int num_procs;                    /* -c, -np and -n all write here */
    int exit_status;                  /* not referenced in this file */
    char *hostfile;                   /* not a table destination: the hostfile
                                         options use a NULL destination */
    char *env_val;                    /* not referenced in this file */
    char *appfile;                    /* --app */
    char *wdir;                       /* -wdir / --wdir */
    char *path;                       /* -path / --path */
    opal_mutex_t lock;                /* not referenced in this file */
    opal_condition_t cond;            /* not referenced in this file */
} ortekill_globals;
opal_cmd_line_init_t cmd_line_init [ ] = {
/* Various "obvious" options */
{ NULL , NULL , NULL , ' h ' , NULL , " help " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . help , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" This help message " } ,
{ NULL , NULL , NULL , ' V ' , NULL , " version " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . version , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Print version and exit " } ,
{ NULL , NULL , NULL , ' v ' , NULL , " verbose " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . verbose , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Be verbose " } ,
{ NULL , NULL , NULL , ' q ' , NULL , " quiet " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . quiet , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Suppress helpful messages " } ,
/* Use an appfile */
{ NULL , NULL , NULL , ' \0 ' , NULL , " app " , 1 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . appfile , OPAL_CMD_LINE_TYPE_STRING ,
2006-11-17 02:58:46 +00:00
" Provide an appfile; ignore all other command line options " } ,
/* Number of processes; -c, -n, --n, -np, and --np are all
synonyms */
{ NULL , NULL , NULL , ' c ' , " np " , " np " , 1 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . num_procs , OPAL_CMD_LINE_TYPE_INT ,
2006-11-17 02:58:46 +00:00
" Number of processes to run " } ,
{ NULL , NULL , NULL , ' \0 ' , " n " , " n " , 1 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . num_procs , OPAL_CMD_LINE_TYPE_INT ,
2006-11-17 02:58:46 +00:00
" Number of processes to run " } ,
/* Set a hostfile */
{ " rds " , " hostfile " , " path " , ' \0 ' , " hostfile " , " hostfile " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Provide a hostfile " } ,
{ " rds " , " hostfile " , " path " , ' \0 ' , " machinefile " , " machinefile " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Provide a hostfile " } ,
/* Don't wait for the process to finish before exiting */
{ NULL , NULL , NULL , ' \0 ' , " nw " , " nw " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . no_wait_for_job_completion , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Launch the processes and do not wait for their completion (i.e., let orterun complete as soon a successful launch occurs) " } ,
/* Export environment variables; potentially used multiple times,
so it does not make sense to set into a variable */
{ NULL , NULL , NULL , ' x ' , NULL , NULL , 1 ,
NULL , OPAL_CMD_LINE_TYPE_NULL ,
" Export an environment variable, optionally specifying a value (e.g., \" -x foo \" exports the environment variable foo and takes its value from the current environment; \" -x foo=bar \" exports the environment variable name foo and sets its value to \" bar \" in the started processes) " } ,
/* Specific mapping (C, cX, N, nX) */
#if 0
/* JJH --map is not currently implemented so don't advertise it until it is */
{ NULL , NULL , NULL , ' \0 ' , NULL , " map " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Mapping of processes to nodes / CPUs " } ,
# endif
{ NULL , NULL , NULL , ' \0 ' , " bynode " , " bynode " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . by_node , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Whether to allocate/map processes round-robin by node " } ,
{ NULL , NULL , NULL , ' \0 ' , " byslot " , " byslot " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . by_slot , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Whether to allocate/map processes round-robin by slot (the default) " } ,
{ NULL , NULL , NULL , ' \0 ' , " pernode " , " pernode " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . per_node , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" If no number of process is specified, this will cause one process per available node to be executed " } ,
{ NULL , NULL , NULL , ' \0 ' , " nooversubscribe " , " nooversubscribe " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . no_oversubscribe , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Nodes are not to be oversubscribed, even if the system supports such operation " } ,
{ NULL , NULL , NULL , ' \0 ' , " display-map-at-launch " , " display-map-at-launch " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . displaymapatlaunch , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Display the process map just before launch " } ,
/* mpiexec-like arguments */
{ NULL , NULL , NULL , ' \0 ' , " wdir " , " wdir " , 1 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . wdir , OPAL_CMD_LINE_TYPE_STRING ,
2006-11-17 02:58:46 +00:00
" Set the working directory of the started processes " } ,
{ NULL , NULL , NULL , ' \0 ' , " path " , " path " , 1 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . path , OPAL_CMD_LINE_TYPE_STRING ,
2006-11-17 02:58:46 +00:00
" PATH to be used to look for executables to start processes " } ,
/* These arguments can be specified multiple times */
#if 0
/* JMS: Removed because it's not really implemented */
{ NULL , NULL , NULL , ' \0 ' , " arch " , " arch " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Architecture to start processes on " } ,
# endif
{ NULL , NULL , NULL , ' H ' , " host " , " host " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" List of hosts to invoke processes on " } ,
/* OSC mpiexec-like arguments */
{ NULL , NULL , NULL , ' \0 ' , " nolocal " , " nolocal " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . no_local_schedule , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Do not run any MPI applications on the local node " } ,
/* User-level debugger arguments */
{ NULL , NULL , NULL , ' \0 ' , " tv " , " tv " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . debugger , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Deprecated backwards compatibility flag; synonym for \" --debug \" " } ,
{ NULL , NULL , NULL , ' \0 ' , " debug " , " debug " , 0 ,
2006-11-17 19:32:49 +00:00
& ortekill_globals . debugger , OPAL_CMD_LINE_TYPE_BOOL ,
2006-11-17 02:58:46 +00:00
" Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter " } ,
{ " orte " , " base " , " user_debugger " , ' \0 ' , " debugger " , " debugger " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Sequence of debuggers to search for when \" --debug \" is used " } ,
/* OpenRTE arguments */
{ " orte " , " debug " , NULL , ' d ' , NULL , " debug-devel " , 0 ,
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
" Enable debugging of OpenRTE " } ,
{ " orte " , " debug " , " daemons " , ' \0 ' , NULL , " debug-daemons " , 0 ,
NULL , OPAL_CMD_LINE_TYPE_INT ,
" Enable debugging of any OpenRTE daemons used by this application " } ,
{ " orte " , " debug " , " daemons_file " , ' \0 ' , NULL , " debug-daemons-file " , 0 ,
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
" Enable debugging of any OpenRTE daemons used by this application, storing output in files " } ,
{ " orte " , " no_daemonize " , NULL , ' \0 ' , NULL , " no-daemonize " , 0 ,
NULL , OPAL_CMD_LINE_TYPE_BOOL ,
" Do not detach OpenRTE daemons used by this application " } ,
{ " universe " , NULL , NULL , ' \0 ' , NULL , " universe " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Set the universe name as username@hostname:universe_name for this application " } ,
{ NULL , NULL , NULL , ' \0 ' , NULL , " tmpdir " , 1 ,
& orte_process_info . tmpdir_base , OPAL_CMD_LINE_TYPE_STRING ,
" Set the root for the session directory tree for orterun ONLY " } ,
{ NULL , NULL , NULL , ' \0 ' , NULL , " prefix " , 1 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Prefix where Open MPI is installed on remote nodes " } ,
{ NULL , NULL , NULL , ' \0 ' , NULL , " noprefix " , 0 ,
NULL , OPAL_CMD_LINE_TYPE_STRING ,
" Disable automatic --prefix behavior " } ,
/* End of list */
{ NULL , NULL , NULL , ' \0 ' , NULL , NULL , 0 ,
NULL , OPAL_CMD_LINE_TYPE_NULL , NULL }
} ;
/* Declare the process environment explicitly on non-Windows builds;
 * main() passes its address to opal_setenv(). */
#ifndef __WINDOWS__
extern char **environ;
#endif /* !__WINDOWS__ */
int main ( int argc , char * argv [ ] )
{
int rc ;
int id , iparam ;
/* Setup MCA params */
mca_base_param_init ( ) ;
orte_register_params ( false ) ;
/* find our basename (the name of the executable) so that we can
use it in pretty - print error messages */
2006-11-17 19:32:49 +00:00
ortekill_basename = opal_basename ( argv [ 0 ] ) ;
2006-11-17 02:58:46 +00:00
/* Intialize our Open RTE environment */
/* Set the flag telling orte_init that I am NOT a
* singleton , but am " infrastructure " - prevents setting
* up incorrect infrastructure that only a singleton would
* require
*/
if ( ORTE_SUCCESS ! = ( rc = orte_init ( true ) ) ) {
opal_show_help ( " help-orterun.txt " , " orterun:init-failure " , true ,
" orte_init() " , rc ) ;
return rc ;
}
/* check for daemon flags and push them into the environment
* since this isn ' t being automatically done
*/
id = mca_base_param_reg_int_name ( " orte_debug " , " daemons " ,
" Whether to debug the ORTE daemons or not " ,
false , false , ( int ) false , & iparam ) ;
if ( iparam ) {
char * tmp = mca_base_param_environ_variable ( " orte " , " debug " , " daemons " ) ;
if ( ORTE_SUCCESS ! = ( rc = opal_setenv ( tmp , " 1 " , true , & environ ) ) ) {
opal_show_help ( " help-orterun.txt " , " orterun:environ " , false ,
2006-11-17 19:32:49 +00:00
ortekill_basename , tmp , " 1 " , rc ) ;
2006-11-17 02:58:46 +00:00
free ( tmp ) ;
return rc ;
}
free ( tmp ) ;
}
id = mca_base_param_reg_int_name ( " orte " , " debug " ,
" Top-level ORTE debug switch " ,
false , false , 0 , & iparam ) ;
if ( iparam ) {
char * tmp = mca_base_param_environ_variable ( " orte " , NULL , " debug " ) ;
if ( ORTE_SUCCESS ! = ( rc = opal_setenv ( tmp , " 1 " , true , & environ ) ) ) {
opal_show_help ( " help-orterun.txt " , " orterun:environ " , false ,
2006-11-17 19:32:49 +00:00
ortekill_basename , tmp , " 1 " , rc ) ;
2006-11-17 02:58:46 +00:00
free ( tmp ) ;
return rc ;
}
free ( tmp ) ;
}
id = mca_base_param_reg_int_name ( " orte_debug " , " daemons_file " ,
" Whether want stdout/stderr of daemons to go to a file or not " ,
false , false , 0 , & iparam ) ;
if ( iparam ) {
char * tmp = mca_base_param_environ_variable ( " orte " , " debug " ,
" daemons_file " ) ;
if ( ORTE_SUCCESS ! = ( rc = opal_setenv ( tmp , " 1 " , true , & environ ) ) ) {
opal_show_help ( " help-orterun.txt " , " orterun:environ " , false ,
2006-11-17 19:32:49 +00:00
ortekill_basename , tmp , " 1 " , rc ) ;
2006-11-17 02:58:46 +00:00
free ( tmp ) ;
return rc ;
}
free ( tmp ) ;
}
orte_finalize ( ) ;
2006-11-17 19:32:49 +00:00
free ( ortekill_basename ) ;
2006-11-17 02:58:46 +00:00
return rc ;
}