349 строки
12 KiB
C
349 строки
12 KiB
C
|
/* -*- C -*-
|
||
|
*
|
||
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||
|
* University Research and Technology
|
||
|
* Corporation. All rights reserved.
|
||
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||
|
* of Tennessee Research Foundation. All rights
|
||
|
* reserved.
|
||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||
|
* University of Stuttgart. All rights reserved.
|
||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||
|
* All rights reserved.
|
||
|
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||
|
* $COPYRIGHT$
|
||
|
*
|
||
|
* Additional copyrights may follow
|
||
|
*
|
||
|
* $HEADER$
|
||
|
*/
|
||
|
|
||
|
#include "orte_config.h"
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#ifdef HAVE_UNISTD_H
|
||
|
#include <unistd.h>
|
||
|
#endif
|
||
|
#ifdef HAVE_SYS_PARAM_H
|
||
|
#include <sys/param.h>
|
||
|
#endif
|
||
|
#include <errno.h>
|
||
|
#include <signal.h>
|
||
|
#include <ctype.h>
|
||
|
#ifdef HAVE_SYS_TYPES_H
|
||
|
#include <sys/types.h>
|
||
|
#endif /* HAVE_SYS_TYPES_H */
|
||
|
#ifdef HAVE_SYS_WAIT_H
|
||
|
#include <sys/wait.h>
|
||
|
#endif /* HAVE_SYS_WAIT_H */
|
||
|
#ifdef HAVE_LIBGEN_H
|
||
|
#include <libgen.h>
|
||
|
#endif
|
||
|
|
||
|
#include "opal/event/event.h"
|
||
|
#include "opal/install_dirs.h"
|
||
|
#include "opal/mca/base/base.h"
|
||
|
#include "opal/threads/condition.h"
|
||
|
#include "opal/util/argv.h"
|
||
|
#include "opal/util/basename.h"
|
||
|
#include "opal/util/cmd_line.h"
|
||
|
#include "opal/util/opal_environ.h"
|
||
|
#include "opal/util/output.h"
|
||
|
#include "opal/util/show_help.h"
|
||
|
#include "opal/util/trace.h"
|
||
|
#include "opal/version.h"
|
||
|
|
||
|
#include "orte/orte_constants.h"
|
||
|
|
||
|
#include "orte/class/orte_pointer_array.h"
|
||
|
#include "orte/util/proc_info.h"
|
||
|
#include "orte/util/sys_info.h"
|
||
|
#include "orte/util/universe_setup_file_io.h"
|
||
|
#include "orte/util/pre_condition_transports.h"
|
||
|
|
||
|
#include "orte/mca/ns/ns.h"
|
||
|
#include "orte/mca/gpr/gpr.h"
|
||
|
#include "orte/mca/pls/pls.h"
|
||
|
#include "orte/mca/rmaps/rmaps_types.h"
|
||
|
#include "orte/mca/rmgr/rmgr.h"
|
||
|
#include "orte/mca/schema/schema.h"
|
||
|
#include "orte/mca/smr/smr.h"
|
||
|
#include "orte/mca/errmgr/errmgr.h"
|
||
|
|
||
|
#include "orte/runtime/runtime.h"
|
||
|
#include "orte/runtime/orte_wait.h"
|
||
|
|
||
|
/*
|
||
|
* Globals
|
||
|
*/
|
||
|
static orte_jobid_t jobid = ORTE_JOBID_INVALID;
|
||
|
static char *orteboot_basename = NULL;
|
||
|
|
||
|
/*
|
||
|
* setup globals for catching orteboot command line options
|
||
|
*/
|
||
|
struct globals_t {
|
||
|
bool help;
|
||
|
bool version;
|
||
|
bool verbose;
|
||
|
bool quiet;
|
||
|
bool exit;
|
||
|
char *hostfile;
|
||
|
char *wdir;
|
||
|
opal_mutex_t lock;
|
||
|
opal_condition_t cond;
|
||
|
} orteboot_globals;
|
||
|
|
||
|
|
||
|
opal_cmd_line_init_t cmd_line_init[] = {
|
||
|
/* Various "obvious" options */
|
||
|
{ NULL, NULL, NULL, 'h', NULL, "help", 0,
|
||
|
&orteboot_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
|
||
|
"This help message" },
|
||
|
{ NULL, NULL, NULL, 'V', NULL, "version", 0,
|
||
|
&orteboot_globals.version, OPAL_CMD_LINE_TYPE_BOOL,
|
||
|
"Print version and exit" },
|
||
|
{ NULL, NULL, NULL, 'v', NULL, "verbose", 0,
|
||
|
&orteboot_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL,
|
||
|
"Be verbose" },
|
||
|
{ NULL, NULL, NULL, 'q', NULL, "quiet", 0,
|
||
|
&orteboot_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL,
|
||
|
"Suppress helpful messages" },
|
||
|
|
||
|
/* Set a hostfile */
|
||
|
{ "rds", "hostfile", "path", '\0', "hostfile", "hostfile", 1,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"Provide a hostfile" },
|
||
|
{ "rds", "hostfile", "path", '\0', "machinefile", "machinefile", 1,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"Provide a hostfile" },
|
||
|
|
||
|
/* mpiexec-like arguments */
|
||
|
{ NULL, NULL, NULL, '\0', "wdir", "wdir", 1,
|
||
|
&orteboot_globals.wdir, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"Set the working directory of the started processes" },
|
||
|
|
||
|
/* These arguments can be specified multiple times */
|
||
|
{ NULL, NULL, NULL, 'H', "host", "host", 1,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"List of hosts to invoke processes on" },
|
||
|
|
||
|
/* OpenRTE arguments */
|
||
|
{ "orte", "debug", NULL, 'd', NULL, "debug-devel", 0,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||
|
"Enable debugging of OpenRTE" },
|
||
|
|
||
|
{ "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_INT,
|
||
|
"Enable debugging of any OpenRTE daemons used by this application" },
|
||
|
|
||
|
{ "orte", "debug", "daemons_file", '\0', NULL, "debug-daemons-file", 0,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||
|
"Enable debugging of any OpenRTE daemons used by this application, storing output in files" },
|
||
|
|
||
|
{ "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||
|
"Do not detach OpenRTE daemons used by this application" },
|
||
|
|
||
|
{ "universe", NULL, NULL, '\0', NULL, "universe", 1,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"Set the universe name as username@hostname:universe_name for this application" },
|
||
|
|
||
|
{ NULL, NULL, NULL, '\0', NULL, "tmpdir", 1,
|
||
|
&orte_process_info.tmpdir_base, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"Set the root for the session directory tree for orteboot ONLY" },
|
||
|
|
||
|
{ NULL, NULL, NULL, '\0', NULL, "prefix", 1,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"Prefix where Open MPI is installed on remote nodes" },
|
||
|
{ NULL, NULL, NULL, '\0', NULL, "noprefix", 0,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||
|
"Disable automatic --prefix behavior" },
|
||
|
|
||
|
/* End of list */
|
||
|
{ NULL, NULL, NULL, '\0', NULL, NULL, 0,
|
||
|
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
||
|
};
|
||
|
|
||
|
#if !defined(__WINDOWS__)
|
||
|
extern char** environ;
|
||
|
#endif /* !defined(__WINDOWS__) */
|
||
|
/*
|
||
|
* Local functions
|
||
|
*/
|
||
|
|
||
|
int main(int argc, char *argv[])
|
||
|
{
|
||
|
orte_app_context_t *app;
|
||
|
int rc, ret;
|
||
|
int id, iparam;
|
||
|
opal_list_t attributes;
|
||
|
opal_cmd_line_t cmd_line;
|
||
|
|
||
|
OBJ_CONSTRUCT(&orteboot_globals.lock, opal_mutex_t);
|
||
|
OBJ_CONSTRUCT(&orteboot_globals.cond, opal_condition_t);
|
||
|
orteboot_globals.hostfile = NULL;
|
||
|
orteboot_globals.wdir = NULL;
|
||
|
orteboot_globals.help = false;
|
||
|
orteboot_globals.version = false;
|
||
|
orteboot_globals.verbose = false;
|
||
|
orteboot_globals.exit = false;
|
||
|
|
||
|
/* Setup MCA params */
|
||
|
mca_base_param_init();
|
||
|
|
||
|
/* find our basename (the name of the executable) so that we can
|
||
|
* use it in pretty-print error messages
|
||
|
*/
|
||
|
orteboot_basename = opal_basename(argv[0]);
|
||
|
|
||
|
/* Setup and parse the command line */
|
||
|
opal_cmd_line_create(&cmd_line, cmd_line_init);
|
||
|
mca_base_cmd_line_setup(&cmd_line);
|
||
|
if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(&cmd_line, true,
|
||
|
argc, argv))) {
|
||
|
char *args = NULL;
|
||
|
args = opal_cmd_line_get_usage_msg(&cmd_line);
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:usage", false,
|
||
|
argv[0], args);
|
||
|
free(args);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/* print version if requested. Do this before check for help so
|
||
|
that --version --help works as one might expect. */
|
||
|
if (orteboot_globals.version &&
|
||
|
!(1 == argc || orteboot_globals.help)) {
|
||
|
char *project_name = NULL;
|
||
|
if (0 == strcmp(orteboot_basename, "ompiboot")) {
|
||
|
project_name = "Open MPI";
|
||
|
} else {
|
||
|
project_name = "OpenRTE";
|
||
|
}
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:version", false,
|
||
|
orteboot_basename, project_name, OPAL_VERSION,
|
||
|
PACKAGE_BUGREPORT);
|
||
|
/* if we were the only argument, exit */
|
||
|
if (2 == argc) exit(0);
|
||
|
}
|
||
|
|
||
|
/* Check for help request */
|
||
|
if (1 == argc || orteboot_globals.help) {
|
||
|
char *args = NULL;
|
||
|
char *project_name = NULL;
|
||
|
if (0 == strcmp(orteboot_basename, "ompiboot")) {
|
||
|
project_name = "Open MPI";
|
||
|
} else {
|
||
|
project_name = "OpenRTE";
|
||
|
}
|
||
|
args = opal_cmd_line_get_usage_msg(&cmd_line);
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:usage", false,
|
||
|
orteboot_basename, project_name, OPAL_VERSION,
|
||
|
orteboot_basename, args,
|
||
|
PACKAGE_BUGREPORT);
|
||
|
free(args);
|
||
|
|
||
|
/* If someone asks for help, that should be all we do */
|
||
|
exit(0);
|
||
|
}
|
||
|
|
||
|
/* check for daemon flags and push them into the environment
|
||
|
* since this isn't being automatically done
|
||
|
*/
|
||
|
id = mca_base_param_reg_int_name("orte_debug", "daemons",
|
||
|
"Whether to debug the ORTE daemons or not",
|
||
|
false, false, (int)false, &iparam);
|
||
|
if (iparam) {
|
||
|
char *tmp = mca_base_param_environ_variable("orte", "debug", "daemons");
|
||
|
if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) {
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:environ", false,
|
||
|
orteboot_basename, tmp, "1", rc);
|
||
|
free(tmp);
|
||
|
return rc;
|
||
|
}
|
||
|
free(tmp);
|
||
|
}
|
||
|
id = mca_base_param_reg_int_name("orte", "debug",
|
||
|
"Top-level ORTE debug switch",
|
||
|
false, false, 0, &iparam);
|
||
|
if (iparam) {
|
||
|
char *tmp = mca_base_param_environ_variable("orte", NULL, "debug");
|
||
|
if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) {
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:environ", false,
|
||
|
orteboot_basename, tmp, "1", rc);
|
||
|
free(tmp);
|
||
|
return rc;
|
||
|
}
|
||
|
free(tmp);
|
||
|
}
|
||
|
id = mca_base_param_reg_int_name("orte_debug", "daemons_file",
|
||
|
"Whether want stdout/stderr of daemons to go to a file or not",
|
||
|
false, false, 0, &iparam);
|
||
|
if (iparam) {
|
||
|
char *tmp = mca_base_param_environ_variable("orte", "debug",
|
||
|
"daemons_file");
|
||
|
if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) {
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:environ", false,
|
||
|
orteboot_basename, tmp, "1", rc);
|
||
|
free(tmp);
|
||
|
return rc;
|
||
|
}
|
||
|
free(tmp);
|
||
|
}
|
||
|
id = mca_base_param_reg_int_name("orte", "no_daemonize",
|
||
|
"Whether to properly daemonize the ORTE daemons or not",
|
||
|
false, false, 0, &iparam);
|
||
|
if (iparam) {
|
||
|
char *tmp = mca_base_param_environ_variable("orte", "no_daemonize", NULL);
|
||
|
if (ORTE_SUCCESS != (rc = opal_setenv(tmp, "1", true, &environ))) {
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:environ", false,
|
||
|
orteboot_basename, tmp, "1", rc);
|
||
|
free(tmp);
|
||
|
return rc;
|
||
|
}
|
||
|
free(tmp);
|
||
|
}
|
||
|
|
||
|
/* Intialize our Open RTE environment */
|
||
|
/* Set the flag telling orte_init that I am NOT a
|
||
|
* singleton, but am "infrastructure" - prevents setting
|
||
|
* up incorrect infrastructure that only a singleton would
|
||
|
* require
|
||
|
*/
|
||
|
if (ORTE_SUCCESS != (rc = orte_init(true))) {
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:init-failure", true,
|
||
|
"orte_init()", rc);
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
/* Prep to start the virtual machine */
|
||
|
/* construct the list of attributes */
|
||
|
OBJ_CONSTRUCT(&attributes, opal_list_t);
|
||
|
|
||
|
orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_PERNODE, ORTE_UNDEF, NULL, ORTE_RMGR_ATTR_NO_OVERRIDE);
|
||
|
|
||
|
/* Create the app - in this case, that's just a no_op to get the daemons launched */
|
||
|
app = OBJ_NEW(orte_app_context_t);
|
||
|
if (NULL == app) {
|
||
|
opal_show_help("help-orteboot.txt", "orteboot:call-failed",
|
||
|
true, orteboot_basename, "system", "malloc returned NULL", errno);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Spawn the job */
|
||
|
|
||
|
rc = orte_rmgr.spawn_job(&app, 1, &jobid, 0, NULL, NULL, 0, &attributes);
|
||
|
if (ORTE_SUCCESS != rc) {
|
||
|
/* JMS show_help */
|
||
|
opal_output(0, "%s: spawn failed with errno=%d\n", orteboot_basename, rc);
|
||
|
}
|
||
|
OBJ_DESTRUCT(&attributes);
|
||
|
|
||
|
|
||
|
orte_finalize();
|
||
|
free(orteboot_basename);
|
||
|
return rc;
|
||
|
}
|
||
|
|