cc97c0f611
In schizo/ompi, set the new OMPI_MCA_mpi_oversubscribe environment variable according to the node oversubscription state. This MCA parameter is used to set the default value of the mpi_yield_when_idle parameter. This two-step tango is needed so that the mpi_yield_when_idle setting is always honored when it is set in a config file. Refs. open-mpi/ompi#6433 Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
1351 строка
57 KiB
C
1351 строка
57 KiB
C
/*
|
|
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2006-2017 Los Alamos National Security, LLC.
|
|
* All rights reserved.
|
|
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved
|
|
* Copyright (c) 2011-2017 Oak Ridge National Labs. All rights reserved.
|
|
* Copyright (c) 2017 UT-Battelle, LLC. All rights reserved.
|
|
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
|
* Copyright (c) 2015-2019 Research Organization for Information Science
|
|
* and Technology (RIST). All rights reserved.
|
|
* Copyright (c) 2018 IBM Corporation. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/types.h"
|
|
#include "opal/types.h"
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#include <ctype.h>
|
|
|
|
#include "opal/util/argv.h"
|
|
#include "opal/util/opal_environ.h"
|
|
#include "opal/util/os_dirpath.h"
|
|
#include "opal/util/show_help.h"
|
|
#include "opal/mca/shmem/base/base.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
#include "orte/mca/ess/base/base.h"
|
|
#include "orte/mca/rmaps/rmaps_types.h"
|
|
#include "orte/orted/orted_submit.h"
|
|
#include "orte/util/name_fns.h"
|
|
#include "orte/util/session_dir.h"
|
|
#include "orte/util/show_help.h"
|
|
#include "orte/runtime/orte_globals.h"
|
|
|
|
#include "orte/mca/schizo/base/base.h"
|
|
|
|
static int define_cli(opal_cmd_line_t *cli);
|
|
static int parse_cli(int argc, int start, char **argv);
|
|
static int parse_env(char *path,
|
|
opal_cmd_line_t *cmd_line,
|
|
char **srcenv,
|
|
char ***dstenv);
|
|
static int setup_fork(orte_job_t *jdata,
|
|
orte_app_context_t *context);
|
|
static int setup_child(orte_job_t *jobdat,
|
|
orte_proc_t *child,
|
|
orte_app_context_t *app,
|
|
char ***env);
|
|
|
|
orte_schizo_base_module_t orte_schizo_ompi_module = {
|
|
.define_cli = define_cli,
|
|
.parse_cli = parse_cli,
|
|
.parse_env = parse_env,
|
|
.setup_fork = setup_fork,
|
|
.setup_child = setup_child
|
|
};
|
|
|
|
|
|
static opal_cmd_line_init_t cmd_line_init[] = {
|
|
/* Various "obvious" options */
|
|
{ NULL, 'h', NULL, "help", 1,
|
|
&orte_cmd_options.help, OPAL_CMD_LINE_TYPE_STRING,
|
|
"This help message", OPAL_CMD_LINE_OTYPE_GENERAL },
|
|
{ NULL, 'V', NULL, "version", 0,
|
|
&orte_cmd_options.version, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Print version and exit", OPAL_CMD_LINE_OTYPE_GENERAL },
|
|
{ NULL, 'v', NULL, "verbose", 0,
|
|
&orte_cmd_options.verbose, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Be verbose", OPAL_CMD_LINE_OTYPE_GENERAL },
|
|
{ "orte_execute_quiet", 'q', NULL, "quiet", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Suppress helpful messages", OPAL_CMD_LINE_OTYPE_GENERAL },
|
|
{ NULL, '\0', "report-pid", "report-pid", 1,
|
|
&orte_cmd_options.report_pid, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Printout pid on stdout [-], stderr [+], or a file [anything else]",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ NULL, '\0', "report-uri", "report-uri", 1,
|
|
&orte_cmd_options.report_uri, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Printout URI on stdout [-], stderr [+], or a file [anything else]",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
|
|
/* testing options */
|
|
{ NULL, '\0', "timeout", "timeout", 1,
|
|
&orte_cmd_options.timeout, OPAL_CMD_LINE_TYPE_INT,
|
|
"Timeout the job after the specified number of seconds",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ NULL, '\0', "report-state-on-timeout", "report-state-on-timeout", 0,
|
|
&orte_cmd_options.report_state_on_timeout, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Report all job and process states upon timeout",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ NULL, '\0', "get-stack-traces", "get-stack-traces", 0,
|
|
&orte_cmd_options.get_stack_traces, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Get stack traces of all application procs on timeout",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
|
|
|
|
/* exit status reporting */
|
|
{ "orte_report_child_jobs_separately", '\0', "report-child-jobs-separately", "report-child-jobs-separately", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Return the exit status of the primary job only", OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
|
|
/* uri of the dvm, or at least where to get it */
|
|
{ NULL, '\0', "hnp", "hnp", 1,
|
|
&orte_cmd_options.hnp, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Specify the URI of the HNP, or the name of the file (specified as file:filename) that contains that info",
|
|
OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
/* select XML output */
|
|
{ "orte_xml_output", '\0', "xml", "xml", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Provide all output in XML format", OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
{ "orte_xml_file", '\0', "xml-file", "xml-file", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Provide all output in XML format to the specified file", OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
|
|
/* tag output */
|
|
{ "orte_tag_output", '\0', "tag-output", "tag-output", 0,
|
|
&orte_cmd_options.tag_output, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Tag all output with [job,rank]", OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
{ "orte_timestamp_output", '\0', "timestamp-output", "timestamp-output", 0,
|
|
&orte_cmd_options.timestamp_output, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Timestamp all application process output", OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
{ "orte_output_filename", '\0', "output-filename", "output-filename", 1,
|
|
&orte_cmd_options.output_filename, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Redirect output from application processes into filename/job/rank/std[out,err,diag]. A relative path value will be converted to an absolute path",
|
|
OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
{ NULL, '\0', "merge-stderr-to-stdout", "merge-stderr-to-stdout", 0,
|
|
&orte_cmd_options.merge, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Merge stderr to stdout for each process", OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
{ "orte_xterm", '\0', "xterm", "xterm", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Create a new xterm window and display output from the specified ranks there",
|
|
OPAL_CMD_LINE_OTYPE_OUTPUT },
|
|
|
|
/* select stdin option */
|
|
{ NULL, '\0', "stdin", "stdin", 1,
|
|
&orte_cmd_options.stdin_target, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)",
|
|
OPAL_CMD_LINE_OTYPE_INPUT },
|
|
|
|
/* request that argv[0] be indexed */
|
|
{ NULL, '\0', "index-argv-by-rank", "index-argv-by-rank", 0,
|
|
&orte_cmd_options.index_argv, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Uniquely index argv[0] for each process using its rank",
|
|
OPAL_CMD_LINE_OTYPE_INPUT },
|
|
|
|
/* Specify the launch agent to be used */
|
|
{ "orte_launch_agent", '\0', "launch-agent", "launch-agent", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Command used to start processes on remote nodes (default: orted)",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
/* Preload the binary on the remote machine */
|
|
{ NULL, 's', NULL, "preload-binary", 0,
|
|
&orte_cmd_options.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Preload the binary on the remote machine before starting the remote process.",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
/* Preload files on the remote machine */
|
|
{ NULL, '\0', NULL, "preload-files", 1,
|
|
&orte_cmd_options.preload_files, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Preload the comma separated list of files to the remote machines current working directory before starting the remote process.",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
#if OPAL_ENABLE_FT_CR == 1
|
|
/* Tell SStore to preload a snapshot before launch */
|
|
{ NULL, '\0', NULL, "sstore-load", 1,
|
|
&orte_cmd_options.sstore_load, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Internal Use Only! Tell SStore to preload a snapshot before launch." },
|
|
#endif
|
|
|
|
/* Use an appfile */
|
|
{ NULL, '\0', NULL, "app", 1,
|
|
&orte_cmd_options.appfile, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Provide an appfile; ignore all other command line options",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
/* Number of processes; -c, -n, --n, -np, and --np are all
|
|
synonyms */
|
|
{ NULL, 'c', "np", "np", 1,
|
|
&orte_cmd_options.num_procs, OPAL_CMD_LINE_TYPE_INT,
|
|
"Number of processes to run", OPAL_CMD_LINE_OTYPE_GENERAL },
|
|
{ NULL, '\0', "n", "n", 1,
|
|
&orte_cmd_options.num_procs, OPAL_CMD_LINE_TYPE_INT,
|
|
"Number of processes to run", OPAL_CMD_LINE_OTYPE_GENERAL },
|
|
|
|
/* maximum size of VM - typically used to subdivide an allocation */
|
|
{ "orte_max_vm_size", '\0', "max-vm-size", "max-vm-size", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_INT,
|
|
"Number of processes to run", OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
/* Set a hostfile */
|
|
{ NULL, '\0', "hostfile", "hostfile", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Provide a hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
{ NULL, '\0', "machinefile", "machinefile", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Provide a hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
{ "orte_default_hostfile", '\0', "default-hostfile", "default-hostfile", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Provide a default hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
{ "opal_if_do_not_resolve", '\0', "do-not-resolve", "do-not-resolve", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Do not attempt to resolve interfaces", OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
|
|
/* uri of PMIx publish/lookup server, or at least where to get it */
|
|
{ "pmix_server_uri", '\0', "ompi-server", "ompi-server", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Specify the URI of the publish/lookup server, or the name of the file (specified as file:filename) that contains that info",
|
|
OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
{ "carto_file_path", '\0', "cf", "cartofile", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Provide a cartography file", OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
|
|
{ "orte_rankfile", '\0', "rf", "rankfile", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Provide a rankfile file", OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
|
|
/* Export environment variables; potentially used multiple times,
|
|
so it does not make sense to set into a variable */
|
|
{ NULL, 'x', NULL, NULL, 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_NULL,
|
|
"Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)", OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
/* Mapping controls */
|
|
{ "rmaps_base_display_map", '\0', "display-map", "display-map", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Display the process map just before launch", OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ "rmaps_base_display_devel_map", '\0', "display-devel-map", "display-devel-map", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Display a detailed process map (mostly intended for developers) just before launch",
|
|
OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
{ "rmaps_base_display_topo_with_map", '\0', "display-topo", "display-topo", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Display the topology as part of the process map (mostly intended for developers) just before launch",
|
|
OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
{ "rmaps_base_display_diffable_map", '\0', "display-diffable-map", "display-diffable-map", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Display a diffable process map (mostly intended for developers) just before launch",
|
|
OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
{ NULL, 'H', "host", "host", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"List of hosts to invoke processes on",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
{ "rmaps_base_no_schedule_local", '\0', "nolocal", "nolocal", 0,
|
|
&orte_cmd_options.nolocal, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Do not run any MPI applications on the local node",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
{ "rmaps_base_no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0,
|
|
&orte_cmd_options.no_oversubscribe, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Nodes are not to be oversubscribed, even if the system supports such operation",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
{ "rmaps_base_oversubscribe", '\0', "oversubscribe", "oversubscribe", 0,
|
|
&orte_cmd_options.oversubscribe, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Nodes are allowed to be oversubscribed, even on a managed system, and overloading of processing elements",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
{ "rmaps_base_cpus_per_rank", '\0', "cpus-per-proc", "cpus-per-proc", 1,
|
|
&orte_cmd_options.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT,
|
|
"Number of cpus to use for each process [default=1]",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
{ "rmaps_base_cpus_per_rank", '\0', "cpus-per-rank", "cpus-per-rank", 1,
|
|
&orte_cmd_options.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT,
|
|
"Synonym for cpus-per-proc", OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
|
|
/* backward compatiblity */
|
|
{ "rmaps_base_bycore", '\0', "bycore", "bycore", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Whether to map and rank processes round-robin by core",
|
|
OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
{ "rmaps_base_bynode", '\0', "bynode", "bynode", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Whether to map and rank processes round-robin by node",
|
|
OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
{ "rmaps_base_byslot", '\0', "byslot", "byslot", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Whether to map and rank processes round-robin by slot",
|
|
OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
|
|
/* Nperxxx options that do not require topology and are always
|
|
* available - included for backwards compatibility
|
|
*/
|
|
{ "rmaps_ppr_pernode", '\0', "pernode", "pernode", 0,
|
|
&orte_cmd_options.pernode, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Launch one process per available node",
|
|
OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
{ "rmaps_ppr_n_pernode", '\0', "npernode", "npernode", 1,
|
|
&orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT,
|
|
"Launch n processes per node on all allocated nodes",
|
|
OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
{ "rmaps_ppr_n_pernode", '\0', "N", NULL, 1,
|
|
&orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT,
|
|
"Launch n processes per node on all allocated nodes (synonym for 'map-by node')",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
|
|
/* declare hardware threads as independent cpus */
|
|
{ "hwloc_base_use_hwthreads_as_cpus", '\0', "use-hwthread-cpus", "use-hwthread-cpus", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Use hardware threads as independent cpus", OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
|
|
/* include npersocket for backwards compatibility */
|
|
{ "rmaps_ppr_n_persocket", '\0', "npersocket", "npersocket", 1,
|
|
&orte_cmd_options.npersocket, OPAL_CMD_LINE_TYPE_INT,
|
|
"Launch n processes per socket on all allocated nodes",
|
|
OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
|
|
/* Mapping options */
|
|
{ "rmaps_base_mapping_policy", '\0', NULL, "map-by", 1,
|
|
&orte_cmd_options.mapping_policy, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Mapping Policy [slot | hwthread | core | socket (default) | numa | board | node]",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
|
|
/* Ranking options */
|
|
{ "rmaps_base_ranking_policy", '\0', NULL, "rank-by", 1,
|
|
&orte_cmd_options.ranking_policy, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Ranking Policy [slot (default) | hwthread | core | socket | numa | board | node]",
|
|
OPAL_CMD_LINE_OTYPE_RANKING },
|
|
|
|
/* Binding options */
|
|
{ "hwloc_base_binding_policy", '\0', NULL, "bind-to", 1,
|
|
&orte_cmd_options.binding_policy, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board, cpu-list (\"none\" is the default when oversubscribed, \"core\" is the default when np<=2, and \"socket\" is the default when np>2). Allowed qualifiers: overload-allowed, if-supported, ordered", OPAL_CMD_LINE_OTYPE_BINDING },
|
|
|
|
/* backward compatiblity */
|
|
{ "hwloc_base_bind_to_core", '\0', "bind-to-core", "bind-to-core", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Bind processes to cores", OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
{ "hwloc_base_bind_to_socket", '\0', "bind-to-socket", "bind-to-socket", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Bind processes to sockets", OPAL_CMD_LINE_OTYPE_COMPAT },
|
|
|
|
{ "hwloc_base_report_bindings", '\0', "report-bindings", "report-bindings", 0,
|
|
&orte_cmd_options.report_bindings, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Whether to report process bindings to stderr",
|
|
OPAL_CMD_LINE_OTYPE_BINDING },
|
|
|
|
/* slot list option */
|
|
{ "hwloc_base_cpu_list", '\0', "cpu-list", "cpu-list", 1,
|
|
&orte_cmd_options.cpu_list, OPAL_CMD_LINE_TYPE_STRING,
|
|
"List of processor IDs to bind processes to [default=NULL]",
|
|
OPAL_CMD_LINE_OTYPE_BINDING },
|
|
|
|
/* generalized pattern mapping option */
|
|
{ "rmaps_ppr_pattern", '\0', NULL, "ppr", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Comma-separated list of number of processes on a given resource type [default: none]",
|
|
OPAL_CMD_LINE_OTYPE_MAPPING },
|
|
|
|
/* Allocation options */
|
|
{ "orte_display_alloc", '\0', "display-allocation", "display-allocation", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Display the allocation being used by this job", OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ "orte_display_devel_alloc", '\0', "display-devel-allocation", "display-devel-allocation", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Display a detailed list (mostly intended for developers) of the allocation being used by this job",
|
|
OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
{ "hwloc_base_cpu_set", '\0', "cpu-set", "cpu-set", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Comma-separated list of ranges specifying logical cpus allocated to this job [default: none]",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
|
|
/* mpiexec-like arguments */
|
|
{ NULL, '\0', "wdir", "wdir", 1,
|
|
&orte_cmd_options.wdir, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Set the working directory of the started processes",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
{ NULL, '\0', "wd", "wd", 1,
|
|
&orte_cmd_options.wdir, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Synonym for --wdir", OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
{ NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0,
|
|
&orte_cmd_options.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Set the working directory of the started processes to their session directory",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
{ NULL, '\0', "path", "path", 1,
|
|
&orte_cmd_options.path, OPAL_CMD_LINE_TYPE_STRING,
|
|
"PATH to be used to look for executables to start processes",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
/* User-level debugger arguments */
|
|
{ NULL, '\0', "tv", "tv", 0,
|
|
&orte_cmd_options.debugger, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Deprecated backwards compatibility flag; synonym for \"--debug\"",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ NULL, '\0', "debug", "debug", 0,
|
|
&orte_cmd_options.debugger, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ "orte_base_user_debugger", '\0', "debugger", "debugger", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Sequence of debuggers to search for when \"--debug\" is used",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
{ "orte_output_debugger_proctable", '\0', "output-proctable", "output-proctable", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Output the debugger proctable after launch",
|
|
OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
|
|
/* OpenRTE arguments */
|
|
{ "orte_debug", 'd', "debug-devel", "debug-devel", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Enable debugging of OpenRTE", OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
|
|
{ "orte_debug_daemons", '\0', "debug-daemons", "debug-daemons", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_INT,
|
|
"Enable debugging of any OpenRTE daemons used by this application",
|
|
OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
|
|
{ "orte_debug_daemons_file", '\0', "debug-daemons-file", "debug-daemons-file", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Enable debugging of any OpenRTE daemons used by this application, storing output in files",
|
|
OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
|
|
{ "orte_leave_session_attached", '\0', "leave-session-attached", "leave-session-attached", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Enable debugging of OpenRTE", OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
|
|
{ "orte_do_not_launch", '\0', "do-not-launch", "do-not-launch", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Perform all necessary operations to prepare to launch the application, but do not actually launch it",
|
|
OPAL_CMD_LINE_OTYPE_DEVEL },
|
|
|
|
{ NULL, '\0', NULL, "prefix", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Prefix where Open MPI is installed on remote nodes",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
{ NULL, '\0', NULL, "noprefix", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Disable automatic --prefix behavior",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
{ "orte_report_launch_progress", '\0', "show-progress", "show-progress", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Output a brief periodic report on launch progress",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
{ "orte_use_regexp", '\0', "use-regexp", "use-regexp", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Use regular expressions for launch", OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
{ "orte_report_events", '\0', "report-events", "report-events", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Report events to a tool listening at the specified URI", OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
|
|
{ "orte_enable_recovery", '\0', "enable-recovery", "enable-recovery", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Enable recovery from process failure [Default = disabled]",
|
|
OPAL_CMD_LINE_OTYPE_UNSUPPORTED },
|
|
|
|
{ "orte_max_restarts", '\0', "max-restarts", "max-restarts", 1,
|
|
NULL, OPAL_CMD_LINE_TYPE_INT,
|
|
"Max number of times to restart a failed process",
|
|
OPAL_CMD_LINE_OTYPE_UNSUPPORTED },
|
|
|
|
{ NULL, '\0', "continuous", "continuous", 0,
|
|
&orte_cmd_options.continuous, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Job is to run until explicitly terminated", OPAL_CMD_LINE_OTYPE_DEBUG },
|
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
{ "opal_cr_enable_crdebug", '\0', "crdebug", "crdebug", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Enable C/R Debugging" },
|
|
#endif
|
|
|
|
{ NULL, '\0', "disable-recovery", "disable-recovery", 0,
|
|
&orte_cmd_options.disable_recovery, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Disable recovery (resets all recovery options to off)",
|
|
OPAL_CMD_LINE_OTYPE_UNSUPPORTED },
|
|
|
|
{ "orte_no_vm", '\0', "novm", "novm", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Execute without creating an allocation-spanning virtual machine (only start daemons on nodes hosting application procs)",
|
|
OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
{ NULL, '\0', "allow-run-as-root", "allow-run-as-root", 0,
|
|
&orte_cmd_options.run_as_root, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Allow execution as root (STRONGLY DISCOURAGED)",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
{ NULL, '\0', "personality", "personality", 1,
|
|
&orte_cmd_options.personality, OPAL_CMD_LINE_TYPE_STRING,
|
|
"Comma-separated list of programming model, languages, and containers being used (default=\"ompi\")",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
{ NULL, '\0', "dvm", "dvm", 0,
|
|
&orte_cmd_options.create_dvm, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Create a persistent distributed virtual machine (DVM)",
|
|
OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
/* fwd mpirun port */
|
|
{ "orte_fwd_mpirun_port", '\0', "fwd-mpirun-port", "fwd-mpirun-port", 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
|
"Forward mpirun port to compute node daemons so all will use it",
|
|
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
|
|
|
/* End of list */
|
|
{ NULL, '\0', NULL, NULL, 0,
|
|
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
|
};
|
|
|
|
/*
 * Add the OMPI personality's options (cmd_line_init) to the given parser.
 *
 * Returns ORTE_ERR_BAD_PARAM when cli is NULL, ORTE_ERR_TAKE_NEXT_OPTION
 * when a personality list was given and "ompi" is not on it, and otherwise
 * whatever opal_cmd_line_add() returns.
 */
static int define_cli(opal_cmd_line_t *cli)
{
    const char *dvm_uri;
    bool selected;
    int idx;

    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                        "%s schizo:ompi: define_cli",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* a parser to add the options to is mandatory */
    if (NULL == cli) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* with an explicit personality list, step aside unless "ompi" is named */
    if (NULL != orte_schizo_base.personalities) {
        selected = false;
        for (idx = 0;
             !selected && NULL != orte_schizo_base.personalities[idx];
             idx++) {
            selected = (0 == strcmp(orte_schizo_base.personalities[idx], "ompi"));
        }
        if (!selected) {
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
    }

    /*
     * Pick up an HNP DVM URI handed to us through the environment.  This is
     * done before opal_cmd_line_parse() runs so that, if the user supplies
     * both the envvar and the command line option, the command line wins.
     */
    dvm_uri = getenv("ORTE_HNP_DVM_URI");
    if (NULL != dvm_uri) {
        orte_cmd_options.hnp = strdup(dvm_uri);
    }

    /* our options simply go at the end of whatever is already defined */
    return opal_cmd_line_add(cli, cmd_line_init);
}
|
|
|
|
static int parse_cli(int argc, int start, char **argv)
|
|
{
|
|
int i, j, k;
|
|
bool ignore;
|
|
char *no_dups[] = {
|
|
"grpcomm",
|
|
"odls",
|
|
"rml",
|
|
"routed",
|
|
NULL
|
|
};
|
|
bool takeus = false;
|
|
|
|
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
|
|
"%s schizo:ompi: parse_cli",
|
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
|
|
|
/* if they gave us a list of personalities,
|
|
* see if we are included */
|
|
if (NULL != orte_schizo_base.personalities) {
|
|
for (i=0; NULL != orte_schizo_base.personalities[i]; i++) {
|
|
if (0 == strcmp(orte_schizo_base.personalities[i], "ompi")) {
|
|
takeus = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!takeus) {
|
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
|
}
|
|
} else {
|
|
/* attempt to auto-detect CLI options that
|
|
* we recognize */
|
|
}
|
|
|
|
for (i = 0; i < (argc-start); ++i) {
|
|
if (0 == strcmp("-mca", argv[i]) ||
|
|
0 == strcmp("--mca", argv[i]) ) {
|
|
/* ignore this one */
|
|
if (0 == strcmp(argv[i+1], "mca_base_env_list")) {
|
|
i += 2;
|
|
continue;
|
|
}
|
|
/* It would be nice to avoid increasing the length
|
|
* of the orted cmd line by removing any non-ORTE
|
|
* params. However, this raises a problem since
|
|
* there could be OPAL directives that we really
|
|
* -do- want the orted to see - it's only the OMPI
|
|
* related directives we could ignore. This becomes
|
|
* a very complicated procedure, however, since
|
|
* the OMPI mca params are not cleanly separated - so
|
|
* filtering them out is nearly impossible.
|
|
*
|
|
* see if this is already present so we at least can
|
|
* avoid growing the cmd line with duplicates
|
|
*/
|
|
ignore = false;
|
|
if (NULL != orted_cmd_line) {
|
|
for (j=0; NULL != orted_cmd_line[j]; j++) {
|
|
if (0 == strcmp(argv[i+1], orted_cmd_line[j])) {
|
|
/* already here - if the value is the same,
|
|
* we can quitely ignore the fact that they
|
|
* provide it more than once. However, some
|
|
* frameworks are known to have problems if the
|
|
* value is different. We don't have a good way
|
|
* to know this, but we at least make a crude
|
|
* attempt here to protect ourselves.
|
|
*/
|
|
if (0 == strcmp(argv[i+2], orted_cmd_line[j+1])) {
|
|
/* values are the same */
|
|
ignore = true;
|
|
break;
|
|
} else {
|
|
/* values are different - see if this is a problem */
|
|
for (k=0; NULL != no_dups[k]; k++) {
|
|
if (0 == strcmp(no_dups[k], argv[i+1])) {
|
|
/* print help message
|
|
* and abort as we cannot know which one is correct
|
|
*/
|
|
orte_show_help("help-orterun.txt", "orterun:conflicting-params",
|
|
true, orte_basename, argv[i+1],
|
|
argv[i+2], orted_cmd_line[j+1]);
|
|
return ORTE_ERR_BAD_PARAM;
|
|
}
|
|
}
|
|
/* this passed muster - just ignore it */
|
|
ignore = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (!ignore) {
|
|
opal_argv_append_nosize(&orted_cmd_line, argv[i]);
|
|
opal_argv_append_nosize(&orted_cmd_line, argv[i+1]);
|
|
opal_argv_append_nosize(&orted_cmd_line, argv[i+2]);
|
|
}
|
|
i += 2;
|
|
}
|
|
}
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
static int parse_env(char *path,
|
|
opal_cmd_line_t *cmd_line,
|
|
char **srcenv,
|
|
char ***dstenv)
|
|
{
|
|
int i, j;
|
|
char *param;
|
|
char *value;
|
|
char *env_set_flag;
|
|
char **vars;
|
|
bool takeus = false;
|
|
|
|
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
|
|
"%s schizo:ompi: parse_env",
|
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
|
|
|
if (NULL != orte_schizo_base.personalities) {
|
|
/* see if we are included */
|
|
for (i=0; NULL != orte_schizo_base.personalities[i]; i++) {
|
|
if (0 == strcmp(orte_schizo_base.personalities[i], "ompi")) {
|
|
takeus = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!takeus) {
|
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
|
}
|
|
}
|
|
|
|
for (i = 0; NULL != srcenv[i]; ++i) {
|
|
if (0 == strncmp("OMPI_", srcenv[i], 5) ||
|
|
0 == strncmp("PMIX_", srcenv[i], 5)) {
|
|
/* check for duplicate in app->env - this
|
|
* would have been placed there by the
|
|
* cmd line processor. By convention, we
|
|
* always let the cmd line override the
|
|
* environment
|
|
*/
|
|
param = strdup(srcenv[i]);
|
|
value = strchr(param, '=');
|
|
*value = '\0';
|
|
value++;
|
|
opal_setenv(param, value, false, dstenv);
|
|
free(param);
|
|
}
|
|
}
|
|
|
|
/* set necessary env variables for external usage from tune conf file*/
|
|
int set_from_file = 0;
|
|
vars = NULL;
|
|
if (OPAL_SUCCESS == mca_base_var_process_env_list_from_file(&vars) &&
|
|
NULL != vars) {
|
|
for (i=0; NULL != vars[i]; i++) {
|
|
value = strchr(vars[i], '=');
|
|
/* terminate the name of the param */
|
|
*value = '\0';
|
|
/* step over the equals */
|
|
value++;
|
|
/* overwrite any prior entry */
|
|
opal_setenv(vars[i], value, true, dstenv);
|
|
/* save it for any comm_spawn'd apps */
|
|
opal_setenv(vars[i], value, true, &orte_forwarded_envars);
|
|
}
|
|
set_from_file = 1;
|
|
opal_argv_free(vars);
|
|
}
|
|
/* Did the user request to export any environment variables on the cmd line? */
|
|
env_set_flag = getenv("OMPI_MCA_mca_base_env_list");
|
|
if (opal_cmd_line_is_taken(cmd_line, "x")) {
|
|
if (NULL != env_set_flag) {
|
|
orte_show_help("help-orterun.txt", "orterun:conflict-env-set", false);
|
|
return ORTE_ERR_FATAL;
|
|
}
|
|
j = opal_cmd_line_get_ninsts(cmd_line, "x");
|
|
for (i = 0; i < j; ++i) {
|
|
param = opal_cmd_line_get_param(cmd_line, "x", i, 0);
|
|
|
|
if (NULL != (value = strchr(param, '='))) {
|
|
/* terminate the name of the param */
|
|
*value = '\0';
|
|
/* step over the equals */
|
|
value++;
|
|
/* overwrite any prior entry */
|
|
opal_setenv(param, value, true, dstenv);
|
|
/* save it for any comm_spawn'd apps */
|
|
opal_setenv(param, value, true, &orte_forwarded_envars);
|
|
} else {
|
|
value = getenv(param);
|
|
if (NULL != value) {
|
|
/* overwrite any prior entry */
|
|
opal_setenv(param, value, true, dstenv);
|
|
/* save it for any comm_spawn'd apps */
|
|
opal_setenv(param, value, true, &orte_forwarded_envars);
|
|
} else {
|
|
opal_output(0, "Warning: could not find environment variable \"%s\"\n", param);
|
|
}
|
|
}
|
|
}
|
|
} else if (NULL != env_set_flag) {
|
|
/* if mca_base_env_list was set, check if some of env vars were set via -x from a conf file.
|
|
* If this is the case, error out.
|
|
*/
|
|
if (!set_from_file) {
|
|
/* set necessary env variables for external usage */
|
|
vars = NULL;
|
|
if (OPAL_SUCCESS == mca_base_var_process_env_list(env_set_flag, &vars) &&
|
|
NULL != vars) {
|
|
for (i=0; NULL != vars[i]; i++) {
|
|
value = strchr(vars[i], '=');
|
|
/* terminate the name of the param */
|
|
*value = '\0';
|
|
/* step over the equals */
|
|
value++;
|
|
/* overwrite any prior entry */
|
|
opal_setenv(vars[i], value, true, dstenv);
|
|
/* save it for any comm_spawn'd apps */
|
|
opal_setenv(vars[i], value, true, &orte_forwarded_envars);
|
|
}
|
|
opal_argv_free(vars);
|
|
}
|
|
} else {
|
|
orte_show_help("help-orterun.txt", "orterun:conflict-env-set", false);
|
|
return ORTE_ERR_FATAL;
|
|
}
|
|
}
|
|
|
|
/* If the user specified --path, store it in the user's app
|
|
environment via the OMPI_exec_path variable. */
|
|
if (NULL != path) {
|
|
opal_asprintf(&value, "OMPI_exec_path=%s", path);
|
|
opal_argv_append_nosize(dstenv, value);
|
|
/* save it for any comm_spawn'd apps */
|
|
opal_argv_append_nosize(&orte_forwarded_envars, value);
|
|
free(value);
|
|
}
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
static int setup_fork(orte_job_t *jdata,
|
|
orte_app_context_t *app)
|
|
{
|
|
int i;
|
|
char *param, *p2, *saveptr;
|
|
bool oversubscribed;
|
|
orte_node_t *node;
|
|
char **envcpy, **nps, **firstranks;
|
|
char *npstring, *firstrankstring;
|
|
char *num_app_ctx;
|
|
bool takeus = false;
|
|
bool exists;
|
|
orte_app_context_t* tmp_app;
|
|
orte_attribute_t *attr;
|
|
|
|
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
|
|
"%s schizo:ompi: setup_fork",
|
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
|
|
|
/* if no personality was specified, then nothing to do */
|
|
if (NULL == jdata->personality) {
|
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
|
}
|
|
|
|
if (NULL != orte_schizo_base.personalities) {
|
|
/* see if we are included */
|
|
for (i=0; NULL != jdata->personality[i]; i++) {
|
|
if (0 == strcmp(jdata->personality[i], "ompi")) {
|
|
takeus = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!takeus) {
|
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
|
}
|
|
}
|
|
|
|
/* see if the mapper thinks we are oversubscribed */
|
|
oversubscribed = false;
|
|
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid))) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
|
return ORTE_ERR_NOT_FOUND;
|
|
}
|
|
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_OVERSUBSCRIBED)) {
|
|
oversubscribed = true;
|
|
}
|
|
|
|
/* setup base environment: copy the current environ and merge
|
|
in the app context environ */
|
|
if (NULL != app->env) {
|
|
/* manually free original context->env to avoid a memory leak */
|
|
char **tmp = app->env;
|
|
envcpy = opal_environ_merge(orte_launch_environ, app->env);
|
|
if (NULL != tmp) {
|
|
opal_argv_free(tmp);
|
|
}
|
|
} else {
|
|
envcpy = opal_argv_copy(orte_launch_environ);
|
|
}
|
|
app->env = envcpy;
|
|
|
|
/* special case handling for --prefix: this is somewhat icky,
|
|
but at least some users do this. :-\ It is possible that
|
|
when using --prefix, the user will also "-x PATH" and/or
|
|
"-x LD_LIBRARY_PATH", which would therefore clobber the
|
|
work that was done in the prior pls to ensure that we have
|
|
the prefix at the beginning of the PATH and
|
|
LD_LIBRARY_PATH. So examine the context->env and see if we
|
|
find PATH or LD_LIBRARY_PATH. If found, that means the
|
|
prior work was clobbered, and we need to re-prefix those
|
|
variables. */
|
|
param = NULL;
|
|
orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)¶m, OPAL_STRING);
|
|
/* grab the parameter from the first app context because the current context does not have a prefix assigned */
|
|
if (NULL == param) {
|
|
tmp_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
|
|
assert (NULL != tmp_app);
|
|
orte_get_attribute(&tmp_app->attributes, ORTE_APP_PREFIX_DIR, (void**)¶m, OPAL_STRING);
|
|
}
|
|
for (i = 0; NULL != param && NULL != app->env && NULL != app->env[i]; ++i) {
|
|
char *newenv;
|
|
|
|
/* Reset PATH */
|
|
if (0 == strncmp("PATH=", app->env[i], 5)) {
|
|
opal_asprintf(&newenv, "%s/bin:%s", param, app->env[i] + 5);
|
|
opal_setenv("PATH", newenv, true, &app->env);
|
|
free(newenv);
|
|
}
|
|
|
|
/* Reset LD_LIBRARY_PATH */
|
|
else if (0 == strncmp("LD_LIBRARY_PATH=", app->env[i], 16)) {
|
|
opal_asprintf(&newenv, "%s/lib:%s", param, app->env[i] + 16);
|
|
opal_setenv("LD_LIBRARY_PATH", newenv, true, &app->env);
|
|
free(newenv);
|
|
}
|
|
}
|
|
if (NULL != param) {
|
|
free(param);
|
|
}
|
|
|
|
/* pass my contact info to the local proc so we can talk */
|
|
opal_setenv("OMPI_MCA_orte_local_daemon_uri", orte_process_info.my_daemon_uri, true, &app->env);
|
|
|
|
/* pass the hnp's contact info to the local proc in case it
|
|
* needs it
|
|
*/
|
|
if (NULL != orte_process_info.my_hnp_uri) {
|
|
opal_setenv("OMPI_MCA_orte_hnp_uri", orte_process_info.my_hnp_uri, true, &app->env);
|
|
}
|
|
|
|
/* setup yield schedule - do not override any user-supplied directive! */
|
|
if (oversubscribed) {
|
|
opal_setenv("OMPI_MCA_mpi_oversubscribe", "1", true, &app->env);
|
|
} else {
|
|
opal_setenv("OMPI_MCA_mpi_oversubscribe", "0", true, &app->env);
|
|
}
|
|
|
|
/* set the app_context number into the environment */
|
|
opal_asprintf(¶m, "%ld", (long)app->idx);
|
|
opal_setenv("OMPI_MCA_orte_app_num", param, true, &app->env);
|
|
free(param);
|
|
|
|
/* although the total_slots_alloc is the universe size, users
|
|
* would appreciate being given a public environmental variable
|
|
* that also represents this value - something MPI specific - so
|
|
* do that here. Also required by the ompi_attributes code!
|
|
*
|
|
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
|
* We know - just live with it
|
|
*/
|
|
opal_asprintf(¶m, "%ld", (long)jdata->total_slots_alloc);
|
|
opal_setenv("OMPI_UNIVERSE_SIZE", param, true, &app->env);
|
|
free(param);
|
|
|
|
/* pass the number of nodes involved in this job */
|
|
opal_asprintf(¶m, "%ld", (long)(jdata->map->num_nodes));
|
|
opal_setenv("OMPI_MCA_orte_num_nodes", param, true, &app->env);
|
|
free(param);
|
|
|
|
/* pass a param telling the child what type and model of cpu we are on,
|
|
* if we know it. If hwloc has the value, use what it knows. Otherwise,
|
|
* see if we were explicitly given it and use that value.
|
|
*/
|
|
hwloc_obj_t obj;
|
|
char *htmp;
|
|
if (NULL != opal_hwloc_topology) {
|
|
obj = hwloc_get_root_obj(opal_hwloc_topology);
|
|
if (NULL != (htmp = (char*)hwloc_obj_get_info_by_name(obj, "CPUType")) ||
|
|
NULL != (htmp = orte_local_cpu_type)) {
|
|
opal_setenv("OMPI_MCA_orte_cpu_type", htmp, true, &app->env);
|
|
}
|
|
if (NULL != (htmp = (char*)hwloc_obj_get_info_by_name(obj, "CPUModel")) ||
|
|
NULL != (htmp = orte_local_cpu_model)) {
|
|
opal_setenv("OMPI_MCA_orte_cpu_model", htmp, true, &app->env);
|
|
}
|
|
} else {
|
|
if (NULL != orte_local_cpu_type) {
|
|
opal_setenv("OMPI_MCA_orte_cpu_type", orte_local_cpu_type, true, &app->env);
|
|
}
|
|
if (NULL != orte_local_cpu_model) {
|
|
opal_setenv("OMPI_MCA_orte_cpu_model", orte_local_cpu_model, true, &app->env);
|
|
}
|
|
}
|
|
|
|
/* get shmem's best component name so we can provide a hint to the shmem
|
|
* framework. the idea here is to have someone figure out what component to
|
|
* select (via the shmem framework) and then have the rest of the
|
|
* components in shmem obey that decision. for more details take a look at
|
|
* the shmem framework in opal.
|
|
*/
|
|
if (NULL != (param = opal_shmem_base_best_runnable_component_name())) {
|
|
opal_setenv("OMPI_MCA_shmem_RUNTIME_QUERY_hint", param, true, &app->env);
|
|
free(param);
|
|
}
|
|
|
|
/* Set an info MCA param that tells the launched processes that
|
|
* any binding policy was applied by us (e.g., so that
|
|
* MPI_INIT doesn't try to bind itself)
|
|
*/
|
|
if (OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
|
|
opal_setenv("OMPI_MCA_orte_bound_at_launch", "1", true, &app->env);
|
|
}
|
|
|
|
/* tell the ESS to avoid the singleton component - but don't override
|
|
* anything that may have been provided elsewhere
|
|
*/
|
|
opal_setenv("OMPI_MCA_ess", "^singleton", false, &app->env);
|
|
|
|
/* ensure that the spawned process ignores direct launch components,
|
|
* but do not overrride anything we were given */
|
|
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &app->env);
|
|
|
|
/* since we want to pass the name as separate components, make sure
|
|
* that the "name" environmental variable is cleared!
|
|
*/
|
|
opal_unsetenv("OMPI_MCA_orte_ess_name", &app->env);
|
|
|
|
opal_asprintf(¶m, "%ld", (long)jdata->num_procs);
|
|
opal_setenv("OMPI_MCA_orte_ess_num_procs", param, true, &app->env);
|
|
|
|
/* although the num_procs is the comm_world size, users
|
|
* would appreciate being given a public environmental variable
|
|
* that also represents this value - something MPI specific - so
|
|
* do that here.
|
|
*
|
|
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
|
* We know - just live with it
|
|
*/
|
|
opal_setenv("OMPI_COMM_WORLD_SIZE", param, true, &app->env);
|
|
free(param);
|
|
|
|
/* users would appreciate being given a public environmental variable
|
|
* that also represents this value - something MPI specific - so
|
|
* do that here.
|
|
*
|
|
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
|
* We know - just live with it
|
|
*/
|
|
opal_asprintf(¶m, "%ld", (long)jdata->num_local_procs);
|
|
opal_setenv("OMPI_COMM_WORLD_LOCAL_SIZE", param, true, &app->env);
|
|
free(param);
|
|
|
|
/* forcibly set the local tmpdir base and top session dir to match ours */
|
|
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env);
|
|
/* TODO: should we use PMIx key to pass this data? */
|
|
opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env);
|
|
opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env);
|
|
|
|
/* MPI-3 requires we provide some further info to the procs,
|
|
* so we pass them as envars to avoid introducing further
|
|
* ORTE calls in the MPI layer
|
|
*/
|
|
opal_asprintf(&num_app_ctx, "%lu", (unsigned long)jdata->num_apps);
|
|
|
|
/* build some common envars we need to pass for MPI-3 compatibility */
|
|
nps = NULL;
|
|
firstranks = NULL;
|
|
for (i=0; i < jdata->apps->size; i++) {
|
|
if (NULL == (tmp_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
|
continue;
|
|
}
|
|
opal_argv_append_nosize(&nps, ORTE_VPID_PRINT(tmp_app->num_procs));
|
|
opal_argv_append_nosize(&firstranks, ORTE_VPID_PRINT(tmp_app->first_rank));
|
|
}
|
|
npstring = opal_argv_join(nps, ' ');
|
|
firstrankstring = opal_argv_join(firstranks, ' ');
|
|
opal_argv_free(nps);
|
|
opal_argv_free(firstranks);
|
|
|
|
/* add the MPI-3 envars */
|
|
opal_setenv("OMPI_NUM_APP_CTX", num_app_ctx, true, &app->env);
|
|
opal_setenv("OMPI_FIRST_RANKS", firstrankstring, true, &app->env);
|
|
opal_setenv("OMPI_APP_CTX_NUM_PROCS", npstring, true, &app->env);
|
|
free(num_app_ctx);
|
|
free(firstrankstring);
|
|
free(npstring);
|
|
|
|
/* now process any envar attributes - we begin with the job-level
|
|
* ones as the app-specific ones can override them. We have to
|
|
* process them in the order they were given to ensure we wind
|
|
* up in the desired final state */
|
|
OPAL_LIST_FOREACH(attr, &jdata->attributes, orte_attribute_t) {
|
|
if (ORTE_JOB_SET_ENVAR == attr->key) {
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env);
|
|
} else if (ORTE_JOB_ADD_ENVAR == attr->key) {
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, false, &app->env);
|
|
} else if (ORTE_JOB_UNSET_ENVAR == attr->key) {
|
|
opal_unsetenv(attr->data.string, &app->env);
|
|
} else if (ORTE_JOB_PREPEND_ENVAR == attr->key) {
|
|
/* see if the envar already exists */
|
|
exists = false;
|
|
for (i=0; NULL != app->env[i]; i++) {
|
|
saveptr = strchr(app->env[i], '='); // cannot be NULL
|
|
*saveptr = '\0';
|
|
if (0 == strcmp(app->env[i], attr->data.envar.envar)) {
|
|
/* we have the var - prepend it */
|
|
param = saveptr;
|
|
++param; // move past where the '=' sign was
|
|
opal_asprintf(&p2, "%s%c%s", attr->data.envar.value,
|
|
attr->data.envar.separator, param);
|
|
*saveptr = '='; // restore the current envar setting
|
|
opal_setenv(attr->data.envar.envar, p2, true, &app->env);
|
|
free(p2);
|
|
exists = true;
|
|
break;
|
|
} else {
|
|
*saveptr = '='; // restore the current envar setting
|
|
}
|
|
}
|
|
if (!exists) {
|
|
/* just insert it */
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env);
|
|
}
|
|
} else if (ORTE_JOB_APPEND_ENVAR == attr->key) {
|
|
/* see if the envar already exists */
|
|
exists = false;
|
|
for (i=0; NULL != app->env[i]; i++) {
|
|
saveptr = strchr(app->env[i], '='); // cannot be NULL
|
|
*saveptr = '\0';
|
|
if (0 == strcmp(app->env[i], attr->data.envar.envar)) {
|
|
/* we have the var - prepend it */
|
|
param = saveptr;
|
|
++param; // move past where the '=' sign was
|
|
opal_asprintf(&p2, "%s%c%s", param, attr->data.envar.separator,
|
|
attr->data.envar.value);
|
|
*saveptr = '='; // restore the current envar setting
|
|
opal_setenv(attr->data.envar.envar, p2, true, &app->env);
|
|
free(p2);
|
|
exists = true;
|
|
break;
|
|
} else {
|
|
*saveptr = '='; // restore the current envar setting
|
|
}
|
|
}
|
|
if (!exists) {
|
|
/* just insert it */
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* now do the same thing for any app-level attributes */
|
|
OPAL_LIST_FOREACH(attr, &app->attributes, orte_attribute_t) {
|
|
if (ORTE_APP_SET_ENVAR == attr->key) {
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env);
|
|
} else if (ORTE_APP_ADD_ENVAR == attr->key) {
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, false, &app->env);
|
|
} else if (ORTE_APP_UNSET_ENVAR == attr->key) {
|
|
opal_unsetenv(attr->data.string, &app->env);
|
|
} else if (ORTE_APP_PREPEND_ENVAR == attr->key) {
|
|
/* see if the envar already exists */
|
|
exists = false;
|
|
for (i=0; NULL != app->env[i]; i++) {
|
|
saveptr = strchr(app->env[i], '='); // cannot be NULL
|
|
*saveptr = '\0';
|
|
if (0 == strcmp(app->env[i], attr->data.envar.envar)) {
|
|
/* we have the var - prepend it */
|
|
param = saveptr;
|
|
++param; // move past where the '=' sign was
|
|
opal_asprintf(&p2, "%s%c%s", attr->data.envar.value,
|
|
attr->data.envar.separator, param);
|
|
*saveptr = '='; // restore the current envar setting
|
|
opal_setenv(attr->data.envar.envar, p2, true, &app->env);
|
|
free(p2);
|
|
exists = true;
|
|
break;
|
|
} else {
|
|
*saveptr = '='; // restore the current envar setting
|
|
}
|
|
}
|
|
if (!exists) {
|
|
/* just insert it */
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env);
|
|
}
|
|
} else if (ORTE_APP_APPEND_ENVAR == attr->key) {
|
|
/* see if the envar already exists */
|
|
exists = false;
|
|
for (i=0; NULL != app->env[i]; i++) {
|
|
saveptr = strchr(app->env[i], '='); // cannot be NULL
|
|
*saveptr = '\0';
|
|
if (0 == strcmp(app->env[i], attr->data.envar.envar)) {
|
|
/* we have the var - prepend it */
|
|
param = saveptr;
|
|
++param; // move past where the '=' sign was
|
|
opal_asprintf(&p2, "%s%c%s", param, attr->data.envar.separator,
|
|
attr->data.envar.value);
|
|
*saveptr = '='; // restore the current envar setting
|
|
opal_setenv(attr->data.envar.envar, p2, true, &app->env);
|
|
free(p2);
|
|
exists = true;
|
|
break;
|
|
} else {
|
|
*saveptr = '='; // restore the current envar setting
|
|
}
|
|
}
|
|
if (!exists) {
|
|
/* just insert it */
|
|
opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env);
|
|
}
|
|
}
|
|
}
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
static int setup_child(orte_job_t *jdata,
|
|
orte_proc_t *child,
|
|
orte_app_context_t *app,
|
|
char ***env)
|
|
{
|
|
char *param, *value;
|
|
int rc, i;
|
|
int32_t nrestarts=0, *nrptr;
|
|
bool takeus = false;
|
|
|
|
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
|
|
"%s schizo:ompi: setup_child",
|
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
|
|
|
/* if no personality was specified, then nothing to do */
|
|
if (NULL == jdata->personality) {
|
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
|
}
|
|
|
|
if (NULL != orte_schizo_base.personalities) {
|
|
/* see if we are included */
|
|
for (i=0; NULL != jdata->personality[i]; i++) {
|
|
if (0 == strcmp(jdata->personality[i], "ompi")) {
|
|
takeus = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!takeus) {
|
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
|
}
|
|
}
|
|
|
|
/* setup the jobid */
|
|
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&value, child->name.jobid))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
opal_setenv("OMPI_MCA_ess_base_jobid", value, true, env);
|
|
free(value);
|
|
|
|
/* setup the vpid */
|
|
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&value, child->name.vpid))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
opal_setenv("OMPI_MCA_ess_base_vpid", value, true, env);
|
|
|
|
/* although the vpid IS the process' rank within the job, users
|
|
* would appreciate being given a public environmental variable
|
|
* that also represents this value - something MPI specific - so
|
|
* do that here.
|
|
*
|
|
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
|
* We know - just live with it
|
|
*/
|
|
opal_setenv("OMPI_COMM_WORLD_RANK", value, true, env);
|
|
free(value); /* done with this now */
|
|
|
|
/* users would appreciate being given a public environmental variable
|
|
* that also represents the local rank value - something MPI specific - so
|
|
* do that here.
|
|
*
|
|
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
|
* We know - just live with it
|
|
*/
|
|
if (ORTE_LOCAL_RANK_INVALID == child->local_rank) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
|
|
rc = ORTE_ERR_VALUE_OUT_OF_BOUNDS;
|
|
return rc;
|
|
}
|
|
opal_asprintf(&value, "%lu", (unsigned long) child->local_rank);
|
|
opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, env);
|
|
free(value);
|
|
|
|
/* users would appreciate being given a public environmental variable
|
|
* that also represents the node rank value - something MPI specific - so
|
|
* do that here.
|
|
*
|
|
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
|
* We know - just live with it
|
|
*/
|
|
if (ORTE_NODE_RANK_INVALID == child->node_rank) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
|
|
rc = ORTE_ERR_VALUE_OUT_OF_BOUNDS;
|
|
return rc;
|
|
}
|
|
opal_asprintf(&value, "%lu", (unsigned long) child->node_rank);
|
|
opal_setenv("OMPI_COMM_WORLD_NODE_RANK", value, true, env);
|
|
/* set an mca param for it too */
|
|
opal_setenv("OMPI_MCA_orte_ess_node_rank", value, true, env);
|
|
free(value);
|
|
|
|
/* provide the identifier for the PMIx connection - the
|
|
* PMIx connection is made prior to setting the process
|
|
* name itself. Although in most cases the ID and the
|
|
* process name are the same, it isn't necessarily
|
|
* required */
|
|
orte_util_convert_process_name_to_string(&value, &child->name);
|
|
opal_setenv("PMIX_ID", value, true, env);
|
|
free(value);
|
|
|
|
nrptr = &nrestarts;
|
|
if (orte_get_attribute(&child->attributes, ORTE_PROC_NRESTARTS, (void**)&nrptr, OPAL_INT32)) {
|
|
/* pass the number of restarts for this proc - will be zero for
|
|
* an initial start, but procs would like to know if they are being
|
|
* restarted so they can take appropriate action
|
|
*/
|
|
opal_asprintf(&value, "%d", nrestarts);
|
|
opal_setenv("OMPI_MCA_orte_num_restarts", value, true, env);
|
|
free(value);
|
|
}
|
|
|
|
/* if the proc should not barrier in orte_init, tell it */
|
|
if (orte_get_attribute(&child->attributes, ORTE_PROC_NOBARRIER, NULL, OPAL_BOOL)
|
|
|| 0 < nrestarts) {
|
|
opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, env);
|
|
}
|
|
|
|
/* if the proc isn't going to forward IO, then we need to flag that
|
|
* it has "completed" iof termination as otherwise it will never fire
|
|
*/
|
|
if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT)) {
|
|
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
|
|
}
|
|
|
|
/* pass an envar so the proc can find any files it had prepositioned */
|
|
param = orte_process_info.proc_session_dir;
|
|
opal_setenv("OMPI_FILE_LOCATION", param, true, env);
|
|
|
|
/* if the user wanted the cwd to be the proc's session dir, then
|
|
* switch to that location now
|
|
*/
|
|
if (orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) {
|
|
/* create the session dir - may not exist */
|
|
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(param, S_IRWXU))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
/* doesn't exist with correct permissions, and/or we can't
|
|
* create it - either way, we are done
|
|
*/
|
|
return rc;
|
|
}
|
|
/* change to it */
|
|
if (0 != chdir(param)) {
|
|
return ORTE_ERROR;
|
|
}
|
|
/* It seems that chdir doesn't
|
|
* adjust the $PWD enviro variable when it changes the directory. This
|
|
* can cause a user to get a different response when doing getcwd vs
|
|
* looking at the enviro variable. To keep this consistent, we explicitly
|
|
* ensure that the PWD enviro variable matches the CWD we moved to.
|
|
*
|
|
* NOTE: if a user's program does a chdir(), then $PWD will once
|
|
* again not match getcwd! This is beyond our control - we are only
|
|
* ensuring they start out matching.
|
|
*/
|
|
opal_setenv("PWD", param, true, env);
|
|
/* update the initial wdir value too */
|
|
opal_setenv("OMPI_MCA_initial_wdir", param, true, env);
|
|
} else if (NULL != app->cwd) {
|
|
/* change to it */
|
|
if (0 != chdir(app->cwd)) {
|
|
return ORTE_ERROR;
|
|
}
|
|
}
|
|
return ORTE_SUCCESS;
|
|
}
|