From 06c3dfc052171a76cedb80230dcb64faf4f3efad Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 16 Dec 2015 15:30:40 -0800 Subject: [PATCH] Refactor the ORTE DVM code so that external codes can submit multiple jobs using only a single connection to the HNP. * Clean up the DVM so it continues to run even when applications error out and we would ordinarily abort the daemons. * Create a new errmgr component for the DVM to handle the differences. * Cleanup the DVM state component. * Add ORTE bindings directory and brief README * Pass a local tool index around to match jobs. * Pass the jobid on job completion. * Fix initialization logic. * Add framework for python wrapper. * Fix terminate-with-non-zero-exit behavior so it properly terminates only the indicated procs, notifies orte-submit, and orte-dvm continues executing. * Add some missing options to orte-dvm * Fix a bug in -host processing that caused us to ignore the #slots designator. Add a new attribute to indicate "do not expand the DVM" when submitting job spawn requests. * It actually makes no sense that we treat the termination of all children differently than terminating the children of a specific job - it only creates confusion over the difference in behavior. So terminate children the same way regardless. Extend the cmd_line utility to easily allow layering of command line definitions Catch up with ORTE interface change and make build more generic. Disable "fixed dvm" logic for now. Add another cmd_line function to merge a table of cmd line options with another one, reporting as errors any duplicate entries. Use this to allow orterun to reuse the orted_submit code Fix the "fixed_dvm" logic by ensuring we reset num_new_daemons to zero. Also ensure that the nidmap is sent with the first job so the downstream daemons get the node info. Remove a duplicate cmd line entry in orterun. Revise the DVM startup procedure to pass the nidmap only once, at the startup of the DVM. 
This reduces the overhead on each job launch and ensures that the nidmap doesn't get overwritten. Add new commands to get_orted_comm_cmd_str(). Move ORTE command line options to orte_globals.[ch]. Catch up with extra orte_submit_init parameter. Add example code. Add documentation. Bump version. The nidmap and routing data must be updated prior to propagating the xcast or else the xcast will fail. Fix the return code so it is something more expected when an error occurs. Ensure we get an error returned to us when we fail to launch for some reason. In this case, we will always get a launch_cb as we did indeed attempt to spawn it. The error code will be returned in the complete_cb. Fix the return code from orte_submit_job - it was returning the tracker index instead of "success". Take advantage of ORTE's pretty-print capabilities to provide a nice error output explaining why we failed to launch. Ensure we always get a launch_cb when we fail to launch, but no complete_cb as the job never launched. Extend the error reporting capability to job completion as well. Add index parameter to orte_submit_job(). Add orte_job_cancel and implement ORTE_DAEMON_TERMINATE_JOB_CMD. Factor out dvm termination. Parse the terminate option at tool level. Add error string for ORTE_ERR_JOB_CANCELLED. Add some safeguards. Cleanup and/or comments. Enable the return. Properly ORTE_DECLSPEC orte_submit_halt. Add orte_submit_halt and orte_submit_cancel to interface. 
Use the plm interface to terminate the job --- opal/util/cmd_line.c | 33 +- opal/util/cmd_line.h | 12 +- orte/bindings/README | 21 + orte/bindings/python/README | 49 + orte/bindings/python/examples/submit.py | 68 + orte/bindings/python/setup.py | 16 + orte/bindings/python/src/orte-cffi/build.py | 140 ++ orte/include/orte/constants.h | 3 +- .../errmgr_default_hnp_component.c | 2 +- orte/mca/errmgr/dvm/Makefile.am | 35 + orte/mca/errmgr/dvm/errmgr_dvm.c | 693 +++++++ orte/mca/errmgr/dvm/errmgr_dvm.h | 39 + orte/mca/errmgr/dvm/errmgr_dvm_component.c | 102 + orte/mca/errmgr/dvm/owner.txt | 7 + orte/mca/grpcomm/direct/grpcomm_direct.c | 62 +- orte/mca/odls/alps/odls_alps_module.c | 3 +- orte/mca/odls/base/odls_base_default_fns.c | 134 +- orte/mca/odls/base/odls_private.h | 6 +- orte/mca/odls/default/odls_default_module.c | 3 +- orte/mca/odls/odls.h | 8 +- orte/mca/odls/odls_types.h | 12 +- orte/mca/plm/base/plm_base_launch_support.c | 32 +- orte/mca/rml/rml_types.h | 4 +- orte/mca/state/base/state_base_fns.c | 8 +- orte/mca/state/dvm/state_dvm.c | 312 ++- orte/orted/Makefile.am | 9 +- orte/orted/orted_comm.c | 152 +- orte/orted/orted_submit.c | 1808 +++++++++++++++++ orte/orted/orted_submit.h | 35 + orte/runtime/orte_globals.c | 5 + orte/runtime/orte_globals.h | 36 + orte/runtime/orte_quit.c | 370 ++-- orte/runtime/orte_quit.h | 8 + orte/tools/orte-dvm/orte-dvm.c | 56 +- orte/tools/orte-submit/orte-submit.c | 1593 +-------------- orte/tools/orterun/orterun.c | 256 ++- orte/tools/orterun/orterun.h | 39 - orte/util/attr.c | 6 +- orte/util/attr.h | 5 +- orte/util/dash_host/dash_host.c | 7 +- orte/util/error_strings.c | 3 + orte/util/hostfile/hostfile.c | 5 +- orte/util/nidmap.c | 1 - 43 files changed, 3875 insertions(+), 2323 deletions(-) create mode 100644 orte/bindings/README create mode 100644 orte/bindings/python/README create mode 100755 orte/bindings/python/examples/submit.py create mode 100644 orte/bindings/python/setup.py create mode 100644 
orte/bindings/python/src/orte-cffi/build.py create mode 100644 orte/mca/errmgr/dvm/Makefile.am create mode 100644 orte/mca/errmgr/dvm/errmgr_dvm.c create mode 100644 orte/mca/errmgr/dvm/errmgr_dvm.h create mode 100644 orte/mca/errmgr/dvm/errmgr_dvm_component.c create mode 100644 orte/mca/errmgr/dvm/owner.txt create mode 100644 orte/orted/orted_submit.c create mode 100644 orte/orted/orted_submit.h diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index cc1e99e0f8..3aa8564092 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -14,6 +14,7 @@ * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -146,7 +147,7 @@ static int qsort_callback(const void *a, const void *b); int opal_cmd_line_create(opal_cmd_line_t *cmd, opal_cmd_line_init_t *table) { - int i, ret = OPAL_SUCCESS; + int ret = OPAL_SUCCESS; /* Check bozo case */ @@ -155,8 +156,17 @@ int opal_cmd_line_create(opal_cmd_line_t *cmd, } OBJ_CONSTRUCT(cmd, opal_cmd_line_t); - /* Ensure we got a table */ + ret = opal_cmd_line_add(cmd, table); + return ret; +} +/* Add a table to an existing cmd line object */ +int opal_cmd_line_add(opal_cmd_line_t *cmd, + opal_cmd_line_init_t *table) +{ + int i, ret; + + /* Ensure we got a table */ if (NULL == table) { return OPAL_SUCCESS; } @@ -164,9 +174,7 @@ int opal_cmd_line_create(opal_cmd_line_t *cmd, /* Loop through the table */ for (i = 0; ; ++i) { - /* Is this the end? */ - if ('\0' == table[i].ocl_cmd_short_name && NULL == table[i].ocl_cmd_single_dash_name && NULL == table[i].ocl_cmd_long_name) { @@ -174,16 +182,14 @@ int opal_cmd_line_create(opal_cmd_line_t *cmd, } /* Nope -- it's an entry. Process it. 
*/ - ret = make_opt(cmd, &table[i]); if (OPAL_SUCCESS != ret) { return ret; } } - return ret; + return OPAL_SUCCESS; } - /* * Append a command line entry to the previously constructed command line */ @@ -965,8 +971,19 @@ static int make_opt(opal_cmd_line_t *cmd, opal_cmd_line_init_t *e) return OPAL_ERR_BAD_PARAM; } - /* Allocate and fill an option item */ + /* see if the option already exists */ + if (NULL != e->ocl_cmd_single_dash_name && + NULL != find_option(cmd, e->ocl_cmd_single_dash_name)) { + opal_output(0, "Duplicate cmd line entry %s", e->ocl_cmd_single_dash_name); + return OPAL_ERR_BAD_PARAM; + } + if (NULL != e->ocl_cmd_long_name && + NULL != find_option(cmd, e->ocl_cmd_long_name)) { + opal_output(0, "Duplicate cmd line entry %s", e->ocl_cmd_long_name); + return OPAL_ERR_BAD_PARAM; + } + /* Allocate and fill an option item */ option = OBJ_NEW(cmd_line_option_t); if (NULL == option) { return OPAL_ERR_OUT_OF_RESOURCE; diff --git a/opal/util/cmd_line.h b/opal/util/cmd_line.h index 9cf3217208..a374899b49 100644 --- a/opal/util/cmd_line.h +++ b/opal/util/cmd_line.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -268,6 +268,16 @@ BEGIN_C_DECLS OPAL_DECLSPEC int opal_cmd_line_create(opal_cmd_line_t *cmd, opal_cmd_line_init_t *table); + /* Add a table of opal_cmd_line_init_t instances + * to an existing OPAL command line handle. + * + * Multiple calls to opal_cmd_line_add are permitted - each + * subsequent call will simply append new options to the existing + * handle. Note that any duplicates will return an error. + */ + OPAL_DECLSPEC int opal_cmd_line_add(opal_cmd_line_t *cmd, + opal_cmd_line_init_t *table); + /** * Create a command line option. 
* diff --git a/orte/bindings/README b/orte/bindings/README new file mode 100644 index 0000000000..1638e67c39 --- /dev/null +++ b/orte/bindings/README @@ -0,0 +1,21 @@ +Copyright (c) 2016 Intel, Inc. All rights reserved + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +This is where bindings of ORTE functions to alternative programming languages +such as Python and C++ reside. Not every ORTE function has been provided with +a wrapper - it is purely on an as-needed basis. However, there is no restriction +on the number of wrappers that can exist, nor on what type of function is wrapped. + +There is only one rule to observe: you can wrap a framework, but you cannot wrap a +specific plugin within that framework. This constraint flows from the fact that +plugins are only accessed via the framework interface - thus, there is no way to +guarantee that a particular plugin will be the active selection. + + diff --git a/orte/bindings/python/README b/orte/bindings/python/README new file mode 100644 index 0000000000..f83ef79e30 --- /dev/null +++ b/orte/bindings/python/README @@ -0,0 +1,49 @@ +=========================================================================== +CFFI based Python wrapper for ORTE +=========================================================================== + + +Example +------- + +This example starts up a persistent DVM and then spawns some tasks using +Python. 
+ +$ virtualenv ve +$ source ve/bin/activate +$ pip install orte-cffi +$ orte-dvm --report-uri dvm_uri +$ python examples/submit.py + + +Create a distfile +---------------------------------------- + +If you want to create a sdist file: + +$ virtualenv ve +$ source ve/bin/activate +$ python setup.py sdist + + +Uploading sdist to pypi +----------------------- + +Assuming you have admin privileges to the pypi package repository for this +package, a new version can be uploaded using twine: + +$ virtualenv ve +$ source ve/bin/activate +$ pip install twine +$ twine upload dist/orte-cffi-`python setup.py --version`.tar.gz + + +Building (for development purposes only) +---------------------------------------- + +If you want to create a non-pip build: + +$ virtualenv ve +$ source ve/bin/activate +$ pip install cffi +$ python src/orte-cffi/build.py diff --git a/orte/bindings/python/examples/submit.py b/orte/bindings/python/examples/submit.py new file mode 100755 index 0000000000..fd60984de7 --- /dev/null +++ b/orte/bindings/python/examples/submit.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +import os +import time + +from orte_cffi import ffi, lib + +DVM_URI = "file:dvm_uri" + +@ffi.def_extern() +def launch_cb(task, jdata, status, cbdata): + print "Task %d is started!" % task + instance = task_instance_map[task] + instance.myspawn -= 1 + +@ffi.def_extern() +def finish_cb(task, jdata, status, cbdata): + print "Task %d is completed with status %d!" 
% (task, status) + instance = task_instance_map[task] + instance.mywait -= 1 + del task_instance_map[task] + +# Dictionary to find class instance from task id +task_instance_map = {} + +# Request to create a background asynchronous event loop +os.putenv("OMPI_MCA_ess_tool_async_progress", "enabled") + +class Submit(): + + mywait = 0 + myspawn = 0 + + def run(self): + + argv_keepalive = [ + ffi.new("char[]", "submit"), # Will be stripped off by the library + ffi.new("char[]", "--hnp"), ffi.new("char[]", DVM_URI), + ffi.NULL, # Required + ] + argv = ffi.new("char *[]", argv_keepalive) + lib.orte_submit_init(3, argv, ffi.NULL) + + index = ffi.new("int *") + + for i in range(3): + + argv_keepalive = [ + ffi.new("char[]", "RADICAL-Pilot"), + ffi.new("char[]", "--np"), ffi.new("char[]", "1"), + ffi.new("char[]", "false"), + ffi.NULL, # Required + ] + argv = ffi.new("char *[]", argv_keepalive) + lib.orte_submit_job(argv, index, lib.launch_cb, ffi.NULL, lib.finish_cb, ffi.NULL) + task = index[0] + task_instance_map[task] = self + self.mywait += 1 + self.myspawn += 1 + print "Task %d submitted!" 
% task + + while self.myspawn > 0 or self.mywait > 0: + time.sleep(0.1) + + print("Done!") + +rp = Submit() +rp.run() diff --git a/orte/bindings/python/setup.py b/orte/bindings/python/setup.py new file mode 100644 index 0000000000..033fea4e31 --- /dev/null +++ b/orte/bindings/python/setup.py @@ -0,0 +1,16 @@ +from setuptools import setup + +setup( + name = "orte-cffi", + version = "0.4.0", + author = "Mark Santcroos", + author_email = "mark.santcroos@rutgers.edu", + description = "CFFI-based Python wrapper for Open RTE", + license = "New BSD", + keywords = "mpi cffi", + packages = ['src/orte-cffi'], + url = "http://www.open-mpi.org", + setup_requires = ["cffi>=1.5.0"], + cffi_modules = ["src/orte-cffi/build.py:ffi"], + install_requires = ["cffi>=1.5.0"], +) diff --git a/orte/bindings/python/src/orte-cffi/build.py b/orte/bindings/python/src/orte-cffi/build.py new file mode 100644 index 0000000000..18dfd6cdf7 --- /dev/null +++ b/orte/bindings/python/src/orte-cffi/build.py @@ -0,0 +1,140 @@ +import subprocess +import os + + +########################################################################## +# Helper functions # +########################################################################## + + +# +# +# Get a path value from ompi_info based on key +# +def ompi_info_path(key): + + cmd = ['ompi_info', '--path', key, '--parseable'] + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr= p.communicate() + + if p.returncode != 0: + raise Exception(stderr) + + p_str, l_str, path = stdout.split(':') + if p_str.strip() != 'path': + raise Exception('Parse error') + if l_str.strip() != key: + raise Exception('Parse error') + + path = path.strip() + + if not os.path.isdir(path): + raise Exception('Path "%s" is not an existing directory' % path) + + return path + + +# +# Get the pkgconfig directory assuming its '$libdir/pkgconfig' +# +def get_pkgconfig_dir(): + + libdir = ompi_info_path('libdir') + + pkgdir = os.path.join(libdir, 
'pkgconfig') + if not os.path.isdir(pkgdir): + raise Exception('Path "%s" is not an existing directory' % pkgdir) + + return pkgdir + + +# +# Run pkgconfig to get include dirs and lib dirs. +# Optionally allow to specify a variable to pkgconfig. +# +def pkgconfig(libname, variables=None): + + cmd = ['pkg-config', '--cflags-only-I', '--libs-only-L', libname] + + if variables: + for k,v in variables.iteritems(): + cmd.append('--define-variable=%s=%s' % (k, v)) + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr= p.communicate() + + if p.returncode != 0: + raise Exception(stderr) + + include_dirs = [] + library_dirs = [] + + for item in stdout.split(): + if item.startswith("-L"): + library_dirs.append(item[2:]) + elif item.startswith("-I"): + include_dirs.append(item[2:]) + + return {'include_dirs': include_dirs, + 'library_dirs': library_dirs} + + +# Get the pkgconfigdir from orte_info and export to environment +pkgconfig_dir = get_pkgconfig_dir() +os.environ['PKG_CONFIG_PATH'] = pkgconfig_dir + +# Get the pkgincludedir from ompi_info +pkgincludedir = ompi_info_path('pkgincludedir') +pkgcfg = pkgconfig('orte', variables={'pkgincludedir': pkgincludedir}) + +# Extract include directories and check for existince +include_dirs = pkgcfg['include_dirs'] +if len(include_dirs) == 0: + raise Exception("No include dirs found") + +# Extract library directories and check for existince +library_dirs = pkgcfg['library_dirs'] +if len(library_dirs) == 0: + raise Exception("No library dirs found") + + +########################################################################## +# CFFI specifics # +########################################################################## + + +from cffi import FFI +ffi = FFI() + +ffi.set_source("orte_cffi", """ +#include "orte/orted/orted_submit.h" +""", + libraries=["open-rte"], + include_dirs=include_dirs, + library_dirs=library_dirs +) + +ffi.cdef(""" +/* Types */ +typedef ... orte_job_t; +typedef ... 
opal_cmd_line_t; +typedef void (*orte_submit_cbfunc_t)(int index, orte_job_t *jdata, int ret, void *cbdata); + +/* Functions */ +int orte_submit_init(int argc, char *argv[], opal_cmd_line_t *opts); +int orte_submit_job(char *cmd[], int *index, + orte_submit_cbfunc_t launch_cb, void *launch_cbdata, + orte_submit_cbfunc_t complete_cb, void *complete_cbdata); +void orte_submit_finalize(void); +int orte_submit_cancel(int index); +int orte_submit_halt(void); + +/* Callbacks */ +extern "Python" void launch_cb(int, orte_job_t *, int, void *); +extern "Python" void finish_cb(int, orte_job_t *, int, void *); +""") + + +if __name__ == "__main__": + ffi.compile(verbose=True) diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index eb9e7ac7ee..06871f03a0 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -146,7 +146,8 @@ enum { ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 54), ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55), ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56), - ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57) + ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57), + ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c index 640baa3b33..aa7e136fcd 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c @@ -88,7 +88,7 @@ static int default_hnp_close(void) static int default_hnp_component_query(mca_base_module_t **module, int *priority) { - if( ORTE_PROC_IS_HNP ) { + if (ORTE_PROC_IS_HNP && !ORTE_PROC_IS_MASTER) { /* we are the default HNP component */ *priority = my_priority; *module = (mca_base_module_t *)&orte_errmgr_default_hnp_module; diff --git a/orte/mca/errmgr/dvm/Makefile.am b/orte/mca/errmgr/dvm/Makefile.am new file mode 100644 index 0000000000..285a105277 --- 
/dev/null +++ b/orte/mca/errmgr/dvm/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + errmgr_dvm.h \ + errmgr_dvm_component.c \ + errmgr_dvm.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_orte_errmgr_dvm_DSO +component_noinst = +component_install = mca_errmgr_dvm.la +else +component_noinst = libmca_errmgr_dvm.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_errmgr_dvm_la_SOURCES = $(sources) +mca_errmgr_dvm_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_errmgr_dvm_la_SOURCES =$(sources) +libmca_errmgr_dvm_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.c b/orte/mca/errmgr/dvm/errmgr_dvm.c new file mode 100644 index 0000000000..c259ac0253 --- /dev/null +++ b/orte/mca/errmgr/dvm/errmgr_dvm.c @@ -0,0 +1,693 @@ +/* + * Copyright (c) 2009-2011 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved. + * Copyright (c) 2011-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#include +#ifdef HAVE_SYS_WAIT_H +#include +#endif + +#include "opal/util/output.h" +#include "opal/dss/dss.h" + +#include "orte/mca/rml/rml.h" +#include "orte/mca/odls/odls.h" +#include "orte/mca/odls/base/base.h" +#include "orte/mca/odls/base/odls_private.h" +#include "orte/mca/plm/base/plm_private.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/grpcomm/grpcomm.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/state/state.h" + +#include "orte/util/error_strings.h" +#include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" +#include "orte/util/show_help.h" +#include "orte/util/nidmap.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_locks.h" +#include "orte/runtime/orte_quit.h" +#include "orte/runtime/data_type_support/orte_dt_support.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/base/errmgr_private.h" + +#include "errmgr_dvm.h" + +static int init(void); +static int finalize(void); + +static int predicted_fault(opal_list_t *proc_list, + opal_list_t *node_list, + opal_list_t *suggested_map); + +static int suggest_map_targets(orte_proc_t *proc, + orte_node_t *oldnode, + opal_list_t *node_list); + +static int ft_event(int state); + + +/****************** + * dvm module + ******************/ +orte_errmgr_base_module_t orte_errmgr_dvm_module = { + init, + finalize, + orte_errmgr_base_log, + orte_errmgr_base_abort, + orte_errmgr_base_abort_peers, + predicted_fault, + suggest_map_targets, + ft_event, + orte_errmgr_base_register_migration_warning, + NULL, + orte_errmgr_base_execute_error_callbacks +}; + + +/* + * Local functions + */ +static void job_errors(int fd, short args, void *cbdata); +static void 
proc_errors(int fd, short args, void *cbdata); + +static int init(void) +{ + /* setup state machine to trap job errors */ + orte_state.add_job_state(ORTE_JOB_STATE_ERROR, job_errors, ORTE_ERROR_PRI); + + /* set the lost connection state to run at MSG priority so + * we can process any last messages from the proc + */ + orte_state.add_proc_state(ORTE_PROC_STATE_COMM_FAILED, proc_errors, ORTE_MSG_PRI); + + /* setup state machine to trap proc errors */ + orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); + + return ORTE_SUCCESS; +} + +static int finalize(void) +{ + return ORTE_SUCCESS; +} + +static void _terminate_job(orte_jobid_t jobid) +{ + opal_pointer_array_t procs; + orte_proc_t pobj; + + OBJ_CONSTRUCT(&procs, opal_pointer_array_t); + opal_pointer_array_init(&procs, 1, 1, 1); + OBJ_CONSTRUCT(&pobj, orte_proc_t); + pobj.name.jobid = jobid; + pobj.name.vpid = ORTE_VPID_WILDCARD; + opal_pointer_array_add(&procs, &pobj); + orte_plm.terminate_procs(&procs); + OBJ_DESTRUCT(&procs); + OBJ_DESTRUCT(&pobj); +} + +static void job_errors(int fd, short args, void *cbdata) +{ + orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + orte_job_t *jdata; + orte_job_state_t jobstate; + orte_exit_code_t sts; + orte_proc_t *aborted_proc; + opal_buffer_t *answer; + int32_t rc, ret; + int room, *rmptr; + + /* + * if orte is trying to shutdown, just let it + */ + if (orte_finalizing) { + return; + } + + /* if the jdata is NULL, then we ignore it as this + * is reporting an unrecoverable error + */ + if (NULL == caddy->jdata) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + OBJ_RELEASE(caddy); + return; + } + + /* update the state */ + jdata = caddy->jdata; + jobstate = caddy->job_state; + jdata->state = jobstate; + + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: job %s reported state %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid), + orte_job_state_to_str(jobstate))); + + if 
(ORTE_JOB_STATE_NEVER_LAUNCHED == jobstate || + ORTE_JOB_STATE_ALLOC_FAILED == jobstate || + ORTE_JOB_STATE_MAP_FAILED == jobstate || + ORTE_JOB_STATE_CANNOT_LAUNCH == jobstate) { + /* disable routing as we may not have performed the daemon + * wireup - e.g., in a managed environment, all the daemons + * "phone home", but don't actually wireup into the routed + * network until they receive the launch message + */ + orte_routing_is_enabled = false; + jdata->num_terminated = jdata->num_procs; + ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_TERMINATED); + /* if it was a dynamic spawn, then we better tell them this didn't work */ + if (ORTE_JOBID_INVALID != jdata->originator.jobid) { + rc = jobstate; + answer = OBJ_NEW(opal_buffer_t); + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(caddy); + return; + } + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(caddy); + return; + } + /* pack the room number */ + rmptr = &room; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&rmptr, OPAL_INT)) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &room, 1, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(caddy); + return; + } + } + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm sending dyn error release of job %s to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid), + ORTE_NAME_PRINT(&jdata->originator))); + if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, + ORTE_RML_TAG_LAUNCH_RESP, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + } + } + OBJ_RELEASE(caddy); + return; + } + + if (ORTE_JOB_STATE_FAILED_TO_START == jobstate || + ORTE_JOB_STATE_FAILED_TO_LAUNCH == jobstate) { + /* the job object for this job will have been NULL'd + * in the array if the job was solely local. 
If it isn't + * NULL, then we need to tell everyone else to die + */ + aborted_proc = NULL; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, (void**)&aborted_proc, OPAL_PTR)) { + sts = aborted_proc->exit_code; + if (ORTE_PROC_MY_NAME->jobid == jdata->jobid) { + if (WIFSIGNALED(sts)) { /* died on signal */ +#ifdef WCOREDUMP + if (WCOREDUMP(sts)) { + orte_show_help("help-plm-base.txt", "daemon-died-signal-core", true, + WTERMSIG(sts)); + sts = WTERMSIG(sts); + } else { + orte_show_help("help-plm-base.txt", "daemon-died-signal", true, + WTERMSIG(sts)); + sts = WTERMSIG(sts); + } +#else + orte_show_help("help-plm-base.txt", "daemon-died-signal", true, + WTERMSIG(sts)); + sts = WTERMSIG(sts); +#endif /* WCOREDUMP */ + } else { + orte_show_help("help-plm-base.txt", "daemon-died-no-signal", true, + WEXITSTATUS(sts)); + sts = WEXITSTATUS(sts); + } + } + } + /* if this is the daemon job, then we need to ensure we + * output an error message indicating we couldn't launch the + * daemons */ + if (jdata->jobid == ORTE_PROC_MY_NAME->jobid) { + orte_show_help("help-errmgr-base.txt", "failed-daemon-launch", true); + } + } + + /* if the daemon job aborted and we haven't heard from everyone yet, + * then this could well have been caused by a daemon not finding + * a way back to us. In this case, output a message indicating a daemon + * died without reporting. 
Otherwise, say nothing as we + * likely already output an error message */ + if (ORTE_JOB_STATE_ABORTED == jobstate && + jdata->jobid == ORTE_PROC_MY_NAME->jobid && + jdata->num_procs != jdata->num_reported) { + orte_show_help("help-errmgr-base.txt", "failed-daemon", true); + } + + OBJ_RELEASE(caddy); +} + +static void proc_errors(int fd, short args, void *cbdata) +{ + orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + orte_job_t *jdata; + orte_proc_t *pptr, *proct; + orte_process_name_t *proc = &caddy->name; + orte_proc_state_t state = caddy->proc_state; + int i; + int32_t i32, *i32ptr; + + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: for proc %s state %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc), + orte_proc_state_to_str(state))); + + /* + * if orte is trying to shutdown, just let it + */ + if (orte_finalizing) { + goto cleanup; + } + + /* get the job object */ + if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) { + /* could be a race condition */ + goto cleanup; + } + pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid); + + /* we MUST handle a communication failure before doing anything else + * as it requires some special care to avoid normal termination issues + * for local application procs + */ + if (ORTE_PROC_STATE_COMM_FAILED == state) { + /* is this to a daemon? 
*/ + if (ORTE_PROC_MY_NAME->jobid != proc->jobid) { + /* nope - ignore it */ + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s Comm failure to non-daemon proc - ignoring it", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + goto cleanup; + } + /* if this is my own connection, ignore it */ + if (ORTE_PROC_MY_NAME->vpid == proc->vpid) { + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s Comm failure on my own connection - ignoring it", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + goto cleanup; + } + /* mark the daemon as gone */ + ORTE_FLAG_UNSET(pptr, ORTE_PROC_FLAG_ALIVE); + /* if we have ordered orteds to terminate or abort + * is in progress, record it */ + if (orte_orteds_term_ordered || orte_abnormal_term_ordered) { + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s Comm failure: daemons terminating - recording daemon %s as gone", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); + /* remove from dependent routes, if it is one */ + orte_routed.route_lost(proc); + /* if all my routes and local children are gone, then terminate ourselves */ + if (0 == orte_routed.num_routes()) { + for (i=0; i < orte_local_children->size; i++) { + if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && + ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_ALIVE) && proct->state < ORTE_PROC_STATE_UNTERMINATED) { + /* at least one is still alive */ + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s Comm failure: at least one proc (%s) still alive", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&proct->name))); + goto cleanup; + } + } + /* call our appropriate exit procedure */ + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr_dvm: all routes and children gone - ordering exit", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); + } else { + 
OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s Comm failure: %d routes remain alive", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (int)orte_routed.num_routes())); + } + goto cleanup; + } + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s Comm failure: daemon %s - aborting", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); + /* record the first one to fail */ + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + /* output an error message so the user knows what happened */ + orte_show_help("help-errmgr-base.txt", "node-died", true, pptr->node->name); + /* mark the daemon job as failed */ + jdata->state = ORTE_JOB_STATE_COMM_FAILED; + /* point to the lowest rank to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + /* update our exit code */ + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* just in case the exit code hadn't been set, do it here - this + * won't override any reported exit code */ + ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_COMM_FAILURE); + } + goto cleanup; + } + + /* update the proc state - can get multiple reports on a proc + * depending on circumstances, so ensure we only do this once + */ + if (pptr->state < ORTE_PROC_STATE_TERMINATED) { + pptr->state = state; + } + + /* if we were ordered to terminate, mark this proc as dead and see if + * any of our routes or local children remain alive - if not, then + * terminate ourselves. 
*/ + if (orte_orteds_term_ordered) { + for (i=0; i < orte_local_children->size; i++) { + if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { + if (ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) { + goto keep_going; + } + } + } + /* if all my routes and children are gone, then terminate + ourselves nicely (i.e., this is a normal termination) */ + if (0 == orte_routed.num_routes()) { + OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, + "%s errmgr:default:dvm all routes gone - exiting", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); + } + } + + keep_going: + /* ensure we record the failed proc properly so we can report + * the error once we terminate + */ + switch (state) { + case ORTE_PROC_STATE_KILLED_BY_CMD: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s killed by cmd", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc))); + /* we ordered this proc to die, so it isn't an abnormal termination + * and we don't flag it as such + */ + if (jdata->num_terminated >= jdata->num_procs) { + /* this job has terminated */ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); + } + /* don't abort the job as this isn't an abnormal termination */ + break; + + case ORTE_PROC_STATE_ABORTED: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s aborted", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc))); + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + jdata->state = ORTE_JOB_STATE_ABORTED; + /* point to the first rank to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* kill the job */ + 
_terminate_job(jdata->jobid); + } + break; + + case ORTE_PROC_STATE_ABORTED_BY_SIG: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s aborted by signal", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc))); + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + jdata->state = ORTE_JOB_STATE_ABORTED_BY_SIG; + /* point to the first rank to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* kill the job */ + _terminate_job(jdata->jobid); + } + break; + + case ORTE_PROC_STATE_TERM_WO_SYNC: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s terminated without sync", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc))); + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + jdata->state = ORTE_JOB_STATE_ABORTED_WO_SYNC; + /* point to the first rank to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* now treat a special case - if the proc exit'd without a required + * sync, it may have done so with a zero exit code. 
We want to ensure + * that the user realizes there was an error, so in this -one- case, + * we overwrite the process' exit code with the default error code + */ + ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + /* kill the job */ + _terminate_job(jdata->jobid); + } + break; + + case ORTE_PROC_STATE_FAILED_TO_START: + case ORTE_PROC_STATE_FAILED_TO_LAUNCH: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc), + orte_proc_state_to_str(state))); + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + opal_buffer_t *answer; + int id, *idptr, ret; + + if (ORTE_PROC_STATE_FAILED_TO_START == state) { /* compare against the reported state so both case labels map to their own job state */ + jdata->state = ORTE_JOB_STATE_FAILED_TO_START; + } else { + jdata->state = ORTE_JOB_STATE_FAILED_TO_LAUNCH; + } + /* point to the first rank to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + /* send a notification to the requestor - indicate that this is a spawn response */ + answer = OBJ_NEW(opal_buffer_t); + /* pack the return status */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &pptr->exit_code, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + goto CLEANUP; + } + /* pack the jobid to be returned */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + goto CLEANUP; + } + idptr = &id; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&idptr, OPAL_INT)) { + /* pack the sender's index to the tracking object */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, idptr, 1, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + goto CLEANUP; + } + } + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) { + /* we need to 
send the requestor more info about what happened */ + opal_dss.pack(answer, &jdata->state, 1, ORTE_JOB_STATE_T); + opal_dss.pack(answer, &pptr, 1, ORTE_PROC); + opal_dss.pack(answer, &pptr->node, 1, ORTE_NODE); + } + /* return response */ + if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, + ORTE_RML_TAG_LAUNCH_RESP, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + } + /* record that we notified about this job */ + jdata->state = ORTE_JOB_STATE_NOTIFIED; + CLEANUP: + /* kill the job */ + _terminate_job(jdata->jobid); + } + /* if this was a daemon, report it */ + if (jdata->jobid == ORTE_PROC_MY_NAME->jobid) { + /* output a message indicating we failed to launch a daemon */ + orte_show_help("help-errmgr-base.txt", "failed-daemon-launch", true); + } + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); + break; + + case ORTE_PROC_STATE_CALLED_ABORT: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s called abort with exit code %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc), pptr->exit_code)); + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + jdata->state = ORTE_JOB_STATE_CALLED_ABORT; + /* point to the first proc to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* kill the job */ + _terminate_job(jdata->jobid); + } + break; + + case ORTE_PROC_STATE_TERM_NON_ZERO: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s exited with non-zero status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc), + pptr->exit_code)); + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* track the number of non-zero exits */ + i32 = 0; + i32ptr = &i32; + 
orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32); + ++i32; + orte_set_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, ORTE_ATTR_LOCAL, i32ptr, OPAL_INT32); + if (orte_abort_non_zero_exit) { + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + jdata->state = ORTE_JOB_STATE_NON_ZERO_TERM; + /* point to the first rank to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + /* kill the job */ + _terminate_job(jdata->jobid); + } + } else { + /* user requested we consider this normal termination */ + if (jdata->num_terminated >= jdata->num_procs) { + /* this job has terminated */ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); + } + } + break; + + case ORTE_PROC_STATE_HEARTBEAT_FAILED: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s heartbeat failed", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc))); + if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { + jdata->state = ORTE_JOB_STATE_HEARTBEAT_FAILED; + /* point to the first rank to cause the problem */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); + /* retain the object so it doesn't get free'd */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* kill the job */ + _terminate_job(jdata->jobid); + } + /* remove from dependent routes, if it is one */ + orte_routed.route_lost(proc); + break; + + case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG: + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: unable to send message to proc %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc))); + /* if this proc is one of my daemons, then we are truly + * hosed - 
so just exit out + */ + if (ORTE_PROC_MY_NAME->jobid == proc->jobid) { + ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); + break; + } + break; + + default: + /* shouldn't get this, but terminate job if required */ + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, + "%s errmgr:dvm: proc %s default error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc), + orte_proc_state_to_str(state))); + if (jdata->num_terminated == jdata->num_procs) { + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); + } + break; + } + /* if the waitpid fired, be sure to let the state machine know */ + if (ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_WAITPID)) { + ORTE_ACTIVATE_PROC_STATE(&pptr->name, ORTE_PROC_STATE_WAITPID_FIRED); + } + + cleanup: + OBJ_RELEASE(caddy); +} + +static int predicted_fault(opal_list_t *proc_list, + opal_list_t *node_list, + opal_list_t *suggested_map) +{ + return ORTE_ERR_NOT_IMPLEMENTED; /* stub: the three lists are ignored */ +} + +static int suggest_map_targets(orte_proc_t *proc, + orte_node_t *oldnode, + opal_list_t *node_list) +{ + return ORTE_ERR_NOT_IMPLEMENTED; /* stub: proc, oldnode and node_list are ignored */ +} + +static int ft_event(int state) +{ + return ORTE_SUCCESS; /* stub: state is ignored and success is always returned */ +} diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.h b/orte/mca/errmgr/dvm/errmgr_dvm.h new file mode 100644 index 0000000000..291394d9a5 --- /dev/null +++ b/orte/mca/errmgr/dvm/errmgr_dvm.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + */ + +#ifndef MCA_ERRMGR_dvm_EXPORT_H +#define MCA_ERRMGR_dvm_EXPORT_H + +#include "orte_config.h" + +#include "orte/mca/errmgr/errmgr.h" + +BEGIN_C_DECLS + +/* + * Local Component structures + */ + +ORTE_MODULE_DECLSPEC extern orte_errmgr_base_component_t mca_errmgr_dvm_component; + +ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_dvm_module; + +END_C_DECLS + +#endif /* MCA_ERRMGR_dvm_EXPORT_H */ diff --git a/orte/mca/errmgr/dvm/errmgr_dvm_component.c b/orte/mca/errmgr/dvm/errmgr_dvm_component.c new file mode 100644 index 0000000000..879062893b --- /dev/null +++ b/orte/mca/errmgr/dvm/errmgr_dvm_component.c @@ -0,0 +1,102 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "opal/util/output.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/base/errmgr_private.h" +#include "errmgr_dvm.h" + +/* + * Public string for version number + */ +const char *orte_errmgr_dvm_component_version_string = + "ORTE ERRMGR dvm MCA component version " ORTE_VERSION; + +/* + * Local functionality + */ +static int dvm_register(void); +static int dvm_open(void); +static int dvm_close(void); +static int dvm_component_query(mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointer to our public functions in it + */ +orte_errmgr_base_component_t mca_errmgr_dvm_component = { + /* Handle the general mca_component_t struct containing + * meta information about the component dvm + */ + .base_version = { + ORTE_ERRMGR_BASE_VERSION_3_0_0, + /* Component name and version */ + .mca_component_name = "dvm", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = dvm_open, + .mca_close_component = dvm_close, + .mca_query_component = dvm_component_query, + .mca_register_component_params = dvm_register, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + +static int my_priority; /* priority handed back by dvm_component_query; assigned in dvm_register */ + +static int dvm_register(void) +{ + mca_base_component_t *c = &mca_errmgr_dvm_component.base_version; + + my_priority = 1000; /* default, assigned before the "priority" variable is registered against &my_priority */ + (void) mca_base_component_var_register(c, "priority", + "Priority of the dvm errmgr component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &my_priority); /* registration result is deliberately discarded */ + + return ORTE_SUCCESS; +} + +static int dvm_open(void) +{ + return ORTE_SUCCESS; /* no work is done on open */ +} + +static int dvm_close(void) +{ + return ORTE_SUCCESS; /* no work is done on close */ +} + +static 
int dvm_component_query(mca_base_module_t **module, int *priority) +{ + /* used by DVM masters - hand back the dvm module together with my_priority */ + if (ORTE_PROC_IS_MASTER) { + *priority = my_priority; + *module = (mca_base_module_t *)&orte_errmgr_dvm_module; + return ORTE_SUCCESS; + } + + *module = NULL; + *priority = -1; + return ORTE_ERROR; /* not a DVM master: NULL module and priority -1 are returned with the error */ +} diff --git a/orte/mca/errmgr/dvm/owner.txt b/orte/mca/errmgr/dvm/owner.txt new file mode 100644 index 0000000000..85b4416d20 --- /dev/null +++ b/orte/mca/errmgr/dvm/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: INTEL +status: active diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index 4fc737865c..3ad308f3a4 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -299,6 +299,8 @@ static void xcast_recv(int status, orte_process_name_t* sender, * the initial message, minus the headers inserted by xcast itself */ relay = OBJ_NEW(opal_buffer_t); opal_dss.copy_payload(relay, buffer); + /* setup the relay list */ + OBJ_CONSTRUCT(&coll, opal_list_t); /* if this is headed for the daemon command processor, * then we first need to check for add_local_procs @@ -308,14 +310,8 @@ static void xcast_recv(int status, orte_process_name_t* sender, cnt=1; if (ORTE_SUCCESS == (ret = opal_dss.unpack(buffer, &command, &cnt, ORTE_DAEMON_CMD))) { /* if it is add_procs, then... 
*/ - if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) { - OBJ_RELEASE(relay); - relay = OBJ_NEW(opal_buffer_t); - /* repack the command */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(relay, &command, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(ret); - goto relay; - } + if (ORTE_DAEMON_ADD_LOCAL_PROCS == command || + ORTE_DAEMON_DVM_NIDMAP_CMD == command) { /* extract the byte object holding the daemonmap */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) { @@ -354,11 +350,21 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); goto relay; } - if (0 == flag) { - /* copy the remainder of the payload */ - opal_dss.copy_payload(relay, buffer); - /* no - just return */ - goto relay; + + if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) { + OBJ_RELEASE(relay); + relay = OBJ_NEW(opal_buffer_t); + /* repack the command */ + if (OPAL_SUCCESS != (ret = opal_dss.pack(relay, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(ret); + goto relay; + } + if (0 == flag) { + /* copy the remainder of the payload */ + opal_dss.copy_payload(relay, buffer); + /* no - just return */ + goto relay; + } } /* unpack the byte object */ @@ -381,8 +387,10 @@ static void xcast_recv(int status, orte_process_name_t* sender, OBJ_DESTRUCT(&wireup); } free(bo); - /* copy the remainder of the payload */ - opal_dss.copy_payload(relay, buffer); + if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) { + /* copy the remainder of the payload */ + opal_dss.copy_payload(relay, buffer); + } } } else { ORTE_ERROR_LOG(ret); @@ -391,8 +399,6 @@ static void xcast_recv(int status, orte_process_name_t* sender, } relay: - /* setup the relay list */ - OBJ_CONSTRUCT(&coll, opal_list_t); /* get the list of next recipients from the routed module */ orte_routed.get_routing_list(&coll); @@ -420,18 +426,14 @@ static void xcast_recv(int status, orte_process_name_t* sender, */ jdata = orte_get_job_data_object(nm->name.jobid); if (NULL == (rec = 
(orte_proc_t*)opal_pointer_array_get_item(jdata->procs, nm->name.vpid))) { - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:direct:send_relay proc %s not found - cannot relay", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&nm->name))); + opal_output(0, "%s grpcomm:direct:send_relay proc %s not found - cannot relay", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); OBJ_RELEASE(rly); continue; } if (ORTE_PROC_STATE_RUNNING < rec->state) { - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:direct:send_relay proc %s not running - cannot relay", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&nm->name))); + opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); OBJ_RELEASE(rly); continue; } @@ -449,10 +451,12 @@ static void xcast_recv(int status, orte_process_name_t* sender, OBJ_DESTRUCT(&coll); /* now send the relay buffer to myself for processing */ - if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay, tag, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(relay); + if (ORTE_DAEMON_DVM_NIDMAP_CMD != command) { + if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay, tag, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(relay); + } } } diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index 63daba20d4..43ca0d291b 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved * * $COPYRIGHT$ * @@ -160,7 +160,6 @@ orte_odls_base_module_t orte_odls_alps_module = { orte_odls_alps_launch_local_procs, orte_odls_alps_kill_local_procs, orte_odls_alps_signal_local_procs, - orte_odls_base_default_deliver_message, orte_odls_alps_restart_proc }; diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index fc1c758d4b..c5fa5bffcf 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -122,6 +122,21 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, return ORTE_SUCCESS; } + /* if this is a DVM-based launch, then don't pack all the wireup + * info as we don't need it - just pack the job itself */ + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) { + numjobs = 0; + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &numjobs, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack the job struct */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata, 1, ORTE_JOB))) { + ORTE_ERROR_LOG(rc); + } + return rc; + } + /* construct a nodemap - only want updated items */ if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo, true))) { ORTE_ERROR_LOG(rc); @@ -757,7 +772,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) } if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) && j == (int)child->app_idx) { - child->exit_code = rc; + child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); } } @@ -954,7 +969,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) orte_show_help("help-orte-odls-base.txt", "orte-odls-base:xterm-rank-out-of-bounds", true, nm->name.vpid, jobdat->num_procs); - child->exit_code = ORTE_ERR_SILENT; + child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; ORTE_ACTIVATE_PROC_STATE(&child->name, 
ORTE_PROC_STATE_FAILED_TO_LAUNCH); continue; } @@ -981,7 +996,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) orte_show_help("help-orte-odls-base.txt", "orte-odls-base:fork-agent-not-found", true, orte_process_info.nodename, orte_fork_agent[0]); - child->exit_code = ORTE_ERR_SILENT; + child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); continue; } @@ -1014,7 +1029,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) &(app->argv), &(app->env) ) ) ) { ORTE_ERROR_LOG(rc); - child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + child->exit_code = rc; ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); continue; } @@ -1040,7 +1055,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) } if (ORTE_SUCCESS != (rc = fork_local(app, child, app->env, jobdat))) { - child->exit_code = ORTE_ERR_SILENT; /* error message already output */ + child->exit_code = rc; /* error message already output */ ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); } orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); @@ -1059,7 +1074,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) * across the entire cluster. 
Instead, we let orterun * output a consolidated error message for us */ - child->exit_code = ORTE_ERR_SILENT; /* error message already output */ + child->exit_code = rc; /* error message already output */ ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); continue; } else { @@ -1104,57 +1119,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) OBJ_RELEASE(caddy); } -int orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buffer, orte_rml_tag_t tag) -{ - int rc, exit_status = ORTE_SUCCESS; - int i; - orte_proc_t *child; - opal_buffer_t *relay; - - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } - - /* do we have a child from the specified job. Because the - * job could be given as a WILDCARD value, we must use - * the dss.compare function to check for equality. - */ - if (!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE) || - OPAL_EQUAL != opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID)) { - continue; - } - - OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s odls: sending message to tag %lu on child %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (unsigned long)tag, ORTE_NAME_PRINT(&child->name))); - - /* if so, send the message */ - relay = OBJ_NEW(opal_buffer_t); - opal_dss.copy_payload(relay, buffer); - rc = orte_rml.send_buffer_nb(&child->name, relay, tag, orte_rml_send_callback, NULL); - if (rc < 0 && rc != ORTE_ERR_ADDRESSEE_UNKNOWN) { - /* ignore if the addressee is unknown as a race condition could - * have allowed the child to exit before we send it a barrier - * due to the vagaries of the event library. - * - * If we do get an error it is likely that the orte_local_children - * has changed to reflect it, so we can no longer deliver messages. - * So just break out and return the error code. 
- */ - ORTE_ERROR_LOG(rc); - exit_status = rc; - OBJ_RELEASE(relay); - goto cleanup; - } - } - - cleanup: - return exit_status; -} - - /** * Pass a signal to my local procs */ @@ -1412,23 +1376,6 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata) ORTE_ACTIVATE_PROC_STATE(&proc->name, state); } -typedef struct { - orte_proc_t *child; - orte_odls_base_kill_local_fn_t kill_local; -} odls_kill_caddy_t; - -static void kill_cbfunc(int fd, short args, void *cbdata) -{ - odls_kill_caddy_t *cd = (odls_kill_caddy_t*)cbdata; - - if (!ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_ALIVE) || 0 == cd->child->pid) { - free(cd); - return; - } - cd->kill_local(cd->child->pid, SIGKILL); - free(cd); -} - int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, orte_odls_base_kill_local_fn_t kill_local, orte_odls_base_child_died_fn_t child_died) @@ -1555,48 +1502,17 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, */ orte_wait_cb_cancel(child); - if (!do_cleanup) { - odls_kill_caddy_t *cd; - - /* if we are killing only selected procs, then do so in a gentle - fashion. First send a SIGCONT in case the process is in stopped state. - If it is in a stopped state and we do not first change it to - running, then SIGTERM will not get delivered. Ignore return - value. 
*/ - OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s SENDING SIGCONT TO %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name))); - kill_local(child->pid, SIGCONT); - - /* Send a sigterm to the process before sigkill to be nice */ - OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s SENDING SIGTERM TO %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name))); - kill_local(child->pid, SIGTERM); - /* provide a polite delay so the proc has a chance to react */ - cd = (odls_kill_caddy_t*)malloc(sizeof(odls_kill_caddy_t)); - OBJ_RETAIN(child); // protect against race conditions - cd->child = child; - cd->kill_local = kill_local; - ORTE_TIMER_EVENT(1, 0, kill_cbfunc, ORTE_SYS_PRI); - continue; - } - - /* Force the SIGKILL just to make sure things are dead + /* Use SIGKILL just to make sure things are dead * This fixes an issue that, if the application is masking - * SIGTERM, then the child_died() - * may return 'true' even though waipid returns with 0. - * It does this to avoid a race condition, per documentation - * in odls_default_module.c. + * SIGTERM, then the child_died() may return 'true' even + * though waitpid returns with 0. It does this to avoid a + * race condition, per documentation in odls_default_module.c. */ OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s SENDING FORCE SIGKILL TO %s pid %lu", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name), (unsigned long)child->pid)); kill_local(child->pid, SIGKILL); - /* indicate the waitpid fired as this is effectively what * has happened */ diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index 2556b9dfc1..48fa5133c8 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -9,9 +9,10 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -103,9 +104,6 @@ OBJ_CLASS_DECLARATION(orte_odls_launch_local_t); ORTE_DECLSPEC void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata); -ORTE_DECLSPEC int -orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buffer, orte_rml_tag_t tag); - ORTE_DECLSPEC void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata); /* define a function type to signal a local proc */ diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 48548b34a2..21af9f5bd3 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -160,7 +160,6 @@ orte_odls_base_module_t orte_odls_default_module = { orte_odls_default_launch_local_procs, orte_odls_default_kill_local_procs, orte_odls_default_signal_local_procs, - orte_odls_base_default_deliver_message, orte_odls_default_restart_proc }; diff --git a/orte/mca/odls/odls.h b/orte/mca/odls/odls.h index d28a964f77..9c49f0ac57 100644 --- a/orte/mca/odls/odls.h +++ b/orte/mca/odls/odls.h @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -74,12 +75,6 @@ typedef int (*orte_odls_base_module_kill_local_processes_fn_t)(opal_pointer_arra typedef int (*orte_odls_base_module_signal_local_process_fn_t)(const orte_process_name_t *proc, int32_t signal); -/** - * Deliver a message to local processes - */ -typedef int (*orte_odls_base_module_deliver_message_fn_t)(orte_jobid_t job, opal_buffer_t *buffer, - orte_rml_tag_t tag); - /** * Restart a local process */ @@ -93,7 +88,6 @@ struct orte_odls_base_module_1_3_0_t { orte_odls_base_module_launch_local_processes_fn_t launch_local_procs; orte_odls_base_module_kill_local_processes_fn_t kill_local_procs; orte_odls_base_module_signal_local_process_fn_t signal_local_procs; - orte_odls_base_module_deliver_message_fn_t deliver_message; orte_odls_base_module_restart_proc_fn_t restart_proc; }; diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 1164e5931f..bd3115f254 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,7 +48,6 @@ typedef uint8_t orte_daemon_cmd_flag_t; #define ORTE_DAEMON_HEARTBEAT_CMD (orte_daemon_cmd_flag_t) 6 #define ORTE_DAEMON_EXIT_CMD (orte_daemon_cmd_flag_t) 7 #define ORTE_DAEMON_PROCESS_AND_RELAY_CMD (orte_daemon_cmd_flag_t) 9 -#define ORTE_DAEMON_MESSAGE_LOCAL_PROCS (orte_daemon_cmd_flag_t) 10 #define ORTE_DAEMON_NULL_CMD (orte_daemon_cmd_flag_t) 11 /* commands for use by tools */ @@ -59,6 +58,8 @@ typedef uint8_t orte_daemon_cmd_flag_t; #define ORTE_DAEMON_TERMINATE_JOB_CMD (orte_daemon_cmd_flag_t) 18 #define ORTE_DAEMON_HALT_VM_CMD (orte_daemon_cmd_flag_t) 19 #define ORTE_DAEMON_HALT_DVM_CMD (orte_daemon_cmd_flag_t) 20 +#define ORTE_DAEMON_REPORT_JOB_COMPLETE (orte_daemon_cmd_flag_t) 21 + /* request proc resource usage */ #define ORTE_DAEMON_TOP_CMD (orte_daemon_cmd_flag_t) 22 @@ -74,9 +75,10 @@ typedef uint8_t orte_daemon_cmd_flag_t; /* process called "errmgr.abort_procs" */ #define ORTE_DAEMON_ABORT_PROCS_CALLED (orte_daemon_cmd_flag_t) 28 -/* new daemon collective id */ -#define ORTE_DAEMON_NEW_COLL_ID (orte_daemon_cmd_flag_t) 29 - +/* nidmap for the DVM */ +#define ORTE_DAEMON_DVM_NIDMAP_CMD (orte_daemon_cmd_flag_t) 29 +/* add procs for the DVM */ +#define ORTE_DAEMON_DVM_ADD_PROCS (orte_daemon_cmd_flag_t) 30 /* * Struct written up the pipe from the child to the parent. diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index bef9465789..1a4a5eba9a 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -104,6 +104,9 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) if (NULL == (node = dmn1->node) || NULL == (t = node->topology)) { /* something is wrong */ + opal_output(0, "NODE IS %s T IS %s", + (NULL == node) ? "NULL" : "NOT-NULL", + (NULL == t) ? "NULL" : "NOT-NULL"); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); OBJ_RELEASE(caddy); @@ -512,8 +515,12 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) /* setup the buffer */ buffer = OBJ_NEW(opal_buffer_t); - /* pack the add_local_procs command */ - command = ORTE_DAEMON_ADD_LOCAL_PROCS; + /* pack the appropriate add_local_procs command */ + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) { + command = ORTE_DAEMON_DVM_ADD_PROCS; + } else { + command = ORTE_DAEMON_ADD_LOCAL_PROCS; + } if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); @@ -637,9 +644,10 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) * it won't register and we need to send the response now. 
* Otherwise, it is an MPI job and we should wait for it * to register */ - if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, NULL, OPAL_BOOL)) { + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, NULL, OPAL_BOOL) && + !orte_get_attribute(&jdata->attributes, ORTE_JOB_DVM_JOB, NULL, OPAL_BOOL)) { OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:launch job %s is not MPI", + "%s plm:base:launch job %s is MPI", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); goto cleanup; @@ -724,13 +732,16 @@ void orte_plm_base_registered(int fd, short args, void *cbdata) caddy->jdata->state = caddy->job_state; /* if this isn't a dynamic spawn, just cleanup */ - if (ORTE_JOBID_INVALID == jdata->originator.jobid) { + if (ORTE_JOBID_INVALID == jdata->originator.jobid || + orte_get_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, NULL, OPAL_BOOL) || + orte_get_attribute(&jdata->attributes, ORTE_JOB_DVM_JOB, NULL, OPAL_BOOL)) { OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s plm:base:launch job %s is not a dynamic spawn", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); goto cleanup; } + /* if it was a dynamic spawn, send the response */ rc = ORTE_SUCCESS; answer = OBJ_NEW(opal_buffer_t); @@ -1529,6 +1540,15 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) } map = daemons->map; + /* if this job is being launched against a fixed DVM, then there is + * nothing for us to do - the DVM will stand as is */ + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) { + /* mark that the daemons have reported so we can proceed */ + daemons->state = ORTE_JOB_STATE_DAEMONS_REPORTED; + map->num_new_daemons = 0; + return ORTE_SUCCESS; + } + /* if this is a dynamic spawn, then we don't make any changes to * the virtual machine unless specifically requested to do so */ diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h 
index 94cc1cfb5c..6b4ea0923b 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -151,7 +151,7 @@ BEGIN_C_DECLS /* notifier support */ #define ORTE_RML_TAG_NOTIFIER_HNP 52 -#define ORTE_RML_TAG_CONFIRM_SPAWN 53 +#define ORTE_RML_TAG_NOTIFY_COMPLETE 53 /*** QOS specific RML TAGS ***/ #define ORTE_RML_TAG_OPEN_CHANNEL_REQ 54 diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 01a28c0dee..00827a3566 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -757,10 +757,10 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) * is maintained! */ if (1 < j) { - if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { - /* this was a debugger daemon. notify that a debugger has detached */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH); - } + if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { + /* this was a debugger daemon. notify that a debugger has detached */ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH); + } opal_pointer_array_set_item(orte_job_data, j, NULL); /* ensure the array has a NULL */ OBJ_RELEASE(jdata); } diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index 26bed1bdf3..22163c9ed4 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015-2016 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,11 +20,14 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/filem/filem.h" +#include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/iof/iof.h" +#include "orte/mca/odls/odls_types.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/ras/base/base.h" #include "orte/mca/rmaps/base/base.h" #include "orte/mca/routed/routed.h" +#include "orte/util/nidmap.h" #include "orte/util/session_dir.h" #include "orte/runtime/orte_quit.h" @@ -40,8 +43,10 @@ static int init(void); static int finalize(void); /* local functions */ +static void init_complete(int fd, short args, void *cbdata); static void vm_ready(int fd, short args, void *cbata); -void check_complete(int fd, short args, void *cbdata); +static void check_complete(int fd, short args, void *cbdata); +static void cleanup_job(int fd, short args, void *cbdata); /****************** * DVM module - used when mpirun is persistent @@ -86,7 +91,7 @@ static orte_job_state_t launch_states[] = { }; static orte_state_cbfunc_t launch_callbacks[] = { orte_plm_base_setup_job, - orte_plm_base_setup_job_complete, + init_complete, orte_ras_base_allocate, orte_plm_base_allocation_complete, orte_plm_base_daemons_launched, @@ -100,7 +105,7 @@ static orte_state_cbfunc_t launch_callbacks[] = { orte_plm_base_post_launch, orte_plm_base_registered, check_complete, - orte_state_base_cleanup_job, + cleanup_job, orte_quit }; @@ -210,12 +215,105 @@ static void files_ready(int status, void *cbdata) } } -static void vm_ready(int fd, short args, void *cbdata) +static void init_complete(int sd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + /* nothing to do here but move along - if it is the + * daemon job, then next step is allocate */ + if (caddy->jdata->jobid == ORTE_PROC_MY_NAME->jobid) { + ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE); + } else { + /* next step - position any required files */ + if 
(ORTE_SUCCESS != orte_filem.preposition_files(caddy->jdata, files_ready, caddy->jdata)) { + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + } + } + OBJ_RELEASE(caddy); +} + +static void vm_ready(int fd, short args, void *cbdata) +{ + orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + int rc; + opal_buffer_t *buf; + orte_daemon_cmd_flag_t command = ORTE_DAEMON_DVM_NIDMAP_CMD; + orte_grpcomm_signature_t *sig; + opal_buffer_t *wireup; + opal_byte_object_t bo, *boptr; + int8_t flag; + int32_t numbytes; + /* if this is my job, then we are done */ if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) { + /* send the daemon map to every daemon in this DVM - we + * do this here so we don't have to do it for every + * job we are going to launch */ + buf = OBJ_NEW(opal_buffer_t); + /* pack the "load nidmap" cmd */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + /* construct a nodemap with everything in it */ + if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo, false))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + + /* store it */ + boptr = &bo; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + /* release the data since it has now been copied into our buffer */ + free(bo.bytes); + + /* pack a flag indicating wiring info is provided */ + flag = 1; + opal_dss.pack(buf, &flag, 1, OPAL_INT8); + /* get wireup info for daemons per the selected routing module */ + wireup = OBJ_NEW(opal_buffer_t); + if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(wireup))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + OBJ_RELEASE(buf); + return; + } + /* put it in a byte object for xmission */ + opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); + /* pack the byte object - zero-byte objects are fine */ + bo.size = numbytes; + boptr = &bo; + if (ORTE_SUCCESS != (rc = 
opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + OBJ_RELEASE(buf); + return; + } + /* release the data since it has now been copied into our buffer */ + if (NULL != bo.bytes) { + free(bo.bytes); + } + OBJ_RELEASE(wireup); + + /* goes to all daemons */ + sig = OBJ_NEW(orte_grpcomm_signature_t); + sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); + sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; + sig->signature[0].vpid = ORTE_VPID_WILDCARD; + if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buf))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + OBJ_RELEASE(sig); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + return; + } + OBJ_RELEASE(buf); /* notify that the vm is ready */ fprintf(stdout, "DVM ready\n"); OBJ_RELEASE(caddy); @@ -234,92 +332,27 @@ static void vm_ready(int fd, short args, void *cbdata) OBJ_RELEASE(caddy); } -void check_complete(int fd, short args, void *cbdata) +static void check_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_job_t *jdata = caddy->jdata; orte_proc_t *proc; int i; - orte_std_cntr_t j; - orte_job_t *job; orte_node_t *node; orte_job_map_t *map; orte_std_cntr_t index; - bool one_still_alive; - orte_vpid_t lowest=0; - int32_t i32, *i32ptr; opal_output_verbose(2, orte_state_base_framework.framework_output, - "%s state:base:check_job_complete on job %s", + "%s state:dvm:check_job_complete on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == jdata) ? 
"NULL" : ORTE_JOBID_PRINT(jdata->jobid)); if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { /* just check to see if the daemons are complete */ OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_complete - received NULL job, checking daemons", + "%s state:dvm:check_job_complete - received NULL job, checking daemons", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - goto CHECK_DAEMONS; - } else { - /* mark the job as terminated, but don't override any - * abnormal termination flags - */ - if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) { - jdata->state = ORTE_JOB_STATE_TERMINATED; - } - } - - /* tell the IOF that the job is complete */ - if (NULL != orte_iof.complete) { - orte_iof.complete(jdata); - } - - i32ptr = &i32; - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32) && !orte_abort_non_zero_exit) { - if (!orte_report_child_jobs_separately || 1 == ORTE_LOCAL_JOBID(jdata->jobid)) { - /* update the exit code */ - ORTE_UPDATE_EXIT_STATUS(lowest); - } - - /* warn user */ - opal_output(orte_clean_output, - "-------------------------------------------------------\n" - "While %s job %s terminated normally, %d %s. Further examination may be required.\n" - "-------------------------------------------------------", - (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child", - (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), - i32, (1 == i32) ? "process returned\na non-zero exit code." 
: - "processes returned\nnon-zero exit codes."); - } - - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_completed declared job %s terminated with state %s - checking all jobs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jdata->jobid), - orte_job_state_to_str(jdata->state))); - - /* if this job is a continuously operating one, then don't do - * anything further - just return here - */ - if (NULL != jdata && - (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL) || - ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RECOVERABLE))) { - goto CHECK_ALIVE; - } - - /* if the job that is being checked is the HNP, then we are - * trying to terminate the orteds. In that situation, we - * do -not- check all jobs - we simply notify the HNP - * that the orteds are complete. Also check special case - * if jdata is NULL - we want - * to definitely declare the job done if the orteds - * have completed, no matter what else may be happening. - * This can happen if a ctrl-c hits in the "wrong" place - * while launching - */ - CHECK_DAEMONS: - if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { if (0 == orte_routed.num_routes()) { /* orteds are done! */ OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, @@ -336,6 +369,18 @@ void check_complete(int fd, short args, void *cbdata) return; } + /* mark the job as terminated, but don't override any + * abnormal termination flags + */ + if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) { + jdata->state = ORTE_JOB_STATE_TERMINATED; + } + + /* tell the IOF that the job is complete */ + if (NULL != orte_iof.complete) { + orte_iof.complete(jdata); + } + /* Release the resources used by this job. Since some errmgrs may want * to continue using resources allocated to the job as part of their * fault recovery procedure, we only do this once the job is "complete". 
@@ -388,114 +433,25 @@ void check_complete(int fd, short args, void *cbdata) } } - CHECK_ALIVE: - /* now check to see if all jobs are done - trigger notification of this jdata - * object when we find it - */ - one_still_alive = false; - for (j=1; j < orte_job_data->size; j++) { - if (NULL == (job = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, j))) { - /* since we are releasing jdata objects as we - * go, we can no longer assume that the job_data - * array is left justified - */ - continue; - } - /* if this is the job we are checking AND it normally terminated, - * then activate the "notify_completed" state - this will release - * the job state, but is provided so that the HNP main code can - * take alternative actions if desired. If the state is killed_by_cmd, - * then go ahead and release it. We cannot release it if it - * abnormally terminated as mpirun needs the info so it can - * report appropriately to the user - * - * NOTE: do not release the primary job (j=1) so we - * can pretty-print completion message - */ - if (NULL != jdata && job->jobid == jdata->jobid) { - if (jdata->state == ORTE_JOB_STATE_TERMINATED) { - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_completed state is terminated - activating notify", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFY_COMPLETED); - one_still_alive = true; - } else if (jdata->state == ORTE_JOB_STATE_KILLED_BY_CMD || - jdata->state == ORTE_JOB_STATE_NOTIFIED) { - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_completed state is killed or notified - cleaning up", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* release this object, ensuring that the - * pointer array internal accounting - * is maintained! - */ - if (1 < j) { - if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { - /* this was a debugger daemon. 
notify that a debugger has detached */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH); - } - opal_pointer_array_set_item(orte_job_data, j, NULL); /* ensure the array has a NULL */ - OBJ_RELEASE(jdata); - } - } - continue; - } - /* if the job is flagged to not be monitored, skip it */ - if (ORTE_FLAG_TEST(job, ORTE_JOB_FLAG_DO_NOT_MONITOR)) { - continue; - } - /* when checking for job termination, we must be sure to NOT check - * our own job as it - rather obviously - has NOT terminated! - */ - if (job->num_terminated < job->num_procs) { - /* we have at least one job that is not done yet - we cannot - * just return, though, as we need to ensure we cleanout the - * job data for the job that just completed - */ - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_completed job %s is not terminated (%d:%d)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(job->jobid), - job->num_terminated, job->num_procs)); - one_still_alive = true; - } - else { - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_completed job %s is terminated (%d vs %d [%s])", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(job->jobid), - job->num_terminated, job->num_procs, - (NULL == jdata) ? "UNKNOWN" : orte_job_state_to_str(jdata->state) )); - } - } - /* if a job is still alive, we just return */ - if (one_still_alive) { + if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { + /* this was a debugger daemon. 
notify that a debugger has detached */ + OBJ_RETAIN(jdata); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH); + } else if (jdata->state != ORTE_JOB_STATE_NOTIFIED) { OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_completed at least one job is not terminated", + "%s state:dvm:check_job_completed state is terminated - activating notify", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - OBJ_RELEASE(caddy); - return; + OBJ_RETAIN(jdata); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFY_COMPLETED); + /* mark the job as notified */ + jdata->state = ORTE_JOB_STATE_NOTIFIED; } - /* if we get here, then all jobs are done, so terminate */ - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:base:check_job_completed all jobs terminated", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* stop the job timeout event, if set */ - if (NULL != orte_mpiexec_timeout) { - OBJ_RELEASE(orte_mpiexec_timeout); - orte_mpiexec_timeout = NULL; - } - - /* set the exit status to 0 - this will only happen if it - * wasn't already set by an error condition - */ - ORTE_UPDATE_EXIT_STATUS(0); - - /* order daemon termination - this tells us to cleanup - * our local procs as well as telling remote daemons - * to die - */ - orte_plm.terminate_orteds(); OBJ_RELEASE(caddy); } + +static void cleanup_job(int sd, short args, void *cbdata) +{ + orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + OBJ_RELEASE(caddy); +} diff --git a/orte/orted/Makefile.am b/orte/orted/Makefile.am index 5a8f63409e..f4f87665f5 100644 --- a/orte/orted/Makefile.am +++ b/orte/orted/Makefile.am @@ -10,7 +10,8 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -23,10 +24,12 @@ dist_ortedata_DATA += orted/help-orted.txt headers += \ - orted/orted.h + orted/orted.h \ + orted/orted_submit.h lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ orted/orted_main.c \ - orted/orted_comm.c + orted/orted_comm.c \ + orted/orted_submit.c include orted/pmix/Makefile.am diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 4c26488f6e..66fdca5dab 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -95,10 +95,8 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, orte_std_cntr_t n; int32_t signal; orte_jobid_t job; - orte_rml_tag_t target_tag; char *contact_info; opal_buffer_t *answer; - orte_rml_cmd_flag_t rml_cmd; orte_job_t *jdata; orte_process_name_t proc, proc2; orte_process_name_t *return_addr; @@ -228,6 +226,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, /**** ADD_LOCAL_PROCS ****/ case ORTE_DAEMON_ADD_LOCAL_PROCS: + case ORTE_DAEMON_DVM_ADD_PROCS: if (orte_debug_daemons_flag) { opal_output(0, "%s orted_cmd: received add_local_procs", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -340,87 +339,6 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } break; - /**** DELIVER A MESSAGE TO THE LOCAL PROCS ****/ - case ORTE_DAEMON_MESSAGE_LOCAL_PROCS: - if (orte_debug_daemons_flag) { - opal_output(0, "%s orted_cmd: received message_local_procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - } - - /* unpack the jobid of the procs that are to receive the message */ - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job, &n, ORTE_JOBID))) { - ORTE_ERROR_LOG(ret); - goto CLEANUP; - } - - 
/* unpack the tag where we are to deliver the message */ - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &target_tag, &n, ORTE_RML_TAG))) { - ORTE_ERROR_LOG(ret); - goto CLEANUP; - } - - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, - "%s orted:comm:message_local_procs delivering message to job %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(job), (int)target_tag)); - - relay_msg = OBJ_NEW(opal_buffer_t); - opal_dss.copy_payload(relay_msg, buffer); - - /* if job=my_jobid, then this message is for us and not for our children */ - if (ORTE_PROC_MY_NAME->jobid == job) { - /* if the target tag is our xcast_barrier or rml_update, then we have - * to handle the message as a special case. The RML has logic in it - * intended to make it easier to use. This special logic mandates that - * any message we "send" actually only goes into the queue for later - * transmission. Thus, since we are already in a recv when we enter - * the "process_commands" function, any attempt to "send" the relay - * buffer to ourselves will only be added to the queue - it won't - * actually be delivered until *after* we conclude the processing - * of the current recv. - * - * The problem here is that, for messages where we need to relay - * them along the orted chain, the rml_update - * message contains contact info we may well need in order to do - * the relay! So we need to process those messages immediately. - * The only way to accomplish that is to (a) detect that the - * buffer is intended for those tags, and then (b) process - * those buffers here. 
- * - */ - if (ORTE_RML_TAG_RML_INFO_UPDATE == target_tag) { - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(relay_msg, &rml_cmd, &n, ORTE_RML_CMD))) { - ORTE_ERROR_LOG(ret); - goto CLEANUP; - } - /* initialize the routes to my peers - this will update the number - * of daemons in the system (i.e., orte_process_info.num_procs) as - * this might have changed - */ - if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, relay_msg))) { - ORTE_ERROR_LOG(ret); - goto CLEANUP; - } - } else { - /* just deliver it to ourselves */ - if ((ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay_msg, target_tag, - orte_rml_send_callback, NULL)) < 0) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(relay_msg); - } - } - } else { - /* must be for our children - deliver the message */ - if (ORTE_SUCCESS != (ret = orte_odls.deliver_message(job, relay_msg, target_tag))) { - ORTE_ERROR_LOG(ret); - } - OBJ_RELEASE(relay_msg); - } - break; - /**** EXIT COMMAND ****/ case ORTE_DAEMON_EXIT_CMD: if (orte_debug_daemons_flag) { @@ -518,22 +436,22 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, opal_output(0, "%s orted_cmd: received spawn job", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } - answer = OBJ_NEW(opal_buffer_t); - job = ORTE_JOBID_INVALID; /* can only process this if we are the HNP */ if (ORTE_PROC_IS_HNP) { /* unpack the job data */ n = 1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &jdata, &n, ORTE_JOB))) { ORTE_ERROR_LOG(ret); - goto ANSWER_LAUNCH; + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_LAUNCH); + break; } /* point the originator to the sender */ jdata->originator = *sender; /* assign a jobid to it */ if (ORTE_SUCCESS != (ret = orte_plm_base_create_jobid(jdata))) { ORTE_ERROR_LOG(ret); - goto ANSWER_LAUNCH; + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_LAUNCH); + break; } /* store it on the global job data pool */ opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), jdata); @@ -550,7 
+468,8 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, if (ORTE_SUCCESS != (ret = opal_dss.pack(iofbuf, &ioftag, 1, ORTE_IOF_TAG))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(iofbuf); - goto ANSWER_LAUNCH; + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_LAUNCH); + break; } /* pack the name of the source */ source.jobid = jdata->jobid; @@ -558,13 +477,15 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, if (ORTE_SUCCESS != (ret = opal_dss.pack(iofbuf, &source, 1, ORTE_NAME))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(iofbuf); - goto ANSWER_LAUNCH; + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_LAUNCH); + break; } /* pack the sender as the sink */ if (ORTE_SUCCESS != (ret = opal_dss.pack(iofbuf, sender, 1, ORTE_NAME))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(iofbuf); - goto ANSWER_LAUNCH; + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_LAUNCH); + break; } /* send the buffer to our IOF */ orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP, @@ -578,22 +499,39 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, /* now launch the job - this will just push it into our state machine */ if (ORTE_SUCCESS != (ret = orte_plm.spawn(jdata))) { ORTE_ERROR_LOG(ret); - goto ANSWER_LAUNCH; + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_LAUNCH); + break; } - job = jdata->jobid; } - ANSWER_LAUNCH: - /* pack the jobid to be returned */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &job, 1, ORTE_JOBID))) { + break; + + + /**** TERMINATE JOB COMMAND ****/ + case ORTE_DAEMON_TERMINATE_JOB_CMD: + + /* unpack the jobid */ + n = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job, &n, ORTE_JOBID))) { ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); goto CLEANUP; } - /* return response */ - if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_CONFIRM_SPAWN, - orte_rml_send_callback, NULL))) { + + /* look up job data object */ + if (NULL == (jdata = orte_get_job_data_object(job))) 
{ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + goto CLEANUP; + } + + /* mark the job as (being) cancelled so that we can distinguish it later */ + if (ORTE_SUCCESS != (ret = orte_set_attribute(&jdata->attributes, ORTE_JOB_CANCELLED, + ORTE_ATTR_LOCAL, NULL, OPAL_BOOL))) { ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); + goto CLEANUP; + } + + if (ORTE_SUCCESS != (ret = orte_plm.terminate_job(job))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; } break; @@ -1165,8 +1103,6 @@ static char *get_orted_comm_cmd_str(int command) return strdup("ORTE_DAEMON_EXIT_CMD"); case ORTE_DAEMON_PROCESS_AND_RELAY_CMD: return strdup("ORTE_DAEMON_PROCESS_AND_RELAY_CMD"); - case ORTE_DAEMON_MESSAGE_LOCAL_PROCS: - return strdup("ORTE_DAEMON_MESSAGE_LOCAL_PROCS"); case ORTE_DAEMON_NULL_CMD: return strdup("NULL"); @@ -1185,6 +1121,9 @@ static char *get_orted_comm_cmd_str(int command) return strdup("ORTE_DAEMON_HALT_VM_CMD"); case ORTE_DAEMON_HALT_DVM_CMD: return strdup("ORTE_DAEMON_HALT_DVM_CMD"); + case ORTE_DAEMON_REPORT_JOB_COMPLETE: + return strdup("ORTE_DAEMON_REPORT_JOB_COMPLETE"); + case ORTE_DAEMON_TOP_CMD: return strdup("ORTE_DAEMON_TOP_CMD"); case ORTE_DAEMON_NAME_REQ_CMD: @@ -1198,8 +1137,11 @@ static char *get_orted_comm_cmd_str(int command) return strdup("ORTE_DAEMON_PROCESS_CMD"); case ORTE_DAEMON_ABORT_PROCS_CALLED: return strdup("ORTE_DAEMON_ABORT_PROCS_CALLED"); - case ORTE_DAEMON_NEW_COLL_ID: - return strdup("ORTE_DAEMON_NEW_COLL_ID"); + + case ORTE_DAEMON_DVM_NIDMAP_CMD: + return strdup("ORTE_DAEMON_DVM_NIDMAP_CMD"); + case ORTE_DAEMON_DVM_ADD_PROCS: + return strdup("ORTE_DAEMON_DVM_ADD_PROCS"); default: return strdup("Unknown Command!"); diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c new file mode 100644 index 0000000000..eea9384ffd --- /dev/null +++ b/orte/orted/orted_submit.c @@ -0,0 +1,1808 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#include +#include +#ifdef HAVE_STRINGS_H +#include +#endif /* HAVE_STRINGS_H */ +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#include +#include +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ +#ifdef HAVE_SYS_WAIT_H +#include +#endif /* HAVE_SYS_WAIT_H */ +#ifdef HAVE_SYS_TIME_H +#include +#endif /* HAVE_SYS_TIME_H */ +#include +#ifdef HAVE_SYS_STAT_H +#include +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/installdirs/installdirs.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/mca/base/base.h" +#include "opal/util/argv.h" +#include "opal/util/output.h" +#include "opal/util/basename.h" +#include "opal/util/cmd_line.h" +#include "opal/util/opal_environ.h" +#include "opal/util/opal_getcwd.h" +#include "opal/util/show_help.h" +#include "opal/util/fd.h" +#include "opal/sys/atomic.h" +#if OPAL_ENABLE_FT_CR == 1 +#include "opal/runtime/opal_cr.h" +#endif + +#include "opal/version.h" +#include "opal/runtime/opal.h" +#include 
"opal/runtime/opal_info_support.h" +#include "opal/util/os_path.h" +#include "opal/util/path.h" +#include "opal/class/opal_pointer_array.h" +#include "opal/dss/dss.h" + +#include "orte/mca/odls/odls_types.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/rmaps/base/base.h" + +#include "orte/mca/schizo/schizo.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/routed/routed.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wait.h" +#include "orte/runtime/orte_quit.h" +#include "orte/util/show_help.h" + +#include "orted_submit.h" +/* + * Globals + */ +///* +// * Globals +// */ +static struct { + bool help; + bool version; + char *report_pid; + char *stdin_target; + bool index_argv; + bool preload_binaries; + char *preload_files; + char *appfile; + int num_procs; + char *hnp; + char *wdir; + bool set_cwd_to_session_dir; + char *path; + bool enable_recovery; + char *personality; + char *prefix; + bool terminate; + bool nolocal; + bool no_oversubscribe; + bool oversubscribe; + int cpus_per_proc; + bool pernode; + int npernode; + bool use_hwthreads_as_cpus; + int npersocket; + char *mapping_policy; + char *ranking_policy; + char *binding_policy; + bool report_bindings; + char *slot_list; + bool debug; + bool run_as_root; +} myglobals; + +static char **global_mca_env = NULL; +static orte_std_cntr_t total_num_apps = 0; +static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT; +static opal_pointer_array_t tool_jobs; +static opal_cmd_line_t *cmd_line=NULL; +static bool mycmdline = false; + +static opal_cmd_line_init_t cmd_line_init[] = { + /* Various "obvious" options */ + { NULL, 'h', NULL, "help", 0, + &myglobals.help, OPAL_CMD_LINE_TYPE_BOOL, + "This help message" }, + { NULL, 'V', NULL, "version", 0, + &myglobals.version, OPAL_CMD_LINE_TYPE_BOOL, + "Print version 
and exit" }, + + /* select stdin option */ + { NULL, '\0', "stdin", "stdin", 1, + &myglobals.stdin_target, OPAL_CMD_LINE_TYPE_STRING, + "Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)" }, + + /* request that argv[0] be indexed */ + { NULL, '\0', "index-argv-by-rank", "index-argv-by-rank", 0, + &myglobals.index_argv, OPAL_CMD_LINE_TYPE_BOOL, + "Uniquely index argv[0] for each process using its rank" }, + + /* Preload the binary on the remote machine */ + { NULL, 's', NULL, "preload-binary", 0, + &myglobals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL, + "Preload the binary on the remote machine before starting the remote process." }, + + /* Preload files on the remote machine */ + { NULL, '\0', NULL, "preload-files", 1, + &myglobals.preload_files, OPAL_CMD_LINE_TYPE_STRING, + "Preload the comma separated list of files to the remote machines current working directory before starting the remote process." }, + + /* Use an appfile */ + { NULL, '\0', NULL, "app", 1, + &myglobals.appfile, OPAL_CMD_LINE_TYPE_STRING, + "Provide an appfile; ignore all other command line options" }, + + /* Number of processes; -c, -n, --n, -np, and --np are all + synonyms */ + { NULL, 'c', "np", "np", 1, + &myglobals.num_procs, OPAL_CMD_LINE_TYPE_INT, + "Number of processes to run" }, + { NULL, '\0', "n", "n", 1, + &myglobals.num_procs, OPAL_CMD_LINE_TYPE_INT, + "Number of processes to run" }, + + /* uri of the dvm, or at least where to get it */ + { NULL, '\0', "hnp", "hnp", 1, + &myglobals.hnp, OPAL_CMD_LINE_TYPE_STRING, + "Specify the URI of the Open MPI server, or the name of the file (specified as file:filename) that contains that info" }, + + /* Set a hostfile */ + { NULL, '\0', "hostfile", "hostfile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + { NULL, '\0', "machinefile", "machinefile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + { "orte_default_hostfile", '\0', "default-hostfile", "default-hostfile", 1, + 
NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a default hostfile" }, + { "opal_if_do_not_resolve", '\0', "do-not-resolve", "do-not-resolve", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Do not attempt to resolve interfaces" }, + + /* Export environment variables; potentially used multiple times, + so it does not make sense to set into a variable */ + { NULL, 'x', NULL, NULL, 1, + NULL, OPAL_CMD_LINE_TYPE_NULL, + "Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)" }, + + /* Mapping controls */ + { NULL, 'H', "host", "host", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "List of hosts to invoke processes on" }, + { NULL, '\0', "nolocal", "nolocal", 0, + &myglobals.nolocal, OPAL_CMD_LINE_TYPE_BOOL, + "Do not run any MPI applications on the local node" }, + { NULL, '\0', "nooversubscribe", "nooversubscribe", 0, + &myglobals.no_oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, + "Nodes are not to be oversubscribed, even if the system supports such operation"}, + { NULL, '\0', "oversubscribe", "oversubscribe", 0, + &myglobals.oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, + "Nodes are allowed to be oversubscribed, even on a managed system, and overloading of processing elements"}, + { NULL, '\0', "cpus-per-proc", "cpus-per-proc", 1, + &myglobals.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT, + "Number of cpus to use for each process [default=1]" }, + + /* Nperxxx options that do not require topology and are always + * available - included for backwards compatibility + */ + { NULL, '\0', "pernode", "pernode", 0, + &myglobals.pernode, OPAL_CMD_LINE_TYPE_BOOL, + "Launch one process per available node" }, + { NULL, '\0', "npernode", "npernode", 1, + &myglobals.npernode, OPAL_CMD_LINE_TYPE_INT, + "Launch n processes per node on all allocated nodes" }, + { NULL, '\0', "N", NULL, 1, + 
&myglobals.npernode, OPAL_CMD_LINE_TYPE_INT, + "Launch n processes per node on all allocated nodes (synonym for npernode)" }, + + /* declare hardware threads as independent cpus */ + { NULL, '\0', "use-hwthread-cpus", "use-hwthread-cpus", 0, + &myglobals.use_hwthreads_as_cpus, OPAL_CMD_LINE_TYPE_BOOL, + "Use hardware threads as independent cpus" }, + + /* include npersocket for backwards compatibility */ + { NULL, '\0', "npersocket", "npersocket", 1, + &myglobals.npersocket, OPAL_CMD_LINE_TYPE_INT, + "Launch n processes per socket on all allocated nodes" }, + + /* Mapping options */ + { NULL, '\0', NULL, "map-by", 1, + &myglobals.mapping_policy, OPAL_CMD_LINE_TYPE_STRING, + "Mapping Policy [slot | hwthread | core | socket (default) | numa | board | node]" }, + + /* Ranking options */ + { NULL, '\0', NULL, "rank-by", 1, + &myglobals.ranking_policy, OPAL_CMD_LINE_TYPE_STRING, + "Ranking Policy [slot (default) | hwthread | core | socket | numa | board | node]" }, + + /* Binding options */ + { NULL, '\0', NULL, "bind-to", 1, + &myglobals.binding_policy, OPAL_CMD_LINE_TYPE_STRING, + "Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is the default when np<=2, and \"socket\" is the default when np>2). 
Allowed qualifiers: overload-allowed, if-supported" }, + + { NULL, '\0', "report-bindings", "report-bindings", 0, + &myglobals.report_bindings, OPAL_CMD_LINE_TYPE_BOOL, + "Whether to report process bindings to stderr" }, + + /* slot list option */ + { NULL, '\0', "slot-list", "slot-list", 1, + &myglobals.slot_list, OPAL_CMD_LINE_TYPE_STRING, + "List of processor IDs to bind processes to [default=NULL]"}, + + /* mpiexec-like arguments */ + { NULL, '\0', "wdir", "wdir", 1, + &myglobals.wdir, OPAL_CMD_LINE_TYPE_STRING, + "Set the working directory of the started processes" }, + { NULL, '\0', "wd", "wd", 1, + &myglobals.wdir, OPAL_CMD_LINE_TYPE_STRING, + "Synonym for --wdir" }, + { NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0, + &myglobals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL, + "Set the working directory of the started processes to their session directory" }, + { NULL, '\0', "path", "path", 1, + &myglobals.path, OPAL_CMD_LINE_TYPE_STRING, + "PATH to be used to look for executables to start processes" }, + + { NULL, '\0', "enable-recovery", "enable-recovery", 0, + &myglobals.enable_recovery, OPAL_CMD_LINE_TYPE_BOOL, + "Enable recovery (resets all recovery options to on)" }, + + { NULL, '\0', "personality", "personality", 1, + &myglobals.personality, OPAL_CMD_LINE_TYPE_STRING, + "Programming model/language being used (default=\"ompi\")" }, + + { NULL, 'd', "debug-devel", "debug-devel", 0, + &myglobals.debug, OPAL_CMD_LINE_TYPE_BOOL, + "Enable debugging of OpenRTE" }, + + { NULL, '\0', "allow-run-as-root", "allow-run-as-root", 0, + &myglobals.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, + "Allow execution as root (STRONGLY DISCOURAGED)" }, + + /* End of list */ + { NULL, '\0', NULL, NULL, 0, + NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } +}; + +/* + * Local functions + */ +static int create_app(int argc, char* argv[], + orte_job_t *jdata, + orte_app_context_t **app, + bool *made_app, char ***app_env); +static int init_globals(void); +static int 
parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line); +static int parse_locals(orte_job_t *jdata, int argc, char* argv[]); +static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile); +static int parse_appfile(orte_job_t *jdata, char *filename, char ***env); +static void orte_timeout_wakeup(int sd, short args, void *cbdata); +static void launch_recv(int status, orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata); +static void complete_recv(int status, orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata); + +/* local objects */ +typedef struct { + opal_object_t super; + orte_job_t *jdata; + int index; + orte_submit_cbfunc_t launch_cb; + void *launch_cbdata; + orte_submit_cbfunc_t complete_cb; + void *complete_cbdata; +} trackr_t; +static void tcon(trackr_t *p) +{ + p->jdata = NULL; + p->launch_cb = NULL; + p->launch_cbdata = NULL; + p->complete_cb = NULL; + p->complete_cbdata = NULL; +} +static void tdes(trackr_t *p) +{ + if (NULL != p->jdata) { + OBJ_RELEASE(p->jdata); + } +} +static OBJ_CLASS_INSTANCE(trackr_t, + opal_object_t, + tcon, tdes); + +int orte_submit_init(int argc, char *argv[], + opal_cmd_line_t *opts) +{ + int rc; + + OBJ_CONSTRUCT(&tool_jobs, opal_pointer_array_t); + opal_pointer_array_init(&tool_jobs, 256, INT_MAX, 128); + + /* find our basename (the name of the executable) so that we can + use it in pretty-print error messages */ + orte_basename = opal_basename(argv[0]); + + /* setup the cmd line only once */ + if (NULL != opts) { + /* just add ours to the end */ + if (OPAL_SUCCESS != (rc = opal_cmd_line_add(opts, cmd_line_init))) { + return rc; + } + cmd_line = opts; + mycmdline = false; + } else { + /* create our own */ + cmd_line = OBJ_NEW(opal_cmd_line_t); + opal_cmd_line_create(cmd_line, cmd_line_init); + mca_base_cmd_line_setup(cmd_line); + mycmdline = true; + } + + /* parse the cmd line - we do this here to get the initial + * 
MCA parameters that might impact our own init */ + if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(cmd_line, true, + argc, argv)) ) { + if (OPAL_ERR_SILENT != rc) { + fprintf(stderr, "%s: command line error (%s)\n", argv[0], + opal_strerror(rc)); + } + return rc; + } + + /* print version if requested. Do this before check for help so + that --version --help works as one might expect. */ + if (myglobals.version) { + char *str; + str = opal_info_make_version_str("all", + OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + OPAL_GREEK_VERSION, + OPAL_REPO_REV); + if (NULL != str) { + fprintf(stdout, "%s %s\n\nReport bugs to %s\n", + orte_basename, str, PACKAGE_BUGREPORT); + free(str); + } + return ORTE_ERR_SILENT; + } + + /* process MCA/GMCA parameters */ + if (OPAL_SUCCESS != (rc = mca_base_cmd_line_process_args(cmd_line, &environ, &environ))) { + return rc; + } + + /* Need to initialize OPAL so that install_dirs are filled in */ + if (OPAL_SUCCESS != (rc = opal_init(&argc, &argv))) { + OBJ_DESTRUCT(&cmd_line); + return rc; + } + + /* Check for help request */ + if (myglobals.help) { + char *str, *args = NULL; + char *project_name = NULL; + + if (0 == strcmp(orte_basename, "mpirun")) { + project_name = "Open MPI"; + } else { + project_name = "OpenRTE"; + } + args = opal_cmd_line_get_usage_msg(cmd_line); + str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, + orte_basename, project_name, OPAL_VERSION, + orte_basename, args, + PACKAGE_BUGREPORT); + if (NULL != str) { + printf("%s", str); + free(str); + } + free(args); + + /* If someone asks for help, that should be all we do */ + exit(0); + } + + /* if they didn't point us at an HNP, that's an error */ + if (NULL == myglobals.hnp) { + fprintf(stderr, "%s submit: required option --hnp not provided\n", orte_basename); + return ORTE_ERROR; + } + + if (0 == strncasecmp(myglobals.hnp, "file", strlen("file"))) { + char input[1024], *filename; + FILE *fp; + + /* it is a file - get the filename 
*/ + filename = strchr(myglobals.hnp, ':'); + if (NULL == filename) { + /* filename is not correctly formatted */ + orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", myglobals.hnp); + exit(1); + } + ++filename; /* space past the : */ + + if (0 >= strlen(filename)) { + /* they forgot to give us the name! */ + orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", myglobals.hnp); + exit(1); + } + + /* open the file and extract the uri */ + fp = fopen(filename, "r"); + if (NULL == fp) { /* can't find or read file! */ + orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, myglobals.hnp); + exit(1); + } + if (NULL == fgets(input, 1024, fp)) { + /* something malformed about file */ + fclose(fp); + orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, myglobals.hnp); + exit(1); + } + fclose(fp); + input[strlen(input)-1] = '\0'; /* remove newline */ + /* construct the target hnp info */ + opal_setenv("OMPI_MCA_orte_hnp_uri", input, true, &environ); + } else { + /* should just be the uri itself - construct the target hnp info */ + opal_setenv("OMPI_MCA_orte_hnp_uri", myglobals.hnp, true, &environ); + } + + /* Setup MCA params */ + orte_register_params(); + + /* we are never allowed to operate as a distributed tool, + * so insist on the ess/tool component */ + opal_setenv("OMPI_MCA_ess", "tool", true, &environ); + + if (myglobals.debug) { + orte_devel_level_output = true; + } + + /* Initialize our Open RTE environment + * Set the flag telling orte_init that I am NOT a + * singleton, but am "infrastructure" - prevents setting + * up incorrect infrastructure that only a singleton would + * require + */ + if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) { + /* cannot call ORTE_ERROR_LOG as it could be the errmgr + * never got loaded! + */ + return rc; + } + /* finalize OPAL. As it was opened again from orte_init->opal_init + * we continue to have a reference count on it. 
So we have to finalize it twice... + */ + opal_finalize(); + + /* clear the ess param from the environment so our children + * don't pick it up */ + opal_unsetenv("OMPI_MCA_ess", &environ); + + /* set the info in our contact table */ + orte_rml.set_contact_info(orte_process_info.my_hnp_uri); + /* extract the name */ + if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) { + orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); + exit(1); + } + /* set the route to be direct */ + if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { + orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); + orte_finalize(); + exit(1); + } + + /* set the target hnp as our lifeline so we will terminate if it exits */ + orte_routed.set_lifeline(ORTE_PROC_MY_HNP); + + /* setup to listen for HNP response to my commands */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFY_COMPLETE, + ORTE_RML_PERSISTENT, complete_recv, NULL); + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP, + ORTE_RML_PERSISTENT, launch_recv, NULL); + + return ORTE_SUCCESS; +} + + +void orte_submit_finalize(void) +{ + trackr_t *trk; + int i; + + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFY_COMPLETE); + for (i=0; i < tool_jobs.size; i++) { + if (NULL != (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, i))) { + OBJ_RELEASE(trk); + } + } + OBJ_DESTRUCT(&tool_jobs); + + /* destruct the cmd line object */ + if (mycmdline) { + OBJ_RELEASE(cmd_line); + } +} + +int orte_submit_cancel(int index) { + + int rc; + trackr_t *trk; + opal_buffer_t *req; + orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_TERMINATE_JOB_CMD; + + /* get the tracker */ + if (NULL == (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, index))) { + opal_output(0, "TRACKER 
ID %d RETURNED INDEX TO NULL OBJECT", index); + return ORTE_ERROR; + } + + /* create and send request with command and jobid */ + req = OBJ_NEW(opal_buffer_t); + if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &cmd, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &trk->jdata->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, + orte_rml_send_callback, NULL); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(req); + return rc; + } + + return ORTE_ERR_OP_IN_PROGRESS; +} + + +int orte_submit_halt(void) +{ + int rc; + opal_buffer_t *req; + orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_HALT_DVM_CMD; + + req = OBJ_NEW(opal_buffer_t); + if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &cmd, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + return rc; + } + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, + ORTE_RML_TAG_DAEMON, + orte_rml_send_callback, NULL); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(req); + return rc; + } + + return ORTE_ERR_OP_IN_PROGRESS; +} + +// +// The real thing +// +int orte_submit_job(char *argv[], int *index, + orte_submit_cbfunc_t launch_cb, + void *launch_cbdata, + orte_submit_cbfunc_t complete_cb, + void *complete_cbdata) +{ + opal_buffer_t *req; + int rc; + orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_SPAWN_JOB_CMD; + char *param; + orte_job_t *jdata = NULL; + trackr_t *trk; + int argc; + + /* reset the globals every time thru as the argv + * will modify them */ + memset(&myglobals, 0, sizeof(myglobals)); + argc = opal_argv_count(argv); + + /* parse the cmd line - do this every time thru so we can + * repopulate the globals */ + if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(cmd_line, true, + argc, argv)) ) { + if (OPAL_ERR_SILENT != rc) { + fprintf(stderr, "%s: command line error (%s)\n", argv[0], + opal_strerror(rc)); + } + return rc; + } + + /* Check for some "global" 
command line params */ + parse_globals(argc, argv, cmd_line); + + /* default our personality to OMPI */ + if (NULL == myglobals.personality) { + myglobals.personality = strdup("ompi"); + } + + /* create a new job object to hold the info for this one - the + * jobid field will be filled in by the PLM when the job is + * launched + */ + jdata = OBJ_NEW(orte_job_t); + if (NULL == jdata) { + /* cannot call ORTE_ERROR_LOG as the errmgr + * hasn't been loaded yet! + */ + return ORTE_ERR_OUT_OF_RESOURCE; + } + jdata->personality = strdup(myglobals.personality); + trk = OBJ_NEW(trackr_t); + trk->jdata = jdata; + trk->launch_cb = launch_cb; + trk->launch_cbdata = launch_cbdata; + trk->complete_cb = complete_cb; + trk->complete_cbdata = complete_cbdata; + trk->index = opal_pointer_array_add(&tool_jobs, trk); + + + /* pass our tracker ID */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, ORTE_ATTR_GLOBAL, &trk->index, OPAL_INT); + /* flag that we are using the DVM */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_DVM_JOB, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + /* flag that the allocation is static - i.e., the DVM is not allowed + * to be adjusted once started, and all unused nodes are to be + * removed from the node pool */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + + /* check what user wants us to do with stdin */ + if (NULL != myglobals.stdin_target) { + if (0 == strcmp(myglobals.stdin_target, "all")) { + jdata->stdin_target = ORTE_VPID_WILDCARD; + } else if (0 == strcmp(myglobals.stdin_target, "none")) { + jdata->stdin_target = ORTE_VPID_INVALID; + } else { + jdata->stdin_target = strtoul(myglobals.stdin_target, NULL, 10); + } + } + + /* if we want the argv's indexed, indicate that */ + if (myglobals.index_argv) { + orte_set_attribute(&jdata->attributes, ORTE_JOB_INDEX_ARGV, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + } + + /* Parse each app, adding it to the job object */ + parse_locals(jdata, argc, argv); + 
+ /* create the map object to communicate policies */ + jdata->map = OBJ_NEW(orte_job_map_t); + + if (NULL != myglobals.mapping_policy) { + if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, NULL, myglobals.mapping_policy))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } else if (myglobals.pernode) { + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_PPR); + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN); + /* define the ppr */ + jdata->map->ppr = strdup("1:node"); + } else if (0 < myglobals.npernode) { + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_PPR); + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN); + /* define the ppr */ + (void)asprintf(&jdata->map->ppr, "%d:node", myglobals.npernode); + } + if (NULL != myglobals.ranking_policy) { + if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, + jdata->map->mapping, + myglobals.ranking_policy))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + if (NULL != myglobals.binding_policy) { + if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, + myglobals.binding_policy))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + + /* if they asked for nolocal, mark it so */ + if (myglobals.nolocal) { + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL); + } + if (myglobals.no_oversubscribe) { + ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); + } + if (myglobals.oversubscribe) { + ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); + } + if (myglobals.report_bindings) { + orte_set_attribute(&jdata->attributes, ORTE_JOB_REPORT_BINDINGS, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + } + if (myglobals.slot_list) { + orte_set_attribute(&jdata->attributes, ORTE_JOB_SLOT_LIST, ORTE_ATTR_GLOBAL, myglobals.slot_list, OPAL_STRING); + } + + if (0 == jdata->num_apps) { + /* This should never happen -- this case should 
be caught in + create_app(), but let's just double check... */ + orte_show_help("help-orterun.txt", "orterun:nothing-to-do", + true, orte_basename); + return ORTE_ERROR_DEFAULT_EXIT_CODE; + } + + /* check for a job timeout specification, to be provided in seconds + * as that is what MPICH used + */ + if (NULL != (param = getenv("MPIEXEC_TIMEOUT"))) { + if (NULL == (orte_mpiexec_timeout = OBJ_NEW(orte_timer_t))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE); + //goto DONE; + } + orte_mpiexec_timeout->tv.tv_sec = strtol(param, NULL, 10); + orte_mpiexec_timeout->tv.tv_usec = 0; + opal_event_evtimer_set(orte_event_base, orte_mpiexec_timeout->ev, + orte_timeout_wakeup, jdata); + opal_event_set_priority(orte_mpiexec_timeout->ev, ORTE_ERROR_PRI); + opal_event_evtimer_add(orte_mpiexec_timeout->ev, &orte_mpiexec_timeout->tv); + } + + /* if recovery was disabled on the cmd line, do so */ + if (myglobals.enable_recovery) { + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_RECOVERABLE); + } + + // pack the ORTE_DAEMON_SPAWN_JOB_CMD command and job object and send to HNP at tag ORTE_RML_TAG_DAEMON + req = OBJ_NEW(opal_buffer_t); + if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &cmd, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &jdata, 1, ORTE_JOB))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &trk->index, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); + + /* Inform the caller of the tracker index if they passed a index pointer */ + if (NULL != index) + *index = trk->index; + + return ORTE_SUCCESS; + +} + + +static int init_globals(void) +{ + /* Reset the other fields every time */ + myglobals.help = false; + myglobals.version = false; + myglobals.num_procs = 0; + if (NULL != myglobals.appfile) { + free(myglobals.appfile); + 
} + myglobals.appfile = NULL; + if (NULL != myglobals.wdir) { + free(myglobals.wdir); + } + myglobals.set_cwd_to_session_dir = false; + myglobals.wdir = NULL; + if (NULL != myglobals.path) { + free(myglobals.path); + } + myglobals.path = NULL; + + myglobals.preload_binaries = false; + myglobals.preload_files = NULL; + + /* All done */ + return ORTE_SUCCESS; +} + + +static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line) +{ + /* check for request to report pid */ + if (NULL != myglobals.report_pid) { + FILE *fp; + if (0 == strcmp(myglobals.report_pid, "-")) { + /* if '-', then output to stdout */ + printf("%d\n", (int)getpid()); + } else if (0 == strcmp(myglobals.report_pid, "+")) { + /* if '+', output to stderr */ + fprintf(stderr, "%d\n", (int)getpid()); + } else { + fp = fopen(myglobals.report_pid, "w"); + if (NULL == fp) { + orte_show_help("help-orterun.txt", "orterun:write_file", false, + orte_basename, "pid", myglobals.report_pid); + exit(0); + } + fprintf(fp, "%d\n", (int)getpid()); + fclose(fp); + } + } + + return ORTE_SUCCESS; +} + + +static int parse_locals(orte_job_t *jdata, int argc, char* argv[]) +{ + int i, rc, app_num; + int temp_argc; + char **temp_argv, **env; + orte_app_context_t *app; + bool made_app; + orte_std_cntr_t j, size1; + + /* Make the apps */ + temp_argc = 0; + temp_argv = NULL; + opal_argv_append(&temp_argc, &temp_argv, argv[0]); + + /* NOTE: This bogus env variable is necessary in the calls to + create_app(), below. See comment immediately before the + create_app() function for an explanation. 
*/ + + env = NULL; + for (app_num = 0, i = 1; i < argc; ++i) { + if (0 == strcmp(argv[i], ":")) { + /* Make an app with this argv */ + if (opal_argv_count(temp_argv) > 1) { + if (NULL != env) { + opal_argv_free(env); + env = NULL; + } + app = NULL; + rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); + /** keep track of the number of apps - point this app_context to that index */ + if (ORTE_SUCCESS != rc) { + /* Assume that the error message has already been + printed; no need to cleanup -- we can just + exit */ + exit(1); + } + if (made_app) { + app->idx = app_num; + ++app_num; + opal_pointer_array_add(jdata->apps, app); + ++jdata->num_apps; + } + + /* Reset the temps */ + + temp_argc = 0; + temp_argv = NULL; + opal_argv_append(&temp_argc, &temp_argv, argv[0]); + } + } else { + opal_argv_append(&temp_argc, &temp_argv, argv[i]); + } + } + + if (opal_argv_count(temp_argv) > 1) { + app = NULL; + rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); + if (ORTE_SUCCESS != rc) { + /* Assume that the error message has already been printed; + no need to cleanup -- we can just exit */ + exit(1); + } + if (made_app) { + app->idx = app_num; + ++app_num; + opal_pointer_array_add(jdata->apps, app); + ++jdata->num_apps; + } + } + if (NULL != env) { + opal_argv_free(env); + } + opal_argv_free(temp_argv); + + /* Once we've created all the apps, add the global MCA params to + each app's environment (checking for duplicates, of + course -- yay opal_environ_merge()). 
*/ + + if (NULL != global_mca_env) { + size1 = (size_t)opal_pointer_array_get_size(jdata->apps); + /* Iterate through all the apps */ + for (j = 0; j < size1; ++j) { + app = (orte_app_context_t *) + opal_pointer_array_get_item(jdata->apps, j); + if (NULL != app) { + /* Use handy utility function */ + env = opal_environ_merge(global_mca_env, app->env); + opal_argv_free(app->env); + app->env = env; + } + } + } + + /* Now take a subset of the MCA params and set them as MCA + overrides here in orterun (so that when we orte_init() later, + all the components see these MCA params). Here's how we decide + which subset of the MCA params we set here in orterun: + + 1. If any global MCA params were set, use those + 2. If no global MCA params were set and there was only one app, + then use its app MCA params + 3. Otherwise, don't set any + */ + + env = NULL; + if (NULL != global_mca_env) { + env = global_mca_env; + } else { + if (opal_pointer_array_get_size(jdata->apps) >= 1) { + /* Remember that pointer_array's can be padded with NULL + entries; so only use the app's env if there is exactly + 1 non-NULL entry */ + app = (orte_app_context_t *) + opal_pointer_array_get_item(jdata->apps, 0); + if (NULL != app) { + env = app->env; + for (j = 1; j < opal_pointer_array_get_size(jdata->apps); ++j) { + if (NULL != opal_pointer_array_get_item(jdata->apps, j)) { + env = NULL; + break; + } + } + } + } + } + + if (NULL != env) { + size1 = opal_argv_count(env); + for (j = 0; j < size1; ++j) { + /* Use-after-Free error possible here. putenv does not copy + * the string passed to it, and instead stores only the pointer. + * env[j] may be freed later, in which case the pointer + * in environ will now be left dangling into a deallocated + * region. + * So we make a copy of the variable. 
+ */ + char *s = strdup(env[j]); + + if (NULL == s) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + putenv(s); + } + } + + /* All done */ + + return ORTE_SUCCESS; +} + + +/* + * This function takes a "char ***app_env" parameter to handle the + * specific case: + * + * orterun --mca foo bar -app appfile + * + * That is, we'll need to keep foo=bar, but the presence of the app + * file will cause an invocation of parse_appfile(), which will cause + * one or more recursive calls back to create_app(). Since the + * foo=bar value applies globally to all apps in the appfile, we need + * to pass in the "base" environment (that contains the foo=bar value) + * when we parse each line in the appfile. + * + * This is really just a special case -- when we have a simple case like: + * + * orterun --mca foo bar -np 4 hostname + * + * Then the upper-level function (parse_locals()) calls create_app() + * with a NULL value for app_env, meaning that there is no "base" + * environment that the app needs to be created from. + */ +static int create_app(int argc, char* argv[], + orte_job_t *jdata, + orte_app_context_t **app_ptr, + bool *made_app, char ***app_env) +{ + opal_cmd_line_t cmd_line; + char cwd[OPAL_PATH_MAX]; + int i, j, count, rc; + char *param, *value; + orte_app_context_t *app = NULL; + bool cmd_line_made = false; + bool found = false; + char *appname; + + *made_app = false; + + /* Pre-process the command line if we are going to parse an appfile later. + * save any mca command line args so they can be passed + * separately to the daemons. + * Use Case: + * $ cat launch.appfile + * -np 1 -mca aaa bbb ./my-app -mca ccc ddd + * -np 1 -mca aaa bbb ./my-app -mca eee fff + * $ mpirun -np 2 -mca foo bar --app launch.appfile + * Only pick up '-mca foo bar' on this pass. + */ + if (NULL != myglobals.appfile) { + if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(myglobals.personality, argc, 0, argv))) { + goto cleanup; + } + } + + /* Parse application command line options. 
*/ + init_globals(); + opal_cmd_line_create(&cmd_line, cmd_line_init); + mca_base_cmd_line_setup(&cmd_line); + cmd_line_made = true; + rc = opal_cmd_line_parse(&cmd_line, true, argc, argv); + if (ORTE_SUCCESS != rc) { + goto cleanup; + } + mca_base_cmd_line_process_args(&cmd_line, app_env, &global_mca_env); + + /* Is there an appfile in here? */ + if (NULL != myglobals.appfile) { + OBJ_DESTRUCT(&cmd_line); + return parse_appfile(jdata, strdup(myglobals.appfile), app_env); + } + + /* Setup application context */ + app = OBJ_NEW(orte_app_context_t); + opal_cmd_line_get_tail(&cmd_line, &count, &app->argv); + + /* See if we have anything left */ + if (0 == count) { + orte_show_help("help-orterun.txt", "orterun:executable-not-specified", + true, orte_basename, orte_basename); + rc = ORTE_ERR_NOT_FOUND; + goto cleanup; + } + + /* + * Get mca parameters so we can pass them to the daemons. + * Use the count determined above to make sure we do not go past + * the executable name. Example: + * mpirun -np 2 -mca foo bar ./my-app -mca bip bop + * We want to pick up '-mca foo bar' but not '-mca bip bop' + */ + if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(myglobals.personality, + argc, count, argv))) { + goto cleanup; + } + + /* Grab all OMPI_* environment variables */ + + app->env = opal_argv_copy(*app_env); + if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(myglobals.personality, + myglobals.path, + &cmd_line, + environ, &app->env))) { + goto cleanup; + } + + + /* Did the user request a specific wdir? 
*/ + + if (NULL != myglobals.wdir) { + /* if this is a relative path, convert it to an absolute path */ + if (opal_path_is_absolute(myglobals.wdir)) { + app->cwd = strdup(myglobals.wdir); + } else { + /* get the cwd */ + if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { + orte_show_help("help-orterun.txt", "orterun:init-failure", + true, "get the cwd", rc); + goto cleanup; + } + /* construct the absolute path */ + app->cwd = opal_os_path(false, cwd, myglobals.wdir, NULL); + } + orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + } else if (myglobals.set_cwd_to_session_dir) { + orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + } else { + if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { + orte_show_help("help-orterun.txt", "orterun:init-failure", + true, "get the cwd", rc); + goto cleanup; + } + app->cwd = strdup(cwd); + } + + /* if this is the first app_context, check for prefix directions. + * We only do this for the first app_context because the launchers + * only look at the first one when setting the prefix - we do NOT + * support per-app_context prefix settings! + */ + if (0 == total_num_apps) { + /* Check to see if the user explicitly wanted to disable automatic + --prefix behavior */ + + if (opal_cmd_line_is_taken(&cmd_line, "noprefix")) { + want_prefix_by_default = false; + } + + /* Did the user specify a prefix, or want prefix by default? 
*/ + if (opal_cmd_line_is_taken(&cmd_line, "prefix") || want_prefix_by_default) { + size_t param_len; + /* if both the prefix was given and we have a prefix + * given above, check to see if they match + */ + if (opal_cmd_line_is_taken(&cmd_line, "prefix") && + NULL != myglobals.prefix) { + /* if they don't match, then that merits a warning */ + param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); + /* ensure we strip any trailing '/' */ + if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) { + param[strlen(param)-1] = '\0'; + } + value = strdup(myglobals.prefix); + if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) { + value[strlen(value)-1] = '\0'; + } + if (0 != strcmp(param, value)) { + orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict", + true, orte_basename, value, param); + /* let the global-level prefix take precedence since we + * know that one is being used + */ + free(param); + param = strdup(myglobals.prefix); + } + free(value); + } else if (NULL != myglobals.prefix) { + param = strdup(myglobals.prefix); + } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){ + /* must be --prefix alone */ + param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); + } else { + /* --enable-orterun-prefix-default was given to orterun */ + param = strdup(opal_install_dirs.prefix); + } + + if (NULL != param) { + /* "Parse" the param, aka remove superfluous path_sep. */ + param_len = strlen(param); + while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) { + param[param_len-1] = '\0'; + param_len--; + if (0 == param_len) { + orte_show_help("help-orterun.txt", "orterun:empty-prefix", + true, orte_basename, orte_basename); + free(param); + return ORTE_ERR_FATAL; + } + } + orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING); + free(param); + } + } + } + + /* Did the user specify a hostfile. Need to check for both + * hostfile and machine file. 
+ * We can only deal with one hostfile per app context, otherwise give an error. + */ + if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) { + if(1 < j) { + orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", + true, orte_basename, NULL); + return ORTE_ERR_FATAL; + } else { + value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0); + orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_GLOBAL, value, OPAL_STRING); + } + } + if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) { + if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) { + orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", + true, orte_basename, NULL); + return ORTE_ERR_FATAL; + } else { + value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0); + orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_GLOBAL, value, OPAL_STRING); + } + } + + /* Did the user specify any hosts? */ + if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "host"))) { + char **targ=NULL, *tval; + for (i = 0; i < j; ++i) { + value = opal_cmd_line_get_param(&cmd_line, "host", i, 0); + opal_argv_append_nosize(&targ, value); + } + tval = opal_argv_join(targ, ','); + orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_GLOBAL, tval, OPAL_STRING); + opal_argv_free(targ); + free(tval); + } else if (NULL != orte_default_dash_host) { + orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, + orte_default_dash_host, OPAL_STRING); + } + + /* check for bozo error */ + if (0 > myglobals.num_procs) { + orte_show_help("help-orterun.txt", "orterun:negative-nprocs", + true, orte_basename, app->argv[0], + myglobals.num_procs, NULL); + return ORTE_ERR_FATAL; + } + + app->num_procs = (orte_std_cntr_t)myglobals.num_procs; + total_num_apps++; + + /* Capture any preload flags */ + if (myglobals.preload_binaries) { + orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_GLOBAL, NULL, 
OPAL_BOOL); + } + /* if we were told to cwd to the session dir and the app was given in + * relative syntax, then we need to preload the binary to + * find the app - don't do this for java apps, however, as we + * can't easily find the class on the cmd line. Java apps have to + * preload their binary via the preload_files option + */ + if (!opal_path_is_absolute(app->argv[0]) && + NULL == strstr(app->argv[0], "java")) { + if (myglobals.preload_binaries) { + orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + } else if (orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) { + orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + } + } + if (NULL != myglobals.preload_files) { + orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, ORTE_ATTR_GLOBAL, + myglobals.preload_files, OPAL_STRING); + } + + /* Do not try to find argv[0] here -- the starter is responsible + for that because it may not be relevant to try to find it on + the node where orterun is executing. So just strdup() argv[0] + into app. */ + + app->app = strdup(app->argv[0]); + if (NULL == app->app) { + orte_show_help("help-orterun.txt", "orterun:call-failed", + true, orte_basename, "library", "strdup returned NULL", errno); + rc = ORTE_ERR_NOT_FOUND; + goto cleanup; + } + + /* if this is a Java application, we have a bit more work to do. Such + * applications actually need to be run under the Java virtual machine + * and the "java" command will start the "executable". So we need to ensure + * that all the proper java-specific paths are provided + */ + appname = opal_basename(app->app); + if (0 == strcmp(appname, "java")) { + /* see if we were given a library path */ + found = false; + for (i=1; NULL != app->argv[i]; i++) { + if (NULL != strstr(app->argv[i], "java.library.path")) { + /* yep - but does it include the path to the mpi libs? 
*/ + found = true; + if (NULL == strstr(app->argv[i], opal_install_dirs.libdir)) { + /* doesn't appear to - add it to be safe */ + if (':' == app->argv[i][strlen(app->argv[i]-1)]) { + asprintf(&value, "-Djava.library.path=%s%s", app->argv[i], opal_install_dirs.libdir); + } else { + asprintf(&value, "-Djava.library.path=%s:%s", app->argv[i], opal_install_dirs.libdir); + } + free(app->argv[i]); + app->argv[i] = value; + } + break; + } + } + if (!found) { + /* need to add it right after the java command */ + asprintf(&value, "-Djava.library.path=%s", opal_install_dirs.libdir); + opal_argv_insert_element(&app->argv, 1, value); + free(value); + } + + /* see if we were given a class path */ + found = false; + for (i=1; NULL != app->argv[i]; i++) { + if (NULL != strstr(app->argv[i], "cp") || + NULL != strstr(app->argv[i], "classpath")) { + /* yep - but does it include the path to the mpi libs? */ + found = true; + /* check if mpi.jar exists - if so, add it */ + value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); + if (access(value, F_OK ) != -1) { + set_classpath_jar_file(app, i+1, "mpi.jar"); + } + free(value); + /* check for oshmem support */ + value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); + if (access(value, F_OK ) != -1) { + set_classpath_jar_file(app, i+1, "shmem.jar"); + } + free(value); + /* always add the local directory */ + asprintf(&value, "%s:%s", app->cwd, app->argv[i+1]); + free(app->argv[i+1]); + app->argv[i+1] = value; + break; + } + } + if (!found) { + /* check to see if CLASSPATH is in the environment */ + found = false; // just to be pedantic + for (i=0; NULL != environ[i]; i++) { + if (0 == strncmp(environ[i], "CLASSPATH", strlen("CLASSPATH"))) { + value = strchr(environ[i], '='); + ++value; /* step over the = */ + opal_argv_insert_element(&app->argv, 1, value); + /* check for mpi.jar */ + value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); + if (access(value, F_OK ) != -1) { + 
set_classpath_jar_file(app, 1, "mpi.jar"); + } + free(value); + /* check for shmem.jar */ + value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); + if (access(value, F_OK ) != -1) { + set_classpath_jar_file(app, 1, "shmem.jar"); + } + free(value); + /* always add the local directory */ + (void)asprintf(&value, "%s:%s", app->cwd, app->argv[1]); + free(app->argv[1]); + app->argv[1] = value; + opal_argv_insert_element(&app->argv, 1, "-cp"); + found = true; + break; + } + } + if (!found) { + /* need to add it right after the java command - have + * to include the working directory and trust that + * the user set cwd if necessary + */ + char *str, *str2; + /* always start with the working directory */ + str = strdup(app->cwd); + /* check for mpi.jar */ + value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); + if (access(value, F_OK ) != -1) { + (void)asprintf(&str2, "%s:%s", str, value); + free(str); + str = str2; + } + free(value); + /* check for shmem.jar */ + value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); + if (access(value, F_OK ) != -1) { + asprintf(&str2, "%s:%s", str, value); + free(str); + str = str2; + } + free(value); + opal_argv_insert_element(&app->argv, 1, str); + free(str); + opal_argv_insert_element(&app->argv, 1, "-cp"); + } + } + /* try to find the actual command - may not be perfect */ + for (i=1; i < opal_argv_count(app->argv); i++) { + if (NULL != strstr(app->argv[i], "java.library.path")) { + continue; + } else if (NULL != strstr(app->argv[i], "cp") || + NULL != strstr(app->argv[i], "classpath")) { + /* skip the next field */ + i++; + continue; + } + /* declare this the winner */ + opal_setenv("OMPI_COMMAND", app->argv[i], true, &app->env); + /* collect everything else as the cmd line */ + if ((i+1) < opal_argv_count(app->argv)) { + value = opal_argv_join(&app->argv[i+1], ' '); + opal_setenv("OMPI_ARGV", value, true, &app->env); + free(value); + } + break; + } + } else { + /* add the 
cmd to the environment for MPI_Info to pickup */ + opal_setenv("OMPI_COMMAND", appname, true, &app->env); + if (1 < opal_argv_count(app->argv)) { + value = opal_argv_join(&app->argv[1], ' '); + opal_setenv("OMPI_ARGV", value, true, &app->env); + free(value); + } + } + free(appname); + + *app_ptr = app; + app = NULL; + *made_app = true; + + /* All done */ + + cleanup: + if (NULL != app) { + OBJ_RELEASE(app); + } + if (cmd_line_made) { + OBJ_DESTRUCT(&cmd_line); + } + return rc; +} +
+/*
+ * Ensure the classpath stored in app->argv[index] names the given jar.
+ *
+ * When jarfile does not already occur in app->argv[index] (plain substring
+ * match), "<opal_install_dirs.libdir>/<jarfile>" is appended, separated by
+ * ':' unless the current value is empty or already ends in ':'.  The old
+ * argv entry is freed and replaced by the newly allocated string.
+ *
+ * app      app_context whose argv is edited in place
+ * index    position in app->argv of the classpath value
+ * jarfile  jar file name to look for / append (e.g. "mpi.jar")
+ */
+static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile)
+{
+    const char *fmt;
+    char *str = NULL;
+    size_t len;
+
+    if (NULL != strstr(app->argv[index], jarfile)) {
+        /* already present - nothing to add */
+        return;
+    }
+
+    /* look at the LAST character of the entry: the index has to be
+     * strlen(s) - 1, not strlen(s - 1), which reads in front of the string */
+    len = strlen(app->argv[index]);
+    if (0 == len || ':' == app->argv[index][len - 1]) {
+        fmt = "%s%s/%s";
+    } else {
+        fmt = "%s:%s/%s";
+    }
+    if (0 > asprintf(&str, fmt, app->argv[index], opal_install_dirs.libdir, jarfile)) {
+        /* allocation failed - keep the existing entry untouched */
+        return;
+    }
+    free(app->argv[index]);
+    app->argv[index] = str;
+}
+
+/*
+ * Read an appfile and add one app_context to jdata for every line that
+ * yields one.
+ *
+ * Each line is prefixed with a placeholder argv[0] and handed to
+ * create_app() as if it were a command line.  Text following '#' or "//"
+ * is treated as a comment and lines that are blank afterwards are skipped.
+ *
+ * jdata     job that receives the app_contexts
+ * filename  path of the appfile; this function frees it before returning
+ * env       base environment; every line starts from a private copy of it
+ *
+ * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND if the file cannot be opened, or
+ * ORTE_ERR_OUT_OF_RESOURCE if the environment cannot be copied.  A failure
+ * inside create_app() terminates the process via exit(1).
+ */
+static int parse_appfile(orte_job_t *jdata, char *filename, char ***env)
+{
+    size_t i, len;
+    FILE *fp;
+    char line[BUFSIZ];
+    int rc, argc, app_num;
+    char **argv;
+    orte_app_context_t *app;
+    bool blank, made_app;
+    char bogus[] = "bogus ";
+    char **tmp_env;
+
+    /*
+     * Make sure to clear out this variable so we don't do anything odd in
+     * app_create()
+     */
+    if (NULL != myglobals.appfile) {
+        free(myglobals.appfile);
+        myglobals.appfile = NULL;
+    }
+
+    /* Try to open the file */
+
+    fp = fopen(filename, "r");
+    if (NULL == fp) {
+        orte_show_help("help-orterun.txt", "orterun:appfile-not-found", true,
+                       filename);
+        /* filename is released on the success path below, so it must be
+         * released on the error paths as well */
+        free(filename);
+        return ORTE_ERR_NOT_FOUND;
+    }
+
+    /* Read in line by line */
+
+    line[sizeof(line) - 1] = '\0';
+    app_num = 0;
+    do {
+
+        /* We need a bogus argv[0] (because when argv comes in from
+           the command line, argv[0] is "orterun", so the parsing
+           logic ignores it).  So create one here rather than making
+           an argv and then pre-pending a new argv[0] (which would be
+           rather inefficient). */
+
+        line[0] = '\0';
+        strcat(line, bogus);
+
+        if (NULL == fgets(line + sizeof(bogus) - 1,
+                          sizeof(line) - sizeof(bogus) - 1, fp)) {
+            break;
+        }
+
+        /* Remove a trailing newline */
+
+        len = strlen(line);
+        if (len > 0 && '\n' == line[len - 1]) {
+            line[len - 1] = '\0';
+            --len;
+        }
+
+        /* Remove comments */
+
+        for (i = 0; i < len; ++i) {
+            if ('#' == line[i]) {
+                line[i] = '\0';
+                break;
+            } else if (i + 1 < len && '/' == line[i] && '/' == line[i + 1]) {
+                line[i] = '\0';
+                break;
+            }
+        }
+
+        /* Is this a blank line?  Start right behind the placeholder
+         * (sizeof(bogus) - 1 skips "bogus " but not its NUL) so that the
+         * first real character of the line is examined too. */
+
+        len = strlen(line);
+        for (blank = true, i = sizeof(bogus) - 1; i < len; ++i) {
+            /* <ctype.h> functions need an unsigned char value */
+            if (!isspace((unsigned char)line[i])) {
+                blank = false;
+                break;
+            }
+        }
+        if (blank) {
+            continue;
+        }
+
+        /* We got a line with *something* on it.  So process it */
+
+        argv = opal_argv_split(line, ' ');
+        argc = opal_argv_count(argv);
+        if (argc > 0) {
+
+            /* Create a temporary env to use in the recursive call --
+               that is: don't disturb the original env so that we can
+               have a consistent global env.  This allows for the
+               case:
+
+                   orterun --mca foo bar --appfile file
+
+               where the "file" contains multiple apps.  In this case,
+               each app in "file" will get *only* foo=bar as the base
+               environment from which its specific environment is
+               constructed. */
+
+            if (NULL != *env) {
+                tmp_env = opal_argv_copy(*env);
+                if (NULL == tmp_env) {
+                    fclose(fp);
+                    opal_argv_free(argv);
+                    free(filename);
+                    return ORTE_ERR_OUT_OF_RESOURCE;
+                }
+            } else {
+                tmp_env = NULL;
+            }
+
+            /* start from a defined state in case create_app() returns
+             * success without producing an app */
+            app = NULL;
+            made_app = false;
+            rc = create_app(argc, argv, jdata, &app, &made_app, &tmp_env);
+            if (ORTE_SUCCESS != rc) {
+                /* Assume that the error message has already been
+                   printed; no need to cleanup -- we can just exit */
+                exit(1);
+            }
+            if (NULL != tmp_env) {
+                opal_argv_free(tmp_env);
+            }
+            if (made_app) {
+                app->idx = app_num;
+                ++app_num;
+                opal_pointer_array_add(jdata->apps, app);
+                ++jdata->num_apps;
+            }
+        }
+        opal_argv_free(argv);
+    } while (!feof(fp));
+    fclose(fp);
+
+    /* All done */
+
+    free(filename);
+
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Event callback fired when the job's execution time limit is reached:
+ * report the timeout (showing the MPIEXEC_TIMEOUT environment value, or
+ * "NULL" when it is unset), record the default error exit code and exit.
+ * The sd, args and cbdata arguments are not used.
+ */
+void orte_timeout_wakeup(int sd, short args, void *cbdata)
+{
+    char *tm;
+
+    /* this function gets called when the job execution time
+     * has hit a prescribed limit - so just abort
+     */
+    tm = getenv("MPIEXEC_TIMEOUT");
+    orte_show_help("help-orterun.txt", "orterun:timeout",
+                   true, (NULL == tm) ? "NULL" : tm);
+    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
+    exit(orte_exit_status);
+}
+
+/*
+ * Decode the failure details that follow the tracking id in a launch or
+ * completion report (job state, offending proc, its node - in that order)
+ * and pretty-print them with orte_print_aborted_job().
+ *
+ * Nothing is printed when any of the three items cannot be unpacked; the
+ * unpack error is logged instead, so that proc and node are never used
+ * uninitialized.
+ *
+ * trk     tracker of the job the report belongs to; jdata->state is updated
+ * buffer  report buffer, positioned just behind the tracking id
+ * ret     status carried by the report; stored as the proc's exit code
+ *
+ * NOTE(review): the unpacked proc and node objects are not released here,
+ * matching the previous inline code - confirm who owns them.
+ */
+static void print_job_failure(trackr_t *trk, opal_buffer_t *buffer, int ret)
+{
+    int rc;
+    int32_t cnt;
+    orte_app_context_t *app;
+    orte_proc_t *proc = NULL;
+    orte_node_t *node = NULL;
+
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &trk->jdata->state, &cnt, ORTE_JOB_STATE_T))) {
+        ORTE_ERROR_LOG(rc);
+        return;
+    }
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &proc, &cnt, ORTE_PROC))) {
+        ORTE_ERROR_LOG(rc);
+        return;
+    }
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &node, &cnt, ORTE_NODE))) {
+        ORTE_ERROR_LOG(rc);
+        return;
+    }
+    if (NULL == proc || NULL == node) {
+        /* nothing usable was delivered */
+        return;
+    }
+
+    proc->exit_code = ret;
+    app = (orte_app_context_t*)opal_pointer_array_get_item(trk->jdata->apps, proc->app_idx);
+    orte_print_aborted_job(trk->jdata, app, proc, node);
+}
+
+/*
+ * Receive callback for a job's launch report.
+ *
+ * The buffer carries, in order: the launch status (int32), the jobid and
+ * the tool's tracking index; on failure the job state, offending proc and
+ * node follow.  The jobid is stored in the tracked job, the client's
+ * launch_cb (if any) is invoked, and the tracker is removed and released
+ * when the launch failed.
+ *
+ * status, sender, tag and cbdata are not used.
+ */
+static void launch_recv(int status, orte_process_name_t* sender,
+                        opal_buffer_t *buffer,
+                        orte_rml_tag_t tag, void *cbdata)
+{
+    int rc;
+    int32_t ret;
+    int32_t cnt;
+    orte_jobid_t jobid;
+    int tool_job_index;
+    trackr_t *trk;
+
+    /* unpack the completion status */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT32))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        return;
+    }
+    /* update our exit status to match */
+    ORTE_UPDATE_EXIT_STATUS(ret);
+
+    /* unpack the jobid */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &cnt, ORTE_JOBID))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        return;
+    }
+
+    /* unpack our tracking id */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &tool_job_index, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        return;
+    }
+
+    /* Store the job id in the job data */
+    if (NULL == (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, tool_job_index))) {
+        opal_output(0, "SPAWN OF TRACKER ID %d RETURNED INDEX TO NULL OBJECT", tool_job_index);
+        return;
+    }
+    trk->jdata->jobid = jobid;
+
+    if (ORTE_SUCCESS == ret) {
+        printf("[ORTE] Task: %d is launched! (Job ID: %s)\n", tool_job_index, ORTE_JOBID_PRINT(jobid));
+    } else {
+        /* unpack the offending proc and node and explain the failure */
+        print_job_failure(trk, buffer, ret);
+    }
+
+    /* Inform client */
+    if (NULL != trk->launch_cb) {
+        trk->launch_cb(tool_job_index, trk->jdata, ret, trk->launch_cbdata);
+    }
+
+    /* if the job failed to launch, then we remove the tracker */
+    if (ORTE_SUCCESS != ret) {
+        opal_pointer_array_set_item(&tool_jobs, tool_job_index, NULL);
+        OBJ_RELEASE(trk);
+    }
+}
+
+/*
+ * Receive callback for a job's completion report.
+ *
+ * The buffer carries, in order: the completion status (int), the jobid and
+ * the tool's tracking index; on failure the job state, offending proc and
+ * node follow.  The client's complete_cb (if any) is invoked and the
+ * tracker is always removed and released.
+ *
+ * status, sender, tag and cbdata are not used.
+ */
+static void complete_recv(int status, orte_process_name_t* sender,
+                          opal_buffer_t *buffer,
+                          orte_rml_tag_t tag, void *cbdata)
+{
+    int rc, ret;
+    int32_t cnt;
+    orte_jobid_t jobid;
+    int tool_job_index;
+    trackr_t *trk;
+
+    /* unpack the completion status */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        return;
+    }
+
+    /* unpack the jobid */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &cnt, ORTE_JOBID))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        return;
+    }
+
+    /* unpack our tracking id */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &tool_job_index, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        return;
+    }
+
+    /* get the tracker */
+    if (NULL == (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, tool_job_index))) {
+        opal_output(0, "TRACKER ID %d RETURNED INDEX TO NULL OBJECT", tool_job_index);
+        return;
+    }
+
+    if (ORTE_SUCCESS == ret) {
+        printf("[ORTE] Task: %d returned: %d (Job ID: %s)\n", tool_job_index, ret, ORTE_JOBID_PRINT(jobid));
+    } else {
+        /* unpack the offending proc and node and explain the failure */
+        print_job_failure(trk, buffer, ret);
+    }
+
+    /* Inform client (trk was checked for NULL above) */
+    if (NULL != trk->complete_cb) {
+        trk->complete_cb(tool_job_index, trk->jdata, ret, trk->complete_cbdata);
+    }
+    /* cleanup */
+    opal_pointer_array_set_item(&tool_jobs, tool_job_index, NULL);
+    OBJ_RELEASE(trk);
+}
diff --git a/orte/orted/orted_submit.h b/orte/orted/orted_submit.h new file mode 100644 index 0000000000..d256598771 --- /dev/null +++ b/orte/orted/orted_submit.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTED_SUBMIT_H +#define ORTED_SUBMIT_H + +#include "orte_config.h" + +#include "orte/mca/plm/plm.h" +#include "orte/runtime/orte_globals.h" + +BEGIN_C_DECLS + + +typedef void (*orte_submit_cbfunc_t)(int index, orte_job_t *jdata, int ret, void *cbdata); + +ORTE_DECLSPEC int orte_submit_init(int argc, char *argv[], + opal_cmd_line_t *opts); +ORTE_DECLSPEC int orte_submit_cancel(int index); +ORTE_DECLSPEC void orte_submit_finalize(void); +ORTE_DECLSPEC int orte_submit_job(char *cmd[], int *index, + orte_submit_cbfunc_t launch_cb, void *launch_cbdata, + orte_submit_cbfunc_t complete_cb, void *complete_cbdata); +ORTE_DECLSPEC int orte_submit_halt(void); + + +END_C_DECLS + +#endif /* ORTED_SUBMIT_H */ diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 06892fe58f..45737b4c2f 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -212,6 +212,11 @@ bool
orte_in_parallel_debugger = false; char *orte_daemon_cores = NULL; +/** + * Global struct for catching orte command line options. + */ +orte_cmd_line_t orte_cmd_line = {0}; + int orte_dt_init(void) { int rc; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index bfa4cd63af..4cab15083b 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -412,6 +412,42 @@ typedef struct { } orte_topology_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_topology_t); +/** + * Global struct for catching orte command line options. + */ +struct orte_cmd_line_t { + bool help; + bool version; + bool verbose; + char *report_pid; + char *report_uri; + bool exit; + bool debugger; + int num_procs; + char *env_val; + char *appfile; + char *wdir; + bool set_cwd_to_session_dir; + char *path; + char *preload_files; + bool sleep; + char *stdin_target; + char *prefix; + char *path_to_mpirun; +#if OPAL_ENABLE_FT_CR == 1 + char *sstore_load; +#endif + bool disable_recovery; + bool preload_binaries; + bool index_argv; + bool run_as_root; + char *personality; + bool create_dvm; + bool terminate_dvm; +}; +typedef struct orte_cmd_line_t orte_cmd_line_t; +ORTE_DECLSPEC extern orte_cmd_line_t orte_cmd_line; + /** * Get a job data object * We cannot just reference a job data object with its jobid as diff --git a/orte/runtime/orte_quit.c b/orte/runtime/orte_quit.c index b38a6b5db4..d1824044cc 100644 --- a/orte/runtime/orte_quit.c +++ b/orte/runtime/orte_quit.c @@ -139,6 +139,212 @@ void orte_quit(int fd, short args, void *cbdata) opal_event_base_loopbreak(orte_event_base); }
+/*
+ * Print the help message that explains why a job aborted.
+ *
+ * The message is selected by job->state and, for launch failures and
+ * sensor-limit violations, by proc->exit_code.
+ *
+ * job     job whose state is being reported
+ * approc  app_context of the failed proc (supplies the cwd/app strings)
+ * proc    the proc held responsible for the abort
+ * node    node whose name is shown in the message
+ *
+ * Returns ORTE_SUCCESS when job->state was one of the handled abort states
+ * (including ORTE_ERR_SILENT, where the report was already made and nothing
+ * more is printed), or ORTE_ERR_NOT_FOUND when job->state is not handled
+ * here.
+ */
+int orte_print_aborted_job(orte_job_t *job,
+                           orte_app_context_t *approc,
+                           orte_proc_t *proc,
+                           orte_node_t *node)
+{
+    if (ORTE_JOB_STATE_FAILED_TO_START == job->state ||
+        ORTE_JOB_STATE_FAILED_TO_LAUNCH == job->state) {
+        switch (proc->exit_code) {
+        case ORTE_ERR_SILENT:
+            /* say nothing - it was already reported */
+            break;
+        case ORTE_ERR_SYS_LIMITS_PIPES:
+            orte_show_help("help-orterun.txt", "orterun:sys-limit-pipe", true,
+                           orte_basename, node->name,
+                           (unsigned long)proc->name.vpid);
+            break;
+        case ORTE_ERR_PIPE_SETUP_FAILURE:
+            orte_show_help("help-orterun.txt", "orterun:pipe-setup-failure", true,
+                           orte_basename, node->name,
+                           (unsigned long)proc->name.vpid);
+            break;
+        case ORTE_ERR_SYS_LIMITS_CHILDREN:
+            orte_show_help("help-orterun.txt", "orterun:sys-limit-children", true,
+                           orte_basename, node->name,
+                           (unsigned long)proc->name.vpid);
+            break;
+        case ORTE_ERR_FAILED_GET_TERM_ATTRS:
+            orte_show_help("help-orterun.txt", "orterun:failed-term-attrs", true,
+                           orte_basename, node->name,
+                           (unsigned long)proc->name.vpid);
+            break;
+        case ORTE_ERR_WDIR_NOT_FOUND:
+            orte_show_help("help-orterun.txt", "orterun:wdir-not-found", true,
+                           orte_basename, approc->cwd,
+                           node->name, (unsigned long)proc->name.vpid);
+            break;
+        case ORTE_ERR_EXE_NOT_FOUND:
+            orte_show_help("help-orterun.txt", "orterun:exe-not-found", true,
+                           orte_basename,
+                           (unsigned long)proc->name.vpid,
+                           orte_basename,
+                           orte_basename,
+                           node->name,
+                           approc->app);
+            break;
+        case ORTE_ERR_EXE_NOT_ACCESSIBLE:
+            orte_show_help("help-orterun.txt", "orterun:exe-not-accessible", true,
+                           orte_basename, approc->app, node->name,
+                           (unsigned long)proc->name.vpid);
+            break;
+        case ORTE_ERR_MULTIPLE_AFFINITIES:
+            orte_show_help("help-orterun.txt",
+                           "orterun:multiple-paffinity-schemes", true, NULL);
+            break;
+        case ORTE_ERR_TOPO_SLOT_LIST_NOT_SUPPORTED:
+            orte_show_help("help-orterun.txt",
+                           "orterun:topo-not-supported",
+                           true, orte_process_info.nodename, "rankfile containing a slot_list of ",
+                           NULL, approc->app);
+            break;
+        case ORTE_ERR_INVALID_NODE_RANK:
+            orte_show_help("help-orterun.txt",
+                           "orterun:invalid-node-rank", true);
+            break;
+        case ORTE_ERR_INVALID_LOCAL_RANK:
+            orte_show_help("help-orterun.txt",
+                           "orterun:invalid-local-rank", true);
+            break;
+        case ORTE_ERR_NOT_ENOUGH_CORES:
+            orte_show_help("help-orterun.txt",
+                           "orterun:not-enough-resources", true,
+                           "sockets", node->name,
+                           "bind-to-core", approc->app);
+            break;
+        case ORTE_ERR_TOPO_CORE_NOT_SUPPORTED:
+            orte_show_help("help-orterun.txt",
+                           "orterun:topo-not-supported",
+                           true, node->name, "bind-to-core", "",
+                           approc->app);
+            break;
+        case ORTE_ERR_INVALID_PHYS_CPU:
+            orte_show_help("help-orterun.txt",
+                           "orterun:invalid-phys-cpu", true);
+            break;
+        case ORTE_ERR_NOT_ENOUGH_SOCKETS:
+            orte_show_help("help-orterun.txt",
+                           "orterun:not-enough-resources", true,
+                           "sockets", node->name,
+                           "bind-to-socket", approc->app);
+            break;
+        case ORTE_ERR_TOPO_SOCKET_NOT_SUPPORTED:
+            orte_show_help("help-orterun.txt",
+                           "orterun:topo-not-supported",
+                           true, node->name, "bind-to-socket", "",
+                           approc->app);
+            break;
+        case ORTE_ERR_MODULE_NOT_FOUND:
+            orte_show_help("help-orterun.txt",
+                           "orterun:paffinity-missing-module",
+                           true, node->name);
+            break;
+        case ORTE_ERR_SLOT_LIST_RANGE:
+            orte_show_help("help-orterun.txt",
+                           "orterun:invalid-slot-list-range",
+                           true, node->name, NULL);
+            break;
+        case ORTE_ERR_PIPE_READ_FAILURE:
+            orte_show_help("help-orterun.txt", "orterun:pipe-read-failure", true,
+                           orte_basename, node->name, (unsigned long)proc->name.vpid);
+            break;
+        case ORTE_ERR_SOCKET_NOT_AVAILABLE:
+            orte_show_help("help-orterun.txt", "orterun:proc-socket-not-avail", true,
+                           orte_basename, ORTE_ERROR_NAME(proc->exit_code), node->name,
+                           (unsigned long)proc->name.vpid);
+            break;
+
+        default:
+            if (0 != proc->exit_code) {
+                orte_show_help("help-orterun.txt", "orterun:proc-failed-to-start", true,
+                               orte_basename, ORTE_ERROR_NAME(proc->exit_code), node->name,
+                               (unsigned long)proc->name.vpid);
+            } else {
+                orte_show_help("help-orterun.txt", "orterun:proc-failed-to-start-no-status", true,
+                               orte_basename, node->name);
+            }
+            break;
+        }
+        /* every launch-failure code has been dealt with above (silently
+         * for ORTE_ERR_SILENT), so tell the caller the abort was reported */
+        return ORTE_SUCCESS;
+    } else if (ORTE_JOB_STATE_ABORTED == job->state) {
+        orte_show_help("help-orterun.txt", "orterun:proc-ordered-abort", true,
+                       orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid,
+                       node->name, orte_basename);
+        return ORTE_SUCCESS;
+    } else if (ORTE_JOB_STATE_ABORTED_BY_SIG == job->state) {  /* aborted by signal */
+#ifdef HAVE_STRSIGNAL
+        if (NULL != strsignal(WTERMSIG(proc->exit_code))) {
+            orte_show_help("help-orterun.txt", "orterun:proc-aborted-strsignal", true,
+                           orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid,
+                           node->name, WTERMSIG(proc->exit_code),
+                           strsignal(WTERMSIG(proc->exit_code)));
+        } else {
+#endif
+            orte_show_help("help-orterun.txt", "orterun:proc-aborted", true,
+                           orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid,
+                           node->name, WTERMSIG(proc->exit_code));
+#ifdef HAVE_STRSIGNAL
+        }
+#endif
+        return ORTE_SUCCESS;
+    } else if (ORTE_JOB_STATE_ABORTED_WO_SYNC == job->state) { /* proc exited w/o finalize */
+        orte_show_help("help-orterun.txt", "orterun:proc-exit-no-sync", true,
+                       orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid,
+                       node->name, orte_basename, orte_basename);
+        return ORTE_SUCCESS;
+    } else if (ORTE_JOB_STATE_COMM_FAILED == job->state) {
+        orte_show_help("help-orterun.txt", "orterun:proc-comm-failed", true,
+                       ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                       ORTE_NAME_PRINT(&proc->name), node->name);
+        return ORTE_SUCCESS;
+    } else if (ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED == job->state) {
+        switch (proc->exit_code) {
+        case ORTE_ERR_MEM_LIMIT_EXCEEDED:
+            orte_show_help("help-orterun.txt", "orterun:proc-mem-exceeded", true,
+                           ORTE_NAME_PRINT(&proc->name), node->name);
+            break;
+        case ORTE_ERR_PROC_STALLED:
+            orte_show_help("help-orterun.txt", "orterun:proc-stalled", true);
+            break;
+
+        default:
+            orte_show_help("help-orterun.txt", "orterun:proc-sensor-exceeded", true);
+        }
+        return ORTE_SUCCESS;
+    } else if (ORTE_JOB_STATE_HEARTBEAT_FAILED == job->state) {
+        orte_show_help("help-orterun.txt", "orterun:proc-heartbeat-failed", true,
+                       orte_basename, ORTE_NAME_PRINT(&proc->name), node->name);
+        return ORTE_SUCCESS;
+    } else if (orte_abort_non_zero_exit &&
+               ORTE_JOB_STATE_NON_ZERO_TERM == job->state) {
+        orte_show_help("help-orterun.txt", "orterun:non-zero-exit", true,
+                       orte_basename, ORTE_NAME_PRINT(&proc->name), proc->exit_code);
+        return ORTE_SUCCESS;
+    }
+
+    /* nothing here */
+    return ORTE_ERR_NOT_FOUND;
+}
+
 /* * On abnormal termination - dump the * exit status of the aborted procs. @@ -146,10 +352,11 @@ void orte_quit(int fd, short args, void *cbdata) static void dump_aborted_procs(void) { - orte_std_cntr_t i, n; + orte_std_cntr_t n; + orte_job_t *job; + orte_std_cntr_t i; orte_proc_t *proc, *pptr; orte_app_context_t *approc; - orte_job_t *job; orte_node_t *node; /* find the job that caused the problem - be sure to start the loop @@ -161,6 +368,7 @@ static void dump_aborted_procs(void) /* the array is no longer left-justified, so we have to continue */ continue; } + if (ORTE_JOB_STATE_UNDEF != job->state && ORTE_JOB_STATE_INIT != job->state && ORTE_JOB_STATE_RUNNING != job->state && @@ -171,7 +379,7 @@ static void dump_aborted_procs(void) for (i=0; i < job->procs->size; i++) { if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(job->procs, i))) { /* array is left-justfied - we are done */ - continue; + break; } if (ORTE_PROC_STATE_FAILED_TO_START == pptr->state || ORTE_PROC_STATE_FAILED_TO_LAUNCH == pptr->state) { @@ -185,7 +393,7 @@ static void dump_aborted_procs(void) } } - /* this is a guilty party */ + /* see if there is a guilty party */ proc = NULL; if (!orte_get_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC, (void**)&proc, OPAL_PTR) || NULL == proc) { @@ -194,178 +402,9 @@ static void dump_aborted_procs(void) approc = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, proc->app_idx); node = proc->node; - if (ORTE_JOB_STATE_FAILED_TO_START == job->state || - ORTE_JOB_STATE_FAILED_TO_LAUNCH == job->state) { - switch (proc->exit_code) { - case ORTE_ERR_SILENT: - /* say nothing - it was already reported */ - break; - case ORTE_ERR_SYS_LIMITS_PIPES: - orte_show_help("help-orterun.txt",
"orterun:sys-limit-pipe", true, - orte_basename, proc->node->name, - (unsigned long)proc->name.vpid); - break; - case ORTE_ERR_PIPE_SETUP_FAILURE: - orte_show_help("help-orterun.txt", "orterun:pipe-setup-failure", true, - orte_basename, proc->node->name, - (unsigned long)proc->name.vpid); - break; - case ORTE_ERR_SYS_LIMITS_CHILDREN: - orte_show_help("help-orterun.txt", "orterun:sys-limit-children", true, - orte_basename, proc->node->name, - (unsigned long)proc->name.vpid); - break; - case ORTE_ERR_FAILED_GET_TERM_ATTRS: - orte_show_help("help-orterun.txt", "orterun:failed-term-attrs", true, - orte_basename, proc->node->name, - (unsigned long)proc->name.vpid); - break; - case ORTE_ERR_WDIR_NOT_FOUND: - orte_show_help("help-orterun.txt", "orterun:wdir-not-found", true, - orte_basename, approc->cwd, - proc->node->name, (unsigned long)proc->name.vpid); - break; - case ORTE_ERR_EXE_NOT_FOUND: - orte_show_help("help-orterun.txt", "orterun:exe-not-found", true, - orte_basename, - (unsigned long)proc->name.vpid, - orte_basename, - orte_basename, - proc->node->name, - approc->app); - break; - case ORTE_ERR_EXE_NOT_ACCESSIBLE: - orte_show_help("help-orterun.txt", "orterun:exe-not-accessible", true, - orte_basename, approc->app, proc->node->name, - (unsigned long)proc->name.vpid); - break; - case ORTE_ERR_MULTIPLE_AFFINITIES: - orte_show_help("help-orterun.txt", - "orterun:multiple-paffinity-schemes", true, NULL); - break; - case ORTE_ERR_TOPO_SLOT_LIST_NOT_SUPPORTED: - orte_show_help("help-orterun.txt", - "orterun:topo-not-supported", - true, orte_process_info.nodename, "rankfile containing a slot_list of ", - NULL, approc->app); - break; - case ORTE_ERR_INVALID_NODE_RANK: - orte_show_help("help-orterun.txt", - "orterun:invalid-node-rank", true); - break; - case ORTE_ERR_INVALID_LOCAL_RANK: - orte_show_help("help-orterun.txt", - "orterun:invalid-local-rank", true); - break; - case ORTE_ERR_NOT_ENOUGH_CORES: - orte_show_help("help-orterun.txt", - 
"orterun:not-enough-resources", true, - "sockets", node->name, - "bind-to-core", approc->app); - break; - case ORTE_ERR_TOPO_CORE_NOT_SUPPORTED: - orte_show_help("help-orterun.txt", - "orterun:topo-not-supported", - true, node->name, "bind-to-core", "", - approc->app); - break; - case ORTE_ERR_INVALID_PHYS_CPU: - orte_show_help("help-orterun.txt", - "orterun:invalid-phys-cpu", true); - break; - case ORTE_ERR_NOT_ENOUGH_SOCKETS: - orte_show_help("help-orterun.txt", - "orterun:not-enough-resources", true, - "sockets", node->name, - "bind-to-socket", approc->app); - break; - case ORTE_ERR_TOPO_SOCKET_NOT_SUPPORTED: - orte_show_help("help-orterun.txt", - "orterun:topo-not-supported", - true, node->name, "bind-to-socket", "", - approc->app); - break; - case ORTE_ERR_MODULE_NOT_FOUND: - orte_show_help("help-orterun.txt", - "orterun:paffinity-missing-module", - true, node->name); - break; - case ORTE_ERR_SLOT_LIST_RANGE: - orte_show_help("help-orterun.txt", - "orterun:invalid-slot-list-range", - true, node->name, NULL); - break; - case ORTE_ERR_PIPE_READ_FAILURE: - orte_show_help("help-orterun.txt", "orterun:pipe-read-failure", true, - orte_basename, node->name, (unsigned long)proc->name.vpid); - break; - case ORTE_ERR_SOCKET_NOT_AVAILABLE: - orte_show_help("help-orterun.txt", "orterun:proc-socket-not-avail", true, - orte_basename, ORTE_ERROR_NAME(proc->exit_code), node->name, - (unsigned long)proc->name.vpid); - break; - - default: - if (0 != proc->exit_code) { - orte_show_help("help-orterun.txt", "orterun:proc-failed-to-start", true, - orte_basename, ORTE_ERROR_NAME(proc->exit_code), node->name, - (unsigned long)proc->name.vpid); - } else { - orte_show_help("help-orterun.txt", "orterun:proc-failed-to-start-no-status", true, - orte_basename, node->name); - } - break; - } - } else if (ORTE_JOB_STATE_ABORTED == job->state) { - orte_show_help("help-orterun.txt", "orterun:proc-ordered-abort", true, - orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid, - 
node->name, orte_basename); - } else if (ORTE_JOB_STATE_ABORTED_BY_SIG == job->state) { /* aborted by signal */ -#ifdef HAVE_STRSIGNAL - if (NULL != strsignal(WTERMSIG(proc->exit_code))) { - orte_show_help("help-orterun.txt", "orterun:proc-aborted-strsignal", true, - orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid, - node->name, WTERMSIG(proc->exit_code), - strsignal(WTERMSIG(proc->exit_code))); - } else { -#endif - orte_show_help("help-orterun.txt", "orterun:proc-aborted", true, - orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid, - node->name, WTERMSIG(proc->exit_code)); -#ifdef HAVE_STRSIGNAL - } -#endif - } else if (ORTE_JOB_STATE_ABORTED_WO_SYNC == job->state) { /* proc exited w/o finalize */ - orte_show_help("help-orterun.txt", "orterun:proc-exit-no-sync", true, - orte_basename, (unsigned long)proc->name.vpid, (unsigned long)proc->pid, - node->name, orte_basename, orte_basename); - } else if (ORTE_JOB_STATE_COMM_FAILED == job->state) { - orte_show_help("help-orterun.txt", "orterun:proc-comm-failed", true, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), node->name); - } else if (ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED == job->state) { - switch (proc->exit_code) { - case ORTE_ERR_MEM_LIMIT_EXCEEDED: - orte_show_help("help-orterun.txt", "orterun:proc-mem-exceeded", true, - ORTE_NAME_PRINT(&proc->name), node->name); - break; - case ORTE_ERR_PROC_STALLED: - orte_show_help("help-orterun.txt", "orterun:proc-stalled", true); - break; - - default: - orte_show_help("help-orterun.txt", "orterun:proc-sensor-exceeded", true); - break; - } - } else if (ORTE_JOB_STATE_HEARTBEAT_FAILED == job->state) { - orte_show_help("help-orterun.txt", "orterun:proc-heartbeat-failed", true, - orte_basename, ORTE_NAME_PRINT(&proc->name), node->name); - } else if (orte_abort_non_zero_exit && - ORTE_JOB_STATE_NON_ZERO_TERM == job->state) { - orte_show_help("help-orterun.txt", "orterun:non-zero-exit", true, - orte_basename, 
ORTE_NAME_PRINT(&proc->name), proc->exit_code); + if (ORTE_SUCCESS == orte_print_aborted_job(job, approc, proc, node)) { + break; } - return; } } } diff --git a/orte/runtime/orte_quit.h b/orte/runtime/orte_quit.h index 3a5a9517f7..8e227635c7 100644 --- a/orte/runtime/orte_quit.h +++ b/orte/runtime/orte_quit.h @@ -2,6 +2,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -20,10 +21,17 @@ #include "orte_config.h" +#include "orte/runtime/orte_globals.h" + BEGIN_C_DECLS ORTE_DECLSPEC void orte_quit(int fd, short args, void *cbdata); +ORTE_DECLSPEC int orte_print_aborted_job(orte_job_t *job, + orte_app_context_t *approc, + orte_proc_t *proc, + orte_node_t *node); + END_C_DECLS #endif /* ORTE_CR_H */ diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c index 3cdf585d96..c77b533d70 100644 --- a/orte/tools/orte-dvm/orte-dvm.c +++ b/orte/tools/orte-dvm/orte-dvm.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -74,6 +74,7 @@ #include "opal/class/opal_pointer_array.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/odls/odls.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/state/state.h" @@ -129,6 +130,31 @@ static opal_cmd_line_init_t cmd_line_init[] = { &myglobals.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, "Allow execution as root (STRONGLY DISCOURAGED)" }, + /* Specify the launch agent to be used */ + { "orte_launch_agent", '\0', "launch-agent", "launch-agent", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Command used to start processes on remote nodes (default: orted)" }, + + /* maximum size of VM - typically used to subdivide an allocation */ + { "orte_max_vm_size", '\0', "max-vm-size", "max-vm-size", 1, + NULL, OPAL_CMD_LINE_TYPE_INT, + "Maximum size of VM" }, + + /* Set a hostfile */ + { NULL, '\0', "hostfile", "hostfile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + { NULL, '\0', "machinefile", "machinefile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a hostfile" }, + { "orte_default_hostfile", '\0', "default-hostfile", "default-hostfile", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide a default hostfile" }, + + { NULL, 'H', "host", "host", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "List of hosts to invoke processes on" }, + /* End of list */ { NULL, '\0', NULL, NULL, 0, NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } @@ -454,24 +480,48 @@ static void send_callback(int status, orte_process_name_t *peer, opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), NULL); OBJ_RELEASE(jdata); } + static void notify_requestor(int sd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_job_t *jdata = caddy->jdata; orte_proc_t *pptr; - int ret; + int ret, id, *idptr; opal_buffer_t *reply; /* notify the requestor */ reply = OBJ_NEW(opal_buffer_t); + /* see if there was any problem */ if 
(orte_get_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, (void**)&pptr, OPAL_PTR) && NULL != pptr) { ret = pptr->exit_code; + /* or whether we got cancelled by the user */ + } else if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CANCELLED, NULL, OPAL_BOOL)) { + ret = ORTE_ERR_JOB_CANCELLED; } else { ret = 0; } + /* return the completion status */ opal_dss.pack(reply, &ret, 1, OPAL_INT); - orte_rml.send_buffer_nb(&jdata->originator, reply, ORTE_RML_TAG_TOOL, send_callback, jdata); + + /* pack the jobid to be returned */ + opal_dss.pack(reply, &jdata->jobid, 1, ORTE_JOBID); + + /* return the tracker ID */ + idptr = &id; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&idptr, OPAL_INT)) { + /* pack the sender's index to the tracking object */ + opal_dss.pack(reply, idptr, 1, OPAL_INT); + } + + /* if there was a problem, we need to send the requestor more info about what happened */ + if (0 < ret) { + opal_dss.pack(reply, &jdata->state, 1, ORTE_JOB_STATE_T); + opal_dss.pack(reply, &pptr, 1, ORTE_PROC); + opal_dss.pack(reply, &pptr->node, 1, ORTE_NODE); + } + + orte_rml.send_buffer_nb(&jdata->originator, reply, ORTE_RML_TAG_NOTIFY_COMPLETE, send_callback, jdata); /* we cannot cleanup the job object as we might * hit an error during transmission, so clean it diff --git a/orte/tools/orte-submit/orte-submit.c b/orte/tools/orte-submit/orte-submit.c index c1cd57d160..4aabbf0aa0 100644 --- a/orte/tools/orte-submit/orte-submit.c +++ b/orte/tools/orte-submit/orte-submit.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -30,698 +30,141 @@ #include #include #include -#ifdef HAVE_STRINGS_H -#include -#endif /* HAVE_STRINGS_H */ -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#include -#include -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_WAIT_H -#include -#endif /* HAVE_SYS_WAIT_H */ -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ -#include -#ifdef HAVE_SYS_STAT_H -#include -#endif #include "opal/dss/dss.h" #include "opal/mca/event/event.h" -#include "opal/mca/installdirs/installdirs.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/util/basename.h" -#include "opal/util/cmd_line.h" -#include "opal/util/opal_environ.h" -#include "opal/util/opal_getcwd.h" -#include "opal/util/show_help.h" -#include "opal/util/fd.h" -#include "opal/sys/atomic.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif -#include "opal/version.h" -#include "opal/runtime/opal.h" -#include "opal/runtime/opal_info_support.h" -#include "opal/util/os_path.h" -#include "opal/util/path.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/dss/dss.h" - -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/plm/plm.h" -#include "orte/mca/rmaps/rmaps_types.h" -#include "orte/mca/rmaps/base/base.h" - -#include "orte/mca/schizo/schizo.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/routed/routed.h" - -#include "orte/runtime/runtime.h" +#include "orte/orted/orted_submit.h" #include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" -#include "orte/runtime/orte_quit.h" #include "orte/util/show_help.h" /* * Globals */ -static char **global_mca_env = NULL; -static orte_std_cntr_t total_num_apps = 0; -static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT; 
-volatile bool mywait = true; -volatile bool myspawn = true; +typedef struct { + int status; + volatile bool active; + orte_job_t *jdata; +} orte_submit_status_t; + +static void launched(int index, orte_job_t *jdata, int ret, void *cbdata); +static void completed(int index, orte_job_t *jdata, int ret, void *cbdata); -/* - * Globals - */ -static struct { - bool help; - bool version; - char *report_pid; - char *stdin_target; - bool index_argv; - bool preload_binaries; - char *preload_files; - char *appfile; - int num_procs; - char *hnp; - char *wdir; - bool set_cwd_to_session_dir; - char *path; - bool enable_recovery; - char *personality; - char *prefix; - bool terminate; - bool nolocal; - bool no_oversubscribe; - bool oversubscribe; - int cpus_per_proc; - bool pernode; - int npernode; - bool use_hwthreads_as_cpus; - int npersocket; - char *mapping_policy; - char *ranking_policy; - char *binding_policy; - bool report_bindings; - char *slot_list; - bool debug; - bool run_as_root; -} myglobals; static opal_cmd_line_init_t cmd_line_init[] = { - /* Various "obvious" options */ - { NULL, 'h', NULL, "help", 0, - &myglobals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - { NULL, 'V', NULL, "version", 0, - &myglobals.version, OPAL_CMD_LINE_TYPE_BOOL, - "Print version and exit" }, + { "orte_execute_quiet", 'q', NULL, "quiet", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Suppress helpful messages" }, { NULL, '\0', "report-pid", "report-pid", 1, - &myglobals.report_pid, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.report_pid, OPAL_CMD_LINE_TYPE_STRING, "Printout pid on stdout [-], stderr [+], or a file [anything else]" }, + { NULL, '\0', "report-uri", "report-uri", 1, + &orte_cmd_line.report_uri, OPAL_CMD_LINE_TYPE_STRING, + "Printout URI on stdout [-], stderr [+], or a file [anything else]" }, - /* select stdin option */ - { NULL, '\0', "stdin", "stdin", 1, - &myglobals.stdin_target, OPAL_CMD_LINE_TYPE_STRING, - "Specify procs to receive stdin [rank, all, none] (default: 0, 
indicating rank 0)" }, + /* exit status reporting */ + { "orte_report_child_jobs_separately", '\0', "report-child-jobs-separately", "report-child-jobs-separately", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Return the exit status of the primary job only" }, - /* request that argv[0] be indexed */ - { NULL, '\0', "index-argv-by-rank", "index-argv-by-rank", 0, - &myglobals.index_argv, OPAL_CMD_LINE_TYPE_BOOL, - "Uniquely index argv[0] for each process using its rank" }, + /* select XML output */ + { "orte_xml_output", '\0', "xml", "xml", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Provide all output in XML format" }, + { "orte_xml_file", '\0', "xml-file", "xml-file", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide all output in XML format to the specified file" }, - /* Preload the binary on the remote machine */ - { NULL, 's', NULL, "preload-binary", 0, - &myglobals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL, - "Preload the binary on the remote machine before starting the remote process." }, - - /* Preload files on the remote machine */ - { NULL, '\0', NULL, "preload-files", 1, - &myglobals.preload_files, OPAL_CMD_LINE_TYPE_STRING, - "Preload the comma separated list of files to the remote machines current working directory before starting the remote process." 
}, - - /* Use an appfile */ - { NULL, '\0', NULL, "app", 1, - &myglobals.appfile, OPAL_CMD_LINE_TYPE_STRING, - "Provide an appfile; ignore all other command line options" }, - - /* Number of processes; -c, -n, --n, -np, and --np are all - synonyms */ - { NULL, 'c', "np", "np", 1, - &myglobals.num_procs, OPAL_CMD_LINE_TYPE_INT, - "Number of processes to run" }, - { NULL, '\0', "n", "n", 1, - &myglobals.num_procs, OPAL_CMD_LINE_TYPE_INT, - "Number of processes to run" }, - - /* uri of the dvm, or at least where to get it */ - { NULL, '\0', "hnp", "hnp", 1, - &myglobals.hnp, OPAL_CMD_LINE_TYPE_STRING, - "Specify the URI of the Open MPI server, or the name of the file (specified as file:filename) that contains that info" }, + /* tag output */ + { "orte_tag_output", '\0', "tag-output", "tag-output", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Tag all output with [job,rank]" }, + { "orte_timestamp_output", '\0', "timestamp-output", "timestamp-output", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Timestamp all application process output" }, + { "orte_output_filename", '\0', "output-filename", "output-filename", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Redirect output from application processes into filename.rank" }, + { "orte_xterm", '\0', "xterm", "xterm", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Create a new xterm window and display output from the specified ranks there" }, /* tell the dvm to terminate */ { NULL, '\0', "terminate", "terminate", 0, - &myglobals.terminate, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL, "Terminate the DVM" }, - - /* Export environment variables; potentially used multiple times, - so it does not make sense to set into a variable */ - { NULL, 'x', NULL, NULL, 1, - NULL, OPAL_CMD_LINE_TYPE_NULL, - "Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo 
and sets its value to \"bar\" in the started processes)" }, - - /* Mapping controls */ - { NULL, 'H', "host", "host", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "List of hosts to invoke processes on" }, - { NULL, '\0', "nolocal", "nolocal", 0, - &myglobals.nolocal, OPAL_CMD_LINE_TYPE_BOOL, - "Do not run any MPI applications on the local node" }, - { NULL, '\0', "nooversubscribe", "nooversubscribe", 0, - &myglobals.no_oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes are not to be oversubscribed, even if the system supports such operation"}, - { NULL, '\0', "oversubscribe", "oversubscribe", 0, - &myglobals.oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes are allowed to be oversubscribed, even on a managed system, and overloading of processing elements"}, - { NULL, '\0', "cpus-per-proc", "cpus-per-proc", 1, - &myglobals.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT, - "Number of cpus to use for each process [default=1]" }, - - /* Nperxxx options that do not require topology and are always - * available - included for backwards compatibility - */ - { NULL, '\0', "pernode", "pernode", 0, - &myglobals.pernode, OPAL_CMD_LINE_TYPE_BOOL, - "Launch one process per available node" }, - { NULL, '\0', "npernode", "npernode", 1, - &myglobals.npernode, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per node on all allocated nodes" }, - { NULL, '\0', "N", NULL, 1, - &myglobals.npernode, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per node on all allocated nodes (synonym for npernode)" }, - - /* declare hardware threads as independent cpus */ - { NULL, '\0', "use-hwthread-cpus", "use-hwthread-cpus", 0, - &myglobals.use_hwthreads_as_cpus, OPAL_CMD_LINE_TYPE_BOOL, - "Use hardware threads as independent cpus" }, - - /* include npersocket for backwards compatibility */ - { NULL, '\0', "npersocket", "npersocket", 1, - &myglobals.npersocket, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per socket on all allocated nodes" }, - - /* Mapping options */ - { NULL, '\0', NULL, "map-by", 1, - 
&myglobals.mapping_policy, OPAL_CMD_LINE_TYPE_STRING, - "Mapping Policy [slot | hwthread | core | socket (default) | numa | board | node]" }, - - /* Ranking options */ - { NULL, '\0', NULL, "rank-by", 1, - &myglobals.ranking_policy, OPAL_CMD_LINE_TYPE_STRING, - "Ranking Policy [slot (default) | hwthread | core | socket | numa | board | node]" }, - - /* Binding options */ - { NULL, '\0', NULL, "bind-to", 1, - &myglobals.binding_policy, OPAL_CMD_LINE_TYPE_STRING, - "Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is the default when np<=2, and \"socket\" is the default when np>2). Allowed qualifiers: overload-allowed, if-supported" }, - - { NULL, '\0', "report-bindings", "report-bindings", 0, - &myglobals.report_bindings, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to report process bindings to stderr" }, - - /* slot list option */ - { NULL, '\0', "slot-list", "slot-list", 1, - &myglobals.slot_list, OPAL_CMD_LINE_TYPE_STRING, - "List of processor IDs to bind processes to [default=NULL]"}, - - /* mpiexec-like arguments */ - { NULL, '\0', "wdir", "wdir", 1, - &myglobals.wdir, OPAL_CMD_LINE_TYPE_STRING, - "Set the working directory of the started processes" }, - { NULL, '\0', "wd", "wd", 1, - &myglobals.wdir, OPAL_CMD_LINE_TYPE_STRING, - "Synonym for --wdir" }, - { NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0, - &myglobals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL, - "Set the working directory of the started processes to their session directory" }, - { NULL, '\0', "path", "path", 1, - &myglobals.path, OPAL_CMD_LINE_TYPE_STRING, - "PATH to be used to look for executables to start processes" }, - - { NULL, '\0', "enable-recovery", "enable-recovery", 0, - &myglobals.enable_recovery, OPAL_CMD_LINE_TYPE_BOOL, - "Enable recovery (resets all recovery options to on)" }, - - { NULL, '\0', "personality", "personality", 1, - 
&myglobals.personality, OPAL_CMD_LINE_TYPE_STRING, - "Programming model/language being used (default=\"ompi\")" }, - - { NULL, 'd', "debug-devel", "debug-devel", 0, - &myglobals.debug, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of OpenRTE" }, - - { NULL, '\0', "allow-run-as-root", "allow-run-as-root", 0, - &myglobals.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, - "Allow execution as root (STRONGLY DISCOURAGED)" }, - /* End of list */ { NULL, '\0', NULL, NULL, 0, NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } }; -/* - * Local functions - */ -static int create_app(int argc, char* argv[], - orte_job_t *jdata, - orte_app_context_t **app, - bool *made_app, char ***app_env); -static int init_globals(void); -static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line); -static int parse_locals(orte_job_t *jdata, int argc, char* argv[]); -static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile); -static int parse_appfile(orte_job_t *jdata, char *filename, char ***env); -static void orte_timeout_wakeup(int sd, short args, void *cbdata); -static void local_recv(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata); -static void spawn_recv(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata); - - int main(int argc, char *argv[]) { int rc; + orte_submit_status_t launchst, completest; opal_cmd_line_t cmd_line; - char *param; - orte_job_t *jdata=NULL; - opal_buffer_t *req; - orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_SPAWN_JOB_CMD; - - /* Setup and parse the command line */ - memset(&myglobals, 0, sizeof(myglobals)); - /* find our basename (the name of the executable) so that we can - use it in pretty-print error messages */ - orte_basename = opal_basename(argv[0]); + orte_cmd_line.terminate_dvm = NULL; + /* setup our cmd line */ opal_cmd_line_create(&cmd_line, cmd_line_init); mca_base_cmd_line_setup(&cmd_line); - if (OPAL_SUCCESS != (rc = 
opal_cmd_line_parse(&cmd_line, true, - argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(rc)); - } - return rc; + + /* initialize the RTE */ + if (ORTE_SUCCESS != (rc = orte_submit_init(argc, argv, &cmd_line))) { + fprintf(stderr, "Init failed due to duplicate command options\n"); + exit(rc); } - /* print version if requested. Do this before check for help so - that --version --help works as one might expect. */ - if (myglobals.version) { - char *str; - str = opal_info_make_version_str("all", - OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION, - OPAL_GREEK_VERSION, - OPAL_REPO_REV); - if (NULL != str) { - fprintf(stdout, "%s %s\n\nReport bugs to %s\n", - orte_basename, str, PACKAGE_BUGREPORT); - free(str); - } - exit(0); - } - - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !myglobals.run_as_root) { - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (myglobals.help) { - fprintf(stderr, "%s cannot provide the help message when run as root\n", orte_basename); - } else { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); - } - fprintf(stderr, " This is *strongly* discouraged as any mistake (e.g., in defining TMPDIR) or bug can\n"); - fprintf(stderr, "result in catastrophic damage to the OS file system, leaving\n"); - fprintf(stderr, "your system in an unusable state.\n\n"); - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your cmd line. 
However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - - /* - * Since this process can now handle MCA/GMCA parameters, make sure to - * process them. - */ - if (OPAL_SUCCESS != mca_base_cmd_line_process_args(&cmd_line, &environ, &environ)) { - exit(1); - } - - /* if they didn't point us at an HNP, that's an error */ - if (NULL == myglobals.hnp) { - fprintf(stderr, "orte-submit: required option --hnp not provided\n"); - exit(1); - } - - /* Ensure that enough of OPAL is setup for us to be able to run */ - /* - * NOTE: (JJH) - * We need to allow 'mca_base_cmd_line_process_args()' to process command - * line arguments *before* calling opal_init_util() since the command - * line could contain MCA parameters that affect the way opal_init_util() - * functions. AMCA parameters are one such option normally received on the - * command line that affect the way opal_init_util() behaves. - * It is "safe" to call mca_base_cmd_line_process_args() before - * opal_init_util() since mca_base_cmd_line_process_args() does *not* - * depend upon opal_init_util() functionality. 
- */ - /* Need to initialize OPAL so that install_dirs are filled in */ - if (OPAL_SUCCESS != opal_init(&argc, &argv)) { - exit(1); - } - - /* Check for help request */ - if (myglobals.help) { - char *str, *args = NULL; - char *project_name = NULL; - - if (0 == strcmp(orte_basename, "mpirun")) { - project_name = "Open MPI"; - } else { - project_name = "OpenRTE"; - } - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - orte_basename, project_name, OPAL_VERSION, - orte_basename, args, - PACKAGE_BUGREPORT); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - - /* If someone asks for help, that should be all we do */ - exit(0); - } - - /* Check for some "global" command line params */ - parse_globals(argc, argv, &cmd_line); - OBJ_DESTRUCT(&cmd_line); - - if (0 == strncasecmp(myglobals.hnp, "file", strlen("file"))) { - char input[1024], *filename; - FILE *fp; - - /* it is a file - get the filename */ - filename = strchr(myglobals.hnp, ':'); - if (NULL == filename) { - /* filename is not correctly formatted */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", myglobals.hnp); - exit(1); - } - ++filename; /* space past the : */ - - if (0 >= strlen(filename)) { - /* they forgot to give us the name! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", myglobals.hnp); - exit(1); - } - - /* open the file and extract the uri */ - fp = fopen(filename, "r"); - if (NULL == fp) { /* can't find or read file! 
*/ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, myglobals.hnp); - exit(1); - } - if (NULL == fgets(input, 1024, fp)) { - /* something malformed about file */ - fclose(fp); - orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, myglobals.hnp); - exit(1); - } - fclose(fp); - input[strlen(input)-1] = '\0'; /* remove newline */ - /* construct the target hnp info */ - opal_setenv("OMPI_MCA_orte_hnp_uri", input, true, &environ); - } else { - /* should just be the uri itself - construct the target hnp info */ - opal_setenv("OMPI_MCA_orte_hnp_uri", myglobals.hnp, true, &environ); - } - - /* Setup MCA params */ - orte_register_params(); - - /* flag that I am a TOOL */ - orte_process_info.proc_type = ORTE_PROC_TOOL; - - /* we are never allowed to operate as a distributed tool, - * so insist on the ess/tool component */ - opal_setenv("OMPI_MCA_ess", "tool", true, &environ); - - if (myglobals.debug) { - orte_devel_level_output = true; - } - - /* Intialize our Open RTE environment - * Set the flag telling orte_init that I am NOT a - * singleton, but am "infrastructure" - prevents setting - * up incorrect infrastructure that only a singleton would - * require - */ - if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) { - /* cannot call ORTE_ERROR_LOG as it could be the errmgr - * never got loaded! - */ - return rc; - } - /* finalize OPAL. As it was opened again from orte_init->opal_init - * we continue to have a reference count on it. So we have to finalize it twice... 
- */ - opal_finalize(); - - /* clear the ess param from the environment so our children - * don't pick it up */ - opal_unsetenv("OMPI_MCA_ess", &environ); - - /* set the info in our contact table */ - orte_rml.set_contact_info(orte_process_info.my_hnp_uri); - /* extract the name */ - if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) { - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); - exit(1); - } - /* set the route to be direct */ - if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); - orte_finalize(); - exit(1); - } - - /* set the target hnp as our lifeline so we will terminate if it exits */ - orte_routed.set_lifeline(ORTE_PROC_MY_HNP); - - /* setup to listen for HNP response to my commands */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_TOOL, - ORTE_RML_PERSISTENT, local_recv, NULL); - - /* set a timeout event in case the HNP doesn't answer */ - /* if this is the terminate command, just send it */ - if (myglobals.terminate) { - opal_buffer_t *buf; - orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_HALT_DVM_CMD; - buf = OBJ_NEW(opal_buffer_t); - opal_dss.pack(buf, &cmd, 1, ORTE_DAEMON_CMD); - orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, - ORTE_RML_TAG_DAEMON, - orte_rml_send_callback, NULL); - goto waiting; + if (orte_cmd_line.terminate_dvm) { + rc = orte_submit_halt(); + /* just loop the event library - the errmgr + * will exit us when the connection to our + * HNP closes */ + while (1) { + opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); + } } - /* default our personality to OMPI */ - if (NULL == myglobals.personality) { - myglobals.personality = strdup("ompi"); - } - - /* create a new job object to hold the info for this one - the - * jobid field will be filled in by the PLM when the job is - * launched - */ - jdata = 
OBJ_NEW(orte_job_t); - if (NULL == jdata) { - /* cannot call ORTE_ERROR_LOG as the errmgr - * hasn't been loaded yet! - */ - return ORTE_ERR_OUT_OF_RESOURCE; - } - jdata->personality = strdup(myglobals.personality); - - /* check what user wants us to do with stdin */ - if (NULL != myglobals.stdin_target) { - if (0 == strcmp(myglobals.stdin_target, "all")) { - jdata->stdin_target = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(myglobals.stdin_target, "none")) { - jdata->stdin_target = ORTE_VPID_INVALID; - } else { - jdata->stdin_target = strtoul(myglobals.stdin_target, NULL, 10); + /* launch whatever job we were given */ + memset(&launchst, 0, sizeof(launchst)); + memset(&completest, 0, sizeof(completest)); + launchst.active = true; + completest.active = true; + if (ORTE_SUCCESS != (rc = orte_submit_job(argv, NULL, + launched, &launchst, + completed, &completest))) { + if (ORTE_ERR_OP_IN_PROGRESS == rc) { + /* terminate command was given */ + goto waiting; } + opal_output(0, "JOB FAILED TO LAUNCH WITH ERROR %d:%s", + rc, ORTE_ERROR_NAME(rc)); + goto DONE; } - /* if we want the argv's indexed, indicate that */ - if (myglobals.index_argv) { - orte_set_attribute(&jdata->attributes, ORTE_JOB_INDEX_ARGV, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } - - /* Parse each app, adding it to the job object */ - parse_locals(jdata, argc, argv); - - /* create the map object to communicate policies */ - jdata->map = OBJ_NEW(orte_job_map_t); - - if (NULL != myglobals.mapping_policy) { - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, NULL, myglobals.mapping_policy))) { - ORTE_ERROR_LOG(rc); - exit(rc); - } - } else if (myglobals.pernode) { - ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_PPR); - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN); - /* define the ppr */ - jdata->map->ppr = strdup("1:node"); - } else if (0 < myglobals.npernode) { - ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_PPR); - 
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN); - /* define the ppr */ - (void)asprintf(&jdata->map->ppr, "%d:node", myglobals.npernode); - } - if (NULL != myglobals.ranking_policy) { - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, - jdata->map->mapping, - myglobals.ranking_policy))) { - ORTE_ERROR_LOG(rc); - exit(rc); - } - } - if (NULL != myglobals.binding_policy) { - if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, - myglobals.binding_policy))) { - ORTE_ERROR_LOG(rc); - exit(rc); - } - } - - /* if they asked for nolocal, mark it so */ - if (myglobals.nolocal) { - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL); - } - if (myglobals.no_oversubscribe) { - ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); - } - if (myglobals.oversubscribe) { - ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); - } - if (myglobals.report_bindings) { - orte_set_attribute(&jdata->attributes, ORTE_JOB_REPORT_BINDINGS, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } - if (myglobals.slot_list) { - orte_set_attribute(&jdata->attributes, ORTE_JOB_SLOT_LIST, ORTE_ATTR_GLOBAL, myglobals.slot_list, OPAL_STRING); - } - if (NULL == myglobals.personality) { - /* default to ompi */ - jdata->personality = strdup("ompi"); - } else { - jdata->personality = strdup(myglobals.personality); - } - - if (0 == jdata->num_apps) { - /* This should never happen -- this case should be caught in - create_app(), but let's just double check... 
*/ - orte_show_help("help-orterun.txt", "orterun:nothing-to-do", - true, orte_basename); - exit(ORTE_ERROR_DEFAULT_EXIT_CODE); - } - - /* check for a job timeout specification, to be provided in seconds - * as that is what MPICH used - */ - if (NULL != (param = getenv("MPIEXEC_TIMEOUT"))) { - if (NULL == (orte_mpiexec_timeout = OBJ_NEW(orte_timer_t))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE); - goto DONE; - } - orte_mpiexec_timeout->tv.tv_sec = strtol(param, NULL, 10); - orte_mpiexec_timeout->tv.tv_usec = 0; - opal_event_evtimer_set(orte_event_base, orte_mpiexec_timeout->ev, - orte_timeout_wakeup, jdata); - opal_event_set_priority(orte_mpiexec_timeout->ev, ORTE_ERROR_PRI); - opal_event_evtimer_add(orte_mpiexec_timeout->ev, &orte_mpiexec_timeout->tv); - } - - /* if recovery was disabled on the cmd line, do so */ - if (myglobals.enable_recovery) { - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_RECOVERABLE); - } - - /* ask the HNP to spawn the job for us */ - // post recv on tag_confirm_spawn, pass jdata as cbdata - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_CONFIRM_SPAWN, - ORTE_RML_PERSISTENT, spawn_recv, jdata); - // pack the ORTE_DAEMON_SPAWN_JOB_CMD command and job object and send to HNP at tag ORTE_RML_TAG_DAEMON - req = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &cmd, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(rc); - exit(rc); - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &jdata, 1, ORTE_JOB))) { - ORTE_ERROR_LOG(rc); - exit(rc); - } - orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); - // wait for response and unpack the status, jobid - while (myspawn) { + while (launchst.active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } if (orte_debug_flag) { - opal_output(0, "Job %s has launched", ORTE_JOBID_PRINT(jdata->jobid)); + opal_output(0, "Job %s has launched", ORTE_JOBID_PRINT(launchst.jdata->jobid)); + } + if 
(ORTE_SUCCESS != launchst.status) { + goto DONE; } - waiting: - while (mywait) { + waiting: + while (completest.active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } - DONE: + DONE: /* cleanup and leave */ - orte_finalize(); + orte_submit_finalize(); if (orte_debug_flag) { fprintf(stderr, "exiting with status %d\n", orte_exit_status); @@ -729,869 +172,21 @@ int main(int argc, char *argv[]) exit(orte_exit_status); } -static int init_globals(void) +static void launched(int index, orte_job_t *jdata, int ret, void *cbdata) { - /* Reset the other fields every time */ - myglobals.help = false; - myglobals.version = false; - myglobals.num_procs = 0; - if (NULL != myglobals.appfile) { - free(myglobals.appfile); - } - myglobals.appfile = NULL; - if (NULL != myglobals.wdir) { - free(myglobals.wdir); - } - myglobals.set_cwd_to_session_dir = false; - myglobals.wdir = NULL; - if (NULL != myglobals.path) { - free(myglobals.path); - } - myglobals.path = NULL; - - myglobals.preload_binaries = false; - myglobals.preload_files = NULL; - - /* All done */ - return ORTE_SUCCESS; -} - - -static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line) -{ - /* check for request to report pid */ - if (NULL != myglobals.report_pid) { - FILE *fp; - if (0 == strcmp(myglobals.report_pid, "-")) { - /* if '-', then output to stdout */ - printf("%d\n", (int)getpid()); - } else if (0 == strcmp(myglobals.report_pid, "+")) { - /* if '+', output to stderr */ - fprintf(stderr, "%d\n", (int)getpid()); - } else { - fp = fopen(myglobals.report_pid, "w"); - if (NULL == fp) { - orte_show_help("help-orterun.txt", "orterun:write_file", false, - orte_basename, "pid", myglobals.report_pid); - exit(0); - } - fprintf(fp, "%d\n", (int)getpid()); - fclose(fp); - } - } - - return ORTE_SUCCESS; -} - - -static int parse_locals(orte_job_t *jdata, int argc, char* argv[]) -{ - int i, rc, app_num; - int temp_argc; - char **temp_argv, **env; - orte_app_context_t *app; - bool made_app; - 
orte_std_cntr_t j, size1; - - /* Make the apps */ - temp_argc = 0; - temp_argv = NULL; - opal_argv_append(&temp_argc, &temp_argv, argv[0]); - - /* NOTE: This bogus env variable is necessary in the calls to - create_app(), below. See comment immediately before the - create_app() function for an explanation. */ - - env = NULL; - for (app_num = 0, i = 1; i < argc; ++i) { - if (0 == strcmp(argv[i], ":")) { - /* Make an app with this argv */ - if (opal_argv_count(temp_argv) > 1) { - if (NULL != env) { - opal_argv_free(env); - env = NULL; - } - app = NULL; - rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); - /** keep track of the number of apps - point this app_context to that index */ - if (ORTE_SUCCESS != rc) { - /* Assume that the error message has already been - printed; no need to cleanup -- we can just - exit */ - exit(1); - } - if (made_app) { - app->idx = app_num; - ++app_num; - opal_pointer_array_add(jdata->apps, app); - ++jdata->num_apps; - } - - /* Reset the temps */ - - temp_argc = 0; - temp_argv = NULL; - opal_argv_append(&temp_argc, &temp_argv, argv[0]); - } - } else { - opal_argv_append(&temp_argc, &temp_argv, argv[i]); - } - } - - if (opal_argv_count(temp_argv) > 1) { - app = NULL; - rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); - if (ORTE_SUCCESS != rc) { - /* Assume that the error message has already been printed; - no need to cleanup -- we can just exit */ - exit(1); - } - if (made_app) { - app->idx = app_num; - ++app_num; - opal_pointer_array_add(jdata->apps, app); - ++jdata->num_apps; - } - } - if (NULL != env) { - opal_argv_free(env); - } - opal_argv_free(temp_argv); - - /* Once we've created all the apps, add the global MCA params to - each app's environment (checking for duplicates, of - course -- yay opal_environ_merge()). 
*/ - - if (NULL != global_mca_env) { - size1 = (size_t)opal_pointer_array_get_size(jdata->apps); - /* Iterate through all the apps */ - for (j = 0; j < size1; ++j) { - app = (orte_app_context_t *) - opal_pointer_array_get_item(jdata->apps, j); - if (NULL != app) { - /* Use handy utility function */ - env = opal_environ_merge(global_mca_env, app->env); - opal_argv_free(app->env); - app->env = env; - } - } - } - - /* Now take a subset of the MCA params and set them as MCA - overrides here in orterun (so that when we orte_init() later, - all the components see these MCA params). Here's how we decide - which subset of the MCA params we set here in orterun: - - 1. If any global MCA params were set, use those - 2. If no global MCA params were set and there was only one app, - then use its app MCA params - 3. Otherwise, don't set any - */ - - env = NULL; - if (NULL != global_mca_env) { - env = global_mca_env; - } else { - if (opal_pointer_array_get_size(jdata->apps) >= 1) { - /* Remember that pointer_array's can be padded with NULL - entries; so only use the app's env if there is exactly - 1 non-NULL entry */ - app = (orte_app_context_t *) - opal_pointer_array_get_item(jdata->apps, 0); - if (NULL != app) { - env = app->env; - for (j = 1; j < opal_pointer_array_get_size(jdata->apps); ++j) { - if (NULL != opal_pointer_array_get_item(jdata->apps, j)) { - env = NULL; - break; - } - } - } - } - } - - if (NULL != env) { - size1 = opal_argv_count(env); - for (j = 0; j < size1; ++j) { - /* Use-after-Free error possible here. putenv does not copy - * the string passed to it, and instead stores only the pointer. - * env[j] may be freed later, in which case the pointer - * in environ will now be left dangling into a deallocated - * region. - * So we make a copy of the variable. 
- */ - char *s = strdup(env[j]); - - if (NULL == s) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - putenv(s); - } - } - - /* All done */ - - return ORTE_SUCCESS; -} - - -/* - * This function takes a "char ***app_env" parameter to handle the - * specific case: - * - * orterun --mca foo bar -app appfile - * - * That is, we'll need to keep foo=bar, but the presence of the app - * file will cause an invocation of parse_appfile(), which will cause - * one or more recursive calls back to create_app(). Since the - * foo=bar value applies globally to all apps in the appfile, we need - * to pass in the "base" environment (that contains the foo=bar value) - * when we parse each line in the appfile. - * - * This is really just a special case -- when we have a simple case like: - * - * orterun --mca foo bar -np 4 hostname - * - * Then the upper-level function (parse_locals()) calls create_app() - * with a NULL value for app_env, meaning that there is no "base" - * environment that the app needs to be created from. - */ -static int create_app(int argc, char* argv[], - orte_job_t *jdata, - orte_app_context_t **app_ptr, - bool *made_app, char ***app_env) -{ - opal_cmd_line_t cmd_line; - char cwd[OPAL_PATH_MAX]; - int i, j, count, rc; - char *param, *value; - orte_app_context_t *app = NULL; - bool cmd_line_made = false; - bool found = false; - char *appname; - - *made_app = false; - - /* Pre-process the command line if we are going to parse an appfile later. - * save any mca command line args so they can be passed - * separately to the daemons. - * Use Case: - * $ cat launch.appfile - * -np 1 -mca aaa bbb ./my-app -mca ccc ddd - * -np 1 -mca aaa bbb ./my-app -mca eee fff - * $ mpirun -np 2 -mca foo bar --app launch.appfile - * Only pick up '-mca foo bar' on this pass. - */ - if (NULL != myglobals.appfile) { - if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(myglobals.personality, argc, 0, argv))) { - goto cleanup; - } - } - - /* Parse application command line options. 
*/ - - init_globals(); - opal_cmd_line_create(&cmd_line, cmd_line_init); - mca_base_cmd_line_setup(&cmd_line); - cmd_line_made = true; - rc = opal_cmd_line_parse(&cmd_line, true, argc, argv); - if (ORTE_SUCCESS != rc) { - goto cleanup; - } - mca_base_cmd_line_process_args(&cmd_line, app_env, &global_mca_env); - - /* Is there an appfile in here? */ - - if (NULL != myglobals.appfile) { - OBJ_DESTRUCT(&cmd_line); - return parse_appfile(jdata, strdup(myglobals.appfile), app_env); - } - - /* Setup application context */ - - app = OBJ_NEW(orte_app_context_t); - opal_cmd_line_get_tail(&cmd_line, &count, &app->argv); - - /* See if we have anything left */ - - if (0 == count) { - orte_show_help("help-orterun.txt", "orterun:executable-not-specified", - true, orte_basename, orte_basename); - rc = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* - * Get mca parameters so we can pass them to the daemons. - * Use the count determined above to make sure we do not go past - * the executable name. Example: - * mpirun -np 2 -mca foo bar ./my-app -mca bip bop - * We want to pick up '-mca foo bar' but not '-mca bip bop' - */ - if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(myglobals.personality, - argc, count, argv))) { - goto cleanup; - } - - /* Grab all OMPI_* environment variables */ - - app->env = opal_argv_copy(*app_env); - if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(myglobals.personality, - myglobals.path, - &cmd_line, - environ, &app->env))) { - goto cleanup; - } - - - /* Did the user request a specific wdir? 
*/ - - if (NULL != myglobals.wdir) { - /* if this is a relative path, convert it to an absolute path */ - if (opal_path_is_absolute(myglobals.wdir)) { - app->cwd = strdup(myglobals.wdir); - } else { - /* get the cwd */ - if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { - orte_show_help("help-orterun.txt", "orterun:init-failure", - true, "get the cwd", rc); - goto cleanup; - } - /* construct the absolute path */ - app->cwd = opal_os_path(false, cwd, myglobals.wdir, NULL); - } - orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } else if (myglobals.set_cwd_to_session_dir) { - orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } else { - if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { - orte_show_help("help-orterun.txt", "orterun:init-failure", - true, "get the cwd", rc); - goto cleanup; - } - app->cwd = strdup(cwd); - } - - /* if this is the first app_context, check for prefix directions. - * We only do this for the first app_context because the launchers - * only look at the first one when setting the prefix - we do NOT - * support per-app_context prefix settings! - */ - if (0 == total_num_apps) { - /* Check to see if the user explicitly wanted to disable automatic - --prefix behavior */ - - if (opal_cmd_line_is_taken(&cmd_line, "noprefix")) { - want_prefix_by_default = false; - } - - /* Did the user specify a prefix, or want prefix by default? 
*/ - if (opal_cmd_line_is_taken(&cmd_line, "prefix") || want_prefix_by_default) { - size_t param_len; - /* if both the prefix was given and we have a prefix - * given above, check to see if they match - */ - if (opal_cmd_line_is_taken(&cmd_line, "prefix") && - NULL != myglobals.prefix) { - /* if they don't match, then that merits a warning */ - param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); - /* ensure we strip any trailing '/' */ - if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) { - param[strlen(param)-1] = '\0'; - } - value = strdup(myglobals.prefix); - if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) { - value[strlen(value)-1] = '\0'; - } - if (0 != strcmp(param, value)) { - orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict", - true, orte_basename, value, param); - /* let the global-level prefix take precedence since we - * know that one is being used - */ - free(param); - param = strdup(myglobals.prefix); - } - free(value); - } else if (NULL != myglobals.prefix) { - param = strdup(myglobals.prefix); - } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){ - /* must be --prefix alone */ - param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); - } else { - /* --enable-orterun-prefix-default was given to orterun */ - param = strdup(opal_install_dirs.prefix); - } - - if (NULL != param) { - /* "Parse" the param, aka remove superfluous path_sep. */ - param_len = strlen(param); - while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) { - param[param_len-1] = '\0'; - param_len--; - if (0 == param_len) { - orte_show_help("help-orterun.txt", "orterun:empty-prefix", - true, orte_basename, orte_basename); - free(param); - return ORTE_ERR_FATAL; - } - } - orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING); - free(param); - } - } - } - - /* Did the user specify a hostfile. Need to check for both - * hostfile and machine file. 
- * We can only deal with one hostfile per app context, otherwise give an error. - */ - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) { - if(1 < j) { - orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, orte_basename, NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0); - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_GLOBAL, value, OPAL_STRING); - } - } - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) { - if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) { - orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, orte_basename, NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0); - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_GLOBAL, value, OPAL_STRING); - } - } - - /* Did the user specify any hosts? */ - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "host"))) { - char **targ=NULL, *tval; - for (i = 0; i < j; ++i) { - value = opal_cmd_line_get_param(&cmd_line, "host", i, 0); - opal_argv_append_nosize(&targ, value); - } - tval = opal_argv_join(targ, ','); - orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_GLOBAL, tval, OPAL_STRING); - opal_argv_free(targ); - free(tval); - } else if (NULL != orte_default_dash_host) { - orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, - orte_default_dash_host, OPAL_STRING); - } - - /* check for bozo error */ - if (0 > myglobals.num_procs) { - orte_show_help("help-orterun.txt", "orterun:negative-nprocs", - true, orte_basename, app->argv[0], - myglobals.num_procs, NULL); - return ORTE_ERR_FATAL; - } - - app->num_procs = (orte_std_cntr_t)myglobals.num_procs; - total_num_apps++; - - /* Capture any preload flags */ - if (myglobals.preload_binaries) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_GLOBAL, NULL, 
OPAL_BOOL); - } - /* if we were told to cwd to the session dir and the app was given in - * relative syntax, then we need to preload the binary to - * find the app - don't do this for java apps, however, as we - * can't easily find the class on the cmd line. Java apps have to - * preload their binary via the preload_files option - */ - if (!opal_path_is_absolute(app->argv[0]) && - NULL == strstr(app->argv[0], "java")) { - if (myglobals.preload_binaries) { - orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } else if (orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } - } - if (NULL != myglobals.preload_files) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, ORTE_ATTR_GLOBAL, - myglobals.preload_files, OPAL_STRING); - } - - /* Do not try to find argv[0] here -- the starter is responsible - for that because it may not be relevant to try to find it on - the node where orterun is executing. So just strdup() argv[0] - into app. */ - - app->app = strdup(app->argv[0]); - if (NULL == app->app) { - orte_show_help("help-orterun.txt", "orterun:call-failed", - true, orte_basename, "library", "strdup returned NULL", errno); - rc = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* if this is a Java application, we have a bit more work to do. Such - * applications actually need to be run under the Java virtual machine - * and the "java" command will start the "executable". So we need to ensure - * that all the proper java-specific paths are provided - */ - appname = opal_basename(app->app); - if (0 == strcmp(appname, "java")) { - /* see if we were given a library path */ - found = false; - for (i=1; NULL != app->argv[i]; i++) { - if (NULL != strstr(app->argv[i], "java.library.path")) { - /* yep - but does it include the path to the mpi libs? 
*/ - found = true; - if (NULL == strstr(app->argv[i], opal_install_dirs.libdir)) { - /* doesn't appear to - add it to be safe */ - if (':' == app->argv[i][strlen(app->argv[i]-1)]) { - asprintf(&value, "-Djava.library.path=%s%s", app->argv[i], opal_install_dirs.libdir); - } else { - asprintf(&value, "-Djava.library.path=%s:%s", app->argv[i], opal_install_dirs.libdir); - } - free(app->argv[i]); - app->argv[i] = value; - } - break; - } - } - if (!found) { - /* need to add it right after the java command */ - asprintf(&value, "-Djava.library.path=%s", opal_install_dirs.libdir); - opal_argv_insert_element(&app->argv, 1, value); - free(value); - } - - /* see if we were given a class path */ - found = false; - for (i=1; NULL != app->argv[i]; i++) { - if (NULL != strstr(app->argv[i], "cp") || - NULL != strstr(app->argv[i], "classpath")) { - /* yep - but does it include the path to the mpi libs? */ - found = true; - /* check if mpi.jar exists - if so, add it */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, i+1, "mpi.jar"); - } - free(value); - /* check for oshmem support */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, i+1, "shmem.jar"); - } - free(value); - /* always add the local directory */ - asprintf(&value, "%s:%s", app->cwd, app->argv[i+1]); - free(app->argv[i+1]); - app->argv[i+1] = value; - break; - } - } - if (!found) { - /* check to see if CLASSPATH is in the environment */ - found = false; // just to be pedantic - for (i=0; NULL != environ[i]; i++) { - if (0 == strncmp(environ[i], "CLASSPATH", strlen("CLASSPATH"))) { - value = strchr(environ[i], '='); - ++value; /* step over the = */ - opal_argv_insert_element(&app->argv, 1, value); - /* check for mpi.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - 
set_classpath_jar_file(app, 1, "mpi.jar"); - } - free(value); - /* check for shmem.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, 1, "shmem.jar"); - } - free(value); - /* always add the local directory */ - (void)asprintf(&value, "%s:%s", app->cwd, app->argv[1]); - free(app->argv[1]); - app->argv[1] = value; - opal_argv_insert_element(&app->argv, 1, "-cp"); - found = true; - break; - } - } - if (!found) { - /* need to add it right after the java command - have - * to include the working directory and trust that - * the user set cwd if necessary - */ - char *str, *str2; - /* always start with the working directory */ - str = strdup(app->cwd); - /* check for mpi.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - (void)asprintf(&str2, "%s:%s", str, value); - free(str); - str = str2; - } - free(value); - /* check for shmem.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - asprintf(&str2, "%s:%s", str, value); - free(str); - str = str2; - } - free(value); - opal_argv_insert_element(&app->argv, 1, str); - free(str); - opal_argv_insert_element(&app->argv, 1, "-cp"); - } - } - /* try to find the actual command - may not be perfect */ - for (i=1; i < opal_argv_count(app->argv); i++) { - if (NULL != strstr(app->argv[i], "java.library.path")) { - continue; - } else if (NULL != strstr(app->argv[i], "cp") || - NULL != strstr(app->argv[i], "classpath")) { - /* skip the next field */ - i++; - continue; - } - /* declare this the winner */ - opal_setenv("OMPI_COMMAND", app->argv[i], true, &app->env); - /* collect everything else as the cmd line */ - if ((i+1) < opal_argv_count(app->argv)) { - value = opal_argv_join(&app->argv[i+1], ' '); - opal_setenv("OMPI_ARGV", value, true, &app->env); - free(value); - } - break; - } - } else { - /* add the 
cmd to the environment for MPI_Info to pickup */ - opal_setenv("OMPI_COMMAND", appname, true, &app->env); - if (1 < opal_argv_count(app->argv)) { - value = opal_argv_join(&app->argv[1], ' '); - opal_setenv("OMPI_ARGV", value, true, &app->env); - free(value); - } - } - free(appname); - - *app_ptr = app; - app = NULL; - *made_app = true; - - /* All done */ - - cleanup: - if (NULL != app) { - OBJ_RELEASE(app); - } - if (cmd_line_made) { - OBJ_DESTRUCT(&cmd_line); - } - return rc; -} - -static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile) -{ - if (NULL == strstr(app->argv[index], jarfile)) { - /* nope - need to add it */ - char *fmt = ':' == app->argv[index][strlen(app->argv[index]-1)] - ? "%s%s/%s" : "%s:%s/%s"; - char *str; - asprintf(&str, fmt, app->argv[index], opal_install_dirs.libdir, jarfile); - free(app->argv[index]); - app->argv[index] = str; - } -} - -static int parse_appfile(orte_job_t *jdata, char *filename, char ***env) -{ - size_t i, len; - FILE *fp; - char line[BUFSIZ]; - int rc, argc, app_num; - char **argv; - orte_app_context_t *app; - bool blank, made_app; - char bogus[] = "bogus "; - char **tmp_env; - - /* - * Make sure to clear out this variable so we don't do anything odd in - * app_create() - */ - if (NULL != myglobals.appfile) { - free(myglobals.appfile); - myglobals.appfile = NULL; - } - - /* Try to open the file */ - - fp = fopen(filename, "r"); - if (NULL == fp) { - orte_show_help("help-orterun.txt", "orterun:appfile-not-found", true, - filename); - return ORTE_ERR_NOT_FOUND; - } - - /* Read in line by line */ - - line[sizeof(line) - 1] = '\0'; - app_num = 0; - do { - - /* We need a bogus argv[0] (because when argv comes in from - the command line, argv[0] is "orterun", so the parsing - logic ignores it). So create one here rather than making - an argv and then pre-pending a new argv[0] (which would be - rather inefficient). 
*/ - - line[0] = '\0'; - strcat(line, bogus); - - if (NULL == fgets(line + sizeof(bogus) - 1, - sizeof(line) - sizeof(bogus) - 1, fp)) { - break; - } - - /* Remove a trailing newline */ - - len = strlen(line); - if (len > 0 && '\n' == line[len - 1]) { - line[len - 1] = '\0'; - if (len > 0) { - --len; - } - } - - /* Remove comments */ - - for (i = 0; i < len; ++i) { - if ('#' == line[i]) { - line[i] = '\0'; - break; - } else if (i + 1 < len && '/' == line[i] && '/' == line[i + 1]) { - line[i] = '\0'; - break; - } - } - - /* Is this a blank line? */ - - len = strlen(line); - for (blank = true, i = sizeof(bogus); i < len; ++i) { - if (!isspace(line[i])) { - blank = false; - break; - } - } - if (blank) { - continue; - } - - /* We got a line with *something* on it. So process it */ - - argv = opal_argv_split(line, ' '); - argc = opal_argv_count(argv); - if (argc > 0) { - - /* Create a temporary env to use in the recursive call -- - that is: don't disturb the original env so that we can - have a consistent global env. This allows for the - case: - - orterun --mca foo bar --appfile file - - where the "file" contains multiple apps. In this case, - each app in "file" will get *only* foo=bar as the base - environment from which its specific environment is - constructed. 
*/ - - if (NULL != *env) { - tmp_env = opal_argv_copy(*env); - if (NULL == tmp_env) { - fclose(fp); - opal_argv_free(argv); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } else { - tmp_env = NULL; - } - - rc = create_app(argc, argv, jdata, &app, &made_app, &tmp_env); - if (ORTE_SUCCESS != rc) { - /* Assume that the error message has already been - printed; no need to cleanup -- we can just exit */ - exit(1); - } - if (NULL != tmp_env) { - opal_argv_free(tmp_env); - } - if (made_app) { - app->idx = app_num; - ++app_num; - opal_pointer_array_add(jdata->apps, app); - ++jdata->num_apps; - } - } - opal_argv_free(argv); - } while (!feof(fp)); - fclose(fp); - - /* All done */ - - free(filename); - - return ORTE_SUCCESS; -} - -void orte_timeout_wakeup(int sd, short args, void *cbdata) -{ - char *tm; - - /* this function gets called when the job execution time - * has hit a prescribed limit - so just abort - */ - tm = getenv("MPIEXEC_TIMEOUT"); - orte_show_help("help-orterun.txt", "orterun:timeout", - true, (NULL == tm) ? 
"NULL" : tm); - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - exit(orte_exit_status); -} - -static void local_recv(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata) -{ - int rc, ret; - int32_t cnt; - - /* unpack the completion status of the job */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) { - ORTE_UPDATE_EXIT_STATUS(rc); - } - /* update our exit status to match */ + orte_submit_status_t *launchst = (orte_submit_status_t*)cbdata; + launchst->status = ret; ORTE_UPDATE_EXIT_STATUS(ret); - - exit(orte_exit_status); + OBJ_RETAIN(jdata); + launchst->jdata = jdata; + launchst->active = false; } - -static void spawn_recv(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata) +static void completed(int index, orte_job_t *jdata, int ret, void *cbdata) { - orte_job_t *jdata = (orte_job_t*)cbdata; - int32_t cnt; - - // extract the returned jobid - cnt = 1; - opal_dss.unpack(buffer, &jdata->jobid, &cnt, ORTE_JOBID); - - // release the wait - myspawn = false; + orte_submit_status_t *completest = (orte_submit_status_t*)cbdata; + completest->status = ret; + ORTE_UPDATE_EXIT_STATUS(ret); + OBJ_RETAIN(jdata); + completest->jdata = jdata; + completest->active = false; } diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index fe58e6bb67..dab1f3a0a0 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -157,32 +157,27 @@ void* MPIR_Breakpoint(void) static char **global_mca_env = NULL; static orte_std_cntr_t total_num_apps = 0; static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT; - -/* - * Globals - */ -struct orterun_globals_t orterun_globals = {0}; static bool globals_init = false; static opal_cmd_line_init_t cmd_line_init[] = { /* Various "obvious" options */ { NULL, 'h', NULL, "help", 0, - &orterun_globals.help, OPAL_CMD_LINE_TYPE_BOOL, + 
&orte_cmd_line.help, OPAL_CMD_LINE_TYPE_BOOL, "This help message" }, { NULL, 'V', NULL, "version", 0, - &orterun_globals.version, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.version, OPAL_CMD_LINE_TYPE_BOOL, "Print version and exit" }, { NULL, 'v', NULL, "verbose", 0, - &orterun_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.verbose, OPAL_CMD_LINE_TYPE_BOOL, "Be verbose" }, { "orte_execute_quiet", 'q', NULL, "quiet", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, "Suppress helpful messages" }, { NULL, '\0', "report-pid", "report-pid", 1, - &orterun_globals.report_pid, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.report_pid, OPAL_CMD_LINE_TYPE_STRING, "Printout pid on stdout [-], stderr [+], or a file [anything else]" }, { NULL, '\0', "report-uri", "report-uri", 1, - &orterun_globals.report_uri, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.report_uri, OPAL_CMD_LINE_TYPE_STRING, "Printout URI on stdout [-], stderr [+], or a file [anything else]" }, /* exit status reporting */ @@ -219,12 +214,12 @@ static opal_cmd_line_init_t cmd_line_init[] = { /* select stdin option */ { NULL, '\0', "stdin", "stdin", 1, - &orterun_globals.stdin_target, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.stdin_target, OPAL_CMD_LINE_TYPE_STRING, "Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)" }, /* request that argv[0] be indexed */ { NULL, '\0', "index-argv-by-rank", "index-argv-by-rank", 0, - &orterun_globals.index_argv, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.index_argv, OPAL_CMD_LINE_TYPE_BOOL, "Uniquely index argv[0] for each process using its rank" }, /* Specify the launch agent to be used */ @@ -234,33 +229,33 @@ static opal_cmd_line_init_t cmd_line_init[] = { /* Preload the binary on the remote machine */ { NULL, 's', NULL, "preload-binary", 0, - &orterun_globals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL, "Preload the binary on the remote machine before starting the remote process." 
}, /* Preload files on the remote machine */ { NULL, '\0', NULL, "preload-files", 1, - &orterun_globals.preload_files, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.preload_files, OPAL_CMD_LINE_TYPE_STRING, "Preload the comma separated list of files to the remote machines current working directory before starting the remote process." }, #if OPAL_ENABLE_FT_CR == 1 /* Tell SStore to preload a snapshot before launch */ { NULL, '\0', NULL, "sstore-load", 1, - &orterun_globals.sstore_load, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.sstore_load, OPAL_CMD_LINE_TYPE_STRING, "Internal Use Only! Tell SStore to preload a snapshot before launch." }, #endif /* Use an appfile */ { NULL, '\0', NULL, "app", 1, - &orterun_globals.appfile, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.appfile, OPAL_CMD_LINE_TYPE_STRING, "Provide an appfile; ignore all other command line options" }, /* Number of processes; -c, -n, --n, -np, and --np are all synonyms */ { NULL, 'c', "np", "np", 1, - &orterun_globals.num_procs, OPAL_CMD_LINE_TYPE_INT, + &orte_cmd_line.num_procs, OPAL_CMD_LINE_TYPE_INT, "Number of processes to run" }, { NULL, '\0', "n", "n", 1, - &orterun_globals.num_procs, OPAL_CMD_LINE_TYPE_INT, + &orte_cmd_line.num_procs, OPAL_CMD_LINE_TYPE_INT, "Number of processes to run" }, /* maximum size of VM - typically used to subdivide an allocation */ @@ -414,30 +409,27 @@ static opal_cmd_line_init_t cmd_line_init[] = { { "hwloc_base_cpu_set", '\0', "cpu-set", "cpu-set", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, "Comma-separated list of ranges specifying logical cpus allocated to this job [default: none]"}, - { NULL, 'H', "host", "host", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "List of hosts to invoke processes on" }, /* mpiexec-like arguments */ { NULL, '\0', "wdir", "wdir", 1, - &orterun_globals.wdir, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.wdir, OPAL_CMD_LINE_TYPE_STRING, "Set the working directory of the started processes" }, { NULL, '\0', "wd", "wd", 1, - &orterun_globals.wdir, 
OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.wdir, OPAL_CMD_LINE_TYPE_STRING, "Synonym for --wdir" }, { NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0, - &orterun_globals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL, "Set the working directory of the started processes to their session directory" }, { NULL, '\0', "path", "path", 1, - &orterun_globals.path, OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.path, OPAL_CMD_LINE_TYPE_STRING, "PATH to be used to look for executables to start processes" }, /* User-level debugger arguments */ { NULL, '\0', "tv", "tv", 0, - &orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.debugger, OPAL_CMD_LINE_TYPE_BOOL, "Deprecated backwards compatibility flag; synonym for \"--debug\"" }, { NULL, '\0', "debug", "debug", 0, - &orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.debugger, OPAL_CMD_LINE_TYPE_BOOL, "Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter" }, { "orte_base_user_debugger", '\0', "debugger", "debugger", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, @@ -505,7 +497,7 @@ static opal_cmd_line_init_t cmd_line_init[] = { #endif { NULL, '\0', "disable-recovery", "disable-recovery", 0, - &orterun_globals.disable_recovery, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.disable_recovery, OPAL_CMD_LINE_TYPE_BOOL, "Disable recovery (resets all recovery options to off)" }, { "state_novm_select", '\0', "novm", "novm", 0, @@ -517,15 +509,15 @@ static opal_cmd_line_init_t cmd_line_init[] = { "Used staged execution if inadequate resources are present (cannot support MPI jobs)" }, { NULL, '\0', "allow-run-as-root", "allow-run-as-root", 0, - &orterun_globals.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, "Allow execution as root (STRONGLY DISCOURAGED)" }, { NULL, '\0', "personality", "personality", 1, - &orterun_globals.personality, 
OPAL_CMD_LINE_TYPE_STRING, + &orte_cmd_line.personality, OPAL_CMD_LINE_TYPE_STRING, "Programming model/language being used (default=\"ompi\")" }, { NULL, '\0', "dvm", "dvm", 0, - &orterun_globals.dvm, OPAL_CMD_LINE_TYPE_BOOL, + &orte_cmd_line.create_dvm, OPAL_CMD_LINE_TYPE_BOOL, "Create a persistent distributed virtual machine (DVM)" }, /* End of list */ @@ -631,7 +623,7 @@ int orterun(int argc, char *argv[]) /* print version if requested. Do this before check for help so that --version --help works as one might expect. */ - if (orterun_globals.version) { + if (orte_cmd_line.version) { char *str, *project_name = NULL; if (0 == strcmp(orte_basename, "mpirun")) { project_name = "Open MPI"; @@ -655,9 +647,9 @@ int orterun(int argc, char *argv[]) * us to proceed if the allow-run-as-root flag was given. Otherwise, * exit with a giant warning flag */ - if (0 == geteuid() && !orterun_globals.run_as_root) { + if (0 == geteuid() && !orte_cmd_line.run_as_root) { fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (orterun_globals.help) { + if (orte_cmd_line.help) { fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename); } else { /* show_help is not yet available, so print an error manually */ @@ -699,7 +691,7 @@ int orterun(int argc, char *argv[]) } /* Check for help request */ - if (orterun_globals.help) { + if (orte_cmd_line.help) { char *str, *args = NULL; char *project_name = NULL; if (0 == strcmp(orte_basename, "mpirun")) { @@ -733,8 +725,8 @@ int orterun(int argc, char *argv[]) * in the global struct as the app_file parser would replace it. * So handle this specific cmd line option manually. 
*/ - orterun_globals.prefix = NULL; - orterun_globals.path_to_mpirun = NULL; + orte_cmd_line.prefix = NULL; + orte_cmd_line.path_to_mpirun = NULL; if (opal_cmd_line_is_taken(&cmd_line, "prefix") || '/' == argv[0][0] || want_prefix_by_default) { size_t param_len; @@ -742,24 +734,24 @@ int orterun(int argc, char *argv[]) char* tmp_basename = NULL; /* If they specified an absolute path, strip off the /bin/" and leave just the prefix */ - orterun_globals.path_to_mpirun = opal_dirname(argv[0]); + orte_cmd_line.path_to_mpirun = opal_dirname(argv[0]); /* Quick sanity check to ensure we got something/bin/ and that the installation tree is at least more or less what we expect it to be */ - tmp_basename = opal_basename(orterun_globals.path_to_mpirun); + tmp_basename = opal_basename(orte_cmd_line.path_to_mpirun); if (0 == strcmp("bin", tmp_basename)) { - char* tmp = orterun_globals.path_to_mpirun; - orterun_globals.path_to_mpirun = opal_dirname(tmp); + char* tmp = orte_cmd_line.path_to_mpirun; + orte_cmd_line.path_to_mpirun = opal_dirname(tmp); free(tmp); } else { - free(orterun_globals.path_to_mpirun); - orterun_globals.path_to_mpirun = NULL; + free(orte_cmd_line.path_to_mpirun); + orte_cmd_line.path_to_mpirun = NULL; } free(tmp_basename); } /* if both are given, check to see if they match */ - if (opal_cmd_line_is_taken(&cmd_line, "prefix") && NULL != orterun_globals.path_to_mpirun) { + if (opal_cmd_line_is_taken(&cmd_line, "prefix") && NULL != orte_cmd_line.path_to_mpirun) { char *tmp_basename; /* if they don't match, then that merits a warning */ param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); @@ -767,7 +759,7 @@ int orterun(int argc, char *argv[]) if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) { param[strlen(param)-1] = '\0'; } - tmp_basename = strdup(orterun_globals.path_to_mpirun); + tmp_basename = strdup(orte_cmd_line.path_to_mpirun); if (0 == strcmp(OPAL_PATH_SEP, &(tmp_basename[strlen(tmp_basename)-1]))) { 
tmp_basename[strlen(tmp_basename)-1] = '\0'; } @@ -779,12 +771,12 @@ int orterun(int argc, char *argv[]) * people can specify the backend prefix as different * from the local one */ - free(orterun_globals.path_to_mpirun); - orterun_globals.path_to_mpirun = NULL; + free(orte_cmd_line.path_to_mpirun); + orte_cmd_line.path_to_mpirun = NULL; } free(tmp_basename); - } else if (NULL != orterun_globals.path_to_mpirun) { - param = strdup(orterun_globals.path_to_mpirun); + } else if (NULL != orte_cmd_line.path_to_mpirun) { + param = strdup(orte_cmd_line.path_to_mpirun); } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){ /* must be --prefix alone */ param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); @@ -807,7 +799,7 @@ int orterun(int argc, char *argv[]) } } - orterun_globals.prefix = param; + orte_cmd_line.prefix = param; } want_prefix_by_default = true; } @@ -846,8 +838,8 @@ int orterun(int argc, char *argv[]) opal_finalize(); /* default our personality to OMPI */ - if (NULL == orterun_globals.personality) { - orterun_globals.personality = strdup("ompi"); + if (NULL == orte_cmd_line.personality) { + orte_cmd_line.personality = strdup("ompi"); } /* Check for some "global" command line params */ @@ -865,19 +857,19 @@ int orterun(int argc, char *argv[]) */ return ORTE_ERR_OUT_OF_RESOURCE; } - jdata->personality = strdup(orterun_globals.personality); + jdata->personality = strdup(orte_cmd_line.personality); /* check what user wants us to do with stdin */ - if (0 == strcmp(orterun_globals.stdin_target, "all")) { + if (0 == strcmp(orte_cmd_line.stdin_target, "all")) { jdata->stdin_target = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(orterun_globals.stdin_target, "none")) { + } else if (0 == strcmp(orte_cmd_line.stdin_target, "none")) { jdata->stdin_target = ORTE_VPID_INVALID; } else { - jdata->stdin_target = strtoul(orterun_globals.stdin_target, NULL, 10); + jdata->stdin_target = strtoul(orte_cmd_line.stdin_target, NULL, 10); } /* if we want the argv's 
indexed, indicate that */ - if (orterun_globals.index_argv) { + if (orte_cmd_line.index_argv) { orte_set_attribute(&jdata->attributes, ORTE_JOB_INDEX_ARGV, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); } @@ -906,21 +898,21 @@ int orterun(int argc, char *argv[]) daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); /* check for request to report uri */ - if (NULL != orterun_globals.report_uri) { + if (NULL != orte_cmd_line.report_uri) { FILE *fp; char *rml_uri; rml_uri = orte_rml.get_contact_info(); - if (0 == strcmp(orterun_globals.report_uri, "-")) { + if (0 == strcmp(orte_cmd_line.report_uri, "-")) { /* if '-', then output to stdout */ printf("%s\n", (NULL == rml_uri) ? "NULL" : rml_uri); - } else if (0 == strcmp(orterun_globals.report_uri, "+")) { + } else if (0 == strcmp(orte_cmd_line.report_uri, "+")) { /* if '+', output to stderr */ fprintf(stderr, "%s\n", (NULL == rml_uri) ? "NULL" : rml_uri); } else { - fp = fopen(orterun_globals.report_uri, "w"); + fp = fopen(orte_cmd_line.report_uri, "w"); if (NULL == fp) { orte_show_help("help-orterun.txt", "orterun:write_file", false, - orte_basename, "uri", orterun_globals.report_uri); + orte_basename, "uri", orte_cmd_line.report_uri); exit(0); } fprintf(fp, "%s\n", (NULL == rml_uri) ? 
"NULL" : rml_uri); @@ -1104,46 +1096,46 @@ static int init_globals(void) { /* Only CONSTRUCT things once */ if (!globals_init) { - orterun_globals.env_val = NULL; - orterun_globals.appfile = NULL; - orterun_globals.wdir = NULL; - orterun_globals.path = NULL; - orterun_globals.stdin_target = "0"; - orterun_globals.report_pid = NULL; - orterun_globals.report_uri = NULL; - orterun_globals.disable_recovery = false; - orterun_globals.index_argv = false; - orterun_globals.run_as_root = false; - orterun_globals.personality = NULL; - orterun_globals.dvm = false; + orte_cmd_line.env_val = NULL; + orte_cmd_line.appfile = NULL; + orte_cmd_line.wdir = NULL; + orte_cmd_line.path = NULL; + orte_cmd_line.stdin_target = "0"; + orte_cmd_line.report_pid = NULL; + orte_cmd_line.report_uri = NULL; + orte_cmd_line.disable_recovery = false; + orte_cmd_line.index_argv = false; + orte_cmd_line.run_as_root = false; + orte_cmd_line.personality = NULL; + orte_cmd_line.create_dvm = false; } /* Reset the other fields every time */ - orterun_globals.help = false; - orterun_globals.version = false; - orterun_globals.verbose = false; - orterun_globals.debugger = false; - orterun_globals.num_procs = 0; - if( NULL != orterun_globals.env_val ) - free( orterun_globals.env_val ); - orterun_globals.env_val = NULL; - if( NULL != orterun_globals.appfile ) - free( orterun_globals.appfile ); - orterun_globals.appfile = NULL; - if( NULL != orterun_globals.wdir ) - free( orterun_globals.wdir ); - orterun_globals.set_cwd_to_session_dir = false; - orterun_globals.wdir = NULL; - if( NULL != orterun_globals.path ) - free( orterun_globals.path ); - orterun_globals.path = NULL; + orte_cmd_line.help = false; + orte_cmd_line.version = false; + orte_cmd_line.verbose = false; + orte_cmd_line.debugger = false; + orte_cmd_line.num_procs = 0; + if( NULL != orte_cmd_line.env_val ) + free( orte_cmd_line.env_val ); + orte_cmd_line.env_val = NULL; + if( NULL != orte_cmd_line.appfile ) + free( orte_cmd_line.appfile ); + 
orte_cmd_line.appfile = NULL; + if( NULL != orte_cmd_line.wdir ) + free( orte_cmd_line.wdir ); + orte_cmd_line.set_cwd_to_session_dir = false; + orte_cmd_line.wdir = NULL; + if( NULL != orte_cmd_line.path ) + free( orte_cmd_line.path ); + orte_cmd_line.path = NULL; - orterun_globals.preload_binaries = false; - orterun_globals.preload_files = NULL; + orte_cmd_line.preload_binaries = false; + orte_cmd_line.preload_files = NULL; #if OPAL_ENABLE_FT_CR == 1 - orterun_globals.sstore_load = NULL; + orte_cmd_line.sstore_load = NULL; #endif /* All done */ @@ -1155,19 +1147,19 @@ static int init_globals(void) static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line) { /* check for request to report pid */ - if (NULL != orterun_globals.report_pid) { + if (NULL != orte_cmd_line.report_pid) { FILE *fp; - if (0 == strcmp(orterun_globals.report_pid, "-")) { + if (0 == strcmp(orte_cmd_line.report_pid, "-")) { /* if '-', then output to stdout */ printf("%d\n", (int)getpid()); - } else if (0 == strcmp(orterun_globals.report_pid, "+")) { + } else if (0 == strcmp(orte_cmd_line.report_pid, "+")) { /* if '+', output to stderr */ fprintf(stderr, "%d\n", (int)getpid()); } else { - fp = fopen(orterun_globals.report_pid, "w"); + fp = fopen(orte_cmd_line.report_pid, "w"); if (NULL == fp) { orte_show_help("help-orterun.txt", "orterun:write_file", false, - orte_basename, "pid", orterun_globals.report_pid); + orte_basename, "pid", orte_cmd_line.report_pid); exit(0); } fprintf(fp, "%d\n", (int)getpid()); @@ -1177,12 +1169,12 @@ static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line) /* Do we want a user-level debugger? 
*/ - if (orterun_globals.debugger) { - run_debugger(orte_basename, cmd_line, argc, argv, orterun_globals.num_procs); + if (orte_cmd_line.debugger) { + run_debugger(orte_basename, cmd_line, argc, argv, orte_cmd_line.num_procs); } /* if recovery was disabled on the cmd line, do so */ - if (orterun_globals.disable_recovery) { + if (orte_cmd_line.disable_recovery) { orte_enable_recovery = false; orte_max_restarts = 0; } @@ -1389,8 +1381,8 @@ static int create_app(int argc, char* argv[], * $ mpirun -np 2 -mca foo bar --app launch.appfile * Only pick up '-mca foo bar' on this pass. */ - if (NULL != orterun_globals.appfile) { - if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orterun_globals.personality, argc, 0, argv))) { + if (NULL != orte_cmd_line.appfile) { + if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personality, argc, 0, argv))) { goto cleanup; } } @@ -1409,9 +1401,9 @@ static int create_app(int argc, char* argv[], /* Is there an appfile in here? */ - if (NULL != orterun_globals.appfile) { + if (NULL != orte_cmd_line.appfile) { OBJ_DESTRUCT(&cmd_line); - return parse_appfile(jdata, strdup(orterun_globals.appfile), app_env); + return parse_appfile(jdata, strdup(orte_cmd_line.appfile), app_env); } /* Setup application context */ @@ -1435,7 +1427,7 @@ static int create_app(int argc, char* argv[], * mpirun -np 2 -mca foo bar ./my-app -mca bip bop * We want to pick up '-mca foo bar' but not '-mca bip bop' */ - if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orterun_globals.personality, + if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personality, argc, count, argv))) { goto cleanup; } @@ -1443,8 +1435,8 @@ static int create_app(int argc, char* argv[], /* Grab all OMPI_* environment variables */ app->env = opal_argv_copy(*app_env); - if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orterun_globals.personality, - orterun_globals.path, + if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orte_cmd_line.personality, + orte_cmd_line.path, 
&cmd_line, environ, &app->env))) { goto cleanup; @@ -1453,10 +1445,10 @@ static int create_app(int argc, char* argv[], /* Did the user request a specific wdir? */ - if (NULL != orterun_globals.wdir) { + if (NULL != orte_cmd_line.wdir) { /* if this is a relative path, convert it to an absolute path */ - if (opal_path_is_absolute(orterun_globals.wdir)) { - app->cwd = strdup(orterun_globals.wdir); + if (opal_path_is_absolute(orte_cmd_line.wdir)) { + app->cwd = strdup(orte_cmd_line.wdir); } else { /* get the cwd */ if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { @@ -1465,10 +1457,10 @@ static int create_app(int argc, char* argv[], goto cleanup; } /* construct the absolute path */ - app->cwd = opal_os_path(false, cwd, orterun_globals.wdir, NULL); + app->cwd = opal_os_path(false, cwd, orte_cmd_line.wdir, NULL); } orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } else if (orterun_globals.set_cwd_to_session_dir) { + } else if (orte_cmd_line.set_cwd_to_session_dir) { orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); } else { @@ -1500,14 +1492,14 @@ static int create_app(int argc, char* argv[], * given above, check to see if they match */ if (opal_cmd_line_is_taken(&cmd_line, "prefix") && - NULL != orterun_globals.prefix) { + NULL != orte_cmd_line.prefix) { /* if they don't match, then that merits a warning */ param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); /* ensure we strip any trailing '/' */ if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) { param[strlen(param)-1] = '\0'; } - value = strdup(orterun_globals.prefix); + value = strdup(orte_cmd_line.prefix); if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) { value[strlen(value)-1] = '\0'; } @@ -1518,11 +1510,11 @@ static int create_app(int argc, char* argv[], * know that one is being used */ 
free(param); - param = strdup(orterun_globals.prefix); + param = strdup(orte_cmd_line.prefix); } free(value); - } else if (NULL != orterun_globals.prefix) { - param = strdup(orterun_globals.prefix); + } else if (NULL != orte_cmd_line.prefix) { + param = strdup(orte_cmd_line.prefix); } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){ /* must be --prefix alone */ param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); @@ -1592,18 +1584,18 @@ static int create_app(int argc, char* argv[], } /* check for bozo error */ - if (0 > orterun_globals.num_procs) { + if (0 > orte_cmd_line.num_procs) { orte_show_help("help-orterun.txt", "orterun:negative-nprocs", true, orte_basename, app->argv[0], - orterun_globals.num_procs, NULL); + orte_cmd_line.num_procs, NULL); return ORTE_ERR_FATAL; } - app->num_procs = (orte_std_cntr_t)orterun_globals.num_procs; + app->num_procs = (orte_std_cntr_t)orte_cmd_line.num_procs; total_num_apps++; /* Capture any preload flags */ - if (orterun_globals.preload_binaries) { + if (orte_cmd_line.preload_binaries) { orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL); } /* if we were told to cwd to the session dir and the app was given in @@ -1614,21 +1606,21 @@ static int create_app(int argc, char* argv[], */ if (!opal_path_is_absolute(app->argv[0]) && NULL == strstr(app->argv[0], "java")) { - if (orterun_globals.preload_binaries) { + if (orte_cmd_line.preload_binaries) { orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); } else if (orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) { orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL); } } - if (NULL != orterun_globals.preload_files) { + if (NULL != orte_cmd_line.preload_files) { orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, ORTE_ATTR_LOCAL, - orterun_globals.preload_files, OPAL_STRING); + orte_cmd_line.preload_files, 
OPAL_STRING); } #if OPAL_ENABLE_FT_CR == 1 - if(NULL != orterun_globals.sstore_load) { + if(NULL != orte_cmd_line.sstore_load) { orte_set_attribute(&app->attributes, ORTE_APP_SSTORE_LOAD, ORTE_ATTR_LOCAL, - orterun_globals.sstore_load, OPAL_STRING); + orte_cmd_line.sstore_load, OPAL_STRING); } #endif @@ -1837,9 +1829,9 @@ static int parse_appfile(orte_job_t *jdata, char *filename, char ***env) * Make sure to clear out this variable so we don't do anything odd in * app_create() */ - if( NULL != orterun_globals.appfile ) { - free( orterun_globals.appfile ); - orterun_globals.appfile = NULL; + if( NULL != orte_cmd_line.appfile ) { + free( orte_cmd_line.appfile ); + orte_cmd_line.appfile = NULL; } /* Try to open the file */ diff --git a/orte/tools/orterun/orterun.h b/orte/tools/orterun/orterun.h index c25bfcd96a..640b57bdd0 100644 --- a/orte/tools/orterun/orterun.h +++ b/orte/tools/orterun/orterun.h @@ -32,45 +32,6 @@ BEGIN_C_DECLS */ int orterun(int argc, char *argv[]); -/** - * Global struct for catching orterun command line options. - */ -struct orterun_globals_t { - bool help; - bool version; - bool verbose; - char *report_pid; - char *report_uri; - bool exit; - bool debugger; - int num_procs; - char *env_val; - char *appfile; - char *wdir; - bool set_cwd_to_session_dir; - char *path; - char *preload_files; - bool sleep; - char *stdin_target; - char *prefix; - char *path_to_mpirun; -#if OPAL_ENABLE_FT_CR == 1 - char *sstore_load; -#endif - bool disable_recovery; - bool preload_binaries; - bool index_argv; - bool run_as_root; - char *personality; - bool dvm; -}; - -/** - * Struct holding values gleaned from the orterun command line - - * needed by debugger init - */ -ORTE_DECLSPEC extern struct orterun_globals_t orterun_globals; - END_C_DECLS #endif /* ORTERUN_ORTERUN_H */ diff --git a/orte/util/attr.c b/orte/util/attr.c index d4d96334b6..a3c083504b 100644 --- a/orte/util/attr.c +++ b/orte/util/attr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel, Inc. 
All rights reserved + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -261,6 +261,10 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) return "JOB-LAUNCH-PROXY"; case ORTE_JOB_NSPACE_REGISTERED: return "JOB-NSPACE-REGISTERED"; + case ORTE_JOB_FIXED_DVM: + return "ORTE-JOB-FIXED-DVM"; + case ORTE_JOB_DVM_JOB: + return "ORTE-JOB-DVM-JOB"; case ORTE_PROC_NOBARRIER: return "PROC-NOBARRIER"; diff --git a/orte/util/attr.h b/orte/util/attr.h index 2bf5b8265d..5f1e590c5a 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -130,6 +130,9 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_ROOM_NUM (ORTE_JOB_START_KEY + 39) // int - number of remote request's hotel room #define ORTE_JOB_LAUNCH_PROXY (ORTE_JOB_START_KEY + 40) // opal_process_name_t - name of spawn requestor #define ORTE_JOB_NSPACE_REGISTERED (ORTE_JOB_START_KEY + 41) // bool - job has been registered with embedded PMIx server +#define ORTE_JOB_FIXED_DVM (ORTE_JOB_START_KEY + 42) // bool - do not change the size of the DVM for this job +#define ORTE_JOB_DVM_JOB (ORTE_JOB_START_KEY + 43) // bool - job is using a DVM +#define ORTE_JOB_CANCELLED (ORTE_JOB_START_KEY + 44) // bool - job was cancelled #define ORTE_JOB_MAX_KEY 300 diff --git a/orte/util/dash_host/dash_host.c b/orte/util/dash_host/dash_host.c index b81d791a62..03eb72157f 100644 --- a/orte/util/dash_host/dash_host.c +++ b/orte/util/dash_host/dash_host.c @@ -54,7 +54,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes, orte_node_t *node, *nd; opal_list_t adds; bool found; - int slots; + int slots=0; bool slots_given; char *cptr; @@ -237,8 +237,8 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes, } 
node->name = strdup(ndname); OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, - "%s dashhost: added node %s to list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); + "%s dashhost: added node %s to list - slots %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, slots)); node->state = ORTE_NODE_STATE_UP; node->slots_inuse = 0; node->slots_max = 0; @@ -273,6 +273,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes, node->slots = nd->slots; if (ORTE_FLAG_TEST(nd, ORTE_NODE_FLAG_SLOTS_GIVEN)) { ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); + node->slots = nd->slots; } break; } diff --git a/orte/util/error_strings.c b/orte/util/error_strings.c index c28212c8bd..2648e2fb22 100644 --- a/orte/util/error_strings.c +++ b/orte/util/error_strings.c @@ -231,6 +231,9 @@ int orte_err2str(int errnum, const char **errmsg) case ORTE_ERR_FORCE_SELECT: retval = "Force select"; break; + case ORTE_ERR_JOB_CANCELLED: + retval = "Job cancelled"; + break; default: if (orte_report_silent_errors) { retval = "Unknown error"; diff --git a/orte/util/hostfile/hostfile.c b/orte/util/hostfile/hostfile.c index 9a68791f31..80efa36b92 100644 --- a/orte/util/hostfile/hostfile.c +++ b/orte/util/hostfile/hostfile.c @@ -285,8 +285,9 @@ static int hostfile_parse_line(int token, opal_list_t* updates, free(node_name); } OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, - "%s hostfile: node %s slots %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots)); + "%s hostfile: node %s slots %d nodes-given %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots, + ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN) ? 
"TRUE" : "FALSE")); /* mark the slots as "given" since we take them as being the * number specified via the rankfile */ diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index d5f28c31b3..68c3e92a72 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -220,7 +220,6 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update) } /* if the daemon doesn't have a node, that's an error */ if (NULL == (node = dmn->node)) { - opal_output(0, "DAEMON %s HAS NO NODE", ORTE_NAME_PRINT(&dmn->name)); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; }