From 1bd772e8ebf66f705537b9a6e1af2b6093ef8471 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 17 Oct 2018 15:11:38 -0700 Subject: [PATCH] Remove the stale orte-dvm code Users should migrate to https://github.com/pmix/prrte Signed-off-by: Ralph Castain --- config/opal_check_pmi.m4 | 4 - config/orte_config_files.m4 | 2 - ompi/mca/rte/orte/Makefile.am | 19 +- orte/mca/errmgr/dvm/Makefile.am | 37 - orte/mca/errmgr/dvm/errmgr_dvm.c | 632 -------- orte/mca/errmgr/dvm/errmgr_dvm.h | 39 - orte/mca/errmgr/dvm/errmgr_dvm_component.c | 102 -- orte/mca/errmgr/dvm/owner.txt | 7 - orte/mca/state/dvm/Makefile.am | 36 - orte/mca/state/dvm/owner.txt | 7 - orte/mca/state/dvm/state_dvm.c | 688 --------- orte/mca/state/dvm/state_dvm.h | 35 - orte/mca/state/dvm/state_dvm_component.c | 83 - orte/tools/Makefile.am | 10 +- orte/tools/ompi-prun/Makefile.am | 59 - orte/tools/ompi-prun/main.c | 33 - orte/tools/ompi-prun/ompi-prun.1in | 1597 -------------------- orte/tools/ompi-prun/prun | 228 --- orte/tools/ompi-prun/prun.1 | 1597 -------------------- orte/tools/ompi-prun/prun.c | 1373 ----------------- orte/tools/ompi-prun/prun.h | 37 - orte/tools/orte-dvm/Makefile.am | 57 - orte/tools/orte-dvm/orte-dvm.1in | 193 --- orte/tools/orte-dvm/orte-dvm.c | 482 ------ 24 files changed, 2 insertions(+), 7355 deletions(-) delete mode 100644 orte/mca/errmgr/dvm/Makefile.am delete mode 100644 orte/mca/errmgr/dvm/errmgr_dvm.c delete mode 100644 orte/mca/errmgr/dvm/errmgr_dvm.h delete mode 100644 orte/mca/errmgr/dvm/errmgr_dvm_component.c delete mode 100644 orte/mca/errmgr/dvm/owner.txt delete mode 100644 orte/mca/state/dvm/Makefile.am delete mode 100644 orte/mca/state/dvm/owner.txt delete mode 100644 orte/mca/state/dvm/state_dvm.c delete mode 100644 orte/mca/state/dvm/state_dvm.h delete mode 100644 orte/mca/state/dvm/state_dvm_component.c delete mode 100644 orte/tools/ompi-prun/Makefile.am delete mode 100644 orte/tools/ompi-prun/main.c delete mode 100644 orte/tools/ompi-prun/ompi-prun.1in delete 
mode 100755 orte/tools/ompi-prun/prun delete mode 100644 orte/tools/ompi-prun/prun.1 delete mode 100644 orte/tools/ompi-prun/prun.c delete mode 100644 orte/tools/ompi-prun/prun.h delete mode 100644 orte/tools/orte-dvm/Makefile.am delete mode 100644 orte/tools/orte-dvm/orte-dvm.1in delete mode 100644 orte/tools/orte-dvm/orte-dvm.c diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index 6c9c0e25a8..037d959b4f 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -248,14 +248,12 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_MSG_ERROR([Cannot continue])]) AC_MSG_CHECKING([if user requested internal PMIx support($with_pmix)]) - opal_prun_happy=no opal_external_pmix_happy=no opal_external_have_pmix1=0 AS_IF([test "$with_pmix" = "internal"], [AC_MSG_RESULT([yes]) opal_external_pmix_happy=no - opal_prun_happy=yes opal_external_pmix_version=internal], [AC_MSG_RESULT([no]) @@ -376,7 +374,6 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ [AC_MSG_RESULT([found]) opal_external_pmix_version=2x opal_external_pmix_version_found=1 - opal_prun_happy=yes opal_external_pmix_happy=yes], [AC_MSG_RESULT([not found])])]) @@ -436,7 +433,6 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_DEFINE_UNQUOTED([OPAL_PMIX_V1],[$opal_external_have_pmix1], [Whether the external PMIx library is v1]) - AM_CONDITIONAL([OPAL_WANT_PRUN], [test "$opal_prun_happy" = "yes"]) AS_IF([test "$opal_external_pmix_happy" = "yes"], [AS_IF([test "$opal_external_pmix_version" = "1x"], diff --git a/config/orte_config_files.m4 b/config/orte_config_files.m4 index 82a7f7f824..b0f79dbb66 100644 --- a/config/orte_config_files.m4 +++ b/config/orte_config_files.m4 @@ -30,7 +30,5 @@ AC_DEFUN([ORTE_CONFIG_FILES],[ orte/tools/orte-top/Makefile orte/tools/orte-info/Makefile orte/tools/orte-server/Makefile - orte/tools/orte-dvm/Makefile - orte/tools/ompi-prun/Makefile ]) ]) diff --git a/ompi/mca/rte/orte/Makefile.am b/ompi/mca/rte/orte/Makefile.am index 451436373b..30dd21b14d 100644 --- a/ompi/mca/rte/orte/Makefile.am +++ 
b/ompi/mca/rte/orte/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -30,12 +30,6 @@ libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 -if OPAL_WANT_PRUN -if WANT_INSTALL_HEADERS -man_pages += ompi-dvm.1 -endif -endif - if OPAL_INSTALL_BINARIES nodist_man_MANS = $(man_pages) @@ -46,9 +40,6 @@ install-exec-hook: (cd $(DESTDIR)$(bindir); rm -f ompi-clean$(EXEEXT); $(LN_S) orte-clean$(EXEEXT) ompi-clean$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT)) -if OPAL_WANT_PRUN - (cd $(DESTDIR)$(bindir); rm -f ompi-dvm$(EXEEXT); $(LN_S) orte-dvm$(EXEEXT) ompi-dvm$(EXEEXT)) -endif uninstall-local: rm -f $(DESTDIR)$(bindir)/mpirun$(EXEEXT) \ @@ -57,9 +48,6 @@ uninstall-local: $(DESTDIR)$(bindir)/ompi-clean$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-server$(EXEEXT) -if OPAL_WANT_PRUN - rm -f $(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT) -endif endif # OPAL_INSTALL_BINARIES @@ -96,10 +84,5 @@ $(top_builddir)/orte/tools/orte-server/orte-server.1: ompi-server.1: $(top_builddir)/orte/tools/orte-server/orte-server.1 cp -f $(top_builddir)/orte/tools/orte-server/orte-server.1 ompi-server.1 -if OPAL_WANT_PRUN -ompi-dvm.1: $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 - cp -f $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 ompi-dvm.1 -endif - clean-local: rm -f $(man_pages) diff --git a/orte/mca/errmgr/dvm/Makefile.am b/orte/mca/errmgr/dvm/Makefile.am deleted file mode 100644 index 43fbe76550..0000000000 --- 
a/orte/mca/errmgr/dvm/Makefile.am +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - errmgr_dvm.h \ - errmgr_dvm_component.c \ - errmgr_dvm.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_errmgr_dvm_DSO -component_noinst = -component_install = mca_errmgr_dvm.la -else -component_noinst = libmca_errmgr_dvm.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_dvm_la_SOURCES = $(sources) -mca_errmgr_dvm_la_LDFLAGS = -module -avoid-version -mca_errmgr_dvm_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_dvm_la_SOURCES =$(sources) -libmca_errmgr_dvm_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.c b/orte/mca/errmgr/dvm/errmgr_dvm.c deleted file mode 100644 index f259da2321..0000000000 --- a/orte/mca/errmgr/dvm/errmgr_dvm.c +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2009-2011 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2017 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_SYS_WAIT_H -#include -#endif - -#include "opal/util/output.h" -#include "opal/dss/dss.h" - -#include "orte/mca/iof/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/odls/odls.h" -#include "orte/mca/odls/base/base.h" -#include "orte/mca/odls/base/odls_private.h" -#include "orte/mca/plm/base/plm_private.h" -#include "orte/mca/plm/plm.h" -#include "orte/mca/rmaps/rmaps_types.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/grpcomm/grpcomm.h" -#include "orte/mca/ess/ess.h" -#include "orte/mca/state/state.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_locks.h" -#include "orte/runtime/orte_quit.h" -#include "orte/runtime/data_type_support/orte_dt_support.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_dvm.h" - -static int init(void); -static int finalize(void); - -/****************** - * dvm module - ******************/ -orte_errmgr_base_module_t orte_errmgr_dvm_module = { - .init = init, - .finalize = finalize, - .logfn = orte_errmgr_base_log, - .abort = orte_errmgr_base_abort, - .abort_peers = orte_errmgr_base_abort_peers -}; - - -/* - * Local functions - */ -static void job_errors(int fd, short args, void *cbdata); -static void proc_errors(int fd, short args, void *cbdata); - -static int init(void) -{ - /* setup state machine to trap job errors */ - orte_state.add_job_state(ORTE_JOB_STATE_ERROR, job_errors, ORTE_ERROR_PRI); - - /* set the lost connection state to run at MSG priority so - * we can process any last messages from the proc - */ - 
orte_state.add_proc_state(ORTE_PROC_STATE_COMM_FAILED, proc_errors, ORTE_MSG_PRI); - - /* setup state machine to trap proc errors */ - orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - return ORTE_SUCCESS; -} - -static void _terminate_job(orte_jobid_t jobid) -{ - opal_pointer_array_t procs; - orte_proc_t pobj; - - OBJ_CONSTRUCT(&procs, opal_pointer_array_t); - opal_pointer_array_init(&procs, 1, 1, 1); - OBJ_CONSTRUCT(&pobj, orte_proc_t); - pobj.name.jobid = jobid; - pobj.name.vpid = ORTE_VPID_WILDCARD; - opal_pointer_array_add(&procs, &pobj); - orte_plm.terminate_procs(&procs); - OBJ_DESTRUCT(&procs); - OBJ_DESTRUCT(&pobj); -} - -static void job_errors(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - orte_job_state_t jobstate; - opal_buffer_t *answer; - int32_t rc, ret; - int room, *rmptr; - - ORTE_ACQUIRE_OBJECT(caddy); - - /* - * if orte is trying to shutdown, just let it - */ - if (orte_finalizing) { - return; - } - - /* if the jdata is NULL, then we ignore it as this - * is reporting an unrecoverable error - */ - if (NULL == caddy->jdata) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - OBJ_RELEASE(caddy); - return; - } - - /* update the state */ - jdata = caddy->jdata; - jobstate = caddy->job_state; - jdata->state = jobstate; - - OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: job %s reported state %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jdata->jobid), - orte_job_state_to_str(jobstate))); - - if (jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - /* if the daemon job aborted and we haven't heard from everyone yet, - * then this could well have been caused by a daemon not finding - * a way back to us. In this case, output a message indicating a daemon - * died without reporting. 
Otherwise, say nothing as we - * likely already output an error message */ - if (ORTE_JOB_STATE_ABORTED == jobstate && - jdata->num_procs != jdata->num_reported) { - orte_routing_is_enabled = false; - orte_show_help("help-errmgr-base.txt", "failed-daemon", true); - } - /* there really isn't much else we can do since the problem - * is in the DVM itself, so best just to terminate */ - jdata->num_terminated = jdata->num_procs; - /* activate the terminated state so we can exit */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - OBJ_RELEASE(caddy); - return; - } - - /* all other cases involve jobs submitted to the DVM - therefore, - * we only inform the submitter of the problem, but do NOT terminate - * the DVM itself */ - - rc = jobstate; - answer = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(caddy); - return; - } - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(caddy); - return; - } - /* pack the room number */ - rmptr = &room; - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&rmptr, OPAL_INT)) { - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &room, 1, OPAL_INT))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(caddy); - return; - } - } - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm sending notification of job %s failure to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jdata->jobid), - ORTE_NAME_PRINT(&jdata->originator))); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, - ORTE_RML_TAG_LAUNCH_RESP, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - } - /* ensure we terminate any processes left running in the DVM */ - _terminate_job(jdata->jobid); - - /* cleanup */ - OBJ_RELEASE(caddy); -} - -static void proc_errors(int fd, short args, void 
*cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - orte_proc_t *pptr, *proct; - orte_process_name_t *proc = &caddy->name; - orte_proc_state_t state = caddy->proc_state; - int i; - int32_t i32, *i32ptr; - char *rtmod; - - ORTE_ACQUIRE_OBJECT(caddy); - - OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: for proc %s state %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state))); - - /* - * if orte is trying to shutdown, just let it - */ - if (orte_finalizing) { - goto cleanup; - } - - /* get the job object */ - if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) { - /* could be a race condition */ - goto cleanup; - } - pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid); - - /* get the management conduit's routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - - /* we MUST handle a communication failure before doing anything else - * as it requires some special care to avoid normal termination issues - * for local application procs - */ - if (ORTE_PROC_STATE_COMM_FAILED == state) { - /* is this to a daemon? 
*/ - if (ORTE_PROC_MY_NAME->jobid != proc->jobid) { - /* nope - ignore it */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure to non-daemon proc - ignoring it", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - goto cleanup; - } - /* if this is my own connection, ignore it */ - if (ORTE_PROC_MY_NAME->vpid == proc->vpid) { - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure on my own connection - ignoring it", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - goto cleanup; - } - /* mark the daemon as gone */ - ORTE_FLAG_UNSET(pptr, ORTE_PROC_FLAG_ALIVE); - /* update the state */ - pptr->state = state; - /* adjust our num_procs */ - --orte_process_info.num_procs; - /* if we have ordered orteds to terminate or abort - * is in progress, record it */ - if (orte_orteds_term_ordered || orte_abnormal_term_ordered) { - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: daemons terminating - recording daemon %s as gone", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); - /* if all my routes and local children are gone, then terminate ourselves */ - if (0 == orte_routed.num_routes(rtmod)) { - for (i=0; i < orte_local_children->size; i++) { - if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && - ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_ALIVE) && proct->state < ORTE_PROC_STATE_UNTERMINATED) { - /* at least one is still alive */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: at least one proc (%s) still alive", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proct->name))); - goto cleanup; - } - } - /* call our appropriate exit procedure */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr_dvm: all routes and children gone - ordering exit", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); - } else { - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: %d routes remain alive", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)orte_routed.num_routes(rtmod))); - } - goto cleanup; - } - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: daemon %s - aborting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - /* record the first one to fail */ - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - /* output an error message so the user knows what happened */ - orte_show_help("help-errmgr-base.txt", "node-died", true, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_process_info.nodename, - ORTE_NAME_PRINT(proc), - pptr->node->name); - /* mark the daemon job as failed */ - jdata->state = ORTE_JOB_STATE_COMM_FAILED; - /* point to the lowest rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - /* update our exit code */ - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* just in case the exit code hadn't been set, do it here - this - * won't override any reported exit code */ - ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_COMM_FAILURE); - } - goto cleanup; - } - - /* update the proc state - can get multiple reports on a proc - * depending on circumstances, so ensure we only do this once - */ - if (pptr->state < ORTE_PROC_STATE_TERMINATED) { - pptr->state = state; - } - - /* if we were ordered to terminate, mark this proc as dead and see if - * any of our routes or local children remain alive - if not, then - * terminate ourselves. 
*/ - if (orte_orteds_term_ordered) { - for (i=0; i < orte_local_children->size; i++) { - if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - if (ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) { - goto keep_going; - } - } - } - /* if all my routes and children are gone, then terminate - ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { - OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, - "%s errmgr:default:dvm all routes gone - exiting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); - } - } - - keep_going: - /* ensure we record the failed proc properly so we can report - * the error once we terminate - */ - switch (state) { - case ORTE_PROC_STATE_KILLED_BY_CMD: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s killed by cmd", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - /* we ordered this proc to die, so it isn't an abnormal termination - * and we don't flag it as such - */ - if (jdata->num_terminated >= jdata->num_procs) { - /* this job has terminated */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - } - /* don't abort the job as this isn't an abnormal termination */ - break; - - case ORTE_PROC_STATE_ABORTED: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s aborted", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_ABORTED; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - 
_terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_ABORTED_BY_SIG: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s aborted by signal", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_ABORTED_BY_SIG; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - _terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_TERM_WO_SYNC: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s terminated without sync", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_ABORTED_WO_SYNC; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* now treat a special case - if the proc exit'd without a required - * sync, it may have done so with a zero exit code. 
We want to ensure - * that the user realizes there was an error, so in this -one- case, - * we overwrite the process' exit code with the default error code - */ - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - /* kill the job */ - _terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_FAILED_TO_START: - case ORTE_PROC_STATE_FAILED_TO_LAUNCH: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - opal_buffer_t *answer; - int id, *idptr, ret; - - if (ORTE_PROC_STATE_FAILED_TO_START) { - jdata->state = ORTE_JOB_STATE_FAILED_TO_START; - } else { - jdata->state = ORTE_JOB_STATE_FAILED_TO_LAUNCH; - } - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - /* send a notification to the requestor - indicate that this is a spawn response */ - answer = OBJ_NEW(opal_buffer_t); - /* pack the return status */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &pptr->exit_code, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - goto CLEANUP; - } - /* pack the jobid to be returned */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - goto CLEANUP; - } - idptr = &id; - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&idptr, OPAL_INT)) { - /* pack the sender's index to the tracking object */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, idptr, 1, OPAL_INT))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - goto CLEANUP; - } - } - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) { - /* we need to 
send the requestor more info about what happened */ - opal_dss.pack(answer, &jdata->state, 1, ORTE_JOB_STATE_T); - opal_dss.pack(answer, &pptr, 1, ORTE_PROC); - opal_dss.pack(answer, &pptr->node, 1, ORTE_NODE); - } - /* return response */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, - ORTE_RML_TAG_LAUNCH_RESP, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - } - /* record that we notified about this job */ - jdata->state = ORTE_JOB_STATE_NOTIFIED; - CLEANUP: - /* kill the job */ - _terminate_job(jdata->jobid); - } - /* if this was a daemon, report it */ - if (jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - /* output a message indicating we failed to launch a daemon */ - orte_show_help("help-errmgr-base.txt", "failed-daemon-launch", true); - } - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - break; - - case ORTE_PROC_STATE_CALLED_ABORT: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s called abort with exit code %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), pptr->exit_code)); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_CALLED_ABORT; - /* point to the first proc to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - _terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_TERM_NON_ZERO: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s exited with non-zero status %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - pptr->exit_code)); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* track the number of non-zero exits */ - i32 = 0; - i32ptr = 
&i32; - orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32); - ++i32; - orte_set_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, ORTE_ATTR_LOCAL, i32ptr, OPAL_INT32); - if (orte_abort_non_zero_exit) { - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_NON_ZERO_TERM; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - /* kill the job */ - _terminate_job(jdata->jobid); - } - } else { - /* user requested we consider this normal termination */ - if (jdata->num_terminated >= jdata->num_procs) { - /* this job has terminated */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - } - } - break; - - case ORTE_PROC_STATE_HEARTBEAT_FAILED: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s heartbeat failed", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_HEARTBEAT_FAILED; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - _terminate_job(jdata->jobid); - } - /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); - break; - - case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: unable to send message to proc %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - /* if this proc is one of my daemons, then we are 
truly - * hosed - so just exit out - */ - if (ORTE_PROC_MY_NAME->jobid == proc->jobid) { - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); - break; - } - break; - - default: - /* shouldn't get this, but terminate job if required */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s default error %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state))); - if (jdata->num_terminated == jdata->num_procs) { - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - } - break; - } - /* if the waitpid fired, be sure to let the state machine know */ - if (ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_WAITPID)) { - ORTE_ACTIVATE_PROC_STATE(&pptr->name, ORTE_PROC_STATE_WAITPID_FIRED); - } - - cleanup: - OBJ_RELEASE(caddy); -} diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.h b/orte/mca/errmgr/dvm/errmgr_dvm.h deleted file mode 100644 index 291394d9a5..0000000000 --- a/orte/mca/errmgr/dvm/errmgr_dvm.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_ERRMGR_dvm_EXPORT_H -#define MCA_ERRMGR_dvm_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/errmgr/errmgr.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_errmgr_base_component_t mca_errmgr_dvm_component; - -ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_dvm_module; - -END_C_DECLS - -#endif /* MCA_ERRMGR_dvm_EXPORT_H */ diff --git a/orte/mca/errmgr/dvm/errmgr_dvm_component.c b/orte/mca/errmgr/dvm/errmgr_dvm_component.c deleted file mode 100644 index 879062893b..0000000000 --- a/orte/mca/errmgr/dvm/errmgr_dvm_component.c +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "errmgr_dvm.h" - -/* - * Public string for version number - */ -const char *orte_errmgr_dvm_component_version_string = - "ORTE ERRMGR dvm MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int dvm_register(void); -static int dvm_open(void); -static int dvm_close(void); -static int dvm_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_errmgr_base_component_t mca_errmgr_dvm_component = { - /* Handle the general mca_component_t struct containing - * meta information about the component dvm - */ - .base_version = { - ORTE_ERRMGR_BASE_VERSION_3_0_0, - /* Component name and version */ - .mca_component_name = "dvm", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dvm_open, - .mca_close_component = dvm_close, - .mca_query_component = dvm_component_query, - .mca_register_component_params = dvm_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int my_priority; - -static int dvm_register(void) -{ - mca_base_component_t *c = &mca_errmgr_dvm_component.base_version; - - my_priority = 1000; - (void) mca_base_component_var_register(c, "priority", - "Priority of the dvm errmgr component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &my_priority); - - return ORTE_SUCCESS; -} - -static int dvm_open(void) -{ - return ORTE_SUCCESS; -} - -static int dvm_close(void) -{ - return ORTE_SUCCESS; -} - -static 
int dvm_component_query(mca_base_module_t **module, int *priority) -{ - /* used by DVM masters */ - if (ORTE_PROC_IS_MASTER) { - *priority = my_priority; - *module = (mca_base_module_t *)&orte_errmgr_dvm_module; - return ORTE_SUCCESS; - } - - *module = NULL; - *priority = -1; - return ORTE_ERROR; -} diff --git a/orte/mca/errmgr/dvm/owner.txt b/orte/mca/errmgr/dvm/owner.txt deleted file mode 100644 index 85b4416d20..0000000000 --- a/orte/mca/errmgr/dvm/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: active diff --git a/orte/mca/state/dvm/Makefile.am b/orte/mca/state/dvm/Makefile.am deleted file mode 100644 index 6122ab0e7f..0000000000 --- a/orte/mca/state/dvm/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - state_dvm.h \ - state_dvm_component.c \ - state_dvm.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_state_dvm_DSO -component_noinst = -component_install = mca_state_dvm.la -else -component_noinst = libmca_state_dvm.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_state_dvm_la_SOURCES = $(sources) -mca_state_dvm_la_LDFLAGS = -module -avoid-version -mca_state_dvm_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_state_dvm_la_SOURCES =$(sources) -libmca_state_dvm_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/state/dvm/owner.txt b/orte/mca/state/dvm/owner.txt deleted file mode 100644 index 85b4416d20..0000000000 --- a/orte/mca/state/dvm/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: active diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c deleted file mode 100644 index 4d11b21d89..0000000000 --- a/orte/mca/state/dvm/state_dvm.c +++ /dev/null @@ -1,688 +0,0 @@ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include - -#include "opal/util/output.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/filem/filem.h" -#include "orte/mca/grpcomm/grpcomm.h" -#include "orte/mca/iof/base/base.h" -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/plm/base/base.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/regx/regx.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/routed/routed.h" -#include "orte/util/session_dir.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_quit.h" -#include "orte/runtime/orte_wait.h" - -#include "orte/mca/state/state.h" -#include "orte/mca/state/base/base.h" -#include "orte/mca/state/base/state_private.h" -#include "state_dvm.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -/* local functions */ -static void init_complete(int fd, short args, void *cbdata); -static void vm_ready(int fd, short args, void *cbata); -static void check_complete(int fd, short args, void *cbdata); -static void cleanup_job(int fd, short args, void *cbdata); - -/****************** - * DVM module - used when mpirun is persistent - ******************/ -orte_state_base_module_t orte_state_dvm_module = { - init, - finalize, - orte_state_base_activate_job_state, - orte_state_base_add_job_state, - orte_state_base_set_job_state_callback, - orte_state_base_set_job_state_priority, - orte_state_base_remove_job_state, - orte_state_base_activate_proc_state, - orte_state_base_add_proc_state, - orte_state_base_set_proc_state_callback, - orte_state_base_set_proc_state_priority, - orte_state_base_remove_proc_state -}; - -static void dvm_notify(int sd, short args, void *cbdata); - -/* defined default state 
machine sequence - individual - * plm's must add a state for launching daemons - */ -static orte_job_state_t launch_states[] = { - ORTE_JOB_STATE_INIT, - ORTE_JOB_STATE_INIT_COMPLETE, - ORTE_JOB_STATE_ALLOCATE, - ORTE_JOB_STATE_ALLOCATION_COMPLETE, - ORTE_JOB_STATE_DAEMONS_LAUNCHED, - ORTE_JOB_STATE_DAEMONS_REPORTED, - ORTE_JOB_STATE_VM_READY, - ORTE_JOB_STATE_MAP, - ORTE_JOB_STATE_MAP_COMPLETE, - ORTE_JOB_STATE_SYSTEM_PREP, - ORTE_JOB_STATE_LAUNCH_APPS, - ORTE_JOB_STATE_SEND_LAUNCH_MSG, - ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, - ORTE_JOB_STATE_RUNNING, - ORTE_JOB_STATE_REGISTERED, - /* termination states */ - ORTE_JOB_STATE_TERMINATED, - ORTE_JOB_STATE_NOTIFY_COMPLETED, - ORTE_JOB_STATE_NOTIFIED, - ORTE_JOB_STATE_ALL_JOBS_COMPLETE -}; -static orte_state_cbfunc_t launch_callbacks[] = { - orte_plm_base_setup_job, - init_complete, - orte_ras_base_allocate, - orte_plm_base_allocation_complete, - orte_plm_base_daemons_launched, - orte_plm_base_daemons_reported, - vm_ready, - orte_rmaps_base_map_job, - orte_plm_base_mapping_complete, - orte_plm_base_complete_setup, - orte_plm_base_launch_apps, - orte_plm_base_send_launch_msg, - orte_state_base_local_launch_complete, - orte_plm_base_post_launch, - orte_plm_base_registered, - check_complete, - dvm_notify, - cleanup_job, - orte_quit -}; - -static orte_proc_state_t proc_states[] = { - ORTE_PROC_STATE_RUNNING, - ORTE_PROC_STATE_REGISTERED, - ORTE_PROC_STATE_IOF_COMPLETE, - ORTE_PROC_STATE_WAITPID_FIRED, - ORTE_PROC_STATE_TERMINATED -}; -static orte_state_cbfunc_t proc_callbacks[] = { - orte_state_base_track_procs, - orte_state_base_track_procs, - orte_state_base_track_procs, - orte_state_base_track_procs, - orte_state_base_track_procs -}; - -static void force_quit(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - - /* give us a chance to stop the orteds */ - orte_plm.terminate_orteds(); - OBJ_RELEASE(caddy); -} - -/************************ - * API Definitions - 
************************/ -static int init(void) -{ - int i, rc; - int num_states; - - /* setup the state machines */ - OBJ_CONSTRUCT(&orte_job_states, opal_list_t); - OBJ_CONSTRUCT(&orte_proc_states, opal_list_t); - - /* setup the job state machine */ - num_states = sizeof(launch_states) / sizeof(orte_job_state_t); - for (i=0; i < num_states; i++) { - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(launch_states[i], - launch_callbacks[i], - ORTE_SYS_PRI))) { - ORTE_ERROR_LOG(rc); - } - } - /* add the termination response */ - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED, - orte_quit, ORTE_SYS_PRI))) { - ORTE_ERROR_LOG(rc); - } - /* add a default error response */ - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_FORCED_EXIT, - force_quit, ORTE_ERROR_PRI))) { - ORTE_ERROR_LOG(rc); - } - /* add callback to report progress, if requested */ - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_REPORT_PROGRESS, - orte_state_base_report_progress, ORTE_ERROR_PRI))) { - ORTE_ERROR_LOG(rc); - } - if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) { - orte_state_base_print_job_state_machine(); - } - - /* populate the proc state machine to allow us to - * track proc lifecycle changes - */ - num_states = sizeof(proc_states) / sizeof(orte_proc_state_t); - for (i=0; i < num_states; i++) { - if (ORTE_SUCCESS != (rc = orte_state.add_proc_state(proc_states[i], - proc_callbacks[i], - ORTE_SYS_PRI))) { - ORTE_ERROR_LOG(rc); - } - } - if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) { - orte_state_base_print_proc_state_machine(); - } - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - /* cleanup the proc state machine */ - while (NULL != (item = opal_list_remove_first(&orte_proc_states))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&orte_proc_states); - - return ORTE_SUCCESS; -} - -static void files_ready(int status, 
void *cbdata) -{ - orte_job_t *jdata = (orte_job_t*)cbdata; - - if (ORTE_SUCCESS != status) { - ORTE_FORCED_TERMINATE(status); - return; - } else { - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); - } -} - -static void init_complete(int sd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - - ORTE_ACQUIRE_OBJECT(caddy); - - /* nothing to do here but move along - if it is the - * daemon job, then next step is allocate */ - ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE); - OBJ_RELEASE(caddy); -} - -static void vm_ready(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - int rc; - opal_buffer_t *buf; - orte_daemon_cmd_flag_t command = ORTE_DAEMON_DVM_NIDMAP_CMD; - orte_grpcomm_signature_t *sig; - opal_buffer_t *wireup; - orte_job_t *jptr; - orte_proc_t *dmn; - opal_byte_object_t bo, *boptr; - int8_t flag; - int32_t numbytes, v; - char *nidmap; - opal_list_t *modex; - opal_value_t *val, *kv; - - ORTE_ACQUIRE_OBJECT(caddy); - - /* if this is my job, then we are done */ - if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) { - /* if there is only one daemon in the job, then there - * is just a little bit to do */ - if (1 == orte_process_info.num_procs) { - if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &orte_node_regex))) { - ORTE_ERROR_LOG(rc); - return; - } - orte_nidmap_communicated = true; - } - } else { - /* send the daemon map to every daemon in this DVM - we - * do this here so we don't have to do it for every - * job we are going to launch */ - buf = OBJ_NEW(opal_buffer_t); - opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD); - /* if we couldn't provide the allocation regex on the orted - * cmd line, then we need to provide all the info here */ - if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &nidmap))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - 
return; - } - orte_nidmap_communicated = true; - } else { - nidmap = NULL; - } - opal_dss.pack(buf, &nidmap, 1, OPAL_STRING); - if (NULL != nidmap) { - free(nidmap); - } - /* provide the info on the capabilities of each node */ - if (!orte_node_info_communicated) { - flag = 1; - opal_dss.pack(buf, &flag, 1, OPAL_INT8); - if (ORTE_SUCCESS != (rc = orte_regx.encode_nodemap(buf))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - return; - } - orte_node_info_communicated = true; - /* get wireup info for daemons */ - jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - wireup = OBJ_NEW(opal_buffer_t); - for (v=0; v < jptr->procs->size; v++) { - if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) { - continue; - } - val = NULL; - if (opal_pmix.legacy_get()) { - if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } else { - /* pack the name of the daemon */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - /* pack the URI */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - OBJ_RELEASE(val); - } - } else { - if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } else { - /* pack the name of the daemon */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - /* the data is returned as a list of key-value pairs in the opal_value_t */ - if (OPAL_PTR != val->type) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - modex = 
(opal_list_t*)val->data.ptr; - numbytes = (int32_t)opal_list_get_size(modex); - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - OPAL_LIST_FOREACH(kv, modex, opal_value_t) { - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - } - OPAL_LIST_RELEASE(modex); - OBJ_RELEASE(val); - } - } - } - /* put it in a byte object for xmission */ - opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); - /* pack the byte object - zero-byte objects are fine */ - bo.size = numbytes; - boptr = &bo; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - OBJ_RELEASE(buf); - return; - } - /* release the data since it has now been copied into our buffer */ - if (NULL != bo.bytes) { - free(bo.bytes); - } - OBJ_RELEASE(wireup); - } else { - flag = 0; - opal_dss.pack(buf, &flag, 1, OPAL_INT8); - } - - /* goes to all daemons */ - sig = OBJ_NEW(orte_grpcomm_signature_t); - sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; - sig->signature[0].vpid = ORTE_VPID_WILDCARD; - if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buf))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(sig); - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); - return; - } - OBJ_RELEASE(buf); - } - /* notify that the vm is ready */ - fprintf(stdout, "DVM ready\n"); fflush(stdout); - OBJ_RELEASE(caddy); - return; - } - - /* progress the job */ - caddy->jdata->state = ORTE_JOB_STATE_VM_READY; - - /* position any required files */ - if (ORTE_SUCCESS != orte_filem.preposition_files(caddy->jdata, files_ready, caddy->jdata)) { - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); - } - - /* cleanup */ - OBJ_RELEASE(caddy); -} - -static 
void check_complete(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - orte_proc_t *proc; - int i; - orte_node_t *node; - orte_job_map_t *map; - orte_std_cntr_t index; - char *rtmod; - - ORTE_ACQUIRE_OBJECT(caddy); - jdata = caddy->jdata; - - opal_output_verbose(2, orte_state_base_framework.framework_output, - "%s state:dvm:check_job_complete on job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid)); - - if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - /* just check to see if the daemons are complete */ - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm:check_job_complete - received NULL job, checking daemons", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (0 == orte_routed.num_routes(rtmod)) { - /* orteds are done! */ - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s orteds complete - exiting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - if (NULL == jdata) { - jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - } - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED); - OBJ_RELEASE(caddy); - return; - } - OBJ_RELEASE(caddy); - return; - } - - /* mark the job as terminated, but don't override any - * abnormal termination flags - */ - if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) { - jdata->state = ORTE_JOB_STATE_TERMINATED; - } - - /* tell the IOF that the job is complete */ - if (NULL != orte_iof.complete) { - orte_iof.complete(jdata); - } - - /* tell the PMIx subsystem the job is complete */ - if (NULL != opal_pmix.server_deregister_nspace) { - opal_pmix.server_deregister_nspace(jdata->jobid, NULL, NULL); - } - - /* Release the resources used by this job. 
Since some errmgrs may want - * to continue using resources allocated to the job as part of their - * fault recovery procedure, we only do this once the job is "complete". - * Note that an aborted/killed job -is- flagged as complete and will - * therefore have its resources released. We need to do this after - * we call the errmgr so that any attempt to restart the job will - * avoid doing so in the exact same place as the current job - */ - if (NULL != jdata->map) { - map = jdata->map; - for (index = 0; index < map->nodes->size; index++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) { - continue; - } - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm releasing procs from node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name)); - for (i = 0; i < node->procs->size; i++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { - continue; - } - if (proc->name.jobid != jdata->jobid) { - /* skip procs from another job */ - continue; - } - if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_TOOL)) { - node->slots_inuse--; - node->num_procs--; - } - - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm releasing proc %s from node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), node->name)); - /* set the entry in the node array to NULL */ - opal_pointer_array_set_item(node->procs, i, NULL); - /* release the proc once for the map entry */ - OBJ_RELEASE(proc); - } - /* set the node location to NULL */ - opal_pointer_array_set_item(map->nodes, index, NULL); - /* maintain accounting */ - OBJ_RELEASE(node); - /* flag that the node is no longer in a map */ - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - OBJ_RELEASE(map); - jdata->map = NULL; - } - - if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { - /* this was a debugger daemon. 
notify that a debugger has detached */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH); - } else if (jdata->state != ORTE_JOB_STATE_NOTIFIED) { - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm:check_job_completed state is terminated - activating notify", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFY_COMPLETED); - /* mark the job as notified */ - jdata->state = ORTE_JOB_STATE_NOTIFIED; - } - - OBJ_RELEASE(caddy); -} - -static void cleanup_job(int sd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - - ORTE_ACQUIRE_OBJECT(caddy); - jdata = caddy->jdata; - - /* remove this object from the job array */ - opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, NULL); - - OBJ_RELEASE(caddy); -} - -typedef struct { - opal_list_t *info; - orte_job_t *jdata; -} mycaddy_t; - -static void notify_complete(int status, void *cbdata) -{ - mycaddy_t *mycaddy = (mycaddy_t*)cbdata; - - OPAL_LIST_RELEASE(mycaddy->info); - ORTE_ACTIVATE_JOB_STATE(mycaddy->jdata, ORTE_JOB_STATE_NOTIFIED); - OBJ_RELEASE(mycaddy->jdata); - free(mycaddy); -} - -static void dvm_notify(int sd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; - orte_proc_t *pptr=NULL; - int ret; - opal_buffer_t *reply; - orte_daemon_cmd_flag_t command; - orte_grpcomm_signature_t *sig; - bool notify = true; - opal_list_t *info; - opal_value_t *val; - opal_process_name_t pname, *proc, pnotify; - mycaddy_t *mycaddy; - - /* see if there was any problem */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, (void**)&pptr, OPAL_PTR) && NULL != pptr) { - ret = pptr->exit_code; - /* or whether we got cancelled by the user */ - } else if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CANCELLED, NULL, OPAL_BOOL)) { - ret = ORTE_ERR_JOB_CANCELLED; - } else { - ret = 
ORTE_SUCCESS; - } - - if (0 == ret && orte_get_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION, NULL, OPAL_BOOL)) { - notify = false; - } - /* if the jobid matches that of the requestor, then don't notify */ - proc = &pnotify; - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&proc, OPAL_NAME)) { - if (pnotify.jobid == jdata->jobid) { - notify = false; - } - } - - if (notify) { - /* the source is the job that terminated */ - pname.jobid = jdata->jobid; - pname.vpid = OPAL_VPID_WILDCARD; - - info = OBJ_NEW(opal_list_t); - /* ensure this only goes to the job terminated event handler */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_EVENT_NON_DEFAULT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(info, &val->super); - /* tell the server not to cache the event as subsequent jobs - * do not need to know about it */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_EVENT_DO_NOT_CACHE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(info, &val->super); - /* provide the status */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_JOB_TERM_STATUS); - val->type = OPAL_STATUS; - val->data.status = ret; - opal_list_append(info, &val->super); - /* tell the requestor which job or proc */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PROCID); - val->type = OPAL_NAME; - val->data.name.jobid = jdata->jobid; - if (NULL != pptr) { - val->data.name.vpid = pptr->name.vpid; - } else { - val->data.name.vpid = ORTE_VPID_WILDCARD; - } - opal_list_append(info, &val->super); - /* pass along the proc to be notified */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_EVENT_CUSTOM_RANGE); - val->type = OPAL_NAME; - val->data.name.jobid = pnotify.jobid; - val->data.name.vpid = pnotify.vpid; - opal_list_append(info, &val->super); - /* setup the caddy */ - mycaddy = (mycaddy_t*)malloc(sizeof(mycaddy_t)); - mycaddy->info = info; - OBJ_RETAIN(jdata); - 
mycaddy->jdata = jdata; - opal_pmix.server_notify_event(OPAL_ERR_JOB_TERMINATED, &pname, - info, notify_complete, mycaddy); - } - - /* now ensure that _all_ daemons know that this job has terminated so even - * those that did not participate in it will know to cleanup the resources - * they assigned to the job. This is necessary now that the mapping function - * has been moved to the backend daemons - otherwise, non-participating daemons - * retain the slot assignments on the participating daemons, and then incorrectly - * map subsequent jobs thinking those nodes are still "busy" */ - reply = OBJ_NEW(opal_buffer_t); - command = ORTE_DAEMON_DVM_CLEANUP_JOB_CMD; - opal_dss.pack(reply, &command, 1, ORTE_DAEMON_CMD); - opal_dss.pack(reply, &jdata->jobid, 1, ORTE_JOBID); - sig = OBJ_NEW(orte_grpcomm_signature_t); - sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; - sig->signature[0].vpid = ORTE_VPID_WILDCARD; - orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, reply); - OBJ_RELEASE(reply); - OBJ_RELEASE(sig); -} diff --git a/orte/mca/state/dvm/state_dvm.h b/orte/mca/state/dvm/state_dvm.h deleted file mode 100644 index 5137d8422d..0000000000 --- a/orte/mca/state/dvm/state_dvm.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_STATE_DVM_EXPORT_H -#define MCA_STATE_DVM_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/state/state.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_dvm_component; - -ORTE_DECLSPEC extern orte_state_base_module_t orte_state_dvm_module; - -END_C_DECLS - -#endif /* MCA_STATE_DVM_EXPORT_H */ diff --git a/orte/mca/state/dvm/state_dvm_component.c b/orte/mca/state/dvm/state_dvm_component.c deleted file mode 100644 index df17c61ed9..0000000000 --- a/orte/mca/state/dvm/state_dvm_component.c +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/mca/state/state.h" -#include "orte/mca/state/base/base.h" -#include "state_dvm.h" - -/* - * Public string for version number - */ -const char *orte_state_dvm_component_version_string = - "ORTE STATE dvm MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int state_dvm_open(void); -static int state_dvm_close(void); -static int state_dvm_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_state_base_component_t mca_state_dvm_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component - */ - .base_version = { - ORTE_STATE_BASE_VERSION_1_0_0, - /* Component name and version */ - .mca_component_name = "dvm", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, 
ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = state_dvm_open, - .mca_close_component = state_dvm_close, - .mca_query_component = state_dvm_component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int state_dvm_open(void) -{ - return ORTE_SUCCESS; -} - -static int state_dvm_close(void) -{ - return ORTE_SUCCESS; -} - -static int state_dvm_component_query(mca_base_module_t **module, int *priority) -{ - /* used by DVM masters */ - if (ORTE_PROC_IS_MASTER) { - *priority = 100; - *module = (mca_base_module_t *)&orte_state_dvm_module; - return ORTE_SUCCESS; - } - - *priority = 0; - *module = NULL; - return ORTE_ERR_NOT_AVAILABLE; -} diff --git a/orte/tools/Makefile.am b/orte/tools/Makefile.am index 228f5f43af..a1a3fcd35d 100644 --- a/orte/tools/Makefile.am +++ b/orte/tools/Makefile.am @@ -42,12 +42,4 @@ DIST_SUBDIRS += \ tools/wrappers \ tools/orte-top \ tools/orte-info \ - tools/orte-server \ - tools/orte-dvm \ - tools/ompi-prun - -if OPAL_WANT_PRUN -SUBDIRS += \ - tools/ompi-prun \ - tools/orte-dvm -endif + tools/orte-server diff --git a/orte/tools/ompi-prun/Makefile.am b/orte/tools/ompi-prun/Makefile.am deleted file mode 100644 index 17ace88ea3..0000000000 --- a/orte/tools/ompi-prun/Makefile.am +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. 
-# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This is not quite in the Automake spirit, but we have to do it. -# Since the totalview portion of the library must be built with -g, we -# must eliminate the CFLAGS that are passed in here by default (which -# may already have debugging and/or optimization flags). We use -# post-processed forms of the CFLAGS in the library targets down -# below. - -CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS) - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = ompi-prun.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = ompi-prun - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -endif # OPAL_INSTALL_BINARIES - -ompi_prun_SOURCES = \ - main.c \ - prun.c \ - prun.h - -ompi_prun_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/ompi-prun/main.c b/orte/tools/ompi-prun/main.c deleted file mode 100644 index 15b205b137..0000000000 --- a/orte/tools/ompi-prun/main.c +++ /dev/null @@ -1,33 +0,0 @@ -/*************************************************************************** - * * - * Open MPI: Open Source High Performance Computing * - * * - * http://www.open-mpi.org/ * - * * - ***************************************************************************/ - -#include "prun.h" - -int main(int argc, char *argv[]) -{ - return prun(argc, argv); -} - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * 
Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ diff --git a/orte/tools/ompi-prun/ompi-prun.1in b/orte/tools/ompi-prun/ompi-prun.1in deleted file mode 100644 index 32b8894346..0000000000 --- a/orte/tools/ompi-prun/ompi-prun.1in +++ /dev/null @@ -1,1597 +0,0 @@ -.\" -*- nroff -*- -.\" Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" Copyright (c) 2017-2018 Intel, Inc. All rights reserved. -.\" Copyright (c) 2017 Los Alamos National Security, LLC. All rights -.\" reserved. -.\" $COPYRIGHT$ -.\" -.\" Man page for PSRVR's prun command -.\" -.\" .TH name section center-footer left-footer center-header -.TH PRUN 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -prun \- Execute serial and parallel jobs with the PMIx Reference Server. - -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.PP -Single Process Multiple Data (SPMD) Model: - -.B prun -[ options ] -.B -[ ] -.P - -Multiple Instruction Multiple Data (MIMD) Model: - -.B prun -[ global_options ] - [ local_options1 ] -.B -[ ] : - [ local_options2 ] -.B -[ ] : - ... 
: - [ local_optionsN ] -.B -[ ] -.P - -Note that in both models, invoking \fIprun\fP via an absolute path -name is equivalent to specifying the \fI--prefix\fP option with a -\fI\fR value equivalent to the directory where \fIprun\fR -resides, minus its last subdirectory. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local - -. -.\" ************************** -.\" Quick Summary Section -.\" ************************** -.SH QUICK SUMMARY -. -If you are simply looking for how to run an application, you -probably want to use a command line of the following form: - - \fB%\fP prun [ -np X ] [ --hostfile ] - -This will run X copies of \fI\fR in your current run-time -environment (if running under a supported resource manager, PSRVR's -\fIprun\fR will usually automatically use the corresponding resource manager -process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR, -which require the use of a hostfile, or will default to running all X -copies on the localhost), scheduling (by default) in a round-robin fashion by -CPU slot. See the rest of this page for more details. -.P -Please note that prun automatically binds processes. Three binding patterns are used in the absence of any further directives: -.TP 18 -.B Bind to core: -when the number of processes is <= 2 -. -. -.TP -.B Bind to socket: -when the number of processes is > 2 -. -. -.TP -.B Bind to none: -when oversubscribed -. -. -.P -If your application uses threads, then you probably want to ensure that you are -either not bound at all (by specifying --bind-to none), or bound to multiple cores -using an appropriate binding level or specific number of processing elements per -application process. -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH OPTIONS -. -.I prun -will send the name of the directory where it was invoked on the local -node to each of the remote nodes, and attempt to change to that -directory. 
See the "Current Working Directory" section below for further -details. -.\" -.\" Start options listing -.\" Indent 10 characters from start of first column to start of second column -.TP 10 -.B -The program executable. This is identified as the first non-recognized argument -to prun. -. -. -.TP -.B -Pass these run-time arguments to every new process. These must always -be the last arguments to \fIprun\fP. If an app context file is used, -\fI\fP will be ignored. -. -. -.TP -.B -h\fR,\fP --help -Display help for this command -. -. -.TP -.B -q\fR,\fP --quiet -Suppress informative messages from prun during application execution. -. -. -.TP -.B -v\fR,\fP --verbose -Be verbose -. -. -.TP -.B -V\fR,\fP --version -Print version number. If no other arguments are given, this will also -cause prun to exit. -. -. -.TP -.B -N \fR\fP -.br -Launch num processes per node on all allocated nodes (synonym for npernode). -. -. -. -.TP -.B -display-map\fR,\fP --display-map -Display a table showing the mapped location of each process prior to launch. -. -. -. -.TP -.B -display-allocation\fR,\fP --display-allocation -Display the detected resource allocation. -. -. -. -.TP -.B -output-proctable\fR,\fP --output-proctable -Output the debugger proctable after launch. -. -. -. -.TP -.B -max-vm-size\fR,\fP --max-vm-size \fR\fP -Number of processes to run. -. -. -. -.TP -.B -novm\fR,\fP --novm -Execute without creating an allocation-spanning virtual machine (only start -daemons on nodes hosting application procs). -. -. -. -.TP -.B -hnp\fR,\fP --hnp \fR\fP -Specify the URI of the \fRpsrvr\fP process, or the name of the file (specified as -file:filename) that contains that info. -. -. -. -.P -Use one of the following options to specify which hosts (nodes) within the \fRpsrvr\fP to run on. -. -. -.TP -.B -H\fR,\fP -host\fR,\fP --host \fR\fP -List of hosts on which to invoke processes. -. -. -.TP -.B -hostfile\fR,\fP --hostfile \fR\fP -Provide a hostfile to use. 
-.\" JJH - Should have man page for how to format a hostfile properly. -. -. -.TP -.B -default-hostfile\fR,\fP --default-hostfile \fR\fP -Provide a default hostfile. -. -. -.TP -.B -machinefile\fR,\fP --machinefile \fR\fP -Synonym for \fI-hostfile\fP. -. -. -. -. -.TP -.B -cpu-set\fR,\fP --cpu-set \fR\fP -Restrict launched processes to the specified logical cpus on each node (comma-separated -list). Note that the binding options will still apply within the specified envelope - e.g., -you can elect to bind each process to only one cpu within the specified cpu set. -. -. -. -.P -The following options specify the number of processes to launch. Note that none -of the options imply a particular binding policy - e.g., requesting N processes -for each socket does not imply that the processes will be bound to the socket. -. -. -.TP -.B -c\fR,\fP -n\fR,\fP --n\fR,\fP -np \fR<#>\fP -Run this many copies of the program on the given nodes. This option -indicates that the specified file is an executable program and not an -application context. If no value is provided for the number of copies to -execute (i.e., neither the "-np" nor its synonyms are provided on the command -line), prun will automatically execute a copy of the program on -each process slot (see below for description of a "process slot"). This -feature, however, can only be used in the SPMD model and will return an -error (without beginning execution of the application) otherwise. -. -. -.TP -.B —map-by ppr:N: -Launch N times the number of objects of the specified type on each node. -. -. -.TP -.B -npersocket\fR,\fP --npersocket \fR<#persocket>\fP -On each node, launch this many processes times the number of processor -sockets on the node. -The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option. -(deprecated in favor of --map-by ppr:n:socket) -. -. -.TP -.B -npernode\fR,\fP --npernode \fR<#pernode>\fP -On each node, launch this many processes. -(deprecated in favor of --map-by ppr:n:node) -. 
-. -.TP -.B -pernode\fR,\fP --pernode -On each node, launch one process -- equivalent to \fI-npernode\fP 1. -(deprecated in favor of --map-by ppr:1:node) -. -. -. -. -.P -To map processes: -. -. -.TP -.B --map-by \fR\fP -Map to the specified object, defaults to \fIsocket\fP. Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, socket, numa, -board, node, sequential, distance, and ppr. Any object can include -modifiers by adding a \fR:\fP and any combination of PE=n (bind n -processing elements to each proc), SPAN (load -balance the processes across the allocation), OVERSUBSCRIBE (allow -more processes on a node than processing elements), and NOOVERSUBSCRIBE. -This includes PPR, where the pattern would be terminated by another colon -to separate it from the modifiers. -. -.TP -.B -bycore\fR,\fP --bycore -Map processes by core (deprecated in favor of --map-by core) -. -.TP -.B -byslot\fR,\fP --byslot -Map and rank processes round-robin by slot. -. -.TP -.B -nolocal\fR,\fP --nolocal -Do not run any copies of the launched application on the same node as -prun is running. This option will override listing the localhost -with \fB--host\fR or any other host-specifying mechanism. -. -.TP -.B -nooversubscribe\fR,\fP --nooversubscribe -Do not oversubscribe any nodes; error (without starting any processes) -if the requested number of processes would cause oversubscription. -This option implicitly sets "max_slots" equal to the "slots" value for -each node. (Enabled by default). -. -.TP -.B -oversubscribe\fR,\fP --oversubscribe -Nodes are allowed to be oversubscribed, even on a managed system, and -overloading of processing elements. -. -.TP -.B -bynode\fR,\fP --bynode -Launch processes one per node, cycling by node in a round-robin -fashion. This spreads processes evenly among nodes and assigns -ranks in a round-robin, "by node" manner. -. -.TP -.B -cpu-list\fR,\fP --cpu-list \fR\fP -List of processor IDs to bind processes to [default=NULL]. -. -. -. -. 
-.P -To order processes' ranks: -. -. -.TP -.B --rank-by \fR\fP -Rank in round-robin fashion according to the specified object, -defaults to \fIslot\fP. Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, -socket, numa, board, and node. -. -. -. -. -.P -For process binding: -. -.TP -.B --bind-to \fR\fP -Bind processes to the specified object, defaults to \fIcore\fP. Supported options -include slot, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board, and none. -. -.TP -.B -cpus-per-proc\fR,\fP --cpus-per-proc \fR<#perproc>\fP -Bind each process to the specified number of cpus. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -cpus-per-rank\fR,\fP --cpus-per-rank \fR<#perrank>\fP -Alias for \fI-cpus-per-proc\fP. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -bind-to-core\fR,\fP --bind-to-core -Bind processes to cores (deprecated in favor of --bind-to core) -. -.TP -.B -bind-to-socket\fR,\fP --bind-to-socket -Bind processes to processor sockets (deprecated in favor of --bind-to socket) -. -.TP -.B -report-bindings\fR,\fP --report-bindings -Report any bindings for launched processes. -. -. -. -. -.P -For rankfiles: -. -. -.TP -.B -rf\fR,\fP --rankfile \fR\fP -Provide a rankfile file. -. -. -. -. -.P -To manage standard I/O: -. -. -.TP -.B -output-filename\fR,\fP --output-filename \fR\fP -Redirect the stdout, stderr, and stddiag of all processes to a process-unique version of -the specified filename. Any directories in the filename will automatically be created. -Each output file will consist of filename.id, where the id will be the -processes' rank, left-filled with -zero's for correct ordering in listings. -. -. -.TP -.B -stdin\fR,\fP --stdin\fR \fP -The rank of the process that is to receive stdin. The -default is to forward stdin to rank 0, but this option -can be used to forward stdin to any process. It is also acceptable to -specify \fInone\fP, indicating that no processes are to receive stdin. -. -. 
-.TP -.B -merge-stderr-to-stdout\fR,\fP --merge-stderr-to-stdout -Merge stderr to stdout for each process. -. -. -.TP -.B -tag-output\fR,\fP --tag-output -Tag each line of output to stdout, stderr, and stddiag with \fB[jobid, MCW_rank]\fP -indicating the process jobid and rank of the process that generated the output, -and the channel which generated it. -. -. -.TP -.B -timestamp-output\fR,\fP --timestamp-output -Timestamp each line of output to stdout, stderr, and stddiag. -. -. -.TP -.B -xml\fR,\fP --xml -Provide all output to stdout, stderr, and stddiag in an xml format. -. -. -.TP -.B -xml-file\fR,\fP --xml-file \fR\fP -Provide all output in XML format to the specified file. -. -. -.TP -.B -xterm\fR,\fP --xterm \fR\fP -Display the output from the processes identified by their ranks in separate xterm windows. The ranks are specified -as a comma-separated list of ranges, with a -1 indicating all. A separate -window will be created for each specified process. -.B Note: -xterm will normally terminate the window upon termination of the process running -within it. However, by adding a "!" to the end of the list of specified ranks, -the proper options will be provided to ensure that xterm keeps the window open -\fIafter\fP the process terminates, thus allowing you to see the process' output. -Each xterm window will subsequently need to be manually closed. -.B Note: -In some environments, xterm may require that the executable be in the user's -path, or be specified in absolute or relative terms. Thus, it may be necessary -to specify a local executable as "./foo" instead of just "foo". If xterm fails to -find the executable, prun will hang, but still respond correctly to a ctrl-c. -If this happens, please check that the executable is being specified correctly -and try again. -. -. -. -. -.P -To manage files and runtime environment: -. -. -.TP -.B -path\fR,\fP --path \fR\fP - that will be used when attempting to locate the requested -executables. 
This is used prior to using the local PATH setting. -. -. -.TP -.B --prefix \fR\fP -Prefix directory that will be used to set the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote node before invoking -the target process. See the "Remote Execution" section, below. -. -. -.TP -.B --noprefix -Disable the automatic --prefix behavior -. -. -.TP -.B -s\fR,\fP --preload-binary -Copy the specified executable(s) to remote machines prior to starting remote processes. The -executables will be copied to the session directory and will be deleted upon -completion of the job. -. -. -.TP -.B --preload-files \fR\fP -Preload the comma separated list of files to the current working directory of the remote -machines where processes will be launched prior to starting those processes. -. -. -.TP -.B -set-cwd-to-session-dir\fR,\fP --set-cwd-to-session-dir -Set the working directory of the started processes to their session directory. -. -. -.TP -.B -wd \fR\fP -Synonym for \fI-wdir\fP. -. -. -.TP -.B -wdir \fR\fP -Change to the directory before the user's program executes. -See the "Current Working Directory" section for notes on relative paths. -.B Note: -If the \fI-wdir\fP option appears both on the command line and in an -application context, the context will take precedence over the command -line. Thus, if the path to the desired wdir is different -on the backend nodes, then it must be specified as an absolute path that -is correct for the backend node. -. -. -.TP -.B -x \fR\fP -Export the specified environment variables to the remote nodes before -executing the program. Only one environment variable can be specified -per \fI-x\fP option. Existing environment variables can be specified -or new variable names specified with corresponding values. For -example: - \fB%\fP prun -x DISPLAY -x OFILE=/tmp/out ... - -The parser for the \fI-x\fP option is not very sophisticated; it does -not even understand quoted values. 
Users are advised to set variables -in the environment, and then use \fI-x\fP to export (not define) them. -. -. -. -. -.P -Setting MCA parameters: -. -. -.TP -.B -gpmca\fR,\fP --gpmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -pmca\fR,\fP --pmca \fR \fP -Send arguments to various MCA modules. See the "MCA" section, below. -. -. -.TP -.B -am \fR\fP -Aggregate MCA parameter set file list. -. -. -.TP -.B -tune\fR,\fP --tune \fR\fP -Specify a tune file to set arguments for various MCA modules and environment variables. -See the "Setting MCA parameters and environment variables from file" section, below. -. -. -. -. -.P -For debugging: -. -. -.TP -.B -debug\fR,\fP --debug -Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP -MCA parameter. -. -. -.TP -.B --get-stack-traces -When paired with the -.B --timeout -option, -.I prun -will obtain and print out stack traces from all launched processes -that are still alive when the timeout expires. Note that obtaining -stack traces can take a little time and produce a lot of output, -especially for large process-count jobs. -. -. -.TP -.B -debugger\fR,\fP --debugger \fR\fP -Sequence of debuggers to search for when \fI--debug\fP is used (i.e. -a synonym for \fIorte_base_user_debugger\fP MCA parameter). -. -. -.TP -.B --timeout \fR -The maximum number of seconds that -.I prun -will run. After this many seconds, -.I prun -will abort the launched job and exit with a non-zero exit status. -Using -.B --timeout -can be also useful when combined with the -.B --get-stack-traces -option. -. -. -.TP -.B -tv\fR,\fP --tv -Launch processes under the TotalView debugger. -Deprecated backwards compatibility flag. Synonym for \fI--debug\fP. -. -. -. -. -.P -There are also other options: -. -. 
-.TP -.B --allow-run-as-root -Allow -.I prun -to run when executed by the root user -.RI ( prun -defaults to aborting when launched as the root user). -. -. -.TP -.B --app \fR\fP -Provide an appfile, ignoring all other command line options. -. -. -.TP -.B -cf\fR,\fP --cartofile \fR\fP -Provide a cartography file. -. -. -.TP -.B -continuous\fR,\fP --continuous -Job is to run until explicitly terminated. -. -. -.TP -.B -disable-recovery\fR,\fP --disable-recovery -Disable recovery (resets all recovery options to off). -. -. -.TP -.B -do-not-launch\fR,\fP --do-not-launch -Perform all necessary operations to prepare to launch the application, but do not actually launch it. -. -. -.TP -.B -do-not-resolve\fR,\fP --do-not-resolve -Do not attempt to resolve interfaces. -. -. -.TP -.B -enable-recovery\fR,\fP --enable-recovery -Enable recovery from process failure [Default = disabled]. -. -. -.TP -.B -index-argv-by-rank\fR,\fP --index-argv-by-rank -Uniquely index argv[0] for each process using its rank. -. -. -.TP -.B -max-restarts\fR,\fP --max-restarts \fR\fP -Max number of times to restart a failed process. -. -. -.TP -.B --ppr \fR\fP -Comma-separated list of number of processes on a given resource type [default: none]. -. -. -.TP -.B -report-child-jobs-separately\fR,\fP --report-child-jobs-separately -Return the exit status of the primary job only. -. -. -.TP -.B -report-events\fR,\fP --report-events \fR\fP -Report events to a tool listening at the specified URI. -. -. -.TP -.B -report-pid\fR,\fP --report-pid \fR\fP -Print out prun's PID during startup. The channel must be either a '-' to indicate -that the pid is to be output to stdout, a '+' to indicate that the pid is to be -output to stderr, or a filename to which the pid is to be written. -. -. -.TP -.B -report-uri\fR,\fP --report-uri \fR\fP -Print out prun's URI during startup. 
The channel must be either a '-' to indicate -that the URI is to be output to stdout, a '+' to indicate that the URI is to be -output to stderr, or a filename to which the URI is to be written. -. -. -.TP -.B -show-progress\fR,\fP --show-progress -Output a brief periodic report on launch progress. -. -. -.TP -.B -terminate\fR,\fP --terminate -Terminate the DVM. -. -. -.TP -.B -use-hwthread-cpus\fR,\fP --use-hwthread-cpus -Use hardware threads as independent cpus. -. -. -.TP -.B -use-regexp\fR,\fP --use-regexp -Use regular expressions for launch. -. -. -. -. -.P -The following options are useful for developers; they are not generally -useful to most users: -. -.TP -.B -d\fR,\fP --debug-devel -Enable debugging. This is not generally useful for most users. -. -. -.TP -.B -display-devel-allocation\fR,\fP --display-devel-allocation -Display a detailed list of the allocation being used by this job. -. -. -.TP -.B -display-devel-map\fR,\fP --display-devel-map -Display a more detailed table showing the mapped location of each process prior to launch. -. -. -.TP -.B -display-diffable-map\fR,\fP --display-diffable-map -Display a diffable process map just before launch. -. -. -.TP -.B -display-topo\fR,\fP --display-topo -Display the topology as part of the process map just before launch. -. -. -.TP -.B --report-state-on-timeout -When paired with the -.B --timeout -command line option, report the run-time subsystem state of each -process when the timeout expires. -. -. -.P -There may be other options listed with \fIprun --help\fP. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -One invocation of \fIprun\fP starts an application running under PSRVR. If the application is single process multiple data (SPMD), the application -can be specified on the \fIprun\fP command line. 
- -If the application is multiple instruction multiple data (MIMD), comprising of -multiple programs, the set of programs and argument can be specified in one of -two ways: Extended Command Line Arguments, and Application Context. -.PP -An application context describes the MIMD program set including all arguments -in a separate file. -.\" See appcontext(5) for a description of the application context syntax. -This file essentially contains multiple \fIprun\fP command lines, less the -command name itself. The ability to specify different options for different -instantiations of a program is another reason to use an application context. -.PP -Extended command line arguments allow for the description of the application -layout on the command line using colons (\fI:\fP) to separate the specification -of programs and arguments. Some options are globally set across all specified -programs (e.g. --hostfile), while others are specific to a single program -(e.g. -np). -. -. -. -.SS Specifying Host Nodes -. -Host nodes can be identified on the \fIprun\fP command line with the \fI-host\fP -option or in a hostfile. -. -.PP -For example, -. -.TP 4 -prun -H aa,aa,bb ./a.out -launches two processes on node aa and one on bb. -. -.PP -Or, consider the hostfile -. - - \fB%\fP cat myhostfile - aa slots=2 - bb slots=2 - cc slots=2 - -. -.PP -Here, we list both the host names (aa, bb, and cc) but also how many "slots" -there are for each. Slots indicate how many processes can potentially execute -on a node. For best performance, the number of slots may be chosen to be the -number of cores on the node or the number of processor sockets. If the hostfile -does not provide slots information, PSRVR will attempt to discover the number -of cores (or hwthreads, if the use-hwthreads-as-cpus option is set) and set the -number of slots to that value. This default behavior also occurs when specifying -the \fI-host\fP option with a single hostname. Thus, the command -. 
-.TP 4 -prun -H aa ./a.out -launches a number of processes equal to the number of cores on node aa. -. -.PP -. -.TP 4 -prun -hostfile myhostfile ./a.out -will launch two processes on each of the three nodes. -. -.TP 4 -prun -hostfile myhostfile -host aa ./a.out -will launch two processes, both on node aa. -. -.TP 4 -prun -hostfile myhostfile -host dd ./a.out -will find no hosts to run on and abort with an error. -That is, the specified host dd is not in the specified hostfile. -. -.PP -When running under resource managers (e.g., SLURM, Torque, etc.), -PSRVR will obtain both the hostnames and the number of slots directly -from the resource manger. -. -.SS Specifying Number of Processes -. -As we have just seen, the number of processes to run can be set using the -hostfile. Other mechanisms exist. -. -.PP -The number of processes launched can be specified as a multiple of the -number of nodes or processor sockets available. For example, -. -.TP 4 -prun -H aa,bb -npersocket 2 ./a.out -launches processes 0-3 on node aa and process 4-7 on node bb, -where aa and bb are both dual-socket nodes. -The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option, -which is discussed in a later section. -. -.TP 4 -prun -H aa,bb -npernode 2 ./a.out -launches processes 0-1 on node aa and processes 2-3 on node bb. -. -.TP 4 -prun -H aa,bb -npernode 1 ./a.out -launches one process per host node. -. -.TP 4 -prun -H aa,bb -pernode ./a.out -is the same as \fI-npernode\fP 1. -. -. -.PP -Another alternative is to specify the number of processes with the -\fI-np\fP option. Consider now the hostfile -. - - \fB%\fP cat myhostfile - aa slots=4 - bb slots=4 - cc slots=4 - -. -.PP -Now, -. -.TP 4 -prun -hostfile myhostfile -np 6 ./a.out -will launch processes 0-3 on node aa and processes 4-5 on node bb. The remaining -slots in the hostfile will not be used since the \fI-np\fP option indicated -that only 6 processes should be launched. -. 
-.SS Mapping Processes to Nodes: Using Policies -. -The examples above illustrate the default mapping of process processes -to nodes. This mapping can also be controlled with various -\fIprun\fP options that describe mapping policies. -. -. -.PP -Consider the same hostfile as above, again with \fI-np\fP 6: -. - - node aa node bb node cc - - prun 0 1 2 3 4 5 - - prun --map-by node 0 3 1 4 2 5 - - prun -nolocal 0 1 2 3 4 5 -. -.PP -The \fI--map-by node\fP option will load balance the processes across -the available nodes, numbering each process in a round-robin fashion. -. -.PP -The \fI-nolocal\fP option prevents any processes from being mapped onto the -local host (in this case node aa). While \fIprun\fP typically consumes -few system resources, \fI-nolocal\fP can be helpful for launching very -large jobs where \fIprun\fP may actually need to use noticeable amounts -of memory and/or processing time. -. -.PP -Just as \fI-np\fP can specify fewer processes than there are slots, it can -also oversubscribe the slots. For example, with the same hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -will launch processes 0-3 on node aa, 4-7 on bb, and 8-11 on cc. It will -then add the remaining two processes to whichever nodes it chooses. -. -.PP -One can also specify limits to oversubscription. For example, with the same -hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 -nooversubscribe ./a.out -will produce an error since \fI-nooversubscribe\fP prevents oversubscription. -. -.PP -Limits to oversubscription can also be specified in the hostfile itself: -. - % cat myhostfile - aa slots=4 max_slots=4 - bb max_slots=4 - cc slots=4 -. -.PP -The \fImax_slots\fP field specifies such a limit. When it does, the -\fIslots\fP value defaults to the limit. Now: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -causes the first 12 processes to be launched as before, but the remaining -two processes will be forced onto node cc. 
The other two nodes are -protected by the hostfile against oversubscription by this job. -. -.PP -Using the \fI--nooversubscribe\fR option can be helpful since PSRVR -currently does not get "max_slots" values from the resource manager. -. -.PP -Of course, \fI-np\fP can also be used with the \fI-H\fP or \fI-host\fP -option. For example, -. -.TP 4 -prun -H aa,bb -np 8 ./a.out -launches 8 processes. Since only two hosts are specified, after the first -two processes are mapped, one to aa and one to bb, the remaining processes -oversubscribe the specified hosts. -. -.PP -And here is a MIMD example: -. -.TP 4 -prun -H aa -np 1 hostname : -H bb,cc -np 2 uptime -will launch process 0 running \fIhostname\fP on node aa and processes 1 and 2 -each running \fIuptime\fP on nodes bb and cc, respectively. -. -.SS Mapping, Ranking, and Binding: Oh My! -. -PSRVR employs a three-phase procedure for assigning process locations and -ranks: -. -.TP 10 -\fBmapping\fP -Assigns a default location to each process -. -.TP 10 -\fBranking\fP -Assigns a rank value to each process -. -.TP 10 -\fBbinding\fP -Constrains each process to run on specific processors -. -.PP -The \fImapping\fP step is used to assign a default location to each process -based on the mapper being employed. Mapping by slot, node, and sequentially results -in the assignment of the processes to the node level. In contrast, mapping by object, allows -the mapper to assign the process to an actual object on each node. -. -.PP -\fBNote:\fP the location assigned to the process is independent of where it will be bound - the -assignment is used solely as input to the binding algorithm. -. -.PP -The mapping of process processes to nodes can be defined not just -with general policies but also, if necessary, using arbitrary mappings -that cannot be described by a simple policy. One can use the "sequential -mapper," which reads the hostfile line by line, assigning processes -to nodes in whatever order the hostfile specifies. 
Use the -\fI-pmca rmaps seq\fP option. For example, using the same hostfile -as before: -. -.PP -prun -hostfile myhostfile -pmca rmaps seq ./a.out -. -.PP -will launch three processes, one on each of nodes aa, bb, and cc, respectively. -The slot counts don't matter; one process is launched per line on -whatever node is listed on the line. -. -.PP -Another way to specify arbitrary mappings is with a rankfile, which -gives you detailed control over process binding as well. Rankfiles -are discussed below. -. -.PP -The second phase focuses on the \fIranking\fP of the process within -the job. PSRVR -separates this from the mapping procedure to allow more flexibility in the -relative placement of processes. This is best illustrated by considering the -following two cases where we used the —map-by ppr:2:socket option: -. -.PP - node aa node bb - - rank-by core 0 1 ! 2 3 4 5 ! 6 7 - - rank-by socket 0 2 ! 1 3 4 6 ! 5 7 - - rank-by socket:span 0 4 ! 1 5 2 6 ! 3 7 -. -.PP -Ranking by core and by slot provide the identical result - a simple -progression of ranks across each node. Ranking by -socket does a round-robin ranking within each node until all processes -have been assigned a rank, and then progresses to the next -node. Adding the \fIspan\fP modifier to the ranking directive causes -the ranking algorithm to treat the entire allocation as a single -entity - thus, the MCW ranks are assigned across all sockets before -circling back around to the beginning. -. -.PP -The \fIbinding\fP phase actually binds each process to a given set of processors. This can -improve performance if the operating system is placing processes -suboptimally. For example, it might oversubscribe some multi-core -processor sockets, leaving other sockets idle; this can lead -processes to contend unnecessarily for common resources. Or, it -might spread processes out too widely; this can be suboptimal if -application performance is sensitive to interprocess communication -costs. 
Binding can also keep the operating system from migrating -processes excessively, regardless of how optimally those processes -were placed to begin with. -. -.PP -The processors to be used for binding can be identified in terms of -topological groupings - e.g., binding to an l3cache will bind each -process to all processors within the scope of a single L3 cache within -their assigned location. Thus, if a process is assigned by the mapper -to a certain socket, then a \fI—bind-to l3cache\fP directive will -cause the process to be bound to the processors that share a single L3 -cache within that socket. -. -.PP -To help balance loads, the binding directive uses a round-robin method when binding to -levels lower than used in the mapper. For example, consider the case where a job is -mapped to the socket level, and then bound to core. Each socket will have multiple cores, -so if multiple processes are mapped to a given socket, the binding algorithm will assign -each process located to a socket to a unique core in a round-robin manner. -. -.PP -Alternatively, processes mapped by l2cache and then bound to socket will simply be bound -to all the processors in the socket where they are located. In this manner, users can -exert detailed control over relative MCW rank location and binding. -. -.PP -Finally, \fI--report-bindings\fP can be used to report bindings. -. -.PP -As an example, consider a node with two processor sockets, each comprising -four cores. We run \fIprun\fP with \fI-np 4 --report-bindings\fP and -the following additional options: -. - - % prun ... --map-by core --bind-to core - [...] ... binding child [...,0] to cpus 0001 - [...] ... binding child [...,1] to cpus 0002 - [...] ... binding child [...,2] to cpus 0004 - [...] ... binding child [...,3] to cpus 0008 - - % prun ... --map-by socket --bind-to socket - [...] ... binding child [...,0] to socket 0 cpus 000f - [...] ... binding child [...,1] to socket 1 cpus 00f0 - [...] ... 
binding child [...,2] to socket 0 cpus 000f - [...] ... binding child [...,3] to socket 1 cpus 00f0 - - % prun ... --map-by core:PE=2 --bind-to core - [...] ... binding child [...,0] to cpus 0003 - [...] ... binding child [...,1] to cpus 000c - [...] ... binding child [...,2] to cpus 0030 - [...] ... binding child [...,3] to cpus 00c0 - - % prun ... --bind-to none -. -.PP -Here, \fI--report-bindings\fP shows the binding of each process as a mask. -In the first case, the processes bind to successive cores as indicated by -the masks 0001, 0002, 0004, and 0008. In the second case, processes bind -to all cores on successive sockets as indicated by the masks 000f and 00f0. -The processes cycle through the processor sockets in a round-robin fashion -as many times as are needed. In the third case, the masks show us that -2 cores have been bound per process. In the fourth case, binding is -turned off and no bindings are reported. -. -.PP -PSRVR's support for process binding depends on the underlying -operating system. Therefore, certain process binding options may not be available -on every system. -. -.PP -Process binding can also be set with MCA parameters. -Their usage is less convenient than that of \fIprun\fP options. -On the other hand, MCA parameters can be set not only on the \fIprun\fP -command line, but alternatively in a system or user mca-params.conf file -or as environment variables, as described in the MCA section below. -Some examples include: -. -.PP - prun option MCA parameter key value - - --map-by core rmaps_base_mapping_policy core - --map-by socket rmaps_base_mapping_policy socket - --rank-by core rmaps_base_ranking_policy core - --bind-to core hwloc_base_binding_policy core - --bind-to socket hwloc_base_binding_policy socket - --bind-to none hwloc_base_binding_policy none -. -. -.SS Rankfiles -. 
-Rankfiles are text files that specify detailed information about how -individual processes should be mapped to nodes, and to which -processor(s) they should be bound. Each line of a rankfile specifies -the location of one process. The general form of each line in the -rankfile is: -. - - rank = slot= -. -.PP -For example: -. - - $ cat myrankfile - rank 0=aa slot=1:0-2 - rank 1=bb slot=0:0,1 - rank 2=cc slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. -.PP -Means that -. - - Rank 0 runs on node aa, bound to logical socket 1, cores 0-2. - Rank 1 runs on node bb, bound to logical socket 0, cores 0 and 1. - Rank 2 runs on node cc, bound to logical cores 1 and 2. -. -.PP -Rankfiles can alternatively be used to specify \fIphysical\fP processor -locations. In this case, the syntax is somewhat different. Sockets are -no longer recognized, and the slot number given must be the number of -the physical PU as most OS's do not assign a unique physical identifier -to each core in the node. Thus, a proper physical rankfile looks something -like the following: -. - - $ cat myphysicalrankfile - rank 0=aa slot=1 - rank 1=bb slot=8 - rank 2=cc slot=6 -. -.PP -This means that -. - - Rank 0 will run on node aa, bound to the core that contains physical PU 1 - Rank 1 will run on node bb, bound to the core that contains physical PU 8 - Rank 2 will run on node cc, bound to the core that contains physical PU 6 -. -.PP -Rankfiles are treated as \fIlogical\fP by default, and the MCA parameter -rmaps_rank_file_physical must be set to 1 to indicate that the rankfile -is to be considered as \fIphysical\fP. -. -.PP -The hostnames listed above are "absolute," meaning that actual -resolveable hostnames are specified. However, hostnames can also be -specified as "relative," meaning that they are specified in relation -to an externally-specified list of hostnames (e.g., by prun's --host -argument, a hostfile, or a job scheduler). -. 
-.PP -The "relative" specification is of the form "+n<X>", where X is an -integer specifying the Xth hostname in the set of all available -hostnames, indexed from 0. For example: -. - - $ cat myrankfile - rank 0=+n0 slot=1:0-2 - rank 1=+n1 slot=0:0,1 - rank 2=+n2 slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. -.PP -All socket/core slot locations are -specified as -.I logical -indexes. You can use tools such as HWLOC's "lstopo" to find the -logical indexes of sockets and cores. -. -. -.SS Application Context or Executable Program? -. -To distinguish the two different forms, \fIprun\fP -looks on the command line for the \fI--app\fP option. If -it is specified, then the file named on the command line is -assumed to be an application context. If it is not -specified, then the file is assumed to be an executable program. -. -. -. -.SS Locating Files -. -If no relative or absolute path is specified for a file, prun will first look for files by searching the directories specified -by the \fI--path\fP option. If there is no \fI--path\fP option set or -if the file is not found at the \fI--path\fP location, then prun -will search the user's PATH environment variable as defined on the -source node(s). -.PP -If a relative directory is specified, it must be relative to the initial -working directory determined by the specific starter used. For example when -using the rsh or ssh starters, the initial directory is $HOME by default. Other -starters may set the initial directory to the current working directory from -the invocation of \fIprun\fP. -. -. -. -.SS Current Working Directory -. -The \fI\-wdir\fP prun option (and its synonym, \fI\-wd\fP) allows -the user to change to an arbitrary directory before the program is -invoked. It can also be used in application context files to specify -working directories on specific nodes and/or for specific -applications. 
-.PP -If the \fI\-wdir\fP option appears both in a context file and on the -command line, the context file directory will override the command -line value. -.PP -If the \fI-wdir\fP option is specified, prun will attempt to -change to the specified directory on all of the remote nodes. If this -fails, \fIprun\fP will abort. -.PP -If the \fI-wdir\fP option is \fBnot\fP specified, prun will send -the directory name where \fIprun\fP was invoked to each of the -remote nodes. The remote nodes will try to change to that -directory. If they are unable (e.g., if the directory does not exist on -that node), then prun will use the default directory determined by -the starter. -.PP -All directory changing occurs before the user's program is invoked. -. -. -. -.SS Standard I/O -. -PSRVR directs UNIX standard input to /dev/null on all processes -except the rank 0 process. The rank 0 process -inherits standard input from \fIprun\fP. -.B Note: -The node that invoked \fIprun\fP need not be the same as the node where the -rank 0 process resides. PSRVR handles the redirection of -\fIprun\fP's standard input to the rank 0 process. -.PP -PSRVR directs UNIX standard output and error from remote nodes to the node -that invoked \fIprun\fP and prints it on the standard output/error of -\fIprun\fP. -Local processes inherit the standard output/error of \fIprun\fP and transfer -to it directly. -.PP -Thus it is possible to redirect standard I/O for applications by -using the typical shell redirection procedure on \fIprun\fP. - - \fB%\fP prun -np 2 my_app < my_input > my_output - -Note that in this example \fIonly\fP the rank 0 process will -receive the stream from \fImy_input\fP on stdin. The stdin on all the other -nodes will be tied to /dev/null. However, the stdout from all nodes will -be collected into the \fImy_output\fP file. -. -. -. -.SS Signal Propagation -. 
-When prun receives a SIGTERM or SIGINT, it will attempt to kill -the entire job by sending all processes in the job a SIGTERM, waiting -a small number of seconds, then sending all processes in the job a -SIGKILL. -. -.PP -SIGUSR1 and SIGUSR2 signals received by prun are propagated to -all processes in the job. -. -.PP -A SIGTSTP signal to prun will cause a SIGSTOP signal to be sent -to all of the programs started by prun and likewise a SIGCONT signal -to prun will cause a SIGCONT to be sent. -. -.PP -Other signals are not currently propagated -by prun. -. -. -.SS Process Termination / Signal Handling -. -During the run of an application, if any process dies abnormally -(either exiting before invoking \fIPMIx_Finalize\fP, or dying as the result of a -signal), \fIprun\fP will print out an error message and kill the rest of the -application. -.PP -. -. -.SS Process Environment -. -Processes in the application inherit their environment from the -PSRVR daemon upon the node on which they are running. The -environment is typically inherited from the user's shell. On remote -nodes, the exact environment is determined by the boot MCA module -used. The \fIrsh\fR launch module, for example, uses either -\fIrsh\fR/\fIssh\fR to launch the PSRVR daemon on remote nodes, and -typically executes one or more of the user's shell-setup files before -launching the daemon. When running dynamically linked -applications which require the \fILD_LIBRARY_PATH\fR environment -variable to be set, care must be taken to ensure that it is correctly -set when booting PSRVR. -.PP -See the "Remote Execution" section for more details. -. -. -.SS Remote Execution -. 
-PSRVR requires that the \fIPATH\fR environment variable be set to -find executables on remote nodes (this is typically only necessary in -\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled -environments typically copy the current environment to the execution -of remote jobs, so if the current environment has \fIPATH\fR and/or -\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it -set properly). If PSRVR was compiled with shared library support, -it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment -variable set on remote nodes as well (especially to find the shared -libraries required to run user applications). -.PP -However, it is not always desirable or possible to edit shell -startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The -\fI--prefix\fR option is provided for some simple configurations where -this is not possible. -.PP -The \fI--prefix\fR option takes a single argument: the base directory -on the remote node where PSRVR is installed. PSRVR will use -this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR -before executing any user applications. This allows -running jobs without having pre-configured the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote nodes. -.PP -PSRVR adds the basename of the current -node's "bindir" (the directory where PSRVR's executables are -installed) to the prefix and uses that to set the \fIPATH\fR on the -remote node. Similarly, PSRVR adds the basename of the current -node's "libdir" (the directory where PSRVR's libraries are -installed) to the prefix and uses that to set the -\fILD_LIBRARY_PATH\fR on the remote node. 
For example: -.TP 15 -Local bindir: -/local/node/directory/bin -.TP -Local libdir: -/local/node/directory/lib64 -.PP -If the following command line is used: - - \fB%\fP prun --prefix /remote/node/directory - -PSRVR will add "/remote/node/directory/bin" to the \fIPATH\fR -and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the -remote node before attempting to execute anything. -.PP -The \fI--prefix\fR option is not sufficient if the installation paths -on the remote node are different than the local node (e.g., if "/lib" -is used on the local node, but "/lib64" is used on the remote node), -or if the installation paths are something other than a subdirectory -under a common prefix. -.PP -Note that executing \fIprun\fR via an absolute pathname is -equivalent to specifying \fI--prefix\fR without the last subdirectory -in the absolute pathname to \fIprun\fR. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local -. -. -. -.SS Exported Environment Variables -. -All environment variables that are named in the form PMIX_* will automatically -be exported to new processes on the local and remote nodes. Environmental -parameters can also be set/forwarded to the new processes using the MCA -parameter \fImca_base_env_list\fP. While the syntax of the \fI\-x\fP option and MCA param -allows the definition of new variables, note that the parser -for these options is currently not very sophisticated - it does not even -understand quoted values. Users are advised to set variables in the -environment and use the option to export them; not to define them. -. -. -. -.SS Setting MCA Parameters -. -The \fI-pmca\fP switch allows the passing of parameters to various MCA -(Modular Component Architecture) modules. -.\" PSRVR's MCA modules are described in detail in psrvrmca(7). 
-MCA modules have direct impact on programs because they allow tunable -parameters to be set at run time (such as which BTL communication device driver -to use, what parameters to pass to that BTL, etc.). -.PP -The \fI-pmca\fP switch takes two arguments: \fI<key>\fP and \fI<value>\fP. -The \fI<key>\fP argument generally specifies which MCA module will receive the value. -For example, the \fI<key>\fP "btl" is used to select which BTL to be used for -transporting messages. The \fI<value>\fP argument is the value that is -passed. -For example: -. -.TP 4 -prun -pmca btl tcp,self -np 1 foo -Tells PSRVR to use the "tcp" and "self" BTLs, and to run a single copy of -"foo" on an allocated node. -. -.TP -prun -pmca btl self -np 1 foo -Tells PSRVR to use the "self" BTL, and to run a single copy of "foo" on an -allocated node. -.\" And so on. PSRVR's BTL MCA modules are described in psrvrmca_btl(7). -.PP -The \fI-pmca\fP switch can be used multiple times to specify different -\fI<key>\fP and/or \fI<value>\fP arguments. If the same \fI<key>\fP is -specified more than once, the \fI<value>\fPs are concatenated with a comma -(",") separating them. -.PP -Note that the \fI-pmca\fP switch is simply a shortcut for setting environment variables. -The same effect may be accomplished by setting corresponding environment -variables before running \fIprun\fP. -The form of the environment variables that PSRVR sets is: - - PMIX_MCA_<key>=<value> -.PP -Thus, the \fI-pmca\fP switch overrides any previously set environment -variables. The \fI-pmca\fP settings similarly override MCA parameters set -in the -$OPAL_PREFIX/etc/psrvr-mca-params.conf or $HOME/.psrvr/mca-params.conf -file. -. -.PP -Unknown \fI<key>\fP arguments are still set as -environment variables -- they are not checked (by \fIprun\fP) for correctness. -Illegal or incorrect \fI<value>\fP arguments may or may not be reported -- it -depends on the specific MCA module. 
-.PP -To find the available component types under the MCA architecture, or to find the -available parameters for a specific component, use the \fIpinfo\fP command. -See the \fIpinfo(1)\fP man page for detailed information on the command. -. -. -. -.SS Setting MCA parameters and environment variables from file. -The \fI-tune\fP command line option and its synonym \fI-pmca mca_base_envar_file_prefix\fP allow a user -to set mca parameters and environment variables with the syntax described below. -This option requires a single file or list of files separated by "," to follow. -.PP -A valid line in the file may contain zero or many "-x", "-pmca", or "--pmca" arguments. -The following patterns are supported: -pmca var val -pmca var "val" -x var=val -x var. -If any argument is duplicated in the file, the last value read will be used. -.PP -MCA parameters and environment variables specified on the command line have higher precedence than variables specified in the file. -. -. -. -.SS Running as root -. -The PSRVR team strongly advises against executing -.I prun -as the root user. Applications should be run as regular -(non-root) users. -. -.PP -Reflecting this advice, prun will refuse to run as root by default. -To override this default, you can add the -.I --allow-run-as-root -option to the -.I prun -command line. -. -.SS Exit status -. -There is no standard definition for what \fIprun\fP should return as an exit -status. After considerable discussion, we settled on the following method for -assigning the \fIprun\fP exit status (note: in the following description, -the "primary" job is the initial application started by prun - all jobs that -are spawned by that job are designated "secondary" jobs): -. 
-.IP \[bu] 2 -if all processes in the primary job normally terminate with exit status 0, we return 0 -.IP \[bu] -if one or more processes in the primary job normally terminate with non-zero exit status, -we return the exit status of the process with the lowest rank to have a non-zero status -.IP \[bu] -if all processes in the primary job normally terminate with exit status 0, and one or more -processes in a secondary job normally terminate with non-zero exit status, we (a) return -the exit status of the process with the lowest rank in the lowest jobid to have a non-zero -status, and (b) output a message summarizing the exit status of the primary and all secondary jobs. -.IP \[bu] -if the cmd line option --report-child-jobs-separately is set, we will return -only- the -exit status of the primary job. Any non-zero exit status in secondary jobs will be -reported solely in a summary print statement. -. -.PP -By default, PSRVR records and notes that processes exited with non-zero termination status. -This is generally not considered an "abnormal termination" - i.e., PSRVR will not abort a -job if one or more processes return a non-zero status. Instead, the default behavior simply -reports the number of processes terminating with non-zero status upon completion of the job. -.PP -However, in some cases it can be desirable to have the job abort when any process terminates -with non-zero status. For example, a non-PMIx job might detect a bad result from a calculation -and want to abort, but doesn't want to generate a core file. Or a PMIx job might continue past -a call to PMIx_Finalize, but indicate that all processes should abort due to some post-PMIx result. -.PP -It is not anticipated that this situation will occur frequently. However, in the interest of -serving the broader community, PSRVR now has a means for allowing users to direct that jobs be -aborted upon any process exiting with non-zero status. 
Setting the MCA parameter -"orte_abort_on_non_zero_status" to 1 will cause PSRVR to abort all processes once any process - exits with non-zero status. -.PP -Terminations caused in this manner will be reported on the console as an "abnormal termination", -with the first process to so exit identified along with its exit status. -.PP -.\" ************************** -.\" Return Value Section -.\" ************************** -. -.SH RETURN VALUE -. -\fIprun\fP returns 0 if all processes started by \fIprun\fP exit after calling -PMIx_Finalize. A non-zero value is returned if an internal error occurred in -prun, or one or more processes exited before calling PMIx_Finalize. If an -internal error occurred in prun, the corresponding error code is returned. -In the event that one or more processes exit before calling PMIx_Finalize, the -return value of the rank of the process that \fIprun\fP first notices died -before calling PMIx_Finalize will be returned. Note that, in general, this will -be the first process that died but is not guaranteed to be so. -. -.PP -If the -.B --timeout -command line option is used and the timeout expires before the job -completes (thereby forcing -.I prun -to kill the job) -.I prun -will return an exit status equivalent to the value of -.B ETIMEDOUT -(which is typically 110 on Linux and OS X systems). - -. -.\" ************************** -.\" See Also Section -.\" ************************** -. diff --git a/orte/tools/ompi-prun/prun b/orte/tools/ompi-prun/prun deleted file mode 100755 index 87fad39fec..0000000000 --- a/orte/tools/ompi-prun/prun +++ /dev/null @@ -1,228 +0,0 @@ -#! /bin/sh - -# prun - temporary wrapper script for .libs/prun -# Generated by libtool (GNU libtool) 2.4.6 -# -# The prun program cannot be directly executed until all the libtool -# libraries that it depends on are installed. -# -# This wrapper script should never be moved out of the build directory. -# If it is, it will not operate correctly. 
- -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -sed_quote_subst='s|\([`"$\\]\)|\\\1|g' - -# Be Bourne compatible -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac -fi -BIN_SH=xpg4; export BIN_SH # for Tru64 -DUALCASE=1; export DUALCASE # for MKS sh - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - -relink_command="(cd /home/common/openmpi/foobar/orte/tools/prun; LIBRARY_PATH=/opt/local/lib; export LIBRARY_PATH; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; LD_LIBRARY_PATH=/home/common/openmpi/build/foobar/lib:/home/common/local/lib:/home/common/pmix/build/prrte/lib; export LD_LIBRARY_PATH; PATH=/home/common/openmpi/build/foobar/bin:/home/common/local/bin:/home/common/pmix/build/prrte/bin:/home/common/local/sbin:/usr/lib64/qt-3.3/bin:/home/rhc/perl5/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/home/rhc/.local/bin:/home/rhc/bin; export PATH; gcc -Wall -Wundef -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wstrict-prototypes -Wcomment -pedantic -Werror-implicit-function-declaration -fno-strict-aliasing -mcx16 -pthread -g -o \$progdir/\$file main.o prun.o ../../../orte/.libs/libopen-rte.so /home/common/openmpi/foobar/opal/.libs/libopen-pal.so ../../../opal/.libs/libopen-pal.so -ldl -ludev -lrt -lm -lutil -lz -pthread 
-Wl,-rpath -Wl,/home/common/openmpi/foobar/orte/.libs -Wl,-rpath -Wl,/home/common/openmpi/foobar/opal/.libs -Wl,-rpath -Wl,/home/common/openmpi/build/foobar/lib)" - -# This environment variable determines our operation mode. -if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then - # install mode needs the following variables: - generated_by_libtool_version='2.4.6' - notinst_deplibs=' ../../../orte/libopen-rte.la /home/common/openmpi/foobar/opal/libopen-pal.la ../../../opal/libopen-pal.la' -else - # When we are sourced in execute mode, $file and $ECHO are already set. - if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then - file="$0" - -# A function that is used when there is no print builtin or printf. -func_fallback_echo () -{ - eval 'cat <<_LTECHO_EOF -$1 -_LTECHO_EOF' -} - ECHO="printf %s\\n" - fi - -# Very basic option parsing. These options are (a) specific to -# the libtool wrapper, (b) are identical between the wrapper -# /script/ and the wrapper /executable/ that is used only on -# windows platforms, and (c) all begin with the string --lt- -# (application programs are unlikely to have options that match -# this pattern). -# -# There are only two supported options: --lt-debug and -# --lt-dump-script. There is, deliberately, no --lt-help. -# -# The first argument to this parsing function should be the -# script's ../../../libtool value, followed by no. -lt_option_debug= -func_parse_lt_options () -{ - lt_script_arg0=$0 - shift - for lt_opt - do - case "$lt_opt" in - --lt-debug) lt_option_debug=1 ;; - --lt-dump-script) - lt_dump_D=`$ECHO "X$lt_script_arg0" | /usr/bin/sed -e 's/^X//' -e 's%/[^/]*$%%'` - test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=. 
- lt_dump_F=`$ECHO "X$lt_script_arg0" | /usr/bin/sed -e 's/^X//' -e 's%^.*/%%'` - cat "$lt_dump_D/$lt_dump_F" - exit 0 - ;; - --lt-*) - $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2 - exit 1 - ;; - esac - done - - # Print the debug banner immediately: - if test -n "$lt_option_debug"; then - echo "prun:prun:$LINENO: libtool wrapper (GNU libtool) 2.4.6" 1>&2 - fi -} - -# Used when --lt-debug. Prints its arguments to stdout -# (redirection is the responsibility of the caller) -func_lt_dump_args () -{ - lt_dump_args_N=1; - for lt_arg - do - $ECHO "prun:prun:$LINENO: newargv[$lt_dump_args_N]: $lt_arg" - lt_dump_args_N=`expr $lt_dump_args_N + 1` - done -} - -# Core function for launching the target application -func_exec_program_core () -{ - - if test -n "$lt_option_debug"; then - $ECHO "prun:prun:$LINENO: newargv[0]: $progdir/$program" 1>&2 - func_lt_dump_args ${1+"$@"} 1>&2 - fi - exec "$progdir/$program" ${1+"$@"} - - $ECHO "$0: cannot exec $program $*" 1>&2 - exit 1 -} - -# A function to encapsulate launching the target application -# Strips options in the --lt-* namespace from $@ and -# launches target application with the remaining arguments. -func_exec_program () -{ - case " $* " in - *\ --lt-*) - for lt_wr_arg - do - case $lt_wr_arg in - --lt-*) ;; - *) set x "$@" "$lt_wr_arg"; shift;; - esac - shift - done ;; - esac - func_exec_program_core ${1+"$@"} -} - - # Parse options - func_parse_lt_options "$0" ${1+"$@"} - - # Find the directory that this script lives in. - thisdir=`$ECHO "$file" | /usr/bin/sed 's%/[^/]*$%%'` - test "x$thisdir" = "x$file" && thisdir=. - - # Follow symbolic links until we get to the real thisdir. - file=`ls -ld "$file" | /usr/bin/sed -n 's/.*-> //p'` - while test -n "$file"; do - destdir=`$ECHO "$file" | /usr/bin/sed 's%/[^/]*$%%'` - - # If there was a directory component, then change thisdir. 
- if test "x$destdir" != "x$file"; then - case "$destdir" in - [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;; - *) thisdir="$thisdir/$destdir" ;; - esac - fi - - file=`$ECHO "$file" | /usr/bin/sed 's%^.*/%%'` - file=`ls -ld "$thisdir/$file" | /usr/bin/sed -n 's/.*-> //p'` - done - - # Usually 'no', except on cygwin/mingw when embedded into - # the cwrapper. - WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no - if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then - # special case for '.' - if test "$thisdir" = "."; then - thisdir=`pwd` - fi - # remove .libs from thisdir - case "$thisdir" in - *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /usr/bin/sed 's%[\\/][^\\/]*$%%'` ;; - .libs ) thisdir=. ;; - esac - fi - - # Try to get the absolute directory name. - absdir=`cd "$thisdir" && pwd` - test -n "$absdir" && thisdir="$absdir" - - program=lt-'prun' - progdir="$thisdir/.libs" - - if test ! -f "$progdir/$program" || - { file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /usr/bin/sed 1q`; \ - test "X$file" != "X$progdir/$program"; }; then - - file="$$-$program" - - if test ! -d "$progdir"; then - mkdir "$progdir" - else - rm -f "$progdir/$file" - fi - - # relink executable if necessary - if test -n "$relink_command"; then - if relink_command_output=`eval $relink_command 2>&1`; then : - else - $ECHO "$relink_command_output" >&2 - rm -f "$progdir/$file" - exit 1 - fi - fi - - mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null || - { rm -f "$progdir/$program"; - mv -f "$progdir/$file" "$progdir/$program"; } - rm -f "$progdir/$file" - fi - - if test -f "$progdir/$program"; then - if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then - # Run the actual program with our arguments. - func_exec_program ${1+"$@"} - fi - else - # The program doesn't exist. - $ECHO "$0: error: '$progdir/$program' does not exist" 1>&2 - $ECHO "This script is just a wrapper for $program." 1>&2 - $ECHO "See the libtool documentation for more information." 
1>&2 - exit 1 - fi -fi diff --git a/orte/tools/ompi-prun/prun.1 b/orte/tools/ompi-prun/prun.1 deleted file mode 100644 index 74ce2294db..0000000000 --- a/orte/tools/ompi-prun/prun.1 +++ /dev/null @@ -1,1597 +0,0 @@ -.\" -*- nroff -*- -.\" Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" Copyright (c) 2017-2018 Intel, Inc. All rights reserved. -.\" Copyright (c) 2017 Los Alamos National Security, LLC. All rights -.\" reserved. -.\" $COPYRIGHT$ -.\" -.\" Man page for PSRVR's prun command -.\" -.\" .TH name section center-footer left-footer center-header -.TH PRUN 1 "Unreleased developer copy" "gitclone" "Open MPI" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -prun \- Execute serial and parallel jobs with the PMIx Reference Server. - -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.PP -Single Process Multiple Data (SPMD) Model: - -.B prun -[ options ] -.B -[ ] -.P - -Multiple Instruction Multiple Data (MIMD) Model: - -.B prun -[ global_options ] - [ local_options1 ] -.B -[ ] : - [ local_options2 ] -.B -[ ] : - ... : - [ local_optionsN ] -.B -[ ] -.P - -Note that in both models, invoking \fIprun\fP via an absolute path -name is equivalent to specifying the \fI--prefix\fP option with a -\fI\fR value equivalent to the directory where \fIprun\fR -resides, minus its last subdirectory. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local - -. -.\" ************************** -.\" Quick Summary Section -.\" ************************** -.SH QUICK SUMMARY -. 
-If you are simply looking for how to run an application, you -probably want to use a command line of the following form: - - \fB%\fP prun [ -np X ] [ --hostfile ] - -This will run X copies of \fI\fR in your current run-time -environment (if running under a supported resource manager, PSRVR's -\fIprun\fR will usually automatically use the corresponding resource manager -process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR, -which require the use of a hostfile, or will default to running all X -copies on the localhost), scheduling (by default) in a round-robin fashion by -CPU slot. See the rest of this page for more details. -.P -Please note that prun automatically binds processes. Three binding patterns are used in the absence of any further directives: -.TP 18 -.B Bind to core: -when the number of processes is <= 2 -. -. -.TP -.B Bind to socket: -when the number of processes is > 2 -. -. -.TP -.B Bind to none: -when oversubscribed -. -. -.P -If your application uses threads, then you probably want to ensure that you are -either not bound at all (by specifying --bind-to none), or bound to multiple cores -using an appropriate binding level or specific number of processing elements per -application process. -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH OPTIONS -. -.I prun -will send the name of the directory where it was invoked on the local -node to each of the remote nodes, and attempt to change to that -directory. See the "Current Working Directory" section below for further -details. -.\" -.\" Start options listing -.\" Indent 10 characters from start of first column to start of second column -.TP 10 -.B -The program executable. This is identified as the first non-recognized argument -to prun. -. -. -.TP -.B -Pass these run-time arguments to every new process. These must always -be the last arguments to \fIprun\fP. If an app context file is used, -\fI\fP will be ignored. -. -. 
-.TP -.B -h\fR,\fP --help -Display help for this command -. -. -.TP -.B -q\fR,\fP --quiet -Suppress informative messages from prun during application execution. -. -. -.TP -.B -v\fR,\fP --verbose -Be verbose -. -. -.TP -.B -V\fR,\fP --version -Print version number. If no other arguments are given, this will also -cause prun to exit. -. -. -.TP -.B -N \fR\fP -.br -Launch num processes per node on all allocated nodes (synonym for npernode). -. -. -. -.TP -.B -display-map\fR,\fP --display-map -Display a table showing the mapped location of each process prior to launch. -. -. -. -.TP -.B -display-allocation\fR,\fP --display-allocation -Display the detected resource allocation. -. -. -. -.TP -.B -output-proctable\fR,\fP --output-proctable -Output the debugger proctable after launch. -. -. -. -.TP -.B -max-vm-size\fR,\fP --max-vm-size \fR\fP -Number of processes to run. -. -. -. -.TP -.B -novm\fR,\fP --novm -Execute without creating an allocation-spanning virtual machine (only start -daemons on nodes hosting application procs). -. -. -. -.TP -.B -hnp\fR,\fP --hnp \fR\fP -Specify the URI of the \fRpsrvr\fP process, or the name of the file (specified as -file:filename) that contains that info. -. -. -. -.P -Use one of the following options to specify which hosts (nodes) within the \fRpsrvr\fP to run on. -. -. -.TP -.B -H\fR,\fP -host\fR,\fP --host \fR\fP -List of hosts on which to invoke processes. -. -. -.TP -.B -hostfile\fR,\fP --hostfile \fR\fP -Provide a hostfile to use. -.\" JJH - Should have man page for how to format a hostfile properly. -. -. -.TP -.B -default-hostfile\fR,\fP --default-hostfile \fR\fP -Provide a default hostfile. -. -. -.TP -.B -machinefile\fR,\fP --machinefile \fR\fP -Synonym for \fI-hostfile\fP. -. -. -. -. -.TP -.B -cpu-set\fR,\fP --cpu-set \fR\fP -Restrict launched processes to the specified logical cpus on each node (comma-separated -list). 
Note that the binding options will still apply within the specified envelope - e.g., -you can elect to bind each process to only one cpu within the specified cpu set. -. -. -. -.P -The following options specify the number of processes to launch. Note that none -of the options imply a particular binding policy - e.g., requesting N processes -for each socket does not imply that the processes will be bound to the socket. -. -. -.TP -.B -c\fR,\fP -n\fR,\fP --n\fR,\fP -np \fR<#>\fP -Run this many copies of the program on the given nodes. This option -indicates that the specified file is an executable program and not an -application context. If no value is provided for the number of copies to -execute (i.e., neither the "-np" nor its synonyms are provided on the command -line), prun will automatically execute a copy of the program on -each process slot (see below for description of a "process slot"). This -feature, however, can only be used in the SPMD model and will return an -error (without beginning execution of the application) otherwise. -. -. -.TP -.B --map-by ppr:N:<object> -Launch N times the number of objects of the specified type on each node. -. -. -.TP -.B -npersocket\fR,\fP --npersocket \fR<#persocket>\fP -On each node, launch this many processes times the number of processor -sockets on the node. -The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option. -(deprecated in favor of --map-by ppr:n:socket) -. -. -.TP -.B -npernode\fR,\fP --npernode \fR<#pernode>\fP -On each node, launch this many processes. -(deprecated in favor of --map-by ppr:n:node) -. -. -.TP -.B -pernode\fR,\fP --pernode -On each node, launch one process -- equivalent to \fI-npernode\fP 1. -(deprecated in favor of --map-by ppr:1:node) -. -. -. -. -.P -To map processes: -. -. -.TP -.B --map-by \fR<object>\fP -Map to the specified object, defaults to \fIsocket\fP. 
Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, socket, numa, -board, node, sequential, distance, and ppr. Any object can include -modifiers by adding a \fR:\fP and any combination of PE=n (bind n -processing elements to each proc), SPAN (load -balance the processes across the allocation), OVERSUBSCRIBE (allow -more processes on a node than processing elements), and NOOVERSUBSCRIBE. -This includes PPR, where the pattern would be terminated by another colon -to separate it from the modifiers. -. -.TP -.B -bycore\fR,\fP --bycore -Map processes by core (deprecated in favor of --map-by core) -. -.TP -.B -byslot\fR,\fP --byslot -Map and rank processes round-robin by slot. -. -.TP -.B -nolocal\fR,\fP --nolocal -Do not run any copies of the launched application on the same node as -prun is running. This option will override listing the localhost -with \fB--host\fR or any other host-specifying mechanism. -. -.TP -.B -nooversubscribe\fR,\fP --nooversubscribe -Do not oversubscribe any nodes; error (without starting any processes) -if the requested number of processes would cause oversubscription. -This option implicitly sets "max_slots" equal to the "slots" value for -each node. (Enabled by default). -. -.TP -.B -oversubscribe\fR,\fP --oversubscribe -Nodes are allowed to be oversubscribed, even on a managed system, and -overloading of processing elements. -. -.TP -.B -bynode\fR,\fP --bynode -Launch processes one per node, cycling by node in a round-robin -fashion. This spreads processes evenly among nodes and assigns -ranks in a round-robin, "by node" manner. -. -.TP -.B -cpu-list\fR,\fP --cpu-list \fR\fP -List of processor IDs to bind processes to [default=NULL]. -. -. -. -. -.P -To order processes' ranks: -. -. -.TP -.B --rank-by \fR\fP -Rank in round-robin fashion according to the specified object, -defaults to \fIslot\fP. Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, -socket, numa, board, and node. -. -. -. -. 
-.P -For process binding: -. -.TP -.B --bind-to \fR\fP -Bind processes to the specified object, defaults to \fIcore\fP. Supported options -include slot, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board, and none. -. -.TP -.B -cpus-per-proc\fR,\fP --cpus-per-proc \fR<#perproc>\fP -Bind each process to the specified number of cpus. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -cpus-per-rank\fR,\fP --cpus-per-rank \fR<#perrank>\fP -Alias for \fI-cpus-per-proc\fP. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -bind-to-core\fR,\fP --bind-to-core -Bind processes to cores (deprecated in favor of --bind-to core) -. -.TP -.B -bind-to-socket\fR,\fP --bind-to-socket -Bind processes to processor sockets (deprecated in favor of --bind-to socket) -. -.TP -.B -report-bindings\fR,\fP --report-bindings -Report any bindings for launched processes. -. -. -. -. -.P -For rankfiles: -. -. -.TP -.B -rf\fR,\fP --rankfile \fR\fP -Provide a rankfile file. -. -. -. -. -.P -To manage standard I/O: -. -. -.TP -.B -output-filename\fR,\fP --output-filename \fR\fP -Redirect the stdout, stderr, and stddiag of all processes to a process-unique version of -the specified filename. Any directories in the filename will automatically be created. -Each output file will consist of filename.id, where the id will be the -processes' rank, left-filled with -zero's for correct ordering in listings. -. -. -.TP -.B -stdin\fR,\fP --stdin\fR \fP -The rank of the process that is to receive stdin. The -default is to forward stdin to rank 0, but this option -can be used to forward stdin to any process. It is also acceptable to -specify \fInone\fP, indicating that no processes are to receive stdin. -. -. -.TP -.B -merge-stderr-to-stdout\fR,\fP --merge-stderr-to-stdout -Merge stderr to stdout for each process. -. -. 
-.TP -.B -tag-output\fR,\fP --tag-output -Tag each line of output to stdout, stderr, and stddiag with \fB[jobid, MCW_rank]\fP -indicating the process jobid and rank of the process that generated the output, -and the channel which generated it. -. -. -.TP -.B -timestamp-output\fR,\fP --timestamp-output -Timestamp each line of output to stdout, stderr, and stddiag. -. -. -.TP -.B -xml\fR,\fP --xml -Provide all output to stdout, stderr, and stddiag in an xml format. -. -. -.TP -.B -xml-file\fR,\fP --xml-file \fR\fP -Provide all output in XML format to the specified file. -. -. -.TP -.B -xterm\fR,\fP --xterm \fR\fP -Display the output from the processes identified by their ranks in separate xterm windows. The ranks are specified -as a comma-separated list of ranges, with a -1 indicating all. A separate -window will be created for each specified process. -.B Note: -xterm will normally terminate the window upon termination of the process running -within it. However, by adding a "!" to the end of the list of specified ranks, -the proper options will be provided to ensure that xterm keeps the window open -\fIafter\fP the process terminates, thus allowing you to see the process' output. -Each xterm window will subsequently need to be manually closed. -.B Note: -In some environments, xterm may require that the executable be in the user's -path, or be specified in absolute or relative terms. Thus, it may be necessary -to specify a local executable as "./foo" instead of just "foo". If xterm fails to -find the executable, prun will hang, but still respond correctly to a ctrl-c. -If this happens, please check that the executable is being specified correctly -and try again. -. -. -. -. -.P -To manage files and runtime environment: -. -. -.TP -.B -path\fR,\fP --path \fR\fP - that will be used when attempting to locate the requested -executables. This is used prior to using the local PATH setting. -. -. 
-.TP -.B --prefix \fR\fP -Prefix directory that will be used to set the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote node before invoking -the target process. See the "Remote Execution" section, below. -. -. -.TP -.B --noprefix -Disable the automatic --prefix behavior -. -. -.TP -.B -s\fR,\fP --preload-binary -Copy the specified executable(s) to remote machines prior to starting remote processes. The -executables will be copied to the session directory and will be deleted upon -completion of the job. -. -. -.TP -.B --preload-files \fR\fP -Preload the comma separated list of files to the current working directory of the remote -machines where processes will be launched prior to starting those processes. -. -. -.TP -.B -set-cwd-to-session-dir\fR,\fP --set-cwd-to-session-dir -Set the working directory of the started processes to their session directory. -. -. -.TP -.B -wd \fR\fP -Synonym for \fI-wdir\fP. -. -. -.TP -.B -wdir \fR\fP -Change to the directory before the user's program executes. -See the "Current Working Directory" section for notes on relative paths. -.B Note: -If the \fI-wdir\fP option appears both on the command line and in an -application context, the context will take precedence over the command -line. Thus, if the path to the desired wdir is different -on the backend nodes, then it must be specified as an absolute path that -is correct for the backend node. -. -. -.TP -.B -x \fR\fP -Export the specified environment variables to the remote nodes before -executing the program. Only one environment variable can be specified -per \fI-x\fP option. Existing environment variables can be specified -or new variable names specified with corresponding values. For -example: - \fB%\fP prun -x DISPLAY -x OFILE=/tmp/out ... - -The parser for the \fI-x\fP option is not very sophisticated; it does -not even understand quoted values. Users are advised to set variables -in the environment, and then use \fI-x\fP to export (not define) them. -. -. -. -. 
-.P -Setting MCA parameters: -. -. -.TP -.B -gpmca\fR,\fP --gpmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -pmca\fR,\fP --pmca \fR \fP -Send arguments to various MCA modules. See the "MCA" section, below. -. -. -.TP -.B -am \fR\fP -Aggregate MCA parameter set file list. -. -. -.TP -.B -tune\fR,\fP --tune \fR\fP -Specify a tune file to set arguments for various MCA modules and environment variables. -See the "Setting MCA parameters and environment variables from file" section, below. -. -. -. -. -.P -For debugging: -. -. -.TP -.B -debug\fR,\fP --debug -Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP -MCA parameter. -. -. -.TP -.B --get-stack-traces -When paired with the -.B --timeout -option, -.I prun -will obtain and print out stack traces from all launched processes -that are still alive when the timeout expires. Note that obtaining -stack traces can take a little time and produce a lot of output, -especially for large process-count jobs. -. -. -.TP -.B -debugger\fR,\fP --debugger \fR\fP -Sequence of debuggers to search for when \fI--debug\fP is used (i.e. -a synonym for \fIorte_base_user_debugger\fP MCA parameter). -. -. -.TP -.B --timeout \fR -The maximum number of seconds that -.I prun -will run. After this many seconds, -.I prun -will abort the launched job and exit with a non-zero exit status. -Using -.B --timeout -can be also useful when combined with the -.B --get-stack-traces -option. -. -. -.TP -.B -tv\fR,\fP --tv -Launch processes under the TotalView debugger. -Deprecated backwards compatibility flag. Synonym for \fI--debug\fP. -. -. -. -. -.P -There are also other options: -. -. -.TP -.B --allow-run-as-root -Allow -.I prun -to run when executed by the root user -.RI ( prun -defaults to aborting when launched as the root user). -. -. -.TP -.B --app \fR\fP -Provide an appfile, ignoring all other command line options. 
-. -. -.TP -.B -cf\fR,\fP --cartofile \fR\fP -Provide a cartography file. -. -. -.TP -.B -continuous\fR,\fP --continuous -Job is to run until explicitly terminated. -. -. -.TP -.B -disable-recovery\fR,\fP --disable-recovery -Disable recovery (resets all recovery options to off). -. -. -.TP -.B -do-not-launch\fR,\fP --do-not-launch -Perform all necessary operations to prepare to launch the application, but do not actually launch it. -. -. -.TP -.B -do-not-resolve\fR,\fP --do-not-resolve -Do not attempt to resolve interfaces. -. -. -.TP -.B -enable-recovery\fR,\fP --enable-recovery -Enable recovery from process failure [Default = disabled]. -. -. -.TP -.B -index-argv-by-rank\fR,\fP --index-argv-by-rank -Uniquely index argv[0] for each process using its rank. -. -. -.TP -.B -max-restarts\fR,\fP --max-restarts \fR\fP -Max number of times to restart a failed process. -. -. -.TP -.B --ppr \fR\fP -Comma-separated list of number of processes on a given resource type [default: none]. -. -. -.TP -.B -report-child-jobs-separately\fR,\fP --report-child-jobs-separately -Return the exit status of the primary job only. -. -. -.TP -.B -report-events\fR,\fP --report-events \fR\fP -Report events to a tool listening at the specified URI. -. -. -.TP -.B -report-pid\fR,\fP --report-pid \fR\fP -Print out prun's PID during startup. The channel must be either a '-' to indicate -that the pid is to be output to stdout, a '+' to indicate that the pid is to be -output to stderr, or a filename to which the pid is to be written. -. -. -.TP -.B -report-uri\fR,\fP --report-uri \fR\fP -Print out prun's URI during startup. The channel must be either a '-' to indicate -that the URI is to be output to stdout, a '+' to indicate that the URI is to be -output to stderr, or a filename to which the URI is to be written. -. -. -.TP -.B -show-progress\fR,\fP --show-progress -Output a brief periodic report on launch progress. -. -. -.TP -.B -terminate\fR,\fP --terminate -Terminate the DVM. -. -. 
-.TP -.B -use-hwthread-cpus\fR,\fP --use-hwthread-cpus -Use hardware threads as independent cpus. -. -. -.TP -.B -use-regexp\fR,\fP --use-regexp -Use regular expressions for launch. -. -. -. -. -.P -The following options are useful for developers; they are not generally -useful to most users: -. -.TP -.B -d\fR,\fP --debug-devel -Enable debugging. This is not generally useful for most users. -. -. -.TP -.B -display-devel-allocation\fR,\fP --display-devel-allocation -Display a detailed list of the allocation being used by this job. -. -. -.TP -.B -display-devel-map\fR,\fP --display-devel-map -Display a more detailed table showing the mapped location of each process prior to launch. -. -. -.TP -.B -display-diffable-map\fR,\fP --display-diffable-map -Display a diffable process map just before launch. -. -. -.TP -.B -display-topo\fR,\fP --display-topo -Display the topology as part of the process map just before launch. -. -. -.TP -.B --report-state-on-timeout -When paired with the -.B --timeout -command line option, report the run-time subsystem state of each -process when the timeout expires. -. -. -.P -There may be other options listed with \fIprun --help\fP. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -One invocation of \fIprun\fP starts an application running under PSRVR. If the application is single process multiple data (SPMD), the application -can be specified on the \fIprun\fP command line. - -If the application is multiple instruction multiple data (MIMD), comprising of -multiple programs, the set of programs and argument can be specified in one of -two ways: Extended Command Line Arguments, and Application Context. -.PP -An application context describes the MIMD program set including all arguments -in a separate file. -.\" See appcontext(5) for a description of the application context syntax. -This file essentially contains multiple \fIprun\fP command lines, less the -command name itself. 
The ability to specify different options for different -instantiations of a program is another reason to use an application context. -.PP -Extended command line arguments allow for the description of the application -layout on the command line using colons (\fI:\fP) to separate the specification -of programs and arguments. Some options are globally set across all specified -programs (e.g. --hostfile), while others are specific to a single program -(e.g. -np). -. -. -. -.SS Specifying Host Nodes -. -Host nodes can be identified on the \fIprun\fP command line with the \fI-host\fP -option or in a hostfile. -. -.PP -For example, -. -.TP 4 -prun -H aa,aa,bb ./a.out -launches two processes on node aa and one on bb. -. -.PP -Or, consider the hostfile -. - - \fB%\fP cat myhostfile - aa slots=2 - bb slots=2 - cc slots=2 - -. -.PP -Here, we list both the host names (aa, bb, and cc) but also how many "slots" -there are for each. Slots indicate how many processes can potentially execute -on a node. For best performance, the number of slots may be chosen to be the -number of cores on the node or the number of processor sockets. If the hostfile -does not provide slots information, PSRVR will attempt to discover the number -of cores (or hwthreads, if the use-hwthreads-as-cpus option is set) and set the -number of slots to that value. This default behavior also occurs when specifying -the \fI-host\fP option with a single hostname. Thus, the command -. -.TP 4 -prun -H aa ./a.out -launches a number of processes equal to the number of cores on node aa. -. -.PP -. -.TP 4 -prun -hostfile myhostfile ./a.out -will launch two processes on each of the three nodes. -. -.TP 4 -prun -hostfile myhostfile -host aa ./a.out -will launch two processes, both on node aa. -. -.TP 4 -prun -hostfile myhostfile -host dd ./a.out -will find no hosts to run on and abort with an error. -That is, the specified host dd is not in the specified hostfile. -. 
-.PP -When running under resource managers (e.g., SLURM, Torque, etc.), -PSRVR will obtain both the hostnames and the number of slots directly -from the resource manager. -. -.SS Specifying Number of Processes -. -As we have just seen, the number of processes to run can be set using the -hostfile. Other mechanisms exist. -. -.PP -The number of processes launched can be specified as a multiple of the -number of nodes or processor sockets available. For example, -. -.TP 4 -prun -H aa,bb -npersocket 2 ./a.out -launches processes 0-3 on node aa and processes 4-7 on node bb, -where aa and bb are both dual-socket nodes. -The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option, -which is discussed in a later section. -. -.TP 4 -prun -H aa,bb -npernode 2 ./a.out -launches processes 0-1 on node aa and processes 2-3 on node bb. -. -.TP 4 -prun -H aa,bb -npernode 1 ./a.out -launches one process per host node. -. -.TP 4 -prun -H aa,bb -pernode ./a.out -is the same as \fI-npernode\fP 1. -. -. -.PP -Another alternative is to specify the number of processes with the -\fI-np\fP option. Consider now the hostfile -. - - \fB%\fP cat myhostfile - aa slots=4 - bb slots=4 - cc slots=4 - -. -.PP -Now, -. -.TP 4 -prun -hostfile myhostfile -np 6 ./a.out -will launch processes 0-3 on node aa and processes 4-5 on node bb. The remaining -slots in the hostfile will not be used since the \fI-np\fP option indicated -that only 6 processes should be launched. -. -.SS Mapping Processes to Nodes: Using Policies -. -The examples above illustrate the default mapping of processes -to nodes. This mapping can also be controlled with various -\fIprun\fP options that describe mapping policies. -. -. -.PP -Consider the same hostfile as above, again with \fI-np\fP 6: -. - - node aa node bb node cc - - prun 0 1 2 3 4 5 - - prun --map-by node 0 3 1 4 2 5 - - prun -nolocal 0 1 2 3 4 5 -. 
-.PP -The \fI--map-by node\fP option will load balance the processes across -the available nodes, numbering each process in a round-robin fashion. -. -.PP -The \fI-nolocal\fP option prevents any processes from being mapped onto the -local host (in this case node aa). While \fIprun\fP typically consumes -few system resources, \fI-nolocal\fP can be helpful for launching very -large jobs where \fIprun\fP may actually need to use noticeable amounts -of memory and/or processing time. -. -.PP -Just as \fI-np\fP can specify fewer processes than there are slots, it can -also oversubscribe the slots. For example, with the same hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -will launch processes 0-3 on node aa, 4-7 on bb, and 8-11 on cc. It will -then add the remaining two processes to whichever nodes it chooses. -. -.PP -One can also specify limits to oversubscription. For example, with the same -hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 -nooversubscribe ./a.out -will produce an error since \fI-nooversubscribe\fP prevents oversubscription. -. -.PP -Limits to oversubscription can also be specified in the hostfile itself: -. - % cat myhostfile - aa slots=4 max_slots=4 - bb max_slots=4 - cc slots=4 -. -.PP -The \fImax_slots\fP field specifies such a limit. When it does, the -\fIslots\fP value defaults to the limit. Now: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -causes the first 12 processes to be launched as before, but the remaining -two processes will be forced onto node cc. The other two nodes are -protected by the hostfile against oversubscription by this job. -. -.PP -Using the \fI--nooversubscribe\fR option can be helpful since PSRVR -currently does not get "max_slots" values from the resource manager. -. -.PP -Of course, \fI-np\fP can also be used with the \fI-H\fP or \fI-host\fP -option. For example, -. -.TP 4 -prun -H aa,bb -np 8 ./a.out -launches 8 processes. 
Since only two hosts are specified, after the first -two processes are mapped, one to aa and one to bb, the remaining processes -oversubscribe the specified hosts. -. -.PP -And here is a MIMD example: -. -.TP 4 -prun -H aa -np 1 hostname : -H bb,cc -np 2 uptime -will launch process 0 running \fIhostname\fP on node aa and processes 1 and 2 -each running \fIuptime\fP on nodes bb and cc, respectively. -. -.SS Mapping, Ranking, and Binding: Oh My! -. -PSRVR employs a three-phase procedure for assigning process locations and -ranks: -. -.TP 10 -\fBmapping\fP -Assigns a default location to each process -. -.TP 10 -\fBranking\fP -Assigns a rank value to each process -. -.TP 10 -\fBbinding\fP -Constrains each process to run on specific processors -. -.PP -The \fImapping\fP step is used to assign a default location to each process -based on the mapper being employed. Mapping by slot, node, and sequentially results -in the assignment of the processes to the node level. In contrast, mapping by object allows -the mapper to assign the process to an actual object on each node. -. -.PP -\fBNote:\fP the location assigned to the process is independent of where it will be bound - the -assignment is used solely as input to the binding algorithm. -. -.PP -The mapping of processes to nodes can be defined not just -with general policies but also, if necessary, using arbitrary mappings -that cannot be described by a simple policy. One can use the "sequential -mapper," which reads the hostfile line by line, assigning processes -to nodes in whatever order the hostfile specifies. Use the -\fI-pmca rmaps seq\fP option. For example, using the same hostfile -as before: -. -.PP -prun -hostfile myhostfile -pmca rmaps seq ./a.out -. -.PP -will launch three processes, one on each of nodes aa, bb, and cc, respectively. -The slot counts don't matter; one process is launched per line on -whatever node is listed on the line. -. 
-.PP -Another way to specify arbitrary mappings is with a rankfile, which -gives you detailed control over process binding as well. Rankfiles -are discussed below. -. -.PP -The second phase focuses on the \fIranking\fP of the process within -the job. PSRVR -separates this from the mapping procedure to allow more flexibility in the -relative placement of processes. This is best illustrated by considering the -following two cases where we used the --map-by ppr:2:socket option: -. -.PP - node aa node bb - - rank-by core 0 1 ! 2 3 4 5 ! 6 7 - - rank-by socket 0 2 ! 1 3 4 6 ! 5 7 - - rank-by socket:span 0 4 ! 1 5 2 6 ! 3 7 -. -.PP -Ranking by core and by slot provide the identical result - a simple -progression of ranks across each node. Ranking by -socket does a round-robin ranking within each node until all processes -have been assigned a rank, and then progresses to the next -node. Adding the \fIspan\fP modifier to the ranking directive causes -the ranking algorithm to treat the entire allocation as a single -entity - thus, the MCW ranks are assigned across all sockets before -circling back around to the beginning. -. -.PP -The \fIbinding\fP phase actually binds each process to a given set of processors. This can -improve performance if the operating system is placing processes -suboptimally. For example, it might oversubscribe some multi-core -processor sockets, leaving other sockets idle; this can lead -processes to contend unnecessarily for common resources. Or, it -might spread processes out too widely; this can be suboptimal if -application performance is sensitive to interprocess communication -costs. Binding can also keep the operating system from migrating -processes excessively, regardless of how optimally those processes -were placed to begin with. -. 
-.PP -The processors to be used for binding can be identified in terms of -topological groupings - e.g., binding to an l3cache will bind each -process to all processors within the scope of a single L3 cache within -their assigned location. Thus, if a process is assigned by the mapper -to a certain socket, then a \fI--bind-to l3cache\fP directive will -cause the process to be bound to the processors that share a single L3 -cache within that socket. -. -.PP -To help balance loads, the binding directive uses a round-robin method when binding to -levels lower than used in the mapper. For example, consider the case where a job is -mapped to the socket level, and then bound to core. Each socket will have multiple cores, -so if multiple processes are mapped to a given socket, the binding algorithm will assign -each process located to a socket to a unique core in a round-robin manner. -. -.PP -Alternatively, processes mapped by l2cache and then bound to socket will simply be bound -to all the processors in the socket where they are located. In this manner, users can -exert detailed control over relative MCW rank location and binding. -. -.PP -Finally, \fI--report-bindings\fP can be used to report bindings. -. -.PP -As an example, consider a node with two processor sockets, each comprising -four cores. We run \fIprun\fP with \fI-np 4 --report-bindings\fP and -the following additional options: -. - - % prun ... --map-by core --bind-to core - [...] ... binding child [...,0] to cpus 0001 - [...] ... binding child [...,1] to cpus 0002 - [...] ... binding child [...,2] to cpus 0004 - [...] ... binding child [...,3] to cpus 0008 - - % prun ... --map-by socket --bind-to socket - [...] ... binding child [...,0] to socket 0 cpus 000f - [...] ... binding child [...,1] to socket 1 cpus 00f0 - [...] ... binding child [...,2] to socket 0 cpus 000f - [...] ... binding child [...,3] to socket 1 cpus 00f0 - - % prun ... --map-by core:PE=2 --bind-to core - [...] ... 
binding child [...,0] to cpus 0003 - [...] ... binding child [...,1] to cpus 000c - [...] ... binding child [...,2] to cpus 0030 - [...] ... binding child [...,3] to cpus 00c0 - - % prun ... --bind-to none -. -.PP -Here, \fI--report-bindings\fP shows the binding of each process as a mask. -In the first case, the processes bind to successive cores as indicated by -the masks 0001, 0002, 0004, and 0008. In the second case, processes bind -to all cores on successive sockets as indicated by the masks 000f and 00f0. -The processes cycle through the processor sockets in a round-robin fashion -as many times as are needed. In the third case, the masks show us that -2 cores have been bound per process. In the fourth case, binding is -turned off and no bindings are reported. -. -.PP -PSRVR's support for process binding depends on the underlying -operating system. Therefore, certain process binding options may not be available -on every system. -. -.PP -Process binding can also be set with MCA parameters. -Their usage is less convenient than that of \fIprun\fP options. -On the other hand, MCA parameters can be set not only on the \fIprun\fP -command line, but alternatively in a system or user mca-params.conf file -or as environment variables, as described in the MCA section below. -Some examples include: -. -.PP - prun option MCA parameter key value - - --map-by core rmaps_base_mapping_policy core - --map-by socket rmaps_base_mapping_policy socket - --rank-by core rmaps_base_ranking_policy core - --bind-to core hwloc_base_binding_policy core - --bind-to socket hwloc_base_binding_policy socket - --bind-to none hwloc_base_binding_policy none -. -. -.SS Rankfiles -. -Rankfiles are text files that specify detailed information about how -individual processes should be mapped to nodes, and to which -processor(s) they should be bound. Each line of a rankfile specifies -the location of one process. The general form of each line in the -rankfile is: -. - - rank <N>=<hostname> slot=<slot list> -. 
-.PP -For example: -. - - $ cat myrankfile - rank 0=aa slot=1:0-2 - rank 1=bb slot=0:0,1 - rank 2=cc slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. -.PP -Means that -. - - Rank 0 runs on node aa, bound to logical socket 1, cores 0-2. - Rank 1 runs on node bb, bound to logical socket 0, cores 0 and 1. - Rank 2 runs on node cc, bound to logical cores 1 and 2. -. -.PP -Rankfiles can alternatively be used to specify \fIphysical\fP processor -locations. In this case, the syntax is somewhat different. Sockets are -no longer recognized, and the slot number given must be the number of -the physical PU as most OS's do not assign a unique physical identifier -to each core in the node. Thus, a proper physical rankfile looks something -like the following: -. - - $ cat myphysicalrankfile - rank 0=aa slot=1 - rank 1=bb slot=8 - rank 2=cc slot=6 -. -.PP -This means that -. - - Rank 0 will run on node aa, bound to the core that contains physical PU 1 - Rank 1 will run on node bb, bound to the core that contains physical PU 8 - Rank 2 will run on node cc, bound to the core that contains physical PU 6 -. -.PP -Rankfiles are treated as \fIlogical\fP by default, and the MCA parameter -rmaps_rank_file_physical must be set to 1 to indicate that the rankfile -is to be considered as \fIphysical\fP. -. -.PP -The hostnames listed above are "absolute," meaning that actual -resolvable hostnames are specified. However, hostnames can also be -specified as "relative," meaning that they are specified in relation -to an externally-specified list of hostnames (e.g., by prun's --host -argument, a hostfile, or a job scheduler). -. -.PP -The "relative" specification is of the form "+n<X>", where X is an -integer specifying the Xth hostname in the set of all available -hostnames, indexed from 0. For example: -. - - $ cat myrankfile - rank 0=+n0 slot=1:0-2 - rank 1=+n1 slot=0:0,1 - rank 2=+n2 slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. 
-.PP -All socket/core slot locations are -specified as -.I logical -indexes. You can use tools such as HWLOC's "lstopo" to find the -logical indexes of socket and cores. -. -. -.SS Application Context or Executable Program? -. -To distinguish the two different forms, \fIprun\fP -looks on the command line for \fI--app\fP option. If -it is specified, then the file named on the command line is -assumed to be an application context. If it is not -specified, then the file is assumed to be an executable program. -. -. -. -.SS Locating Files -. -If no relative or absolute path is specified for a file, prun will first look for files by searching the directories specified -by the \fI--path\fP option. If there is no \fI--path\fP option set or -if the file is not found at the \fI--path\fP location, then prun -will search the user's PATH environment variable as defined on the -source node(s). -.PP -If a relative directory is specified, it must be relative to the initial -working directory determined by the specific starter used. For example when -using the rsh or ssh starters, the initial directory is $HOME by default. Other -starters may set the initial directory to the current working directory from -the invocation of \fIprun\fP. -. -. -. -.SS Current Working Directory -. -The \fI\-wdir\fP prun option (and its synonym, \fI\-wd\fP) allows -the user to change to an arbitrary directory before the program is -invoked. It can also be used in application context files to specify -working directories on specific nodes and/or for specific -applications. -.PP -If the \fI\-wdir\fP option appears both in a context file and on the -command line, the context file directory will override the command -line value. -.PP -If the \fI-wdir\fP option is specified, prun will attempt to -change to the specified directory on all of the remote nodes. If this -fails, \fIprun\fP will abort. 
-.PP -If the \fI-wdir\fP option is \fBnot\fP specified, prun will send -the directory name where \fIprun\fP was invoked to each of the -remote nodes. The remote nodes will try to change to that -directory. If they are unable (e.g., if the directory does not exist on -that node), then prun will use the default directory determined by -the starter. -.PP -All directory changing occurs before the user's program is invoked. -. -. -. -.SS Standard I/O -. -PSRVR directs UNIX standard input to /dev/null on all processes -except the rank 0 process. The rank 0 process -inherits standard input from \fIprun\fP. -.B Note: -The node that invoked \fIprun\fP need not be the same as the node where the -rank 0 process resides. PSRVR handles the redirection of -\fIprun\fP's standard input to the rank 0 process. -.PP -PSRVR directs UNIX standard output and error from remote nodes to the node -that invoked \fIprun\fP and prints it on the standard output/error of -\fIprun\fP. -Local processes inherit the standard output/error of \fIprun\fP and transfer -to it directly. -.PP -Thus it is possible to redirect standard I/O for applications by -using the typical shell redirection procedure on \fIprun\fP. - - \fB%\fP prun -np 2 my_app < my_input > my_output - -Note that in this example \fIonly\fP the rank 0 process will -receive the stream from \fImy_input\fP on stdin. The stdin on all the other -nodes will be tied to /dev/null. However, the stdout from all nodes will -be collected into the \fImy_output\fP file. -. -. -. -.SS Signal Propagation -. -When prun receives a SIGTERM or SIGINT, it will attempt to kill -the entire job by sending all processes in the job a SIGTERM, waiting -a small number of seconds, then sending all processes in the job a -SIGKILL. -. -.PP -SIGUSR1 and SIGUSR2 signals received by prun are propagated to -all processes in the job. -. 
-.PP -A SIGTSTP signal to prun will cause a SIGSTOP signal to be sent -to all of the programs started by prun and likewise a SIGCONT signal -to prun will cause a SIGCONT to be sent. -. -.PP -Other signals are not currently propagated -by prun. -. -. -.SS Process Termination / Signal Handling -. -During the run of an application, if any process dies abnormally -(either exiting before invoking \fIPMIx_Finalize\fP, or dying as the result of a -signal), \fIprun\fP will print out an error message and kill the rest of the -application. -.PP -. -. -.SS Process Environment -. -Processes in the application inherit their environment from the -PSRVR daemon upon the node on which they are running. The -environment is typically inherited from the user's shell. On remote -nodes, the exact environment is determined by the boot MCA module -used. The \fIrsh\fR launch module, for example, uses either -\fIrsh\fR/\fIssh\fR to launch the PSRVR daemon on remote nodes, and -typically executes one or more of the user's shell-setup files before -launching the daemon. When running dynamically linked -applications which require the \fILD_LIBRARY_PATH\fR environment -variable to be set, care must be taken to ensure that it is correctly -set when booting PSRVR. -.PP -See the "Remote Execution" section for more details. -. -. -.SS Remote Execution -. -PSRVR requires that the \fIPATH\fR environment variable be set to -find executables on remote nodes (this is typically only necessary in -\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled -environments typically copy the current environment to the execution -of remote jobs, so if the current environment has \fIPATH\fR and/or -\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it -set properly). 
If PSRVR was compiled with shared library support, -it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment -variable set on remote nodes as well (especially to find the shared -libraries required to run user applications). -.PP -However, it is not always desirable or possible to edit shell -startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The -\fI--prefix\fR option is provided for some simple configurations where -this is not possible. -.PP -The \fI--prefix\fR option takes a single argument: the base directory -on the remote node where PSRVR is installed. PSRVR will use -this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR -before executing any user applications. This allows -running jobs without having pre-configured the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote nodes. -.PP -PSRVR adds the basename of the current -node's "bindir" (the directory where PSRVR's executables are -installed) to the prefix and uses that to set the \fIPATH\fR on the -remote node. Similarly, PSRVR adds the basename of the current -node's "libdir" (the directory where PSRVR's libraries are -installed) to the prefix and uses that to set the -\fILD_LIBRARY_PATH\fR on the remote node. For example: -.TP 15 -Local bindir: -/local/node/directory/bin -.TP -Local libdir: -/local/node/directory/lib64 -.PP -If the following command line is used: - - \fB%\fP prun --prefix /remote/node/directory - -PSRVR will add "/remote/node/directory/bin" to the \fIPATH\fR -and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the -remote node before attempting to execute anything. -.PP -The \fI--prefix\fR option is not sufficient if the installation paths -on the remote node are different than the local node (e.g., if "/lib" -is used on the local node, but "/lib64" is used on the remote node), -or if the installation paths are something other than a subdirectory -under a common prefix.
-.PP -Note that executing \fIprun\fR via an absolute pathname is -equivalent to specifying \fI--prefix\fR without the last subdirectory -in the absolute pathname to \fIprun\fR. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local -. -. -. -.SS Exported Environment Variables -. -All environment variables that are named in the form PMIX_* will automatically -be exported to new processes on the local and remote nodes. Environmental -parameters can also be set/forwarded to the new processes using the MCA -parameter \fImca_base_env_list\fP. While the syntax of the \fI\-x\fP option and MCA param -allows the definition of new variables, note that the parser -for these options is currently not very sophisticated - it does not even -understand quoted values. Users are advised to set variables in the -environment and use the option to export them; not to define them. -. -. -. -.SS Setting MCA Parameters -. -The \fI-pmca\fP switch allows the passing of parameters to various MCA -(Modular Component Architecture) modules. -.\" PSRVR's MCA modules are described in detail in psrvrmca(7). -MCA modules have direct impact on programs because they allow tunable -parameters to be set at run time (such as which BTL communication device driver -to use, what parameters to pass to that BTL, etc.). -.PP -The \fI-pmca\fP switch takes two arguments: \fI<key>\fP and \fI<value>\fP. -The \fI<key>\fP argument generally specifies which MCA module will receive the value. -For example, the \fI<key>\fP "btl" is used to select which BTL to be used for -transporting messages. The \fI<value>\fP argument is the value that is -passed. -For example: -. -.TP 4 -prun -pmca btl tcp,self -np 1 foo -Tells PSRVR to use the "tcp" and "self" BTLs, and to run a single copy of -"foo" on an allocated node. -. -.TP -prun -pmca btl self -np 1 foo -Tells PSRVR to use the "self" BTL, and to run a single copy of "foo" on an -allocated node. -.\" And so on.
PSRVR's BTL MCA modules are described in psrvrmca_btl(7). -.PP -The \fI-pmca\fP switch can be used multiple times to specify different -\fI<key>\fP and/or \fI<value>\fP arguments. If the same \fI<key>\fP is -specified more than once, the \fI<value>\fPs are concatenated with a comma -(",") separating them. -.PP -Note that the \fI-pmca\fP switch is simply a shortcut for setting environment variables. -The same effect may be accomplished by setting corresponding environment -variables before running \fIprun\fP. -The form of the environment variables that PSRVR sets is: - - PMIX_MCA_<key>=<value> -.PP -Thus, the \fI-pmca\fP switch overrides any previously set environment -variables. The \fI-pmca\fP settings similarly override MCA parameters set -in the -$OPAL_PREFIX/etc/psrvr-mca-params.conf or $HOME/.psrvr/mca-params.conf -file. -. -.PP -Unknown \fI<key>\fP arguments are still set as -environment variables -- they are not checked (by \fIprun\fP) for correctness. -Illegal or incorrect \fI<value>\fP arguments may or may not be reported -- it -depends on the specific MCA module. -.PP -To find the available component types under the MCA architecture, or to find the -available parameters for a specific component, use the \fIpinfo\fP command. -See the \fIpinfo(1)\fP man page for detailed information on the command. -. -. -. -.SS Setting MCA parameters and environment variables from file. -The \fI-tune\fP command line option and its synonym \fI-pmca mca_base_envar_file_prefix\fP allow a user -to set mca parameters and environment variables with the syntax described below. -This option requires a single file or list of files separated by "," to follow. -.PP -A valid line in the file may contain zero or many "-x", "-pmca", or "--pmca" arguments. -The following patterns are supported: -pmca var val -pmca var "val" -x var=val -x var. -If any argument is duplicated in the file, the last value read will be used.
-.PP -MCA parameters and environment specified on the command line have higher precedence than variables specified in the file. -. -. -. -.SS Running as root -. -The PSRVR team strongly advises against executing -.I prun -as the root user. Applications should be run as regular -(non-root) users. -. -.PP -Reflecting this advice, prun will refuse to run as root by default. -To override this default, you can add the -.I --allow-run-as-root -option to the -.I prun -command line. -. -.SS Exit status -. -There is no standard definition for what \fIprun\fP should return as an exit -status. After considerable discussion, we settled on the following method for -assigning the \fIprun\fP exit status (note: in the following description, -the "primary" job is the initial application started by prun - all jobs that -are spawned by that job are designated "secondary" jobs): -. -.IP \[bu] 2 -if all processes in the primary job normally terminate with exit status 0, we return 0 -.IP \[bu] -if one or more processes in the primary job normally terminate with non-zero exit status, -we return the exit status of the process with the lowest rank to have a non-zero status -.IP \[bu] -if all processes in the primary job normally terminate with exit status 0, and one or more -processes in a secondary job normally terminate with non-zero exit status, we (a) return -the exit status of the process with the lowest rank in the lowest jobid to have a non-zero -status, and (b) output a message summarizing the exit status of the primary and all secondary jobs. -.IP \[bu] -if the cmd line option --report-child-jobs-separately is set, we will return -only- the -exit status of the primary job. Any non-zero exit status in secondary jobs will be -reported solely in a summary print statement. -. -.PP -By default, PSRVR records and notes that processes exited with non-zero termination status. 
-This is generally not considered an "abnormal termination" - i.e., PSRVR will not abort a -job if one or more processes return a non-zero status. Instead, the default behavior simply -reports the number of processes terminating with non-zero status upon completion of the job. -.PP -However, in some cases it can be desirable to have the job abort when any process terminates -with non-zero status. For example, a non-PMIx job might detect a bad result from a calculation -and want to abort, but doesn't want to generate a core file. Or a PMIx job might continue past -a call to PMIx_Finalize, but indicate that all processes should abort due to some post-PMIx result. -.PP -It is not anticipated that this situation will occur frequently. However, in the interest of -serving the broader community, PSRVR now has a means for allowing users to direct that jobs be -aborted upon any process exiting with non-zero status. Setting the MCA parameter -"orte_abort_on_non_zero_status" to 1 will cause PSRVR to abort all processes once any process - exits with non-zero status. -.PP -Terminations caused in this manner will be reported on the console as an "abnormal termination", -with the first process to so exit identified along with its exit status. -.PP -.\" ************************** -.\" Return Value Section -.\" ************************** -. -.SH RETURN VALUE -. -\fIprun\fP returns 0 if all processes started by \fIprun\fP exit after calling -PMIx_Finalize. A non-zero value is returned if an internal error occurred in -prun, or one or more processes exited before calling PMIx_Finalize. If an -internal error occurred in prun, the corresponding error code is returned. -In the event that one or more processes exit before calling PMIx_Finalize, the -return value of the rank of the process that \fIprun\fP first notices died -before calling PMIx_Finalize will be returned. Note that, in general, this will -be the first process that died but is not guaranteed to be so. -. 
-.PP -If the -.B --timeout -command line option is used and the timeout expires before the job -completes (thereby forcing -.I prun -to kill the job) -.I prun -will return an exit status equivalent to the value of -.B ETIMEDOUT -(which is typically 110 on Linux and OS X systems). - -. -.\" ************************** -.\" See Also Section -.\" ************************** -. diff --git a/orte/tools/ompi-prun/prun.c b/orte/tools/ompi-prun/prun.c deleted file mode 100644 index a03c54c11b..0000000000 --- a/orte/tools/ompi-prun/prun.c +++ /dev/null @@ -1,1373 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-
-#include "orte_config.h"
-#include "orte/constants.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif /* HAVE_STRINGS_H */
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_SYS_PARAM_H
-#include <sys/param.h>
-#endif
-#include <errno.h>
-#include <signal.h>
-#include <ctype.h>
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif /* HAVE_SYS_TYPES_H */
-#ifdef HAVE_SYS_WAIT_H
-#include <sys/wait.h>
-#endif /* HAVE_SYS_WAIT_H */
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif /* HAVE_SYS_TIME_H */
-#include <fcntl.h>
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-
-#include "opal/mca/event/event.h"
-#include "opal/mca/installdirs/installdirs.h"
-#include "opal/mca/pmix/base/base.h"
-#include "opal/mca/base/base.h"
-#include "opal/util/argv.h"
-#include "opal/util/output.h"
-#include "opal/util/basename.h"
-#include "opal/util/cmd_line.h"
-#include "opal/util/opal_environ.h"
-#include "opal/util/opal_getcwd.h"
-#include "opal/util/show_help.h"
-#include "opal/util/fd.h"
-#include "opal/sys/atomic.h"
-
-#include "opal/version.h"
-#include "opal/runtime/opal.h"
-#include "opal/runtime/opal_info_support.h"
-#include "opal/runtime/opal_progress_threads.h"
-#include "opal/util/os_path.h"
-#include "opal/util/path.h"
-#include "opal/class/opal_pointer_array.h"
-#include "opal/dss/dss.h"
-
-#include "orte/runtime/runtime.h"
-#include "orte/runtime/orte_globals.h"
-#include "orte/mca/errmgr/errmgr.h"
-#include "orte/mca/schizo/base/base.h"
-#include "orte/mca/state/state.h"
-#include "orte/orted/orted_submit.h"
-
-/* ensure I can behave like a daemon */
-#include "prun.h"
-
-/* Object pairing a wait lock with a list of values: launchhandler
- * appends the launch directives it receives to "info" and then wakes
- * the thread blocked on "lock". */
-typedef struct {
-    opal_object_t super;
-    opal_pmix_lock_t lock;
-    opal_list_t info;
-} myinfo_t;
-/* myinfo_t constructor: set up the lock and the (empty) info list */
-static void mcon(myinfo_t *p)
-{
-    OPAL_PMIX_CONSTRUCT_LOCK(&p->lock);
-    OBJ_CONSTRUCT(&p->info, opal_list_t);
-}
-/* myinfo_t destructor: tear down the lock and the info list */
-static void mdes(myinfo_t *p)
-{
-    OPAL_PMIX_DESTRUCT_LOCK(&p->lock);
-    OPAL_LIST_DESTRUCT(&p->info);
-}
-static OBJ_CLASS_INSTANCE(myinfo_t, opal_object_t,
-                          mcon, mdes);
-
-/* Tool-specific command line options, filled in by the parser
- * through the cmd_line_init table below. */
-static struct {
-    bool terminate_dvm;
-    bool system_server_first;
-    bool system_server_only;
-    int pid;
-} myoptions;
-
-/* job-level directives handed to opal_pmix.spawn */
-static opal_list_t job_info;
-/* true while we wait for our job; cleared by evhandler once the job
- * we spawned terminates (prun polls this flag) */
-static volatile bool active = false;
-/* jobid assigned by opal_pmix.spawn */
-static orte_jobid_t myjobid = ORTE_JOBID_INVALID;
-/* receives the launch directives delivered to launchhandler */
-static myinfo_t myinfo;
-
-static int create_app(int argc, char* argv[],
-                      opal_list_t *jdata,
-                      opal_pmix_app_t **app,
-                      bool *made_app, char ***app_env);
-static int parse_locals(opal_list_t *jdata, int argc, char* argv[]);
-static void set_classpath_jar_file(opal_pmix_app_t *app, int index, char *jarfile);
-/* event handler reference recorded by regcbfunc and later passed
- * to opal_pmix.deregister_evhandler */
-static size_t evid = INT_MAX;
-
-
-static opal_cmd_line_init_t cmd_line_init[] = {
-    /* tell the dvm to terminate */
-    { NULL, '\0', "terminate", "terminate", 0,
-      &myoptions.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL,
-      "Terminate the DVM", OPAL_CMD_LINE_OTYPE_DVM },
-
-    /* look first for a system server */
-    { NULL, '\0', "system-server-first", "system-server-first", 0,
-      &myoptions.system_server_first, OPAL_CMD_LINE_TYPE_BOOL,
-      "First look for a system server and connect to it if found", OPAL_CMD_LINE_OTYPE_DVM },
-
-    /* connect only to a system server */
-    { NULL, '\0', "system-server-only", "system-server-only", 0,
-      &myoptions.system_server_only, OPAL_CMD_LINE_TYPE_BOOL,
-      "Connect only to a system-level server", OPAL_CMD_LINE_OTYPE_DVM },
-
-    /* provide a connection PID */
-    { NULL, '\0', "pid", "pid", 1,
-      &myoptions.pid, OPAL_CMD_LINE_TYPE_INT,
-      "PID of the session-level daemon to which we should connect",
-      OPAL_CMD_LINE_OTYPE_DVM },
-
-    /* End of list */
-    { NULL, '\0', NULL, NULL, 0,
-      NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
-};
-
-
-/* Completion callback for opal_pmix.job_control: cbdata is the
- * opal_pmix_lock_t the caller is waiting on.  The returned status and
- * info are not examined; the release function (if any) is invoked
- * before the waiting thread is woken. */
-static void infocb(int status,
-                   opal_list_t *info,
-                   void *cbdata,
-                   opal_pmix_release_cbfunc_t release_fn,
-                   void *release_cbdata)
-{
-    opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
-    OPAL_ACQUIRE_OBJECT(lock);
-
-    if (NULL != release_fn) {
-        release_fn(release_cbdata);
-    }
-    OPAL_PMIX_WAKEUP_THREAD(lock);
-}
-
-/* Registration callback for opal_pmix.register_evhandler: stores the
- * handler reference in evid and wakes the thread waiting on the lock
- * passed as cbdata.  The status is not examined. */
-static void regcbfunc(int status, size_t ref, void *cbdata)
-{
-    opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
-    OPAL_ACQUIRE_OBJECT(lock);
-    evid = ref;
-    OPAL_PMIX_WAKEUP_THREAD(lock);
-}
-
-/* Generic operation-complete callback (used for
- * opal_pmix.deregister_evhandler): wakes the thread waiting on the
- * lock passed as cbdata. */
-static void opcbfunc(int status, void *cbdata)
-{
-    opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
-    OPAL_ACQUIRE_OBJECT(lock);
-    OPAL_PMIX_WAKEUP_THREAD(lock);
-}
-
-/* set once evhandler has seen the termination of our job */
-static bool fired = false;
-/* Job-termination event handler: extracts the terminated job's id and
- * exit status from the info list and, if the job is the one we
- * spawned, clears "active" so that prun stops waiting. */
-static void evhandler(int status,
-                      const opal_process_name_t *source,
-                      opal_list_t *info, opal_list_t *results,
-                      opal_pmix_notification_complete_fn_t cbfunc,
-                      void *cbdata)
-{
-    opal_value_t *val;
-    int jobstatus=0;
-    orte_jobid_t jobid = ORTE_JOBID_INVALID;
-
-    /* we should always have info returned to us - if not, there is
-     * nothing we can do */
-    if (NULL != info) {
-        OPAL_LIST_FOREACH(val, info, opal_value_t) {
-            if (0 == strcmp(val->key, OPAL_PMIX_JOB_TERM_STATUS)) {
-                jobstatus = val->data.integer;
-            } else if (0 == strcmp(val->key, OPAL_PMIX_PROCID)) {
-                jobid = val->data.name.jobid;
-            }
-        }
-        if (orte_cmd_options.verbose && (myjobid != ORTE_JOBID_INVALID && jobid == myjobid)) {
-            opal_output(0, "JOB %s COMPLETED WITH STATUS %d",
-                        ORTE_JOBID_PRINT(jobid), jobstatus);
-        }
-    }
-
-    /* only terminate if this was our job - keep in mind that we
-     * can get notifications of job termination prior to our spawn
-     * having completed! */
-    if (!fired && (myjobid != ORTE_JOBID_INVALID && jobid == myjobid)) {
-        fired = true;
-        active = false;
-    }
-
-    /* we _always_ have to execute the evhandler callback or
-     * else the event progress engine will hang */
-    if (NULL != cbfunc) {
-        cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
-    }
-}
-
-/* Lock plus result list handed to setupcbfunc as its cbdata */
-typedef struct {
-    opal_pmix_lock_t lock;
-    opal_list_t list;
-} mylock_t;
-
-
-/* Callback for opal_pmix.server_setup_application: moves every
- * returned value onto the caller's mylock_t list, releases the
- * provider through cbfunc, then wakes the waiting thread. */
-static void setupcbfunc(int status,
-                        opal_list_t *info,
-                        void *provided_cbdata,
-                        opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
-{
-    mylock_t *mylock = (mylock_t*)provided_cbdata;
-    opal_value_t *kv;
-
-    if (NULL != info) {
-        /* cycle across the provided info */
-        while (NULL != (kv = (opal_value_t*)opal_list_remove_first(info))) {
-            opal_list_append(&mylock->list, &kv->super);
-        }
-    }
-
-    /* release the caller */
-    if (NULL != cbfunc) {
-        cbfunc(OPAL_SUCCESS, cbdata);
-    }
-
-    OPAL_PMIX_WAKEUP_THREAD(&mylock->lock);
-}
-
-/* Launch-directive event handler: moves the delivered values onto
- * myinfo.info, completes the notification, and wakes the thread
- * waiting on myinfo.lock. */
-static void launchhandler(int status,
-                          const opal_process_name_t *source,
-                          opal_list_t *info, opal_list_t *results,
-                          opal_pmix_notification_complete_fn_t cbfunc,
-                          void *cbdata)
-{
-    opal_value_t *p;
-
-    /* the info list will include the launch directives, so
-     * transfer those to the myinfo_t for return to the main thread.
-     * Tolerate a NULL list, as evhandler and setupcbfunc do, rather
-     * than handing NULL to opal_list_remove_first */
-    if (NULL != info) {
-        while (NULL != (p = (opal_value_t*)opal_list_remove_first(info))) {
-            opal_list_append(&myinfo.info, &p->super);
-        }
-    }
-
-    /* we _always_ have to execute the evhandler callback or
-     * else the event progress engine will hang */
-    if (NULL != cbfunc) {
-        cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
-    }
-
-    /* now release the thread */
-    OPAL_PMIX_WAKEUP_THREAD(&myinfo.lock);
-}
-
-int prun(int argc, char *argv[])
-{
-    int rc, i;
-    char *param;
-    opal_pmix_lock_t lock;
-    opal_list_t apps, *lt;
-    opal_pmix_app_t *app;
-    opal_value_t *val, *kv, *kv2;
-    opal_list_t info, codes;
-    struct timespec tp = {0, 100000};
-    mylock_t mylock;
-
-    /* init the globals */
-    memset(&orte_cmd_options, 0, sizeof(orte_cmd_options));
-    memset(&myoptions, 0,
sizeof(myoptions)); - OBJ_CONSTRUCT(&job_info, opal_list_t); - OBJ_CONSTRUCT(&apps, opal_list_t); - - /* search the argv for MCA params */ - for (i=0; NULL != argv[i]; i++) { - if (':' == argv[i][0] || - NULL == argv[i+1] || NULL == argv[i+2]) { - break; - } - if (0 == strncmp(argv[i], "-"OPAL_MCA_CMD_LINE_ID, strlen("-"OPAL_MCA_CMD_LINE_ID)) || - 0 == strncmp(argv[i], "--"OPAL_MCA_CMD_LINE_ID, strlen("--"OPAL_MCA_CMD_LINE_ID)) || - 0 == strncmp(argv[i], "-g"OPAL_MCA_CMD_LINE_ID, strlen("-g"OPAL_MCA_CMD_LINE_ID)) || - 0 == strncmp(argv[i], "--g"OPAL_MCA_CMD_LINE_ID, strlen("--g"OPAL_MCA_CMD_LINE_ID))) { - (void) mca_base_var_env_name (argv[i+1], ¶m); - opal_setenv(param, argv[i+2], true, &environ); - free(param); - } else if (0 == strcmp(argv[i], "-am") || - 0 == strcmp(argv[i], "--am")) { - (void)mca_base_var_env_name("mca_base_param_file_prefix", ¶m); - opal_setenv(param, argv[i+1], true, &environ); - free(param); - } else if (0 == strcmp(argv[i], "-tune") || - 0 == strcmp(argv[i], "--tune")) { - (void)mca_base_var_env_name("mca_base_envar_file_prefix", ¶m); - opal_setenv(param, argv[i+1], true, &environ); - free(param); - } - } - - /* init only the util portion of OPAL */ - if (OPAL_SUCCESS != (rc = opal_init_util(&argc, &argv))) { - return rc; - } - - /* set our proc type for schizo selection */ - orte_process_info.proc_type = ORTE_PROC_TOOL; - - /* open the SCHIZO framework so we can setup the command line */ - if (ORTE_SUCCESS != (rc = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_schizo_base_select())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* setup our cmd line */ - orte_cmd_line = OBJ_NEW(opal_cmd_line_t); - if (OPAL_SUCCESS != (rc = opal_cmd_line_add(orte_cmd_line, cmd_line_init))) { - return rc; - } - - /* setup the rest of the cmd line only once */ - if (OPAL_SUCCESS != (rc = orte_schizo.define_cli(orte_cmd_line))) { - return rc; - } - - /* now that options have 
been defined, finish setup */ - mca_base_cmd_line_setup(orte_cmd_line); - - /* parse the result to get values */ - if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, - true, false, argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(rc)); - } - return rc; - } - - /* see if print version is requested. Do this before - * check for help so that --version --help works as - * one might expect. */ - if (orte_cmd_options.version) { - char *str; - str = opal_info_make_version_str("all", - OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION, - OPAL_GREEK_VERSION, - OPAL_REPO_REV); - if (NULL != str) { - fprintf(stdout, "%s (%s) %s\n\nReport bugs to %s\n", - "prun", "PMIx Reference Server", str, PACKAGE_BUGREPORT); - free(str); - } - exit(0); - } - - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !orte_cmd_options.run_as_root) { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (orte_cmd_options.help) { - fprintf(stderr, "prun cannot provide the help message when run as root.\n\n"); - } else { - fprintf(stderr, "prun has detected an attempt to run as root.\n\n"); - } - - fprintf(stderr, "Running as root is *strongly* discouraged as any mistake (e.g., in\n"); - fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); - fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); - - fprintf(stderr, "We strongly suggest that you run prun as a non-root user.\n\n"); - - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your command line. 
However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - - /* process any mca params */ - rc = mca_base_cmd_line_process_args(orte_cmd_line, &environ, &environ); - if (ORTE_SUCCESS != rc) { - return rc; - } - - /* Check for help request */ - if (orte_cmd_options.help) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(orte_cmd_line); - str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - "prun", "PSVR", OPAL_VERSION, - "prun", args, - PACKAGE_BUGREPORT); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - - /* If someone asks for help, that should be all we do */ - exit(0); - } - - /* ensure we ONLY take the ess/tool component */ - opal_setenv(OPAL_MCA_PREFIX"ess", "tool", true, &environ); - /* tell the ess/tool component how we want to connect */ - if (myoptions.system_server_only) { - opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_only", "1", true, &environ); - } - if (myoptions.system_server_first) { - opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_first", "1", true, &environ); - } - /* if they specified the DVM's pid, then pass it along */ - if (0 != myoptions.pid) { - opal_asprintf(¶m, "%d", myoptions.pid); - opal_setenv(OPAL_MCA_PREFIX"ess_tool_server_pid", param, true, &environ); - free(param); - } - /* if they specified the URI, then pass it along */ - if (NULL != orte_cmd_options.hnp) { - opal_setenv("PMIX_MCA_ptl_tcp_server_uri", orte_cmd_options.hnp, true, &environ); - } - - /* now initialize ORTE */ - if (OPAL_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) { - OPAL_ERROR_LOG(rc); - return rc; - } - - /* if the user just wants us to terminate a DVM, then do so */ - if (myoptions.terminate_dvm) { - OBJ_CONSTRUCT(&info, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = 
strdup(OPAL_PMIX_JOB_CTRL_TERMINATE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&info, &val->super); - fprintf(stderr, "TERMINATING DVM..."); - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - rc = opal_pmix.job_control(NULL, &info, infocb, (void*)&lock); - OPAL_PMIX_WAIT_THREAD(&lock); - OPAL_PMIX_DESTRUCT_LOCK(&lock); - OPAL_LIST_DESTRUCT(&info); - fprintf(stderr, "DONE\n"); - goto DONE; - } - - /* get here if they want to run an application, so let's parse - * the cmd line to get it */ - - if (OPAL_SUCCESS != (rc = parse_locals(&apps, argc, argv))) { - OPAL_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&apps); - goto DONE; - } - - /* bozo check */ - if (0 == opal_list_get_size(&apps)) { - opal_output(0, "No application specified!"); - goto DONE; - } - - /* init flag */ - active = true; - - /* register for job terminations so we get notified when - * our job completes */ - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - OBJ_CONSTRUCT(&info, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = strdup("foo"); - val->type = OPAL_INT; - val->data.integer = OPAL_ERR_JOB_TERMINATED; - opal_list_append(&info, &val->super); - opal_pmix.register_evhandler(&info, NULL, evhandler, regcbfunc, &lock); - OPAL_PMIX_WAIT_THREAD(&lock); - OPAL_PMIX_DESTRUCT_LOCK(&lock); - OPAL_LIST_DESTRUCT(&info); - - /* we want to be notified upon job completion */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NOTIFY_COMPLETION); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - - /* see if they specified the personality */ - if (NULL != orte_cmd_options.personality) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PERSONALITY); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.personality); - opal_list_append(&job_info, &val->super); - } - - /* check for stdout/err directives */ - /* if we were asked to tag output, mark it so */ - if (orte_cmd_options.tag_output) { - val = OBJ_NEW(opal_value_t); - 
val->key = strdup(OPAL_PMIX_TAG_OUTPUT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - /* if we were asked to timestamp output, mark it so */ - if (orte_cmd_options.timestamp_output) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_TIMESTAMP_OUTPUT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - /* if we were asked to output to files, pass it along */ - if (NULL != orte_cmd_options.output_filename) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_OUTPUT_TO_FILE); - val->type = OPAL_STRING; - /* if the given filename isn't an absolute path, then - * convert it to one so the name will be relative to - * the directory where prun was given as that is what - * the user will have seen */ - if (!opal_path_is_absolute(orte_cmd_options.output_filename)) { - char cwd[OPAL_PATH_MAX]; - getcwd(cwd, sizeof(cwd)); - val->data.string = opal_os_path(false, cwd, orte_cmd_options.output_filename, NULL); - } else { - val->data.string = strdup(orte_cmd_options.output_filename); - } - opal_list_append(&job_info, &val->super); - } - /* if we were asked to merge stderr to stdout, mark it so */ - if (orte_cmd_options.merge) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_MERGE_STDERR_STDOUT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - - /* check what user wants us to do with stdin */ - if (NULL != orte_cmd_options.stdin_target) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_STDIN_TGT); - val->type = OPAL_UINT32; - opal_list_append(&job_info, &val->super); - if (0 == strcmp(orte_cmd_options.stdin_target, "all")) { - val->data.uint32 = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(orte_cmd_options.stdin_target, "none")) { - val->data.uint32 = ORTE_VPID_INVALID; - } else { - val->data.uint32 = strtoul(orte_cmd_options.stdin_target, NULL, 10); - } - } - - /* if we want the 
argv's indexed, indicate that */ - if (orte_cmd_options.index_argv) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_INDEX_ARGV); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - - if (NULL != orte_cmd_options.mapping_policy) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_MAPBY); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.mapping_policy); - opal_list_append(&job_info, &val->super); - } else if (orte_cmd_options.pernode) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PPR); - val->type = OPAL_STRING; - val->data.string = strdup("1:node"); - opal_list_append(&job_info, &val->super); - } else if (0 < orte_cmd_options.npernode) { - /* define the ppr */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PPR); - val->type = OPAL_STRING; - opal_asprintf(&val->data.string, "%d:node", orte_cmd_options.npernode); - opal_list_append(&job_info, &val->super); - } else if (0 < orte_cmd_options.npersocket) { - /* define the ppr */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PPR); - val->type = OPAL_STRING; - opal_asprintf(&val->data.string, "%d:socket", orte_cmd_options.npernode); - opal_list_append(&job_info, &val->super); - } - - /* if the user specified cpus/rank, set it */ - if (0 < orte_cmd_options.cpus_per_proc) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_CPUS_PER_PROC); - val->type = OPAL_UINT32; - val->data.uint32 = orte_cmd_options.cpus_per_proc; - opal_list_append(&job_info, &val->super); - } - - /* if the user specified a ranking policy, then set it */ - if (NULL != orte_cmd_options.ranking_policy) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_RANKBY); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.ranking_policy); - opal_list_append(&job_info, &val->super); - } - - /* if the user specified a binding policy, then set it */ - if (NULL != 
orte_cmd_options.binding_policy) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_BINDTO); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.binding_policy); - opal_list_append(&job_info, &val->super); - } - - /* if they asked for nolocal, mark it so */ - if (orte_cmd_options.nolocal) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NO_PROCS_ON_HEAD); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - if (orte_cmd_options.no_oversubscribe) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - if (orte_cmd_options.oversubscribe) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE); - val->type = OPAL_BOOL; - val->data.flag = false; - opal_list_append(&job_info, &val->super); - } - if (orte_cmd_options.report_bindings) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_REPORT_BINDINGS); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - if (NULL != orte_cmd_options.cpu_list) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_CPU_LIST); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.cpu_list); - opal_list_append(&job_info, &val->super); - } - - /* mark if recovery was enabled on the cmd line */ - if (orte_enable_recovery) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_JOB_RECOVERABLE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - /* record the max restarts */ - if (0 < orte_max_restarts) { - OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_MAX_RESTARTS); - val->type = OPAL_UINT32; - val->data.uint32 = orte_max_restarts; - opal_list_append(&app->info, &val->super); - } - } - /* if continuous 
operation was specified */ - if (orte_cmd_options.continuous) { - /* mark this job as continuously operating */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_JOB_CONTINUOUS); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - - /* pickup any relevant envars */ - if (NULL != opal_pmix.server_setup_application) { - OBJ_CONSTRUCT(&info, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_SETUP_APP_ENVARS); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&info, &val->super); - - OPAL_PMIX_CONSTRUCT_LOCK(&mylock.lock); - OBJ_CONSTRUCT(&mylock.list, opal_list_t); - rc = opal_pmix.server_setup_application(ORTE_PROC_MY_NAME->jobid, - &info, setupcbfunc, &mylock); - if (OPAL_SUCCESS != rc) { - OPAL_LIST_DESTRUCT(&info); - OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock); - OBJ_DESTRUCT(&mylock.list); - goto DONE; - } - OPAL_PMIX_WAIT_THREAD(&mylock.lock); - OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock); - /* transfer any returned ENVARS to the job_info */ - while (NULL != (val = (opal_value_t*)opal_list_remove_first(&mylock.list))) { - if (0 == strcmp(val->key, OPAL_PMIX_SET_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_ADD_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_UNSET_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_PREPEND_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_APPEND_ENVAR)) { - opal_list_append(&job_info, &val->super); - } else { - OBJ_RELEASE(val); - } - } - OPAL_LIST_DESTRUCT(&mylock.list); - } - - /* if we were launched by a tool wanting to direct our - * operation, then we need to pause here and give it - * a chance to tell us what we need to do */ - if (NULL != (param = getenv("PMIX_LAUNCHER_PAUSE_FOR_TOOL")) && - 0 == strcmp(param, "1")) { - /* register for the PMIX_LAUNCH_DIRECTIVE event */ - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - OBJ_CONSTRUCT(&codes, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = strdup("foo"); - val->type = OPAL_INT; - val->data.integer = 
OPAL_PMIX_LAUNCH_DIRECTIVE; - opal_list_append(&codes, &val->super); - /* setup the myinfo object to capture the returned - * values - must do so prior to registering in case - * the event has already arrived */ - OBJ_CONSTRUCT(&myinfo, myinfo_t); - /* go ahead and register */ - opal_pmix.register_evhandler(&codes, NULL, launchhandler, regcbfunc, &lock); - OPAL_PMIX_WAIT_THREAD(&lock); - OPAL_PMIX_DESTRUCT_LOCK(&lock); - OPAL_LIST_DESTRUCT(&codes); - /* now wait for the launch directives to arrive */ - OPAL_PMIX_WAIT_THREAD(&myinfo.lock); - /* process the returned directives */ - OPAL_LIST_FOREACH(val, &myinfo.info, opal_value_t) { - if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_JOB_DIRECTIVES)) { - /* there will be a pointer to a list containing the directives */ - lt = (opal_list_t*)val->data.ptr; - while (NULL != (kv = (opal_value_t*)opal_list_remove_first(lt))) { - opal_output(0, "JOB DIRECTIVE: %s", kv->key); - opal_list_append(&job_info, &kv->super); - } - } else if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_APP_DIRECTIVES)) { - /* there will be a pointer to a list containing the directives */ - lt = (opal_list_t*)val->data.ptr; - OPAL_LIST_FOREACH(kv, lt, opal_value_t) { - opal_output(0, "APP DIRECTIVE: %s", kv->key); - OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) { - /* the value can only be on one list at a time, so replicate it */ - kv2 = OBJ_NEW(opal_value_t); - opal_value_xfer(kv2, kv); - opal_list_append(&app->info, &kv2->super); - } - } - } - } - } - - if (OPAL_SUCCESS != (rc = opal_pmix.spawn(&job_info, &apps, &myjobid))) { - opal_output(0, "Job failed to spawn: %s", opal_strerror(rc)); - goto DONE; - } - OPAL_LIST_DESTRUCT(&job_info); - OPAL_LIST_DESTRUCT(&apps); - - if (orte_cmd_options.verbose) { - opal_output(0, "JOB %s EXECUTING", OPAL_JOBID_PRINT(myjobid)); - } - - while (active) { - nanosleep(&tp, NULL); - } - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - opal_pmix.deregister_evhandler(evid, opcbfunc, &lock); - OPAL_PMIX_WAIT_THREAD(&lock); - 
OPAL_PMIX_DESTRUCT_LOCK(&lock); - - DONE: - /* cleanup and leave */ - orte_finalize(); - return 0; -} - -static int parse_locals(opal_list_t *jdata, int argc, char* argv[]) -{ - int i, rc; - int temp_argc; - char **temp_argv, **env; - opal_pmix_app_t *app; - bool made_app; - - /* Make the apps */ - temp_argc = 0; - temp_argv = NULL; - opal_argv_append(&temp_argc, &temp_argv, argv[0]); - - /* NOTE: This bogus env variable is necessary in the calls to - create_app(), below. See comment immediately before the - create_app() function for an explanation. */ - - env = NULL; - for (i = 1; i < argc; ++i) { - if (0 == strcmp(argv[i], ":")) { - /* Make an app with this argv */ - if (opal_argv_count(temp_argv) > 1) { - if (NULL != env) { - opal_argv_free(env); - env = NULL; - } - app = NULL; - rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); - if (OPAL_SUCCESS != rc) { - /* Assume that the error message has already been - printed; no need to cleanup -- we can just - exit */ - exit(1); - } - if (made_app) { - opal_list_append(jdata, &app->super); - } - - /* Reset the temps */ - - temp_argc = 0; - temp_argv = NULL; - opal_argv_append(&temp_argc, &temp_argv, argv[0]); - } - } else { - opal_argv_append(&temp_argc, &temp_argv, argv[i]); - } - } - - if (opal_argv_count(temp_argv) > 1) { - app = NULL; - rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); - if (ORTE_SUCCESS != rc) { - /* Assume that the error message has already been printed; - no need to cleanup -- we can just exit */ - exit(1); - } - if (made_app) { - opal_list_append(jdata, &app->super); - } - } - if (NULL != env) { - opal_argv_free(env); - } - opal_argv_free(temp_argv); - - /* All done */ - - return ORTE_SUCCESS; -} - - -/* - * This function takes a "char ***app_env" parameter to handle the - * specific case: - * - * orterun --mca foo bar -app appfile - * - * That is, we'll need to keep foo=bar, but the presence of the app - * file will cause an invocation of parse_appfile(), 
which will cause - * one or more recursive calls back to create_app(). Since the - * foo=bar value applies globally to all apps in the appfile, we need - * to pass in the "base" environment (that contains the foo=bar value) - * when we parse each line in the appfile. - * - * This is really just a special case -- when we have a simple case like: - * - * orterun --mca foo bar -np 4 hostname - * - * Then the upper-level function (parse_locals()) calls create_app() - * with a NULL value for app_env, meaning that there is no "base" - * environment that the app needs to be created from. - */ -static int create_app(int argc, char* argv[], - opal_list_t *jdata, - opal_pmix_app_t **app_ptr, - bool *made_app, char ***app_env) -{ - char cwd[OPAL_PATH_MAX]; - int i, j, count, rc; - char *param, *value; - opal_pmix_app_t *app = NULL; - bool found = false; - char *appname = NULL; - opal_value_t *val; - - *made_app = false; - - /* parse the cmd line - do this every time thru so we can - * repopulate the globals */ - if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, true, false, - argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(rc)); - } - return rc; - } - - /* Setup application context */ - app = OBJ_NEW(opal_pmix_app_t); - opal_cmd_line_get_tail(orte_cmd_line, &count, &app->argv); - - /* See if we have anything left */ - if (0 == count) { - opal_show_help("help-orterun.txt", "orterun:executable-not-specified", - true, "prun", "prun"); - rc = OPAL_ERR_NOT_FOUND; - goto cleanup; - } - - /* Grab all MCA environment variables */ - app->env = opal_argv_copy(*app_env); - for (i=0; NULL != environ[i]; i++) { - if (0 == strncmp("PMIX_", environ[i], 5) || - 0 == strncmp("OMPI_", environ[i], 5)) { - /* check for duplicate in app->env - this - * would have been placed there by the - * cmd line processor. 
By convention, we - * always let the cmd line override the - * environment - */ - param = strdup(environ[i]); - value = strchr(param, '='); - *value = '\0'; - value++; - opal_setenv(param, value, false, &app->env); - free(param); - } - } - - /* set necessary env variables for external usage from tune conf file*/ - int set_from_file = 0; - char **vars = NULL; - if (OPAL_SUCCESS == mca_base_var_process_env_list_from_file(&vars) && - NULL != vars) { - for (i=0; NULL != vars[i]; i++) { - value = strchr(vars[i], '='); - /* terminate the name of the param */ - *value = '\0'; - /* step over the equals */ - value++; - /* overwrite any prior entry */ - opal_setenv(vars[i], value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(vars[i], value, true, &orte_forwarded_envars); - } - set_from_file = 1; - opal_argv_free(vars); - } - /* Did the user request to export any environment variables on the cmd line? */ - char *env_set_flag; - env_set_flag = getenv("OMPI_MCA_mca_base_env_list"); - if (opal_cmd_line_is_taken(orte_cmd_line, "x")) { - if (NULL != env_set_flag) { - opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false); - return ORTE_ERR_FATAL; - } - j = opal_cmd_line_get_ninsts(orte_cmd_line, "x"); - for (i = 0; i < j; ++i) { - param = opal_cmd_line_get_param(orte_cmd_line, "x", i, 0); - - if (NULL != (value = strchr(param, '='))) { - /* terminate the name of the param */ - *value = '\0'; - /* step over the equals */ - value++; - /* overwrite any prior entry */ - opal_setenv(param, value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(param, value, true, &orte_forwarded_envars); - } else { - value = getenv(param); - if (NULL != value) { - /* overwrite any prior entry */ - opal_setenv(param, value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(param, value, true, &orte_forwarded_envars); - } else { - opal_output(0, "Warning: could not find environment variable \"%s\"\n", param); - 
} - } - } - } else if (NULL != env_set_flag) { - /* if mca_base_env_list was set, check if some of env vars were set via -x from a conf file. - * If this is the case, error out. - */ - if (!set_from_file) { - /* set necessary env variables for external usage */ - vars = NULL; - if (OPAL_SUCCESS == mca_base_var_process_env_list(env_set_flag, &vars) && - NULL != vars) { - for (i=0; NULL != vars[i]; i++) { - value = strchr(vars[i], '='); - /* terminate the name of the param */ - *value = '\0'; - /* step over the equals */ - value++; - /* overwrite any prior entry */ - opal_setenv(vars[i], value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(vars[i], value, true, &orte_forwarded_envars); - } - opal_argv_free(vars); - } - } else { - opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false); - return ORTE_ERR_FATAL; - } - } - - /* Did the user request a specific wdir? */ - - if (NULL != orte_cmd_options.wdir) { - /* if this is a relative path, convert it to an absolute path */ - if (opal_path_is_absolute(orte_cmd_options.wdir)) { - app->cwd = strdup(orte_cmd_options.wdir); - } else { - /* get the cwd */ - if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { - opal_show_help("help-orterun.txt", "orterun:init-failure", - true, "get the cwd", rc); - goto cleanup; - } - /* construct the absolute path */ - app->cwd = opal_os_path(false, cwd, orte_cmd_options.wdir, NULL); - } - } else if (orte_cmd_options.set_cwd_to_session_dir) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_SET_SESSION_CWD); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - } else { - if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { - opal_show_help("help-orterun.txt", "orterun:init-failure", - true, "get the cwd", rc); - goto cleanup; - } - app->cwd = strdup(cwd); - } - - /* Did the user specify a hostfile. Need to check for both - * hostfile and machine file. 
- * We can only deal with one hostfile per app context, otherwise give an error. - */ - found = false; - if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "hostfile"))) { - if (1 < j) { - opal_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, "prun", NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(orte_cmd_line, "hostfile", 0, 0); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_HOSTFILE); - val->type = OPAL_STRING; - val->data.string = value; - opal_list_append(&app->info, &val->super); - found = true; - } - } - if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "machinefile"))) { - if (1 < j || found) { - opal_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, "prun", NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(orte_cmd_line, "machinefile", 0, 0); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_HOSTFILE); - val->type = OPAL_STRING; - val->data.string = value; - opal_list_append(&app->info, &val->super); - } - } - - /* Did the user specify any hosts? */ - if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "host"))) { - char **targ=NULL, *tval; - for (i = 0; i < j; ++i) { - value = opal_cmd_line_get_param(orte_cmd_line, "host", i, 0); - opal_argv_append_nosize(&targ, value); - } - tval = opal_argv_join(targ, ','); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_HOST); - val->type = OPAL_STRING; - val->data.string = tval; - opal_list_append(&app->info, &val->super); - } - - /* check for bozo error */ - if (0 > orte_cmd_options.num_procs) { - opal_show_help("help-orterun.txt", "orterun:negative-nprocs", - true, "prun", app->argv[0], - orte_cmd_options.num_procs, NULL); - return ORTE_ERR_FATAL; - } - - app->maxprocs = orte_cmd_options.num_procs; - - /* see if we need to preload the binary to - * find the app - don't do this for java apps, however, as we - * can't easily find the class on the cmd line. 
Java apps have to - * preload their binary via the preload_files option - */ - if (NULL == strstr(app->argv[0], "java")) { - if (orte_cmd_options.preload_binaries) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_SET_SESSION_CWD); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PRELOAD_BIN); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - } - } - if (NULL != orte_cmd_options.preload_files) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PRELOAD_FILES); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - } - - /* Do not try to find argv[0] here -- the starter is responsible - for that because it may not be relevant to try to find it on - the node where orterun is executing. So just strdup() argv[0] - into app. */ - - app->cmd = strdup(app->argv[0]); - if (NULL == app->cmd) { - opal_show_help("help-orterun.txt", "orterun:call-failed", - true, "prun", "library", "strdup returned NULL", errno); - rc = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* if this is a Java application, we have a bit more work to do. Such - * applications actually need to be run under the Java virtual machine - * and the "java" command will start the "executable". So we need to ensure - * that all the proper java-specific paths are provided - */ - appname = opal_basename(app->cmd); - if (0 == strcmp(appname, "java")) { - /* see if we were given a library path */ - found = false; - for (i=1; NULL != app->argv[i]; i++) { - if (NULL != strstr(app->argv[i], "java.library.path")) { - char *dptr; - /* find the '=' that delineates the option from the path */ - if (NULL == (dptr = strchr(app->argv[i], '='))) { - /* that's just wrong */ - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - /* step over the '=' */ - ++dptr; - /* yep - but does it include the path to the mpi libs? 
*/ - found = true; - if (NULL == strstr(app->argv[i], opal_install_dirs.libdir)) { - /* doesn't appear to - add it to be safe */ - if (':' == app->argv[i][strlen(app->argv[i]-1)]) { - opal_asprintf(&value, "-Djava.library.path=%s%s", dptr, opal_install_dirs.libdir); - } else { - opal_asprintf(&value, "-Djava.library.path=%s:%s", dptr, opal_install_dirs.libdir); - } - free(app->argv[i]); - app->argv[i] = value; - } - break; - } - } - if (!found) { - /* need to add it right after the java command */ - opal_asprintf(&value, "-Djava.library.path=%s", opal_install_dirs.libdir); - opal_argv_insert_element(&app->argv, 1, value); - free(value); - } - - /* see if we were given a class path */ - found = false; - for (i=1; NULL != app->argv[i]; i++) { - if (NULL != strstr(app->argv[i], "cp") || - NULL != strstr(app->argv[i], "classpath")) { - /* yep - but does it include the path to the mpi libs? */ - found = true; - /* check if mpi.jar exists - if so, add it */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, i+1, "mpi.jar"); - } - free(value); - /* check for oshmem support */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, i+1, "shmem.jar"); - } - free(value); - /* always add the local directory */ - opal_asprintf(&value, "%s:%s", app->cwd, app->argv[i+1]); - free(app->argv[i+1]); - app->argv[i+1] = value; - break; - } - } - if (!found) { - /* check to see if CLASSPATH is in the environment */ - found = false; // just to be pedantic - for (i=0; NULL != environ[i]; i++) { - if (0 == strncmp(environ[i], "CLASSPATH", strlen("CLASSPATH"))) { - value = strchr(environ[i], '='); - ++value; /* step over the = */ - opal_argv_insert_element(&app->argv, 1, value); - /* check for mpi.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - 
set_classpath_jar_file(app, 1, "mpi.jar"); - } - free(value); - /* check for shmem.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, 1, "shmem.jar"); - } - free(value); - /* always add the local directory */ - opal_asprintf(&value, "%s:%s", app->cwd, app->argv[1]); - free(app->argv[1]); - app->argv[1] = value; - opal_argv_insert_element(&app->argv, 1, "-cp"); - found = true; - break; - } - } - if (!found) { - /* need to add it right after the java command - have - * to include the working directory and trust that - * the user set cwd if necessary - */ - char *str, *str2; - /* always start with the working directory */ - str = strdup(app->cwd); - /* check for mpi.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - opal_asprintf(&str2, "%s:%s", str, value); - free(str); - str = str2; - } - free(value); - /* check for shmem.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - opal_asprintf(&str2, "%s:%s", str, value); - free(str); - str = str2; - } - free(value); - opal_argv_insert_element(&app->argv, 1, str); - free(str); - opal_argv_insert_element(&app->argv, 1, "-cp"); - } - } - /* try to find the actual command - may not be perfect */ - for (i=1; i < opal_argv_count(app->argv); i++) { - if (NULL != strstr(app->argv[i], "java.library.path")) { - continue; - } else if (NULL != strstr(app->argv[i], "cp") || - NULL != strstr(app->argv[i], "classpath")) { - /* skip the next field */ - i++; - continue; - } - /* declare this the winner */ - opal_setenv("OMPI_COMMAND", app->argv[i], true, &app->env); - /* collect everything else as the cmd line */ - if ((i+1) < opal_argv_count(app->argv)) { - value = opal_argv_join(&app->argv[i+1], ' '); - opal_setenv("OMPI_ARGV", value, true, &app->env); - free(value); - } - break; - } - } else { - /* add 
the cmd to the environment for MPI_Info to pickup */ - opal_setenv("OMPI_COMMAND", appname, true, &app->env); - if (1 < opal_argv_count(app->argv)) { - value = opal_argv_join(&app->argv[1], ' '); - opal_setenv("OMPI_ARGV", value, true, &app->env); - free(value); - } - } - - *app_ptr = app; - app = NULL; - *made_app = true; - - /* All done */ - - cleanup: - if (NULL != app) { - OBJ_RELEASE(app); - } - if (NULL != appname) { - free(appname); - } - return rc; -} - -static void set_classpath_jar_file(opal_pmix_app_t *app, int index, char *jarfile) -{ - if (NULL == strstr(app->argv[index], jarfile)) { - /* nope - need to add it */ - char *fmt = ':' == app->argv[index][strlen(app->argv[index]-1)] - ? "%s%s/%s" : "%s:%s/%s"; - char *str; - opal_asprintf(&str, fmt, app->argv[index], opal_install_dirs.libdir, jarfile); - free(app->argv[index]); - app->argv[index] = str; - } -} diff --git a/orte/tools/ompi-prun/prun.h b/orte/tools/ompi-prun/prun.h deleted file mode 100644 index eb86cc6d00..0000000000 --- a/orte/tools/ompi-prun/prun.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PRUN_H -#define PRUN_H - -#include "orte_config.h" - -BEGIN_C_DECLS - -/** - * Main body of prun functionality - */ -int prun(int argc, char *argv[]); - -END_C_DECLS - -#endif /* ORTERUN_ORTERUN_H */ diff --git a/orte/tools/orte-dvm/Makefile.am b/orte/tools/orte-dvm/Makefile.am deleted file mode 100644 index 3723b846cd..0000000000 --- a/orte/tools/orte-dvm/Makefile.am +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This is not quite in the Automake spirit, but we have to do it. -# Since the totalview portion of the library must be built with -g, we -# must eliminate the CFLAGS that are passed in here by default (which -# may already have debugging and/or optimization flags). We use -# post-processed forms of the CFLAGS in the library targets down -# below. 
- -CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS) - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = orte-dvm.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = orte-dvm - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -endif # OPAL_INSTALL_BINARIES - -orte_dvm_SOURCES = \ - orte-dvm.c - -orte_dvm_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/orte-dvm/orte-dvm.1in b/orte/tools/orte-dvm/orte-dvm.1in deleted file mode 100644 index d4d74df913..0000000000 --- a/orte/tools/orte-dvm/orte-dvm.1in +++ /dev/null @@ -1,193 +0,0 @@ -.\” -*- nroff -*- -.\" Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\” Copyright (c) 2015 Intel, Inc. All rights reserved -.\" $COPYRIGHT$ -.\" -.\" Man page for ORTE's orte-dvm command -.\" -.\" .TH name section center-footer left-footer center-header -.TH ORTE-DVM 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -orte-dvm, ompi_dvm \- Establish a Distributed Virtual Machine (DVM). - -.B Note: -\fIorte-dvm\fP and \fIompi-dvm\fP are synonyms for each -other. Using either of the names will produce the same behavior. -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. 
-.PP -.B orte-dvm -[ options ] -.P - -Invoking \fIorte-dvm\fP via an absolute path -name is equivalent to specifying the \fI--prefix\fP option with a -\fI\fR value equivalent to the directory where \fIorte-dvm\fR -resides, minus its last subdirectory. For example: - - \fB%\fP /usr/local/bin/orte-dvm ... - -is equivalent to - - \fB%\fP orte-dvm --prefix /usr/local - -. -.\" ************************** -.\" Quick Summary Section -.\" ************************** -.SH QUICK SUMMARY -. -\fIorte-dvm\fP will establish a DVM that can be used to execute subsequent -applications. Use of \fIorte-dvm\fP can be advantageous, for example, when you want to -execute a number of short-lived tasks. In such cases, the time required to start -the ORTE DVM can be a significant fraction of the time to execute the -overall application. Thus, creating a persistent DVM can speed the overall -execution. In addition, a persistent DVM will support executing multiple parallel -applications while maintaining separation between their respective cores. -.\" ************************** -.\" Options Section -.\" ************************** -.SH OPTIONS -. -.\" -.\" Start options listing -.\" Indent 10 characters from start of first column to start of second column -. -.TP -.B -h\fR,\fP --help -Display help for this command -. -. -.TP -.B -V\fR,\fP --version -Print version number. If no other arguments are given, this will also -cause orte-dvm to exit. -. -. -.P -Use one of the following options to specify which hosts (nodes) of the cluster to use -for the DVM. -. -. -.TP -.B -H\fR,\fP -host\fR,\fP --host \fR\fP -List of hosts for the DVM. -. -. -.TP -.B --hostfile\fR,\fP --hostfile \fR\fP -Provide a hostfile to use. -. -. -.TP -.B -machinefile\fR,\fP --machinefile \fR\fP -Synonym for \fI-hostfile\fP. -. -. -.TP -.B --prefix \fR\fP -Prefix directory that will be used to set the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote node before invoking the ORTE daemon. -. -. 
-..P -Setting MCA parameters: -. -. -.TP -.B -gmca\fR,\fP --gmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -mca\fR,\fP --mca -Send arguments to various MCA modules. See the "MCA" section, below. -. -. -. -. -.TP -.B -report-uri\fR,\fP --report-uri -Print out orte-dvm's URI during startup. The channel must be either a '-' to indicate that -the URI is to be output to stdout, a '+' to indicate that the URI is to be output to stderr, -or a filename to which the URI is to be written. -. -. -.P -The following options are useful for developers; they are not generally -useful to most ORTE and/or MPI users: -. -.TP -.B -d\fR,\fP --debug-devel -Enable debugging of the ORTE layer. -. -. -.TP -.B --debug-daemons-file -Enable debugging of the ORTE daemons in the DVM, storing -output in files. -. -. -.P -There may be other options listed with \fIorte-dvm --help\fP. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -\fIorte-dvm\fP starts a Distributed Virtual Machine (DVM) by launching -a daemon on each node of the allocation, as modified or specified by -the \fI-host\fP and \fI-hostfile\fP options. Applications can subsequently -be executed using the \fIorte-submit\fP command. -. -The DVM remains in operation until receiving the \fIorte-submit -terminate\fP -command. -. -. -. -.SS Specifying Host Nodes -. -Host nodes can be identified on the \fIorte-dvm\fP command line with the \fI-host\fP -option or in a hostfile. -. -.PP -For example, -. -.TP 4 -orte-dvm -H aa,aa,bb ./a.out -launches two processes on node aa and one on bb. -. -.PP -Or, consider the hostfile -. - - \fB%\fP cat myhostfile - aa slots=2 - bb slots=2 - cc slots=2 - -. -.PP -Here, we list both the host names (aa, bb, and cc) but also how many "slots" -there are for each. Slots indicate how many processes can potentially execute -on a node. 
For best performance, the number of slots may be chosen to be the -number of cores on the node or the number of processor sockets. If the hostfile -does not provide slots information, a default of 1 is assumed. -When running under resource managers (e.g., SLURM, Torque, etc.), -Open MPI will obtain both the hostnames and the number of slots directly -from the resource manger. -. -. diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c deleted file mode 100644 index 522c539af3..0000000000 --- a/orte/tools/orte-dvm/orte-dvm.c +++ /dev/null @@ -1,482 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include -#include -#ifdef HAVE_STRINGS_H -#include -#endif /* HAVE_STRINGS_H */ -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#include -#include -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_WAIT_H -#include -#endif /* HAVE_SYS_WAIT_H */ -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ -#include -#ifdef HAVE_SYS_STAT_H -#include -#endif - -#include "opal/mca/event/event.h" -#include "opal/mca/installdirs/installdirs.h" -#include "opal/mca/base/base.h" -#include "opal/mca/pmix/pmix.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/util/basename.h" -#include "opal/util/cmd_line.h" -#include "opal/util/opal_environ.h" -#include "opal/util/opal_getcwd.h" -#include "opal/util/show_help.h" -#include "opal/util/fd.h" -#include "opal/util/daemon_init.h" - -#include "opal/version.h" -#include "opal/runtime/opal.h" -#include "opal/runtime/opal_info_support.h" -#include "opal/util/os_path.h" -#include "opal/util/path.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/grpcomm/grpcomm.h" -#include "orte/mca/odls/odls.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/state/state.h" - -#include "orte/runtime/runtime.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" - -#include "orte/orted/orted.h" - -/* - * Globals - */ -static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT; - -/* - * Globals - */ -static struct { - bool help; - bool version; - char *prefix; - bool run_as_root; - bool set_sid; - bool daemonize; - bool system_server; - char *report_uri; - bool remote_connections; -} 
myglobals; - -static opal_cmd_line_init_t cmd_line_init[] = { - /* Various "obvious" options */ - { NULL, 'h', NULL, "help", 0, - &myglobals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - { NULL, 'V', NULL, "version", 0, - &myglobals.version, OPAL_CMD_LINE_TYPE_BOOL, - "Print version and exit" }, - - { NULL, '\0', "prefix", "prefix", 1, - &myglobals.prefix, OPAL_CMD_LINE_TYPE_STRING, - "Prefix to be used to look for ORTE executables" }, - - { "orte_daemonize", '\0', NULL, "daemonize", 0, - &myglobals.daemonize, OPAL_CMD_LINE_TYPE_BOOL, - "Daemonize the orte-dvm into the background" }, - - { NULL, '\0', NULL, "set-sid", 0, - &myglobals.set_sid, OPAL_CMD_LINE_TYPE_BOOL, - "Direct the orte-dvm to separate from the current session"}, - - { "orte_debug_daemons", '\0', "debug-daemons", "debug-daemons", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Debug daemons" }, - - { "orte_debug", 'd', "debug-devel", "debug-devel", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of OpenRTE" }, - - { NULL, '\0', "allow-run-as-root", "allow-run-as-root", 0, - &myglobals.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, - "Allow execution as root (STRONGLY DISCOURAGED)" }, - - /* Specify the launch agent to be used */ - { "orte_launch_agent", '\0', "launch-agent", "launch-agent", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Command used to start processes on remote nodes (default: orted)" }, - - /* maximum size of VM - typically used to subdivide an allocation */ - { "orte_max_vm_size", '\0', "max-vm-size", "max-vm-size", 1, - NULL, OPAL_CMD_LINE_TYPE_INT, - "Maximum size of VM" }, - - /* Set a hostfile */ - { NULL, '\0', "hostfile", "hostfile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, - { NULL, '\0', "machinefile", "machinefile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, - { "orte_default_hostfile", '\0', "default-hostfile", "default-hostfile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a default hostfile" }, - - { NULL, 'H', "host", 
"host", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "List of hosts to invoke processes on" }, - - { NULL, '\0', "system-server", "system-server", 0, - &myglobals.system_server, OPAL_CMD_LINE_TYPE_BOOL, - "Provide a system-level server connection point - only one allowed per node" }, - - { NULL, '\0', "report-uri", "report-uri", 1, - &myglobals.report_uri, OPAL_CMD_LINE_TYPE_STRING, - "Printout URI on stdout [-], stderr [+], or a file [anything else]", - OPAL_CMD_LINE_OTYPE_DEBUG }, - - { NULL, '\0', "remote-tools", "remote-tools", 0, - &myglobals.remote_connections, OPAL_CMD_LINE_TYPE_BOOL, - "Enable connections from remote tools" }, - - /* End of list */ - { NULL, '\0', NULL, NULL, 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } -}; - -int main(int argc, char *argv[]) -{ - int rc, i, j; - opal_cmd_line_t cmd_line; - char *param, *value; - orte_job_t *jdata=NULL; - orte_app_context_t *app; - - /* Setup and parse the command line */ - memset(&myglobals, 0, sizeof(myglobals)); - /* find our basename (the name of the executable) so that we can - use it in pretty-print error messages */ - orte_basename = opal_basename(argv[0]); - - opal_cmd_line_create(&cmd_line, cmd_line_init); - mca_base_cmd_line_setup(&cmd_line); - if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(&cmd_line, true, false, - argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(rc)); - } - return rc; - } - - /* print version if requested. Do this before check for help so - that --version --help works as one might expect. 
*/ - if (myglobals.version) { - char *str; - str = opal_info_make_version_str("all", - OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION, - OPAL_GREEK_VERSION, - OPAL_REPO_REV); - if (NULL != str) { - fprintf(stdout, "%s %s\n\nReport bugs to %s\n", - orte_basename, str, PACKAGE_BUGREPORT); - free(str); - } - exit(0); - } - - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !myglobals.run_as_root) { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (myglobals.help) { - fprintf(stderr, "%s cannot provide the help message when run as root.\n\n", orte_basename); - } else { - fprintf(stderr, "%s has detected an attempt to run as root.\n\n", orte_basename); - } - - fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); - fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); - fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); - - fprintf(stderr, "We strongly suggest that you run %s as a non-root user.\n\n", orte_basename); - - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your command line. However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - - /* - * Since this process can now handle MCA/GMCA parameters, make sure to - * process them. - * NOTE: It is "safe" to call mca_base_cmd_line_process_args() before - * opal_init_util() since mca_base_cmd_line_process_args() does *not* - * depend upon opal_init_util() functionality. 
- */ - if (OPAL_SUCCESS != mca_base_cmd_line_process_args(&cmd_line, &environ, &environ)) { - exit(1); - } - - /* Need to initialize OPAL so that install_dirs are filled in */ - if (OPAL_SUCCESS != opal_init(&argc, &argv)) { - exit(1); - } - - /* Check for help request */ - if (myglobals.help) { - char *str, *args = NULL; - char *project_name = NULL; - if (0 == strcmp(orte_basename, "mpirun")) { - project_name = "Open MPI"; - } else { - project_name = "OpenRTE"; - } - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - orte_basename, project_name, OPAL_VERSION, - orte_basename, args, - PACKAGE_BUGREPORT); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - - /* If someone asks for help, that should be all we do */ - exit(0); - } - - if (myglobals.system_server) { - /* we should act as system-level PMIx server */ - opal_setenv(OPAL_MCA_PREFIX"pmix_system_server", "1", true, &environ); - } - /* always act as session-level PMIx server */ - opal_setenv(OPAL_MCA_PREFIX"pmix_session_server", "1", true, &environ); - /* if we were asked to report a uri, set the MCA param to do so */ - if (NULL != myglobals.report_uri) { - opal_setenv("PMIX_MCA_ptl_tcp_report_uri", myglobals.report_uri, true, &environ); - } - if (myglobals.remote_connections) { - opal_setenv("PMIX_MCA_ptl_tcp_remote_connections", "1", true, &environ); - } - - /* Setup MCA params */ - orte_register_params(); - - /* save the environment for launch purposes. 
This MUST be - * done so that we can pass it to any local procs we - * spawn - otherwise, those local procs won't see any - * non-MCA envars were set in the enviro prior to calling - * orterun - */ - orte_launch_environ = opal_argv_copy(environ); - -#if defined(HAVE_SETSID) - /* see if we were directed to separate from current session */ - if (myglobals.set_sid) { - setsid(); - } -#endif - - /* detach from controlling terminal - * otherwise, remain attached so output can get to us - */ - if(!orte_debug_flag && - !orte_debug_daemons_flag && - myglobals.daemonize) { - opal_daemon_init(NULL); - } - - /* Intialize our Open RTE environment */ - if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_MASTER))) { - /* cannot call ORTE_ERROR_LOG as it could be the errmgr - * never got loaded! - */ - return rc; - } - /* finalize OPAL. As it was opened again from orte_init->opal_init - * we continue to have a reference count on it. So we have to finalize it twice... - */ - opal_finalize(); - - /* get the daemon job object - was created by ess/hnp component */ - if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { - orte_show_help("help-orterun.txt", "bad-job-object", true, - orte_basename); - exit(0); - } - /* also should have created a daemon "app" */ - if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) { - orte_show_help("help-orterun.txt", "bad-app-object", true, - orte_basename); - exit(0); - } - - /* Did the user specify a prefix, or want prefix by default? 
*/ - if (opal_cmd_line_is_taken(&cmd_line, "prefix") || want_prefix_by_default) { - size_t param_len; - /* if both the prefix was given and we have a prefix - * given above, check to see if they match - */ - if (opal_cmd_line_is_taken(&cmd_line, "prefix") && - NULL != myglobals.prefix) { - /* if they don't match, then that merits a warning */ - param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); - /* ensure we strip any trailing '/' */ - if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) { - param[strlen(param)-1] = '\0'; - } - value = strdup(myglobals.prefix); - if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) { - value[strlen(value)-1] = '\0'; - } - if (0 != strcmp(param, value)) { - orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict", - true, orte_basename, value, param); - /* let the global-level prefix take precedence since we - * know that one is being used - */ - free(param); - param = strdup(myglobals.prefix); - } - free(value); - } else if (NULL != myglobals.prefix) { - param = myglobals.prefix; - } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){ - /* must be --prefix alone */ - param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); - } else { - /* --enable-orterun-prefix-default was given to orterun */ - param = strdup(opal_install_dirs.prefix); - } - - if (NULL != param) { - /* "Parse" the param, aka remove superfluous path_sep. */ - param_len = strlen(param); - while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) { - param[param_len-1] = '\0'; - param_len--; - if (0 == param_len) { - orte_show_help("help-orterun.txt", "orterun:empty-prefix", - true, orte_basename, orte_basename); - return ORTE_ERR_FATAL; - } - } - orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING); - free(param); - } - } - - /* Did the user specify a hostfile. Need to check for both - * hostfile and machine file. 
- * We can only deal with one hostfile per app context, otherwise give an error. - */ - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) { - if(1 < j) { - orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, orte_basename, NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0); - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING); - } - } - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) { - if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) { - orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, orte_basename, NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0); - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING); - } - } - - /* Did the user specify any hosts? */ - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "host"))) { - char **targ=NULL, *tval; - for (i = 0; i < j; ++i) { - value = opal_cmd_line_get_param(&cmd_line, "host", i, 0); - opal_argv_append_nosize(&targ, value); - } - tval = opal_argv_join(targ, ','); - orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING); - opal_argv_free(targ); - free(tval); - } - OBJ_DESTRUCT(&cmd_line); - - /* setup to listen for commands sent specifically to me, even though I would probably - * be the one sending them! 
Unfortunately, since I am a participating daemon, - * there are times I need to send a command to "all daemons", and that means *I* have - * to receive it too - */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON, - ORTE_RML_PERSISTENT, orte_daemon_recv, NULL); - - /* spawn the DVM - we skip the initial steps as this - * isn't a user-level application */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATE); - - /* loop the event lib until an exit event is detected */ - while (orte_event_base_active) { - opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); - } - ORTE_ACQUIRE_OBJECT(orte_event_base_active); - - /* cleanup and leave */ - orte_finalize(); - - if (orte_debug_flag) { - fprintf(stderr, "exiting with status %d\n", orte_exit_status); - } - exit(orte_exit_status); -}