diff --git a/.gitignore b/.gitignore
index d79ef330c4..ec6546a8de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -476,6 +476,9 @@ orte/tools/orte-checkpoint/orte-checkpoint.1
 orte/tools/orte-checkpoint/ompi-checkpoint.1
 orte/tools/orte-clean/orte-clean
 orte/tools/orte-clean/orte-clean.1
+orte/tools/orte-dvm/orte-dvm
+orte/tools/orte-dvm/orte-dvm.1
+ompi/mca/rte/orte/ompi-dvm.1
 orte/tools/orte-info/orte-info
 orte/tools/orte-info/orte-info.1
 orte/tools/orte-migrate/orte-migrate
@@ -488,6 +491,9 @@ orte/tools/orte-restart/orte-restart.1
 orte/tools/orte-restart/ompi-restart.1
 orte/tools/orte-server/orte-server
 orte/tools/orte-server/orte-server.1
+orte/tools/orte-submit/orte-submit
+orte/tools/orte-submit/orte-submit.1
+ompi/mca/rte/orte/ompi-submit.1
 orte/tools/orte-top/orte-top
 orte/tools/orte-top/orte-top.1
 orte/tools/orted/orted
diff --git a/config/orte_config_files.m4 b/config/orte_config_files.m4
index a0e87b174b..dc87637d34 100644
--- a/config/orte_config_files.m4
+++ b/config/orte_config_files.m4
@@ -5,7 +5,8 @@
 #                         University Research and Technology
 #                         Corporation.  All rights reserved.
 # Copyright (c) 2011-2012 Los Alamos National Security, LLC.  All rights
-#                         reserved. 
+#                         reserved.
+# Copyright (c) 2015      Intel, Inc. All rights reserved
 # $COPYRIGHT$
 # 
 # Additional copyrights may follow
@@ -32,5 +33,7 @@ AC_DEFUN([ORTE_CONFIG_FILES],[
         orte/tools/orte-migrate/Makefile
         orte/tools/orte-info/Makefile
         orte/tools/orte-server/Makefile
+        orte/tools/orte-submit/Makefile
+        orte/tools/orte-dvm/Makefile
     ])
 ])
diff --git a/ompi/mca/rte/orte/Makefile.am b/ompi/mca/rte/orte/Makefile.am
index a5f57b1e48..f5c1f210a5 100644
--- a/ompi/mca/rte/orte/Makefile.am
+++ b/ompi/mca/rte/orte/Makefile.am
@@ -27,7 +27,7 @@ libmca_rte_orte_la_SOURCES =$(sources) $(headers)
 libmca_rte_orte_la_LDFLAGS = -module -avoid-version
 libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la
 
-man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1
+man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 ompi-dvm.1 ompi-submit.1
 
 if WANT_FT
 man_pages += ompi-checkpoint.1 ompi-restart.1
@@ -43,6 +43,8 @@ install-exec-hook:
 	(cd $(DESTDIR)$(bindir); rm -f ompi-clean$(EXEEXT); $(LN_S) orte-clean$(EXEEXT) ompi-clean$(EXEEXT))
 	(cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT))
 	(cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT))
+	(cd $(DESTDIR)$(bindir); rm -f ompi-dvm$(EXEEXT); $(LN_S) orte-dvm$(EXEEXT) ompi-dvm$(EXEEXT))
+	(cd $(DESTDIR)$(bindir); rm -f ompi-submit$(EXEEXT); $(LN_S) orte-submit$(EXEEXT) ompi-submit$(EXEEXT))
 if WANT_FT
 	(cd $(DESTDIR)$(bindir); rm -f ompi-checkpoint$(EXEEXT); $(LN_S) orte-checkpoint$(EXEEXT) ompi-checkpoint$(EXEEXT))
 	(cd $(DESTDIR)$(bindir); rm -f ompi-restart$(EXEEXT); $(LN_S) orte-restart$(EXEEXT) ompi-restart$(EXEEXT))
@@ -55,7 +57,9 @@ uninstall-local:
 		$(DESTDIR)$(bindir)/ompi-ps$(EXEEXT) \
 		$(DESTDIR)$(bindir)/ompi-clean$(EXEEXT) \
 		$(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \
-		$(DESTDIR)$(bindir)/ompi-server$(EXEEXT)
+		$(DESTDIR)$(bindir)/ompi-server$(EXEEXT) \
+		$(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT) \
+		$(DESTDIR)$(bindir)/ompi-submit$(EXEEXT)
 if WANT_FT
 	rm -f $(DESTDIR)$(bindir)/ompi-checkpoint$(EXEEXT) \
 		$(DESTDIR)$(bindir)/ompi-restart$(EXEEXT) \
@@ -115,5 +119,20 @@ $(top_builddir)/orte/tools/orte-server/orte-server.1:
 ompi-server.1: $(top_builddir)/orte/tools/orte-server/orte-server.1
 	cp -f $(top_builddir)/orte/tools/orte-server/orte-server.1 ompi-server.1
 
+# The orte-dvm and orte-submit man pages are generated in their own tool
+# directories; build them there on demand so the copies below do not
+# depend on those directories having been built first.
+$(top_builddir)/orte/tools/orte-dvm/orte-dvm.1:
+	(cd $(top_builddir)/orte/tools/orte-dvm && $(MAKE) $(AM_MAKEFLAGS) orte-dvm.1)
+
+ompi-dvm.1: $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1
+	cp -f $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 ompi-dvm.1
+
+$(top_builddir)/orte/tools/orte-submit/orte-submit.1:
+	(cd $(top_builddir)/orte/tools/orte-submit && $(MAKE) $(AM_MAKEFLAGS) orte-submit.1)
+
+ompi-submit.1: $(top_builddir)/orte/tools/orte-submit/orte-submit.1
+	cp -f $(top_builddir)/orte/tools/orte-submit/orte-submit.1 ompi-submit.1
+
 clean-local:
 	rm -f $(man_pages)
diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c
index a203f13699..571b54ee5d 100644
--- a/orte/mca/ess/base/ess_base_std_tool.c
+++ b/orte/mca/ess/base/ess_base_std_tool.c
@@ -11,7 +11,7 @@
  *                         All rights reserved.
  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  *                         All rights reserved.
- * Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
  * Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
  *
  * $COPYRIGHT$
@@ -36,19 +36,22 @@
 #include "opal/mca/event/event.h"
 #include "opal/runtime/opal.h"
 #include "opal/runtime/opal_cr.h"
+#include "opal/runtime/opal_progress_threads.h"
 #include "opal/util/arch.h"
 #include "opal/util/proc.h"
 
 #include "orte/mca/oob/base/base.h"
+#include "orte/mca/plm/base/base.h"
 #include "orte/mca/rml/base/base.h"
 #include "orte/mca/routed/base/base.h"
-#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/errmgr/base/base.h"
 #include "orte/mca/iof/base/base.h"
 #include "orte/mca/state/base/base.h"
 #if OPAL_ENABLE_FT_CR == 1
 #include "orte/mca/snapc/base/base.h"
 #include "orte/mca/sstore/base/base.h"
 #endif
+#include "orte/mca/schizo/base/base.h"
 #include "orte/util/proc_info.h"
 #include "orte/util/session_dir.h"
 #include "orte/util/show_help.h"
@@ -59,6 +62,8 @@
 
 #include "orte/mca/ess/base/base.h"
 
+static bool progress_thread_running = false;
+
 int orte_ess_base_tool_setup(void)
 {
     int ret;
@@ -79,6 +84,9 @@ int orte_ess_base_tool_setup(void)
          * so it will do the right things.
          */
         orte_process_info.proc_type |= ORTE_PROC_NON_MPI;
+        /* get a separate orte event base */
+        orte_event_base = opal_start_progress_thread("orte", true);
+        progress_thread_running = true;
     }
     
     /* open and setup the state machine */
@@ -93,6 +101,18 @@ int orte_ess_base_tool_setup(void)
         goto error;
     }
 
+    /* open and setup the error manager */
+    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_errmgr_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_errmgr_base_select";
+        goto error;
+    }
+
     /* Setup the communication infrastructure */
     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
         ORTE_ERROR_LOG(ret);
@@ -177,6 +197,15 @@ int orte_ess_base_tool_setup(void)
             error = "orte_iof_base_select";
             goto error;
         }
+        /* if we were given an HNP, then also setup the PLM in case this
+         * tool wants to request that we spawn something for it */
+        if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_plm_base_open";
+            goto error;
+        }
+        /* we don't select the plm framework as we only want the
+         * base proxy functions */
     }
     
 #if OPAL_ENABLE_FT_CR == 1
@@ -208,7 +237,19 @@ int orte_ess_base_tool_setup(void)
     /* Tools do not need all the OPAL CR stuff */
     opal_cr_set_enabled(false);
 #endif
-    
+
+    /* setup schizo in case we are parsing cmd lines */
+    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_schizo_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_schizo_base_select";
+        goto error;
+    }
+
     return ORTE_SUCCESS;
     
  error:
@@ -237,6 +278,13 @@ int orte_ess_base_tool_finalize(void)
     }
     (void) mca_base_framework_close(&orte_routed_base_framework);
     (void) mca_base_framework_close(&orte_rml_base_framework);
+    (void) mca_base_framework_close(&orte_schizo_base_framework);
+    (void) mca_base_framework_close(&orte_errmgr_base_framework);
 
+    /* release the event base */
+    if (progress_thread_running) {
+        opal_stop_progress_thread("orte", true);
+        progress_thread_running = false;
+    }
     return ORTE_SUCCESS;    
 }
diff --git a/orte/mca/ess/env/ess_env_component.c b/orte/mca/ess/env/ess_env_component.c
index 547dfb2d3d..3dc05c6b36 100644
--- a/orte/mca/ess/env/ess_env_component.c
+++ b/orte/mca/ess/env/ess_env_component.c
@@ -66,21 +66,11 @@ orte_ess_env_component_open(void)
 
 int orte_ess_env_component_query(mca_base_module_t **module, int *priority)
 {
-    /* we are the env module, so set the priority to
-     * be higher than the tool component so that a
-     * tool launched as a distributed set of procs
-     * (i.e., a "tool with name") will select this
-     * module, but low enough that any other environment
-     * will override us
-     */
-
-    /* if we don't have a path back to the HNP, then we
-     * were not launched by mpirun, so don't pick us as
-     * it would be impossible for the correct env vars
-     * to have been set!
-     */
-    if (NULL != orte_process_info.my_hnp_uri) {
-        *priority = 20;
+    /* we are the env module, only used by daemons that are
+     * launched by ssh so allow any enviro-specific modules
+     * to override us */
+    if (ORTE_PROC_IS_DAEMON) {
+        *priority = 1;
         *module = (mca_base_module_t *)&orte_ess_env_module;
         return ORTE_SUCCESS;
     }
diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c
index c0fdd96f04..534ae40fdb 100644
--- a/orte/mca/ess/env/ess_env_module.c
+++ b/orte/mca/ess/env/ess_env_module.c
@@ -112,64 +112,22 @@ static int rte_init(void)
     /* if I am a daemon, complete my setup using the
      * default procedure
      */
-    if (ORTE_PROC_IS_DAEMON) {
-        if (NULL != orte_node_regex) {
-            /* extract the nodes */
-            if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) {
-                error = "orte_regex_extract_node_names";
-                goto error;
-            }
-        }
-        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
-            ORTE_ERROR_LOG(ret);
-            error = "orte_ess_base_orted_setup";
+    if (NULL != orte_node_regex) {
+        /* extract the nodes */
+        if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) {
+            error = "orte_regex_extract_node_names";
             goto error;
         }
-        opal_argv_free(hosts);
-        return ORTE_SUCCESS;
     }
-    
-    if (ORTE_PROC_IS_TOOL) {
-        /* otherwise, if I am a tool proc, use that procedure */
-        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
-            ORTE_ERROR_LOG(ret);
-            error = "orte_ess_base_tool_setup";
-            goto error;
-        }
-        return ORTE_SUCCESS;
-        
-    }
-    
-    /* use the default procedure to finish my setup */
-    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(true))) {
+    if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
         ORTE_ERROR_LOG(ret);
-        error = "orte_ess_base_app_setup";
+        error = "orte_ess_base_orted_setup";
         goto error;
     }
-    
-    /* setup process binding */
-    if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
-        error = "proc_binding";
-        goto error;
-    }
-
-    /* if we are an ORTE app - and not an MPI app - then
-     * we need to exchange our connection info here.
-     * MPI_Init has its own modex, so we don't need to do
-     * two of them. However, if we don't do a modex at all,
-     * then processes have no way to communicate
-     *
-     * NOTE: only do this when the process originally launches.
-     * Cannot do this on a restart as the rest of the processes
-     * in the job won't be executing this step, so we would hang
-     */
-    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
-        opal_pmix.fence(NULL, 0);
-    }
-    
+    opal_argv_free(hosts);
     return ORTE_SUCCESS;
 
-error:
+ error:
     if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
         orte_show_help("help-orte-runtime.txt",
                        "orte_init:startup:internal-failure",
@@ -183,29 +141,10 @@ static int rte_finalize(void)
 {
     int ret;
 
-    /* if I am a daemon, finalize using the default procedure */
-    if (ORTE_PROC_IS_DAEMON) {
-        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
-            ORTE_ERROR_LOG(ret);
-        }
-        return ret;
-    } else if (ORTE_PROC_IS_TOOL) {
-        /* otherwise, if I am a tool proc, use that procedure */
-        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
-            ORTE_ERROR_LOG(ret);
-        }
-        /* as a tool, I didn't create a nidmap - so just return now */
-        return ret;
-    }
-
-    /* otherwise, I must be an application process
-     * use the default procedure to finish
-     */
-    if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
+    if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
         ORTE_ERROR_LOG(ret);
     }
-
-    return ORTE_SUCCESS;
+    return ret;
 }
 
 static int env_set_name(void)
diff --git a/orte/mca/ess/tool/ess_tool_module.c b/orte/mca/ess/tool/ess_tool_module.c
index 5949e18e76..035f0b8ee0 100644
--- a/orte/mca/ess/tool/ess_tool_module.c
+++ b/orte/mca/ess/tool/ess_tool_module.c
@@ -9,6 +9,7 @@
  *                         University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
+ * Copyright (c) 2015      Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  * 
  * Additional copyrights may follow
@@ -19,6 +20,7 @@
 
 #include "orte_config.h"
 #include "orte/constants.h"
+#include "opal/hash_string.h"
 
 #include <sys/types.h>
 #include <stdio.h>
@@ -57,28 +59,56 @@ static int rte_init(void)
 {
     int ret;
     char *error = NULL;
-    
+    orte_jobid_t jobid;
+    orte_vpid_t vpid;
+
     /* run the prolog */
     if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
         error = "orte_ess_base_std_prolog";
         goto error;
     }
 
-    /* If we are a tool with no name, then responsibility for
-     * defining the name falls to the PLM component for our
-     * respective environment.
-     * Just call the base function for this.
-     *
-     * NOTE: Tools with names - i.e., tools consisting of a
-     * distributed set of processes - will select and use
-     * the appropriate enviro-specific module and -not- this one!
-     */
-    if (ORTE_SUCCESS != (ret = orte_plm_base_set_hnp_name())) {
-        ORTE_ERROR_LOG(ret);
-        error = "orte_plm_base_set_hnp_name";
-        goto error;
+
+    if (NULL != orte_ess_base_jobid &&
+        NULL != orte_ess_base_vpid) {
+        opal_output_verbose(2, orte_ess_base_framework.framework_output,
+                            "ess:tool:obtaining name from environment");
+        /* a malformed name is reported through the common error path
+         * below rather than being returned without any diagnostic */
+        if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_util_convert_string_to_jobid";
+            goto error;
+        }
+        ORTE_PROC_MY_NAME->jobid = jobid;
+        if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_util_convert_string_to_vpid";
+            goto error;
+        }
+        ORTE_PROC_MY_NAME->vpid = vpid;
+    } else {
+        /* If we are a tool with no name, then define it here */
+        uint16_t jobfam;
+        uint32_t hash32;
+        uint32_t bias;
+
+        opal_output_verbose(2, orte_ess_base_framework.framework_output,
+                            "ess:tool:computing name");
+        /* hash the nodename */
+        OPAL_HASH_STR(orte_process_info.nodename, hash32);
+        bias = (uint32_t)orte_process_info.pid;
+        /* fold in the bias */
+        hash32 = hash32 ^ bias;
+
+        /* now compress to 16-bits */
+        jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
+
+        /* set the name */
+        ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
+        ORTE_PROC_MY_NAME->vpid = 0;
     }
-
+
     /* do the rest of the standard tool init */
     if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
         ORTE_ERROR_LOG(ret);
@@ -88,7 +118,7 @@ static int rte_init(void)
 
     return ORTE_SUCCESS;        
 
-error:
+ error:
     if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
         orte_show_help("help-orte-runtime.txt",
                        "orte_init:startup:internal-failure",
diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c
index 8e70ee452c..64ac6749be 100644
--- a/orte/mca/plm/base/plm_base_receive.c
+++ b/orte/mca/plm/base/plm_base_receive.c
@@ -164,28 +164,26 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
         jdata->originator.vpid = sender->vpid;
 
         /* get the parent's job object */
-        if (NULL == (parent = orte_get_job_data_object(sender->jobid))) {
-            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
-            goto ANSWER_LAUNCH;
+        if (NULL != (parent = orte_get_job_data_object(sender->jobid))) {
+            /* if the prefix was set in the parent's job, we need to transfer
+             * that prefix to the child's app_context so any further launch of
+             * orteds can find the correct binary. There always has to be at
+             * least one app_context in both parent and child, so we don't
+             * need to check that here. However, be sure not to overwrite
+             * the prefix if the user already provided it!
+             */
+            app = (orte_app_context_t*)opal_pointer_array_get_item(parent->apps, 0);
+            child_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
+            prefix_dir = NULL;
+            if (orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&prefix_dir, OPAL_STRING) &&
+                !orte_get_attribute(&child_app->attributes, ORTE_APP_PREFIX_DIR, NULL, OPAL_STRING)) {
+                orte_set_attribute(&child_app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, prefix_dir, OPAL_STRING);
+            }
+            if (NULL != prefix_dir) {
+                free(prefix_dir);
+            }
         }
-       /* if the prefix was set in the parent's job, we need to transfer
-         * that prefix to the child's app_context so any further launch of
-         * orteds can find the correct binary. There always has to be at
-         * least one app_context in both parent and child, so we don't
-         * need to check that here. However, be sure not to overwrite
-         * the prefix if the user already provided it!
-         */
-        app = (orte_app_context_t*)opal_pointer_array_get_item(parent->apps, 0);
-        child_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
-        prefix_dir = NULL;
-        if (orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&prefix_dir, OPAL_STRING) &&
-            !orte_get_attribute(&child_app->attributes, ORTE_APP_PREFIX_DIR, NULL, OPAL_STRING)) {
-            orte_set_attribute(&child_app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, prefix_dir, OPAL_STRING);
-        }
-        if (NULL != prefix_dir) {
-            free(prefix_dir);
-        }
-
+        
         /* if the user asked to forward any envars, cycle through the app contexts
          * in the comm_spawn request and add them
          */
@@ -210,18 +208,20 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
             goto ANSWER_LAUNCH;
         }
 
-        if( NULL == parent->bookmark ) {
-            /* find the sender's node in the job map */
-            if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, sender->vpid))) {
-                /* set the bookmark so the child starts from that place - this means
-                 * that the first child process could be co-located with the proc
-                 * that called comm_spawn, assuming slots remain on that node. Otherwise,
-                 * the procs will start on the next available node
-                 */
-                jdata->bookmark = proc->node;
+        if (NULL != parent) {
+            if (NULL == parent->bookmark) {
+                /* find the sender's node in the job map */
+                if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, sender->vpid))) {
+                    /* set the bookmark so the child starts from that place - this means
+                     * that the first child process could be co-located with the proc
+                     * that called comm_spawn, assuming slots remain on that node. Otherwise,
+                     * the procs will start on the next available node
+                     */
+                    jdata->bookmark = proc->node;
+                }
+            } else {
+                jdata->bookmark = parent->bookmark;
             }
-        } else {
-            jdata->bookmark = parent->bookmark;
         }
 
         /* launch it */
diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c
index 8addff084d..3331856b70 100644
--- a/orte/mca/rml/oob/rml_oob_send.c
+++ b/orte/mca/rml/oob/rml_oob_send.c
@@ -270,7 +270,7 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
         return ORTE_ERR_BAD_PARAM;
     }
 
-    if( NULL == peer ||
+    if (NULL == peer ||
         OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) {
         /* cannot send to an invalid peer */
         ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
diff --git a/orte/mca/routed/direct/routed_direct.c b/orte/mca/routed/direct/routed_direct.c
index 97dc58b739..639263678f 100644
--- a/orte/mca/routed/direct/routed_direct.c
+++ b/orte/mca/routed/direct/routed_direct.c
@@ -71,6 +71,7 @@ orte_routed_module_t orte_routed_direct_module = {
 #endif
 };
 
+static orte_process_name_t mylifeline;
 static orte_process_name_t *lifeline = NULL;
 static opal_list_t my_children;
 
@@ -509,7 +510,17 @@ static bool route_is_defined(const orte_process_name_t *target)
 
 static int set_lifeline(orte_process_name_t *proc)
 {
-    /* there is no lifeline */
+    /* refuse a NULL name instead of dereferencing it below */
+    if (NULL == proc) {
+        return ORTE_ERR_BAD_PARAM;
+    }
+    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
+                         "%s routed:direct: set lifeline to %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(proc)));
+    /* keep our own copy in file-scope storage and point lifeline at it */
+    mylifeline = *proc;
+    lifeline = &mylifeline;
     return ORTE_SUCCESS;
 }
 
diff --git a/orte/mca/state/dvm/Makefile.am b/orte/mca/state/dvm/Makefile.am
new file mode 100644
index 0000000000..e741d817fa
--- /dev/null
+++ b/orte/mca/state/dvm/Makefile.am
@@ -0,0 +1,34 @@
+#
+# Copyright (c) 2015      Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+# 
+# Additional copyrights may follow
+# 
+# $HEADER$
+#
+
+sources = state_dvm.h state_dvm_component.c state_dvm.c
+
+# The component is produced in this directory under one of two names:
+#   mca_state_dvm.la     when built as a DSO (installed), or
+#   libmca_state_dvm.la  when built statically (not installed).
+# Exactly one of the two variables below is non-empty.
+
+if MCA_BUILD_orte_state_dvm_DSO
+component_install = mca_state_dvm.la
+component_noinst =
+else
+component_install =
+component_noinst = libmca_state_dvm.la
+endif
+
+# DSO flavor
+mcacomponentdir = $(ortelibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_state_dvm_la_LDFLAGS = -module -avoid-version
+mca_state_dvm_la_SOURCES = $(sources)
+
+# static flavor
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_state_dvm_la_LDFLAGS = -module -avoid-version
+libmca_state_dvm_la_SOURCES = $(sources)
diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c
new file mode 100644
index 0000000000..b09859084d
--- /dev/null
+++ b/orte/mca/state/dvm/state_dvm.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2015      Intel, Inc. All rights reserved
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+
+#include <sys/types.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif  /* HAVE_UNISTD_H */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
+#include "opal/util/output.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/filem/filem.h"
+#include "orte/mca/iof/iof.h"
+#include "orte/mca/plm/base/base.h"
+#include "orte/mca/ras/base/base.h"
+#include "orte/mca/rmaps/base/base.h"
+#include "orte/mca/routed/routed.h"
+#include "orte/util/session_dir.h"
+#include "orte/runtime/orte_quit.h"
+
+#include "orte/mca/state/state.h"
+#include "orte/mca/state/base/base.h"
+#include "orte/mca/state/base/state_private.h"
+#include "state_dvm.h"
+
+/*
+ * Module functions: Global
+ */
+static int init(void);
+static int finalize(void);
+
+/* local functions: internal linkage (a later definition written without 'static' keeps it) */
+static void vm_ready(int fd, short args, void *cbdata);
+static void check_complete(int fd, short args, void *cbdata);
+
+/* DVM module - used when mpirun is persistent.
+ * Only init and finalize are implemented in this file; every other
+ * entry is the corresponding orte_state_base_* function. */
+orte_state_base_module_t orte_state_dvm_module = {
+    init,
+    finalize,
+    orte_state_base_activate_job_state,
+    orte_state_base_add_job_state,
+    orte_state_base_set_job_state_callback,
+    orte_state_base_set_job_state_priority,
+    orte_state_base_remove_job_state,
+    orte_state_base_activate_proc_state,
+    orte_state_base_add_proc_state,
+    orte_state_base_set_proc_state_callback,
+    orte_state_base_set_proc_state_priority,
+    orte_state_base_remove_proc_state
+};
+
+/* Job state sequence registered by init(). Entry i here is paired with
+ * launch_callbacks[i], so the two arrays must stay the same length and
+ * order. Individual plm's must add a state for launching daemons. */
+static orte_job_state_t launch_states[] = {
+    ORTE_JOB_STATE_INIT,
+    ORTE_JOB_STATE_INIT_COMPLETE,
+    ORTE_JOB_STATE_ALLOCATE,
+    ORTE_JOB_STATE_ALLOCATION_COMPLETE,
+    ORTE_JOB_STATE_DAEMONS_LAUNCHED,
+    ORTE_JOB_STATE_DAEMONS_REPORTED,
+    ORTE_JOB_STATE_VM_READY,
+    ORTE_JOB_STATE_MAP,
+    ORTE_JOB_STATE_MAP_COMPLETE,
+    ORTE_JOB_STATE_SYSTEM_PREP,
+    ORTE_JOB_STATE_LAUNCH_APPS,
+    ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
+    ORTE_JOB_STATE_RUNNING,
+    ORTE_JOB_STATE_REGISTERED,
+    /* termination states */
+    ORTE_JOB_STATE_TERMINATED,
+    ORTE_JOB_STATE_NOTIFY_COMPLETED,
+    ORTE_JOB_STATE_ALL_JOBS_COMPLETE
+};
+static orte_state_cbfunc_t launch_callbacks[] = {   /* index-matched with launch_states */
+    orte_plm_base_setup_job,
+    orte_plm_base_setup_job_complete,
+    orte_ras_base_allocate,
+    orte_plm_base_allocation_complete,
+    orte_plm_base_daemons_launched,
+    orte_plm_base_daemons_reported,
+    vm_ready,
+    orte_rmaps_base_map_job,
+    orte_plm_base_mapping_complete,
+    orte_plm_base_complete_setup,
+    orte_plm_base_launch_apps,
+    orte_state_base_local_launch_complete,
+    orte_plm_base_post_launch,
+    orte_plm_base_registered,
+    check_complete,
+    orte_state_base_cleanup_job,
+    orte_quit
+};
+
+static orte_proc_state_t proc_states[] = {   /* registered by init(); entry i pairs with proc_callbacks[i] */
+    ORTE_PROC_STATE_RUNNING,
+    ORTE_PROC_STATE_REGISTERED,
+    ORTE_PROC_STATE_IOF_COMPLETE,
+    ORTE_PROC_STATE_WAITPID_FIRED,
+    ORTE_PROC_STATE_TERMINATED
+};
+static orte_state_cbfunc_t proc_callbacks[] = {   /* every proc state uses the same base tracker */
+    orte_state_base_track_procs,
+    orte_state_base_track_procs,
+    orte_state_base_track_procs,
+    orte_state_base_track_procs,
+    orte_state_base_track_procs
+};
+
+static void force_quit(int fd, short args, void *cbdata)
+{
+    orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
+
+    /* ask the PLM to terminate the orteds, then drop the caddy */
+    orte_plm.terminate_orteds();
+    OBJ_RELEASE(state);
+}
+
+/************************
+ * API Definitions
+ ************************/
+static int init(void)
+{
+    int ret, idx, count;
+
+    /* both state lists start out empty */
+    OBJ_CONSTRUCT(&orte_job_states, opal_list_t);
+    OBJ_CONSTRUCT(&orte_proc_states, opal_list_t);
+
+    /* job machine: launch_states[n] is handled by launch_callbacks[n] */
+    count = sizeof(launch_states) / sizeof(launch_states[0]);
+    for (idx = 0; idx < count; ++idx) {
+        ret = orte_state.add_job_state(launch_states[idx], launch_callbacks[idx],
+                                       ORTE_SYS_PRI);
+        if (ORTE_SUCCESS != ret) {
+            ORTE_ERROR_LOG(ret);
+        }
+    }
+    /* quit once the daemons have terminated */
+    ret = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED,
+                                   orte_quit, ORTE_SYS_PRI);
+    if (ORTE_SUCCESS != ret) {
+        ORTE_ERROR_LOG(ret);
+    }
+    /* a forced exit is handled by force_quit at error priority */
+    ret = orte_state.add_job_state(ORTE_JOB_STATE_FORCED_EXIT,
+                                   force_quit, ORTE_ERROR_PRI);
+    if (ORTE_SUCCESS != ret) {
+        ORTE_ERROR_LOG(ret);
+    }
+    /* progress reports, if requested */
+    ret = orte_state.add_job_state(ORTE_JOB_STATE_REPORT_PROGRESS,
+                                   orte_state_base_report_progress, ORTE_ERROR_PRI);
+    if (ORTE_SUCCESS != ret) {
+        ORTE_ERROR_LOG(ret);
+    }
+    if (opal_output_get_verbosity(orte_state_base_framework.framework_output) > 5) {
+        orte_state_base_print_job_state_machine();
+    }
+
+    /* proc machine: lets us track proc lifecycle changes */
+    count = sizeof(proc_states) / sizeof(proc_states[0]);
+    for (idx = 0; idx < count; ++idx) {
+        ret = orte_state.add_proc_state(proc_states[idx], proc_callbacks[idx],
+                                        ORTE_SYS_PRI);
+        if (ORTE_SUCCESS != ret) {
+            ORTE_ERROR_LOG(ret);
+        }
+    }
+    if (opal_output_get_verbosity(orte_state_base_framework.framework_output) > 5) {
+        orte_state_base_print_proc_state_machine();
+    }
+
+    return ORTE_SUCCESS;
+}
+
+static int finalize(void)
+{
+    opal_list_item_t *entry = opal_list_remove_first(&orte_proc_states);
+
+    /* drain and destruct the proc state list. NOTE(review): orte_job_states, built in init(), is not released here */
+    while (NULL != entry) {
+        OBJ_RELEASE(entry);
+        entry = opal_list_remove_first(&orte_proc_states);
+    }
+    OBJ_DESTRUCT(&orte_proc_states);
+    return ORTE_SUCCESS;
+}
+
+static void files_ready(int status, void *cbdata)
+{
+    orte_job_t *job = (orte_job_t*)cbdata;
+
+    if (ORTE_SUCCESS != status) {
+        ORTE_FORCED_TERMINATE(status);  /* reported status was a failure */
+        return;
+    }
+    ORTE_ACTIVATE_JOB_STATE(job, ORTE_JOB_STATE_MAP);  /* success: move the job on to the MAP state */
+}
+
+static void vm_ready(int fd, short args, void *cbdata)
+{
+    orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
+    orte_job_t *job = state->jdata;
+
+    if (ORTE_PROC_MY_NAME->jobid != job->jobid) {
+        /* not our own job: record that it reached VM_READY and
+         * ask filem to position its files, with files_ready()
+         * as the completion callback */
+        job->state = ORTE_JOB_STATE_VM_READY;
+        if (ORTE_SUCCESS != orte_filem.preposition_files(job, files_ready, job)) {
+            /* the request itself failed */
+            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
+        }
+    } else {
+        /* our own job: nothing more to progress, just announce
+         * that the vm is ready */
+        opal_output(0, "DVM ready");
+    }
+
+    /* both paths are done with the caddy */
+    OBJ_RELEASE(state);
+}
+/* Job-completion callback: mark jdata terminated, free its map resources, and order daemon termination once no monitored job is alive. */
+void check_complete(int fd, short args, void *cbdata)
+{
+    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
+    orte_job_t *jdata = caddy->jdata;
+
+    orte_proc_t *proc;
+    int i;
+    orte_std_cntr_t j;
+    orte_job_t *job;
+    orte_node_t *node;
+    orte_job_map_t *map;
+    orte_std_cntr_t index;
+    bool one_still_alive;
+    orte_vpid_t lowest=0;
+    int32_t i32, *i32ptr;
+
+    opal_output_verbose(2, orte_state_base_framework.framework_output,
+                        "%s state:base:check_job_complete on job %s",
+                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                        (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));
+
+    if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
+        /* just check to see if the daemons are complete */
+        OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                             "%s state:base:check_job_complete - received NULL job, checking daemons",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+        goto CHECK_DAEMONS;
+    } else {
+        /* mark the job as terminated, but don't override any
+         * abnormal termination flags
+         */
+        if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) {
+            jdata->state = ORTE_JOB_STATE_TERMINATED;
+        }
+    }
+
+    /* tell the IOF that the job is complete */
+    if (NULL != orte_iof.complete) {
+        orte_iof.complete(jdata);
+    }
+
+    i32ptr = &i32;
+    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32) && !orte_abort_non_zero_exit) {
+        if (!orte_report_child_jobs_separately || 1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
+            /* update the exit code */
+            ORTE_UPDATE_EXIT_STATUS(lowest);
+        }
+
+        /* warn user */
+        opal_output(orte_clean_output,
+                    "-------------------------------------------------------\n"
+                    "While %s job %s terminated normally, %d %s. Further examination may be required.\n"
+                    "-------------------------------------------------------",
+                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child",
+                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
+                    i32, (1 == i32) ? "process returned\na non-zero exit code." :
+                    "processes returned\nnon-zero exit codes.");
+    }
+
+    OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                         "%s state:base:check_job_completed declared job %s terminated with state %s - checking all jobs",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_JOBID_PRINT(jdata->jobid),
+                         orte_job_state_to_str(jdata->state)));
+
+    /* if this job is a continuously operating one, then don't do
+     * anything further - just return here
+     */
+    if (NULL != jdata &&
+        (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL) ||
+         ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RECOVERABLE))) {
+        goto CHECK_ALIVE;
+    }
+
+    /* if the job that is being checked is the HNP, then we are
+     * trying to terminate the orteds. In that situation, we
+     * do -not- check all jobs - we simply notify the HNP
+     * that the orteds are complete. Also check special case
+     * if jdata is NULL - we want
+     * to definitely declare the job done if the orteds
+     * have completed, no matter what else may be happening.
+     * This can happen if a ctrl-c hits in the "wrong" place
+     * while launching
+     */
+ CHECK_DAEMONS:
+    if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
+        if (0 == orte_routed.num_routes()) {
+            /* orteds are done! */
+            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                                 "%s orteds complete - exiting",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+            if (NULL == jdata) {
+                jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
+            }
+            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
+            OBJ_RELEASE(caddy);
+            return;
+        }
+        OBJ_RELEASE(caddy);
+        return;
+    }
+
+    /* Release the resources used by this job. Since some errmgrs may want
+     * to continue using resources allocated to the job as part of their
+     * fault recovery procedure, we only do this once the job is "complete".
+     * Note that an aborted/killed job -is- flagged as complete and will
+     * therefore have its resources released. We need to do this after
+     * we call the errmgr so that any attempt to restart the job will
+     * avoid doing so in the exact same place as the current job
+     */
+    if (NULL != jdata->map  && jdata->state == ORTE_JOB_STATE_TERMINATED) {
+        map = jdata->map;
+        for (index = 0; index < map->nodes->size; index++) {
+            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) {
+                continue;
+            }
+            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                                 "%s releasing procs from node %s",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                 node->name));
+            for (i = 0; i < node->procs->size; i++) {
+                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
+                    continue;
+                }
+                if (proc->name.jobid != jdata->jobid) {
+                    /* skip procs from another job */
+                    continue;
+                }
+                node->slots_inuse--;
+                node->num_procs--;
+                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                                     "%s releasing proc %s from node %s",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                     ORTE_NAME_PRINT(&proc->name), node->name));
+                /* set the entry in the node array to NULL */
+                opal_pointer_array_set_item(node->procs, i, NULL);
+                /* release the proc once for the map entry */
+                OBJ_RELEASE(proc);
+            }
+            /* set the node location to NULL */
+            opal_pointer_array_set_item(map->nodes, index, NULL);
+            /* flag that the node is no longer in a map - do this BEFORE the release */
+            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
+            /* maintain accounting - this may free the node, so it must come last */
+            OBJ_RELEASE(node);
+        }
+        OBJ_RELEASE(map);
+        jdata->map = NULL;
+    }
+
+ CHECK_ALIVE:
+    /* now check to see if all jobs are done - trigger notification of this jdata
+     * object when we find it
+     */
+    one_still_alive = false;
+    for (j=1; j < orte_job_data->size; j++) {
+        if (NULL == (job = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, j))) {
+            /* since we are releasing jdata objects as we
+             * go, we can no longer assume that the job_data
+             * array is left justified
+             */
+            continue;
+        }
+        /* if this is the job we are checking AND it normally terminated,
+         * then activate the "notify_completed" state - this will release
+         * the job state, but is provided so that the HNP main code can
+         * take alternative actions if desired. If the state is killed_by_cmd,
+         * then go ahead and release it. We cannot release it if it
+         * abnormally terminated as mpirun needs the info so it can
+         * report appropriately to the user
+         *
+         * NOTE: do not release the primary job (j=1) so we
+         * can pretty-print completion message
+         */
+        if (NULL != jdata && job->jobid == jdata->jobid) {
+            if (jdata->state == ORTE_JOB_STATE_TERMINATED) {
+                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                                     "%s state:base:check_job_completed state is terminated - activating notify",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+                ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFY_COMPLETED);
+                one_still_alive = true;
+            } else if (jdata->state == ORTE_JOB_STATE_KILLED_BY_CMD ||
+                       jdata->state == ORTE_JOB_STATE_NOTIFIED) {
+                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                                     "%s state:base:check_job_completed state is killed or notified - cleaning up",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+                /* release this object, ensuring that the
+                 * pointer array internal accounting
+                 * is maintained!
+                 */
+                if (1 < j) {
+                    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
+                        /* this was a debugger daemon. notify that a debugger has detached */
+                        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH);
+                    }
+                    opal_pointer_array_set_item(orte_job_data, j, NULL);  /* ensure the array has a NULL */
+                    OBJ_RELEASE(jdata);
+                }
+            }
+            continue;
+        }
+        /* if the job is flagged to not be monitored, skip it */
+        if (ORTE_FLAG_TEST(job, ORTE_JOB_FLAG_DO_NOT_MONITOR)) {
+            continue;
+        }
+        /* when checking for job termination, we must be sure to NOT check
+         * our own job as it - rather obviously - has NOT terminated!
+         */
+        if (job->num_terminated < job->num_procs) {
+            /* we have at least one job that is not done yet - we cannot
+             * just return, though, as we need to ensure we cleanout the
+             * job data for the job that just completed
+             */
+            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                                 "%s state:base:check_job_completed job %s is not terminated (%d:%d)",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                 ORTE_JOBID_PRINT(job->jobid),
+                                 job->num_terminated, job->num_procs));
+            one_still_alive = true;
+        }
+        else {
+            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                                 "%s state:base:check_job_completed job %s is terminated (%d vs %d [%s])",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                 ORTE_JOBID_PRINT(job->jobid),
+                                 job->num_terminated, job->num_procs,
+                                 (NULL == jdata) ? "UNKNOWN" : orte_job_state_to_str(jdata->state) ));
+        }
+    }
+    /* if a job is still alive, we just return */
+    if (one_still_alive) {
+        OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                             "%s state:base:check_job_completed at least one job is not terminated",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+        OBJ_RELEASE(caddy);
+        return;
+    }
+    /* if we get here, then all jobs are done, so terminate */
+    OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
+                         "%s state:base:check_job_completed all jobs terminated",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* stop the job timeout event, if set */
+    if (NULL != orte_mpiexec_timeout) {
+        OBJ_RELEASE(orte_mpiexec_timeout);
+        orte_mpiexec_timeout = NULL;
+    }
+
+    /* set the exit status to 0 - this will only happen if it
+     * wasn't already set by an error condition
+     */
+    ORTE_UPDATE_EXIT_STATUS(0);
+
+    /* order daemon termination - this tells us to cleanup
+     * our local procs as well as telling remote daemons
+     * to die
+     */
+    orte_plm.terminate_orteds();
+
+    OBJ_RELEASE(caddy);
+}
diff --git a/orte/mca/state/dvm/state_dvm.h b/orte/mca/state/dvm/state_dvm.h
new file mode 100644
index 0000000000..78c798ce7c
--- /dev/null
+++ b/orte/mca/state/dvm/state_dvm.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015      Intel, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+/**
+ * @file
+ * Public declarations for the ORTE "dvm" state component and its module.
+ */
+
+#ifndef MCA_STATE_DVM_EXPORT_H
+#define MCA_STATE_DVM_EXPORT_H
+
+#include "orte_config.h"
+
+#include "orte/mca/state/state.h"
+
+BEGIN_C_DECLS
+
+/*
+ * Local Component structures
+ */
+
+ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_dvm_component;
+
+ORTE_DECLSPEC extern orte_state_base_module_t orte_state_dvm_module;
+
+END_C_DECLS
+
+#endif /* MCA_STATE_DVM_EXPORT_H */
diff --git a/orte/mca/state/dvm/state_dvm_component.c b/orte/mca/state/dvm/state_dvm_component.c
new file mode 100644
index 0000000000..07909fc22e
--- /dev/null
+++ b/orte/mca/state/dvm/state_dvm_component.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2015      Intel, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "opal/util/output.h"
+
+#include "orte/mca/state/state.h"
+#include "orte/mca/state/base/base.h"
+#include "state_dvm.h"
+
+/*
+ * Public, human-readable version banner for this component
+ */
+const char *orte_state_dvm_component_version_string = 
+    "ORTE STATE dvm MCA component version " ORTE_VERSION;
+
+/*
+ * Component hooks; they are wired into mca_state_dvm_component below
+ */
+static int state_dvm_open(void);
+static int state_dvm_close(void);
+static int state_dvm_component_query(mca_base_module_t **module, int *priority);
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointer to our public functions in it
+ */
+orte_state_base_component_t mca_state_dvm_component =
+{
+    /* Handle the general mca_component_t struct containing
+     *  meta information about the component
+     */
+    {
+        ORTE_STATE_BASE_VERSION_1_0_0,
+        /* Component name and version */
+        "dvm",
+        ORTE_MAJOR_VERSION,
+        ORTE_MINOR_VERSION,
+        ORTE_RELEASE_VERSION,
+        
+        /* Component open, close and query functions */
+        state_dvm_open,
+        state_dvm_close,
+        state_dvm_component_query
+    },
+    {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+};
+/* Component open hook: nothing to set up, always reports success. */
+static int state_dvm_open(void) 
+{
+    return ORTE_SUCCESS;
+}
+/* Component close hook: nothing to tear down, always reports success. */
+static int state_dvm_close(void)
+{
+    return ORTE_SUCCESS;
+}
+/* Component query hook: always offers orte_state_dvm_module, at priority 0. */
+static int state_dvm_component_query(mca_base_module_t **module, int *priority)
+{
+    /* we are only used when an envar is set directing it,
+     * so set our priority very low */
+    *priority = 0;
+    *module = (mca_base_module_t *)&orte_state_dvm_module;
+    return ORTE_SUCCESS;        
+}
diff --git a/orte/tools/Makefile.am b/orte/tools/Makefile.am
index 8ddebc8387..6ac7656410 100644
--- a/orte/tools/Makefile.am
+++ b/orte/tools/Makefile.am
@@ -13,7 +13,7 @@
 # Copyright (c) 2006-2008 Cisco Systems, Inc.  All rights reserved.
 # Copyright (c) 2011-2013 Los Alamos National Security, LLC.  All rights
 #                         reserved. 
-# Copyright (c) 2014      Intel, Inc.  All rights reserved.
+# Copyright (c) 2014-2015 Intel, Inc.  All rights reserved.
 # $COPYRIGHT$
 # 
 # Additional copyrights may follow
@@ -35,7 +35,9 @@ SUBDIRS += \
         tools/orte-top \
         tools/orte-info \
         tools/orte-migrate \
-        tools/orte-server
+        tools/orte-server \
+        tools/orte-submit \
+        tools/orte-dvm
 
 DIST_SUBDIRS += \
 	tools/orte-checkpoint \
@@ -48,5 +50,7 @@ DIST_SUBDIRS += \
         tools/orte-top \
         tools/orte-info \
         tools/orte-migrate \
-        tools/orte-server
+        tools/orte-server \
+        tools/orte-submit \
+        tools/orte-dvm
 
diff --git a/orte/tools/orte-dvm/Makefile.am b/orte/tools/orte-dvm/Makefile.am
new file mode 100644
index 0000000000..ba1b0a3382
--- /dev/null
+++ b/orte/tools/orte-dvm/Makefile.am
@@ -0,0 +1,57 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, 
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2015      Intel, Inc.  All rights reserved.
+# $COPYRIGHT$
+# 
+# Additional copyrights may follow
+# 
+# $HEADER$
+#
+
+# This is not quite in the Automake spirit, but we have to do it.
+# Debugger (totalview) support code must be built with -g, so the
+# CFLAGS passed in by default (which may already carry debugging
+# and/or optimization flags) are replaced by post-processed forms.
+# NOTE(review): this file builds only orte-dvm.c and has no library
+# targets - confirm the override is really needed for this tool.
+
+CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS)
+
+include $(top_srcdir)/Makefile.ompi-rules
+# The man page is distributed as a .1in template; the .1 file is not distributed
+man_pages = orte-dvm.1
+EXTRA_DIST = $(man_pages:.1=.1in)
+
+if OPAL_INSTALL_BINARIES
+
+bin_PROGRAMS = orte-dvm
+
+nodist_man_MANS = $(man_pages)
+
+# Ensure that the man pages are rebuilt if the opal_config.h file
+# changes; a "good enough" way to know if configure was run again (and
+# therefore the release date or version may have changed)
+$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
+
+endif # OPAL_INSTALL_BINARIES
+
+orte_dvm_SOURCES = \
+        orte-dvm.c
+
+orte_dvm_LDADD = \
+	$(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \
+	$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
+# The .1 man page is not a distributed file, so distclean removes it
+distclean-local:
+	rm -f $(man_pages)
diff --git a/orte/tools/orte-dvm/orte-dvm.1in b/orte/tools/orte-dvm/orte-dvm.1in
new file mode 100644
index 0000000000..d4d74df913
--- /dev/null
+++ b/orte/tools/orte-dvm/orte-dvm.1in
@@ -0,0 +1,193 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2009-2014 Cisco Systems, Inc.  All rights reserved.
+.\" Copyright (c) 2008-2009 Sun Microsystems, Inc.  All rights reserved.
+.\" Copyright (c) 2015      Intel, Inc. All rights reserved
+.\" $COPYRIGHT$
+.\"
+.\" Man page for ORTE's orte-dvm command
+.\"
+.\" .TH name     section center-footer   left-footer  center-header
+.TH ORTE-DVM 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.\" **************************
+.\"    Name Section
+.\" **************************
+.SH NAME
+.
+orte-dvm, ompi-dvm \- Establish a Distributed Virtual Machine (DVM).
+
+.B Note:
+\fIorte-dvm\fP and \fIompi-dvm\fP are synonyms for each
+other.  Using either of the names will produce the same behavior.
+.
+.\" **************************
+.\"    Synopsis Section
+.\" **************************
+.SH SYNOPSIS
+.
+.PP
+.B orte-dvm
+[ options ]
+.P
+
+Invoking \fIorte-dvm\fP via an absolute path
+name is equivalent to specifying the \fI--prefix\fP option with a
+\fI<dir>\fR value equivalent to the directory where \fIorte-dvm\fR
+resides, minus its last subdirectory.  For example:
+
+    \fB%\fP /usr/local/bin/orte-dvm ...
+
+is equivalent to
+
+    \fB%\fP orte-dvm --prefix /usr/local
+
+.
+.\" **************************
+.\"    Quick Summary Section
+.\" **************************
+.SH QUICK SUMMARY
+.
+\fIorte-dvm\fP will establish a DVM that can be used to execute subsequent
+applications. Use of \fIorte-dvm\fP can be advantageous, for example, when you want to
+execute a number of short-lived tasks. In such cases, the time required to start
+the ORTE DVM can be a significant fraction of the time to execute the
+overall application. Thus, creating a persistent DVM can speed the overall
+execution. In addition, a persistent DVM will support executing multiple parallel
+applications while maintaining separation between their respective cores.
+.\" **************************
+.\"    Options Section
+.\" **************************
+.SH OPTIONS
+.
+.\"
+.\" Start options listing
+.\"    Indent 10 characters from start of first column to start of second column
+.
+.TP
+.B -h\fR,\fP --help
+Display help for this command
+.
+.
+.TP
+.B -V\fR,\fP --version
+Print version number.  If no other arguments are given, this will also
+cause orte-dvm to exit.
+.
+.
+.P
+Use one of the following options to specify which hosts (nodes) of the cluster to use
+for the DVM.
+.
+.
+.TP
+.B -H\fR,\fP -host\fR,\fP --host \fR<host1,host2,...,hostN>\fP
+List of hosts for the DVM.
+.
+.
+.TP
+.B
+-hostfile\fR,\fP --hostfile \fR<hostfile>\fP
+Provide a hostfile to use.
+.
+.
+.TP
+.B -machinefile\fR,\fP --machinefile \fR<machinefile>\fP
+Synonym for \fI-hostfile\fP.
+.
+.
+.TP
+.B --prefix \fR<dir>\fP
+Prefix directory that will be used to set the \fIPATH\fR and
+\fILD_LIBRARY_PATH\fR on the remote node before invoking the ORTE daemon.
+.
+.
+.P
+Setting MCA parameters:
+.
+.
+.TP
+.B -gmca\fR,\fP --gmca \fR<key> <value>\fP
+Pass global MCA parameters that are applicable to all contexts. \fI<key>\fP is
+the parameter name; \fI<value>\fP is the parameter value.
+.
+.
+.TP
+.B -mca\fR,\fP --mca <key> <value>
+Send arguments to various MCA modules: \fI<key>\fP is the parameter name, \fI<value>\fP is the parameter value.
+.
+.
+.
+.
+.TP
+.B -report-uri\fR,\fP --report-uri <channel>
+Print out orte-dvm's URI during startup. The channel must be either a '-' to indicate that
+the URI is to be output to stdout, a '+' to indicate that the URI is to be output to stderr,
+or a filename to which the URI is to be written.
+.
+.
+.P
+The following options are useful for developers; they are not generally
+useful to most ORTE and/or MPI users:
+.
+.TP
+.B -d\fR,\fP --debug-devel
+Enable debugging of the ORTE layer.
+.
+.
+.TP
+.B --debug-daemons-file
+Enable debugging of the ORTE daemons in the DVM, storing
+output in files.
+.
+.
+.P
+There may be other options listed with \fIorte-dvm --help\fP.
+.
+.
+.\" **************************
+.\"    Description Section
+.\" **************************
+.SH DESCRIPTION
+.
+\fIorte-dvm\fP starts a Distributed Virtual Machine (DVM) by launching
+a daemon on each node of the allocation, as modified or specified by
+the \fI-host\fP and \fI-hostfile\fP options. Applications can subsequently
+be executed using the \fIorte-submit\fP command.
+.
+The DVM remains in operation until receiving the \fIorte-submit -terminate\fP
+command.
+.
+.
+.
+.SS Specifying Host Nodes
+.
+Host nodes can be identified on the \fIorte-dvm\fP command line with the \fI-host\fP
+option or in a hostfile.
+.
+.PP
+For example,
+.
+.TP 4
+orte-dvm -H aa,bb
+starts a DVM spanning nodes aa and bb.
+.
+.PP
+Or, consider the hostfile
+.
+
+   \fB%\fP cat myhostfile
+   aa slots=2
+   bb slots=2
+   cc slots=2
+
+.
+.PP
+Here, we list not only the host names (aa, bb, and cc) but also how many "slots"
+there are for each.  Slots indicate how many processes can potentially execute
+on a node.  For best performance, the number of slots may be chosen to be the
+number of cores on the node or the number of processor sockets.  If the hostfile
+does not provide slots information, a default of 1 is assumed.
+When running under resource managers (e.g., SLURM, Torque, etc.),
+Open MPI will obtain both the hostnames and the number of slots directly
+from the resource manager.
+.
+.
diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c
new file mode 100644
index 0000000000..72e59020a8
--- /dev/null
+++ b/orte/tools/orte-dvm/orte-dvm.c
@@ -0,0 +1,446 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2008 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2006-2014 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2007-2013 Los Alamos National Security, LLC.  All rights
+ *                         reserved. 
+ * Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif  /* HAVE_STDLIB_H */
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif  /* HAVE_STRINGS_H */
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include <errno.h>
+#include <signal.h>
+#include <ctype.h>
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif  /* HAVE_SYS_TYPES_H */
+#ifdef HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif  /* HAVE_SYS_WAIT_H */
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif  /* HAVE_SYS_TIME_H */
+#include <fcntl.h>
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+
+#include "opal/mca/event/event.h"
+#include "opal/mca/installdirs/installdirs.h"
+#include "opal/mca/base/base.h"
+#include "opal/util/argv.h"
+#include "opal/util/output.h"
+#include "opal/util/basename.h"
+#include "opal/util/cmd_line.h"
+#include "opal/util/opal_environ.h"
+#include "opal/util/opal_getcwd.h"
+#include "opal/util/show_help.h"
+#include "opal/util/fd.h"
+
+#include "opal/version.h"
+#include "opal/runtime/opal.h"
+#include "opal/util/os_path.h"
+#include "opal/util/path.h"
+#include "opal/class/opal_pointer_array.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/rml/rml.h"
+#include "orte/mca/rml/base/rml_contact.h"
+#include "orte/mca/state/state.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/runtime/orte_globals.h"
+#include "orte/util/show_help.h"
+
+#include "orte/orted/orted.h"
+
+/*
+ * Compile-time default; main() checks it alongside the --prefix option
+ */
+static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
+
+/*
+ * Command line state: option values plus the name we were invoked under
+ */
+static struct {
+    bool help;          /* -h / --help was given */
+    bool version;       /* -V / --version was given */
+    char *report_uri;   /* --report-uri channel: "-", "+", or a filename */
+    char *basename;     /* executable name taken from argv[0], used in messages */
+    char *prefix;       /* --prefix value */
+} myglobals;
+
+static opal_cmd_line_init_t cmd_line_init[] = {
+    /* Various "obvious" options; each entry stores its value in myglobals */
+    { NULL, 'h', NULL, "help", 0,
+      &myglobals.help, OPAL_CMD_LINE_TYPE_BOOL,
+      "This help message" },
+    { NULL, 'V', NULL, "version", 0,
+      &myglobals.version, OPAL_CMD_LINE_TYPE_BOOL,
+      "Print version and exit" },
+    /* Where to report our contact URI */
+    { NULL, '\0', "report-uri", "report-uri", 1,
+      &myglobals.report_uri, OPAL_CMD_LINE_TYPE_STRING,
+      "Printout URI on stdout [-], stderr [+], or a file [anything else]" },
+    /* Installation prefix */
+    { NULL, '\0', "prefix", "prefix", 1,
+      &myglobals.prefix, OPAL_CMD_LINE_TYPE_STRING,
+      "Prefix to be used to look for ORTE executables" },
+
+    /* End of list - all-NULL sentinel entry */
+    { NULL, '\0', NULL, NULL, 0,
+      NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
+};
+
+static void notify_requestor(int sd, short args, void *cbdata);
+
+int main(int argc, char *argv[])
+{
+    int rc, i, j;
+    opal_cmd_line_t cmd_line;
+    char *param, *value;
+    orte_job_t *jdata=NULL;
+    orte_app_context_t *app;
+    char *uri;
+
+    /* Setup and parse the command line */
+    memset(&myglobals, 0, sizeof(myglobals));
+    /* find our basename (the name of the executable) so that we can
+       use it in pretty-print error messages */
+    myglobals.basename = opal_basename(argv[0]);
+    
+    opal_cmd_line_create(&cmd_line, cmd_line_init);
+    mca_base_cmd_line_setup(&cmd_line);
+    if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(&cmd_line, true,
+                                                  argc, argv)) ) {
+        if (OPAL_ERR_SILENT != rc) {
+            fprintf(stderr, "%s: command line error (%s)\n", argv[0],
+                    opal_strerror(rc));
+        }
+        return rc;
+    }
+
+    /*
+     * Since this process can now handle MCA/GMCA parameters, make sure to
+     * process them.
+     * NOTE: It is "safe" to call mca_base_cmd_line_process_args() before 
+     *  opal_init_util() since mca_base_cmd_line_process_args() does *not*
+     *  depend upon opal_init_util() functionality.
+     */
+    if (OPAL_SUCCESS != mca_base_cmd_line_process_args(&cmd_line, &environ, &environ)) {
+        exit(1);
+    }
+
+    /* Need to initialize OPAL so that install_dirs are filled in */
+    if (OPAL_SUCCESS != opal_init(&argc, &argv)) {
+        exit(1);
+    }
+    
+    /* Check for some "global" command line params */
+    /* print version if requested.  Do this before check for help so
+       that --version --help works as one might expect. */
+    if (myglobals.version) {
+        char *str;
+        char *project_name = NULL;
+        if (0 == strcmp(myglobals.basename, "ompi-dvm")) {
+            project_name = "Open MPI";
+        } else {
+            project_name = "OpenRTE";
+        }
+        str = opal_show_help_string("help-orterun.txt", "orterun:version", 
+                                    false,
+                                    myglobals.basename, project_name, OPAL_VERSION,
+                                    PACKAGE_BUGREPORT);
+        if (NULL != str) {
+            printf("%s", str);
+            free(str);
+        }
+        exit(0);
+    }
+
+    /* Check for help request */
+    if (myglobals.help) {
+        char *str, *args = NULL;
+        char *project_name = NULL;
+        if (0 == strcmp(myglobals.basename, "mpirun")) {
+            project_name = "Open MPI";
+        } else {
+            project_name = "OpenRTE";
+        }
+        args = opal_cmd_line_get_usage_msg(&cmd_line);
+        str = opal_show_help_string("help-orterun.txt", "orterun:usage", false,
+                                    myglobals.basename, project_name, OPAL_VERSION,
+                                    myglobals.basename, args,
+                                    PACKAGE_BUGREPORT);
+        if (NULL != str) {
+            printf("%s", str);
+            free(str);
+        }
+        free(args);
+
+        /* If someone asks for help, that should be all we do */
+        exit(0);
+    }
+
+    /* flag that I am the HNP */
+    orte_process_info.proc_type = ORTE_PROC_HNP;
+    
+    /* Setup MCA params */
+    orte_register_params();
+
+    /* specify the DVM state machine */
+    putenv("OMPI_MCA_state=dvm");
+    
+    /* Intialize our Open RTE environment */
+    if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_HNP))) {
+        /* cannot call ORTE_ERROR_LOG as it could be the errmgr
+         * never got loaded!
+         */
+        return rc;
+    }
+    /* finalize OPAL. As it was opened again from orte_init->opal_init
+     * we continue to have a reference count on it. So we have to finalize it twice...
+     */
+    opal_finalize();
+
+    /* check for request to report uri */
+    uri = orte_rml.get_contact_info();
+    if (NULL != myglobals.report_uri) {
+        FILE *fp;
+        if (0 == strcmp(myglobals.report_uri, "-")) {
+            /* if '-', then output to stdout */
+            printf("VMURI: %s\n", uri);
+        } else if (0 == strcmp(myglobals.report_uri, "+")) {
+            /* if '+', output to stderr */
+            fprintf(stderr, "VMURI: %s\n", uri);
+        } else {
+            fp = fopen(myglobals.report_uri, "w");
+            if (NULL == fp) {
+                orte_show_help("help-orterun.txt", "orterun:write_file", false,
+                               myglobals.basename, "pid", myglobals.report_uri);
+                exit(0);
+            }
+            fprintf(fp, "%s\n", uri);
+            fclose(fp);
+        }
+        free(uri);
+    } else {
+        printf("VMURI: %s\n", uri);
+    }
+
+    /* get the daemon job object - was created by ess/hnp component */
+    if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
+        orte_show_help("help-orterun.txt", "bad-job-object", true,
+                       myglobals.basename);
+        exit(0);
+    }
+    /* also should have created a daemon "app" */
+    if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
+        orte_show_help("help-orterun.txt", "bad-app-object", true,
+                       myglobals.basename);
+        exit(0);
+    }
+
+    /* Did the user specify a prefix, or want prefix by default? */
+    if (opal_cmd_line_is_taken(&cmd_line, "prefix") || want_prefix_by_default) {
+        size_t param_len;
+        /* if both the prefix was given and we have a prefix
+         * given above, check to see if they match
+         */
+        if (opal_cmd_line_is_taken(&cmd_line, "prefix") &&
+            NULL != myglobals.prefix) {
+            /* if they don't match, then that merits a warning */
+            param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0));
+            /* ensure we strip any trailing '/' */
+            if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) {
+                param[strlen(param)-1] = '\0';
+            }
+            value = strdup(myglobals.prefix);
+            if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) {
+                value[strlen(value)-1] = '\0';
+            }
+            if (0 != strcmp(param, value)) {
+                orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict",
+                               true, myglobals.basename, value, param);
+                /* let the global-level prefix take precedence since we
+                 * know that one is being used
+                 */
+                free(param);
+                param = strdup(myglobals.prefix);
+            }
+            free(value);
+        } else if (NULL != myglobals.prefix) {
+            param = myglobals.prefix;
+        } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){
+            /* must be --prefix alone */
+            param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0));
+        } else {
+            /* --enable-orterun-prefix-default was given to orterun */
+            param = strdup(opal_install_dirs.prefix);
+        }
+
+        if (NULL != param) {
+            /* "Parse" the param, aka remove superfluous path_sep. */
+            param_len = strlen(param);
+            while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) {
+                param[param_len-1] = '\0';
+                param_len--;
+                if (0 == param_len) {
+                    orte_show_help("help-orterun.txt", "orterun:empty-prefix",
+                                   true, myglobals.basename, myglobals.basename);
+                    return ORTE_ERR_FATAL;
+                }
+            }
+            orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING);
+            free(param);
+        }
+    }
+
+    /* Did the user specify a hostfile. Need to check for both 
+     * hostfile and machine file. 
+     * We can only deal with one hostfile per app context, otherwise give an error.
+     */
+    if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) {
+        if(1 < j) {
+            orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
+                           true, myglobals.basename, NULL);
+            return ORTE_ERR_FATAL;
+        } else {
+            value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0);
+            orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING);
+        }
+    }
+    if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) {
+        if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) {
+            orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
+                           true, myglobals.basename, NULL);
+            return ORTE_ERR_FATAL;
+        } else {
+            value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0);
+            orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING);
+        }
+    }
+ 
+    /* Did the user specify any hosts? */
+    if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "host"))) {
+        char **targ=NULL, *tval;
+        for (i = 0; i < j; ++i) {
+            value = opal_cmd_line_get_param(&cmd_line, "host", i, 0);
+            opal_argv_append_nosize(&targ, value);
+        }
+        tval = opal_argv_join(targ, ',');
+        orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING);
+        opal_argv_free(targ);
+        free(tval);
+    }
+    OBJ_DESTRUCT(&cmd_line);
+
+    /* setup to listen for commands sent specifically to me, even though I would probably
+     * be the one sending them! Unfortunately, since I am a participating daemon,
+     * there are times I need to send a command to "all daemons", and that means *I* have
+     * to receive it too
+     */
+    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
+                            ORTE_RML_PERSISTENT, orte_daemon_recv, NULL);
+
+    /* override the notify_completed state so we can send a message
+     * back to anyone who submits a job to us telling them the job
+     * completed */
+    if (ORTE_SUCCESS != (rc = orte_state.set_job_state_callback(ORTE_JOB_STATE_NOTIFY_COMPLETED, notify_requestor))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        exit(orte_exit_status);
+    }
+
+    /* spawn the DVM - we skip the initial steps as this
+     * isn't a user-level application */
+    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATE);
+
+    /* loop the event lib until an exit event is detected */
+    while (orte_event_base_active) {
+        opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
+    }
+
+    /* cleanup and leave */
+    orte_finalize();
+
+    if (orte_debug_flag) {
+        fprintf(stderr, "exiting with status %d\n", orte_exit_status);
+    }
+    exit(orte_exit_status);
+}
+
+/*
+ * RML completion callback for the reply that notify_requestor sends to
+ * the originator of a job.  cbdata carries the orte_job_t of that job.
+ *
+ * Releases the reply buffer, clears the job's slot in the orte_job_data
+ * array and releases the job object.  status, peer and tag are not
+ * examined.
+ */
+static void send_callback(int status, orte_process_name_t *peer,
+                          opal_buffer_t* buffer, orte_rml_tag_t tag,
+                          void* cbdata)
+{
+    orte_job_t *job = (orte_job_t*)cbdata;
+
+    /* the reply is done with, whatever the outcome of the send */
+    OBJ_RELEASE(buffer);
+
+    /* forget the job: empty its slot in the global job array first,
+     * then give up the reference that was handed to us */
+    opal_pointer_array_set_item(orte_job_data,
+                                ORTE_LOCAL_JOBID(job->jobid),
+                                NULL);
+    OBJ_RELEASE(job);
+}
+/*
+ * Job-state callback installed for ORTE_JOB_STATE_NOTIFY_COMPLETED.
+ *
+ * Packs the completed job's exit status (the exit code of the proc
+ * recorded under ORTE_JOB_ABORTED_PROC, or 0 when none is recorded)
+ * into a buffer and sends it to the job's originator on
+ * ORTE_RML_TAG_TOOL.  The job object is cleaned up by send_callback;
+ * the state caddy is released here.
+ *
+ * sd, args - not used by this function
+ * cbdata   - the orte_state_caddy_t carrying the job object
+ */
+static void notify_requestor(int sd, short args, void *cbdata)
+{
+    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
+    orte_job_t *jdata = caddy->jdata;
+    orte_proc_t *pptr = NULL;
+    opal_buffer_t *reply;
+    int ret = 0;
+    int rc;
+
+    opal_output(0, "%s dvm: job %s has completed",
+                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));
+
+    /* without a job object there is no originator to notify and
+     * nothing to clean up, so do not dereference it */
+    if (NULL == jdata) {
+        OBJ_RELEASE(caddy);
+        return;
+    }
+
+    /* see if there was any problem */
+    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, (void**)&pptr, OPAL_PTR) && NULL != pptr) {
+        ret = pptr->exit_code;
+    }
+
+    /* notify the requestor */
+    reply = OBJ_NEW(opal_buffer_t);
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &ret, 1, OPAL_INT))) {
+        /* log it, but still send so the requestor is not left waiting */
+        ORTE_ERROR_LOG(rc);
+    }
+    rc = orte_rml.send_buffer_nb(&jdata->originator, reply, ORTE_RML_TAG_TOOL, send_callback, jdata);
+    if (0 > rc) {
+        /* the send was rejected up front, so presumably the callback
+         * will never fire (NOTE(review): confirm against the RML
+         * component in use) - run it ourselves so that the reply
+         * buffer and the job object are not leaked */
+        ORTE_ERROR_LOG(rc);
+        send_callback(rc, &jdata->originator, reply, ORTE_RML_TAG_TOOL, jdata);
+    }
+
+    /* we cannot cleanup the job object here as we might
+     * hit an error during transmission, so it is cleaned
+     * up in the send callback */
+    OBJ_RELEASE(caddy);
+}
+
diff --git a/orte/tools/orte-submit/Makefile.am b/orte/tools/orte-submit/Makefile.am
new file mode 100644
index 0000000000..e69634cb27
--- /dev/null
+++ b/orte/tools/orte-submit/Makefile.am
@@ -0,0 +1,57 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, 
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2015      Intel, Inc.  All rights reserved.
+# $COPYRIGHT$
+# 
+# Additional copyrights may follow
+# 
+# $HEADER$
+#
+
+# Not quite in the Automake spirit, but unavoidable: the default CFLAGS
+# handed to us may already carry debugging and/or optimization flags,
+# while the totalview portion of the library has to be built with -g.
+# So CFLAGS is overridden with the post-processed flag sets below
+# instead of being left alone.
+
+CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS)
+
+include $(top_srcdir)/Makefile.ompi-rules
+
+# Only the .1in template is distributed (EXTRA_DIST); the .1 page is
+# listed as a nodist man page below, i.e. it is a build product.
+man_pages = orte-submit.1
+EXTRA_DIST = $(man_pages:.1=.1in)
+
+if OPAL_INSTALL_BINARIES
+
+bin_PROGRAMS = orte-submit
+
+nodist_man_MANS = $(man_pages)
+
+# A changed opal_config.h is a "good enough" sign that configure was
+# run again (so the release date or version may be different); make the
+# man pages depend on it so that they get rebuilt in that case.
+$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
+
+endif # OPAL_INSTALL_BINARIES
+
+orte_submit_SOURCES = orte-submit.c
+
+orte_submit_LDADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la
+orte_submit_LDADD += $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
+
+# Remove the built man page on distclean.
+distclean-local:
+	rm -f $(man_pages)
diff --git a/orte/tools/orte-submit/orte-submit.1in b/orte/tools/orte-submit/orte-submit.1in
new file mode 100644
index 0000000000..605d893c0d
--- /dev/null
+++ b/orte/tools/orte-submit/orte-submit.1in
@@ -0,0 +1,1430 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2009-2014 Cisco Systems, Inc.  All rights reserved.
+.\" Copyright (c) 2008-2009 Sun Microsystems, Inc.  All rights reserved.
+.\" Copyright (c) 2015      Intel, Inc. All rights reserved.
+.\" $COPYRIGHT$
+.\"
+.\" Man page for ORTE's orte-submit command
+.\"
+.\" .TH name     section center-footer   left-footer  center-header
+.TH ORTE-SUBMIT 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.\" **************************
+.\"    Name Section
+.\" **************************
+.SH NAME
+.
+orte-submit, ompi-submit \- Execute serial and parallel jobs in Open MPI using a DVM.
+
+.B Note:
+\fIompi-submit\fP and \fIorte-submit\fP are synonyms for each
+other.  Using either of the names will produce the same behavior.
+.
+.\" **************************
+.\"    Synopsis Section
+.\" **************************
+.SH SYNOPSIS
+.
+.PP
+Single Program Multiple Data (SPMD) Model:
+
+.B ompi-submit
+[ options ]
+.B <program>
+[ <args> ]
+.P
+
+Multiple Instruction Multiple Data (MIMD) Model:
+
+.B ompi-submit
+[ global_options ]
+       [ local_options1 ]
+.B <program1>
+[ <args1> ] :
+       [ local_options2 ]
+.B <program2>
+[ <args2> ] :
+       ... :
+       [ local_optionsN ]
+.B <programN>
+[ <argsN> ]
+.P
+
+Note that in both models, invoking \fIompi-submit\fP via an absolute path
+name is equivalent to specifying the \fI--prefix\fP option with a
+\fI<dir>\fR value equivalent to the directory where \fIompi-submit\fR
+resides, minus its last subdirectory.  For example:
+
+    \fB%\fP /usr/local/bin/ompi-submit ...
+
+is equivalent to
+
+    \fB%\fP ompi-submit --prefix /usr/local
+
+.
+.\" **************************
+.\"    Quick Summary Section
+.\" **************************
+.SH QUICK SUMMARY
+.
+.B
+Use of \fIorte-submit\fP requires that you first start the Distributed Virtual
+Machine (DVM) using \fIorte-dvm\fP.
+.P
+If you are simply looking for how to run an MPI application, you
+probably want to use a command line of the following form:
+
+    \fB%\fP ompi-submit [ -np X ] [ --hostfile <filename> ]  <program>
+
+This will run X copies of \fI<program>\fR in your current run-time
+environment (if running under a supported resource manager, Open MPI's
+\fIompi-submit\fR will usually automatically use the corresponding resource manager
+process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR,
+which require the use of a hostfile, or will default to running all X
+copies on the localhost), scheduling (by default) in a round-robin fashion by
+CPU slot.  See the rest of this page for more details.
+.P
+Please note that ompi-submit automatically binds processes as of the start of the
+v1.8 series. Two binding patterns are used in the absence of any further directives:
+.TP 18
+.B Bind to core:
+when the number of processes is <= 2
+.
+.
+.TP
+.B Bind to socket:
+when the number of processes is > 2
+.
+.
+.P
+If your application uses threads, then you probably want to ensure that you are
+either not bound at all (by specifying --bind-to none), or bound to multiple cores
+using an appropriate binding level or specific number of processing elements per
+application process.
+.
+.\" **************************
+.\"    Options Section
+.\" **************************
+.SH OPTIONS
+.
+.I ompi-submit
+will send the name of the directory where it was invoked on the local
+node to each of the remote nodes, and attempt to change to that
+directory.  See the "Current Working Directory" section below for further
+details.
+.\"
+.\" Start options listing
+.\"    Indent 10 characters from start of first column to start of second column
+.TP 10
+.B <program>
+The program executable. This is identified as the first non-recognized argument
+to ompi-submit.
+.
+.
+.TP
+.B <args>
+Pass these run-time arguments to every new process.  These must always
+be the last arguments to \fIompi-submit\fP. If an app context file is used,
+\fI<args>\fP will be ignored.
+.
+.
+.TP
+.B -h\fR,\fP --help
+Display help for this command
+.
+.
+.TP
+.B -q\fR,\fP --quiet
+Suppress informative messages from orte-submit during application execution.
+.
+.
+.TP
+.B -v\fR,\fP --verbose
+Be verbose
+.
+.
+.TP
+.B -V\fR,\fP --version
+Print version number.  If no other arguments are given, this will also
+cause orte-submit to exit.
+.
+.
+.
+.
+.P
+Use one of the following options to specify which hosts (nodes) of the DVM to run on.
+Specifying hosts outside the DVM will result in an error.
+.
+.
+.TP
+.B -H\fR,\fP -host\fR,\fP --host \fR<host1,host2,...,hostN>\fP
+List of hosts on which to invoke processes.
+.
+.
+.TP
+.B
+-hostfile\fR,\fP --hostfile \fR<hostfile>\fP
+Provide a hostfile to use.
+.\" JJH - Should have man page for how to format a hostfile properly.
+.
+.
+.TP
+.B -machinefile\fR,\fP --machinefile \fR<machinefile>\fP
+Synonym for \fI-hostfile\fP.
+.
+.
+.
+.
+.P
+The following options specify the number of processes to launch. Note that none
+of the options imply a particular binding policy - e.g., requesting N processes
+for each socket does not imply that the processes will be bound to the socket.
+.
+.
+.TP
+.B -c\fR,\fP -n\fR,\fP --n\fR,\fP -np \fR<#>\fP
+Run this many copies of the program on the given nodes.  This option
+indicates that the specified file is an executable program and not an
+application context. If no value is provided for the number of copies to
+execute (i.e., neither the "-np" nor its synonyms are provided on the command
+line), Open MPI will automatically execute a copy of the program on
+each process slot (see below for description of a "process slot"). This
+feature, however, can only be used in the SPMD model and will return an
+error (without beginning execution of the application) otherwise.
+.
+.
+.TP
+.B --map-by ppr:N:<object>
+Launch N times the number of objects of the specified type on each node.
+.
+.
+.TP
+.B -npersocket\fR,\fP --npersocket <#persocket>
+On each node, launch this many processes times the number of processor
+sockets on the node.
+The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option.
+(deprecated in favor of --map-by ppr:n:socket)
+.
+.
+.TP
+.B -npernode\fR,\fP --npernode <#pernode>
+On each node, launch this many processes.
+(deprecated in favor of --map-by ppr:n:node)
+.
+.
+.TP
+.B -pernode\fR,\fP --pernode
+On each node, launch one process -- equivalent to \fI-npernode\fP 1.
+(deprecated in favor of --map-by ppr:1:node)
+.
+.
+.
+.
+.P
+To map processes:
+.
+.
+.TP
+.B --map-by <foo>
+Map to the specified object, defaults to \fIsocket\fP. Supported options
+include slot, hwthread, core, L1cache, L2cache, L3cache, socket, numa,
+board, node, sequential, distance, and ppr. Any object can include
+modifiers by adding a \fR:\fP and any combination of PE=n (bind n
+processing elements to each proc), SPAN (load
+balance the processes across the allocation), OVERSUBSCRIBE (allow
+more processes on a node than processing elements), and NOOVERSUBSCRIBE.
+This includes PPR, where the pattern would be terminated by another colon
+to separate it from the modifiers.
+.
+.TP
+.B -bycore\fR,\fP --bycore
+Map processes by core (deprecated in favor of --map-by core)
+.
+.TP
+.B -bysocket\fR,\fP --bysocket
+Map processes by socket (deprecated in favor of --map-by socket)
+.
+.TP
+.B -nolocal\fR,\fP --nolocal
+Do not run any copies of the launched application on the same node as
+orte-submit is running.  This option will override listing the localhost
+with \fB--host\fR or any other host-specifying mechanism.
+.
+.TP
+.B -nooversubscribe\fR,\fP --nooversubscribe
+Do not oversubscribe any nodes; error (without starting any processes)
+if the requested number of processes would cause oversubscription.
+This option implicitly sets "max_slots" equal to the "slots" value for
+each node.
+.
+.TP
+.B -bynode\fR,\fP --bynode
+Launch processes one per node, cycling by node in a round-robin
+fashion.  This spreads processes evenly among nodes and assigns
+MPI_COMM_WORLD ranks in a round-robin, "by node" manner.
+.
+.
+.
+.
+.P
+To order processes' ranks in MPI_COMM_WORLD:
+.
+.
+.TP
+.B --rank-by <foo>
+Rank in round-robin fashion according to the specified object,
+defaults to \fIslot\fP. Supported options
+include slot, hwthread, core, L1cache, L2cache, L3cache,
+socket, numa, board, and node.
+.
+.
+.
+.
+.P
+For process binding:
+.
+.TP
+.B --bind-to <foo>
+Bind processes to the specified object, defaults to \fIcore\fP. Supported options
+include slot, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board, and none.
+.
+.TP
+.B -cpus-per-proc\fR,\fP --cpus-per-proc <#perproc>
+Bind each process to the specified number of cpus.
+(deprecated in favor of --map-by <obj>:PE=n)
+.
+.TP
+.B -cpus-per-rank\fR,\fP --cpus-per-rank <#perrank>
+Alias for \fI-cpus-per-proc\fP.
+(deprecated in favor of --map-by <obj>:PE=n)
+.
+.TP
+.B -bind-to-core\fR,\fP --bind-to-core
+Bind processes to cores (deprecated in favor of --bind-to core)
+.
+.TP
+.B -bind-to-socket\fR,\fP --bind-to-socket
+Bind processes to processor sockets  (deprecated in favor of --bind-to socket)
+.
+.TP
+.B -bind-to-none\fR,\fP --bind-to-none
+Do not bind processes  (deprecated in favor of --bind-to none)
+.
+.TP
+.B -report-bindings\fR,\fP --report-bindings
+Report any bindings for launched processes.
+.
+.TP
+.B -slot-list\fR,\fP --slot-list <slots>
+List of processor IDs to be used for binding MPI processes. The specified bindings will
+be applied to all MPI processes. See explanation below for syntax.
+.
+.
+.
+.
+.P
+For rankfiles:
+.
+.
+.TP
+.B -rf\fR,\fP --rankfile <rankfile>
+Provide a rankfile.
+.
+.
+.
+.
+.P
+To manage standard I/O:
+.
+.
+.TP
+.B -output-filename\fR,\fP --output-filename \fR<filename>\fP
+Redirect the stdout, stderr, and stddiag of all processes to a process-unique version of
+the specified filename. Any directories in the filename will automatically be created.
+Each output file will consist of filename.id, where the id will be the
+process's rank in MPI_COMM_WORLD, left-filled with
+zeros for correct ordering in listings.
+.
+.
+.TP
+.B -stdin\fR,\fP --stdin <rank>
+The MPI_COMM_WORLD rank of the process that is to receive stdin. The
+default is to forward stdin to MPI_COMM_WORLD rank 0, but this option
+can be used to forward stdin to any process. It is also acceptable to
+specify \fInone\fP, indicating that no processes are to receive stdin.
+.
+.
+.TP
+.B -tag-output\fR,\fP --tag-output
+Tag each line of output to stdout, stderr, and stddiag with \fB[jobid, MCW_rank]<stdxxx>\fP indicating the process jobid
+and MPI_COMM_WORLD rank of the process that generated the output, and the channel which generated it.
+.
+.
+.TP
+.B -timestamp-output\fR,\fP --timestamp-output
+Timestamp each line of output to stdout, stderr, and stddiag.
+.
+.
+.TP
+.B -xml\fR,\fP --xml
+Provide all output to stdout, stderr, and stddiag in an xml format.
+.
+.
+.TP
+.B -xterm\fR,\fP --xterm \fR<ranks>\fP
+Display the output from the processes identified by their
+MPI_COMM_WORLD ranks in separate xterm windows. The ranks are specified
+as a comma-separated list of ranges, with a -1 indicating all. A separate
+window will be created for each specified process.
+.B Note:
+xterm will normally terminate the window upon termination of the process running
+within it. However, by adding a "!" to the end of the list of specified ranks,
+the proper options will be provided to ensure that xterm keeps the window open
+\fIafter\fP the process terminates, thus allowing you to see the process' output.
+Each xterm window will subsequently need to be manually closed.
+.B Note:
+In some environments, xterm may require that the executable be in the user's
+path, or be specified in absolute or relative terms. Thus, it may be necessary
+to specify a local executable as "./foo" instead of just "foo". If xterm fails to
+find the executable, ompi-submit will hang, but still respond correctly to a ctrl-c.
+If this happens, please check that the executable is being specified correctly
+and try again.
+.
+.
+.
+.
+.P
+To manage files and runtime environment:
+.
+.
+.TP
+.B -path\fR,\fP --path \fR<path>\fP
+<path> that will be used when attempting to locate the requested
+executables.  This is used prior to using the local PATH setting.
+.
+.
+.TP
+.B --prefix \fR<dir>\fP
+Prefix directory that will be used to set the \fIPATH\fR and
+\fILD_LIBRARY_PATH\fR on the remote node before invoking Open MPI or
+the target process.  See the "Remote Execution" section, below.
+.
+.
+.TP
+.B --preload-binary
+Copy the specified executable(s) to remote machines prior to starting remote processes. The
+executables will be copied to the Open MPI session directory and will be deleted upon
+completion of the job.
+.
+.
+.TP
+.B --preload-files <files>
+Preload the comma separated list of files to the current working directory of the remote
+machines where processes will be launched prior to starting those processes.
+.
+.
+.TP
+.B --preload-files-dest-dir <path>
+The destination directory to be used for preload-files, if other than the current working
+directory. By default, the absolute and relative paths provided by --preload-files are used.
+.
+.
+.TP
+.B -wd \fR<dir>\fP
+Synonym for \fI-wdir\fP.
+.
+.
+.TP
+.B -wdir \fR<dir>\fP
+Change to the directory <dir> before the user's program executes.
+See the "Current Working Directory" section for notes on relative paths.
+.B Note:
+If the \fI-wdir\fP option appears both on the command line and in an
+application context, the context will take precedence over the command
+line. Thus, if the path to the desired wdir is different
+on the backend nodes, then it must be specified as an absolute path that
+is correct for the backend node.
+.
+.
+.TP
+.B -x \fR<env>\fP
+Export the specified environment variables to the remote nodes before
+executing the program.  Only one environment variable can be specified
+per \fI-x\fP option.  Existing environment variables can be specified
+or new variable names specified with corresponding values.  For
+example:
+    \fB%\fP ompi-submit -x DISPLAY -x OFILE=/tmp/out ...
+
+The parser for the \fI-x\fP option is not very sophisticated; it does
+not even understand quoted values.  Users are advised to set variables
+in the environment, and then use \fI-x\fP to export (not define) them.
+.
+.
+.
+.
+.P
+Setting MCA parameters:
+.
+.
+.TP
+.B -gmca\fR,\fP --gmca \fR<key> <value>\fP
+Pass global MCA parameters that are applicable to all contexts. \fI<key>\fP is
+the parameter name; \fI<value>\fP is the parameter value.
+.
+.
+.TP
+.B -mca\fR,\fP --mca <key> <value>
+Send arguments to various MCA modules.  See the "MCA" section, below.
+.
+.
+.
+.
+.P
+For debugging:
+.
+.
+.TP
+.B -debug\fR,\fP --debug
+Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP
+MCA parameter.
+.
+.
+.TP
+.B -debugger\fR,\fP --debugger
+Sequence of debuggers to search for when \fI--debug\fP is used (i.e.
+a synonym for \fIorte_base_user_debugger\fP MCA parameter).
+.
+.
+.TP
+.B -tv\fR,\fP --tv
+Launch processes under the TotalView debugger.
+Deprecated backwards compatibility flag. Synonym for \fI--debug\fP.
+.
+.
+.
+.
+.P
+There are also other options:
+.
+.
+.TP
+.B --allow-run-as-root
+Allow
+.I ompi-submit
+to run when executed by the root user
+.RI ( ompi-submit
+defaults to aborting when launched as the root user).
+.
+.
+.TP
+.B -aborted\fR,\fP --aborted \fR<#>\fP
+Set the maximum number of aborted processes to display.
+.
+.
+.TP
+.B --app \fR<appfile>\fP
+Provide an appfile, ignoring all other command line options.
+.
+.
+.TP
+.B -cf\fR,\fP --cartofile \fR<cartofile>\fP
+Provide a cartography file.
+.
+.
+.TP
+.B --hetero
+Indicates that multiple app_contexts are being provided that are a mix of 32/64-bit binaries.
+.
+.
+.TP
+.B -ompi-server\fR,\fP --ompi-server <uri or file>
+Specify the URI of the Open MPI server (or the ompi-submit
+to be used as the server), the name of the file (specified
+as file:filename) that contains that info, or the PID
+(specified as pid:#) of the ompi-submit to be used as the
+server.
+The Open MPI server is used to support multi-application data exchange via
+the MPI-2 MPI_Publish_name and MPI_Lookup_name functions.
+.
+.
+.
+.
+.P
+The following options are useful for developers; they are not generally
+useful to most ORTE and/or MPI users:
+.
+.TP
+.B -d\fR,\fP --debug-devel
+Enable debugging of the OpenRTE (the run-time layer in Open MPI).
+This is not generally useful for most users.
+.
+.
+.
+.P
+There may be other options listed with \fIompi-submit --help\fP.
+.
+.
+.SS Environment Variables
+.
+.TP
+.B MPIEXEC_TIMEOUT
+The maximum number of seconds that
+.I ompi-submit
+.RI ( mpiexec )
+will run.  After this many seconds,
+.I ompi-submit
+will abort the launched job and exit.
+.
+.
+.\" **************************
+.\"    Description Section
+.\" **************************
+.SH DESCRIPTION
+.
+One invocation of \fIompi-submit\fP starts an MPI application running under Open
+MPI. If the application is single program multiple data (SPMD), the application
+can be specified on the \fIompi-submit\fP command line.
+
+If the application is multiple instruction multiple data (MIMD), comprising
+multiple programs, the set of programs and arguments can be specified in one of
+two ways: Extended Command Line Arguments, and Application Context.
+.PP
+An application context describes the MIMD program set including all arguments
+in a separate file.
+.\" See appcontext(5) for a description of the application context syntax.
+This file essentially contains multiple \fIompi-submit\fP command lines, less the
+command name itself.  The ability to specify different options for different
+instantiations of a program is another reason to use an application context.
+.PP
+Extended command line arguments allow for the description of the application
+layout on the command line using colons (\fI:\fP) to separate the specification
+of programs and arguments. Some options are globally set across all specified
+programs (e.g. --hostfile), while others are specific to a single program
+(e.g. -np).
+.
+.
+.
+.SS Specifying Host Nodes
+.
+Host nodes can be identified on the \fIompi-submit\fP command line with the \fI-host\fP
+option or in a hostfile.
+.
+.PP
+For example,
+.
+.TP 4
+ompi-submit -H aa,aa,bb ./a.out
+launches two processes on node aa and one on bb.
+.
+.PP
+Or, consider the hostfile
+.
+
+   \fB%\fP cat myhostfile
+   aa slots=2
+   bb slots=2
+   cc slots=2
+
+.
+.PP
+Since the DVM was started with \fIorte-dvm\fP, \fIorte-submit\fP
+will ignore any slots arguments in the hostfile. Values provided
+via hostfile to \fIorte-dvm\fP will control the behavior.
+.
+.PP
+.
+.TP 4
+ompi-submit -hostfile myhostfile ./a.out
+will launch two processes on each of the three nodes.
+.
+.TP 4
+ompi-submit -hostfile myhostfile -host aa ./a.out
+will launch two processes, both on node aa.
+.
+.TP 4
+ompi-submit -hostfile myhostfile -host dd ./a.out
+will find no hosts to run on and abort with an error.
+That is, the specified host dd is not in the specified hostfile.
+.
+.SS Specifying Number of Processes
+.
+As we have just seen, the number of processes to run can be set using the
+hostfile.  Other mechanisms exist.
+.
+.PP
+The number of processes launched can be specified as a multiple of the
+number of nodes or processor sockets available.  For example,
+.
+.TP 4
+ompi-submit -H aa,bb -npersocket 2 ./a.out
+launches processes 0-3 on node aa and processes 4-7 on node bb,
+where aa and bb are both dual-socket nodes.
+The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option,
+which is discussed in a later section.
+.
+.TP 4
+ompi-submit -H aa,bb -npernode 2 ./a.out
+launches processes 0-1 on node aa and processes 2-3 on node bb.
+.
+.TP 4
+ompi-submit -H aa,bb -npernode 1 ./a.out
+launches one process per host node.
+.
+.TP 4
+ompi-submit -H aa,bb -pernode ./a.out
+is the same as \fI-npernode\fP 1.
+.
+.
+.PP
+Another alternative is to specify the number of processes with the
+\fI-np\fP option.  Consider now the hostfile
+.
+
+   \fB%\fP cat myhostfile
+   aa slots=4
+   bb slots=4
+   cc slots=4
+
+.
+.PP
+Now,
+.
+.TP 4
+ompi-submit -hostfile myhostfile -np 6 ./a.out
+will launch processes 0-3 on node aa and processes 4-5 on node bb.  The remaining
+slots in the hostfile will not be used since the \fI-np\fP option indicated
+that only 6 processes should be launched.
+.
+.SS Mapping Processes to Nodes:  Using Policies
+.
+The examples above illustrate the default mapping of processes
+to nodes.  This mapping can also be controlled with various
+\fIompi-submit\fP options that describe mapping policies.
+.
+.
+.PP
+Consider the same hostfile as above, again with \fI-np\fP 6:
+.
+
+                          node aa      node bb      node cc
+
+  ompi-submit                  0 1 2 3      4 5
+
+  ompi-submit --map-by node    0 3          1 4          2 5
+
+  ompi-submit -nolocal                      0 1 2 3      4 5
+.
+.PP
+The \fI--map-by node\fP option will load balance the processes across
+the available nodes, numbering each process in a round-robin fashion.
+.
+.PP
+The \fI-nolocal\fP option prevents any processes from being mapped onto the
+local host (in this case node aa).  While \fIompi-submit\fP typically consumes
+few system resources, \fI-nolocal\fP can be helpful for launching very
+large jobs where \fIompi-submit\fP may actually need to use noticeable amounts
+of memory and/or processing time.
+.
+.PP
+Just as \fI-np\fP can specify fewer processes than there are slots, it can
+also oversubscribe the slots.  For example, with the same hostfile:
+.
+.TP 4
+ompi-submit -hostfile myhostfile -np 14 ./a.out
+will launch processes 0-3 on node aa, 4-7 on bb, and 8-11 on cc.  It will
+then add the remaining two processes to whichever nodes it chooses.
+.
+.PP
+One can also specify limits to oversubscription.  For example, with the same
+hostfile:
+.
+.TP 4
+ompi-submit -hostfile myhostfile -np 14 -nooversubscribe ./a.out
+will produce an error since \fI-nooversubscribe\fP prevents oversubscription.
+.
+.PP
+Limits to oversubscription can also be specified in the hostfile itself:
+.
+ % cat myhostfile
+ aa slots=4 max_slots=4
+ bb         max_slots=4
+ cc slots=4
+.
+.PP
+The \fImax_slots\fP field specifies such a limit.  When it does, the
+\fIslots\fP value defaults to the limit.  Now:
+.
+.TP 4
+ompi-submit -hostfile myhostfile -np 14 ./a.out
+causes the first 12 processes to be launched as before, but the remaining
+two processes will be forced onto node cc.  The other two nodes are
+protected by the hostfile against oversubscription by this job.
+.
+.PP
+Using the \fI--nooversubscribe\fR option can be helpful since Open MPI
+currently does not get "max_slots" values from the resource manager.
+.
+.PP
+Of course, \fI-np\fP can also be used with the \fI-H\fP or \fI-host\fP
+option.  For example,
+.
+.TP 4
+ompi-submit -H aa,bb -np 8 ./a.out
+launches 8 processes.  Since only two hosts are specified, after the first
+two processes are mapped, one to aa and one to bb, the remaining processes
+oversubscribe the specified hosts.
+.
+.PP
+And here is a MIMD example:
+.
+.TP 4
+ompi-submit -H aa -np 1 hostname : -H bb,cc -np 2 uptime
+will launch process 0 running \fIhostname\fP on node aa and processes 1 and 2
+each running \fIuptime\fP on nodes bb and cc, respectively.
+.
+.SS Mapping, Ranking, and Binding: Oh My!
+.
+Open MPI employs a three-phase procedure for assigning process locations and
+ranks:
+.
+.TP 10
+\fBmapping\fP
+Assigns a default location to each process
+.
+.TP 10
+\fBranking\fP
+Assigns an MPI_COMM_WORLD rank value to each process
+.
+.TP 10
+\fBbinding\fP
+Constrains each process to run on specific processors
+.
+.PP
+The \fImapping\fP step is used to assign a default location to each process
+based on the mapper being employed. Mapping by slot, node, and sequentially results
+in the assignment of the processes to the node level. In contrast, mapping by object allows
+the mapper to assign the process to an actual object on each node.
+.
+.PP
+\fBNote:\fP the location assigned to the process is independent of where it will be bound - the
+assignment is used solely as input to the binding algorithm.
+.
+.PP
+The mapping of processes to nodes can be defined not just
+with general policies but also, if necessary, using arbitrary mappings
+that cannot be described by a simple policy.  One can use the "sequential
+mapper," which reads the hostfile line by line, assigning processes
+to nodes in whatever order the hostfile specifies.  Use the
+\fI-mca rmaps seq\fP option.  For example, using the same hostfile
+as before:
+.
+.PP
+ompi-submit -hostfile myhostfile -mca rmaps seq ./a.out
+.
+.PP
+will launch three processes, one on each of nodes aa, bb, and cc, respectively.
+The slot counts don't matter;  one process is launched per line on
+whatever node is listed on the line.
+.
+.PP
+Another way to specify arbitrary mappings is with a rankfile, which
+gives you detailed control over process binding as well.  Rankfiles
+are discussed below.
+.
+.PP
+The second phase focuses on the \fIranking\fP of the process within
+the job's MPI_COMM_WORLD.  Open MPI
+separates this from the mapping procedure to allow more flexibility in the
+relative placement of MPI processes. This is best illustrated by considering the
+following two cases where we used the \fI--map-by ppr:2:socket\fP option:
+.
+.PP
+                          node aa       node bb
+
+    rank-by core         0 1 ! 2 3     4 5 ! 6 7
+
+   rank-by socket        0 2 ! 1 3     4 6 ! 5 7
+
+   rank-by socket:span   0 4 ! 1 5     2 6 ! 3 7
+.
+.PP
+Ranking by core and by slot provide the identical result - a simple
+progression of MPI_COMM_WORLD ranks across each node. Ranking by
+socket does a round-robin ranking within each node until all processes
+have been assigned an MCW rank, and then progresses to the next
+node. Adding the \fIspan\fP modifier to the ranking directive causes
+the ranking algorithm to treat the entire allocation as a single
+entity - thus, the MCW ranks are assigned across all sockets before
+circling back around to the beginning.
+.
+.PP
+The \fIbinding\fP phase actually binds each process to a given set of processors. This can
+improve performance if the operating system is placing processes
+suboptimally.  For example, it might oversubscribe some multi-core
+processor sockets, leaving other sockets idle;  this can lead
+processes to contend unnecessarily for common resources.  Or, it
+might spread processes out too widely;  this can be suboptimal if
+application performance is sensitive to interprocess communication
+costs.  Binding can also keep the operating system from migrating
+processes excessively, regardless of how optimally those processes
+were placed to begin with.
+.
+.PP
+The processors to be used for binding can be identified in terms of
+topological groupings - e.g., binding to an l3cache will bind each
+process to all processors within the scope of a single L3 cache within
+their assigned location. Thus, if a process is assigned by the mapper
+to a certain socket, then a \fI--bind-to l3cache\fP directive will
+cause the process to be bound to the processors that share a single L3
+cache within that socket.
+.
+.PP
+To help balance loads, the binding directive uses a round-robin method when binding to
+levels lower than used in the mapper. For example, consider the case where a job is
+mapped to the socket level, and then bound to core. Each socket will have multiple cores,
+so if multiple processes are mapped to a given socket, the binding algorithm will assign
+each process located to a socket to a unique core in a round-robin manner.
+.
+.PP
+Alternatively, processes mapped by l2cache and then bound to socket will simply be bound
+to all the processors in the socket where they are located. In this manner, users can
+exert detailed control over relative MCW rank location and binding.
+.
+.PP
+Finally, \fI--report-bindings\fP can be used to report bindings.
+.
+.PP
+As an example, consider a node with two processor sockets, each comprising
+four cores.  We run \fIompi-submit\fP with \fI-np 4 --report-bindings\fP and
+the following additional options:
+.
+
+ % ompi-submit ... --map-by core --bind-to core
+ [...] ... binding child [...,0] to cpus 0001
+ [...] ... binding child [...,1] to cpus 0002
+ [...] ... binding child [...,2] to cpus 0004
+ [...] ... binding child [...,3] to cpus 0008
+
+ % ompi-submit ... --map-by socket --bind-to socket
+ [...] ... binding child [...,0] to socket 0 cpus 000f
+ [...] ... binding child [...,1] to socket 1 cpus 00f0
+ [...] ... binding child [...,2] to socket 0 cpus 000f
+ [...] ... binding child [...,3] to socket 1 cpus 00f0
+
+ % ompi-submit ... --map-by core:PE=2 --bind-to core
+ [...] ... binding child [...,0] to cpus 0003
+ [...] ... binding child [...,1] to cpus 000c
+ [...] ... binding child [...,2] to cpus 0030
+ [...] ... binding child [...,3] to cpus 00c0
+
+ % ompi-submit ... --bind-to none
+.
+.PP
+Here, \fI--report-bindings\fP shows the binding of each process as a mask.
+In the first case, the processes bind to successive cores as indicated by
+the masks 0001, 0002, 0004, and 0008.  In the second case, processes bind
+to all cores on successive sockets as indicated by the masks 000f and 00f0.
+The processes cycle through the processor sockets in a round-robin fashion
+as many times as are needed.  In the third case, the masks show us that
+2 cores have been bound per process.  In the fourth case, binding is
+turned off and no bindings are reported.
+.
+.PP
+Open MPI's support for process binding depends on the underlying
+operating system.  Therefore, certain process binding options may not be available
+on every system.
+.
+.PP
+Process binding can also be set with MCA parameters.
+Their usage is less convenient than that of \fIompi-submit\fP options.
+On the other hand, MCA parameters can be set not only on the \fIompi-submit\fP
+command line, but alternatively in a system or user mca-params.conf file
+or as environment variables, as described in the MCA section below.
+Some examples include:
+.
+.PP
+    ompi-submit option          MCA parameter key         value
+
+  --map-by core          rmaps_base_mapping_policy   core
+  --map-by socket        rmaps_base_mapping_policy   socket
+  --rank-by core         rmaps_base_ranking_policy   core
+  --bind-to core         hwloc_base_binding_policy   core
+  --bind-to socket       hwloc_base_binding_policy   socket
+  --bind-to none         hwloc_base_binding_policy   none
+.
+.
+.SS Rankfiles
+.
+Rankfiles are text files that specify detailed information about how
+individual processes should be mapped to nodes, and to which
+processor(s) they should be bound.  Each line of a rankfile specifies
+the location of one process (for MPI jobs, the process' "rank" refers
+to its rank in MPI_COMM_WORLD).  The general form of each line in the
+rankfile is:
+.
+
+    rank <N>=<hostname> slot=<slot list>
+.
+.PP
+For example:
+.
+
+    $ cat myrankfile
+    rank 0=aa slot=1:0-2
+    rank 1=bb slot=0:0,1
+    rank 2=cc slot=1-2
+    $ ompi-submit -H aa,bb,cc,dd -rf myrankfile ./a.out
+.
+.PP
+This means that
+.
+
+  Rank 0 runs on node aa, bound to logical socket 1, cores 0-2.
+  Rank 1 runs on node bb, bound to logical socket 0, cores 0 and 1.
+  Rank 2 runs on node cc, bound to logical cores 1 and 2.
+.
+.PP
+Rankfiles can alternatively be used to specify \fIphysical\fP processor
+locations. In this case, the syntax is somewhat different. Sockets are
+no longer recognized, and the slot number given must be the number of
+the physical PU as most OS's do not assign a unique physical identifier
+to each core in the node. Thus, a proper physical rankfile looks something
+like the following:
+.
+
+    $ cat myphysicalrankfile
+    rank 0=aa slot=1
+    rank 1=bb slot=8
+    rank 2=cc slot=6
+.
+.PP
+This means that
+.
+
+  Rank 0 will run on node aa, bound to the core that contains physical PU 1
+  Rank 1 will run on node bb, bound to the core that contains physical PU 8
+  Rank 2 will run on node cc, bound to the core that contains physical PU 6
+.
+.PP
+Rankfiles are treated as \fIlogical\fP by default, and the MCA parameter
+rmaps_rank_file_physical must be set to 1 to indicate that the rankfile
+is to be considered as \fIphysical\fP.
+.
+.PP
+The hostnames listed above are "absolute," meaning that actual
+resolvable hostnames are specified.  However, hostnames can also be
+specified as "relative," meaning that they are specified in relation
+to an externally-specified list of hostnames (e.g., by ompi-submit's --host
+argument, a hostfile, or a job scheduler).
+.
+.PP
+The "relative" specification is of the form "+n<X>", where X is an
+integer specifying the Xth hostname in the set of all available
+hostnames, indexed from 0.  For example:
+.
+
+    $ cat myrankfile
+    rank 0=+n0 slot=1:0-2
+    rank 1=+n1 slot=0:0,1
+    rank 2=+n2 slot=1-2
+    $ ompi-submit -H aa,bb,cc,dd -rf myrankfile ./a.out
+.
+.PP
+Starting with Open MPI v1.7, all socket/core slot locations are
+specified as
+.I logical
+indexes (the Open MPI v1.6 series used
+.I physical
+indexes).  You can use tools such as HWLOC's "lstopo" to find the
+logical indexes of sockets and cores.
+.
+.
+.SS Application Context or Executable Program?
+.
+To distinguish the two different forms, \fIompi-submit\fP
+looks on the command line for the \fI--app\fP option.  If
+it is specified, then the file named on the command line is
+assumed to be an application context.  If it is not
+specified, then the file is assumed to be an executable program.
+.
+.
+.
+.SS Locating Files
+.
+If no relative or absolute path is specified for a file, Open
+MPI will first look for files by searching the directories specified
+by the \fI--path\fP option.  If there is no \fI--path\fP option set or
+if the file is not found at the \fI--path\fP location, then Open MPI
+will search the user's PATH environment variable as defined on the
+source node(s).
+.PP
+If a relative directory is specified, it must be relative to the initial
+working directory determined by the specific starter used. For example when
+using the rsh or ssh starters, the initial directory is $HOME by default. Other
+starters may set the initial directory to the current working directory from
+the invocation of \fIompi-submit\fP.
+.
+.
+.
+.SS Current Working Directory
+.
+The \fI\-wdir\fP ompi-submit option (and its synonym, \fI\-wd\fP) allows
+the user to change to an arbitrary directory before the program is
+invoked.  It can also be used in application context files to specify
+working directories on specific nodes and/or for specific
+applications.
+.PP
+If the \fI\-wdir\fP option appears both in a context file and on the
+command line, the context file directory will override the command
+line value.
+.PP
+If the \fI-wdir\fP option is specified, Open MPI will attempt to
+change to the specified directory on all of the remote nodes. If this
+fails, \fIompi-submit\fP will abort.
+.PP
+If the \fI-wdir\fP option is \fBnot\fP specified, Open MPI will send
+the directory name where \fIompi-submit\fP was invoked to each of the
+remote nodes. The remote nodes will try to change to that
+directory. If they are unable (e.g., if the directory does not exist on
+that node), then Open MPI will use the default directory determined by
+the starter.
+.PP
+All directory changing occurs before the user's program is invoked; it
+does not wait until \fIMPI_INIT\fP is called.
+.
+.
+.
+.SS Standard I/O
+.
+Open MPI directs UNIX standard input to /dev/null on all processes
+except the MPI_COMM_WORLD rank 0 process. The MPI_COMM_WORLD rank 0 process
+inherits standard input from \fIompi-submit\fP.
+.B Note:
+The node that invoked \fIompi-submit\fP need not be the same as the node where the
+MPI_COMM_WORLD rank 0 process resides. Open MPI handles the redirection of
+\fIompi-submit\fP's standard input to the rank 0 process.
+.PP
+Open MPI directs UNIX standard output and error from remote nodes to the node
+that invoked \fIompi-submit\fP and prints it on the standard output/error of
+\fIompi-submit\fP.
+Local processes inherit the standard output/error of \fIompi-submit\fP and transfer
+to it directly.
+.PP
+Thus it is possible to redirect standard I/O for Open MPI applications by
+using the typical shell redirection procedure on \fIompi-submit\fP.
+
+      \fB%\fP ompi-submit -np 2 my_app < my_input > my_output
+
+Note that in this example \fIonly\fP the MPI_COMM_WORLD rank 0 process will
+receive the stream from \fImy_input\fP on stdin.  The stdin on all the other
+nodes will be tied to /dev/null.  However, the stdout from all nodes will
+be collected into the \fImy_output\fP file.
+.
+.
+.
+.SS Signal Propagation
+.
+When orte-submit receives a SIGTERM or SIGINT, it will attempt to kill
+the entire job by sending all processes in the job a SIGTERM, waiting
+a small number of seconds, then sending all processes in the job a
+SIGKILL.
+.
+.PP
+SIGUSR1 and SIGUSR2 signals received by orte-submit are propagated to
+all processes in the job.
+.
+.PP
+One can turn on forwarding of SIGSTOP and SIGCONT to the program executed
+by ompi-submit by setting the MCA parameter orte_forward_job_control to 1.
+A SIGTSTP signal to ompi-submit will then cause a SIGSTOP signal to be sent
+to all of the programs started by ompi-submit and likewise a SIGCONT signal
+to ompi-submit will cause a SIGCONT to be sent.
+.
+.PP
+Other signals are not currently propagated
+by orte-submit.
+.
+.
+.SS Process Termination / Signal Handling
+.
+During the run of an MPI application, if any process dies abnormally
+(either exiting before invoking \fIMPI_FINALIZE\fP, or dying as the result of a
+signal), \fIompi-submit\fP will print out an error message and kill the rest of the
+MPI application.
+.PP
+User signal handlers should probably avoid trying to cleanup MPI state
+(Open MPI is currently not async-signal-safe; see MPI_Init_thread(3)
+for details about
+.I MPI_THREAD_MULTIPLE
+and thread safety).  For example, if a segmentation fault occurs in
+\fIMPI_SEND\fP (perhaps because a bad buffer was passed in) and a user
+signal handler is invoked, if this user handler attempts to invoke
+\fIMPI_FINALIZE\fP, Bad Things could happen since Open MPI was already
+"in" MPI when the error occurred.  Since \fIompi-submit\fP will notice that
+the process died due to a signal, it is probably not necessary for the
+user to clean up MPI state; it is safest to clean up only non-MPI state.
+.
+.
+.
+.SS Process Environment
+.
+Processes in the MPI application inherit their environment from the
+Open RTE daemon upon the node on which they are running.  The
+environment is typically inherited from the user's shell.  On remote
+nodes, the exact environment is determined by the boot MCA module
+used.  The \fIrsh\fR launch module, for example, uses either
+\fIrsh\fR/\fIssh\fR to launch the Open RTE daemon on remote nodes, and
+typically executes one or more of the user's shell-setup files before
+launching the Open RTE daemon.  When running dynamically linked
+applications which require the \fILD_LIBRARY_PATH\fR environment
+variable to be set, care must be taken to ensure that it is correctly
+set when booting Open MPI.
+.PP
+See the "Remote Execution" section for more details.
+.
+.
+.SS Remote Execution
+.
+Open MPI requires that the \fIPATH\fR environment variable be set to
+find executables on remote nodes (this is typically only necessary in
+\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled
+environments typically copy the current environment to the execution
+of remote jobs, so if the current environment has \fIPATH\fR and/or
+\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it
+set properly).  If Open MPI was compiled with shared library support,
+it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment
+variable set on remote nodes as well (especially to find the shared
+libraries required to run user MPI applications).
+.PP
+However, it is not always desirable or possible to edit shell
+startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR.  The
+\fI--prefix\fR option is provided for some simple configurations where
+this is not possible.
+.PP
+The \fI--prefix\fR option takes a single argument: the base directory
+on the remote node where Open MPI is installed.  Open MPI will use
+this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR
+before executing any Open MPI or user applications.  This allows
+running Open MPI jobs without having pre-configured the \fIPATH\fR and
+\fILD_LIBRARY_PATH\fR on the remote nodes.
+.PP
+Open MPI adds the basename of the current
+node's "bindir" (the directory where Open MPI's executables are
+installed) to the prefix and uses that to set the \fIPATH\fR on the
+remote node.  Similarly, Open MPI adds the basename of the current
+node's "libdir" (the directory where Open MPI's libraries are
+installed) to the prefix and uses that to set the
+\fILD_LIBRARY_PATH\fR on the remote node.  For example:
+.TP 15
+Local bindir:
+/local/node/directory/bin
+.TP
+Local libdir:
+/local/node/directory/lib64
+.PP
+If the following command line is used:
+
+    \fB%\fP ompi-submit --prefix /remote/node/directory
+
+Open MPI will add "/remote/node/directory/bin" to the \fIPATH\fR
+and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the
+remote node before attempting to execute anything.
+.PP
+The \fI--prefix\fR option is not sufficient if the installation paths
+on the remote node are different than the local node (e.g., if "/lib"
+is used on the local node, but "/lib64" is used on the remote node),
+or if the installation paths are something other than a subdirectory
+under a common prefix.
+.PP
+Note that executing \fIompi-submit\fR via an absolute pathname is
+equivalent to specifying \fI--prefix\fR without the last subdirectory
+in the absolute pathname to \fIompi-submit\fR.  For example:
+
+    \fB%\fP /usr/local/bin/ompi-submit ...
+
+is equivalent to
+
+    \fB%\fP ompi-submit --prefix /usr/local
+.
+.
+.
+.SS Exported Environment Variables
+.
+All environment variables that are named in the form OMPI_* will automatically
+be exported to new processes on the local and remote nodes. Environmental
+parameters can also be set/forwarded to the new processes using the MCA
+parameter \fImca_base_env_list\fP. The \fI\-x\fP option to \fIompi-submit\fP has
+been deprecated, but the syntax of the MCA param follows that prior
+example. While the syntax of the \fI\-x\fP option and MCA param
+allows the definition of new variables, note that the parser
+for these options is currently not very sophisticated - it does not even
+understand quoted values.  Users are advised to set variables in the
+environment and use the option to export them; not to define them.
+.
+.
+.
+.SS Setting MCA Parameters
+.
+The \fI-mca\fP switch allows the passing of parameters to various MCA
+(Modular Component Architecture) modules.
+.\" Open MPI's MCA modules are described in detail in ompimca(7).
+MCA modules have direct impact on MPI programs because they allow tunable
+parameters to be set at run time (such as which BTL communication device driver
+to use, what parameters to pass to that BTL, etc.).
+.PP
+The \fI-mca\fP switch takes two arguments: \fI<key>\fP and \fI<value>\fP.
+The \fI<key>\fP argument generally specifies which MCA module will receive the value.
+For example, the \fI<key>\fP "btl" is used to select which BTL to be used for
+transporting MPI messages.  The \fI<value>\fP argument is the value that is
+passed.
+For example:
+.
+.TP 4
+ompi-submit -mca btl tcp,self -np 1 foo
+Tells Open MPI to use the "tcp" and "self" BTLs, and to run a single copy of
+"foo" on an allocated node.
+.
+.TP
+ompi-submit -mca btl self -np 1 foo
+Tells Open MPI to use the "self" BTL, and to run a single copy of "foo" on an
+allocated node.
+.\" And so on.  Open MPI's BTL MCA modules are described in ompimca_btl(7).
+.PP
+The \fI-mca\fP switch can be used multiple times to specify different
+\fI<key>\fP and/or \fI<value>\fP arguments.  If the same \fI<key>\fP is
+specified more than once, the \fI<value>\fPs are concatenated with a comma
+(",") separating them.
+.PP
+Note that the \fI-mca\fP switch is simply a shortcut for setting environment variables.
+The same effect may be accomplished by setting corresponding environment
+variables before running \fIompi-submit\fP.
+The form of the environment variables that Open MPI sets is:
+
+      OMPI_MCA_<key>=<value>
+.PP
+Thus, the \fI-mca\fP switch overrides any previously set environment
+variables.  The \fI-mca\fP settings similarly override MCA parameters set
+in the
+$OPAL_PREFIX/etc/openmpi-mca-params.conf or $HOME/.openmpi/mca-params.conf
+file.
+.
+.PP
+Unknown \fI<key>\fP arguments are still set as
+environment variables -- they are not checked (by \fIompi-submit\fP) for correctness.
+Illegal or incorrect \fI<value>\fP arguments may or may not be reported -- it
+depends on the specific MCA module.
+.PP
+To find the available component types under the MCA architecture, or to find the
+available parameters for a specific component, use the \fIompi_info\fP command.
+See the \fIompi_info(1)\fP man page for detailed information on the command.
+.
+.SS Running as root
+.
+The Open MPI team strongly advises against executing
+.I ompi-submit
+as the root user.  MPI applications should be run as regular
+(non-root) users.
+.
+.PP
+Reflecting this advice, ompi-submit will refuse to run as root by default.
+To override this default, you can add the
+.I --allow-run-as-root
+option to the
+.I ompi-submit
+command line.
+.
+.SS Exit status
+.
+There is no standard definition for what \fIompi-submit\fP should return as an exit
+status. After considerable discussion, we settled on the following method for
+assigning the \fIompi-submit\fP exit status (note: in the following description,
+the "primary" job is the initial application started by ompi-submit - all jobs that
+are spawned by that job are designated "secondary" jobs):
+.
+.IP \[bu] 2
+if all processes in the primary job normally terminate with exit status 0, we return 0
+.IP \[bu]
+if one or more processes in the primary job normally terminate with non-zero exit status,
+we return the exit status of the process with the lowest MPI_COMM_WORLD rank to have a non-zero status
+.IP \[bu]
+if all processes in the primary job normally terminate with exit status 0, and one or more
+processes in a secondary job normally terminate with non-zero exit status, we (a) return
+the exit status of the process with the lowest MPI_COMM_WORLD rank in the lowest jobid to have a non-zero status, and (b)
+output a message summarizing the exit status of the primary and all secondary jobs.
+.IP \[bu]
+if the cmd line option --report-child-jobs-separately is set, we will return -only- the
+exit status of the primary job. Any non-zero exit status in secondary jobs will be
+reported solely in a summary print statement.
+.
+.PP
+By default, OMPI records and notes that MPI processes exited with non-zero termination status.
+This is generally not considered an "abnormal termination" - i.e., OMPI will not abort an MPI
+job if one or more processes return a non-zero status. Instead, the default behavior simply
+reports the number of processes terminating with non-zero status upon completion of the job.
+.PP
+However, in some cases it can be desirable to have the job abort when any process terminates
+with non-zero status. For example, a non-MPI job might detect a bad result from a calculation
+and want to abort, but doesn't want to generate a core file. Or an MPI job might continue past
+a call to MPI_Finalize, but indicate that all processes should abort due to some post-MPI result.
+.PP
+It is not anticipated that this situation will occur frequently. However, in the interest of
+serving the broader community, OMPI now has a means for allowing users to direct that jobs be
+aborted upon any process exiting with non-zero status. Setting the MCA parameter
+"orte_abort_on_non_zero_status" to 1 will cause OMPI to abort all processes once any process
+exits with non-zero status.
+.PP
+Terminations caused in this manner will be reported on the console as an "abnormal termination",
+with the first process to so exit identified along with its exit status.
+.PP
+.
+.\" **************************
+.\"    Examples Section
+.\" **************************
+.SH EXAMPLES
+Be sure also to see the examples throughout the sections above.
+.
+.TP 4
+ompi-submit -np 4 -mca btl ib,tcp,self prog1
+Run 4 copies of prog1 using the "ib", "tcp", and "self" BTL's for the
+transport of MPI messages.
+.
+.
+.TP 4
+ompi-submit -np 4 -mca btl tcp,sm,self
+.br
+--mca btl_tcp_if_include eth0 prog1
+.br
+Run 4 copies of prog1 using the "tcp", "sm" and "self" BTLs for the
+transport of MPI messages, with TCP using only the eth0 interface to
+communicate.  Note that other BTLs have similar if_include MCA
+parameters.
+.
+.\" **************************
+.\"    Diagnostics Section
+.\" **************************
+.
+.\" .SH DIAGNOSTICS
+.\" .TP 4
+.\" Error Msg:
+.\" Description
+.
+.\" **************************
+.\"    Return Value Section
+.\" **************************
+.
+.SH RETURN VALUE
+.
+\fIompi-submit\fP returns 0 if all processes started by \fIompi-submit\fP exit after calling
+MPI_FINALIZE.  A non-zero value is returned if an internal error occurred in
+ompi-submit, or one or more processes exited before calling MPI_FINALIZE.  If an
+internal error occurred in ompi-submit, the corresponding error code is returned.
+In the event that one or more processes exit before calling MPI_FINALIZE, the
+return value of the MPI_COMM_WORLD rank of the process that \fIompi-submit\fP first notices died
+before calling MPI_FINALIZE will be returned.  Note that, in general, this will
+be the first process that died but is not guaranteed to be so.
+.
+.\" **************************
+.\"    See Also Section
+.\" **************************
+.
+.SH SEE ALSO
+MPI_Init_thread(3)
diff --git a/orte/tools/orte-submit/orte-submit.c b/orte/tools/orte-submit/orte-submit.c
new file mode 100644
index 0000000000..5f4f055e60
--- /dev/null
+++ b/orte/tools/orte-submit/orte-submit.c
@@ -0,0 +1,1468 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2008 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2006-2014 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2007-2013 Los Alamos National Security, LLC.  All rights
+ *                         reserved. 
+ * Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif  /* HAVE_STDLIB_H */
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif  /* HAVE_STRINGS_H */
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include <errno.h>
+#include <signal.h>
+#include <ctype.h>
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif  /* HAVE_SYS_TYPES_H */
+#ifdef HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif  /* HAVE_SYS_WAIT_H */
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif  /* HAVE_SYS_TIME_H */
+#include <fcntl.h>
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+
+#include "opal/dss/dss.h"
+#include "opal/mca/event/event.h"
+#include "opal/mca/installdirs/installdirs.h"
+#include "opal/mca/hwloc/base/base.h"
+#include "opal/mca/base/base.h"
+#include "opal/util/argv.h"
+#include "opal/util/output.h"
+#include "opal/util/basename.h"
+#include "opal/util/cmd_line.h"
+#include "opal/util/opal_environ.h"
+#include "opal/util/opal_getcwd.h"
+#include "opal/util/show_help.h"
+#include "opal/util/fd.h"
+#include "opal/sys/atomic.h"
+#if OPAL_ENABLE_FT_CR == 1
+#include "opal/runtime/opal_cr.h"
+#endif
+
+#include "opal/version.h"
+#include "opal/runtime/opal.h"
+#include "opal/util/os_path.h"
+#include "opal/util/path.h"
+#include "opal/class/opal_pointer_array.h"
+#include "opal/dss/dss.h"
+
+#include "orte/mca/odls/odls_types.h"
+#include "orte/mca/plm/plm.h"
+#include "orte/mca/schizo/schizo.h"
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/rml/rml.h"
+#include "orte/mca/rml/base/rml_contact.h"
+#include "orte/mca/routed/routed.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/runtime/orte_globals.h"
+#include "orte/runtime/orte_wait.h"
+#include "orte/runtime/orte_quit.h"
+#include "orte/util/show_help.h"
+
+/* local functions: timer callback armed in main() for MPIEXEC_TIMEOUT, and the receive callback main() posts on ORTE_RML_TAG_TOOL */
+static void orte_timeout_wakeup(int sd, short args, void *cbdata);
+static void local_recv(int status, orte_process_name_t* sender,
+                       opal_buffer_t *buffer,
+                       orte_rml_tag_t tag, void *cbdata);
+
+/*
+ * File-scope state
+ */
+static char **global_mca_env = NULL;  /* handed to mca_base_cmd_line_process_args() in create_app(); merged into each app's env by parse_locals() */
+static orte_std_cntr_t total_num_apps = 0;
+static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
+
+/*
+ * Command line option values (bound in cmd_line_init below) and related state
+ */
+static struct {
+    bool help;                    /* -h|--help */
+    bool version;                 /* -V|--version */
+    char *report_pid;             /* --report-pid: "-" = stdout, "+" = stderr, anything else = file name */
+    char *stdin_target;           /* --stdin: a rank, "all" or "none" */
+    bool index_argv;              /* --index-argv-by-rank */
+    bool preload_binaries;        /* -s|--preload-binary */
+    char *preload_files;          /* --preload-files: comma separated list */
+    char *appfile;                /* --app */
+    int num_procs;                /* -c|-np|-n */
+    char *hnp;                    /* --hnp: the URI itself, or file:filename containing it */
+    char *wdir;                   /* --wdir|--wd */
+    bool set_cwd_to_session_dir;  /* --set-cwd-to-session-dir */
+    char *path;                   /* --path */
+    bool enable_recovery;         /* --enable-recovery */
+    char *personality;            /* --personality; main() defaults it to "ompi" */
+    char *basename;               /* opal_basename(argv[0]), set in main() */
+    char *prefix;                 /* not bound to any option in cmd_line_init */
+    bool terminate;               /* --terminate: ask the HNP to halt the DVM */
+} myglobals;
+
+static opal_cmd_line_init_t cmd_line_init[] = {
+    /* Option table given to opal_cmd_line_create(); various "obvious" options first */
+    { NULL, 'h', NULL, "help", 0,
+      &myglobals.help, OPAL_CMD_LINE_TYPE_BOOL,
+      "This help message" },
+    { NULL, 'V', NULL, "version", 0,
+      &myglobals.version, OPAL_CMD_LINE_TYPE_BOOL,
+      "Print version and exit" },
+
+    { NULL, '\0', "report-pid", "report-pid", 1,
+      &myglobals.report_pid, OPAL_CMD_LINE_TYPE_STRING,
+      "Printout pid on stdout [-], stderr [+], or a file [anything else]" },
+    
+    /* select stdin option */
+    { NULL, '\0', "stdin", "stdin", 1,
+      &myglobals.stdin_target, OPAL_CMD_LINE_TYPE_STRING,
+      "Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)" },
+    
+    /* request that argv[0] be indexed */
+    { NULL, '\0', "index-argv-by-rank", "index-argv-by-rank", 0,
+      &myglobals.index_argv, OPAL_CMD_LINE_TYPE_BOOL,
+      "Uniquely index argv[0] for each process using its rank" },
+
+    /* Preload the binary on the remote machine */
+    { NULL, 's', NULL, "preload-binary", 0,
+      &myglobals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL,
+      "Preload the binary on the remote machine before starting the remote process." },
+
+    /* Preload files on the remote machine */
+    { NULL, '\0', NULL, "preload-files", 1,
+      &myglobals.preload_files, OPAL_CMD_LINE_TYPE_STRING,
+      "Preload the comma separated list of files to the remote machines current working directory before starting the remote process." },
+
+    /* Use an appfile */
+    { NULL, '\0', NULL, "app", 1,
+      &myglobals.appfile, OPAL_CMD_LINE_TYPE_STRING,
+      "Provide an appfile; ignore all other command line options" },
+
+    /* Number of processes; -c, -n, --n, -np, and --np are all
+       synonyms */
+    { NULL, 'c', "np", "np", 1,
+      &myglobals.num_procs, OPAL_CMD_LINE_TYPE_INT,
+      "Number of processes to run" },
+    { NULL, '\0', "n", "n", 1,
+      &myglobals.num_procs, OPAL_CMD_LINE_TYPE_INT,
+      "Number of processes to run" },
+    
+    /* uri of Open MPI HNP, or at least where to get it */
+    { NULL, '\0', "hnp", "hnp", 1,
+      &myglobals.hnp, OPAL_CMD_LINE_TYPE_STRING,
+      "Specify the URI of the Open MPI server, or the name of the file (specified as file:filename) that contains that info" },
+    
+    /* request that the DVM be terminated instead of submitting a job */
+    { NULL, '\0', "terminate", "terminate", 0,
+      &myglobals.terminate, OPAL_CMD_LINE_TYPE_BOOL,
+      "Terminate the DVM" },
+    
+
+    /* Export environment variables; potentially used multiple times,
+       so it does not make sense to set into a variable */
+    { NULL, 'x', NULL, NULL, 1,
+      NULL, OPAL_CMD_LINE_TYPE_NULL,
+      "Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)" },
+
+      /* Mapping controls */
+    { "rmaps_base_display_map", '\0', "display-map", "display-map", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Display the process map just before launch"},
+    { "rmaps_base_display_devel_map", '\0', "display-devel-map", "display-devel-map", 0,
+       NULL, OPAL_CMD_LINE_TYPE_BOOL,
+       "Display a detailed process map (mostly intended for developers) just before launch"},
+    { "rmaps_base_display_topo_with_map", '\0', "display-topo", "display-topo", 0,
+       NULL, OPAL_CMD_LINE_TYPE_BOOL,
+       "Display the topology as part of the process map (mostly intended for developers) just before launch"},
+    { "rmaps_base_display_diffable_map", '\0', "display-diffable-map", "display-diffable-map", 0,
+       NULL, OPAL_CMD_LINE_TYPE_BOOL,
+       "Display a diffable process map (mostly intended for developers) just before launch"},
+    { NULL, 'H', "host", "host", 1,
+      NULL, OPAL_CMD_LINE_TYPE_STRING,
+      "List of hosts to invoke processes on" },
+    { "rmaps_base_no_schedule_local", '\0', "nolocal", "nolocal", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Do not run any MPI applications on the local node" },
+    { "rmaps_base_no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Nodes are not to be oversubscribed, even if the system supports such operation"},
+    { "rmaps_base_oversubscribe", '\0', "oversubscribe", "oversubscribe", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Nodes are allowed to be oversubscribed, even on a managed system, and overloading of processing elements"},
+    { "rmaps_base_cpus_per_rank", '\0', "cpus-per-proc", "cpus-per-proc", 1,
+      NULL, OPAL_CMD_LINE_TYPE_INT,
+      "Number of cpus to use for each process [default=1]" },
+    { "rmaps_base_cpus_per_rank", '\0', "cpus-per-rank", "cpus-per-rank", 1,
+      NULL, OPAL_CMD_LINE_TYPE_INT,
+      "Synonym for cpus-per-proc" },
+
+    /* Nperxxx options that do not require topology and are always
+     * available - included for backwards compatibility
+     */
+    { "rmaps_ppr_pernode", '\0', "pernode", "pernode", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Launch one process per available node" },
+    { "rmaps_ppr_n_pernode", '\0', "npernode", "npernode", 1,
+        NULL, OPAL_CMD_LINE_TYPE_INT,
+        "Launch n processes per node on all allocated nodes" },
+    { "rmaps_ppr_n_pernode", '\0', "N", NULL, 1,
+        NULL, OPAL_CMD_LINE_TYPE_INT,
+        "Launch n processes per node on all allocated nodes (synonym for npernode)" },
+
+#if OPAL_HAVE_HWLOC
+    /* declare hardware threads as independent cpus */
+    { "hwloc_base_use_hwthreads_as_cpus", '\0', "use-hwthread-cpus", "use-hwthread-cpus", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Use hardware threads as independent cpus" },
+
+    /* include npersocket for backwards compatibility */
+    { "rmaps_ppr_n_persocket", '\0', "npersocket", "npersocket", 1,
+      NULL, OPAL_CMD_LINE_TYPE_INT,
+      "Launch n processes per socket on all allocated nodes" },
+
+    /* Mapping options */
+    { "rmaps_base_mapping_policy", '\0', NULL, "map-by", 1,
+      NULL, OPAL_CMD_LINE_TYPE_STRING,
+      "Mapping Policy [slot | hwthread | core | socket (default) | numa | board | node]" },
+
+      /* Ranking options */
+    { "rmaps_base_ranking_policy", '\0', NULL, "rank-by", 1,
+      NULL, OPAL_CMD_LINE_TYPE_STRING,
+      "Ranking Policy [slot (default) | hwthread | core | socket | numa | board | node]" },
+
+      /* Binding options */
+    { "hwloc_base_binding_policy", '\0', NULL, "bind-to", 1,
+      NULL, OPAL_CMD_LINE_TYPE_STRING,
+      "Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is the default when np<=2, and \"socket\" is the default when np>2). Allowed qualifiers: overload-allowed, if-supported" },
+
+    { "hwloc_base_report_bindings", '\0', "report-bindings", "report-bindings", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Whether to report process bindings to stderr" },
+
+    /* slot list option */
+    { "hwloc_base_slot_list", '\0', "slot-list", "slot-list", 1,
+      NULL, OPAL_CMD_LINE_TYPE_STRING,
+      "List of processor IDs to bind processes to [default=NULL]"},
+
+#else
+    /* Mapping options */
+    { "rmaps_base_mapping_policy", '\0', NULL, "map-by", 1,
+      NULL, OPAL_CMD_LINE_TYPE_STRING,
+      "Mapping Policy [slot (default) | node]" },
+
+      /* Ranking options */
+    { "rmaps_base_ranking_policy", '\0', NULL, "rank-by", 1,
+      NULL, OPAL_CMD_LINE_TYPE_STRING,
+      "Ranking Policy [slot (default) | node]" },
+#endif
+
+    /* mpiexec-like arguments */
+    { NULL, '\0', "wdir", "wdir", 1,
+      &myglobals.wdir, OPAL_CMD_LINE_TYPE_STRING,
+      "Set the working directory of the started processes" },
+    { NULL, '\0', "wd", "wd", 1,
+      &myglobals.wdir, OPAL_CMD_LINE_TYPE_STRING,
+      "Synonym for --wdir" },
+    { NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0,
+      &myglobals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL,
+      "Set the working directory of the started processes to their session directory" },
+    { NULL, '\0', "path", "path", 1,
+      &myglobals.path, OPAL_CMD_LINE_TYPE_STRING,
+      "PATH to be used to look for executables to start processes" },
+
+    { NULL, '\0', "enable-recovery", "enable-recovery", 0,
+      &myglobals.enable_recovery, OPAL_CMD_LINE_TYPE_BOOL,
+      "Enable recovery (resets all recovery options to on)" },
+
+    { NULL, '\0', "personality", "personality", 1,
+      &myglobals.personality, OPAL_CMD_LINE_TYPE_STRING,
+      "Programming model/language being used (default=\"ompi\")" },
+
+    /* End of list */
+    { NULL, '\0', NULL, NULL, 0,
+      NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
+};
+
+/*
+ * Local functions: command line handling and app context construction
+ */
+static int create_app(int argc, char* argv[],
+                      orte_job_t *jdata,
+                      orte_app_context_t **app,
+                      bool *made_app, char ***app_env);
+static int init_globals(void);  /* resets the per-app fields of myglobals */
+static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line);  /* acts on --version/--help/--report-pid */
+static int parse_locals(orte_job_t *jdata, int argc, char* argv[]);  /* turns argv into the job's app contexts */
+static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile);
+static int parse_appfile(orte_job_t *jdata, char *filename, char ***env);
+
+
+/*
+ * Tool entry point: parse the command line, point ORTE at the HNP named by
+ * --hnp (a URI, or file:<name> whose first line holds the URI) and init as
+ * an ORTE tool.  Then either send the HNP a halt-DVM command (--terminate) or
+ * build a job from the arguments and ask the HNP to spawn it.
+ */
+int main(int argc, char *argv[])
+{
+    int rc;
+    opal_cmd_line_t cmd_line;
+    char *param;
+    orte_job_t *jdata=NULL;
+    char *hnpenv = NULL;
+
+    /* Setup and parse the command line */
+    memset(&myglobals, 0, sizeof(myglobals));
+    /* find our basename (the name of the executable) so that we can
+       use it in pretty-print error messages */
+    myglobals.basename = opal_basename(argv[0]);
+
+    opal_cmd_line_create(&cmd_line, cmd_line_init);
+    mca_base_cmd_line_setup(&cmd_line);
+    if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(&cmd_line, true,
+                                                  argc, argv)) ) {
+        if (OPAL_ERR_SILENT != rc) {
+            fprintf(stderr, "%s: command line error (%s)\n", argv[0],
+                    opal_strerror(rc));
+        }
+        return rc;
+    }
+
+    /* This process handles MCA/GMCA parameters itself, so process them now */
+    if (OPAL_SUCCESS != mca_base_cmd_line_process_args(&cmd_line, &environ, &environ)) {
+        exit(1);
+    }
+
+    /* Initialize OPAL so that install_dirs are filled in.
+     * NOTE: (JJH) mca_base_cmd_line_process_args() has to run *before* this
+     * since the command line could contain MCA parameters (AMCA parameters
+     * are one such option) that affect the way OPAL initialization behaves.
+     * That is "safe" because it does *not* depend upon OPAL being initialized.
+     */
+    if (OPAL_SUCCESS != opal_init(&argc, &argv)) {
+        exit(1);
+    }
+
+    /* Check for some "global" command line params */
+    parse_globals(argc, argv, &cmd_line);
+
+    /* if they didn't point us at an HNP, that's an error */
+    if (NULL == myglobals.hnp) {
+        fprintf(stderr, "orte-submit: required option --hnp not provided\n");
+        exit(1);
+    }
+    OBJ_DESTRUCT(&cmd_line);
+
+    if (0 == strncmp(myglobals.hnp, "file", strlen("file")) ||
+        0 == strncmp(myglobals.hnp, "FILE", strlen("FILE"))) {
+        char input[1024], *filename;
+        FILE *fp;
+
+        /* it is a file - get the filename */
+        filename = strchr(myglobals.hnp, ':');
+        if (NULL == filename) {
+            /* filename is not correctly formatted */
+            orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", myglobals.hnp);
+            exit(1);
+        }
+        ++filename; /* space past the : */
+
+        if (0 >= strlen(filename)) {
+            /* they forgot to give us the name! */
+            orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", myglobals.hnp);
+            exit(1);
+        }
+
+        /* open the file and extract the uri */
+        fp = fopen(filename, "r");
+        if (NULL == fp) { /* can't find or read file! */
+            orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, myglobals.hnp);
+            exit(1);
+        }
+        if (NULL == fgets(input, sizeof(input), fp)) {
+            /* something malformed about file */
+            fclose(fp);
+            orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, myglobals.hnp);
+            exit(1);
+        }
+        fclose(fp);
+        /* cut at the line terminator, if any - the file may lack a trailing
+         * newline, in which case chopping the last char would clip the uri */
+        input[strcspn(input, "\r\n")] = '\0';
+        /* construct the target hnp info */
+        rc = asprintf(&hnpenv, "OMPI_MCA_orte_hnp_uri=%s", input);
+    } else {
+        /* should just be the uri itself - construct the target hnp info */
+        rc = asprintf(&hnpenv, "OMPI_MCA_orte_hnp_uri=%s", myglobals.hnp);
+    }
+    if (0 > rc || 0 != putenv(hnpenv)) {  /* once in environ, hnpenv must not be freed */
+        fprintf(stderr, "%s: unable to export the HNP uri\n", myglobals.basename);
+        exit(1);
+    }
+
+    /* Setup MCA params */
+    orte_register_params();
+
+    /* flag that I am a TOOL */
+    orte_process_info.proc_type = ORTE_PROC_TOOL;
+
+    /* Initialize our Open RTE environment as a TOOL */
+    if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) {
+        /* cannot call ORTE_ERROR_LOG as it could be the errmgr never got loaded! */
+        return rc;
+    }
+    /* finalize OPAL. As it was opened again from orte_init->opal_init we continue
+     * to have a reference count on it. So we have to finalize it twice... */
+    opal_finalize();
+
+    /* set the info in our contact table */
+    orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
+    /* extract the name */
+    if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) {
+        orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
+        exit(1);
+    }
+    /* set the route to be direct */
+    if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
+        orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
+        orte_finalize();
+        exit(1);
+    }
+
+    /* set the target hnp as our lifeline so we will terminate if it exits */
+    orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
+
+    /* setup to listen for HNP response to my commands */
+    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_TOOL,
+                            ORTE_RML_PERSISTENT, local_recv, NULL);
+
+    /* TODO: set a timeout event in case the HNP doesn't answer */
+
+    /* if this is the terminate command, just send it */
+    if (myglobals.terminate) {
+        opal_buffer_t *buf;
+        orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_HALT_VM_CMD;
+        buf = OBJ_NEW(opal_buffer_t);
+        opal_dss.pack(buf, &cmd, 1, ORTE_DAEMON_CMD_T);
+        orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
+                                ORTE_RML_TAG_DAEMON,
+                                orte_rml_send_callback, NULL);
+        goto waiting;
+    }
+
+    /* default our personality to OMPI */
+    if (NULL == myglobals.personality) {
+        myglobals.personality = strdup("ompi");
+    }
+
+    /* create a new job object to hold the info for this one - the
+     * jobid field will be filled in by the PLM when the job is
+     * launched
+     */
+    jdata = OBJ_NEW(orte_job_t);
+    if (NULL == jdata) {
+        /* cannot call ORTE_ERROR_LOG as the errmgr
+         * hasn't been loaded yet!
+         */
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    jdata->personality = strdup(myglobals.personality);
+
+    /* check what user wants us to do with stdin */
+    if (NULL != myglobals.stdin_target) {
+        if (0 == strcmp(myglobals.stdin_target, "all")) {
+            jdata->stdin_target = ORTE_VPID_WILDCARD;
+        } else if (0 == strcmp(myglobals.stdin_target, "none")) {
+            jdata->stdin_target = ORTE_VPID_INVALID;
+        } else {
+            jdata->stdin_target = strtoul(myglobals.stdin_target, NULL, 10);
+        }
+    }
+
+    /* if we want the argv's indexed, indicate that */
+    if (myglobals.index_argv) {
+        orte_set_attribute(&jdata->attributes, ORTE_JOB_INDEX_ARGV, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
+    }
+
+    /* Parse each app, adding it to the job object */
+    if (ORTE_SUCCESS != (rc = parse_locals(jdata, argc, argv))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        goto DONE;
+    }
+
+    if (0 == jdata->num_apps) {
+        /* This should never happen -- this case should be caught in
+           create_app(), but let's just double check... */
+        orte_show_help("help-orterun.txt", "orterun:nothing-to-do",
+                       true, myglobals.basename);
+        exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
+    }
+
+    /* check for a job timeout specification, to be provided in seconds
+     * as that is what MPICH used
+     */
+    if (NULL != (param = getenv("MPIEXEC_TIMEOUT"))) {
+        if (NULL == (orte_mpiexec_timeout = OBJ_NEW(orte_timer_t))) {
+            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+            ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE);
+            goto DONE;
+        }
+        orte_mpiexec_timeout->tv.tv_sec = strtol(param, NULL, 10);
+        orte_mpiexec_timeout->tv.tv_usec = 0;
+        opal_event_evtimer_set(orte_event_base, orte_mpiexec_timeout->ev,
+                               orte_timeout_wakeup, jdata);
+        opal_event_set_priority(orte_mpiexec_timeout->ev, ORTE_ERROR_PRI);
+        opal_event_evtimer_add(orte_mpiexec_timeout->ev, &orte_mpiexec_timeout->tv);
+    }
+
+    /* if recovery was enabled on the cmd line, mark the job recoverable */
+    if (myglobals.enable_recovery) {
+        ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_RECOVERABLE);
+    }
+
+    /* ask the HNP to spawn the job for us; if that fails, don't wait on it */
+    if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {
+        ORTE_ERROR_LOG(rc);
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        goto DONE;
+    }
+
+ waiting:
+    /* loop the event lib until an exit event is detected */
+    while (orte_event_base_active) {
+        opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
+    }
+
+ DONE:
+    /* cleanup and leave */
+    orte_finalize();
+
+    if (orte_debug_flag) {
+        fprintf(stderr, "exiting with status %d\n", orte_exit_status);
+    }
+    exit(orte_exit_status);
+}
+
+/*
+ * Return the per-app option values in myglobals to their defaults so that
+ * the next command line parse starts clean.  Always returns ORTE_SUCCESS.
+ */
+static int init_globals(void)
+{
+    /* owned strings: release the old value (free(NULL) is a no-op) and clear */
+    free(myglobals.appfile);
+    myglobals.appfile = NULL;
+    free(myglobals.wdir);
+    myglobals.wdir = NULL;
+    free(myglobals.path);
+    myglobals.path = NULL;
+
+    /* NOTE(review): cleared without free(); confirm who owns this string */
+    myglobals.preload_files = NULL;
+
+    /* flags and counters */
+    myglobals.help = false;
+    myglobals.version = false;
+    myglobals.set_cwd_to_session_dir = false;
+    myglobals.preload_binaries = false;
+    myglobals.num_procs = 0;
+
+    return ORTE_SUCCESS;
+}
+
+
+/*
+ * Act on the "global" options already stored in myglobals: --version and
+ * --help print their text and exit(0); --report-pid writes our pid to
+ * stdout ("-"), stderr ("+") or the named file.
+ * argc/argv are unused; cmd_line supplies the usage text for --help.
+ * Returns ORTE_SUCCESS; exits non-zero if the pid file cannot be opened.
+ */
+static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line)
+{
+    char *str;
+    /* the project name shown to the user follows the name we were invoked as */
+    char *project_name = (NULL != myglobals.basename &&
+                          0 == strcmp(myglobals.basename, "ompi-submit")) ? "Open MPI" : "OpenRTE";
+
+    /* print version if requested.  Do this before check for help so
+       that --version --help works as one might expect. */
+    if (myglobals.version) {
+        str = opal_show_help_string("help-orterun.txt", "orterun:version",
+                                    false,
+                                    myglobals.basename, project_name, OPAL_VERSION,
+                                    PACKAGE_BUGREPORT);
+        if (NULL != str) {
+            printf("%s", str);
+            free(str);
+        }
+        exit(0);
+    }
+
+    /* Check for help request */
+    if (myglobals.help) {
+        char *args = opal_cmd_line_get_usage_msg(cmd_line);
+        str = opal_show_help_string("help-orterun.txt", "orterun:usage", false,
+                                    myglobals.basename, project_name, OPAL_VERSION,
+                                    myglobals.basename, args,
+                                    PACKAGE_BUGREPORT);
+        if (NULL != str) {
+            printf("%s", str);
+            free(str);
+        }
+        free(args);
+
+        /* If someone asks for help, that should be all we do */
+        exit(0);
+    }
+
+    /* check for request to report pid */
+    if (NULL != myglobals.report_pid) {
+        FILE *fp;
+        if (0 == strcmp(myglobals.report_pid, "-")) {
+            /* if '-', then output to stdout */
+            printf("%d\n", (int)getpid());
+        } else if (0 == strcmp(myglobals.report_pid, "+")) {
+            /* if '+', output to stderr */
+            fprintf(stderr, "%d\n", (int)getpid());
+        } else {
+            fp = fopen(myglobals.report_pid, "w");
+            if (NULL == fp) {
+                orte_show_help("help-orterun.txt", "orterun:write_file", false,
+                               myglobals.basename, "pid", myglobals.report_pid);
+                /* this is a failure, so do not report success to the caller */
+                exit(1);
+            }
+            fprintf(fp, "%d\n", (int)getpid());
+            fclose(fp);
+        }
+    }
+
+    return ORTE_SUCCESS;
+}
+
+
+/*
+ * Build one app context from argv via create_app() and, when one was made,
+ * append it to jdata->apps under the next index (*app_num).  *env is the
+ * "base" environment passed to create_app().  Exits if create_app() fails.
+ */
+static void parse_locals_add_app(int argc, char **argv, orte_job_t *jdata,
+                                 int *app_num, char ***env)
+{
+    orte_app_context_t *app = NULL;
+    bool made_app;
+    if (ORTE_SUCCESS != create_app(argc, argv, jdata, &app, &made_app, env)) {
+        /* Assume that the error message has already been printed;
+           no need to cleanup -- we can just exit */
+        exit(1);
+    }
+    if (made_app) {
+        /* keep track of the number of apps - point this app_context to that index */
+        app->idx = *app_num;
+        ++(*app_num);
+        opal_pointer_array_add(jdata->apps, app);
+        ++jdata->num_apps;
+    }
+}
+
+/*
+ * Split argv at each ":" into per-app command lines and create an app context
+ * for each; then merge the global MCA params into every app's environment and
+ * export a subset of the MCA params into our own environment.
+ * Returns ORTE_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE if a string copy fails.
+ */
+static int parse_locals(orte_job_t *jdata, int argc, char* argv[])
+{
+    int i, app_num;
+    int temp_argc;
+    char **temp_argv, **env;
+    orte_app_context_t *app;
+    orte_std_cntr_t j, size1;
+
+    /* Make the apps */
+    temp_argc = 0;
+    temp_argv = NULL;
+    opal_argv_append(&temp_argc, &temp_argv, argv[0]);
+
+    /* NOTE: This bogus env variable is necessary in the calls to
+       create_app(), below.  See comment immediately before the
+       create_app() function for an explanation. */
+
+    env = NULL;
+    for (app_num = 0, i = 1; i < argc; ++i) {
+        if (0 == strcmp(argv[i], ":")) {
+            /* Make an app with this argv */
+            if (opal_argv_count(temp_argv) > 1) {
+                if (NULL != env) {
+                    opal_argv_free(env);
+                    env = NULL;
+                }
+                parse_locals_add_app(temp_argc, temp_argv, jdata, &app_num, &env);
+                /* Reset the temps - free the argv just consumed first, or
+                   every ":"-separated app would leak its argument vector */
+                opal_argv_free(temp_argv);
+                temp_argc = 0;
+                temp_argv = NULL;
+                opal_argv_append(&temp_argc, &temp_argv, argv[0]);
+            }
+        } else {
+            opal_argv_append(&temp_argc, &temp_argv, argv[i]);
+        }
+    }
+
+    if (opal_argv_count(temp_argv) > 1) {
+        parse_locals_add_app(temp_argc, temp_argv, jdata, &app_num, &env);
+    }
+    if (NULL != env) {
+        opal_argv_free(env);
+    }
+    opal_argv_free(temp_argv);
+
+   /* Once we've created all the apps, add the global MCA params to
+       each app's environment (checking for duplicates, of
+       course -- yay opal_environ_merge()).  */
+
+    if (NULL != global_mca_env) {
+        size1 = (orte_std_cntr_t)opal_pointer_array_get_size(jdata->apps);
+        /* Iterate through all the apps */
+        for (j = 0; j < size1; ++j) {
+            app = (orte_app_context_t *)
+                opal_pointer_array_get_item(jdata->apps, j);
+            if (NULL != app) {
+                /* Use handy utility function */
+                env = opal_environ_merge(global_mca_env, app->env);
+                opal_argv_free(app->env);
+                app->env = env;
+            }
+        }
+    }
+
+    /* Now take a subset of the MCA params and set them as MCA overrides
+       in our own environment.  NOTE(review): main() calls orte_init()
+       before parse_locals(), so this happens after ORTE is initialized.
+       Here's how we decide which subset of the MCA params we set here:
+
+       1. If any global MCA params were set, use those
+       2. If no global MCA params were set and there was only one app,
+          then use its app MCA params
+       3. Otherwise, don't set any
+    */
+
+    env = NULL;
+    if (NULL != global_mca_env) {
+        env = global_mca_env;
+    } else {
+        if (opal_pointer_array_get_size(jdata->apps) >= 1) {
+            /* Remember that pointer_array's can be padded with NULL
+               entries; so only use the app's env if there is exactly
+               1 non-NULL entry */
+            app = (orte_app_context_t *)
+                opal_pointer_array_get_item(jdata->apps, 0);
+            if (NULL != app) {
+                env = app->env;
+                for (j = 1; j < opal_pointer_array_get_size(jdata->apps); ++j) {
+                    if (NULL != opal_pointer_array_get_item(jdata->apps, j)) {
+                        env = NULL;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    if (NULL != env) {
+        size1 = opal_argv_count(env);
+        for (j = 0; j < size1; ++j) {
+            /* Use-after-Free error possible here.  putenv does not copy
+             * the string passed to it, and instead stores only the pointer.
+             * env[j] may be freed later, in which case the pointer
+             * in environ will now be left dangling into a deallocated
+             * region.
+             * So we make a copy of the variable.
+             */
+            char *s = strdup(env[j]);
+            if (NULL == s) {
+                return OPAL_ERR_OUT_OF_RESOURCE;
+            }
+            putenv(s);
+        }
+    }
+
+    return ORTE_SUCCESS;
+}
+
+
+/*
+ * Build a single application context from one application's command
+ * line.
+ *
+ *   argc, argv - the command line to parse
+ *   jdata      - job being assembled; only forwarded to parse_appfile()
+ *   app_ptr    - receives the new app context when *made_app is true
+ *   made_app   - set to false on entry and to true only once an app
+ *                context has been handed back through app_ptr
+ *   app_env    - "base" environment for the app (see below)
+ *
+ * Returns ORTE_SUCCESS, or the error code of the step that failed.  If
+ * an appfile was given, the result of parse_appfile() is returned and
+ * *made_app is left false (the apps are added to jdata by
+ * parse_appfile() itself).  On every error path the partially built
+ * app context and the command line object are released.
+ *
+ * This function takes a "char ***app_env" parameter to handle the
+ * specific case:
+ *
+ *   orterun --mca foo bar -app appfile
+ *
+ * That is, we'll need to keep foo=bar, but the presence of the app
+ * file will cause an invocation of parse_appfile(), which will cause
+ * one or more recursive calls back to create_app().  Since the
+ * foo=bar value applies globally to all apps in the appfile, we need
+ * to pass in the "base" environment (that contains the foo=bar value)
+ * when we parse each line in the appfile.
+ *
+ * This is really just a special case -- when we have a simple case like:
+ *
+ *   orterun --mca foo bar -np 4 hostname
+ *
+ * Then the upper-level function (parse_locals()) calls create_app()
+ * with a NULL value for app_env, meaning that there is no "base"
+ * environment that the app needs to be created from.
+ */
+static int create_app(int argc, char* argv[],
+                      orte_job_t *jdata,
+                      orte_app_context_t **app_ptr,
+                      bool *made_app, char ***app_env)
+{
+    opal_cmd_line_t cmd_line;
+    char cwd[OPAL_PATH_MAX];
+    int i, j, count, rc;
+    char *param, *value;
+    orte_app_context_t *app = NULL;
+    bool cmd_line_made = false;
+    bool found = false;
+    char *appname;
+
+    *made_app = false;
+
+    /* Pre-process the command line if we are going to parse an appfile later.
+     * save any mca command line args so they can be passed
+     * separately to the daemons.
+     * Use Case:
+     *  $ cat launch.appfile
+     *  -np 1 -mca aaa bbb ./my-app -mca ccc ddd
+     *  -np 1 -mca aaa bbb ./my-app -mca eee fff
+     *  $ mpirun -np 2 -mca foo bar --app launch.appfile
+     * Only pick up '-mca foo bar' on this pass.
+     */
+    if (NULL != myglobals.appfile) {
+        if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(myglobals.personality, argc, 0, argv))) {
+            goto cleanup;
+        }
+    }
+
+    /* Parse application command line options. */
+
+    init_globals();
+    opal_cmd_line_create(&cmd_line, cmd_line_init);
+    mca_base_cmd_line_setup(&cmd_line);
+    cmd_line_made = true;
+    rc = opal_cmd_line_parse(&cmd_line, true, argc, argv);
+    if (ORTE_SUCCESS != rc) {
+        goto cleanup;
+    }
+    mca_base_cmd_line_process_args(&cmd_line, app_env, &global_mca_env);
+
+    /* Is there an appfile in here? */
+
+    if (NULL != myglobals.appfile) {
+        OBJ_DESTRUCT(&cmd_line);
+        /* parse_appfile() receives its own copy of the file name */
+        return parse_appfile(jdata, strdup(myglobals.appfile), app_env);
+    }
+
+    /* Setup application context */
+
+    app = OBJ_NEW(orte_app_context_t);
+    opal_cmd_line_get_tail(&cmd_line, &count, &app->argv);
+
+    /* See if we have anything left */
+
+    if (0 == count) {
+        orte_show_help("help-orterun.txt", "orterun:executable-not-specified",
+                       true, myglobals.basename, myglobals.basename);
+        rc = ORTE_ERR_NOT_FOUND;
+        goto cleanup;
+    }
+
+    /*
+     * Get mca parameters so we can pass them to the daemons.
+     * Use the count determined above to make sure we do not go past
+     * the executable name. Example:
+     *   mpirun -np 2 -mca foo bar ./my-app -mca bip bop
+     * We want to pick up '-mca foo bar' but not '-mca bip bop'
+     */
+    if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(myglobals.personality,
+                                                    argc, count, argv))) {
+        goto cleanup;
+    }
+
+    /* Grab all OMPI_* environment variables */
+
+    app->env = opal_argv_copy(*app_env);
+    if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(myglobals.personality,
+                                                    myglobals.path,
+                                                    &cmd_line, NULL,
+                                                    environ, &app->env))) {
+        goto cleanup;
+    }
+
+
+    /* Did the user request a specific wdir? */
+
+    if (NULL != myglobals.wdir) {
+        /* if this is a relative path, convert it to an absolute path */
+        if (opal_path_is_absolute(myglobals.wdir)) {
+            app->cwd = strdup(myglobals.wdir);
+        } else {
+            /* get the cwd */
+            if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
+                orte_show_help("help-orterun.txt", "orterun:init-failure",
+                               true, "get the cwd", rc);
+                goto cleanup;
+            }
+            /* construct the absolute path */
+            app->cwd = opal_os_path(false, cwd, myglobals.wdir, NULL);
+        }
+        orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
+    } else if (myglobals.set_cwd_to_session_dir) {
+        orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
+        orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
+    } else {
+        if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
+            orte_show_help("help-orterun.txt", "orterun:init-failure",
+                           true, "get the cwd", rc);
+            goto cleanup;
+        }
+        app->cwd = strdup(cwd);
+    }
+
+    /* if this is the first app_context, check for prefix directions.
+     * We only do this for the first app_context because the launchers
+     * only look at the first one when setting the prefix - we do NOT
+     * support per-app_context prefix settings!
+     */
+    if (0 == total_num_apps) {
+        /* Check to see if the user explicitly wanted to disable automatic
+           --prefix behavior */
+
+        if (opal_cmd_line_is_taken(&cmd_line, "noprefix")) {
+            want_prefix_by_default = false;
+        }
+
+        /* Did the user specify a prefix, or want prefix by default? */
+        if (opal_cmd_line_is_taken(&cmd_line, "prefix") || want_prefix_by_default) {
+            size_t param_len;
+            /* if both the prefix was given and we have a prefix
+             * given above, check to see if they match
+             */
+            if (opal_cmd_line_is_taken(&cmd_line, "prefix") &&
+                NULL != myglobals.prefix) {
+                /* if they don't match, then that merits a warning */
+                param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0));
+                /* ensure we strip any trailing '/' - the length check
+                 * keeps an empty string from being indexed at [-1]
+                 */
+                param_len = strlen(param);
+                if (0 < param_len &&
+                    0 == strcmp(OPAL_PATH_SEP, &(param[param_len-1]))) {
+                    param[param_len-1] = '\0';
+                }
+                value = strdup(myglobals.prefix);
+                param_len = strlen(value);
+                if (0 < param_len &&
+                    0 == strcmp(OPAL_PATH_SEP, &(value[param_len-1]))) {
+                    value[param_len-1] = '\0';
+                }
+                if (0 != strcmp(param, value)) {
+                    orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict",
+                                   true, myglobals.basename, value, param);
+                    /* let the global-level prefix take precedence since we
+                     * know that one is being used
+                     */
+                    free(param);
+                    param = strdup(myglobals.prefix);
+                }
+                free(value);
+            } else if (NULL != myglobals.prefix) {
+                /* work on a private copy: param is trimmed in place and
+                 * free'd below, which must not happen to the global value
+                 */
+                param = strdup(myglobals.prefix);
+            } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){
+                /* must be --prefix alone */
+                param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0));
+            } else {
+                /* --enable-orterun-prefix-default was given to orterun */
+                param = strdup(opal_install_dirs.prefix);
+            }
+
+            if (NULL != param) {
+                /* "Parse" the param, aka remove superfluous path_sep. */
+                param_len = strlen(param);
+                while (0 < param_len &&
+                       0 == strcmp(OPAL_PATH_SEP, &(param[param_len-1]))) {
+                    param[param_len-1] = '\0';
+                    param_len--;
+                }
+                if (0 == param_len) {
+                    /* nothing but separators (or nothing at all) */
+                    orte_show_help("help-orterun.txt", "orterun:empty-prefix",
+                                   true, myglobals.basename, myglobals.basename);
+                    free(param);
+                    rc = ORTE_ERR_FATAL;
+                    goto cleanup;
+                }
+                orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING);
+                free(param);
+            }
+        }
+    }
+
+    /* Did the user specify a hostfile. Need to check for both
+     * hostfile and machine file.
+     * We can only deal with one hostfile per app context, otherwise give an error.
+     */
+    if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) {
+        if(1 < j) {
+            orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
+                           true, myglobals.basename, NULL);
+            rc = ORTE_ERR_FATAL;
+            goto cleanup;
+        } else {
+            value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0);
+            orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING);
+        }
+    }
+    if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) {
+        if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) {
+            orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
+                           true, myglobals.basename, NULL);
+            rc = ORTE_ERR_FATAL;
+            goto cleanup;
+        } else {
+            value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0);
+            orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING);
+        }
+    }
+
+    /* Did the user specify any hosts? */
+    if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "host"))) {
+        char **targ=NULL, *tval;
+        for (i = 0; i < j; ++i) {
+            value = opal_cmd_line_get_param(&cmd_line, "host", i, 0);
+            opal_argv_append_nosize(&targ, value);
+        }
+        /* all --host instances are stored as one comma-separated list */
+        tval = opal_argv_join(targ, ',');
+        orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING);
+        opal_argv_free(targ);
+        free(tval);
+    }
+
+    /* check for bozo error */
+    if (0 > myglobals.num_procs) {
+        orte_show_help("help-orterun.txt", "orterun:negative-nprocs",
+                       true, myglobals.basename, app->argv[0],
+                       myglobals.num_procs, NULL);
+        rc = ORTE_ERR_FATAL;
+        goto cleanup;
+    }
+
+    app->num_procs = (orte_std_cntr_t)myglobals.num_procs;
+    total_num_apps++;
+
+    /* Capture any preload flags */
+    if (myglobals.preload_binaries) {
+        orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
+    }
+    /* if we were told to cwd to the session dir and the app was given in
+     * relative syntax, then we need to preload the binary to
+     * find the app - don't do this for java apps, however, as we
+     * can't easily find the class on the cmd line. Java apps have to
+     * preload their binary via the preload_files option
+     */
+    if (!opal_path_is_absolute(app->argv[0]) &&
+        NULL == strstr(app->argv[0], "java")) {
+        if (myglobals.preload_binaries) {
+            orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
+        } else if (orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) {
+            orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
+        }
+    }
+    if (NULL != myglobals.preload_files) {
+        orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, ORTE_ATTR_LOCAL,
+                           myglobals.preload_files, OPAL_STRING);
+    }
+
+    /* Do not try to find argv[0] here -- the starter is responsible
+       for that because it may not be relevant to try to find it on
+       the node where orterun is executing.  So just strdup() argv[0]
+       into app. */
+
+    app->app = strdup(app->argv[0]);
+    if (NULL == app->app) {
+        orte_show_help("help-orterun.txt", "orterun:call-failed",
+                       true, myglobals.basename, "library", "strdup returned NULL", errno);
+        rc = ORTE_ERR_NOT_FOUND;
+        goto cleanup;
+    }
+
+    /* if this is a Java application, we have a bit more work to do. Such
+     * applications actually need to be run under the Java virtual machine
+     * and the "java" command will start the "executable". So we need to ensure
+     * that all the proper java-specific paths are provided
+     */
+    appname = opal_basename(app->app);
+    if (0 == strcmp(appname, "java")) {
+        /* see if we were given a library path */
+        found = false;
+        for (i=1; NULL != app->argv[i]; i++) {
+            if (NULL != strstr(app->argv[i], "java.library.path")) {
+                /* yep - but does it include the path to the mpi libs? */
+                found = true;
+                if (NULL == strstr(app->argv[i], opal_install_dirs.libdir)) {
+                    /* doesn't appear to - add it to be safe.  The
+                     * argument already carries its own
+                     * "-Djava.library.path=" text, so only the libdir
+                     * is appended (with a ':' unless one is already
+                     * the last character)
+                     */
+                    size_t arglen = strlen(app->argv[i]);
+                    if (0 < arglen && ':' == app->argv[i][arglen-1]) {
+                        asprintf(&value, "%s%s", app->argv[i], opal_install_dirs.libdir);
+                    } else {
+                        asprintf(&value, "%s:%s", app->argv[i], opal_install_dirs.libdir);
+                    }
+                    free(app->argv[i]);
+                    app->argv[i] = value;
+                }
+                break;
+            }
+        }
+        if (!found) {
+            /* need to add it right after the java command */
+            asprintf(&value, "-Djava.library.path=%s", opal_install_dirs.libdir);
+            opal_argv_insert_element(&app->argv, 1, value);
+            free(value);
+        }
+
+        /* see if we were given a class path */
+        /* NOTE(review): the strstr() match on "cp" is a loose heuristic,
+         * and the code below assumes the class path follows as
+         * argv[i+1] -- confirm a trailing "-cp" cannot reach here
+         */
+        found = false;
+        for (i=1; NULL != app->argv[i]; i++) {
+            if (NULL != strstr(app->argv[i], "cp") ||
+                NULL != strstr(app->argv[i], "classpath")) {
+                /* yep - but does it include the path to the mpi libs? */
+                found = true;
+                /* check if mpi.jar exists - if so, add it */
+                value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
+                if (access(value, F_OK ) != -1) {
+                    set_classpath_jar_file(app, i+1, "mpi.jar");
+                }
+                free(value);
+                /* check for oshmem support */
+                value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
+                if (access(value, F_OK ) != -1) {
+                    set_classpath_jar_file(app, i+1, "shmem.jar");
+                }
+                free(value);
+                /* always add the local directory */
+                asprintf(&value, "%s:%s", app->cwd, app->argv[i+1]);
+                free(app->argv[i+1]);
+                app->argv[i+1] = value;
+                break;
+            }
+        }
+        if (!found) {
+            /* check to see if CLASSPATH is in the environment */
+            found = false;  // just to be pedantic
+            for (i=0; NULL != environ[i]; i++) {
+                if (0 == strncmp(environ[i], "CLASSPATH", strlen("CLASSPATH"))) {
+                    value = strchr(environ[i], '=');
+                    ++value; /* step over the = */
+                    opal_argv_insert_element(&app->argv, 1, value);
+                    /* check for mpi.jar */
+                    value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
+                    if (access(value, F_OK ) != -1) {
+                        set_classpath_jar_file(app, 1, "mpi.jar");
+                    }
+                    free(value);
+                    /* check for shmem.jar */
+                    value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
+                    if (access(value, F_OK ) != -1) {
+                        set_classpath_jar_file(app, 1, "shmem.jar");
+                    }
+                    free(value);
+                    /* always add the local directory */
+                    (void)asprintf(&value, "%s:%s", app->cwd, app->argv[1]);
+                    free(app->argv[1]);
+                    app->argv[1] = value;
+                    opal_argv_insert_element(&app->argv, 1, "-cp");
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) {
+                /* need to add it right after the java command - have
+                 * to include the working directory and trust that
+                 * the user set cwd if necessary
+                 */
+                char *str, *str2;
+                /* always start with the working directory */
+                str = strdup(app->cwd);
+                /* check for mpi.jar */
+                value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
+                if (access(value, F_OK ) != -1) {
+                    (void)asprintf(&str2, "%s:%s", str, value);
+                    free(str);
+                    str = str2;
+                }
+                free(value);
+                /* check for shmem.jar */
+                value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
+                if (access(value, F_OK ) != -1) {
+                    asprintf(&str2, "%s:%s", str, value);
+                    free(str);
+                    str = str2;
+                }
+                free(value);
+                opal_argv_insert_element(&app->argv, 1, str);
+                free(str);
+                opal_argv_insert_element(&app->argv, 1, "-cp");
+            }
+        }
+        /* try to find the actual command - may not be perfect */
+        for (i=1; i < opal_argv_count(app->argv); i++) {
+            if (NULL != strstr(app->argv[i], "java.library.path")) {
+                continue;
+            } else if (NULL != strstr(app->argv[i], "cp") ||
+                       NULL != strstr(app->argv[i], "classpath")) {
+                /* skip the next field */
+                i++;
+                continue;
+            }
+            /* declare this the winner */
+            opal_setenv("OMPI_COMMAND", app->argv[i], true, &app->env);
+            /* collect everything else as the cmd line */
+            if ((i+1) < opal_argv_count(app->argv)) {
+                value = opal_argv_join(&app->argv[i+1], ' ');
+                opal_setenv("OMPI_ARGV", value, true, &app->env);
+                free(value);
+            }
+            break;
+        }
+    } else {
+        /* add the cmd to the environment for MPI_Info to pickup */
+        opal_setenv("OMPI_COMMAND", appname, true, &app->env);
+        if (1 < opal_argv_count(app->argv)) {
+            value = opal_argv_join(&app->argv[1], ' ');
+            opal_setenv("OMPI_ARGV", value, true, &app->env);
+            free(value);
+        }
+    }
+    free(appname);
+
+    /* hand the finished app to the caller; clearing the local pointer
+     * keeps the cleanup code below from releasing it
+     */
+    *app_ptr = app;
+    app = NULL;
+    *made_app = true;
+
+    /* All done */
+
+ cleanup:
+    if (NULL != app) {
+        OBJ_RELEASE(app);
+    }
+    if (cmd_line_made) {
+        OBJ_DESTRUCT(&cmd_line);
+    }
+    return rc;
+}
+
+/*
+ * Make sure the class path stored in app->argv[index] mentions jarfile.
+ *
+ * If jarfile does not already appear in that argument, the argument is
+ * replaced by "<old value>:<libdir>/<jarfile>", where <libdir> is
+ * opal_install_dirs.libdir; the ':' is omitted when the old value
+ * already ends in one.  The argument is left untouched if the new
+ * string cannot be allocated.
+ */
+static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile)
+{
+    size_t len;
+    char *fmt, *str;
+
+    if (NULL != strstr(app->argv[index], jarfile)) {
+        /* already present - nothing to add */
+        return;
+    }
+
+    /* look at the last character of the argument; the length check
+     * keeps an empty string from being indexed at [-1]
+     */
+    len = strlen(app->argv[index]);
+    fmt = (0 < len && ':' == app->argv[index][len-1])
+          ? "%s%s/%s" : "%s:%s/%s";
+    if (0 > asprintf(&str, fmt, app->argv[index], opal_install_dirs.libdir, jarfile)) {
+        /* str is undefined on failure - keep the old argument */
+        return;
+    }
+    free(app->argv[index]);
+    app->argv[index] = str;
+}
+
+/*
+ * Read an appfile and add one app context to jdata for every line that
+ * describes an application.
+ *
+ *   jdata    - job that receives the app contexts (apps / num_apps)
+ *   filename - heap-allocated path of the appfile; this function takes
+ *              ownership and frees it before returning
+ *   env      - base environment; each line is parsed against its own
+ *              copy so that one line cannot affect the next
+ *
+ * Each line is handed to create_app() as if it were a command line.
+ * Text after a '#' or a "//" is dropped and blank lines are skipped.
+ *
+ * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND if the file cannot be
+ * opened, or ORTE_ERR_OUT_OF_RESOURCE if the environment cannot be
+ * copied.  A create_app() failure terminates the process via exit(1).
+ */
+static int parse_appfile(orte_job_t *jdata, char *filename, char ***env)
+{
+    size_t i, len;
+    FILE *fp;
+    char line[BUFSIZ];
+    int rc, argc, app_num;
+    char **argv;
+    orte_app_context_t *app;
+    bool blank, made_app;
+    char bogus[] = "bogus ";
+    char **tmp_env;
+
+    /*
+     * Make sure to clear out this variable so we don't do anything odd in
+     * create_app()
+     */
+    if (NULL != myglobals.appfile) {
+        free(myglobals.appfile);
+        myglobals.appfile = NULL;
+    }
+
+    /* Try to open the file */
+
+    fp = fopen(filename, "r");
+    if (NULL == fp) {
+        orte_show_help("help-orterun.txt", "orterun:appfile-not-found", true,
+                       filename);
+        /* we own filename, so release it on this path too */
+        free(filename);
+        return ORTE_ERR_NOT_FOUND;
+    }
+
+    /* Read in line by line */
+
+    line[sizeof(line) - 1] = '\0';
+    app_num = 0;
+    do {
+
+        /* We need a bogus argv[0] (because when argv comes in from
+           the command line, argv[0] is "orterun", so the parsing
+           logic ignores it).  So create one here rather than making
+           an argv and then pre-pending a new argv[0] (which would be
+           rather inefficient). */
+
+        line[0] = '\0';
+        strcat(line, bogus);
+
+        /* the file text is stored right after the "bogus " prefix */
+        if (NULL == fgets(line + sizeof(bogus) - 1,
+                          sizeof(line) - sizeof(bogus) - 1, fp)) {
+            break;
+        }
+
+        /* Remove a trailing newline */
+
+        len = strlen(line);
+        if (len > 0 && '\n' == line[len - 1]) {
+            line[len - 1] = '\0';
+            --len;
+        }
+
+        /* Remove comments */
+
+        for (i = 0; i < len; ++i) {
+            if ('#' == line[i]) {
+                line[i] = '\0';
+                break;
+            } else if (i + 1 < len && '/' == line[i] && '/' == line[i + 1]) {
+                line[i] = '\0';
+                break;
+            }
+        }
+
+        /* Is this a blank line?  Start at the first character after
+           the "bogus " prefix (index sizeof(bogus) - 1) so that a
+           one-character line is not mistaken for a blank one. */
+
+        len = strlen(line);
+        for (blank = true, i = sizeof(bogus) - 1; i < len; ++i) {
+            if (!isspace((unsigned char)line[i])) {
+                blank = false;
+                break;
+            }
+        }
+        if (blank) {
+            continue;
+        }
+
+        /* We got a line with *something* on it.  So process it */
+
+        argv = opal_argv_split(line, ' ');
+        argc = opal_argv_count(argv);
+        if (argc > 0) {
+
+            /* Create a temporary env to use in the recursive call --
+               that is: don't disturb the original env so that we can
+               have a consistent global env.  This allows for the
+               case:
+
+                   orterun --mca foo bar --appfile file
+
+               where the "file" contains multiple apps.  In this case,
+               each app in "file" will get *only* foo=bar as the base
+               environment from which its specific environment is
+               constructed. */
+
+            if (NULL != *env) {
+                tmp_env = opal_argv_copy(*env);
+                if (NULL == tmp_env) {
+                    /* release everything acquired so far */
+                    opal_argv_free(argv);
+                    fclose(fp);
+                    free(filename);
+                    return ORTE_ERR_OUT_OF_RESOURCE;
+                }
+            } else {
+                tmp_env = NULL;
+            }
+
+            rc = create_app(argc, argv, jdata, &app, &made_app, &tmp_env);
+            if (ORTE_SUCCESS != rc) {
+                /* Assume that the error message has already been
+                   printed; no need to cleanup -- we can just exit */
+                exit(1);
+            }
+            if (NULL != tmp_env) {
+                opal_argv_free(tmp_env);
+            }
+            if (made_app) {
+                app->idx = app_num;
+                ++app_num;
+                opal_pointer_array_add(jdata->apps, app);
+                ++jdata->num_apps;
+            }
+        }
+        /* NOTE(review): argv is not freed here; confirm create_app()
+           keeps no reference to it before adding opal_argv_free(argv) */
+    } while (!feof(fp));
+    fclose(fp);
+
+    /* All done */
+
+    free(filename);
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Callback for the job execution time limit: report the timeout,
+ * record the default error exit code and clear orte_event_base_active.
+ * None of the three arguments is used.
+ */
+void orte_timeout_wakeup(int sd, short args, void *cbdata)
+{
+    /* the help message shows the configured limit, or the text "NULL"
+     * when MPIEXEC_TIMEOUT is not set in the environment
+     */
+    const char *limit = getenv("MPIEXEC_TIMEOUT");
+
+    if (NULL == limit) {
+        limit = "NULL";
+    }
+
+    orte_show_help("help-orterun.txt", "orterun:timeout", true, limit);
+    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
+    orte_event_base_active = false;
+}
+
+/*
+ * Receive callback: unpack the job's completion status from buffer and
+ * fold it into our exit status.  If the status cannot be unpacked, the
+ * unpack error code is recorded instead.  status, sender, tag and
+ * cbdata are not used.
+ */
+static void local_recv(int status, orte_process_name_t* sender,
+                       opal_buffer_t *buffer,
+                       orte_rml_tag_t tag, void *cbdata)
+{
+    int rc, ret;
+    int32_t cnt;
+
+    /* unpack the completion status of the job */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) {
+        /* ret was never filled in, so it must not be reported */
+        ORTE_UPDATE_EXIT_STATUS(rc);
+        return;
+    }
+    /* update our exit status to match */
+    ORTE_UPDATE_EXIT_STATUS(ret);
+
+    /* NOTE(review): the original comment here said "eject us from the
+     * event loop - we are done", but no code does so - confirm whether
+     * that step is missing
+     */
+}
+
diff --git a/orte/tools/orterun/Makefile.am b/orte/tools/orterun/Makefile.am
index d95e27dd2b..d2d49e6ca7 100644
--- a/orte/tools/orterun/Makefile.am
+++ b/orte/tools/orterun/Makefile.am
@@ -11,6 +11,7 @@
 #                         All rights reserved.
 # Copyright (c) 2008-2014 Cisco Systems, Inc.  All rights reserved.
 # Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2015      Intel, Inc.  All rights reserved.
 # $COPYRIGHT$
 # 
 # Additional copyrights may follow
diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c
index 1a802526fe..26bdff0be2 100644
--- a/orte/tools/orterun/orterun.c
+++ b/orte/tools/orterun/orterun.c
@@ -549,6 +549,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
       &orterun_globals.personality, OPAL_CMD_LINE_TYPE_STRING,
       "Programming model/language being used (default=\"ompi\")" },
 
+    { NULL, '\0', "dvm", "dvm", 0,
+      &orterun_globals.dvm, OPAL_CMD_LINE_TYPE_BOOL,
+      "Programming model/language being used (default=\"ompi\")" },
+
     /* End of list */
     { NULL, '\0', NULL, NULL, 0,
       NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@@ -1131,6 +1135,7 @@ static int init_globals(void)
         orterun_globals.index_argv = false;
         orterun_globals.run_as_root = false;
         orterun_globals.personality = NULL;
+        orterun_globals.dvm = false;
     }
 
     /* Reset the other fields every time */
diff --git a/orte/tools/orterun/orterun.h b/orte/tools/orterun/orterun.h
index 5f1f0fbab7..2ad00ccca4 100644
--- a/orte/tools/orterun/orterun.h
+++ b/orte/tools/orterun/orterun.h
@@ -65,6 +65,7 @@ struct orterun_globals_t {
     bool index_argv;
     bool run_as_root;
     char *personality;
+    bool dvm;
 };
 
 /**