diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c
index 049739e66d..3cef652ed6 100644
--- a/orte/mca/odls/base/odls_base_default_fns.c
+++ b/orte/mca/odls/base/odls_base_default_fns.c
@@ -2851,7 +2851,13 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
             kill_local(child->pid, SIGTERM);
 
             /* check to see if it died - the child_died function will continue
-             * to check every microsecond until we reach the timeout
+             * to check until we reach the timeout
+	     *
+	     * In practice, it doesn't matter what child_died reports
+	     * - we KILL the process anyway, to be sure it's dead.
+	     * However, what it does do is delay the KILL until either
+	     * the process is verified dead or the timeout elapsed,
+	     * which gives it time enough to shut down.
              */
             if (!child_died(child)) {
                 /* if it still isn't dead, try killing it one more time */
diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c
index 0398a7d487..7f5c7674fc 100644
--- a/orte/mca/odls/default/odls_default_module.c
+++ b/orte/mca/odls/default/odls_default_module.c
@@ -190,10 +190,12 @@ static bool odls_default_child_died(orte_odls_child_t *child)
 {
     time_t end;
     pid_t ret;
-    struct timeval t;
-    fd_set bogus;
         
-    end = time(NULL) + orte_odls_globals.timeout_before_sigkill;
+    /* Because of rounding in time (which returns whole seconds) we
+     * have to add 1 to our wait number: this means that we wait
+     * somewhere between (target) and (target)+1 seconds.  Otherwise,
+     * the default 1s actually means 'somwhere between 0 and 1s'. */
+    end = time(NULL) + orte_odls_globals.timeout_before_sigkill + 1;
     do {
         ret = waitpid(child->pid, &child->exit_code, WNOHANG);
         if (child->pid == ret) {
@@ -208,13 +210,17 @@ static bool odls_default_child_died(orte_odls_child_t *child)
              * as there is no error - this is a race condition problem
              * that occasionally causes us to incorrectly report a proc
              * as refusing to die. Unfortunately, errno may not be reset
-             * by waitpid in this case, so we cannot check it - just assume
-             * the proc has indeed died
+             * by waitpid in this case, so we cannot check it.
+	     *
+	     * (note the previous fix to this, to return 'process dead'
+	     * here, fixes the race condition at the cost of reporting
+	     * all live processes have immediately died!  Better to
+	     * occasionally report a dead process as still living -
+	     * which will occasionally trip the timeout for cases that
+	     * are right on the edge.)
              */
-            OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
-                                 "%s odls:default:WAITPID INDICATES PROC %d HAS ALREADY EXITED",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
-            return true;
+
+	    /* Do nothing, process still alive */
         } else if (-1 == ret && ECHILD == errno) {
             /* The pid no longer exists, so we'll call this "good
                enough for government work" */
@@ -224,14 +230,14 @@ static bool odls_default_child_died(orte_odls_child_t *child)
             return true;
         }
         
-        /* Bogus delay for 1 usec (sched_yeild() -- even if we have it
-           -- changed behavior in 2.6.3x Linux flavors to be
-           undesirable. */
-        t.tv_sec = 0;
-        t.tv_usec = 1;
-        FD_ZERO(&bogus);
-        FD_SET(0, &bogus);
-        select(1, &bogus, NULL, NULL, &t);
+        /* Bogus delay for 1 msec - let's actually give the CPU some time
+         * to quit the other process (sched_yield() -- even if we have it
+         * -- changed behavior in 2.6.3x Linux flavors to be undesirable)
+         * Don't use select on a bogus file descriptor here as it has proven
+         * unreliable and sometimes immediately returns - we really, really
+         * -do- want to wait a bit!
+         */
+        usleep(1000);
     } while (time(NULL) < end);
 
     /* The child didn't die, so return false */
diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c
index 9f23ef5b26..23879fd00c 100644
--- a/orte/orted/orted_comm.c
+++ b/orte/orted/orted_comm.c
@@ -739,7 +739,7 @@ int orte_daemon_process_commands(orte_process_name_t* sender,
 
         /****    EXIT COMMAND    ****/
     case ORTE_DAEMON_EXIT_CMD:
-        if (orte_debug_daemons_flag) {
+        if (1) {
             opal_output(0, "%s orted_cmd: received exit cmd",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
         }
@@ -757,10 +757,12 @@ int orte_daemon_process_commands(orte_process_name_t* sender,
             
         /****    HALT VM COMMAND    ****/
     case ORTE_DAEMON_HALT_VM_CMD:
-        if (orte_debug_daemons_flag) {
+        if (1) {
             opal_output(0, "%s orted_cmd: received halt vm",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
         }
+        /* kill the local procs */
+        orte_odls.kill_local_procs(NULL);
         /* trigger our appropriate exit procedure
          * NOTE: this event will fire -after- any zero-time events
          * so any pending relays -do- get sent first