1
1

Multiple SIGCHLD reports can occur within a single event callback, so we have to keep reaping children until none remain. Also, we need to ensure the daemon is flagged as alive prior to calling orte_wait_cb.

Refs trac:4717

This commit was SVN r32020.

The following Trac tickets were found above:
  Ticket 4717 --> https://svn.open-mpi.org/trac/ompi/ticket/4717
Этот коммит содержится в:
Ralph Castain 2014-06-17 18:46:40 +00:00
родитель 42bf7466fc
Коммит 5216bd5558
2 изменённых файлов: 27 добавлений и 27 удалений

Просмотреть файл

@ -921,6 +921,7 @@ static void process_launch_list(int fd, short args, void *cbdata)
}
caddy = (orte_plm_rsh_caddy_t*)item;
/* register the sigchild callback */
ORTE_FLAG_SET(caddy->daemon, ORTE_PROC_FLAG_ALIVE);
orte_wait_cb(caddy->daemon, rsh_wait_daemon, (void*)caddy);
/* fork a child to exec the rsh/ssh session */

Просмотреть файл

@ -248,12 +248,14 @@ static void wait_signal_callback(int fd, short event, void *arg)
return;
}
/* retrieve the pid */
retry:
/* we can have multiple children leave but only get one
* sigchild callback, so reap all the waitpids until we
* don't get anything valid back */
while (1) {
pid = waitpid(-1, &status, WNOHANG);
if (-1 == pid && EINTR == errno) {
/* try it again */
goto retry;
continue;
}
/* if we got garbage, then nothing we can do */
if (pid <= 0) {
@ -270,11 +272,8 @@ static void wait_signal_callback(int fd, short event, void *arg)
}
opal_list_remove_item(&pending_cbs, &t2->super);
OBJ_RELEASE(t2);
return;
break;
}
}
}
/* if we get here, then this sigchild occurred prior to someone
* registering it, or after someone mistakenly removed it. Either
* way, there really isn't anything we can do with it */
}