1
1

Deal with the SIGCHLD issue in LSF.

lsb_launch tampers with the SIGCHLD signal handler, so we are forced to reinstall our own signal handler after every call to this function.

This commit fixes trac:1356.

This commit was SVN r19033.

The following Trac tickets were found above:
  Ticket 1356 --> https://svn.open-mpi.org/trac/ompi/ticket/1356
This commit is contained in:
Thomas Herault 2008-07-25 15:23:23 +00:00
parent 7e6e104fc3
commit 28dc80b67e
3 changed files with 40 additions and 3 deletions

View File

@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -12,6 +12,8 @@
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -289,6 +291,13 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
}
}
/* lsb_launch tampers with SIGCHLD: after the call to lsb_launch
* returns, the signal handler for SIGCHLD is NULL.
* So we disable libevent's SIGCHLD handler for the duration of
* the call to lsb_launch.
*/
orte_wait_disable();
/* exec the daemon(s). Do NOT wait for lsb_launch to complete as
* it only completes when the processes it starts - in this case,
* the orteds - complete. We need to go ahead and return so
@ -299,8 +308,10 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
ORTE_ERROR_LOG(ORTE_ERR_FAILED_TO_START);
opal_output(0, "lsb_launch failed: %d", rc);
rc = ORTE_ERR_FAILED_TO_START;
orte_wait_enable(); /* re-enable our SIGCHLD handler */
goto cleanup;
}
orte_wait_enable(); /* re-enable our SIGCHLD handler */
/* wait for daemons to callback */
if (ORTE_SUCCESS !=

View File

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -209,6 +211,19 @@ static void internal_waitpid_callback(int fd, short event, void *arg);
* Interface Functions
*
********************************************************************/
/**
 * Disable the SIGCHLD handler.
 *
 * Removes the `handler` event (declared outside this function) from the
 * event engine by calling opal_event_del() on it.  The status returned
 * by opal_event_del() is discarded, since this function returns void.
 *
 * NOTE(review): presumably `handler` is only set up by orte_wait_init(),
 * so this should not be called before it -- confirm against the header.
 */
void
orte_wait_disable(void)
{
    (void) opal_event_del(&handler);
}
/**
 * Re-enable the SIGCHLD handler.
 *
 * Registers the `handler` event (declared outside this function) with
 * the event engine by calling opal_event_add() with a NULL timeout
 * argument.  The status returned by opal_event_add() is discarded,
 * since this function returns void.
 *
 * NOTE(review): presumably `handler` is only set up by orte_wait_init(),
 * so this should not be called before it -- confirm against the header.
 */
void
orte_wait_enable(void)
{
    (void) opal_event_add(&handler, NULL);
}
int
orte_wait_init(void)
{

View File

@ -2,13 +2,15 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -48,6 +50,15 @@ BEGIN_C_DECLS
/** typedef for callback function used in \c ompi_rte_wait_cb */
typedef void (*orte_wait_fn_t)(pid_t wpid, int status, void *data);
/**
 * Disable / re-enable the SIGCHLD handler.
 *
 * orte_wait_disable() turns the SIGCHLD handler off and
 * orte_wait_enable() turns it back on.  Neither function takes
 * arguments or returns a status.
 *
 * These functions may only be used after orte_wait_init() has been
 * called.
 */
ORTE_DECLSPEC void orte_wait_enable(void);
ORTE_DECLSPEC void orte_wait_disable(void);
/**
* Wait for process termination
*