1
1
openmpi/orte/runtime/orte_wait.h
Ralph Castain bd8b4f7f1e Sorry for mid-day commit, but I had promised on the call to do this upon my return.
Roll in the ORTE state machine. Remove last traces of opal_sos. Remove UTK epoch code.

Please see the various emails about the state machine change for details. I'll send something out later with more info on the new arch.

This commit was SVN r26242.
2012-04-06 14:23:13 +00:00

200 строки
7.6 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Interface for waitpid / async notification of child death with the
* libevent runtime system.
*/
#ifndef ORTE_WAIT_H
#define ORTE_WAIT_H
#include "orte_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "opal/dss/dss.h"
#include "opal/util/output.h"
#include "opal/sys/atomic.h"
#include "opal/mca/event/event.h"
#include "orte/types.h"
#include "orte/mca/rml/rml_types.h"
#include "opal/runtime/opal_progress.h"
BEGIN_C_DECLS
/**
 * Signature of the callback registered through \c orte_wait_cb.
 *
 * \param wpid   pid the callback is invoked for
 * \param status presumably the status collected by \c waitpid() for
 *               \c wpid -- TODO confirm against the implementation
 * \param data   presumably the opaque \c data pointer handed to
 *               \c orte_wait_cb at registration -- TODO confirm
 */
typedef void (*orte_wait_fn_t)(pid_t wpid, int status, void *data);
/**
 * Disable / re-enable the SIGCHLD handler.
 *
 * These functions may only be used after \c orte_wait_init has been
 * called.
 */
ORTE_DECLSPEC void orte_wait_enable(void);
ORTE_DECLSPEC void orte_wait_disable(void);
/**
 * Wait for process termination
 *
 * Similar to \c waitpid, \c orte_waitpid utilizes the run-time
 * event library for process termination notification. The \c
 * WUNTRACED option is not supported, but the \c WNOHANG option is
 * supported.
 *
 * \note A \c wpid value of \c -1 is not currently supported and will
 * return an error.
 */
ORTE_DECLSPEC pid_t orte_waitpid(pid_t wpid, int *status, int options);
/**
 * Register a callback for process termination
 *
 * Register a callback for notification when \c wpid causes a SIGCHLD.
 * \c waitpid() will have already been called on the process at this
 * time.
 *
 * If a thread is already blocked in \c orte_waitpid for \c wpid,
 * this function will return \c ORTE_ERR_EXISTS. It is illegal for
 * multiple callbacks to be registered for a single \c wpid
 * (an "exists" error is returned in this case; NOTE(review): the
 * original text named it OMPI_EXISTS, presumably the same
 * \c ORTE_ERR_EXISTS code -- confirm against the implementation).
 *
 * \warning It is not legal for \c wpid to be -1 when registering a
 * callback.
 */
ORTE_DECLSPEC int orte_wait_cb(pid_t wpid, orte_wait_fn_t callback, void *data);
/*
 * Companion controls for callbacks registered with orte_wait_cb.
 *
 * NOTE(review): none of these is documented in this header. Judging by
 * the names only, orte_wait_cb_cancel presumably drops the callback
 * registered for wpid, and orte_wait_cb_disable / orte_wait_cb_enable
 * presumably suspend / resume callback delivery -- confirm against the
 * implementation before relying on this. The int return is presumably
 * an ORTE status code -- TODO confirm.
 */
ORTE_DECLSPEC int orte_wait_cb_cancel(pid_t wpid);
ORTE_DECLSPEC int orte_wait_cb_disable(void);
ORTE_DECLSPEC int orte_wait_cb_enable(void);
/*
 * Object describing one timer event. Instances are created with
 * OBJ_NEW(orte_timer_t) by the ORTE_DETECT_TIMEOUT and ORTE_TIMER_EVENT
 * macros in this header.
 */
typedef struct {
/* base object; must remain the first member -- presumably required by the OPAL class system, confirm */
opal_object_t super;
/* delay before the timer fires; filled in by the macros and passed to opal_event_evtimer_add */
struct timeval tv;
/* event handle the macros configure via opal_event_evtimer_set */
opal_event_t *ev;
/* caller data: ORTE_DETECT_TIMEOUT stores its cbd argument here; ORTE_TIMER_EVENT does not assign it */
void *payload;
} orte_timer_t;
OBJ_CLASS_DECLARATION(orte_timer_t);
/**
 * Arm a timer that fires if some procedure fails to complete in time.
 *
 * In a number of places within the code, we want to set up a timer
 * to detect when some procedure failed to complete. For example,
 * when we launch the daemons, we frequently have no way to directly
 * detect that a daemon failed to launch. Setting a timer allows us
 * to automatically fail out of the launch if we don't hear from a
 * daemon in some specified time window.
 *
 * The wait time is \c deltat * \c n microseconds (stored in an \c int)
 * and, when \c maxwait is positive, is capped at \c maxwait
 * microseconds. The result is split into seconds / microseconds so
 * that the microsecond field is always less than 1M, since some
 * systems care about that.
 *
 * \param n       multiplier applied to \c deltat
 * \param deltat  microseconds to wait per unit of \c n
 * \param maxwait upper bound on the wait, in microseconds; a value
 *                <= 0 disables the cap. Evaluated more than once, so
 *                it must not have side effects.
 * \param cbfunc  callback passed to \c opal_event_evtimer_set, with
 *                the newly created \c orte_timer_t as its argument
 * \param cbd     stored in the timer's \c payload field
 *
 * The expansion references \c orte_event_base, \c orte_debug_output and
 * \c ORTE_ERROR_PRI, which must be visible at the point of use.
 *
 * The macro expands to a single statement WITHOUT a trailing
 * semicolon; invoke it as <tt>ORTE_DETECT_TIMEOUT(...);</tt> so that it
 * can safely be used as the body of an if/else.
 *
 * NOTE: the callback function is responsible for releasing the timer
 * event back to the event pool!
 */
#define ORTE_DETECT_TIMEOUT(n, deltat, maxwait, cbfunc, cbd)                \
    do {                                                                    \
        /* prefixed names so caller arguments called "tmp" or */            \
        /* "timeout" are not captured by the macro's own locals */          \
        orte_timer_t *orte_dt_timer_;                                       \
        int orte_dt_usec_;                                                  \
        orte_dt_timer_ = OBJ_NEW(orte_timer_t);                             \
        orte_dt_timer_->payload = (cbd);                                    \
        opal_event_evtimer_set(orte_event_base,                             \
                               orte_dt_timer_->ev, (cbfunc),                \
                               orte_dt_timer_);                             \
        opal_event_set_priority(orte_dt_timer_->ev, ORTE_ERROR_PRI);        \
        orte_dt_usec_ = (deltat) * (n);                                     \
        if ((maxwait) > 0 && orte_dt_usec_ > (maxwait)) {                   \
            orte_dt_usec_ = (maxwait);                                      \
        }                                                                   \
        orte_dt_timer_->tv.tv_sec = orte_dt_usec_ / 1000000;                \
        orte_dt_timer_->tv.tv_usec = orte_dt_usec_ % 1000000;               \
        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,                          \
                             "defining timeout: %ld sec %ld usec at %s:%d", \
                             (long)orte_dt_timer_->tv.tv_sec,               \
                             (long)orte_dt_timer_->tv.tv_usec,              \
                             __FILE__, __LINE__));                          \
        opal_event_evtimer_add(orte_dt_timer_->ev, &orte_dt_timer_->tv);    \
    } while (0)
/**
 * Arm a timer event that fires after a given delay.
 *
 * There are places in the code where we just want to periodically
 * wake up to do something, and then go back to sleep again. Setting
 * a timer allows us to do this.
 *
 * The delay is normalized so that the microsecond field is always
 * less than 1M: whole seconds contained in \c usec are carried over
 * into the seconds field.
 *
 * \param sec    seconds to wait
 * \param usec   additional microseconds to wait. Evaluated twice, so
 *               it must not have side effects.
 * \param cbfunc callback passed to \c opal_event_evtimer_set, with the
 *               newly created \c orte_timer_t as its argument
 * \param pri    priority passed to \c opal_event_set_priority
 *
 * The timer's \c payload field is not assigned by this macro. The
 * expansion references \c orte_event_base and \c orte_debug_output,
 * which must be visible at the point of use.
 *
 * The macro expands to a single statement WITHOUT a trailing
 * semicolon; invoke it as <tt>ORTE_TIMER_EVENT(...);</tt> so that it
 * can safely be used as the body of an if/else.
 *
 * NOTE(review): the macro adds the timer once; re-arming it for a
 * periodic wakeup is presumably up to the callback -- confirm.
 *
 * NOTE: the callback function is responsible for releasing the timer
 * event back to the event pool when done! Otherwise, the finalize
 * function will take care of it.
 */
#define ORTE_TIMER_EVENT(sec, usec, cbfunc, pri)                                \
    do {                                                                        \
        /* prefixed name so a caller argument called "tm" is not */             \
        /* captured by the macro's own local */                                 \
        orte_timer_t *orte_te_timer_;                                           \
        orte_te_timer_ = OBJ_NEW(orte_timer_t);                                 \
        opal_event_evtimer_set(orte_event_base,                                 \
                               orte_te_timer_->ev, (cbfunc),                    \
                               orte_te_timer_);                                 \
        opal_event_set_priority(orte_te_timer_->ev, (pri));                     \
        orte_te_timer_->tv.tv_sec = (sec) + (usec)/1000000;                     \
        orte_te_timer_->tv.tv_usec = (usec) % 1000000;                          \
        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,                              \
                             "defining timer event: %ld sec %ld usec at %s:%d", \
                             (long)orte_te_timer_->tv.tv_sec,                   \
                             (long)orte_te_timer_->tv.tv_usec,                  \
                             __FILE__, __LINE__));                              \
        opal_event_evtimer_add(orte_te_timer_->ev, &orte_te_timer_->tv);        \
    } while (0)
/**
 * \internal
 *
 * Initialize the wait system (allocate mutexes, etc.)
 *
 * \return presumably an ORTE status code -- TODO confirm against the
 *         implementation
 */
ORTE_DECLSPEC int orte_wait_init(void);
/**
 * Kill all processes we are waiting on.
 *
 * \param sig presumably the signal number delivered to each process --
 *            TODO confirm against the implementation
 *
 * \return presumably an ORTE status code -- TODO confirm
 */
ORTE_DECLSPEC int orte_wait_kill(int sig);
/**
 * \internal
 *
 * Finalize the wait system (deallocate mutexes, etc.)
 *
 * \return presumably an ORTE status code -- TODO confirm against the
 *         implementation
 */
ORTE_DECLSPEC int orte_wait_finalize(void);
END_C_DECLS
#endif /* #ifndef ORTE_WAIT_H */