1
1
openmpi/opal/mca/event/libevent207/libevent207.h
Josh Hursey 66af515061 Fix C/R functionality with the new libtool. This fixes the case where the restarted process cannot be checkpointed or finalized.
Short Version:
--------------
Event engine needs to be flushed so it does not use old/stale file descriptors.

Long Version:
-------------
The problem was that the restarted process was waiting for the socket to the local daemon to finish establishing during the 'sync' operation. The core problem was that the daemon was sending a header of 36 bytes, but the restarted process only received 35 bytes of the message. So the restarted process became stuck waiting for the last byte to arrive.

After many hours of digging, I figured out that the event engine was using the same file descriptor for its evsig_cb functionality (to signal itself when a signal arrives). So when the daemon wrote to the new fd, the event engine was stealing the first byte (*shakes fist at event engine*) before the recv() could be posted.

The solution is to use the event_reinit() function on restart to re-establish the now-stale file descriptors in the event engine. This seems to have fixed the problem.


A few other minor things:
-------------------------
 * Add a check to make sure the event engine is balanced in its init/finalize
 * Add the opal_event_base_close() to the BLCR restart exec function (still not 100% sure it is needed, but there it is).

This commit was SVN r24296.
2011-01-25 22:43:47 +00:00

174 строки
5.1 KiB
C

/*
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* When this component is used, this file is included in the rest of
* the OPAL/ORTE/OMPI code base via opal/mca/event/event.h. As such,
* this header represents the public interface to this static component.
*/
#ifndef MCA_OPAL_EVENT_LIBEVENT207_H
#define MCA_OPAL_EVENT_LIBEVENT207_H
#include "opal_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#ifndef WIN32
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#endif
#include <errno.h>
#include <signal.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include "opal/class/opal_object.h"
#include "opal/threads/mutex.h"
#include "opal/threads/threads.h"
#include "opal/util/output.h"
#include "opal/constants.h"
#include "opal/util/argv.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/event/base/base.h"
#include "libevent/event.h"
#include "libevent/include/event2/thread.h"
#include "opal/mca/event/event.h"
/* OPAL-level name for libevent's `struct event`. */
typedef struct event opal_event_t;
/*** Overload the event_base_t struct ***/
/* This may (hopefully) be a temporary change
* to deal with cross-base sync. Specifically,
* when an event in one base needs to release
* a condition_wait in another base, we need
* to "wakeup" the event base in the second base
* so the condition_wait can be checked
*
* On a more permanent level, use this to update
* the event base when it is being progressed in
* a separate thread.
*/
typedef struct {
/* Wrapped libevent base; the opal_event_* macros in this header pass
 * this member to the underlying libevent calls. */
struct event_base *base;
/* NOTE(review): presumably the event that watches update_pipe for
 * incoming update requests -- confirm in the component source. */
opal_event_t update_event;
/* Pair of descriptors; OPAL_UPDATE_EVBASE writes requests to
 * update_pipe[1]. NOTE(review): update_pipe[0] is presumably the
 * read side -- confirm where the pipe is created. */
int update_pipe[2];
} opal_event_base_t;
/* One update request for an event base. OPAL_UPDATE_EVBASE fills in one
 * of these and writes it, whole, to the base's update pipe. */
typedef struct {
/* Event the request refers to. */
opal_event_t *ev;
/* Requested operation. NOTE(review): presumably one of the
 * OPAL_EVENT_NOOP/ADD/DEL codes -- confirm against the pipe reader. */
uint8_t op;
} opal_event_update_t;
/* Operation codes for event-base update requests.
 * NOTE(review): presumably the values carried in opal_event_update_t.op;
 * the exact handling of each code is not visible in this header. */
#define OPAL_EVENT_NOOP 0x00
#define OPAL_EVENT_ADD 0x01
#define OPAL_EVENT_DEL 0x02
/*
 * Send an update request to an event base through its update pipe.
 *
 *   b   - pointer to the opal_event_base_t to notify
 *   evt - event the request refers to (stored in opal_event_update_t.ev)
 *   ad  - operation code (stored in opal_event_update_t.op)
 *
 * One opal_event_update_t is written to (b)->update_pipe[1] with
 * opal_fd_write(); the return value of the write is ignored, as before.
 *
 * The request is zeroed first because the whole struct, including any
 * padding after `op`, goes down the pipe. The local uses a reserved-looking
 * name so that arguments mentioning a caller variable called `up` are not
 * captured. The macro deliberately has no trailing semicolon: the caller
 * supplies it, which keeps it a single statement inside if/else.
 *
 * NOTE(review): no header included here visibly declares opal_fd_write();
 * confirm it is in scope wherever this macro is expanded.
 */
#define OPAL_UPDATE_EVBASE(b, evt, ad)                                      \
    do {                                                                    \
        opal_event_update_t opal_evupd_;                                    \
        memset(&opal_evupd_, 0, sizeof(opal_evupd_));                       \
        opal_evupd_.ev = (evt);                                             \
        opal_evupd_.op = (ad);                                              \
        opal_fd_write((b)->update_pipe[1], sizeof(opal_event_update_t),     \
                      &opal_evupd_);                                        \
    } while (0)
BEGIN_C_DECLS
/* Temporary global - will be replaced by layer-specific event bases.
 * Only declared here; the definition lives in another translation unit. */
OPAL_DECLSPEC extern opal_event_base_t *opal_event_base;
/* OPAL names for libevent's event flags; each expands directly to the
 * libevent constant of the same name. */
#define OPAL_EV_TIMEOUT EV_TIMEOUT
#define OPAL_EV_READ EV_READ
#define OPAL_EV_WRITE EV_WRITE
#define OPAL_EV_SIGNAL EV_SIGNAL
/* Persistent event: won't get removed automatically when activated. */
#define OPAL_EV_PERSIST EV_PERSIST
/* OPAL names for libevent's event-loop flags (the `fg` argument of
 * opal_event_loop). */
#define OPAL_EVLOOP_ONCE EVLOOP_ONCE /**< Block at most once. */
#define OPAL_EVLOOP_NONBLOCK EVLOOP_NONBLOCK /**< Do not block. */
/* Global function to create and release an event base */
/* Returns a pointer to an opal_event_base_t.
 * NOTE(review): failure behaviour (e.g. NULL on error) is not visible
 * here -- confirm in the implementation. */
OPAL_DECLSPEC opal_event_base_t* opal_event_base_create(void);
/* Releases `base`. NOTE(review): presumably the counterpart of
 * opal_event_base_create() -- confirm ownership rules in the implementation. */
OPAL_DECLSPEC void opal_event_base_finalize(opal_event_base_t *base);
/* Component-level initialization; returns an int status.
 * NOTE(review): presumably an OPAL_* constant -- confirm. */
OPAL_DECLSPEC int opal_event_init(void);
/* Re-initializes `base`; returns an int status.
 * NOTE(review): presumably wraps libevent's event_reinit() so stale
 * descriptors are re-established after a restart -- confirm. */
OPAL_DECLSPEC int opal_event_reinit(opal_event_base_t *base);
/* thread support APIs */
/* opal_event_use_threads(x) forwards its argument to the libevent
 * thread-enabling call for the platform: evthread_use_windows_threads on
 * WIN32, evthread_use_pthreads otherwise.
 *
 * When OPAL_EVENT_HAVE_THREAD_SUPPORT is false the macro expands to
 * nothing, so in that configuration it can only be used as a statement,
 * not as an expression whose value is read. */
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
#ifdef WIN32
#define opal_event_use_threads(x) evthread_use_windows_threads(x)
#else
#define opal_event_use_threads(x) evthread_use_pthreads(x)
#endif
#else
#define opal_event_use_threads(x)
#endif
/* Basic event APIs */
/* Thin wrappers that forward to the libevent call named in each expansion.
 * Macros taking a base (`b`) expect an opal_event_base_t pointer and pass
 * its wrapped `base` member on to libevent. */
#define opal_event_set_debug_output(x) event_set_debug_output((x))
/* Initialize `ev` on base `b` for descriptor `fd` with flags `fg`; `cb` is
 * cast to event_callback_fn and invoked with `arg`. */
#define opal_event_set(b, ev, fd, fg, cb, arg) event_assign((ev), (b)->base, (fd), (fg), (event_callback_fn) (cb), (arg))
/* Add `ev`; `tv` is handed to event_add() as the timeout argument. */
#define opal_event_add(ev, tv) event_add((ev), (tv))
#define opal_event_del(ev) event_del((ev))
/* Forward all three arguments unchanged to event_active(). */
#define opal_event_active(x, y, z) event_active((x), (y), (z))
/* Timer APIs */
/* Timer events are created with descriptor -1 and no event flags; the
 * timeout itself is supplied later through opal_event_evtimer_add(). */
/* Create a new timer event on base `b` via event_new(); `cb` is cast to
 * event_callback_fn and invoked with `arg`. */
#define opal_event_evtimer_new(b, cb, arg) event_new((b)->base, -1, 0, (event_callback_fn) (cb), (arg))
#define opal_event_evtimer_add(ev, tv) event_add((ev), (tv))
/* Same as opal_event_evtimer_new() but initializes caller-provided
 * storage `ev` via event_assign(). */
#define opal_event_evtimer_set(b, ev, cb, arg) event_assign((ev), (b)->base, -1, 0, (event_callback_fn) (cb), (arg))
#define opal_event_evtimer_del(ev) event_del((ev))
/* Query event_pending() for EV_TIMEOUT on `ev`; `tv` is passed through. */
#define opal_event_evtimer_pending(ev, tv) event_pending((ev), EV_TIMEOUT, (tv))
#define opal_event_evtimer_initialized(ev) event_initialized((ev))
/* Signal APIs */
/* Thin wrappers over libevent for signal events. Macros taking a base
 * (`b`) expect an opal_event_base_t pointer and use its `base` member. */
#define opal_event_signal_add(ev, tv) event_add((ev), (tv))
/* Initialize `ev` on base `b` as a persistent signal event
 * (EV_SIGNAL|EV_PERSIST). `fd` is passed in event_assign()'s descriptor
 * slot, which libevent interprets as the signal number for EV_SIGNAL
 * events. `cb` is cast to event_callback_fn and invoked with `arg`. */
#define opal_event_signal_set(b, ev, fd, cb, arg) event_assign((ev), (b)->base, (fd), EV_SIGNAL|EV_PERSIST, (event_callback_fn) (cb), (arg))
#define opal_event_signal_del(ev) event_del((ev))
/* Query event_pending() for EV_SIGNAL on `ev`; `tv` is passed through. */
#define opal_event_signal_pending(ev, tv) event_pending((ev), EV_SIGNAL, (tv))
/* Correctly spelled name; prefer this in new code. */
#define opal_event_signal_initialized(ev) event_initialized((ev))
/* Historical misspelling ("initalized"), kept so existing callers
 * continue to compile. */
#define opal_event_signal_initalized(ev) opal_event_signal_initialized(ev)
#define opal_event_get_signal(ev) event_get_signal((ev))
/* Run the event loop of base `b` (an opal_event_base_t pointer) with loop
 * flags `fg` (e.g. OPAL_EVLOOP_ONCE, OPAL_EVLOOP_NONBLOCK); evaluates to
 * the result of event_base_loop(). The base argument is parenthesized
 * before `->base` is applied so that expression arguments such as `p + 1`
 * or `cond ? a : b` expand correctly. */
#define opal_event_loop(b, fg) event_base_loop((b)->base, (fg))
/* Run the event loop of base `b` with no flags (event_base_loop(..., 0)). */
#define opal_event_dispatch(b) event_base_loop((b)->base, 0)
END_C_DECLS
#endif /* MCA_OPAL_EVENT_LIBEVENT207_H */