/*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/

/*
 * Copyright 2000-2002 Niels Provos
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * kqueue(2) backend for the OPAL event loop.
 *
 * Registrations and removals are not sent to the kernel immediately; they
 * are appended to a pending change list (kq_insert) and handed to kevent()
 * in one batch on the next kq_dispatch() call.
 */

#include "opal_config.h"

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <sys/_time.h>
#endif
#include <sys/queue.h>
#include <sys/event.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif

/*
 * Converts a pointer to the integer form stored in kevent.udata's cast
 * chain; a no-op where intptr_t is unavailable or on OpenBSD/FreeBSD.
 */
#if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__)
#define INTPTR(x)   ((intptr_t)(x))
#else
#define INTPTR(x)   (x)
#endif

#include "event.h"
#include "log.h"
#include "opal/threads/mutex.h"

extern opal_mutex_t opal_event_lock;

/* Private ev_flags bit: set by kq_add(), cleared by kq_del(). */
#define EVLIST_X_KQINKERNEL 0x1000

/* Initial capacity (in kevents) of both the change and the result array. */
#define NEVENT      64

/* Per-backend state returned by kq_init() and passed back as `arg`. */
struct kqop {
    struct kevent *changes;     /* pending changes for the next kevent() */
    int nchanges;               /* number of valid entries in `changes` */
    struct kevent *events;      /* result buffer filled by kevent() */
    int nevents;                /* capacity of `changes` and `events` */
    int kq;                     /* kqueue descriptor */
};

static void *kq_init    (void);
static int kq_add       (void *, struct opal_event *);
static int kq_del       (void *, struct opal_event *);
static int kq_recalc    (struct event_base *, void *, int);
static int kq_dispatch  (struct event_base *, void *, struct timeval *);
static int kq_insert    (struct kqop *, struct kevent *);

const struct opal_eventop opal_kqops = {
    "kqueue",
    kq_init,
    kq_add,
    kq_del,
    kq_recalc,
    kq_dispatch
};

/*
 * Create the kqueue backend state.
 *
 * Returns a heap-allocated struct kqop on success, or NULL when the
 * EVENT_NOKQUEUE environment variable is set, an allocation fails,
 * kqueue() fails, or the sanity probe below shows kqueue misbehaving.
 * On every failure path all memory and the kqueue descriptor are released.
 */
static void *
kq_init(void)
{
    int kq;
    struct kqop *kqueueop;

    /* Disable kqueue when this environment variable is set */
    if (getenv("EVENT_NOKQUEUE"))
        return (NULL);

    /* calloc leaves changes/events NULL so the cleanup path can free them */
    if (!(kqueueop = calloc(1, sizeof(struct kqop))))
        return (NULL);

    /* Initialize the kernel queue */
    if ((kq = kqueue()) == -1) {
        event_warn("kqueue");
        free(kqueueop);
        return (NULL);
    }

    kqueueop->kq = kq;

    /* Initialize fields */
    kqueueop->changes = malloc(NEVENT * sizeof(struct kevent));
    if (kqueueop->changes == NULL)
        goto fail;
    kqueueop->events = malloc(NEVENT * sizeof(struct kevent));
    if (kqueueop->events == NULL)
        goto fail;
    kqueueop->nevents = NEVENT;

    /*
     * Check for Mac OS X kqueue bug.  Zero the probe entry first so no
     * uninitialized fields (fflags, data, udata) are passed to the kernel.
     */
    memset(&kqueueop->changes[0], 0, sizeof(kqueueop->changes[0]));
    kqueueop->changes[0].ident = -1;
    kqueueop->changes[0].filter = EVFILT_READ;
    kqueueop->changes[0].flags = EV_ADD;

    /*
     * If kqueue works, then kevent will succeed, and it will
     * stick an error in events[0].  If kqueue is broken, then
     * kevent will fail.
     *
     * The returned ident is compared against the value we submitted
     * rather than a fixed-width constant, so the check is correct
     * whatever the width of the ident field.
     */
    if (kevent(kq,
        kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
        kqueueop->events[0].ident != kqueueop->changes[0].ident ||
        kqueueop->events[0].flags != EV_ERROR) {
        event_warn("%s: detected broken kqueue; not using.", __func__);
        goto fail;
    }

    return (kqueueop);

fail:
    free(kqueueop->changes);
    free(kqueueop->events);
    free(kqueueop);
    close(kq);
    return (NULL);
}

/*
 * Backend recalculation hook; kqueue has nothing to recompute.
 * Always returns 0.
 */
static int
kq_recalc(struct event_base *base, void *arg, int max)
{
    return (0);
}

/*
 * Append a copy of *kev to the pending change list.
 *
 * When the list is full, both the change array and the result array are
 * doubled.  Returns 0 on success, -1 if either reallocation fails (the
 * change is then not queued).
 */
static int
kq_insert(struct kqop *kqop, struct kevent *kev)
{
    int nevents = kqop->nevents;

    if (kqop->nchanges == nevents) {
        struct kevent *newchange;
        struct kevent *newresult;

        nevents *= 2;

        newchange = realloc(kqop->changes,
            nevents * sizeof(struct kevent));
        if (newchange == NULL) {
            event_warn("%s: malloc", __func__);
            return (-1);
        }
        kqop->changes = newchange;

        newresult = realloc(kqop->events,
            nevents * sizeof(struct kevent));

        /*
         * If we fail, we don't have to worry about freeing,
         * the next realloc will pick it up.  nevents is left
         * unchanged, so the next call retries the growth.
         */
        if (newresult == NULL) {
            event_warn("%s: malloc", __func__);
            return (-1);
        }
        kqop->events = newresult;

        kqop->nevents = nevents;
    }

    memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));

    event_debug(("%s: fd %d %s%s",
        __func__, (int) kev->ident,
        kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
        kev->flags == EV_DELETE ? " (del)" : ""));

    return (0);
}

/*
 * Empty handler installed by kq_add() for signals that are watched
 * through EVFILT_SIGNAL.
 */
static void
kq_sighandler(int sig)
{
    /* Do nothing here */
}

/*
 * Submit all pending changes and wait for events.
 *
 * tv is the maximum time to wait; a NULL tv is passed to kevent() as a
 * NULL timeout.  Returns 0 on success or when kevent() was interrupted
 * (EINTR), and -1 on any other kevent() failure or when a returned event
 * carries an error other than EBADF, EINVAL or ENOENT (errno is then set
 * to that error).
 */
static int
kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
    struct kqop *kqop = arg;
    struct kevent *changes = kqop->changes;
    struct kevent *events = kqop->events;
    struct opal_event *ev;
    struct timespec ts;
    struct timespec *ts_p = NULL;
    int i, res;

    if (tv != NULL) {
        TIMEVAL_TO_TIMESPEC(tv, &ts);
        ts_p = &ts;
    }

    /* we should release the lock if we're going to enter the
       kernel in a multi-threaded application.  However, if we're
       single threaded, there's really no advantage to releasing
       the lock and it just takes up time we could spend doing
       something else. */
    OPAL_THREAD_UNLOCK(&opal_event_lock);
    res = kevent(kqop->kq, changes, kqop->nchanges,
        events, kqop->nevents, ts_p);
    OPAL_THREAD_LOCK(&opal_event_lock);

    /* The change list is discarded whether or not kevent() succeeded. */
    kqop->nchanges = 0;
    if (res == -1) {
        if (errno != EINTR) {
            event_warn("kevent");
            return (-1);
        }

        return (0);
    }

    event_debug(("%s: kevent reports %d", __func__, res));

    for (i = 0; i < res; i++) {
        int which = 0;

        if (events[i].flags & EV_ERROR) {
            /*
             * Error messages that can happen, when a delete fails.
             *   EBADF happens when the file descriptor has been
             *   closed,
             *   ENOENT when the file descriptor was closed and
             *   then reopened.
             *   EINVAL for some reasons not understood; EINVAL
             *   should not be returned ever; but FreeBSD does :-\
             * An error is also indicated when a callback deletes
             * an event we are still processing.  In that case
             * the data field is set to ENOENT.
             */
            if (events[i].data == EBADF ||
                events[i].data == EINVAL ||
                events[i].data == ENOENT)
                continue;
            errno = events[i].data;
            return (-1);
        }

        /* udata carries the opal_event pointer stored by kq_add() */
        ev = (struct opal_event *)events[i].udata;

        if (events[i].filter == EVFILT_READ) {
            which |= OPAL_EV_READ;
        } else if (events[i].filter == EVFILT_WRITE) {
            which |= OPAL_EV_WRITE;
        } else if (events[i].filter == EVFILT_SIGNAL) {
            which |= OPAL_EV_SIGNAL;
        }

        if (!which)
            continue;

        if (!(ev->ev_events & OPAL_EV_PERSIST))
            opal_event_del_i(ev);

        /* For signal events the third argument is the kevent data field */
        opal_event_active_i(ev, which,
            ev->ev_events & OPAL_EV_SIGNAL ? events[i].data : 1);
    }

    return (0);
}

/*
 * Queue the kernel registration(s) for ev.
 *
 * A signal event queues one EVFILT_SIGNAL change and installs
 * kq_sighandler for that signal; otherwise one change is queued for each
 * of OPAL_EV_READ / OPAL_EV_WRITE that is requested.  Events without
 * OPAL_EV_PERSIST are registered EV_ONESHOT.  Returns 0 on success, -1 if
 * a change could not be queued or signal() failed.
 */
static int
kq_add(void *arg, struct opal_event *ev)
{
    struct kqop *kqop = arg;
    struct kevent kev;

    if (ev->ev_events & OPAL_EV_SIGNAL) {
        int nsignal = OPAL_EVENT_SIGNAL(ev);

        memset(&kev, 0, sizeof(kev));
        kev.ident = nsignal;
        kev.filter = EVFILT_SIGNAL;
        kev.flags = EV_ADD;
        if (!(ev->ev_events & OPAL_EV_PERSIST))
            kev.flags |= EV_ONESHOT;
        kev.udata = (void *) INTPTR(ev);

        if (kq_insert(kqop, &kev) == -1)
            return (-1);

        if (signal(nsignal, kq_sighandler) == SIG_ERR)
            return (-1);

        ev->ev_flags |= EVLIST_X_KQINKERNEL;
        return (0);
    }

    if (ev->ev_events & OPAL_EV_READ) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = ev->ev_fd;
        kev.filter = EVFILT_READ;
#ifdef NOTE_EOF
        /* Make it behave like select() and poll() */
        kev.fflags = NOTE_EOF;
#endif
        kev.flags = EV_ADD;
        if (!(ev->ev_events & OPAL_EV_PERSIST))
            kev.flags |= EV_ONESHOT;
        kev.udata = (void *) INTPTR(ev);

        if (kq_insert(kqop, &kev) == -1)
            return (-1);

        ev->ev_flags |= EVLIST_X_KQINKERNEL;
    }

    if (ev->ev_events & OPAL_EV_WRITE) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = ev->ev_fd;
        kev.filter = EVFILT_WRITE;
        kev.flags = EV_ADD;
        if (!(ev->ev_events & OPAL_EV_PERSIST))
            kev.flags |= EV_ONESHOT;
        kev.udata = (void *) INTPTR(ev);

        if (kq_insert(kqop, &kev) == -1)
            return (-1);

        ev->ev_flags |= EVLIST_X_KQINKERNEL;
    }

    return (0);
}

/*
 * Queue the kernel removal(s) for ev.
 *
 * Does nothing for events that kq_add() never marked as registered.
 * A signal event queues an EVFILT_SIGNAL delete and resets the signal
 * disposition to SIG_DFL; otherwise one EV_DELETE is queued per requested
 * read/write filter.  Returns 0 on success, -1 if a change could not be
 * queued or signal() failed.
 */
static int
kq_del(void *arg, struct opal_event *ev)
{
    struct kqop *kqop = arg;
    struct kevent kev;

    if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
        return (0);

    if (ev->ev_events & OPAL_EV_SIGNAL) {
        int nsignal = OPAL_EVENT_SIGNAL(ev);

        memset(&kev, 0, sizeof(kev));
        /* Must be the same ident kq_add() registered: the signal number */
        kev.ident = nsignal;
        kev.filter = EVFILT_SIGNAL;
        kev.flags = EV_DELETE;

        if (kq_insert(kqop, &kev) == -1)
            return (-1);

        if (signal(nsignal, SIG_DFL) == SIG_ERR)
            return (-1);

        ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
        return (0);
    }

    if (ev->ev_events & OPAL_EV_READ) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = ev->ev_fd;
        kev.filter = EVFILT_READ;
        kev.flags = EV_DELETE;

        if (kq_insert(kqop, &kev) == -1)
            return (-1);

        ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
    }

    if (ev->ev_events & OPAL_EV_WRITE) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = ev->ev_fd;
        kev.filter = EVFILT_WRITE;
        kev.flags = EV_DELETE;

        if (kq_insert(kqop, &kev) == -1)
            return (-1);

        ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
    }

    return (0);
}