Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/thorpej-futex]: src/sys Native implementation of the Linux eventfd(2) API.



details:   https://anonhg.NetBSD.org/src/rev/dedcaef9823f
branches:  thorpej-futex
changeset: 947503:dedcaef9823f
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Mon Dec 14 16:00:51 2020 +0000

description:
Native implementation of the Linux eventfd(2) API.

diffstat:

 sys/kern/files.kern      |    3 +-
 sys/kern/sys_eventfd.c   |  583 +++++++++++++++++++++++++++++++++++++++++++++++
 sys/kern/syscalls.master |    4 +-
 sys/sys/Makefile         |    4 +-
 sys/sys/eventfd.h        |   57 ++++
 sys/sys/file.h           |    7 +-
 6 files changed, 651 insertions(+), 7 deletions(-)

diffs (truncated from 739 to 300 lines):

diff -r 43859e503dfa -r dedcaef9823f sys/kern/files.kern
--- a/sys/kern/files.kern       Mon Dec 14 14:37:44 2020 +0000
+++ b/sys/kern/files.kern       Mon Dec 14 16:00:51 2020 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: files.kern,v 1.53.2.1 2020/12/14 14:38:13 thorpej Exp $
+#      $NetBSD: files.kern,v 1.53.2.2 2020/12/14 16:00:51 thorpej Exp $
 
 #
 # kernel sources
@@ -157,6 +157,7 @@
 file   kern/subr_xcall.c               kern
 file   kern/sys_aio.c                  aio
 file   kern/sys_descrip.c              kern
+file   kern/sys_eventfd.c              kern
 file   kern/sys_futex.c                kern
 file   kern/sys_generic.c              kern
 file   kern/sys_getrandom.c            kern
diff -r 43859e503dfa -r dedcaef9823f sys/kern/sys_eventfd.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/kern/sys_eventfd.c    Mon Dec 14 16:00:51 2020 +0000
@@ -0,0 +1,583 @@
+/*     $NetBSD: sys_eventfd.c,v 1.1.2.1 2020/12/14 16:00:51 thorpej Exp $      */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: sys_eventfd.c,v 1.1.2.1 2020/12/14 16:00:51 thorpej Exp $");
+
+/*
+ * eventfd
+ *
+ * Eventfd objects present a simple counting object associated with a
+ * file descriptor.  Writes to the descriptor add to the count; reads
+ * consume it (the whole count, or a single unit in semaphore mode).
+ * When the count is non-zero, the descriptor is considered "readable",
+ * and when it is less than the max value (EVENTFD_MAXVAL), it is
+ * considered "writable".
+ *
+ * This implementation is API compatible with the Linux eventfd(2)
+ * interface.
+ */
+
+#include <sys/types.h>
+#include <sys/condvar.h>
+#include <sys/eventfd.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kauth.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscallargs.h>
+#include <sys/uio.h>
+
+/*
+ * State behind one eventfd descriptor.  The functions in this file
+ * access these fields with efd_lock held.
+ */
+struct eventfd {
+       /* Mutex taken around counter access and handed to cv_wait(). */
+       kmutex_t        efd_lock;
+       /* Readers sleep here in eventfd_wait() (is_write == false). */
+       kcondvar_t      efd_read_wait;
+       /* Writers sleep here in eventfd_wait() (is_write == true). */
+       kcondvar_t      efd_write_wait;
+       /* Would-be sleepers wait here while efd_restarting is set. */
+       kcondvar_t      efd_restart_wait;
+       /* Notified with POLLIN | POLLRDNORM by eventfd_wake(). */
+       struct selinfo  efd_read_sel;
+       /* Notified with POLLOUT | POLLWRNORM by eventfd_wake(). */
+       struct selinfo  efd_write_sel;
+       /* The counter itself. */
+       eventfd_t       efd_val;
+       /* Number of LWPs currently asleep in eventfd_wait(). */
+       int64_t         efd_nwaiters;
+       /*
+        * A restart drain is in progress; sleepers return ERESTART
+        * and the last one out clears it.  NOTE(review): set by code
+        * outside this view.
+        */
+       bool            efd_restarting;
+       /* Hints telling eventfd_wake() whether a broadcast is needed. */
+       bool            efd_has_read_waiters;
+       bool            efd_has_write_waiters;
+       /* Created with EFD_SEMAPHORE: a read takes 1 rather than all. */
+       bool            efd_is_semaphore;
+
+       /*
+        * Information kept for stat(2).
+        */
+       struct timespec efd_btime;      /* time created */
+       struct timespec efd_mtime;      /* last write */
+       struct timespec efd_atime;      /* last read */
+};
+
+/*
+ * Largest value the counter is allowed to hold.  Per the overview at
+ * the top of this file, the descriptor counts as "writable" while the
+ * counter is below this value.
+ */
+#define        EVENTFD_MAXVAL  (UINT64_MAX - 1)
+
+/*
+ * eventfd_create:
+ *
+ *     Allocate and initialize a new eventfd object whose counter
+ *     starts out at val.  Of the creation flags only EFD_SEMAPHORE
+ *     is interpreted here; EFD_CLOEXEC and EFD_NONBLOCK are left to
+ *     the caller.  The allocation uses KM_SLEEP.
+ */
+static struct eventfd *
+eventfd_create(unsigned int const val, int const flags)
+{
+       struct eventfd *new_efd;
+
+       /* Zero-filled, so every field not set below starts at 0/false. */
+       new_efd = kmem_zalloc(sizeof(*new_efd), KM_SLEEP);
+
+       /* Counter state. */
+       new_efd->efd_val = val;
+       new_efd->efd_is_semaphore = (flags & EFD_SEMAPHORE) != 0;
+
+       /* Synchronization and poll/select plumbing. */
+       mutex_init(&new_efd->efd_lock, MUTEX_DEFAULT, IPL_NONE);
+       cv_init(&new_efd->efd_read_wait, "efdread");
+       cv_init(&new_efd->efd_write_wait, "efdwrite");
+       cv_init(&new_efd->efd_restart_wait, "efdrstrt");
+       selinit(&new_efd->efd_read_sel);
+       selinit(&new_efd->efd_write_sel);
+
+       /* Creation time, kept for stat(2). */
+       getnanotime(&new_efd->efd_btime);
+
+       return new_efd;
+}
+
+/*
+ * eventfd_destroy:
+ *
+ *     Destroy an eventfd object: tear down the condvars, selinfo
+ *     records and mutex set up by eventfd_create().  The object must
+ *     be idle (no sleepers, no restart in progress).
+ *
+ *     NOTE(review): the memory obtained with kmem_zalloc() in
+ *     eventfd_create() is not released here; presumably the caller
+ *     does the matching kmem_free() -- confirm.
+ */
+static void
+eventfd_destroy(struct eventfd * const efd)
+{
+
+       /* Nobody may still be asleep on, or draining, this object. */
+       KASSERT(efd->efd_nwaiters == 0);
+       KASSERT(efd->efd_restarting == false);
+       KASSERT(efd->efd_has_read_waiters == false);
+       KASSERT(efd->efd_has_write_waiters == false);
+
+       cv_destroy(&efd->efd_read_wait);
+       cv_destroy(&efd->efd_write_wait);
+       cv_destroy(&efd->efd_restart_wait);
+
+       seldestroy(&efd->efd_read_sel);
+       seldestroy(&efd->efd_write_sel);
+
+       mutex_destroy(&efd->efd_lock);
+}
+
+/*
+ * eventfd_wait:
+ *
+ *     Block on an eventfd.  Handles non-blocking, as well as
+ *     the restart cases.
+ *
+ *     Called with efd_lock held; the lock is released while asleep
+ *     and held again on return.  is_write selects which condvar to
+ *     sleep on (writers vs. readers).
+ *
+ *     Returns 0 after a normal wakeup (the caller must re-check its
+ *     condition), EAGAIN if fflag has FNONBLOCK set, ERESTART if a
+ *     restart was triggered during the sleep, or the error returned
+ *     by cv_wait_sig().
+ */
+static int
+eventfd_wait(struct eventfd * const efd, int const fflag, bool const is_write)
+{
+       kcondvar_t *waitcv;
+       int error;
+
+       if (fflag & FNONBLOCK) {
+               return EAGAIN;
+       }
+
+       /*
+        * We're going to block.  If there is a restart in-progress,
+        * wait for that to complete first.
+        */
+       while (efd->efd_restarting) {
+               cv_wait(&efd->efd_restart_wait, &efd->efd_lock);
+       }
+
+       /* Tell eventfd_wake() that a broadcast will be needed. */
+       if (is_write) {
+               efd->efd_has_write_waiters = true;
+               waitcv = &efd->efd_write_wait;
+       } else {
+               efd->efd_has_read_waiters = true;
+               waitcv = &efd->efd_read_wait;
+       }
+
+       efd->efd_nwaiters++;
+       KASSERT(efd->efd_nwaiters > 0);
+       error = cv_wait_sig(waitcv, &efd->efd_lock);
+       efd->efd_nwaiters--;
+       KASSERT(efd->efd_nwaiters >= 0);
+
+       /*
+        * If we were the last sleeper, nobody is left to wake on
+        * either condvar.  eventfd_wake() only clears the waiter
+        * hints when it broadcasts, so a sleep that ended for another
+        * reason (e.g. a signal) would otherwise leave a hint set and
+        * trip the assertions in eventfd_destroy().
+        */
+       if (efd->efd_nwaiters == 0) {
+               efd->efd_has_read_waiters = false;
+               efd->efd_has_write_waiters = false;
+       }
+
+       /*
+        * If a restart was triggered while we were asleep, we need
+        * to return ERESTART if no other error was returned.  If we
+        * are the last waiter coming out of the restart drain, clear
+        * the condition.
+        */
+       if (efd->efd_restarting) {
+               if (error == 0) {
+                       error = ERESTART;
+               }
+               if (efd->efd_nwaiters == 0) {
+                       efd->efd_restarting = false;
+                       cv_broadcast(&efd->efd_restart_wait);
+               }
+       }
+
+       return error;
+}
+
+/*
+ * eventfd_wake:
+ *
+ *     Wake LWPs blocked on an eventfd and notify select/poll/kqueue
+ *     listeners.  is_write names the operation that just happened:
+ *     after a write the readers are woken, after a read the writers.
+ */
+static void
+eventfd_wake(struct eventfd * const efd, bool const is_write)
+{
+       /* A write wakes the read side and vice versa. */
+       bool * const have_sleepers = is_write ?
+           &efd->efd_has_read_waiters : &efd->efd_has_write_waiters;
+       kcondvar_t * const cv = is_write ?
+           &efd->efd_read_wait : &efd->efd_write_wait;
+       struct selinfo * const sip = is_write ?
+           &efd->efd_read_sel : &efd->efd_write_sel;
+       int const events = is_write ?
+           (POLLIN | POLLRDNORM) : (POLLOUT | POLLWRNORM);
+
+       /* Only broadcast when someone announced they would sleep. */
+       if (*have_sleepers) {
+               *have_sleepers = false;
+               cv_broadcast(cv);
+       }
+
+       /* select/poll listeners are always notified. */
+       selnotify(sip, events, NOTE_SUBMIT);
+}
+
+/*
+ * eventfd file operations
+ */
+
+/*
+ * eventfd_fop_read:
+ *
+ *     Read method for eventfd descriptors.  Waits (unless the file
+ *     is non-blocking) until the counter is non-zero, then consumes
+ *     it: in semaphore mode the counter drops by one and 1 is
+ *     returned, otherwise the whole counter is returned and reset
+ *     to zero.  Writers and write-side pollers are then woken.
+ *
+ *     Returns EINVAL if the caller's buffer is smaller than an
+ *     eventfd_t, any error from eventfd_wait(), or the result of
+ *     uiomove().  offset, cred and flags are not used.
+ */
+static int
+eventfd_fop_read(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+       struct eventfd * const efd = fp->f_eventfd;
+       int const fflag = fp->f_flag;
+       eventfd_t return_value;
+       int error;
+
+       if (uio->uio_resid < sizeof(eventfd_t)) {
+               return EINVAL;
+       }
+
+       mutex_enter(&efd->efd_lock);
+
+       /* Re-check after every wakeup; another reader may have won. */
+       while (efd->efd_val == 0) {
+               if ((error = eventfd_wait(efd, fflag, false)) != 0) {
+                       mutex_exit(&efd->efd_lock);
+                       return error;
+               }
+       }
+
+       if (efd->efd_is_semaphore) {
+               return_value = 1;
+               efd->efd_val--;
+       } else {
+               return_value = efd->efd_val;
+               efd->efd_val = 0;
+       }
+
+       getnanotime(&efd->efd_atime);
+       eventfd_wake(efd, false);
+
+       /*
+        * All shared state has been updated and the value to hand back
+        * lives in a local, so release the lock before copying out:
+        * uiomove() touches the caller's buffer and there is no reason
+        * to stall other users of this eventfd while it does.
+        */
+       mutex_exit(&efd->efd_lock);
+
+       /*
+        * The count has already been consumed at this point; it is not
+        * put back if the copy-out fails.
+        */
+       return uiomove(&return_value, sizeof(return_value), uio);
+}
+
+static int
+eventfd_fop_write(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+       struct eventfd * const efd = fp->f_eventfd;
+       int const fflag = fp->f_flag;
+       eventfd_t write_value;



Home | Main Index | Thread Index | Old Index