Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src - Make this needed sequence always work for condvars, by not...



details:   https://anonhg.NetBSD.org/src/rev/afef9607eacd
branches:  trunk
changeset: 930622:afef9607eacd
user:      ad <ad%NetBSD.org@localhost>
date:      Fri Apr 10 17:16:21 2020 +0000

description:
- Make this needed sequence always work for condvars, by not touching the CV
  again after wakeup.  Previously it could panic because cv_signal() could
  be called by cv_wait_sig() + others:

        cv_broadcast(cv);
        cv_destroy(cv);

- In support of the above, if an LWP doing a timed wait is awoken by
  cv_broadcast() or cv_signal(), don't return an error if the timer
  fires after the fact, i.e. either succeed or fail, not both.

- Remove LOCKDEBUG code for CVs which never worked properly and is of
  questionable use.

diffstat:

 share/man/man9/condvar.9  |   27 +++++---
 sys/kern/kern_condvar.c   |  136 ++++++++++-----------------------------------
 sys/kern/kern_sleepq.c    |   15 +++-
 sys/kern/subr_lockdebug.c |  103 ++++++----------------------------
 sys/sys/lockdebug.h       |    7 +--
 sys/sys/lwp.h             |    4 +-
 6 files changed, 79 insertions(+), 213 deletions(-)

diffs (truncated from 616 to 300 lines):

diff -r fe36df09a9b8 -r afef9607eacd share/man/man9/condvar.9
--- a/share/man/man9/condvar.9  Fri Apr 10 17:02:33 2020 +0000
+++ b/share/man/man9/condvar.9  Fri Apr 10 17:16:21 2020 +0000
@@ -1,6 +1,6 @@
-.\"    $NetBSD: condvar.9,v 1.21 2019/12/12 02:34:55 pgoyette Exp $
+.\"    $NetBSD: condvar.9,v 1.22 2020/04/10 17:16:21 ad Exp $
 .\"
-.\" Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
+.\" Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
 .\" All rights reserved.
 .\"
 .\" This code is derived from software contributed to The NetBSD Foundation
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd December 12, 2019
+.Dd April 9, 2020
 .Dt CONDVAR 9
 .Os
 .Sh NAME
@@ -116,7 +116,9 @@
 .It Fn cv_destroy "cv"
 .Pp
 Release resources used by a CV.
-The CV must not be in use when it is destroyed, and must not be used afterwards.
+If there could be waiters, they should be awoken first with
+.Fn cv_broadcast .
+The CV must not be be used afterwards.
 .It Fn cv_wait "cv" "mtx"
 .Pp
 Cause the current LWP to wait non-interruptably for access to a resource,
@@ -145,10 +147,11 @@
 Non-interruptable waits have the potential to deadlock the system, and so must
 be kept short (typically, under one second).
 .Pp
-Upon being awakened, the calling LWP should verify the availability
-of the resource (or other condition).
-It should not blindly assume that the resource is now available.
-If the resource is still not available, the calling LWP may call
+.Fn cv_wait
+is typically used within a loop or restartable code sequence, because it
+may awaken spuriously.
+The calling LWP should re-check the condition that caused the wait.
+If necessary, the calling LWP may call
 .Fn cv_wait
 again to continue waiting.
 .It Fn cv_wait_sig "cv" "mtx"
@@ -234,8 +237,12 @@
 .Fa bt Li "+" Fa epsilon .
 .It Fn cv_signal "cv"
 .Pp
-Awaken one LWP (potentially among many) that is waiting on the specified
-condition variable.
+Awaken one LWP waiting on the specified condition variable.
+Where there are waiters sleeping non-interruptaby, more than one
+LWP may be awoken.
+This can be used to avoid a "thundering herd" problem, where a large
+number of LWPs are awoken following an event, but only one LWP can
+process the event.
 The mutex passed to the wait function
 .Po Fa mtx Pc
 must also be held when calling
diff -r fe36df09a9b8 -r afef9607eacd sys/kern/kern_condvar.c
--- a/sys/kern/kern_condvar.c   Fri Apr 10 17:02:33 2020 +0000
+++ b/sys/kern/kern_condvar.c   Fri Apr 10 17:16:21 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: kern_condvar.c,v 1.44 2020/03/26 19:46:42 ad Exp $     */
+/*     $NetBSD: kern_condvar.c,v 1.45 2020/04/10 17:16:21 ad Exp $     */
 
 /*-
  * Copyright (c) 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.44 2020/03/26 19:46:42 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.45 2020/04/10 17:16:21 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -77,41 +77,7 @@
        .sobj_owner     = syncobj_noowner,
 };
 
-lockops_t cv_lockops = {
-       .lo_name = "Condition variable",
-       .lo_type = LOCKOPS_CV,
-       .lo_dump = NULL,
-};
-
 static const char deadcv[] = "deadcv";
-#ifdef LOCKDEBUG
-static const char nodebug[] = "nodebug";
-
-#define CV_LOCKDEBUG_HANDOFF(l, cv) cv_lockdebug_handoff(l, cv)
-#define CV_LOCKDEBUG_PROCESS(l, cv) cv_lockdebug_process(l, cv)
-
-static inline void
-cv_lockdebug_handoff(lwp_t *l, kcondvar_t *cv)
-{
-
-       if (CV_DEBUG_P(cv))
-               l->l_flag |= LW_CVLOCKDEBUG;
-}
-
-static inline void
-cv_lockdebug_process(lwp_t *l, kcondvar_t *cv)
-{
-
-       if ((l->l_flag & LW_CVLOCKDEBUG) == 0)
-               return;
-
-       l->l_flag &= ~LW_CVLOCKDEBUG;
-       LOCKDEBUG_UNLOCKED(true, cv, CV_RA, 0);
-}
-#else
-#define CV_LOCKDEBUG_HANDOFF(l, cv) __nothing
-#define CV_LOCKDEBUG_PROCESS(l, cv) __nothing
-#endif
 
 /*
  * cv_init:
@@ -121,16 +87,7 @@
 void
 cv_init(kcondvar_t *cv, const char *wmesg)
 {
-#ifdef LOCKDEBUG
-       bool dodebug;
 
-       dodebug = LOCKDEBUG_ALLOC(cv, &cv_lockops,
-           (uintptr_t)__builtin_return_address(0));
-       if (!dodebug) {
-               /* XXX This will break vfs_lockf. */
-               wmesg = nodebug;
-       }
-#endif
        KASSERT(wmesg != NULL);
        CV_SET_WMESG(cv, wmesg);
        sleepq_init(CV_SLEEPQ(cv));
@@ -145,9 +102,9 @@
 cv_destroy(kcondvar_t *cv)
 {
 
-       LOCKDEBUG_FREE(CV_DEBUG_P(cv), cv);
 #ifdef DIAGNOSTIC
        KASSERT(cv_is_valid(cv));
+       KASSERT(!cv_has_waiters(cv));
        CV_SET_WMESG(cv, deadcv);
 #endif
 }
@@ -168,8 +125,6 @@
        KASSERT(!cpu_intr_p());
        KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL);
 
-       LOCKDEBUG_LOCKED(CV_DEBUG_P(cv), cv, mtx, CV_RA, 0);
-
        l->l_kpriority = true;
        mp = sleepq_hashlock(cv);
        sq = CV_SLEEPQ(cv);
@@ -180,30 +135,6 @@
 }
 
 /*
- * cv_exit:
- *
- *     After resuming execution, check to see if we have been restarted
- *     as a result of cv_signal().  If we have, but cannot take the
- *     wakeup (because of eg a pending Unix signal or timeout) then try
- *     to ensure that another LWP sees it.  This is necessary because
- *     there may be multiple waiters, and at least one should take the
- *     wakeup if possible.
- */
-static inline int
-cv_exit(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l, const int error)
-{
-
-       mutex_enter(mtx);
-       if (__predict_false(error != 0))
-               cv_signal(cv);
-
-       LOCKDEBUG_UNLOCKED(CV_DEBUG_P(cv), cv, CV_RA, 0);
-       KASSERT(cv_is_valid(cv));
-
-       return error;
-}
-
-/*
  * cv_unsleep:
  *
  *     Remove an LWP from the condition variable and sleep queue.  This
@@ -239,14 +170,6 @@
        KASSERT(mutex_owned(mtx));
 
        cv_enter(cv, mtx, l);
-
-       /*
-        * We can't use cv_exit() here since the cv might be destroyed before
-        * this thread gets a chance to run.  Instead, hand off the lockdebug
-        * responsibility to the thread that wakes us up.
-        */
-
-       CV_LOCKDEBUG_HANDOFF(l, cv);
        (void)sleepq_block(0, false);
        mutex_enter(mtx);
 }
@@ -269,7 +192,8 @@
 
        cv_enter(cv, mtx, l);
        error = sleepq_block(0, true);
-       return cv_exit(cv, mtx, l, error);
+       mutex_enter(mtx);
+       return error;
 }
 
 /*
@@ -291,7 +215,8 @@
 
        cv_enter(cv, mtx, l);
        error = sleepq_block(timo, false);
-       return cv_exit(cv, mtx, l, error);
+       mutex_enter(mtx);
+       return error;
 }
 
 /*
@@ -315,7 +240,8 @@
 
        cv_enter(cv, mtx, l);
        error = sleepq_block(timo, true);
-       return cv_exit(cv, mtx, l, error);
+       mutex_enter(mtx);
+       return error;
 }
 
 /*
@@ -482,7 +408,6 @@
 cv_signal(kcondvar_t *cv)
 {
 
-       /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
        KASSERT(cv_is_valid(cv));
 
        if (__predict_false(!LIST_EMPTY(CV_SLEEPQ(cv))))
@@ -503,23 +428,29 @@
        kmutex_t *mp;
        lwp_t *l;
 
-       KASSERT(cv_is_valid(cv));
-
+       /*
+        * Keep waking LWPs until a non-interruptable waiter is found.  An
+        * interruptable waiter could fail to do something useful with the
+        * wakeup due to an error return from cv_[timed]wait_sig(), and the
+        * caller of cv_signal() may not expect such a scenario.
+        *
+        * This isn't a problem for non-interruptable waits (untimed and
+        * timed), because if such a waiter is woken here it will not return
+        * an error.
+        */
        mp = sleepq_hashlock(cv);
        sq = CV_SLEEPQ(cv);
-       l = LIST_FIRST(sq);
-       if (__predict_false(l == NULL)) {
-               mutex_spin_exit(mp);
-               return;
+       while ((l = LIST_FIRST(sq)) != NULL) {
+               KASSERT(l->l_sleepq == sq);
+               KASSERT(l->l_mutex == mp);
+               KASSERT(l->l_wchan == cv);
+               if ((l->l_flag & LW_SINTR) == 0) {
+                       sleepq_remove(sq, l);
+                       break;
+               } else
+                       sleepq_remove(sq, l);
        }
-       KASSERT(l->l_sleepq == sq);
-       KASSERT(l->l_mutex == mp);
-       KASSERT(l->l_wchan == cv);
-       CV_LOCKDEBUG_PROCESS(l, cv);
-       sleepq_remove(sq, l);
        mutex_spin_exit(mp);
-
-       KASSERT(cv_is_valid(cv));
 }
 
 /*
@@ -532,7 +463,6 @@
 cv_broadcast(kcondvar_t *cv)
 {
 
-       /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
        KASSERT(cv_is_valid(cv));
 
        if (__predict_false(!LIST_EMPTY(CV_SLEEPQ(cv))))  
@@ -551,23 +481,17 @@
 {
        sleepq_t *sq;
        kmutex_t *mp;
-       lwp_t *l, *next;



Home | Main Index | Thread Index | Old Index