Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys Fix kern/45093 as discussed on tech-kern@:



details:   https://anonhg.NetBSD.org/src/rev/ad3ab00687f2
branches:  trunk
changeset: 766819:ad3ab00687f2
user:      bouyer <bouyer%NetBSD.org@localhost>
date:      Sat Jul 02 17:53:50 2011 +0000

description:
Fix kern/45093 as discussed on tech-kern@:
http://mail-index.netbsd.org/tech-kern/2011/06/17/msg010734.html

The cause of the problem is that the so_pendfree is processed with
the softnet_lock held at one point, and processing the list
calls sodoloanfree() which may kpause(). As the thread sleeps with
softnet_lock held, it ultimately cause a deadlock (see the PR or tech-kern
thread for details).
Although it should be possible to call sodopendfree() after releasing
the socket lock, it's not so easy to know where he socket lock is held and
where it's not, so we may hit the issue again later.
Add a kernel thread to handle the so_pendfree list, and wake up this
thread when adding mbufs to this list. Get rid of the various sodopendfree()
calls, hopefully fixing definitively the problem.

diffstat:

 sys/kern/init_main.c   |    8 ++-
 sys/kern/uipc_socket.c |  110 ++++++++++++++++++++----------------------------
 sys/sys/socketvar.h    |    3 +-
 3 files changed, 53 insertions(+), 68 deletions(-)

diffs (266 lines):

diff -r 018c2a803dab -r ad3ab00687f2 sys/kern/init_main.c
--- a/sys/kern/init_main.c      Sat Jul 02 17:37:28 2011 +0000
+++ b/sys/kern/init_main.c      Sat Jul 02 17:53:50 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: init_main.c,v 1.432 2011/06/12 03:35:56 rmind Exp $    */
+/*     $NetBSD: init_main.c,v 1.433 2011/07/02 17:53:50 bouyer Exp $   */
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -97,7 +97,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.432 2011/06/12 03:35:56 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.433 2011/07/02 17:53:50 bouyer Exp $");
 
 #include "opt_ddb.h"
 #include "opt_ipsec.h"
@@ -495,6 +495,10 @@
 
        spldebug_start();
 
+       /* Initialize sockets thread(s) */
+       soinit1();
+
+
        /* Configure the system hardware.  This will enable interrupts. */
        configure();
 
diff -r 018c2a803dab -r ad3ab00687f2 sys/kern/uipc_socket.c
--- a/sys/kern/uipc_socket.c    Sat Jul 02 17:37:28 2011 +0000
+++ b/sys/kern/uipc_socket.c    Sat Jul 02 17:53:50 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: uipc_socket.c,v 1.204 2011/06/26 16:42:42 christos Exp $       */
+/*     $NetBSD: uipc_socket.c,v 1.205 2011/07/02 17:53:50 bouyer Exp $ */
 
 /*-
  * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -63,7 +63,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.204 2011/06/26 16:42:42 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.205 2011/07/02 17:53:50 bouyer Exp $");
 
 #include "opt_compat_netbsd.h"
 #include "opt_sock_counters.h"
@@ -92,6 +92,7 @@
 #include <sys/kauth.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
+#include <sys/kthread.h>
 
 #ifdef COMPAT_50
 #include <compat/sys/time.h>
@@ -144,7 +145,7 @@
 #endif
 
 static kmutex_t so_pendfree_lock;
-static struct mbuf *so_pendfree;
+static struct mbuf *so_pendfree = NULL;
 
 #ifndef SOMAXKVA
 #define        SOMAXKVA (16 * 1024 * 1024)
@@ -157,8 +158,9 @@
 
 #define        SOCK_LOAN_CHUNK         65536
 
-static size_t sodopendfree(void);
-static size_t sodopendfreel(void);
+static void sopendfree_thread(void *);
+static kcondvar_t pendfree_thread_cv;
+static lwp_t *sopendfree_lwp;
 
 static void sysctl_kern_somaxkva_setup(void);
 static struct sysctllog *socket_sysctllog;
@@ -170,21 +172,6 @@
 
        mutex_enter(&so_pendfree_lock);
        while (socurkva + len > somaxkva) {
-               size_t freed;
-
-               /*
-                * try to do pendfree.
-                */
-
-               freed = sodopendfreel();
-
-               /*
-                * if some kva was freed, try again.
-                */
-
-               if (freed)
-                       continue;
-
                SOSEND_COUNTER_INCR(&sosend_kvalimit);
                error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock);
                if (error) {
@@ -277,56 +264,45 @@
        sokvafree(sva, len);
 }
 
-static size_t
-sodopendfree(void)
-{
-       size_t rv;
-
-       if (__predict_true(so_pendfree == NULL))
-               return 0;
-
-       mutex_enter(&so_pendfree_lock);
-       rv = sodopendfreel();
-       mutex_exit(&so_pendfree_lock);
-
-       return rv;
-}
-
 /*
- * sodopendfreel: free mbufs on "pendfree" list.
+ * sopendfree_thread: free mbufs on "pendfree" list.
  * unlock and relock so_pendfree_lock when freeing mbufs.
- *
- * => called with so_pendfree_lock held.
  */
 
-static size_t
-sodopendfreel(void)
+static void
+sopendfree_thread(void *v)
 {
        struct mbuf *m, *next;
-       size_t rv = 0;
+       size_t rv;
 
-       KASSERT(mutex_owned(&so_pendfree_lock));
+       mutex_enter(&so_pendfree_lock);
 
-       while (so_pendfree != NULL) {
-               m = so_pendfree;
-               so_pendfree = NULL;
-               mutex_exit(&so_pendfree_lock);
+       for (;;) {
+               rv = 0;
+               while (so_pendfree != NULL) {
+                       m = so_pendfree;
+                       so_pendfree = NULL;
+                       mutex_exit(&so_pendfree_lock);
 
-               for (; m != NULL; m = next) {
-                       next = m->m_next;
-                       KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0);
-                       KASSERT(m->m_ext.ext_refcnt == 0);
+                       for (; m != NULL; m = next) {
+                               next = m->m_next;
+                               KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0);
+                               KASSERT(m->m_ext.ext_refcnt == 0);
 
-                       rv += m->m_ext.ext_size;
-                       sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf,
-                           m->m_ext.ext_size);
-                       pool_cache_put(mb_cache, m);
+                               rv += m->m_ext.ext_size;
+                               sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf,
+                                   m->m_ext.ext_size);
+                               pool_cache_put(mb_cache, m);
+                       }
+
+                       mutex_enter(&so_pendfree_lock);
                }
-
-               mutex_enter(&so_pendfree_lock);
+               if (rv)
+                       cv_broadcast(&socurkva_cv);
+               cv_wait(&pendfree_thread_cv, &so_pendfree_lock);
        }
-
-       return (rv);
+       panic("sopendfree_thread");
+       /* NOTREACHED */
 }
 
 void
@@ -345,7 +321,7 @@
        mutex_enter(&so_pendfree_lock);
        m->m_next = so_pendfree;
        so_pendfree = m;
-       cv_broadcast(&socurkva_cv);
+       cv_signal(&pendfree_thread_cv);
        mutex_exit(&so_pendfree_lock);
 }
 
@@ -415,7 +391,6 @@
        KASSERT(ce == &sokva_reclaimerentry);
        KASSERT(obj == NULL);
 
-       sodopendfree();
        if (!vm_map_starved_p(kernel_map)) {
                return CALLBACK_CHAIN_ABORT;
        }
@@ -497,6 +472,7 @@
        mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM);
        softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
        cv_init(&socurkva_cv, "sokva");
+       cv_init(&pendfree_thread_cv, "sopendfr");
        soinit2();
 
        /* Set the initial adjusted socket buffer size. */
@@ -510,6 +486,15 @@
            socket_listener_cb, NULL);
 }
 
+void
+soinit1(void)
+{
+       int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
+           sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree");
+       if (error)
+               panic("soinit1 %d", error);
+}
+
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -878,7 +863,6 @@
                error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
                    NULL, NULL, NULL, NULL);
        }
-       sodopendfree();
        return (error);
 }
 
@@ -911,7 +895,6 @@
        short           wakeup_state = 0;
 
        p = l->l_proc;
-       sodopendfree();
        clen = 0;
 
        /*
@@ -1187,9 +1170,6 @@
        else
                flags = 0;
 
-       if ((flags & MSG_DONTWAIT) == 0)
-               sodopendfree();
-
        if (flags & MSG_OOB) {
                m = m_get(M_WAIT, MT_DATA);
                solock(so);
diff -r 018c2a803dab -r ad3ab00687f2 sys/sys/socketvar.h
--- a/sys/sys/socketvar.h       Sat Jul 02 17:37:28 2011 +0000
+++ b/sys/sys/socketvar.h       Sat Jul 02 17:53:50 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: socketvar.h,v 1.125 2011/06/26 16:43:12 christos Exp $ */
+/*     $NetBSD: socketvar.h,v 1.126 2011/07/02 17:53:51 bouyer Exp $   */
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -280,6 +280,7 @@
 int    sbwait(struct sockbuf *);
 int    sb_max_set(u_long);
 void   soinit(void);
+void   soinit1(void);
 void   soinit2(void);
 int    soabort(struct socket *);
 int    soaccept(struct socket *, struct mbuf *);



Home | Main Index | Thread Index | Old Index