NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/45492: buffer leak in cgd(4) (possibly not cgd's fault)



>Number:         45492
>Category:       kern
>Synopsis:       buffer leak in cgd(4) (possibly not cgd's fault)
>Confidential:   no
>Severity:       critical
>Priority:       high
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Oct 18 18:30:00 +0000 2011
>Originator:     Alan Barrett
>Release:        NetBSD 5.99.56
>Organization:
Not much
>Environment:
System: NetBSD 5.99.56 i386
>Description:
Under heavy i/o load, the cgd(4) driver allocates kernel memory
that is never freed.

After a while, this can lead to deadlocks when all kernel memory
is exhausted.

>How-To-Repeat:
1. Apply the appended patch to src/sys/dev/cgd.c, to make it print
   statistics.
2. Build a kernel with options DEBUG and the patch from (1) above.
3. Boot the kernel from (2) above.
4. Remove all non-cgd swap devices, and add swap on cgd.  For example:

   $ swapctl -l
   Device      512-blocks     Used    Avail Capacity  Priority
   /dev/wd0b     50331648        0 50331648     0%    0
   $ sudo swapctl -d /dev/wd0b
   $ swapctl -l
   no swap devices configured
   $ cgdconfig -g -k randomkey aes-cbc 256 \
     | sudo cgdconfig -V none cgd0 /dev/wd0b /dev/stdin
   $ sudo disklabel cgd0 | grep '^ [a-z]:'
    a:  50331648         0     4.2BSD      0     0     0  # (Cyl. 0 - 24575)
    d:  50331648         0     unused      0     0        # (Cyl. 0 - 24575)
   $ sudo swapctl -a /dev/cgd0d:w
   $ swapctl -l
   Device      512-blocks     Used    Avail Capacity  Priority
   /dev/cgd0d    50331648        0 50331648     0%    0

5. Do something that causes lots of swap activity.  For example:

   $ cd /usr/src/tests/lib/libc/regex
   $ make USETOOLS=never dependall
   [output not shown]
   $ atf-run t_exhaust & atf_run t_exhaust &
   [output not shown]
   [wait a little while]
   $ pkill -9 t_exhaust

6. Observe that cgd's buffer usage increases without bound, and that
   cgdstart is called more often than cgdiodone.  For example:

   $ dmesg | grep cgd_putdata | tail -1
   cgd_putdata: nbufs=5034 (nget-nput)=5031 (nstart-niodone)=5031 nget=5330666 
nput=5325635 nstart=7784381 niodone=7779350

   The desired result is that (nget-nput) and (nstart-niodone) may
   grow a little during bursts of high activity, but should return
   to 0 or 1 when there is no swapping, and nbufs should similarly
   return to 2 or 3 when there is no swapping.  The exact values
   depend on whether or not the last call to cgdiodone had (data
   == cs->sc_data).

Here'e the patch mentioned in (1) above:

--- cgd.c       14 Oct 2011 09:23:30 -0000      1.75
+++ cgd.c       17 Oct 2011 13:35:38 -0000
@@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.75
 #include <sys/bufq.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
+#include <sys/mutex.h>
 #include <sys/pool.h>
 #include <sys/ioctl.h>
 #include <sys/device.h>
@@ -124,22 +125,42 @@ extern struct cfdriver cgd_cd;
 #endif
 
 #ifdef DEBUG
-int cgddebug = 0;
 
 #define CGDB_FOLLOW    0x1
-#define CGDB_IO        0x2
+#define CGDB_IO                0x2
 #define CGDB_CRYPTO    0x4
+#define CGDB_STATS     0x8
 
 #define IFDEBUG(x,y)           if (cgddebug & (x)) y
 #define DPRINTF(x,y)           IFDEBUG(x, printf y)
 #define DPRINTF_FOLLOW(y)      DPRINTF(CGDB_FOLLOW, y)
 
+#define ADJSTATS(_member, _op) /* e.g. ADJSTATS(nfoo, ++) */   \
+       (mutex_enter(&cgdstats_lock),                           \
+       (cgdstats._member _op),                                 \
+       mutex_exit(&cgdstats_lock))
+
+int cgddebug = CGDB_STATS;
+
+static kmutex_t cgdstats_lock;
+static volatile bool cgdstats_lock_inited = false;
+struct {
+       int nbufs;
+       uint64_t nput;
+       uint64_t nget;
+       uint64_t nread;
+       uint64_t nwrite;
+       uint64_t nstart;
+       uint64_t niodone;
+} cgdstats = {.nbufs = 0};
+
 static void    hexprint(const char *, void *, int);
 
 #else
 #define IFDEBUG(x,y)
 #define DPRINTF(x,y)
 #define DPRINTF_FOLLOW(y)
+#define ADJSTATS(_member,_op)
 #endif
 
 #ifdef DIAGNOSTIC
@@ -185,12 +206,19 @@ cgd_attach(device_t parent, device_t sel
 {
        struct cgd_softc *sc = device_private(self);
 
+#ifdef DEBUG
+       if (!cgdstats_lock_inited) {
+               mutex_init(&cgdstats_lock, MUTEX_DEFAULT, IPL_NONE);
+               cgdstats_lock_inited = true;
+       }
+#endif
+
        sc->sc_dev = self;
        simple_lock_init(&sc->sc_slock);
        dk_sc_init(&sc->sc_dksc, sc, device_xname(sc->sc_dev));
        disk_init(&sc->sc_dksc.sc_dkdev, sc->sc_dksc.sc_xname, &cgddkdriver);
 
-        if (!pmf_device_register(self, NULL, NULL))
+       if (!pmf_device_register(self, NULL, NULL))
                aprint_error_dev(self, "unable to register power management 
hooks\n");
 }
 
@@ -232,6 +260,7 @@ cgd_spawn(int unit)
        cfdata_t cf;
 
        cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
+       ADJSTATS(nbufs, ++);
        cf->cf_name = cgd_cd.cd_name;
        cf->cf_atname = cgd_cd.cd_name;
        cf->cf_unit = unit;
@@ -251,6 +280,7 @@ cgd_destroy(device_t dev)
        if (error)
                return error;
        free(cf, M_DEVBUF);
+       ADJSTATS(nbufs, --);
        return 0;
 }
 
@@ -259,6 +289,13 @@ cgdopen(dev_t dev, int flags, int fmt, s
 {
        struct  cgd_softc *cs;
 
+#ifdef DEBUG
+       if (!cgdstats_lock_inited) {
+               mutex_init(&cgdstats_lock, MUTEX_DEFAULT, IPL_NONE);
+               cgdstats_lock_inited = true;
+       }
+#endif
+
        DPRINTF_FOLLOW(("cgdopen(0x%"PRIx64", %d)\n", dev, flags));
        GETCGD_SOFTC(cs, dev);
        return dk_open(di, &cs->sc_dksc, dev, flags, fmt, l);
@@ -339,6 +376,7 @@ cgd_getdata(struct dk_softc *dksc, unsig
        struct  cgd_softc *cs =dksc->sc_osc;
        void *  data = NULL;
 
+       ADJSTATS(nget, ++);
        simple_lock(&cs->sc_slock);
        if (cs->sc_data_used == 0) {
                cs->sc_data_used = 1;
@@ -349,6 +387,7 @@ cgd_getdata(struct dk_softc *dksc, unsig
        if (data)
                return data;
 
+       ADJSTATS(nbufs, ++);
        return malloc(size, M_DEVBUF, M_NOWAIT);
 }
 
@@ -357,13 +396,26 @@ cgd_putdata(struct dk_softc *dksc, void 
 {
        struct  cgd_softc *cs =dksc->sc_osc;
 
+       ADJSTATS(nput, ++);
+       simple_lock(&cs->sc_slock);
        if (data == cs->sc_data) {
-               simple_lock(&cs->sc_slock);
                cs->sc_data_used = 0;
-               simple_unlock(&cs->sc_slock);
        } else {
                free(data, M_DEVBUF);
+               ADJSTATS(nbufs, --);
+               DPRINTF(CGDB_STATS,
+                   ("cgd_putdata: nbufs=%d"
+                    " (nget-nput)=%"PRIu64
+                    " (nstart-niodone)=%"PRIu64
+                    " nget=%"PRIu64" nput=%"PRIu64
+                    " nstart=%"PRIu64" niodone=%"PRIu64"\n",
+                    cgdstats.nbufs,
+                    (cgdstats.nget - cgdstats.nput),
+                    (cgdstats.nstart - cgdstats.niodone),
+                    cgdstats.nget, cgdstats.nput,
+                    cgdstats.nstart, cgdstats.niodone));
        }
+       simple_unlock(&cs->sc_slock);
 }
 
 static int
@@ -376,6 +428,7 @@ cgdstart(struct dk_softc *dksc, struct b
        daddr_t bn;
        struct  vnode *vp;
 
+       ADJSTATS(nstart, ++);
        DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp));
        disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */
 
@@ -441,6 +494,7 @@ cgdiodone(struct buf *nbp)
 
        KDASSERT(cs);
 
+       ADJSTATS(niodone, ++);
        DPRINTF_FOLLOW(("cgdiodone(%p)\n", nbp));
        DPRINTF(CGDB_IO, ("cgdiodone: bp %p bcount %d resid %d\n",
            obp, obp->b_bcount, obp->b_resid));
@@ -488,6 +542,7 @@ cgdread(dev_t dev, struct uio *uio, int 
        struct  cgd_softc *cs;
        struct  dk_softc *dksc;
 
+       ADJSTATS(nread, ++);
        DPRINTF_FOLLOW(("cgdread(0x%llx, %p, %d)\n",
            (unsigned long long)dev, uio, flags));
        GETCGD_SOFTC(cs, dev);
@@ -504,6 +559,7 @@ cgdwrite(dev_t dev, struct uio *uio, int
        struct  cgd_softc *cs;
        struct  dk_softc *dksc;
 
+       ADJSTATS(nwrite, ++);
        DPRINTF_FOLLOW(("cgdwrite(0x%"PRIx64", %p, %d)\n", dev, uio, flags));
        GETCGD_SOFTC(cs, dev);
        dksc = &cs->sc_dksc;
@@ -676,7 +732,9 @@ cgd_ioctl_set(struct cgd_softc *cs, void
 
        bufq_alloc(&cs->sc_dksc.sc_bufq, "fcfs", 0);
 
+       assert(cs->sc_data == NULL);
        cs->sc_data = malloc(MAXPHYS, M_DEVBUF, M_WAITOK);
+       ADJSTATS(nbufs, ++);
        cs->sc_data_used = 0;
 
        cs->sc_dksc.sc_flags |= DKF_INITED;
@@ -723,6 +781,7 @@ cgd_ioctl_clr(struct cgd_softc *cs, stru
        cs->sc_cfuncs->cf_destroy(cs->sc_cdata.cf_priv);
        free(cs->sc_tpath, M_DEVBUF);
        free(cs->sc_data, M_DEVBUF);
+       ADJSTATS(nbufs, -= 2);
        cs->sc_data_used = 0;
        cs->sc_dksc.sc_flags &= ~DKF_INITED;
        disk_detach(&cs->sc_dksc.sc_dkdev);

>Fix:
Unknown.  It's possible that the bug is in the generic swap or 
disk layers, or in the wd(4) driver, even though the symptopms 
appear in the wd(4) driver.




Home | Main Index | Thread Index | Old Index