tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

ataraid(4): fix for PR kern/38273



Hi,

for the past days I've been trying to fix the problem described
in PR/kern/38273 reported by Greg A. Woods.

There are some problems currently between ld(4) and ataraid(4):

ldstart() acquires the ld's spin lock and runs the sc->sc_start
callback function. This one points to ld_ataraid_start_raid0/span().
While on this function, there are two LOCKDEBUG panics:

1) ld_ataraid_make_cbuf() calls buf_init() that results in a LOCKDEBUG
   panic.
2) ld_ataraid_start_raid0/span() acquires the adaptive mutex v_interlock
   for the buf.

To fix these issues I came up with a patch that does the following:

- Uses a per device (ld) pool_cache(9) pool and not global as before.
- Uses the ctor/dtor arguments of pool_cache(9) to run buf_init() releasing
  and reacquiring the ld's spin lock.
- Uses a softint(9) to fire off requests to avoid acquiring the adaptive
  mutex with the spin lock held.

I've tested it with a LOCKDEBUG/DIAGNOSTIC/DEBUG kernel and didn't see
any problems, deadlocks or any other kind of problem.

If you know any other way or the proper way to fix this, please let me know.
That patch is the only one that really works all the time, compared to
other options that I tried.

Please review and/or test if you are able to.

Index: ld_ataraid.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ata/ld_ataraid.c,v
retrieving revision 1.32
diff -b -u -p -r1.32 ld_ataraid.c
--- ld_ataraid.c        16 Sep 2008 11:45:30 -0000      1.32
+++ ld_ataraid.c        17 Sep 2008 13:13:08 -0000
@@ -90,6 +90,12 @@ struct ld_ataraid_softc {
        struct vnode *sc_vnodes[ATA_RAID_MAX_DISKS];
 
        void    (*sc_iodone)(struct buf *);
+
+       pool_cache_t sc_cbufpool;
+
+       SIMPLEQ_HEAD(, cbuf) sc_cbufq;
+
+       void    *sc_sih_cookie;
 };
 
 static int     ld_ataraid_match(struct device *, struct cfdata *, void *);
@@ -97,6 +103,10 @@ static void ld_ataraid_attach(struct dev
 
 static int     ld_ataraid_dump(struct ld_softc *, void *, int, int);
 
+static int     cbufpool_ctor(void *, void *, int);
+static void    cbufpool_dtor(void *, void *);
+
+static void    ld_ataraid_start_vstrategy(void *);
 static int     ld_ataraid_start_span(struct ld_softc *, struct buf *);
 
 static int     ld_ataraid_start_raid0(struct ld_softc *, struct buf *);
@@ -113,9 +123,6 @@ static int  ld_ataraid_biodisk(struct ld_
 CFATTACH_DECL_NEW(ld_ataraid, sizeof(struct ld_ataraid_softc),
     ld_ataraid_match, ld_ataraid_attach, NULL, NULL);
 
-static int ld_ataraid_initialized;
-static struct pool ld_ataraid_cbufpl;
-
 struct cbuf {
        struct buf      cb_buf;         /* new I/O buf */
        struct buf      *cb_obp;        /* ptr. to original I/O buf */
@@ -127,8 +134,8 @@ struct cbuf {
 #define        CBUF_IODONE     0x00000001      /* I/O is already successfully 
done */
 };
 
-#define        CBUF_GET()      pool_get(&ld_ataraid_cbufpl, PR_NOWAIT);
-#define        CBUF_PUT(cbp)   pool_put(&ld_ataraid_cbufpl, (cbp))
+#define        CBUF_GET()      pool_cache_get(sc->sc_cbufpool, PR_NOWAIT);
+#define        CBUF_PUT(cbp)   pool_cache_put(sc->sc_cbufpool, (cbp))
 
 static int
 ld_ataraid_match(device_t parent, cfdata_t match, void *aux)
@@ -151,11 +158,10 @@ ld_ataraid_attach(device_t parent, devic
 
        ld->sc_dv = self;
 
-       if (ld_ataraid_initialized == 0) {
-               ld_ataraid_initialized = 1;
-               pool_init(&ld_ataraid_cbufpl, sizeof(struct cbuf), 0,
-                   0, 0, "ldcbuf", NULL, IPL_BIO);
-       }
+       sc->sc_cbufpool = pool_cache_init(sizeof(struct cbuf), 0,
+           0, 0, "ldcbuf", NULL, IPL_BIO, cbufpool_ctor, cbufpool_dtor, sc);
+       sc->sc_sih_cookie = softint_establish(SOFTINT_BIO,
+           ld_ataraid_start_vstrategy, sc);
 
        sc->sc_aai = aai;       /* this data persists */
 
@@ -246,9 +252,33 @@ ld_ataraid_attach(device_t parent, devic
                panic("%s: bioctl registration failed\n",
                    device_xname(ld->sc_dv));
 #endif
+       SIMPLEQ_INIT(&sc->sc_cbufq);
        ldattach(ld);
 }
 
+static int
+cbufpool_ctor(void *arg, void *obj, int flags)
+{
+       struct ld_ataraid_softc *sc = arg;
+       struct ld_softc *ld = &sc->sc_ld;
+       struct cbuf *cbp = obj;
+
+       /* We release/reacquire the spinlock before calling buf_init() */
+       mutex_exit(&ld->sc_mutex);
+       buf_init(&cbp->cb_buf);
+       mutex_enter(&ld->sc_mutex);
+
+       return 0;
+}
+
+static void
+cbufpool_dtor(void *arg, void *obj)
+{
+       struct cbuf *cbp = obj;
+
+       buf_destroy(&cbp->cb_buf);
+}
+
 static struct cbuf *
 ld_ataraid_make_cbuf(struct ld_ataraid_softc *sc, struct buf *bp,
     u_int comp, daddr_t bn, void *addr, long bcount)
@@ -257,8 +287,7 @@ ld_ataraid_make_cbuf(struct ld_ataraid_s
 
        cbp = CBUF_GET();
        if (cbp == NULL)
-               return (NULL);
-       buf_init(&cbp->cb_buf);
+               return NULL;
        cbp->cb_buf.b_flags = bp->b_flags;
        cbp->cb_buf.b_oflags = bp->b_oflags;
        cbp->cb_buf.b_cflags = bp->b_cflags;
@@ -277,7 +306,24 @@ ld_ataraid_make_cbuf(struct ld_ataraid_s
        cbp->cb_other = NULL;
        cbp->cb_flags = 0;
 
-       return (cbp);
+       return cbp;
+}
+
+static void
+ld_ataraid_start_vstrategy(void *arg)
+{
+       struct ld_ataraid_softc *sc = arg;
+       struct cbuf *cbp;
+
+       while ((cbp = SIMPLEQ_FIRST(&sc->sc_cbufq)) != NULL) {
+               SIMPLEQ_REMOVE_HEAD(&sc->sc_cbufq, cb_q);
+               if ((cbp->cb_buf.b_flags & B_READ) == 0) {
+                       mutex_enter(&cbp->cb_buf.b_vp->v_interlock);
+                       cbp->cb_buf.b_vp->v_numoutput++;
+                       mutex_exit(&cbp->cb_buf.b_vp->v_interlock);
+               }
+               VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf);
+       }
 }
 
 static int
@@ -286,7 +332,6 @@ ld_ataraid_start_span(struct ld_softc *l
        struct ld_ataraid_softc *sc = (void *) ld;
        struct ataraid_array_info *aai = sc->sc_aai;
        struct ataraid_disk_info *adi;
-       SIMPLEQ_HEAD(, cbuf) cbufq;
        struct cbuf *cbp;
        char *addr;
        daddr_t bn;
@@ -294,7 +339,6 @@ ld_ataraid_start_span(struct ld_softc *l
        u_int comp;
 
        /* Allocate component buffers. */
-       SIMPLEQ_INIT(&cbufq);
        addr = bp->b_data;
 
        /* Find the first component. */
@@ -316,12 +360,11 @@ ld_ataraid_start_span(struct ld_softc *l
                cbp = ld_ataraid_make_cbuf(sc, bp, comp, bn, addr, rcount);
                if (cbp == NULL) {
                        /* Free the already allocated component buffers. */
-                       while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
-                               SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
-                               buf_destroy(&cbp->cb_buf);
+                       while ((cbp = SIMPLEQ_FIRST(&sc->sc_cbufq)) != NULL) {
+                               SIMPLEQ_REMOVE_HEAD(&sc->sc_cbufq, cb_q);
                                CBUF_PUT(cbp);
                        }
-                       return (EAGAIN);
+                       return EAGAIN;
                }
 
                /*
@@ -331,31 +374,22 @@ ld_ataraid_start_span(struct ld_softc *l
                adi = &aai->aai_disks[++comp];
                bn = 0;
 
-               SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q);
+               SIMPLEQ_INSERT_TAIL(&sc->sc_cbufq, cbp, cb_q);
                addr += rcount;
        }
 
        /* Now fire off the requests. */
-       while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
-               SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
-               if ((cbp->cb_buf.b_flags & B_READ) == 0) {
-                       mutex_enter(&cbp->cb_buf.b_vp->v_interlock);
-                       cbp->cb_buf.b_vp->v_numoutput++;
-                       mutex_exit(&cbp->cb_buf.b_vp->v_interlock);
-               }
-               VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf);
-       }
+       softint_schedule(sc->sc_sih_cookie);
 
-       return (0);
+       return 0;
 }
 
 static int
 ld_ataraid_start_raid0(struct ld_softc *ld, struct buf *bp)
 {
-       struct ld_ataraid_softc *sc = (void *) ld;
+       struct ld_ataraid_softc *sc = (void *)ld;
        struct ataraid_array_info *aai = sc->sc_aai;
        struct ataraid_disk_info *adi;
-       SIMPLEQ_HEAD(, cbuf) cbufq;
        struct cbuf *cbp, *other_cbp;
        char *addr;
        daddr_t bn, cbn, tbn, off;
@@ -363,10 +397,9 @@ ld_ataraid_start_raid0(struct ld_softc *
        u_int comp;
        const int read = bp->b_flags & B_READ;
        const int mirror = aai->aai_level & AAI_L_RAID1;
-       int error;
+       int error = 0;
 
        /* Allocate component buffers. */
-       SIMPLEQ_INIT(&cbufq);
        addr = bp->b_data;
        bn = bp->b_rawblkno;
 
@@ -417,14 +450,13 @@ resource_shortage:
                        error = EAGAIN;
 free_and_exit:
                        /* Free the already allocated component buffers. */
-                       while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
-                               SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
-                               buf_destroy(&cbp->cb_buf);
+                       while ((cbp = SIMPLEQ_FIRST(&sc->sc_cbufq)) != NULL) {
+                               SIMPLEQ_REMOVE_HEAD(&sc->sc_cbufq, cb_q);
                                CBUF_PUT(cbp);
                        }
-                       return (error);
+                       return error;
                }
-               SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q);
+               SIMPLEQ_INSERT_TAIL(&sc->sc_cbufq, cbp, cb_q);
                if (mirror && !read && comp < aai->aai_width) {
                        comp += aai->aai_width;
                        adi = &aai->aai_disks[comp];
@@ -433,7 +465,8 @@ free_and_exit:
                                    comp, cbn, addr, rcount);
                                if (other_cbp == NULL)
                                        goto resource_shortage;
-                               SIMPLEQ_INSERT_TAIL(&cbufq, other_cbp, cb_q);
+                               SIMPLEQ_INSERT_TAIL(&sc->sc_cbufq,
+                                   other_cbp, cb_q);
                                other_cbp->cb_other = cbp;
                                cbp->cb_other = other_cbp;
                        }
@@ -443,17 +476,9 @@ free_and_exit:
        }
 
        /* Now fire off the requests. */
-       while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
-               SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
-               if ((cbp->cb_buf.b_flags & B_READ) == 0) {
-                       mutex_enter(&cbp->cb_buf.b_vp->v_interlock);
-                       cbp->cb_buf.b_vp->v_numoutput++;
-                       mutex_exit(&cbp->cb_buf.b_vp->v_interlock);
-               }
-               VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf);
-       }
+       softint_schedule(sc->sc_sih_cookie);
 
-       return (0);
+       return error;
 }
 
 /*
@@ -530,7 +555,6 @@ ld_ataraid_iodone_raid0(struct buf *vbp)
                        other_cbp->cb_flags |= CBUF_IODONE;
        }
        count = cbp->cb_buf.b_bcount;
-       buf_destroy(&cbp->cb_buf);
        CBUF_PUT(cbp);
 
        if (other_cbp != NULL)






Home | Main Index | Thread Index | Old Index