Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/ufs/mfs Redo the way MFS does I/O to the server's address space



details:   https://anonhg.NetBSD.org/src/rev/1cc0321d8064
branches:  trunk
changeset: 486240:1cc0321d8064
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Tue May 16 17:20:23 2000 +0000

description:
Redo the way MFS does I/O to the server's address space.  Instead of
queueing up buffers and awakening the MFS server process to do the I/O,
we do the I/O to the server process's address space directly using
facilities provided by UVM.

This makes it possible for buffers that are being flushed while the
MFS is being unmounted to actually complete their I/O; previously the
I/O would fail if the server process wasn't in the MFS idle loop (i.e.
it had been signaled and was attempting to exit).

Should fix kern/10122 (I can no longer reproduce the problem described
in the PR when running with these changes), and any number of other
MFS-related complaints made by people over time.

diffstat:

 sys/ufs/mfs/mfs_extern.h |    3 +-
 sys/ufs/mfs/mfs_vfsops.c |   26 ++----
 sys/ufs/mfs/mfs_vnops.c  |  169 ++++++++++++++++++++++++++++++++++------------
 sys/ufs/mfs/mfsnode.h    |    3 +-
 4 files changed, 137 insertions(+), 64 deletions(-)

diffs (truncated from 340 to 300 lines):

diff -r ec4abb69b74c -r 1cc0321d8064 sys/ufs/mfs/mfs_extern.h
--- a/sys/ufs/mfs/mfs_extern.h  Tue May 16 16:54:33 2000 +0000
+++ b/sys/ufs/mfs/mfs_extern.h  Tue May 16 17:20:23 2000 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: mfs_extern.h,v 1.9 2000/03/16 18:20:07 jdolecek Exp $  */
+/*     $NetBSD: mfs_extern.h,v 1.10 2000/05/16 17:20:23 thorpej Exp $  */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -62,7 +62,6 @@
 /* mfs_vnops.c */
 int    mfs_open        __P((void *));
 int    mfs_strategy    __P((void *));
-void   mfs_doio        __P((struct buf *, caddr_t));
 int    mfs_bmap        __P((void *));
 int    mfs_close       __P((void *));
 int    mfs_inactive    __P((void *));
diff -r ec4abb69b74c -r 1cc0321d8064 sys/ufs/mfs/mfs_vfsops.c
--- a/sys/ufs/mfs/mfs_vfsops.c  Tue May 16 16:54:33 2000 +0000
+++ b/sys/ufs/mfs/mfs_vfsops.c  Tue May 16 17:20:23 2000 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: mfs_vfsops.c,v 1.26 2000/05/16 00:24:08 thorpej Exp $  */
+/*     $NetBSD: mfs_vfsops.c,v 1.27 2000/05/16 17:20:23 thorpej Exp $  */
 
 /*
  * Copyright (c) 1989, 1990, 1993, 1994
@@ -157,8 +157,10 @@
        mfsp->mfs_size = mfs_rootsize;
        mfsp->mfs_vnode = rootvp;
        mfsp->mfs_proc = NULL;          /* indicate kernel space */
+       mfsp->mfs_alive = 1;
        BUFQ_INIT(&mfsp->mfs_buflist);
        if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
+               mfsp->mfs_alive = 0;
                mp->mnt_op->vfs_refcount--;
                vfs_unbusy(mp);
                free(mp, M_MOUNT);
@@ -261,8 +263,9 @@
        mfsp->mfs_vnode = devvp;
        mfsp->mfs_proc = p;
        BUFQ_INIT(&mfsp->mfs_buflist);
+       mfsp->mfs_alive = 1;
        if ((error = ffs_mountfs(devvp, mp, p)) != 0) {
-               BUFQ_FIRST(&mfsp->mfs_buflist) = (struct buf *) -1;
+               mfsp->mfs_alive = 0;
                vrele(devvp);
                return (error);
        }
@@ -283,9 +286,8 @@
  * Used to grab the process and keep it in the kernel to service
  * memory filesystem I/O requests.
  *
- * Loop servicing I/O requests.
- * Copy the requested data into or out of the memory filesystem
- * address space.
+ * What we actually do is just wait until we're told to go away.
+ * mfs_strategy() does I/O directly to the process's address space.
  */
 /* ARGSUSED */
 int
@@ -296,12 +298,9 @@
 {
        struct vnode *vp = VFSTOUFS(mp)->um_devvp;
        struct mfsnode *mfsp = VTOMFS(vp);
-       struct buf *bp;
-       caddr_t base;
        int sleepreturn = 0;
 
-       base = mfsp->mfs_baseoff;
-       while (BUFQ_FIRST(&mfsp->mfs_buflist) != (struct buf *) -1) {
+       while (mfsp->mfs_alive) {
                /*
                 * If a non-ignored signal is received, try to unmount.
                 * If that fails, or the filesystem is already in the
@@ -316,13 +315,8 @@
                        sleepreturn = 0;
                        continue;
                }
-
-               while ((bp = BUFQ_FIRST(&mfsp->mfs_buflist)) != NULL) {
-                       BUFQ_REMOVE(&mfsp->mfs_buflist, bp);
-                       mfs_doio(bp, base);
-                       wakeup((caddr_t)bp);
-               }
-               sleepreturn = tsleep(vp, mfs_pri, "mfsidl", 0);
+               sleepreturn = tsleep((void *)&mfsp->mfs_alive, mfs_pri,
+                   "mfsidl", 0);
        }
        return (sleepreturn);
 }
diff -r ec4abb69b74c -r 1cc0321d8064 sys/ufs/mfs/mfs_vnops.c
--- a/sys/ufs/mfs/mfs_vnops.c   Tue May 16 16:54:33 2000 +0000
+++ b/sys/ufs/mfs/mfs_vnops.c   Tue May 16 17:20:23 2000 +0000
@@ -1,4 +1,40 @@
-/*     $NetBSD: mfs_vnops.c,v 1.21 2000/05/16 00:24:08 thorpej Exp $   */
+/*     $NetBSD: mfs_vnops.c,v 1.22 2000/05/16 17:20:23 thorpej Exp $   */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of Zembu Labs, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the NetBSD
+ *     Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright (c) 1989, 1993
@@ -44,12 +80,14 @@
 #include <sys/map.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <uvm/uvm_extern.h>
 
 #include <miscfs/genfs/genfs.h>
 #include <miscfs/specfs/specdev.h>
 
-#include <machine/vmparam.h>
-
 #include <ufs/mfs/mfsnode.h>
 #include <ufs/mfs/mfs_extern.h>
 
@@ -144,50 +182,101 @@
        struct buf *bp = ap->a_bp;
        struct mfsnode *mfsp;
        struct vnode *vp;
-       struct proc *p = curproc;               /* XXX */
+       struct proc *p;
+       struct uio auio;
+       struct iovec aiov;
+       caddr_t base;
 
        if (!vfinddev(bp->b_dev, VBLK, &vp) || vp->v_usecount == 0)
                panic("mfs_strategy: bad dev");
+
        mfsp = VTOMFS(vp);
-       /* check for mini-root access */
+       p = mfsp->mfs_proc;
+
+       bp->b_error = 0;
+
+       base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT);
+
+       /*
+        * We have to preserve order, so what we do here is put
+        * ourselves on the end of the queue and then wait for
+        * our buffer to bubble to the front.  This is necessary
+        * in case we sleep faulting in the pages for the I/O
+        * and another process comes in to do I/O while we're
+        * sleeping.
+        *
+        * The fact that our buffer is still at the front of
+        * the queue while we process the I/O serves as a
+        * mutex.
+        */
+       BUFQ_INSERT_TAIL(&mfsp->mfs_buflist, bp);
+       while (BUFQ_FIRST(&mfsp->mfs_buflist) != bp)
+               (void) tsleep(&mfsp->mfs_buflist, PRIBIO,
+                   "mfsio", 0);
+
        if (mfsp->mfs_proc == NULL) {
-               caddr_t base;
-
-               base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT);
+               /*
+                * Access to kernel-space miniroot.
+                */
                if (bp->b_flags & B_READ)
                        memcpy(bp->b_data, base, bp->b_bcount);
                else
                        memcpy(base, bp->b_data, bp->b_bcount);
-               biodone(bp);
-       } else if (mfsp->mfs_proc == p) {
-               mfs_doio(bp, mfsp->mfs_baseoff);
+       } else if (mfsp->mfs_proc == curproc) {
+               /*
+                * The MFS server process is doing the I/O itself
+                * (possibly unmounting the file system).  Do the
+                * I/O to the address space directly.
+                */
+               if (bp->b_flags & B_READ)
+                       bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
+               else
+                       bp->b_error = copyout(bp->b_data, base, bp->b_bcount);
        } else {
-               BUFQ_INSERT_TAIL(&mfsp->mfs_buflist, bp);
-               wakeup((caddr_t)vp);
-       }
-       return (0);
-}
+               aiov.iov_base = bp->b_data;
+               aiov.iov_len = bp->b_bcount;
 
-/*
- * Memory file system I/O.
- *
- * Trivial on the HP since buffer has already been mapping into KVA space.
- */
-void
-mfs_doio(bp, base)
-       struct buf *bp;
-       caddr_t base;
-{
-       base += (bp->b_blkno << DEV_BSHIFT);
-       if (bp->b_flags & B_READ)
-               bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
-       else
-               bp->b_error = copyout(bp->b_data, base, bp->b_bcount);
-       if (bp->b_error)
+               auio.uio_iov = &aiov;
+               auio.uio_iovcnt = 1;
+               auio.uio_offset = (vaddr_t)base;
+               auio.uio_resid = bp->b_bcount;
+               auio.uio_segflg = UIO_SYSSPACE;
+               auio.uio_rw = (bp->b_flags & B_READ) ? UIO_READ : UIO_WRITE;
+               auio.uio_procp = p;
+
+               /* XXXCDC: how should locking work here? */
+               if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1)) {
+                       bp->b_error = EFAULT;
+                       goto out;
+               }
+
+               /*
+                * XXX I don't think PHOLD()/PRELE() is really necessary,
+                * XXX here.  --thorpej
+                */
+
+               PHOLD(p);                       /* XXX */
+               p->p_vmspace->vm_refcnt++;      /* XXX */
+               bp->b_error = uvm_io(&p->p_vmspace->vm_map, &auio);
+               PRELE(p);                       /* XXX */
+               p->p_vmspace->vm_refcnt--;      /* XXX */
+       }
+ out:
+       if (bp->b_error != 0)
                bp->b_flags |= B_ERROR;
        else
                bp->b_resid = 0;
+
+       /*
+        * Pull our buffer off the front of the queue, thereby releasing
+        * the mutex, and awaken any threads waiting to do I/O.
+        */
+       BUFQ_REMOVE(&mfsp->mfs_buflist, bp);
+       if (BUFQ_FIRST(&mfsp->mfs_buflist) != NULL)
+               wakeup(&mfsp->mfs_buflist);
+
        biodone(bp);
+       return (0);
 }
 
 /*
@@ -230,18 +319,9 @@
        } */ *ap = v;
        struct vnode *vp = ap->a_vp;
        struct mfsnode *mfsp = VTOMFS(vp);
-       struct buf *bp;
        int error;
 
        /*
-        * Finish any pending I/O requests.
-        */
-       while ((bp = BUFQ_FIRST(&mfsp->mfs_buflist)) != NULL) {
-               BUFQ_REMOVE(&mfsp->mfs_buflist, bp);
-               mfs_doio(bp, mfsp->mfs_baseoff);
-               wakeup((caddr_t)bp);
-       }
-       /*
         * On last close of a memory filesystem



Home | Main Index | Thread Index | Old Index