tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: ffs fsync patch - block devices and wapbl



On Tue, Apr 19, 2011 at 11:44:16PM +0000, YAMAMOTO Takashi wrote:
> hi,
> 
> > Block devices have two different properties.  First they are device nodes
> > having access times etc. and reside on a file system.  Second they may have
> > a file system mounted on them.  With WAPBL it is important to handle these
> > properties separately.
> > 
> > Relevant PRs are 41189, 41192, 41977, 42149, 42551, 44377 and 44746 at 
> > least.
> > 
> > The attached diff should solve these problems by:
> > 
> > - Replace the ugly sync loop in ffs_full_fsync() with vflushbuf().  This
> >   loop is a left-over of softdeps and not needed anymore.
> > 
> > - Merge ffs_vfs_fsync() with ffs_full_fsync() so we have only on operation
> >   whether the request comes from ffs or from other file system via 
> > VFS_FSYNC().
> > 
> > - Take care which mount to test for WAPBL -- v_mount to update the times and
> >   wapbl_vptomp() to update the dirty blocks.  Never update times when called
> >   by VFS_FSYNC().
> > 
> > 
> > Comments or objections?
> 
> thanks for taking a look on this.
> 
> ffs_fsync should not need to know if the filesystem used on its VBLK node is
> ffs or not.  it should just call VFS_FSYNC as the other filesystems
> (ie. spec_fsync) do.  the FSYNC_VFS flag should go away.

You mean like the attached diff?  All we loose is the ctime update.

-- 
Juergen Hannken-Illjes - hannken%eis.cs.tu-bs.de@localhost - TU Braunschweig 
(Germany)
Index: sys/ufs/ffs/ffs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v
retrieving revision 1.117
diff -p -u -4 -r1.117 ffs_vnops.c
--- sys/ufs/ffs/ffs_vnops.c     15 Apr 2011 15:54:11 -0000      1.117
+++ sys/ufs/ffs/ffs_vnops.c     21 Apr 2011 08:17:47 -0000
@@ -174,9 +174,9 @@ const struct vnodeopv_entry_desc ffs_spe
        { &vop_poll_desc, spec_poll },                  /* poll */
        { &vop_kqfilter_desc, spec_kqfilter },          /* kqfilter */
        { &vop_revoke_desc, spec_revoke },              /* revoke */
        { &vop_mmap_desc, spec_mmap },                  /* mmap */
-       { &vop_fsync_desc, ffs_fsync },                 /* fsync */
+       { &vop_fsync_desc, spec_fsync },                /* fsync */
        { &vop_seek_desc, spec_seek },                  /* seek */
        { &vop_remove_desc, spec_remove },              /* remove */
        { &vop_link_desc, spec_link },                  /* link */
        { &vop_rename_desc, spec_rename },              /* rename */
@@ -398,23 +398,18 @@ out:
 /* ARGSUSED */
 int
 ffs_full_fsync(struct vnode *vp, int flags)
 {
-       struct buf *bp, *nbp;
-       int error, passes, skipmeta, waitfor, i;
+       int error, waitfor, i;
        struct mount *mp;
 
-       KASSERT(VTOI(vp) != NULL);
        KASSERT(vp->v_tag == VT_UFS);
+       KASSERT(VTOI(vp) != NULL);
+       KASSERT(vp->v_type != VCHR && vp->v_type != VBLK);
 
        error = 0;
 
        mp = vp->v_mount;
-       if (vp->v_type == VBLK && vp->v_specmountpoint != NULL) {
-               mp = vp->v_specmountpoint;
-       } else {
-               mp = vp->v_mount;
-       }
 
        /*
         * Flush all dirty data associated with the vnode.
         */
@@ -432,9 +427,8 @@ ffs_full_fsync(struct vnode *vp, int fla
                        return error;
        }
 
 #ifdef WAPBL
-       mp = wapbl_vptomp(vp);
        if (mp && mp->mnt_wapbl) {
                /*
                 * Don't bother writing out metadata if the syncer is
                 * making the request.  We will let the sync vnode
@@ -476,89 +470,15 @@ ffs_full_fsync(struct vnode *vp, int fla
                return error;
        }
 #endif /* WAPBL */
 
-       /*
-        * Write out metadata for non-logging file systems. XXX This block
-        * should be simplified now that softdep is gone.
-        */
-       passes = NIADDR + 1;
-       skipmeta = 0;
-       if (flags & FSYNC_WAIT)
-               skipmeta = 1;
-
-loop:
-       mutex_enter(&bufcache_lock);
-       LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
-               bp->b_cflags &= ~BC_SCANNED;
-       }
-       for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
-               nbp = LIST_NEXT(bp, b_vnbufs);
-               if (bp->b_cflags & (BC_BUSY | BC_SCANNED))
-                       continue;
-               if ((bp->b_oflags & BO_DELWRI) == 0)
-                       panic("ffs_fsync: not dirty");
-               if (skipmeta && bp->b_lblkno < 0)
-                       continue;
-               bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED;
-               mutex_exit(&bufcache_lock);
-               /*
-                * On our final pass through, do all I/O synchronously
-                * so that we can find out if our flush is failing
-                * because of write errors.
-                */
-               if (passes > 0 || !(flags & FSYNC_WAIT))
-                       (void) bawrite(bp);
-               else if ((error = bwrite(bp)) != 0)
-                       return (error);
-               /*
-                * Since we unlocked during the I/O, we need
-                * to start from a known point.
-                */
-               mutex_enter(&bufcache_lock);
-               nbp = LIST_FIRST(&vp->v_dirtyblkhd);
-       }
-       mutex_exit(&bufcache_lock);
-       if (skipmeta) {
-               skipmeta = 0;
-               goto loop;
+       error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
+       if (error == 0) {
+               waitfor = (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0;
+               error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | waitfor);
        }
-
-       if ((flags & FSYNC_WAIT) != 0) {
-               mutex_enter(&vp->v_interlock);
-               while (vp->v_numoutput) {
-                       cv_wait(&vp->v_cv, &vp->v_interlock);
-               }
-               mutex_exit(&vp->v_interlock);
-
-               /*
-                * Ensure that any filesystem metadata associated
-                * with the vnode has been written.
-                */
-               if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
-                       /*
-                       * Block devices associated with filesystems may
-                       * have new I/O requests posted for them even if
-                       * the vnode is locked, so no amount of trying will
-                       * get them clean. Thus we give block devices a
-                       * good effort, then just give up. For all other file
-                       * types, go around and try again until it is clean.
-                       */
-                       if (passes > 0) {
-                               passes--;
-                               goto loop;
-                       }
-#ifdef DIAGNOSTIC
-                       if (vp->v_type != VBLK)
-                               vprint("ffs_fsync: dirty", vp);
-#endif
-               }
-       }
-
-       waitfor = (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0;
-       error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | waitfor);
-
        if (error == 0 && (flags & FSYNC_CACHE) != 0) {
+               i = 1;
                (void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE,
                    kauth_cred_get());
        }
 
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.265
diff -p -u -4 -r1.265 ffs_vfsops.c
--- sys/ufs/ffs/ffs_vfsops.c    27 Mar 2011 08:04:50 -0000      1.265
+++ sys/ufs/ffs/ffs_vfsops.c    21 Apr 2011 08:17:47 -0000
@@ -2070,16 +2070,14 @@ ffs_suspendctl(struct mount *mp, int cmd
        }
 }
 
 /*
- * Synch vnode for a mounted file system.  This is called for foreign
- * vnodes, i.e. non-ffs.
+ * Synch vnode for a mounted file system.
  */
 static int
 ffs_vfs_fsync(vnode_t *vp, int flags)
 {
-       int error, passes, skipmeta, i, pflags;
-       buf_t *bp, *nbp;
+       int error, i, pflags;
 #ifdef WAPBL
        struct mount *mp;
 #endif
 
@@ -2129,82 +2127,11 @@ ffs_vfs_fsync(vnode_t *vp, int flags)
                return 0;
        }
 #endif /* WAPBL */
 
-       /*
-        * Write out metadata for non-logging file systems. XXX This block
-        * should be simplified now that softdep is gone.
-        */
-       passes = NIADDR + 1;
-       skipmeta = 0;
-       if (flags & FSYNC_WAIT)
-               skipmeta = 1;
-
-loop:
-       mutex_enter(&bufcache_lock);
-       LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
-               bp->b_cflags &= ~BC_SCANNED;
-       }
-       for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
-               nbp = LIST_NEXT(bp, b_vnbufs);
-               if (bp->b_cflags & (BC_BUSY | BC_SCANNED))
-                       continue;
-               if ((bp->b_oflags & BO_DELWRI) == 0)
-                       panic("ffs_fsync: not dirty");
-               if (skipmeta && bp->b_lblkno < 0)
-                       continue;
-               bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED;
-               mutex_exit(&bufcache_lock);
-               /*
-                * On our final pass through, do all I/O synchronously
-                * so that we can find out if our flush is failing
-                * because of write errors.
-                */
-               if (passes > 0 || !(flags & FSYNC_WAIT))
-                       (void) bawrite(bp);
-               else if ((error = bwrite(bp)) != 0)
-                       return (error);
-               /*
-                * Since we unlocked during the I/O, we need
-                * to start from a known point.
-                */
-               mutex_enter(&bufcache_lock);
-               nbp = LIST_FIRST(&vp->v_dirtyblkhd);
-       }
-       mutex_exit(&bufcache_lock);
-       if (skipmeta) {
-               skipmeta = 0;
-               goto loop;
-       }
-
-       if ((flags & FSYNC_WAIT) != 0) {
-               mutex_enter(&vp->v_interlock);
-               while (vp->v_numoutput) {
-                       cv_wait(&vp->v_cv, &vp->v_interlock);
-               }
-               mutex_exit(&vp->v_interlock);
-
-               if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
-                       /*
-                       * Block devices associated with filesystems may
-                       * have new I/O requests posted for them even if
-                       * the vnode is locked, so no amount of trying will
-                       * get them clean. Thus we give block devices a
-                       * good effort, then just give up. For all other file
-                       * types, go around and try again until it is clean.
-                       */
-                       if (passes > 0) {
-                               passes--;
-                               goto loop;
-                       }
-#ifdef DIAGNOSTIC
-                       if (vp->v_type != VBLK)
-                               vprint("ffs_fsync: dirty", vp);
-#endif
-               }
-       }
-
+       error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
        if (error == 0 && (flags & FSYNC_CACHE) != 0) {
+               i = 1;
                (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
                    kauth_cred_get());
        }
 
Index: sys/kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v
retrieving revision 1.421
diff -p -u -4 -r1.421 vfs_subr.c
--- sys/kern/vfs_subr.c 2 Apr 2011 04:28:56 -0000       1.421
+++ sys/kern/vfs_subr.c 21 Apr 2011 08:17:45 -0000
@@ -271,13 +271,13 @@ restart:
  * Flush all dirty buffers from a vnode.
  * Called with the underlying vnode locked, which should prevent new dirty
  * buffers from being queued.
  */
-void
+int
 vflushbuf(struct vnode *vp, int sync)
 {
        struct buf *bp, *nbp;
-       int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
+       int error, flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
        bool dirty;
 
        mutex_enter(&vp->v_interlock);
        (void) VOP_PUTPAGES(vp, 0, 0, flags);
@@ -297,16 +297,19 @@ loop:
                 * since there is no way to quickly wait for them below.
                 */
                if (bp->b_vp == vp || sync == 0)
                        (void) bawrite(bp);
-               else
-                       (void) bwrite(bp);
+               else {
+                       error = bwrite(bp);
+                       if (error)
+                               return error;
+               }
                goto loop;
        }
        mutex_exit(&bufcache_lock);
 
        if (sync == 0)
-               return;
+               return 0;
 
        mutex_enter(&vp->v_interlock);
        while (vp->v_numoutput != 0)
                cv_wait(&vp->v_cv, &vp->v_interlock);
@@ -316,8 +319,10 @@ loop:
        if (dirty) {
                vprint("vflushbuf: dirty", vp);
                goto loop;
        }
+
+       return 0;
 }
 
 /*
  * Create a vnode for a block device.
Index: sys/sys/vnode.h
===================================================================
RCS file: /cvsroot/src/sys/sys/vnode.h,v
retrieving revision 1.228
diff -p -u -4 -r1.228 vnode.h
--- sys/sys/vnode.h     18 Apr 2011 15:53:04 -0000      1.228
+++ sys/sys/vnode.h     21 Apr 2011 08:17:46 -0000
@@ -551,9 +551,9 @@ int vaccess(enum vtype, mode_t, uid_t, g
 void   vattr_null(struct vattr *);
 void   vdevgone(int, int, int, enum vtype);
 int    vfinddev(dev_t, enum vtype, struct vnode **);
 int    vflush(struct mount *, struct vnode *, int);
-void   vflushbuf(struct vnode *, int);
+int    vflushbuf(struct vnode *, int);
 int    vget(struct vnode *, int);
 bool   vtryget(struct vnode *);
 void   vgone(struct vnode *);
 int    vinvalbuf(struct vnode *, int, kauth_cred_t, struct lwp *, bool, int);


Home | Main Index | Thread Index | Old Index