tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: ffs fsync patch - block devices and wapbl
On Tue, Apr 19, 2011 at 11:44:16PM +0000, YAMAMOTO Takashi wrote:
> hi,
>
> > Block devices have two different properties. First they are device nodes
> > having access times etc. and reside on a file system. Second they may have
> > a file system mounted on them. With WAPBL it is important to handle these
> > properties separately.
> >
> > Relevant PRs are 41189, 41192, 41977, 42149, 42551, 44377 and 44746 at
> > least.
> >
> > The attached diff should solve these problems by:
> >
> > - Replace the ugly sync loop in ffs_full_fsync() with vflushbuf(). This
> > loop is a left-over of softdeps and not needed anymore.
> >
> > - Merge ffs_vfs_fsync() with ffs_full_fsync() so we have only on operation
> > whether the request comes from ffs or from other file system via
> > VFS_FSYNC().
> >
> > - Take care which mount to test for WAPBL -- v_mount to update the times and
> > wapbl_vptomp() to update the dirty blocks. Never update times when called
> > by VFS_FSYNC().
> >
> >
> > Comments or objections?
>
> thanks for taking a look on this.
>
> ffs_fsync should not need to know if the filesystem used on its VBLK node is
> ffs or not. it should just call VFS_FSYNC as the other filesystems
> (ie. spec_fsync) do. the FSYNC_VFS flag should go away.
You mean like the attached diff? All we loose is the ctime update.
--
Juergen Hannken-Illjes - hannken%eis.cs.tu-bs.de@localhost - TU Braunschweig
(Germany)
Index: sys/ufs/ffs/ffs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v
retrieving revision 1.117
diff -p -u -4 -r1.117 ffs_vnops.c
--- sys/ufs/ffs/ffs_vnops.c 15 Apr 2011 15:54:11 -0000 1.117
+++ sys/ufs/ffs/ffs_vnops.c 21 Apr 2011 08:17:47 -0000
@@ -174,9 +174,9 @@ const struct vnodeopv_entry_desc ffs_spe
{ &vop_poll_desc, spec_poll }, /* poll */
{ &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */
{ &vop_revoke_desc, spec_revoke }, /* revoke */
{ &vop_mmap_desc, spec_mmap }, /* mmap */
- { &vop_fsync_desc, ffs_fsync }, /* fsync */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
{ &vop_seek_desc, spec_seek }, /* seek */
{ &vop_remove_desc, spec_remove }, /* remove */
{ &vop_link_desc, spec_link }, /* link */
{ &vop_rename_desc, spec_rename }, /* rename */
@@ -398,23 +398,18 @@ out:
/* ARGSUSED */
int
ffs_full_fsync(struct vnode *vp, int flags)
{
- struct buf *bp, *nbp;
- int error, passes, skipmeta, waitfor, i;
+ int error, waitfor, i;
struct mount *mp;
- KASSERT(VTOI(vp) != NULL);
KASSERT(vp->v_tag == VT_UFS);
+ KASSERT(VTOI(vp) != NULL);
+ KASSERT(vp->v_type != VCHR && vp->v_type != VBLK);
error = 0;
mp = vp->v_mount;
- if (vp->v_type == VBLK && vp->v_specmountpoint != NULL) {
- mp = vp->v_specmountpoint;
- } else {
- mp = vp->v_mount;
- }
/*
* Flush all dirty data associated with the vnode.
*/
@@ -432,9 +427,8 @@ ffs_full_fsync(struct vnode *vp, int fla
return error;
}
#ifdef WAPBL
- mp = wapbl_vptomp(vp);
if (mp && mp->mnt_wapbl) {
/*
* Don't bother writing out metadata if the syncer is
* making the request. We will let the sync vnode
@@ -476,89 +470,15 @@ ffs_full_fsync(struct vnode *vp, int fla
return error;
}
#endif /* WAPBL */
- /*
- * Write out metadata for non-logging file systems. XXX This block
- * should be simplified now that softdep is gone.
- */
- passes = NIADDR + 1;
- skipmeta = 0;
- if (flags & FSYNC_WAIT)
- skipmeta = 1;
-
-loop:
- mutex_enter(&bufcache_lock);
- LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
- bp->b_cflags &= ~BC_SCANNED;
- }
- for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
- nbp = LIST_NEXT(bp, b_vnbufs);
- if (bp->b_cflags & (BC_BUSY | BC_SCANNED))
- continue;
- if ((bp->b_oflags & BO_DELWRI) == 0)
- panic("ffs_fsync: not dirty");
- if (skipmeta && bp->b_lblkno < 0)
- continue;
- bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED;
- mutex_exit(&bufcache_lock);
- /*
- * On our final pass through, do all I/O synchronously
- * so that we can find out if our flush is failing
- * because of write errors.
- */
- if (passes > 0 || !(flags & FSYNC_WAIT))
- (void) bawrite(bp);
- else if ((error = bwrite(bp)) != 0)
- return (error);
- /*
- * Since we unlocked during the I/O, we need
- * to start from a known point.
- */
- mutex_enter(&bufcache_lock);
- nbp = LIST_FIRST(&vp->v_dirtyblkhd);
- }
- mutex_exit(&bufcache_lock);
- if (skipmeta) {
- skipmeta = 0;
- goto loop;
+ error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
+ if (error == 0) {
+ waitfor = (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0;
+ error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | waitfor);
}
-
- if ((flags & FSYNC_WAIT) != 0) {
- mutex_enter(&vp->v_interlock);
- while (vp->v_numoutput) {
- cv_wait(&vp->v_cv, &vp->v_interlock);
- }
- mutex_exit(&vp->v_interlock);
-
- /*
- * Ensure that any filesystem metadata associated
- * with the vnode has been written.
- */
- if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
- /*
- * Block devices associated with filesystems may
- * have new I/O requests posted for them even if
- * the vnode is locked, so no amount of trying will
- * get them clean. Thus we give block devices a
- * good effort, then just give up. For all other file
- * types, go around and try again until it is clean.
- */
- if (passes > 0) {
- passes--;
- goto loop;
- }
-#ifdef DIAGNOSTIC
- if (vp->v_type != VBLK)
- vprint("ffs_fsync: dirty", vp);
-#endif
- }
- }
-
- waitfor = (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0;
- error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | waitfor);
-
if (error == 0 && (flags & FSYNC_CACHE) != 0) {
+ i = 1;
(void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE,
kauth_cred_get());
}
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.265
diff -p -u -4 -r1.265 ffs_vfsops.c
--- sys/ufs/ffs/ffs_vfsops.c 27 Mar 2011 08:04:50 -0000 1.265
+++ sys/ufs/ffs/ffs_vfsops.c 21 Apr 2011 08:17:47 -0000
@@ -2070,16 +2070,14 @@ ffs_suspendctl(struct mount *mp, int cmd
}
}
/*
- * Synch vnode for a mounted file system. This is called for foreign
- * vnodes, i.e. non-ffs.
+ * Synch vnode for a mounted file system.
*/
static int
ffs_vfs_fsync(vnode_t *vp, int flags)
{
- int error, passes, skipmeta, i, pflags;
- buf_t *bp, *nbp;
+ int error, i, pflags;
#ifdef WAPBL
struct mount *mp;
#endif
@@ -2129,82 +2127,11 @@ ffs_vfs_fsync(vnode_t *vp, int flags)
return 0;
}
#endif /* WAPBL */
- /*
- * Write out metadata for non-logging file systems. XXX This block
- * should be simplified now that softdep is gone.
- */
- passes = NIADDR + 1;
- skipmeta = 0;
- if (flags & FSYNC_WAIT)
- skipmeta = 1;
-
-loop:
- mutex_enter(&bufcache_lock);
- LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
- bp->b_cflags &= ~BC_SCANNED;
- }
- for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
- nbp = LIST_NEXT(bp, b_vnbufs);
- if (bp->b_cflags & (BC_BUSY | BC_SCANNED))
- continue;
- if ((bp->b_oflags & BO_DELWRI) == 0)
- panic("ffs_fsync: not dirty");
- if (skipmeta && bp->b_lblkno < 0)
- continue;
- bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED;
- mutex_exit(&bufcache_lock);
- /*
- * On our final pass through, do all I/O synchronously
- * so that we can find out if our flush is failing
- * because of write errors.
- */
- if (passes > 0 || !(flags & FSYNC_WAIT))
- (void) bawrite(bp);
- else if ((error = bwrite(bp)) != 0)
- return (error);
- /*
- * Since we unlocked during the I/O, we need
- * to start from a known point.
- */
- mutex_enter(&bufcache_lock);
- nbp = LIST_FIRST(&vp->v_dirtyblkhd);
- }
- mutex_exit(&bufcache_lock);
- if (skipmeta) {
- skipmeta = 0;
- goto loop;
- }
-
- if ((flags & FSYNC_WAIT) != 0) {
- mutex_enter(&vp->v_interlock);
- while (vp->v_numoutput) {
- cv_wait(&vp->v_cv, &vp->v_interlock);
- }
- mutex_exit(&vp->v_interlock);
-
- if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
- /*
- * Block devices associated with filesystems may
- * have new I/O requests posted for them even if
- * the vnode is locked, so no amount of trying will
- * get them clean. Thus we give block devices a
- * good effort, then just give up. For all other file
- * types, go around and try again until it is clean.
- */
- if (passes > 0) {
- passes--;
- goto loop;
- }
-#ifdef DIAGNOSTIC
- if (vp->v_type != VBLK)
- vprint("ffs_fsync: dirty", vp);
-#endif
- }
- }
-
+ error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
if (error == 0 && (flags & FSYNC_CACHE) != 0) {
+ i = 1;
(void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
kauth_cred_get());
}
Index: sys/kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v
retrieving revision 1.421
diff -p -u -4 -r1.421 vfs_subr.c
--- sys/kern/vfs_subr.c 2 Apr 2011 04:28:56 -0000 1.421
+++ sys/kern/vfs_subr.c 21 Apr 2011 08:17:45 -0000
@@ -271,13 +271,13 @@ restart:
* Flush all dirty buffers from a vnode.
* Called with the underlying vnode locked, which should prevent new dirty
* buffers from being queued.
*/
-void
+int
vflushbuf(struct vnode *vp, int sync)
{
struct buf *bp, *nbp;
- int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
+ int error, flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
bool dirty;
mutex_enter(&vp->v_interlock);
(void) VOP_PUTPAGES(vp, 0, 0, flags);
@@ -297,16 +297,19 @@ loop:
* since there is no way to quickly wait for them below.
*/
if (bp->b_vp == vp || sync == 0)
(void) bawrite(bp);
- else
- (void) bwrite(bp);
+ else {
+ error = bwrite(bp);
+ if (error)
+ return error;
+ }
goto loop;
}
mutex_exit(&bufcache_lock);
if (sync == 0)
- return;
+ return 0;
mutex_enter(&vp->v_interlock);
while (vp->v_numoutput != 0)
cv_wait(&vp->v_cv, &vp->v_interlock);
@@ -316,8 +319,10 @@ loop:
if (dirty) {
vprint("vflushbuf: dirty", vp);
goto loop;
}
+
+ return 0;
}
/*
* Create a vnode for a block device.
Index: sys/sys/vnode.h
===================================================================
RCS file: /cvsroot/src/sys/sys/vnode.h,v
retrieving revision 1.228
diff -p -u -4 -r1.228 vnode.h
--- sys/sys/vnode.h 18 Apr 2011 15:53:04 -0000 1.228
+++ sys/sys/vnode.h 21 Apr 2011 08:17:46 -0000
@@ -551,9 +551,9 @@ int vaccess(enum vtype, mode_t, uid_t, g
void vattr_null(struct vattr *);
void vdevgone(int, int, int, enum vtype);
int vfinddev(dev_t, enum vtype, struct vnode **);
int vflush(struct mount *, struct vnode *, int);
-void vflushbuf(struct vnode *, int);
+int vflushbuf(struct vnode *, int);
int vget(struct vnode *, int);
bool vtryget(struct vnode *);
void vgone(struct vnode *);
int vinvalbuf(struct vnode *, int, kauth_cred_t, struct lwp *, bool, int);
Home |
Main Index |
Thread Index |
Old Index