Subject: kern/8996: VOP_UPDATE: waitfor -> flags
To: None <gnats-bugs@gnats.netbsd.org>
From: None <perseant@hhhh.org>
List: netbsd-bugs
Date: 12/15/1999 16:04:13
>Number:         8996
>Category:       kern
>Synopsis:       Differentiate between dirop and non-dirop updates
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people (Kernel Bug People)
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Wed Dec 15 16:03:01 1999
>Last-Modified:
>Originator:     Konrad Schroder
>Organization:
                                                Konrad Schroder
                                                perseant@hhhh.org
>Release:        1999-12-15
>Environment:
(ignore this)
System: NetBSD wumpus.prism.washington.edu 1.4 NetBSD 1.4 (WUMPUS) #11: Thu Jul 8 16:58:33 PDT 1999 perseant@wumpus.prism.washington.edu:/usr/src/sys/arch/i386/compile/WUMPUS i386


>Description:
Currently VOP_UPDATE takes as its final argument a boolean, "waitfor", which
does not differentiate between two different types of update that have
different desired behaviors in at least FFS+softdep and LFS: update because
of a directory operation, in which case both FFS+softdep and LFS *do not* want
to wait for completion; and some other update (e.g. sync), where they do.

Currently the ufs code addresses the first case for FFS+softdep by specifying
waitfor as "!DOINGSOFTDEP(tvp)".  The problem is not addressed in the LFS code
but the waitfor argument, coming from UFS code, has been ignored.  (This is one
reason why NFS-exported LFSs do not work.)

I suggest instead changing the final argument to a set of flags, a la VOP_FSYNC;
with two flags defined, UPDATE_WAIT and UPDATE_DIROP.  Filesystems that do not
understand delayed dirop writes can test against (UPDATE_WAIT|UPDATE_DIROP).
>How-To-Repeat:
Code inspection.
>Fix:

The following diff should implement the change:

Index: kern/vnode_if.src
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/vnode_if.src,v
retrieving revision 1.23
diff -u -r1.23 vnode_if.src
--- vnode_if.src	1999/12/07 23:57:49	1.23
+++ vnode_if.src	1999/12/15 23:26:38
@@ -535,7 +535,7 @@
 	IN struct vnode *vp;
 	IN struct timespec *access;
 	IN struct timespec *modify;
-	IN int waitfor;
+	IN int flags;
 };
 
 # 
Index: miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/syssrc/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.15
diff -u -r1.15 genfs_vnops.c
--- genfs_vnops.c	1999/11/15 18:49:10	1.15
+++ genfs_vnops.c	1999/12/15 23:26:47
@@ -89,7 +89,7 @@
 	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
 		return (0);
 	else
-		return (VOP_UPDATE(ap->a_vp, NULL, NULL, wait));
+		return (VOP_UPDATE(ap->a_vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
 }
 
 int
Index: msdosfs/msdosfs_denode.c
===================================================================
RCS file: /cvsroot/syssrc/sys/msdosfs/msdosfs_denode.c,v
retrieving revision 1.36
diff -u -r1.36 msdosfs_denode.c
--- msdosfs_denode.c	1999/03/24 05:51:27	1.36
+++ msdosfs_denode.c	1999/12/15 23:26:48
@@ -326,7 +326,7 @@
 	int waitfor;
 {
 
-	return (VOP_UPDATE(DETOV(dep), NULL, NULL, waitfor));
+	return (VOP_UPDATE(DETOV(dep), NULL, NULL, waitfor ? UPDATE_WAIT : 0));
 }
 
 /*
Index: msdosfs/msdosfs_vnops.c
===================================================================
RCS file: /cvsroot/syssrc/sys/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.89
diff -u -r1.89 msdosfs_vnops.c
--- msdosfs_vnops.c	1999/11/15 18:49:11	1.89
+++ msdosfs_vnops.c	1999/12/15 23:26:49
@@ -756,7 +756,7 @@
 		struct vnode *a_vp;
 		struct timespec *a_access;
 		struct timespec *a_modify;
-		int a_waitfor;
+		int a_flags;
 	} */ *ap = v;
 	struct buf *bp;
 	struct direntry *dirp;
@@ -782,7 +782,7 @@
 	if (error)
 		return (error);
 	DE_EXTERNALIZE(dirp, dep);
-	if (ap->a_waitfor == MNT_WAIT)
+	if (ap->a_flags & (UPDATE_WAIT|UPDATE_DIROP))
 		return (bwrite(bp));
 	else {
 		bdwrite(bp);
Index: sys/vnode.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/vnode.h,v
retrieving revision 1.67
diff -u -r1.67 vnode.h
--- vnode.h	1999/11/17 22:53:47	1.67
+++ vnode.h	1999/12/15 23:26:53
@@ -239,6 +239,9 @@
 #define FSYNC_RECLAIM	0x0004		/* fsync: hint: vnode is being reclaimed */
 #define FSYNC_LAZY	0x0008		/* fsync: lazy sync (trickle) */
 
+#define UPDATE_WAIT	0x0001		/* update: wait for completion */
+#define UPDATE_DIROP	0x0002		/* update: fs' choice to wait or not */
+
 #define	HOLDRELE(vp)	holdrele(vp)
 #define	VHOLD(vp)	vhold(vp)
 #define	VREF(vp)	vref(vp)
Index: sys/vnode_if.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/vnode_if.h,v
retrieving revision 1.21
diff -u -r1.21 vnode_if.h
--- vnode_if.h	1999/12/07 23:58:27	1.21
+++ vnode_if.h	1999/12/15 23:26:54
@@ -1038,23 +1038,23 @@
 	struct vnode *a_vp;
 	struct timespec *a_access;
 	struct timespec *a_modify;
-	int a_waitfor;
+	int a_flags;
 };
 extern struct vnodeop_desc vop_update_desc;
 static __inline int VOP_UPDATE __P((struct vnode *, struct timespec *, 
     struct timespec *, int)) __attribute__((__unused__));
-static __inline int VOP_UPDATE(vp, access, modify, waitfor)
+static __inline int VOP_UPDATE(vp, access, modify, flags)
 	struct vnode *vp;
 	struct timespec *access;
 	struct timespec *modify;
-	int waitfor;
+	int flags;
 {
 	struct vop_update_args a;
 	a.a_desc = VDESC(vop_update);
 	a.a_vp = vp;
 	a.a_access = access;
 	a.a_modify = modify;
-	a.a_waitfor = waitfor;
+	a.a_flags = flags;
 	return (VCALL(vp, VOFFSET(vop_update), &a));
 }
 
Index: ufs/ext2fs/ext2fs_inode.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/ext2fs/ext2fs_inode.c,v
retrieving revision 1.13
diff -u -r1.13 ext2fs_inode.c
--- ext2fs_inode.c	1999/03/24 05:51:30	1.13
+++ ext2fs_inode.c	1999/12/15 23:26:56
@@ -116,8 +116,8 @@
  * used to specify that the inode needs to be updated but that the times have
  * already been set. The access and modified times are taken from the second
  * and third parameters; the inode change time is always taken from the current
- * time. If waitfor is set, then wait for the disk write of the inode to
- * complete.
+ * time. If UPDATE_WAIT or UPDATE_DIROP is set, then wait for the disk
+ * write of the inode to complete.
  */
 int
 ext2fs_update(v)
@@ -127,7 +127,7 @@
 		struct vnode *a_vp;
 		struct timespec *a_access;
 		struct timespec *a_modify;
-		int a_waitfor;
+		int a_flags;
 	} */ *ap = v;
 	register struct m_ext2fs *fs;
 	struct buf *bp;
@@ -157,7 +157,8 @@
 	cp = (caddr_t)bp->b_data +
 	    (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE);
 	e2fs_isave(&ip->i_din.e2fs_din, (struct ext2fs_dinode *)cp);
-	if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
+	if ((ap->a_flags & (UPDATE_WAIT|UPDATE_DIROP))
+	    && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
 		return (bwrite(bp));
 	else {
 		bdwrite(bp);
Index: ufs/ffs/ffs_inode.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/ffs/ffs_inode.c,v
retrieving revision 1.29
diff -u -r1.29 ffs_inode.c
--- ffs_inode.c	1999/11/15 18:49:13	1.29
+++ ffs_inode.c	1999/12/15 23:26:57
@@ -75,8 +75,8 @@
  * updated but that the times have already been set. The access
  * and modified times are taken from the second and third parameters;
  * the inode change time is always taken from the current time. If
- * waitfor is set, then wait for the disk write of the inode to
- * complete.
+ * UPDATE_WAIT flag is set, or UPDATE_DIROP is set and we are not doing
+ * softupdates, then wait for the disk write of the inode to complete.
  */
 
 int
@@ -87,7 +87,7 @@
 		struct vnode *a_vp;
 		struct timespec *a_access;
 		struct timespec *a_modify;
-		int a_waitfor;
+		int a_flags;
 	} */ *ap = v;
 	register struct fs *fs;
 	struct buf *bp;
@@ -95,6 +95,7 @@
 	int error;
 	struct timespec ts;
 	caddr_t cp;
+	int waitfor;
 
 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (0);
@@ -103,10 +104,16 @@
 	FFS_ITIMES(ip,
 	    ap->a_access ? ap->a_access : &ts,
 	    ap->a_modify ? ap->a_modify : &ts, &ts);
-	if ((ip->i_flag & IN_MODIFIED) == 0 && ap->a_waitfor != MNT_WAIT)
+	if ((ip->i_flag & IN_MODIFIED) == 0 && ap->a_flags != UPDATE_WAIT)
 		return (0);
 	ip->i_flag &= ~IN_MODIFIED;
 	fs = ip->i_fs;
+
+	waitfor=0;
+	if ((ap->a_flags & UPDATE_DIROP) && !DOINGSOFTDEP(ap->a_vp))
+		waitfor=1;
+	waitfor |= ap->a_flags & UPDATE_WAIT;
+
 	/*
 	 * Ensure that uid and gid are correct. This is a temporary
 	 * fix until fsck has been changed to do the update.
@@ -123,7 +130,7 @@
 		return (error);
 	}
 	if (DOINGSOFTDEP(ap->a_vp))
-		softdep_update_inodeblock(ip, bp, ap->a_waitfor);
+		softdep_update_inodeblock(ip, bp, waitfor);
 	else if (ip->i_ffs_effnlink != ip->i_ffs_nlink)
 		panic("ffs_update: bad link cnt");
 	cp = (caddr_t)bp->b_data +
@@ -134,7 +141,7 @@
 	else
 #endif
 		memcpy(cp, &ip->i_din.ffs_din, DINODE_SIZE);
-	if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
+	if (waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
 		return (bwrite(bp));
 	} else {
 		bdwrite(bp);
Index: ufs/lfs/lfs.h
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/lfs/lfs.h,v
retrieving revision 1.19
diff -u -r1.19 lfs.h
--- lfs.h	1999/12/15 07:10:34	1.19
+++ lfs.h	1999/12/15 23:26:57
@@ -125,22 +125,6 @@
 #define	LFS_LABELPAD	8192		/* LFS label size */
 #define	LFS_SBPAD	8192		/* LFS superblock size */
 
-/*
- * XXX
- * This is a kluge and NEEDS to go away.
- *
- * Right now, ufs code handles most of the calls for directory operations
- * such as create, mkdir, link, etc.  As a result VOP_UPDATE is being
- * called with waitfor set (since ffs does these things synchronously).
- * Since LFS does not want to do these synchronously, we treat the last
- * argument to lfs_update as a set of flags.  If LFS_SYNC is set, then
- * the update should be synchronous, if not, do it asynchronously.
- * Unfortunately, this means that LFS won't work with NFS yet because
- * NFS goes through paths that will make normal calls to ufs which will
- * call lfs with a last argument of 1.
- */
-#define	LFS_SYNC	0x02
-
 /* On-disk and in-memory checkpoint segment usage structure. */
 typedef struct segusage SEGUSE;
 struct segusage {
Index: ufs/lfs/lfs_inode.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/lfs/lfs_inode.c,v
retrieving revision 1.28
diff -u -r1.28 lfs_inode.c
--- lfs_inode.c	1999/11/23 23:52:42	1.28
+++ lfs_inode.c	1999/12/15 23:26:57
@@ -122,7 +122,7 @@
 				  struct vnode *a_vp;
 				  struct timespec *a_access;
 				  struct timespec *a_modify;
-				  int a_waitfor;
+				  int a_flags;
 				  } */ *ap = v;
 	struct inode *ip;
 	struct vnode *vp = ap->a_vp;
@@ -139,9 +139,9 @@
 	 * already been scheduled for writing, but the writes have not
 	 * yet completed, lfs_vflush will not be called, and vinvalbuf
 	 * will cause a panic.  So, we must wait until any pending write
-	 * for our inode completes, if we are called with LFS_SYNC set.
+	 * for our inode completes, if we are called with UPDATE_WAIT set.
 	 */
-	while((ap->a_waitfor & LFS_SYNC) && WRITEINPROG(vp)) {
+	while((ap->a_flags & UPDATE_WAIT) && WRITEINPROG(vp)) {
 #ifdef DEBUG_LFS
 		printf("lfs_update: sleeping on inode %d (in-progress)\n",ip->i_number);
 #endif
@@ -160,7 +160,7 @@
 	}
 	
 	/* If sync, push back the vnode and any dirty blocks it may have. */
-	if(ap->a_waitfor & LFS_SYNC) {
+	if(ap->a_flags & UPDATE_WAIT) {
 		/* Avoid flushing VDIROP. */
 		++fs->lfs_diropwait;
 		while(vp->v_flag & VDIROP) {
Index: ufs/lfs/lfs_vnops.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/lfs/lfs_vnops.c,v
retrieving revision 1.34
diff -u -r1.34 lfs_vnops.c
--- lfs_vnops.c	1999/12/15 07:19:07	1.34
+++ lfs_vnops.c	1999/12/15 23:26:57
@@ -277,7 +277,7 @@
 	} */ *ap = v;
 	
 	return (VOP_UPDATE(ap->a_vp, NULL, NULL,
-			   (ap->a_flags & FSYNC_WAIT) != 0 ? LFS_SYNC : 0)); /* XXX */
+			   (ap->a_flags & FSYNC_WAIT) != 0 ? UPDATE_WAIT : 0)); /* XXX */
 }
 
 /*
Index: ufs/ufs/ufs_lookup.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/ufs/ufs_lookup.c,v
retrieving revision 1.27
diff -u -r1.27 ufs_lookup.c
--- ufs_lookup.c	1999/11/15 18:49:15	1.27
+++ ufs_lookup.c	1999/12/15 23:26:58
@@ -794,7 +794,7 @@
 			error = VOP_BWRITE(bp);
 		}
 		TIMEVAL_TO_TIMESPEC(&time, &ts);
-		ret = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp));
+		ret = VOP_UPDATE(dvp, &ts, &ts, UPDATE_DIROP);
 		if (error == 0)
 			return (ret);
 		return (error);
Index: ufs/ufs/ufs_readwrite.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/ufs/ufs_readwrite.c,v
retrieving revision 1.23
diff -u -r1.23 ufs_readwrite.c
--- ufs_readwrite.c	1999/11/15 18:49:15	1.23
+++ ufs_readwrite.c	1999/12/15 23:26:58
@@ -160,7 +160,7 @@
 	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
 		ip->i_flag |= IN_ACCESS;
 		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
-			error = VOP_UPDATE(vp, NULL, NULL, 1);
+			error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
 	}
 	return (error);
 }
@@ -295,6 +295,6 @@
 			uio->uio_resid = resid;
 		}
 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
-		error = VOP_UPDATE(vp, NULL, NULL, 1);
+		error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
 	return (error);
 }
Index: ufs/ufs/ufs_vnops.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/ufs/ufs_vnops.c,v
retrieving revision 1.61
diff -u -r1.61 ufs_vnops.c
--- ufs_vnops.c	1999/12/13 19:07:21	1.61
+++ ufs_vnops.c	1999/12/15 23:26:59
@@ -676,7 +676,7 @@
 	ip->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(vp))
 		softdep_increase_linkcnt(ip);
-	error = VOP_UPDATE(vp, NULL, NULL, !DOINGSOFTDEP(vp));
+	error = VOP_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
 	if (!error) {
 		ufs_makedirentry(ip, cnp, &newdir);
 		error = ufs_direnter(dvp, vp, &newdir, cnp, NULL);
@@ -914,7 +914,7 @@
 	if (DOINGSOFTDEP(fvp))
 		softdep_increase_linkcnt(ip);
 	ip->i_flag |= IN_CHANGE;
-	if ((error = VOP_UPDATE(fvp, NULL, NULL, !DOINGSOFTDEP(fvp))) != 0) {
+	if ((error = VOP_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
 		VOP_UNLOCK(fvp, 0);
 		goto bad;
 	}
@@ -977,7 +977,7 @@
 			if (DOINGSOFTDEP(tdvp))
 				softdep_increase_linkcnt(dp);
 			if ((error = VOP_UPDATE(tdvp, NULL, NULL, 
-			    !DOINGSOFTDEP(tdvp))) != 0) {
+			    UPDATE_DIROP)) != 0) {
 				dp->i_ffs_effnlink--;
 				dp->i_ffs_nlink--;
 				dp->i_flag |= IN_CHANGE;
@@ -991,7 +991,7 @@
 				dp->i_ffs_effnlink--;
 				dp->i_ffs_nlink--;
 				dp->i_flag |= IN_CHANGE;
-				(void)VOP_UPDATE(tdvp, NULL, NULL, 1);
+				(void)VOP_UPDATE(tdvp, NULL, NULL, UPDATE_WAIT);
 			}
 			goto bad;
 		}
@@ -1209,7 +1209,7 @@
 		softdep_increase_linkcnt(ip);
 	if (cnp->cn_flags & ISWHITEOUT)
 		ip->i_ffs_flags |= UF_OPAQUE;
-	error = VOP_UPDATE(tvp, NULL, NULL, !DOINGSOFTDEP(tvp));
+	error = VOP_UPDATE(tvp, NULL, NULL, UPDATE_DIROP);
 
 	/*
 	 * Bump link count in parent directory to reflect work done below.
@@ -1221,7 +1221,7 @@
 	dp->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(dvp))
 		softdep_increase_linkcnt(dp);
-	if ((error = VOP_UPDATE(dvp, NULL, NULL, !DOINGSOFTDEP(dvp))) != 0)
+	if ((error = VOP_UPDATE(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
 		goto bad;
 
 	/*
@@ -1269,7 +1269,7 @@
 			blkoff += DIRBLKSIZ;
 		}
 	}
-	if ((error = VOP_UPDATE(tvp, NULL, NULL, !DOINGSOFTDEP(tvp))) != 0) {
+	if ((error = VOP_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
 		(void)VOP_BWRITE(bp);
 		goto bad;
 	}
@@ -1984,7 +1984,7 @@
 	/*
 	 * Make sure inode goes to disk before directory entry.
 	 */
-	if ((error = VOP_UPDATE(tvp, NULL, NULL, !DOINGSOFTDEP(tvp))) != 0)
+	if ((error = VOP_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
 		goto bad;
 	ufs_makedirentry(ip, cnp, &newdir);
 	if ((error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL)) != 0)
>Audit-Trail:
>Unformatted: