Subject: Re: ufs-ism in lookup(9)
To: None <wrstuden@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 04/03/2004 23:34:38
--NextPart-20040403232804-0245300
Content-Type: Text/Plain; charset=us-ascii

hi,

> > i'd say the comments are wrong, however, ok, i have no particular problem
> > with your suggested way.  (ie. change the MAKEENTRY's behaviour)
> 
> Thank you. Also put something in there somewhere about what we should do 
> about negaitve caching & DELETE or RENAME for the remote fs case. I'd 
> suggest turning the cnp->cn_nameiop != CREATE into cnp->cn_nameiop == 
> LOOKUP, but the point's still up for discussion if you want something 
> else.

after some more thoughts, i think it's better to leave the decision to
each filesystems as they should know their own characteristics better.
how about the attached diffs?  (ufs and nfs only.)

YAMAMOTO Takashi

--NextPart-20040403232804-0245300
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="lookup4.diff"

Index: kern/vfs_lookup.c
===================================================================
--- kern/vfs_lookup.c	(revision 462)
+++ kern/vfs_lookup.c	(working copy)
@@ -323,7 +323,6 @@ lookup(ndp)
 	struct vnode *dp = 0;		/* the directory we are searching */
 	struct vnode *tdp;		/* saved dp */
 	struct mount *mp;		/* mount table entry */
-	int docache;			/* == 0 do not cache last component */
 	int wantparent;			/* 1 => wantparent or lockparent flag */
 	int rdonly;			/* lookup read-only flag bit */
 	int error = 0;
@@ -335,10 +334,6 @@ lookup(ndp)
 	 * Setup: break out flag bits into variables.
 	 */
 	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
-	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
-	if (cnp->cn_nameiop == DELETE ||
-	    (wantparent && cnp->cn_nameiop != CREATE))
-		docache = 0;
 	rdonly = cnp->cn_flags & RDONLY;
 	ndp->ni_dvp = NULL;
 	cnp->cn_flags &= ~ISSYMLINK;
@@ -428,13 +423,8 @@ dirloop:
 	 * a directory.  Cache all intervening lookups, but not the final one.
 	 */
 	if (*cp == '\0') {
-		if (docache)
-			cnp->cn_flags |= MAKEENTRY;
-		else
-			cnp->cn_flags &= ~MAKEENTRY;
 		cnp->cn_flags |= ISLASTCN;
 	} else {
-		cnp->cn_flags |= MAKEENTRY;
 		cnp->cn_flags &= ~ISLASTCN;
 	}
 	if (cnp->cn_namelen == 2 &&
Index: kern/vfs_cache.c
===================================================================
--- kern/vfs_cache.c	(revision 637)
+++ kern/vfs_cache.c	(working copy)
@@ -179,7 +179,6 @@ cache_lookup(struct vnode *dvp, struct v
 	int error;
 
 	if (!doingcache) {
-		cnp->cn_flags &= ~MAKEENTRY;
 		*vpp = NULL;
 		return (-1);
 	}
@@ -187,7 +186,6 @@ cache_lookup(struct vnode *dvp, struct v
 	if (cnp->cn_namelen > NCHNAMLEN) {
 		/* XXXSMP - updating stats without lock; do we care? */
 		nchstats.ncs_long++;
-		cnp->cn_flags &= ~MAKEENTRY;
 		goto fail;
 	}
 	simple_lock(&namecache_slock);
@@ -196,31 +194,22 @@ cache_lookup(struct vnode *dvp, struct v
 		nchstats.ncs_miss++;
 		goto fail_wlock;
 	}
-	if ((cnp->cn_flags & MAKEENTRY) == 0) {
-		nchstats.ncs_badhits++;
-		goto remove;
-	} else if (ncp->nc_vp == NULL) {
+	if (ncp->nc_vp == NULL) {
 		/*
 		 * Restore the ISWHITEOUT flag saved earlier.
 		 */
 		cnp->cn_flags |= ncp->nc_flags;
-		if (cnp->cn_nameiop != CREATE ||
-		    (cnp->cn_flags & ISLASTCN) == 0) {
-			nchstats.ncs_neghits++;
-			/*
-			 * Move this slot to end of LRU chain,
-			 * if not already there.
-			 */
-			if (TAILQ_NEXT(ncp, nc_lru) != 0) {
-				TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-				TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-			}
-			simple_unlock(&namecache_slock);
-			return (ENOENT);
-		} else {
-			nchstats.ncs_badhits++;
-			goto remove;
+		nchstats.ncs_neghits++;
+		/*
+		 * Move this slot to end of LRU chain,
+		 * if not already there.
+		 */
+		if (TAILQ_NEXT(ncp, nc_lru) != 0) {
+			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
 		}
+		simple_unlock(&namecache_slock);
+		return (ENOENT);
 	}
 
 	vp = ncp->nc_vp;
@@ -313,15 +302,6 @@ cache_lookup(struct vnode *dvp, struct v
 	*vpp = vp;
 	return (0);
 
-remove:
-	/*
-	 * Last component and we are renaming or deleting,
-	 * the cache entry is invalid, or otherwise don't
-	 * want cache entry to exist.
-	 */
-	cache_remove(ncp);
-	cache_free(ncp);
-
 fail_wlock:
 	simple_unlock(&namecache_slock);
 fail:
@@ -407,12 +387,10 @@ cache_enter(struct vnode *dvp, struct vn
 	struct nchashhead *ncpp;
 	struct ncvhashhead *nvcpp;
 
-#ifdef DIAGNOSTIC
-	if (cnp->cn_namelen > NCHNAMLEN)
-		panic("cache_enter: name too long");
-#endif
 	if (!doingcache)
 		return;
+	if (cnp->cn_namelen > NCHNAMLEN)
+		return;
 	/*
 	 * Free the cache slot at head of lru chain.
 	 */
Index: ufs/ufs/ufs_lookup.c
===================================================================
--- ufs/ufs/ufs_lookup.c	(revision 599)
+++ ufs/ufs/ufs_lookup.c	(working copy)
@@ -133,6 +133,7 @@ ufs_lookup(v)
 	const int needswap = UFS_MPNEEDSWAP(ap->a_dvp->v_mount);
 	int dirblksiz = DIRBLKSIZ;
 	ino_t foundino;
+	boolean_t toremove, tocreate;
 	if (UFS_MPISAPPLEUFS(ap->a_dvp->v_mount)) {
 		dirblksiz = APPLEUFS_DIRBLKSIZ;
 	}
@@ -155,8 +156,11 @@ ufs_lookup(v)
 	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
 		return (error);
 
-	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
-	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+	toremove = (flags & ISLASTCN) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME);
+	tocreate = (flags & ISLASTCN) &&
+	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME);
+	if (toremove && (vdp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 
 	/*
@@ -166,8 +170,45 @@ ufs_lookup(v)
 	 * check the name cache to see if the directory/name pair
 	 * we are looking for is known already.
 	 */
-	if ((error = cache_lookup(vdp, vpp, cnp)) >= 0)
-		return (error);
+	/*
+	 * XXX
+	 * this chunk of the code is common between some filesystems
+	 * which rely on side effects of lookup.
+	 * probably it's better to make this into a subroutine.
+	 */
+	error = cache_lookup(vdp, vpp, cnp);
+	if (error >= 0) {
+
+		/*
+		 * we can't always use cached results because
+		 * the subsequent dirop VOP relies on the side effects of
+		 * non-cached lookup.
+		 */
+
+		if (!(error == ENOENT && tocreate) && !(error == 0 && toremove))
+			return (error);
+
+		/*
+		 * we need side effects of lookup.  undo locking.
+		 */
+
+		if (vdp == *vpp)
+			vrele(*vpp);
+		else if (*vpp != NULL)
+			vput(*vpp);
+		if (cnp->cn_flags & PDIRUNLOCK) {
+			vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
+			cnp->cn_flags &= ~PDIRUNLOCK;
+		}
+		*vpp = NULL;
+
+		/*
+		 * purge cache.
+		 * XXX it should belong to each dirops?
+		 */
+
+		cache_purge1(vdp, cnp, 0);
+	}
 
 	/*
 	 * Suppress search for slots unless creating
@@ -177,8 +218,7 @@ ufs_lookup(v)
 	 */
 	slotstatus = FOUND;
 	slotfreespace = slotsize = slotneeded = 0;
-	if ((nameiop == CREATE || nameiop == RENAME) &&
-	    (flags & ISLASTCN)) {
+	if (tocreate) {
 		slotstatus = NONE;
 		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
 			cnp->cn_namelen + 3) &~ 3;
@@ -371,11 +411,10 @@ notfound:
 	 * directory has not been removed, then can consider
 	 * allowing file to be created.
 	 */
-	if ((nameiop == CREATE || nameiop == RENAME ||
-	     (nameiop == DELETE &&
+	if ((tocreate || (nameiop == DELETE &&
 	      (ap->a_cnp->cn_flags & DOWHITEOUT) &&
-	      (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
-	    (flags & ISLASTCN) && dp->i_ffs_effnlink != 0) {
+	      (ap->a_cnp->cn_flags & ISWHITEOUT) &&
+	    (flags & ISLASTCN))) && dp->i_ffs_effnlink != 0) {
 		/*
 		 * Access for write is interpreted as allowing
 		 * creation of files in the directory.
@@ -435,8 +474,9 @@ notfound:
 	/*
 	 * Insert name into cache (as non-existent) if appropriate.
 	 */
-	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
-		cache_enter(vdp, *vpp, cnp);
+	if (!tocreate)
+		cache_enter(vdp, NULL, cnp);
+	KASSERT(*vpp == NULL);
 	return (ENOENT);
 
 found:
@@ -605,8 +645,9 @@ found:
 	/*
 	 * Insert name into cache if appropriate.
 	 */
-	if (cnp->cn_flags & MAKEENTRY)
+	if (!toremove)
 		cache_enter(vdp, *vpp, cnp);
+	KASSERT(*vpp != NULL);
 	return (0);
 }
 
Index: nfs/nfs_vnops.c
===================================================================
--- nfs/nfs_vnops.c	(revision 645)
+++ nfs/nfs_vnops.c	(working copy)
@@ -830,6 +830,7 @@ nfs_lookup(v)
 	nfsfh_t *fhp;
 	struct nfsnode *np;
 	int lockparent, wantparent, error = 0, attrflag, fhsize;
+	boolean_t toremove, tocreate;
 	const int v3 = NFS_ISV3(dvp);
 
 	cnp->cn_flags &= ~PDIRUNLOCK;
@@ -837,8 +838,11 @@ nfs_lookup(v)
 
 	*vpp = NULLVP;
 	newvp = NULLVP;
-	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
-	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+	toremove = (flags & ISLASTCN) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME);
+	tocreate = (flags & ISLASTCN) &&
+	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME);
+	if (toremove && (dvp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
@@ -866,6 +870,7 @@ nfs_lookup(v)
 		}
 
 		if (cnp->cn_flags & PDIRUNLOCK) {
+			/* XXX locking order */
 			err2 = vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 			if (err2 != 0) {
 				*vpp = NULLVP;
@@ -887,10 +892,23 @@ nfs_lookup(v)
 		}
 
 		if (error == ENOENT) {
+			/*
+			 * if the directory is changed by others,
+			 * we can't trust negative caches.
+			 */
 			if (!VOP_GETATTR(dvp, &vattr, cnp->cn_cred,
 			    cnp->cn_proc) && timespeccmp(&vattr.va_mtime,
-			    &VTONFS(dvp)->n_nctime, ==))
-				return ENOENT;
+			    &VTONFS(dvp)->n_nctime, ==)) {
+				if (tocreate) {
+					if (dvp->v_mount->mnt_flag & MNT_RDONLY)
+						error = EROFS;
+					else
+						error = EJUSTRETURN;
+					cnp->cn_flags |= SAVENAME;
+					cache_purge1(dvp, cnp, 0);
+				}
+				return error;
+			}
 			cache_purge1(dvp, NULL, PURGE_CHILDREN);
 			timespecclear(&np->n_nctime);
 			goto dorpc;
@@ -901,7 +919,7 @@ nfs_lookup(v)
 			&& vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime)
 		{
 			nfsstats.lookupcache_hits++;
-			if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+			if (toremove || tocreate)
 				cnp->cn_flags |= SAVENAME;
 			if ((!lockparent || !(flags & ISLASTCN)) &&	
 			     newvp != dvp)
@@ -1030,12 +1048,12 @@ dorpc:
 			cnp->cn_flags |= PDIRUNLOCK;
 		}
 	}
+	KASSERT(error == 0);
+	KASSERT(newvp != NULL);
 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 		cnp->cn_flags |= SAVENAME;
-	if ((cnp->cn_flags & MAKEENTRY) &&
-	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+	if (!toremove)
 		nfs_cache_enter(dvp, newvp, cnp);
-	}
 	*vpp = newvp;
 	nfsm_reqdone;
 	if (error) {
@@ -1045,8 +1063,7 @@ dorpc:
 		 * (the nfsm_* macros will jump to nfsm_reqdone
 		 * on error).
 		 */
-		if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) &&
-		    cnp->cn_nameiop != CREATE) {
+		if (error == ENOENT && !tocreate) {
 			nfs_cache_enter(dvp, NULL, cnp);
 		}
 		if (newvp != NULLVP) {
@@ -1054,8 +1071,7 @@ dorpc:
 			if (newvp != dvp)
 				VOP_UNLOCK(newvp, 0);
 		}
-		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
-		    (flags & ISLASTCN) && error == ENOENT) {
+		if (tocreate && error == ENOENT) {
 			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
 				error = EROFS;
 			else
@@ -1072,6 +1088,7 @@ validate:
 	 * make sure we have valid type and size.
 	 */
 
+	KASSERT(error == 0);
 	newvp = *vpp;
 	if (newvp->v_type == VNON) {
 		struct vattr vattr; /* dummy */
@@ -1514,6 +1531,8 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
 	u_int32_t rdev;
 	const int v3 = NFS_ISV3(dvp);
 
+	KASSERT(cnp->cn_nameiop == CREATE);
+
 	if (vap->va_type == VCHR || vap->va_type == VBLK)
 		rdev = txdr_unsigned(vap->va_rdev);
 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
@@ -1564,8 +1583,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
 		if (newvp)
 			vput(newvp);
 	} else {
-		if (cnp->cn_flags & MAKEENTRY)
-			nfs_cache_enter(dvp, newvp, cnp);
+		nfs_cache_enter(dvp, newvp, cnp);
 		*vpp = newvp;
 	}
 	PNBUF_PUT(cnp->cn_pnbuf);
@@ -1626,6 +1644,9 @@ nfs_create(v)
 	struct mbuf *mreq, *mrep, *md, *mb;
 	const int v3 = NFS_ISV3(dvp);
 
+	KASSERT(cnp->cn_nameiop == CREATE);
+	KASSERT(cnp->cn_flags & HASBUF);
+
 	/*
 	 * Oops, not for me..
 	 */
@@ -1694,8 +1715,7 @@ again:
 	} else if (v3 && (fmode & O_EXCL))
 		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
 	if (!error) {
-		if (cnp->cn_flags & MAKEENTRY)
-			nfs_cache_enter(dvp, newvp, cnp);
+		nfs_cache_enter(dvp, newvp, cnp);
 		*ap->a_vpp = newvp;
 	}
 	PNBUF_PUT(cnp->cn_pnbuf);
@@ -1777,6 +1797,8 @@ nfs_remove(v)
 	} else if (!np->n_sillyrename)
 		error = nfs_sillyrename(dvp, vp, cnp);
 	PNBUF_PUT(cnp->cn_pnbuf);
+	if (!error)
+		cache_purge1(dvp, cnp, 0);
 	if (!error && nfs_getattrcache(vp, &vattr) == 0 &&
 	    vattr.va_nlink == 1) {
 		np->n_flag |= NREMOVED;
@@ -1891,11 +1913,11 @@ nfs_rename(v)
 
 	VN_KNOTE(fdvp, NOTE_WRITE);
 	VN_KNOTE(tdvp, NOTE_WRITE);
-	if (fvp->v_type == VDIR) {
-		if (tvp != NULL && tvp->v_type == VDIR)
-			cache_purge(tdvp);
-		cache_purge(fdvp);
-	}
+	if (fvp->v_type == VDIR)
+		cache_purge(fvp); /* we need to flush DOTDOT entry */
+	else
+		cache_purge1(fdvp, fcnp, 0);
+	cache_purge1(tdvp, tcnp, 0);
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
@@ -2162,6 +2184,8 @@ nfs_mkdir(v)
 	struct mbuf *mreq, *mrep, *md, *mb;
 	const int v3 = NFS_ISV3(dvp);
 
+	KASSERT(cnp->cn_nameiop == CREATE);
+
 	len = cnp->cn_namelen;
 	nfsstats.rpccnt[NFSPROC_MKDIR]++;
 	nfsm_reqhead(dnp, NFSPROC_MKDIR,
@@ -2210,8 +2234,7 @@ nfs_mkdir(v)
 			vput(newvp);
 	} else {
 		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
-		if (cnp->cn_flags & MAKEENTRY)
-			nfs_cache_enter(dvp, newvp, cnp);
+		nfs_cache_enter(dvp, newvp, cnp);
 		*ap->a_vpp = newvp;
 	}
 	PNBUF_PUT(cnp->cn_pnbuf);

--NextPart-20040403232804-0245300--