Subject: NFS server cache bug
To: None <bugs@openbsd.org>
From: None <rick@snowhite.cis.uoguelph.ca>
List: tech-kern
Date: 08/10/2002 11:13:01
I have no idea if this race condition problem still lives in any of the
current source code trees, but I bumped into it when testing V4 and
thought I'd let you know.

The basic problem is that nfsrv_getcache() does either a malloc or
gets the lru entry (with a potential to sleep in both cases). While
sleeping, it is possible for a retried copy of the same request to
get into the cache, resulting in duplicates and potential problems
later. My code base is now quite different, but I think the following
fixes the problem.

Basically, move the malloc to the top, before the search loop and change
the "else" case to look for an unlocked entry without sleeping and fall
back on using the newly malloc'd one, avoiding any sleeps after the
search loop.

Here is a "diff -c" for the nfs_srvcache.c that was in OpenBSD 2.6.
(If the entire file is more useful, you can get it via anonymous
ftp to snowhite.cis.uoguelph.ca in pub/nfs/nfs_srvcache.c.fixed.)

In case you are wondering, the V4 server did ok at the recent bakeoff
(although it still is missing some capabilities and needs more work) and
the CITI folks at the Univ. of Michigan are working on the client.
(I'll be putting an updated tarball on the ftp site soon, but unless you
 are really keen, it might be better to wait until the code is merged
 with the Univ. of Michigan work.)

Good luck with it, rick
--- diff -c nfs_srvcache.c ---
*** nfs_srvcache.c.sav	Fri Aug  9 22:20:43 2002
--- nfs_srvcache.c	Fri Aug  9 22:55:40 2002
***************
*** 168,174 ****
  	struct nfssvc_sock *slp;
  	struct mbuf **repp;
  {
! 	register struct nfsrvcache *rp;
  	struct mbuf *mb;
  	struct sockaddr_in *saddr;
  	caddr_t bpos;
--- 168,174 ----
  	struct nfssvc_sock *slp;
  	struct mbuf **repp;
  {
! 	register struct nfsrvcache *rp, *newrp;
  	struct mbuf *mb;
  	struct sockaddr_in *saddr;
  	caddr_t bpos;
***************
*** 180,185 ****
--- 180,188 ----
  	 */
  	if (!nd->nd_nam2)
  		return (RC_DOIT);
+ 	newrp = (struct nfsrvcache *)malloc((u_long)sizeof *newrp,
+ 	    M_NFSD, M_WAITOK);
+ 	bzero((char *)newrp, sizeof *newrp);
  loop:
  	for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
  	    rp = rp->rc_hash.le_next) {
***************
*** 221,252 ****
  				rp->rc_flag &= ~RC_WANTED;
  				wakeup((caddr_t)rp);
  			}
  			return (ret);
  		}
  	}
  	nfsstats.srvcache_misses++;
  	if (numnfsrvcache < desirednfsrvcache) {
- 		rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
- 		    M_NFSD, M_WAITOK);
- 		bzero((char *)rp, sizeof *rp);
  		numnfsrvcache++;
! 		rp->rc_flag = RC_LOCKED;
  	} else {
! 		rp = nfsrvlruhead.tqh_first;
! 		while ((rp->rc_flag & RC_LOCKED) != 0) {
! 			rp->rc_flag |= RC_WANTED;
! 			(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
! 			rp = nfsrvlruhead.tqh_first;
  		}
- 		rp->rc_flag |= RC_LOCKED;
- 		LIST_REMOVE(rp, rc_hash);
- 		TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
- 		if (rp->rc_flag & RC_REPMBUF)
- 			m_freem(rp->rc_reply);
- 		if (rp->rc_flag & RC_NAM)
- 			MFREE(rp->rc_nam, mb);
- 		rp->rc_flag &= (RC_LOCKED | RC_WANTED);
  	}
  	TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
  	rp->rc_state = RC_INPROG;
  	rp->rc_xid = nd->nd_retxid;
--- 224,257 ----
  				rp->rc_flag &= ~RC_WANTED;
  				wakeup((caddr_t)rp);
  			}
+ 			free((caddr_t)newrp, M_NFSD);
  			return (ret);
  		}
  	}
  	nfsstats.srvcache_misses++;
  	if (numnfsrvcache < desirednfsrvcache) {
  		numnfsrvcache++;
! 		rp = newrp;
  	} else {
! 		rp = TAILQ_FIRST(&nfsrvlruhead);
! 		while (rp != TAILQ_END(&nfsrvlruhead) &&
! 			(rp->rc_flag & (RC_LOCKED | RC_WANTED)))
! 			rp = TAILQ_NEXT(rp, rc_lru);
! 		if (rp != TAILQ_END(&nfsrvlruhead)) {
! 			LIST_REMOVE(rp, rc_hash);
! 			TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
! 			if (rp->rc_flag & RC_REPMBUF)
! 				m_freem(rp->rc_reply);
! 			if (rp->rc_flag & RC_NAM)
! 				MFREE(rp->rc_nam, mb);
! 			rp->rc_flag = 0;
! 			free((caddr_t)newrp, M_NFSD);
! 		} else {
! 			numnfsrvcache++;
! 			rp = newrp;
  		}
  	}
+ 	rp->rc_flag = RC_LOCKED;
  	TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
  	rp->rc_state = RC_INPROG;
  	rp->rc_xid = nd->nd_retxid;
***************
*** 334,339 ****
--- 339,348 ----
  		nextrp = rp->rc_lru.tqe_next;
  		LIST_REMOVE(rp, rc_hash);
  		TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
+ 		if (rp->rc_flag & RC_REPMBUF)
+ 			m_freem(rp->rc_reply);
+ 		if (rp->rc_flag & RC_NAM)
+ 			MFREE(rp->rc_nam, mb);
  		free(rp, M_NFSD);
  	}
  	numnfsrvcache = 0;