Subject: kern/33280: Potential Multi-Processor locking bugs
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org>
From: None <netbsd-bugs@c--e.de>
List: netbsd-bugs
Date: 04/18/2006 09:45:00
>Number: 33280
>Category: kern
>Synopsis: Potential Multi-Processor locking bugs
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Tue Apr 18 09:45:00 +0000 2006
>Originator: Christian Ehrhardt
>Release: Snapshot of CURRENT dated 2006.04.05.00.00.00
>Organization:
>Environment:
n/a
>Description:
There are several potential simple_lock locking bugs on multi-processor
systems. All of these were found by code inspection, not by running the code.
I'm pretty sure that these are real bugs, but it would be great if someone
with more knowledge of the code could check and potentially confirm this.
I sent this report to tech-kern and it was suggested that I also turn it
into a bug report to make sure it doesn't get lost. Each item is
accompanied by a proposed patch (except where noted), mainly to illustrate
the point.
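To illustrate the common shape of most of the problems below, here is a
minimal, purely hypothetical sketch (struct softc, sc_slock and do_something
are made-up names, not code from the tree): a simple_lock is taken, but one
path leaves without the matching simple_unlock, which on a multi-processor
kernel either leaks the lock or deadlocks on the next simple_lock of the
same lock.

	/* Hypothetical illustration only, not actual kernel code. */
	int
	frob(struct softc *sc)
	{
		int error;

		simple_lock(&sc->sc_slock);
		error = do_something(sc);
		if (error)
			return error;	/* BUG: sc_slock is still held */
		simple_unlock(&sc->sc_slock);
		return 0;
	}

Most of the fixes below boil down to adding the missing simple_unlock (or
re-acquiring the lock) on such a path.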
* uvm_loanzero may call uvm_analloc, which returns with anon->an_lock
locked. This lock is never dropped by uvm_loanzero, and AFAICS the caller
doesn't drop it either. Proposed patch:
Index: uvm_loan.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_loan.c,v
retrieving revision 1.58
diff -u -r1.58 uvm_loan.c
--- uvm_loan.c 31 Jan 2006 14:11:25 -0000 1.58
+++ uvm_loan.c 13 Apr 2006 12:12:06 -0000
@@ -922,6 +922,7 @@
pg->loan_count++;
uvm_pageactivate(pg);
uvm_unlock_pageq();
+ simple_unlock(&anon->an_lock);
simple_unlock(&uvm_loanzero_object.vmobjlock);
**output = anon;
(*output)++;
* lfs_strategy: If fs->lfs_seglock is NULL, which can apparently happen, the
following piece of code looks like it can deadlock on lfs_interlock (the
LOCK/UNLOCK/DEADLOCK annotations are mine, not part of the source):
	for (i = 0; i < fs->lfs_cleanind; i++) {
		if (sn == dtosn(fs, fs->lfs_cleanint[i]) &&
		    tbn >= fs->lfs_cleanint[i]) {
			/* ... */
			simple_lock(&fs->lfs_interlock);	/* LOCK */
			if (fs->lfs_seglock)
				/* UNLOCK is conditional */
				ltsleep(&fs->lfs_seglock,
				    (PRIBIO + 1) | PNORELOCK,
				    "lfs_strategy", 0,
				    &fs->lfs_interlock);
			/* Things may be different now; start over. */
			slept = 1;
			break;
		}
	}
	simple_lock(&fs->lfs_interlock);		/* DEADLOCK */
Proposed patch:
Index: lfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_vnops.c,v
retrieving revision 1.162
diff -u -r1.162 lfs_vnops.c
--- lfs_vnops.c 1 Apr 2006 00:13:01 -0000 1.162
+++ lfs_vnops.c 13 Apr 2006 12:20:43 -0000
@@ -1105,11 +1105,14 @@
"lfs_strategy: sleeping on ino %d lbn %"
PRId64 "\n", ip->i_number, bp->b_lblkno));
simple_lock(&fs->lfs_interlock);
- if (fs->lfs_seglock)
+ if (fs->lfs_seglock) {
ltsleep(&fs->lfs_seglock,
(PRIBIO + 1) | PNORELOCK,
"lfs_strategy", 0,
&fs->lfs_interlock);
+ } else {
+ simple_unlock(&fs->lfs_interlock);
+ }
/* Things may be different now; start over. */
slept = 1;
break;
* physio: The first call to ltsleep should apparently use &obp->b_interlock
instead of &bp->b_interlock (bp is probably NULL here). Proposed patch:
Index: kern_physio.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_physio.c,v
retrieving revision 1.72
diff -u -r1.72 kern_physio.c
--- kern_physio.c 16 Jan 2006 21:45:38 -0000 1.72
+++ kern_physio.c 13 Apr 2006 12:26:55 -0000
@@ -300,7 +300,7 @@
/* [mark the buffer wanted] */
obp->b_flags |= B_WANTED;
/* [wait until the buffer is available] */
- ltsleep(obp, PRIBIO+1, "physbuf", 0, &bp->b_interlock);
+ ltsleep(obp, PRIBIO+1, "physbuf", 0, &obp->b_interlock);
}
/* Mark it busy, so nobody else will use it. */
* lfs_flush_dirops: If (vp->v_flag & VXLOCK) is true, fs->lfs_interlock
should be reacquired before the continue. Proposed patch:
Index: lfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_vnops.c,v
retrieving revision 1.162
diff -u -r1.162 lfs_vnops.c
--- lfs_vnops.c 1 Apr 2006 00:13:01 -0000 1.162
+++ lfs_vnops.c 13 Apr 2006 12:30:24 -0000
@@ -1186,8 +1189,10 @@
* make sure that we don't clear IN_MODIFIED
* unnecessarily.
*/
- if (vp->v_flag & VXLOCK)
+ if (vp->v_flag & VXLOCK) {
+ simple_lock(&fs->lfs_interlock);
continue;
+ }
if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
needunlock = 1;
} else {
* ptmioctl: This is probably harmless, but still: fd_getfile returns a
struct file with fp->f_slock held. It should probably be unlocked
before ffree(fp). Proposed patch:
Index: tty_ptm.c
===================================================================
RCS file: /cvsroot/src/sys/kern/tty_ptm.c,v
retrieving revision 1.7
diff -u -r1.7 tty_ptm.c
--- tty_ptm.c 11 Dec 2005 12:24:30 -0000 1.7
+++ tty_ptm.c 13 Apr 2006 12:39:56 -0000
@@ -379,6 +379,7 @@
}
bad:
fp = fd_getfile(p->p_fd, cfd);
+ simple_unlock(&fp->f_slock);
fdremove(p->p_fd, cfd);
ffree(fp);
return error;
* lfs_flush: If the first call to vfs_busy (reached only when only_onefs
is TRUE) returns no error, mountlist_slock will be dropped, although the
documentation and the rest of the function seem to indicate that it isn't
held at this point. The caller apparently has no way to know whether the
lock was dropped or not. I could not find any call site that actually uses
only_onefs != 0; still, I think something along the lines of the following
proposed patch is useful:
Index: lfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_bio.c,v
retrieving revision 1.90
diff -u -r1.90 lfs_bio.c
--- lfs_bio.c 5 Mar 2006 17:33:33 -0000 1.90
+++ lfs_bio.c 13 Apr 2006 12:53:25 -0000
@@ -568,8 +568,7 @@
if (only_onefs) {
KASSERT(fs != NULL);
- if (vfs_busy(fs->lfs_ivnode->v_mount, LK_NOWAIT,
- &mountlist_slock))
+ if (vfs_busy(fs->lfs_ivnode->v_mount, LK_NOWAIT, NULL))
goto errout;
simple_lock(&fs->lfs_interlock);
lfs_flush_fs(fs, flags);
* lfs_reserveavail: The error return in the while loop should probably
unlock fs->lfs_interlock. Proposed patch:
Index: lfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_bio.c,v
retrieving revision 1.90
diff -u -r1.90 lfs_bio.c
--- lfs_bio.c 5 Mar 2006 17:33:33 -0000 1.90
+++ lfs_bio.c 13 Apr 2006 13:00:23 -0000
@@ -251,8 +251,10 @@
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
#endif
- if (error)
+ if (error) {
+ simple_unlock(&fs->lfs_interlock);
return error;
+ }
}
#ifdef DEBUG
if (slept) {
* linux_ioctl_termios: The call to FILE_USE should come immediately before
the FREAD | FWRITE test, not after it; otherwise FILE_UNUSE will be called
after the jump to out without a corresponding FILE_USE. Proposed patch:
Index: linux_termios.c
===================================================================
RCS file: /cvsroot/src/sys/compat/linux/common/linux_termios.c,v
retrieving revision 1.25
diff -u -r1.25 linux_termios.c
--- linux_termios.c 15 Feb 2006 09:31:17 -0000 1.25
+++ linux_termios.c 13 Apr 2006 13:03:49 -0000
@@ -89,13 +89,13 @@
if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
return (EBADF);
+ FILE_USE(fp);
+
if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
error = EBADF;
goto out;
}
- FILE_USE(fp);
-
bsdioctl = fp->f_ops->fo_ioctl;
com = SCARG(uap, com);
retval[0] = 0;
* svr4_fil_ioctl: FILE_USE/FILE_UNUSE is completely missing in this function.
fp is locked after fd_getfile and never unlocked (FILE_USE would do that).
No proposed patch, but a rough sketch of what a fix would look like follows.
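For illustration only, the fix would presumably follow the same
FILE_USE/FILE_UNUSE pattern as in the linux_ioctl_termios patch above. The
snippet below is a hypothetical sketch, not a tested change to
svr4_fil_ioctl; the names fdp, fd and l and the surrounding code are
assumptions:

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	FILE_USE(fp);		/* bump the use count, drops fp->f_slock */

	/* ... existing ioctl handling ... */

	FILE_UNUSE(fp, l);	/* drop the use count again before returning */
	return (error);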
* layer_node_alloc: In the error path (which probably can't happen)
lmp->layerm_hashlock is not unlocked. Proposed patch:
Index: layer_subr.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/layer_subr.c,v
retrieving revision 1.18
diff -u -r1.18 layer_subr.c
--- layer_subr.c 11 Dec 2005 12:24:50 -0000 1.18
+++ layer_subr.c 13 Apr 2006 13:15:28 -0000
@@ -264,6 +264,7 @@
vp->v_type = VBAD; /* node is discarded */
vp->v_op = dead_vnodeop_p; /* so ops will still work */
vrele(vp); /* get rid of it. */
+ simple_unlock(&lmp->layerm_hashlock);
return (error);
}
/*
* lfs_vflush: The jump to nextbp skips an unlock of vp->v_interlock.
Proposed patch:
Index: lfs_segment.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_segment.c,v
retrieving revision 1.171
diff -u -r1.171 lfs_segment.c
--- lfs_segment.c 24 Mar 2006 20:05:32 -0000 1.171
+++ lfs_segment.c 13 Apr 2006 13:18:47 -0000
@@ -250,6 +250,7 @@
wakeup(&fs->lfs_avail);
lfs_freebuf(fs, bp);
bp = NULL;
+ simple_unlock(&vp->v_interlock);
goto nextbp;
}
}
* layer_unlock: If LK_INTERLOCK is set, vp->v_interlock may be unlocked twice:
once explicitly and a second time implicitly by lockmgr. LK_INTERLOCK
is cleared from the variable flags but not from ap->a_flags, which is
what gets passed to lockmgr. This is not much of a problem because there
seems to be no call site that actually uses LK_INTERLOCK with layer_unlock
or VOP_UNLOCK. Still, here's a proposed patch:
Index: layer_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/layer_vnops.c,v
retrieving revision 1.26
diff -u -r1.26 layer_vnops.c
--- layer_vnops.c 11 Dec 2005 12:24:50 -0000 1.26
+++ layer_vnops.c 13 Apr 2006 13:23:43 -0000
@@ -687,7 +687,7 @@
flags &= ~LK_INTERLOCK;
}
VOP_UNLOCK(LAYERVPTOLOWERVP(vp), flags);
- return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
+ return (lockmgr(&vp->v_lock, flags | LK_RELEASE,
&vp->v_interlock));
}
}
>How-To-Repeat:
>Fix: