Subject: Re: pagedeamon deadlock (3) with back trace
To: Greg Oster <oster@cs.usask.ca>
From: Gilbert Fernandes <gilbertf@netbsd-fr.org>
List: current-users
Date: 02/22/2004 12:20:54
--HcAYCG3uE/tztfnV
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Sat, Feb 21, 2004 at 11:30:07AM -0600, Greg Oster wrote:

> Thanks for the info... (This problem is *really* annoying for me, as 
> I'm sure it is for you, and it needs to be fixed soon...)

Use the following two patches. Apply them from /usr/src/sys
with: patch -p0 < patchname

They worked fine for me: I tortured the disk and memory,
and I no longer get pagedaemon deadlocks.

The patches come from the venerable Yamamoto Takashi
(yamt@mwd.biglobe.ne.jp).

Tell him if it works fine for you too :)

-- 
Gilbert Fernandes

--HcAYCG3uE/tztfnV
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="softdep.inodedep3.diff"

Index: ufs/ffs/ffs_softdep.c
===================================================================
--- ufs/ffs/ffs_softdep.c	(revision 573)
+++ ufs/ffs/ffs_softdep.c	(revision 575)
@@ -481,6 +481,71 @@ softdep_freequeue_process(void)
 	}
 }
 
+static char emerginoblk[MAXBSIZE];
+static int emerginoblk_inuse;
+static const struct buf *emerginoblk_origbp;
+static struct simplelock emerginoblk_slock = SIMPLELOCK_INITIALIZER;
+
+static __inline void *
+inodedep_allocdino(struct inodedep *inodedep, size_t size)
+{
+	void *vp;
+	int s;
+	const struct buf *origbp;
+
+	KASSERT(inodedep->id_savedino1 == NULL);
+
+	if (curproc != uvm.pagedaemon_proc)
+		return malloc(size, M_INODEDEP, M_WAITOK);
+
+	vp = malloc(size, M_INODEDEP, M_NOWAIT);
+	if (vp)
+		return vp;
+
+	origbp = inodedep->id_buf;
+
+	s = splbio();
+	simple_lock(&emerginoblk_slock);
+	while (emerginoblk_inuse && emerginoblk_origbp != origbp)
+		ltsleep(&emerginoblk_inuse, PVM, "emdino", 0,
+		    &emerginoblk_slock);
+	emerginoblk_origbp = origbp;
+	emerginoblk_inuse++;
+	KASSERT(emerginoblk_inuse <= sizeof(emerginoblk) /
+	    MIN(sizeof(struct ufs1_dinode), sizeof(struct ufs2_dinode)));
+	simple_unlock(&emerginoblk_slock);
+	splx(s);
+
+	KASSERT(inodedep->id_savedino1 == NULL);
+
+	return emerginoblk + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
+}
+
+static __inline void
+inodedep_freedino(struct inodedep *inodedep)
+{
+	void *vp = inodedep->id_savedino1;
+
+	inodedep->id_savedino1 = NULL;
+	KASSERT(vp != NULL);
+	if (__predict_false((void *)&emerginoblk[0] <= vp &&
+	    vp < (void *)&emerginoblk[MAXBSIZE])) {
+		int s;
+
+		KASSERT(emerginoblk_inuse > 0);
+		s = splbio();
+		simple_lock(&emerginoblk_slock);
+		emerginoblk_inuse--;
+		wakeup(&emerginoblk_inuse);
+		simple_unlock(&emerginoblk_slock);
+		splx(s);
+
+		return;
+	}
+
+	free(vp, M_INODEDEP);
+}
+
 /*
  * Worklist queue management.
  * These routines require that the lock be held.
@@ -2355,8 +2420,7 @@ check_inode_unwritten(inodedep)
 	if (inodedep->id_state & ONWORKLIST)
 		WORKLIST_REMOVE(&inodedep->id_list);
 	if (inodedep->id_savedino1 != NULL) {
-		FREE(inodedep->id_savedino1, M_INODEDEP);
-		inodedep->id_savedino1 = NULL;
+		inodedep_freedino(inodedep);
 	}
 	if (free_inodedep(inodedep) == 0)
 		panic("check_inode_unwritten: busy inode");
@@ -3517,8 +3581,8 @@ initiate_write_inodeblock_ufs1(inodedep,
 	if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 		if (inodedep->id_savedino1 != NULL)
 			panic("initiate_write_inodeblock: already doing I/O");
-		MALLOC(inodedep->id_savedino1, struct ufs1_dinode *,
-		    sizeof(struct ufs1_dinode), M_INODEDEP, M_WAITOK);
+		inodedep->id_savedino1 =
+		    inodedep_allocdino(inodedep, sizeof(struct ufs1_dinode));
 		*inodedep->id_savedino1 = *dp;
 		bzero((caddr_t)dp, sizeof(struct ufs1_dinode));
 		return;
@@ -3657,8 +3721,8 @@ initiate_write_inodeblock_ufs2(inodedep,
 	if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 		if (inodedep->id_savedino2 != NULL)
 			panic("initiate_write_inodeblock_ufs2: I/O underway");
-		MALLOC(inodedep->id_savedino2, struct ufs2_dinode *,
-		    sizeof(struct ufs2_dinode), M_INODEDEP, M_WAITOK);
+		inodedep->id_savedino2 =
+		    inodedep_allocdino(inodedep, sizeof(struct ufs2_dinode));
 		*inodedep->id_savedino2 = *dp;
 		bzero((caddr_t)dp, sizeof(struct ufs2_dinode));
 		return;
@@ -4144,8 +4208,7 @@ handle_written_inodeblock(inodedep, bp)
 			*dp1 = *inodedep->id_savedino1;
 		else
 			*dp2 = *inodedep->id_savedino2;
-		FREE(inodedep->id_savedino1, M_INODEDEP);
-		inodedep->id_savedino1 = NULL;
+		inodedep_freedino(inodedep);
 		if ((bp->b_flags & B_DELWRI) == 0)
 			stat_inode_bitmap++;
 		bdirty(bp);

--HcAYCG3uE/tztfnV
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="softdep.indirdep.diff"

Index: ufs/ffs/ffs_softdep.c
===================================================================
--- ufs/ffs/ffs_softdep.c	(revision 572)
+++ ufs/ffs/ffs_softdep.c	(revision 573)
@@ -65,7 +65,6 @@ extern struct simplelock bqueue_slock; /
 MALLOC_DEFINE(M_PAGEDEP, "pagedep", "file page dependencies");
 MALLOC_DEFINE(M_INODEDEP, "inodedep", "Inode depependencies");
 MALLOC_DEFINE(M_NEWBLK, "newblk", "New block allocation");
-MALLOC_DEFINE(M_INDIRDEP, "indirdep", "Indirect block dependencies");
 
 /*
  * These definitions need to be adapted to the system to which
@@ -3360,7 +3359,6 @@ softdep_disk_io_initiation(bp)
 	struct worklist *wk, *nextwk;
 	struct indirdep *indirdep;
 	struct inodedep *inodedep;
-	caddr_t saveddata;
 
 	/*
 	 * We only care about write operations. There should never
@@ -3411,15 +3409,11 @@ softdep_disk_io_initiation(bp)
 			/*
 			 * Replace up-to-date version with safe version.
 			 */
-			MALLOC(saveddata, caddr_t, bp->b_bcount, M_INDIRDEP,
-			    M_WAITOK);
 			ACQUIRE_LOCK(&lk);
 			indirdep->ir_state &= ~ATTACHED;
 			indirdep->ir_state |= UNDONE;
-			indirdep->ir_saveddata = saveddata;
-			bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
-			bcopy(indirdep->ir_savebp->b_data, bp->b_data,
-			      bp->b_bcount);
+			indirdep->ir_saveddata = bp->b_data;
+			bp->b_data = indirdep->ir_savebp->b_data;
 			FREE_LOCK(&lk);
 			continue;
 
@@ -3962,8 +3956,7 @@ softdep_disk_write_complete(bp)
 			indirdep = WK_INDIRDEP(wk);
 			if (indirdep->ir_state & GOINGAWAY)
 				panic("disk_write_complete: indirdep gone");
-			bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
-			FREE(indirdep->ir_saveddata, M_INDIRDEP);
+			bp->b_data = indirdep->ir_saveddata;
 			indirdep->ir_saveddata = 0;
 			indirdep->ir_state &= ~UNDONE;
 			indirdep->ir_state |= ATTACHED;

--HcAYCG3uE/tztfnV--