Subject: diffs for poolifying softdep
To: tech-kern@netbsd.org
From: Chuck Silvers <chuq@chuq.com>
List: tech-kern
Date: 09/13/2001 23:29:29
--OXfL5xGRrasGEqWY
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

hi folks,

attached is a set of diffs which convert the softdep code to use pools
for most memory allocations.  this is complicated a bit by several of the softdep
data structures being freed from interrupt context, but rather than allocate
those from kmem_map I just put them on a queue and free them from the syncer
(which calls into the softdep code at least once per second).  this should
prevent people from running out of kmem_map space if they increase the
maximum number of pending softdeps (which will happen automatically when
you increase the maximum number of vnodes after the change I posted
yesterday).
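
for anyone who'd rather see the idea before wading into the diff, here's a
minimal sketch of the delay-free scheme.  the names (struct item, item_pool,
item_queuefree and so on, plus the M_TEMP tag) are invented for illustration
only -- the real patch keeps one pool per dependency type and uses the item's
wk_type to pick the right pool when the queue is drained:

/*
 * illustrative sketch only -- names here are invented, not the ones
 * used in the diff below.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/queue.h>

struct item {
	LIST_ENTRY(item) i_list;	/* linkage on the to-free queue */
	/* ... per-dependency data ... */
};

static struct pool item_pool;
static LIST_HEAD(, item) item_tofree = LIST_HEAD_INITIALIZER(item_tofree);

void
item_pool_init(void)
{

	/* the _nointr page allocator doesn't draw on kmem_map */
	pool_init(&item_pool, sizeof(struct item), 0, 0, 0, "itempl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP);
}

/*
 * may be called from interrupt context: just queue the item.
 */
void
item_queuefree(struct item *ip)
{
	int s;

	s = splbio();
	LIST_INSERT_HEAD(&item_tofree, ip, i_list);
	splx(s);
}

/*
 * called from the syncer (process context) at least once a second:
 * hand the queued items back to their pool.
 */
void
item_freequeue_process(void)
{
	struct item *ip;
	int s;

	s = splbio();
	while ((ip = LIST_FIRST(&item_tofree)) != NULL) {
		LIST_REMOVE(ip, i_list);
		pool_put(&item_pool, ip);
	}
	splx(s);
}

the queue is manipulated at splbio() because the queueing side can be called
from interrupt context (the I/O completion path); the pool_put()s themselves
only happen from the syncer, which is what makes it safe to back the pools
with the nointr page allocator.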

I'm going to commit both of these this weekend if there is no controversy.

-Chuck

--OXfL5xGRrasGEqWY
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="diff.softdep-pool"

Index: ufs/ffs/ffs_softdep.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/ffs/ffs_softdep.c,v
retrieving revision 1.14
diff -u -r1.14 ffs_softdep.c
--- ufs/ffs/ffs_softdep.c	2001/08/30 03:55:42	1.14
+++ ufs/ffs/ffs_softdep.c	2001/09/08 06:56:26
@@ -74,25 +74,41 @@
 /*
  * Mapping of dependency structure types to malloc types.
  */
-#define	D_PAGEDEP	M_PAGEDEP
-#define	D_INODEDEP	M_INODEDEP
-#define	D_NEWBLK	M_NEWBLK
-#define	D_BMSAFEMAP	M_BMSAFEMAP
-#define	D_ALLOCDIRECT	M_ALLOCDIRECT
-#define	D_INDIRDEP	M_INDIRDEP
-#define	D_ALLOCINDIR	M_ALLOCINDIR
-#define	D_FREEFRAG	M_FREEFRAG
-#define	D_FREEBLKS	M_FREEBLKS
-#define	D_FREEFILE	M_FREEFILE
-#define	D_DIRADD	M_DIRADD
-#define	D_MKDIR		M_MKDIR
-#define	D_DIRREM	M_DIRREM
-/*
- * Names of malloc types.
- */
-extern char *memname[];
-#define TYPENAME(type) ((unsigned)(type) < M_LAST ? memname[type] : "???")
-#define DtoM(type) (type)
+#define	D_PAGEDEP	1
+#define	D_INODEDEP	2
+#define	D_NEWBLK	3
+#define	D_BMSAFEMAP	4
+#define	D_ALLOCDIRECT	5
+#define	D_INDIRDEP	6
+#define	D_ALLOCINDIR	7
+#define	D_FREEFRAG	8
+#define	D_FREEBLKS	9
+#define	D_FREEFILE	10
+#define	D_DIRADD	11
+#define	D_MKDIR		12
+#define	D_DIRREM	13
+#define D_LAST		13
+/*
+ * Names of softdep types.
+ */
+const char *softdep_typenames[] = {
+	"invalid",
+	"pagedep",
+	"inodedep",
+	"newblk",
+	"bmsafemap",
+	"allocdirect",
+	"indirdep",
+	"allocindir",
+	"freefrag",
+	"freeblks",
+	"freefile",
+	"diradd",
+	"mkdir",
+	"dirrem",
+};
+#define TYPENAME(type) \
+	((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
 /*
  * Finding the current process.
  */
@@ -353,6 +368,94 @@
 }
 
 /*
+ * Memory management.
+ */
+
+static struct pool pagedep_pool;
+static struct pool inodedep_pool;
+static struct pool newblk_pool;
+static struct pool bmsafemap_pool;
+static struct pool allocdirect_pool;
+static struct pool indirdep_pool;
+static struct pool allocindir_pool;
+static struct pool freefrag_pool;
+static struct pool freeblks_pool;
+static struct pool freefile_pool;
+static struct pool diradd_pool;
+static struct pool mkdir_pool;
+static struct pool dirrem_pool;
+
+static __inline void
+softdep_free(struct worklist *item, int type)
+{
+	switch (type) {
+
+	case D_PAGEDEP:
+		pool_put(&pagedep_pool, item);
+		return;
+
+	case D_INODEDEP:
+		pool_put(&inodedep_pool, item);
+		return;
+
+	case D_BMSAFEMAP:
+		pool_put(&bmsafemap_pool, item);
+		return;
+
+	case D_ALLOCDIRECT:
+		pool_put(&allocdirect_pool, item);
+		return;
+
+	case D_INDIRDEP:
+		pool_put(&indirdep_pool, item);
+		return;
+
+	case D_ALLOCINDIR:
+		pool_put(&allocindir_pool, item);
+		return;
+
+	case D_FREEFRAG:
+		pool_put(&freefrag_pool, item);
+		return;
+
+	case D_FREEBLKS:
+		pool_put(&freeblks_pool, item);
+		return;
+
+	case D_FREEFILE:
+		pool_put(&freefile_pool, item);
+		return;
+
+	case D_DIRADD:
+		pool_put(&diradd_pool, item);
+		return;
+
+	case D_MKDIR:
+		pool_put(&mkdir_pool, item);
+		return;
+
+	case D_DIRREM:
+		pool_put(&dirrem_pool, item);
+		return;
+
+	}
+	panic("softdep_free: unknown type %d", type);
+}
+
+struct workhead softdep_tofree;
+
+static __inline void
+softdep_queuefree(struct worklist *item)
+{
+	int s;
+
+	s = splbio();
+	LIST_INSERT_HEAD(&softdep_tofree, item, wk_list);
+	splx(s);
+}
+
+
+/*
  * Worklist queue management.
  * These routines require that the lock be held.
  */
@@ -365,7 +468,7 @@
 	(item)->wk_state &= ~ONWORKLIST;	\
 	LIST_REMOVE(item, wk_list);		\
 } while (0)
-#define WORKITEM_FREE(item, type) FREE(item, DtoM(type))
+#define WORKITEM_FREE(item, type) softdep_queuefree(item)
 
 #else /* DEBUG */
 static	void worklist_insert __P((struct workhead *, struct worklist *));
@@ -411,9 +514,7 @@
 
 	if (item->wk_state & ONWORKLIST)
 		panic("workitem_free: still on list");
-	if (item->wk_type != type)
-		panic("workitem_free: type mismatch");
-	FREE(item, DtoM(type));
+	softdep_queuefree(item);
 }
 #endif /* DEBUG */
 
@@ -497,6 +598,18 @@
 	struct worklist *wk;
 	struct fs *matchfs;
 	int matchcnt;
+	int s;
+
+	/*
+	 * first process any items on the delay-free queue.
+	 */
+	s = splbio();
+	while ((wk = LIST_FIRST(&softdep_tofree)) != NULL) {
+		LIST_REMOVE(wk, wk_list);
+		softdep_free(wk, wk->wk_type);
+	}
+	splx(s);
+
 	/*
 	 * Record the process identifier of our caller so that we can give
 	 * this process preferential treatment in request_cleanup below.
@@ -753,8 +866,7 @@
 		ACQUIRE_LOCK(&lk);
 		goto top;
 	}
-	MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep), M_PAGEDEP,
-		M_WAITOK);
+	pagedep = pool_get(&pagedep_pool, PR_WAITOK);
 	bzero(pagedep, sizeof(struct pagedep));
 	pagedep->pd_list.wk_type = D_PAGEDEP;
 	pagedep->pd_mnt = mp;
@@ -830,8 +942,7 @@
 		goto top;
 	}
 	num_inodedep += 1;
-	MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep),
-		M_INODEDEP, M_WAITOK);
+	inodedep = pool_get(&inodedep_pool, PR_WAITOK);
 	inodedep->id_list.wk_type = D_INODEDEP;
 	inodedep->id_fs = fs;
 	inodedep->id_ino = inum;
@@ -892,8 +1003,7 @@
 	}
 	if (sema_get(&newblk_in_progress, 0) == 0)
 		goto top;
-	MALLOC(newblk, struct newblk *, sizeof(struct newblk),
-		M_NEWBLK, M_WAITOK);
+	newblk = pool_get(&newblk_pool, PR_WAITOK);
 	newblk->nb_state = 0;
 	newblk->nb_fs = fs;
 	newblk->nb_newblkno = newblkno;
@@ -929,6 +1039,46 @@
 	for (i = 0; i < PCBPHASHSIZE; i++) {
 		LIST_INIT(&pcbphashhead[i]);
 	}
+
+	pool_init(&pagedep_pool, sizeof(struct pagedep), 0, 0, 0,
+	    "pagedeppl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_PAGEDEP);
+	pool_init(&inodedep_pool, sizeof(struct inodedep), 0, 0, 0,
+	    "inodedeppl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_INODEDEP);
+	pool_init(&newblk_pool, sizeof(struct newblk), 0, 0, 0,
+	    "newblkpl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_NEWBLK);
+	pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, 0, 0,
+	    "bmsafemappl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_BMSAFEMAP);
+	pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, 0, 0,
+	    "allocdirectpl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_ALLOCDIRECT);
+	pool_init(&indirdep_pool, sizeof(struct indirdep), 0, 0, 0,
+	    "indirdeppl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_INDIRDEP);
+	pool_init(&allocindir_pool, sizeof(struct allocindir), 0, 0, 0,
+	    "allocindirpl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_ALLOCINDIR);
+	pool_init(&freefrag_pool, sizeof(struct freefrag), 0, 0, 0,
+	    "freefragpl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_FREEFRAG);
+	pool_init(&freeblks_pool, sizeof(struct freeblks), 0, 0, 0,
+	    "freeblkspl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_FREEBLKS);
+	pool_init(&freefile_pool, sizeof(struct freefile), 0, 0, 0,
+	    "freefilepl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_FREEFILE);
+	pool_init(&diradd_pool, sizeof(struct diradd), 0, 0, 0,
+	    "diraddpl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_DIRADD);
+	pool_init(&mkdir_pool, sizeof(struct mkdir), 0, 0, 0,
+	    "mkdirpl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_MKDIR);
+	pool_init(&dirrem_pool, sizeof(struct dirrem), 0, 0, 0,
+	    "dirrempl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+	    M_DIRREM);
 }
 
 /*
@@ -1094,8 +1244,7 @@
 		if (wk->wk_type == D_BMSAFEMAP)
 			return (WK_BMSAFEMAP(wk));
 	FREE_LOCK(&lk);
-	MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap),
-		M_BMSAFEMAP, M_WAITOK);
+	bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK);
 	bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
 	bmsafemap->sm_list.wk_state = 0;
 	bmsafemap->sm_buf = bp;
@@ -1154,8 +1303,7 @@
 	struct pagedep *pagedep;
 	struct newblk *newblk;
 
-	MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
-		M_ALLOCDIRECT, M_WAITOK);
+	adp = pool_get(&allocdirect_pool, PR_WAITOK);
 	bzero(adp, sizeof(struct allocdirect));
 	adp->ad_list.wk_type = D_ALLOCDIRECT;
 	adp->ad_lbn = lbn;
@@ -1186,7 +1334,7 @@
 		LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
 	}
 	LIST_REMOVE(newblk, nb_hash);
-	FREE(newblk, M_NEWBLK);
+	pool_put(&newblk_pool, newblk);
 
 	/*
 	 * If we were not passed a bp to attach the dep to,
@@ -1318,8 +1466,7 @@
 	fs = ip->i_fs;
 	if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
 		panic("newfreefrag: frag size");
-	MALLOC(freefrag, struct freefrag *, sizeof(struct freefrag),
-		M_FREEFRAG, M_WAITOK);
+	freefrag = pool_get(&freefrag_pool, PR_WAITOK);
 	freefrag->ff_list.wk_type = D_FREEFRAG;
 	freefrag->ff_state = ip->i_ffs_uid & ~ONWORKLIST; /* XXX - used below */
 	freefrag->ff_inum = ip->i_number;
@@ -1353,7 +1500,7 @@
 	lockmgr(&vp.v_glock, LK_EXCLUSIVE, NULL);
 	ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize);
 	lockmgr(&vp.v_glock, LK_RELEASE, NULL);
-	FREE(freefrag, M_FREEFRAG);
+	pool_put(&freefrag_pool, freefrag);
 }
 
 /*
@@ -1393,8 +1540,7 @@
 {
 	struct allocindir *aip;
 
-	MALLOC(aip, struct allocindir *, sizeof(struct allocindir),
-		M_ALLOCINDIR, M_WAITOK);
+	aip = pool_get(&allocindir_pool, PR_WAITOK);
 	bzero(aip, sizeof(struct allocindir));
 	aip->ai_list.wk_type = D_ALLOCINDIR;
 	aip->ai_state = ATTACHED;
@@ -1523,7 +1669,7 @@
 				    aip, ai_deps);
 			}
 			LIST_REMOVE(newblk, nb_hash);
-			FREE(newblk, M_NEWBLK);
+			pool_put(&newblk_pool, newblk);
 			aip->ai_indirdep = indirdep;
 			/*
 			 * Check to see if there is an existing dependency
@@ -1556,12 +1702,11 @@
 				brelse(newindirdep->ir_savebp);
 				softdep_lockedbufs--;
 			}
-			WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP);
+			WORKITEM_FREE(newindirdep, D_INDIRDEP);
 		}
 		if (indirdep)
 			break;
-		MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep),
-			M_INDIRDEP, M_WAITOK);
+		newindirdep = pool_get(&indirdep_pool, PR_WAITOK);
 		newindirdep->ir_list.wk_type = D_INDIRDEP;
 		newindirdep->ir_state = ATTACHED;
 		LIST_INIT(&newindirdep->ir_deplisthd);
@@ -1625,8 +1770,7 @@
 
 	if (length != 0)
 		panic("softdep_setup_freeblocks: non-zero length");
-	MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
-		M_FREEBLKS, M_WAITOK);
+	freeblks = pool_get(&freeblks_pool, PR_WAITOK);
 	bzero(freeblks, sizeof(struct freeblks));
 	freeblks->fb_list.wk_type = D_FREEBLKS;
 	freeblks->fb_uid = ip->i_ffs_uid;
@@ -1884,8 +2028,7 @@
 	/*
 	 * This sets up the inode de-allocation dependency.
 	 */
-	MALLOC(freefile, struct freefile *, sizeof(struct freefile),
-		M_FREEFILE, M_WAITOK);
+	freefile = pool_get(&freefile_pool, PR_WAITOK);
 	freefile->fx_list.wk_type = D_FREEFILE;
 	freefile->fx_list.wk_state = 0;
 	freefile->fx_mode = ap->a_mode;
@@ -2213,7 +2356,7 @@
 	fs = dp->i_fs;
 	lbn = lblkno(fs, diroffset);
 	offset = blkoff(fs, diroffset);
-	MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_WAITOK);
+	dap = pool_get(&diradd_pool, PR_WAITOK);
 	bzero(dap, sizeof(struct diradd));
 	dap->da_list.wk_type = D_DIRADD;
 	dap->da_state = ATTACHED;
@@ -2224,13 +2367,11 @@
 		ACQUIRE_LOCK(&lk);
 	} else {
 		dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
-		MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
-		    M_WAITOK);
+		mkdir1 = pool_get(&mkdir_pool, PR_WAITOK);
 		mkdir1->md_list.wk_type = D_MKDIR;
 		mkdir1->md_state = MKDIR_BODY;
 		mkdir1->md_diradd = dap;
-		MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
-		    M_WAITOK);
+		mkdir2 = pool_get(&mkdir_pool, PR_WAITOK);
 		mkdir2->md_list.wk_type = D_MKDIR;
 		mkdir2->md_state = MKDIR_PARENT;
 		mkdir2->md_diradd = dap;
@@ -2473,8 +2614,7 @@
 		(void) request_cleanup(FLUSH_REMOVE, 0);
 
 	num_dirrem += 1;
-	MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem),
-		M_DIRREM, M_WAITOK);
+	dirrem = pool_get(&dirrem_pool, PR_WAITOK);
 	bzero(dirrem, sizeof(struct dirrem));
 	dirrem->dm_list.wk_type = D_DIRREM;
 	dirrem->dm_state = isrmdir ? RMDIR : 0;
@@ -2570,8 +2710,7 @@
 	 * Whiteouts do not need diradd dependencies.
 	 */
 	if (newinum != WINO) {
-		MALLOC(dap, struct diradd *, sizeof(struct diradd),
-		    M_DIRADD, M_WAITOK);
+		dap = pool_get(&diradd_pool, PR_WAITOK);
 		bzero(dap, sizeof(struct diradd));
 		dap->da_list.wk_type = D_DIRADD;
 		dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
@@ -3860,12 +3999,8 @@
 		if (error != 0)
 			return (error);
 		if (flushparent) {
-#ifdef __FreeBSD__
-			error = UFS_UPDATE(pvp, 1);
-#else
 			VTOI(pvp)->i_flag |= IN_MODIFIED;
 			error = VOP_UPDATE(pvp, NULL, NULL, UPDATE_WAIT);
-#endif
 			if (error) {
 				vput(pvp);
 				return (error);
@@ -4353,12 +4488,8 @@
 		 */
 		if (dap->da_state & MKDIR_PARENT) {
 			FREE_LOCK(&lk);
-#ifdef __FreeBSD__
-			error = UFS_UPDATE(pvp, 1);
-#else
 			VTOI(pvp)->i_flag |= IN_MODIFIED;
 			error = VOP_UPDATE(pvp, NULL, NULL, UPDATE_WAIT);
-#endif
 			if (error)
 				break;
 			ACQUIRE_LOCK(&lk);

--OXfL5xGRrasGEqWY--