Subject: diff to speed up fdalloc using two-level bitmaps (resend)
To: None <tech-perform@netbsd.org>
From: Niels Provos <provos@citi.umich.edu>
List: tech-perform
Date: 10/28/2003 03:41:26
[Resend as I sent to the wrong mailing list at first]

Hi,

the following diff speeds up our file descriptor allocation performance.
This is the approach suggested by

  http://www.usenix.org/publications/library/proceedings/usenix98/banga.html

You can find a qualitative benchmark result at

  http://www.citi.umich.edu/u/provos/benchmark/netbsd-fdalloc.jpg

The actualy benchmark was posted to tech-perform@ earlier.

I would appreciate feedback and if people with 64-bit architectures
could test this please.

Thank you,
 Niels.

Index: sys/filedesc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/filedesc.h,v
retrieving revision 1.30
diff -u -r1.30 filedesc.h
--- sys/filedesc.h	7 Aug 2003 16:34:04 -0000	1.30
+++ sys/filedesc.h	28 Oct 2003 07:53:11 -0000
@@ -50,11 +50,18 @@
  */
 #define	NDFILE		20
 #define	NDEXTENT	50		/* 250 bytes in 256-byte alloc */
+#define	NDENTRIES	32		/* 32 fds per entry */
+#define	NDENTRYMASK	(NDENTRIES - 1)
+#define	NDENTRYSHIFT	5		/* bits per entry */
+#define	NDLOSLOTS(x)	(((x) + NDENTRIES - 1) >> NDENTRYSHIFT)
+#define	NDHISLOTS(x)	((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT)
 
 struct filedesc {
 	struct file	**fd_ofiles;	/* file structures for open files */
 	char		*fd_ofileflags;	/* per-process open file flags */
 	int		fd_nfiles;	/* number of open files allocated */
+	u_int		*fd_himap;	/* each bit points to 32 fds */
+	u_int		*fd_lomap;	/* bitmap of free fds */
 	int		fd_lastfile;	/* high-water mark of fd_ofiles */
 	int		fd_freefile;	/* approx. next free file */
 	int		fd_refcnt;	/* reference count */
@@ -91,6 +98,12 @@
 	 */
 	struct file	*fd_dfiles[NDFILE];
 	char		fd_dfileflags[NDFILE];
+	/*
+	 * There arrays are used when the number of open files is
+	 * <= 1024, and are then pointed to by the pointers above.
+	 */
+	u_int		fd_dhimap[NDENTRIES >> NDENTRYSHIFT];
+	u_int		fd_dlomap[NDENTRIES];
 };
 
 /*
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_descrip.c,v
retrieving revision 1.114
diff -u -r1.114 kern_descrip.c
--- kern/kern_descrip.c	22 Sep 2003 12:59:55 -0000	1.114
+++ kern/kern_descrip.c	28 Oct 2003 07:53:11 -0000
@@ -82,7 +82,9 @@
 
 static __inline void	fd_used(struct filedesc *, int);
 static __inline void	fd_unused(struct filedesc *, int);
+static __inline int	find_next_zero(u_int *, int, u_int);
 int			finishdup(struct proc *, int, int, register_t *);
+int			find_last_set(struct filedesc *, int);
 int			fcntl_forfs(int, struct proc *, int, void *);
 
 dev_type_open(filedescopen);
@@ -92,9 +94,71 @@
 	nostop, notty, nopoll, nommap, nokqfilter,
 };
 
+static __inline int
+find_next_zero (u_int *bitmap, int want, u_int bits)
+{
+        int i, off, maxoff;
+	u_int sub;
+
+        if (want > bits)
+                return -1;
+
+	off = want >> NDENTRYSHIFT;
+	i = want & NDENTRYMASK;
+        if (i) {
+		sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
+		if (sub != ~0)
+			goto found;
+		off++;
+        }
+
+	maxoff = NDLOSLOTS(bits);
+        while (off < maxoff) {
+                if ((sub = bitmap[off]) != ~0)
+                        goto found;
+		off++;
+        }
+
+        return -1;
+
+ found:
+        return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
+}
+
+int
+find_last_set(struct filedesc *fd, int last)
+{
+	int off, i;
+	struct file **ofiles = fd->fd_ofiles;
+	u_int *bitmap = fd->fd_lomap;
+
+	off = (last - 1) >> NDENTRYSHIFT;
+
+	while (!bitmap[off] && off >= 0)
+		off--;
+
+	if (off < 0)
+		return 0;
+       
+	i = ((off + 1) << NDENTRYSHIFT) - 1;
+	if (i >= last)
+		i = last - 1;
+
+	while (i > 0 && ofiles[i] == NULL)
+		i--;
+
+	return i;
+}
+
+
 static __inline void
 fd_used(struct filedesc *fdp, int fd)
 {
+	u_int off = fd >> NDENTRYSHIFT;
+
+	fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
+	if (fdp->fd_lomap[off] == ~0)
+		fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
 
 	if (fd > fdp->fd_lastfile)
 		fdp->fd_lastfile = fd;
@@ -103,19 +167,21 @@
 static __inline void
 fd_unused(struct filedesc *fdp, int fd)
 {
+	u_int off = fd >> NDENTRYSHIFT;
 
 	if (fd < fdp->fd_freefile)
 		fdp->fd_freefile = fd;
+
+	if (fdp->fd_lomap[off] == ~0)
+		fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK));
+	fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
+
 #ifdef DIAGNOSTIC
 	if (fd > fdp->fd_lastfile)
 		panic("fd_unused: fd_lastfile inconsistent");
 #endif
-	if (fd == fdp->fd_lastfile) {
-		do {
-			fd--;
-		} while (fd >= 0 && fdp->fd_ofiles[fd] == NULL);
-		fdp->fd_lastfile = fd;
-	}
+	if (fd == fdp->fd_lastfile)
+		fdp->fd_lastfile = find_last_set(fdp, fd);
 }
 
 /*
@@ -646,6 +712,7 @@
 {
 	struct filedesc	*fdp;
 	int i, lim, last;
+	u_int off, new;
 
 	fdp = p->p_fd;
 
@@ -656,15 +723,32 @@
 	 */
 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
 	last = min(fdp->fd_nfiles, lim);
+ again:
 	if ((i = want) < fdp->fd_freefile)
 		i = fdp->fd_freefile;
-	for (; i < last; i++) {
-		if (fdp->fd_ofiles[i] == NULL) {
-			fd_used(fdp, i);
-			if (want <= fdp->fd_freefile)
-				fdp->fd_freefile = i;
-			*result = i;
-			return (0);
+	off = i >> NDENTRYSHIFT;
+	new = find_next_zero(fdp->fd_himap, off,
+	    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
+	if (new != -1) {
+		i = find_next_zero(&fdp->fd_lomap[new], 
+		    new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
+		if (i == -1) {
+			/* 
+			 * free file descriptor in this block was
+			 * below want, try again with higher want.
+			 */
+			want = (new + 1) << NDENTRYSHIFT;
+			goto again;
+		}
+		i += (new << NDENTRYSHIFT);
+		if (i < last) {
+			if (fdp->fd_ofiles[i] == NULL) {
+				fd_used(fdp, i);
+				if (want <= fdp->fd_freefile)
+					fdp->fd_freefile = i;
+				*result = i;
+				return (0);
+			}
 		}
 	}
 
@@ -683,6 +767,7 @@
 	int		i, nfiles;
 	struct file	**newofile;
 	char		*newofileflags;
+	u_int		*newhimap, *newlomap;
 
 	fdp = p->p_fd;
 
@@ -705,6 +790,31 @@
 	memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
 	if (fdp->fd_nfiles > NDFILE)
 		free(fdp->fd_ofiles, M_FILEDESC);
+
+	if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
+		newhimap = malloc(NDHISLOTS(nfiles) * sizeof(u_int),
+		    M_FILEDESC, M_WAITOK);
+		newlomap = malloc(NDLOSLOTS(nfiles) * sizeof(u_int),
+		    M_FILEDESC, M_WAITOK);
+
+		memcpy(newhimap, fdp->fd_himap,
+		    (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)));
+		memset((char *)newhimap + i, 0,
+		    NDHISLOTS(nfiles) * sizeof(u_int) - i);
+
+		memcpy(newlomap, fdp->fd_lomap,
+		    (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)));
+		memset((char *)newlomap + i, 0,
+		    NDLOSLOTS(nfiles) * sizeof(u_int) - i);
+
+		if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
+			free(fdp->fd_himap, M_FILEDESC);
+			free(fdp->fd_lomap, M_FILEDESC);
+		}
+		fdp->fd_himap = newhimap;
+		fdp->fd_lomap = newlomap;
+	}
+
 	fdp->fd_ofiles = newofile;
 	fdp->fd_ofileflags = newofileflags;
 	fdp->fd_nfiles = nfiles;
@@ -772,6 +882,7 @@
 	if (nfiles >= maxfiles) {
 		tablefull("file", "increase kern.maxfiles or MAXFILES");
 		simple_unlock(&filelist_slock);
+		fd_unused(p->p_fd, i);
 		pool_put(&file_pool, fp);
 		return (ENFILE);
 	}
@@ -927,6 +1038,9 @@
 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
 	newfdp->fd_fd.fd_nfiles = NDFILE;
 	newfdp->fd_fd.fd_knlistsize = -1;
+	newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
+	newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
+
 }
 
 /*
@@ -1008,9 +1122,23 @@
 		newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
 	}
+	if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
+		newfdp->fd_himap =
+		    ((struct filedesc0 *) newfdp)->fd_dhimap;
+		newfdp->fd_lomap =
+		    ((struct filedesc0 *) newfdp)->fd_dlomap;
+	} else {
+		newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(u_int),
+		    M_FILEDESC, M_WAITOK);
+		newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(u_int),
+		    M_FILEDESC, M_WAITOK);
+	}
+
 	newfdp->fd_nfiles = i;
 	memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
 	memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
+	memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int));
+	memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int));
 	/*
 	 * kq descriptors cannot be copied.
 	 */
@@ -1060,6 +1188,10 @@
 	p->p_fd = NULL;
 	if (fdp->fd_nfiles > NDFILE)
 		free(fdp->fd_ofiles, M_FILEDESC);
+	if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
+		free(fdp->fd_himap, M_FILEDESC);
+		free(fdp->fd_lomap, M_FILEDESC);
+	}
 	if (fdp->fd_knlist)
 		free(fdp->fd_knlist, M_KEVENT);
 	if (fdp->fd_knhash)