Subject: diff to speed up fdalloc using two-level bitmaps (resend)
To: None <tech-perform@netbsd.org>
From: Niels Provos <provos@citi.umich.edu>
List: tech-perform
Date: 10/28/2003 03:41:26
[Resend as I sent to the wrong mailing list at first]
Hi,
the following diff speeds up our file descriptor allocation performance.
This is the approach suggested by
http://www.usenix.org/publications/library/proceedings/usenix98/banga.html
You can find a qualitative benchmark result at
http://www.citi.umich.edu/u/provos/benchmark/netbsd-fdalloc.jpg
The actualy benchmark was posted to tech-perform@ earlier.
I would appreciate feedback and if people with 64-bit architectures
could test this please.
Thank you,
Niels.
Index: sys/filedesc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/filedesc.h,v
retrieving revision 1.30
diff -u -r1.30 filedesc.h
--- sys/filedesc.h 7 Aug 2003 16:34:04 -0000 1.30
+++ sys/filedesc.h 28 Oct 2003 07:53:11 -0000
@@ -50,11 +50,18 @@
*/
#define NDFILE 20
#define NDEXTENT 50 /* 250 bytes in 256-byte alloc */
+#define NDENTRIES 32 /* 32 fds per entry */
+#define NDENTRYMASK (NDENTRIES - 1)
+#define NDENTRYSHIFT 5 /* bits per entry */
+#define NDLOSLOTS(x) (((x) + NDENTRIES - 1) >> NDENTRYSHIFT)
+#define NDHISLOTS(x) ((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT)
struct filedesc {
struct file **fd_ofiles; /* file structures for open files */
char *fd_ofileflags; /* per-process open file flags */
int fd_nfiles; /* number of open files allocated */
+ u_int *fd_himap; /* each bit points to 32 fds */
+ u_int *fd_lomap; /* bitmap of free fds */
int fd_lastfile; /* high-water mark of fd_ofiles */
int fd_freefile; /* approx. next free file */
int fd_refcnt; /* reference count */
@@ -91,6 +98,12 @@
*/
struct file *fd_dfiles[NDFILE];
char fd_dfileflags[NDFILE];
+ /*
+ * There arrays are used when the number of open files is
+ * <= 1024, and are then pointed to by the pointers above.
+ */
+ u_int fd_dhimap[NDENTRIES >> NDENTRYSHIFT];
+ u_int fd_dlomap[NDENTRIES];
};
/*
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_descrip.c,v
retrieving revision 1.114
diff -u -r1.114 kern_descrip.c
--- kern/kern_descrip.c 22 Sep 2003 12:59:55 -0000 1.114
+++ kern/kern_descrip.c 28 Oct 2003 07:53:11 -0000
@@ -82,7 +82,9 @@
static __inline void fd_used(struct filedesc *, int);
static __inline void fd_unused(struct filedesc *, int);
+static __inline int find_next_zero(u_int *, int, u_int);
int finishdup(struct proc *, int, int, register_t *);
+int find_last_set(struct filedesc *, int);
int fcntl_forfs(int, struct proc *, int, void *);
dev_type_open(filedescopen);
@@ -92,9 +94,71 @@
nostop, notty, nopoll, nommap, nokqfilter,
};
+static __inline int
+find_next_zero (u_int *bitmap, int want, u_int bits)
+{
+ int i, off, maxoff;
+ u_int sub;
+
+ if (want > bits)
+ return -1;
+
+ off = want >> NDENTRYSHIFT;
+ i = want & NDENTRYMASK;
+ if (i) {
+ sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
+ if (sub != ~0)
+ goto found;
+ off++;
+ }
+
+ maxoff = NDLOSLOTS(bits);
+ while (off < maxoff) {
+ if ((sub = bitmap[off]) != ~0)
+ goto found;
+ off++;
+ }
+
+ return -1;
+
+ found:
+ return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
+}
+
+int
+find_last_set(struct filedesc *fd, int last)
+{
+ int off, i;
+ struct file **ofiles = fd->fd_ofiles;
+ u_int *bitmap = fd->fd_lomap;
+
+ off = (last - 1) >> NDENTRYSHIFT;
+
+ while (!bitmap[off] && off >= 0)
+ off--;
+
+ if (off < 0)
+ return 0;
+
+ i = ((off + 1) << NDENTRYSHIFT) - 1;
+ if (i >= last)
+ i = last - 1;
+
+ while (i > 0 && ofiles[i] == NULL)
+ i--;
+
+ return i;
+}
+
+
static __inline void
fd_used(struct filedesc *fdp, int fd)
{
+ u_int off = fd >> NDENTRYSHIFT;
+
+ fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
+ if (fdp->fd_lomap[off] == ~0)
+ fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
if (fd > fdp->fd_lastfile)
fdp->fd_lastfile = fd;
@@ -103,19 +167,21 @@
static __inline void
fd_unused(struct filedesc *fdp, int fd)
{
+ u_int off = fd >> NDENTRYSHIFT;
if (fd < fdp->fd_freefile)
fdp->fd_freefile = fd;
+
+ if (fdp->fd_lomap[off] == ~0)
+ fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK));
+ fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
+
#ifdef DIAGNOSTIC
if (fd > fdp->fd_lastfile)
panic("fd_unused: fd_lastfile inconsistent");
#endif
- if (fd == fdp->fd_lastfile) {
- do {
- fd--;
- } while (fd >= 0 && fdp->fd_ofiles[fd] == NULL);
- fdp->fd_lastfile = fd;
- }
+ if (fd == fdp->fd_lastfile)
+ fdp->fd_lastfile = find_last_set(fdp, fd);
}
/*
@@ -646,6 +712,7 @@
{
struct filedesc *fdp;
int i, lim, last;
+ u_int off, new;
fdp = p->p_fd;
@@ -656,15 +723,32 @@
*/
lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
last = min(fdp->fd_nfiles, lim);
+ again:
if ((i = want) < fdp->fd_freefile)
i = fdp->fd_freefile;
- for (; i < last; i++) {
- if (fdp->fd_ofiles[i] == NULL) {
- fd_used(fdp, i);
- if (want <= fdp->fd_freefile)
- fdp->fd_freefile = i;
- *result = i;
- return (0);
+ off = i >> NDENTRYSHIFT;
+ new = find_next_zero(fdp->fd_himap, off,
+ (last + NDENTRIES - 1) >> NDENTRYSHIFT);
+ if (new != -1) {
+ i = find_next_zero(&fdp->fd_lomap[new],
+ new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
+ if (i == -1) {
+ /*
+ * free file descriptor in this block was
+ * below want, try again with higher want.
+ */
+ want = (new + 1) << NDENTRYSHIFT;
+ goto again;
+ }
+ i += (new << NDENTRYSHIFT);
+ if (i < last) {
+ if (fdp->fd_ofiles[i] == NULL) {
+ fd_used(fdp, i);
+ if (want <= fdp->fd_freefile)
+ fdp->fd_freefile = i;
+ *result = i;
+ return (0);
+ }
}
}
@@ -683,6 +767,7 @@
int i, nfiles;
struct file **newofile;
char *newofileflags;
+ u_int *newhimap, *newlomap;
fdp = p->p_fd;
@@ -705,6 +790,31 @@
memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
if (fdp->fd_nfiles > NDFILE)
free(fdp->fd_ofiles, M_FILEDESC);
+
+ if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
+ newhimap = malloc(NDHISLOTS(nfiles) * sizeof(u_int),
+ M_FILEDESC, M_WAITOK);
+ newlomap = malloc(NDLOSLOTS(nfiles) * sizeof(u_int),
+ M_FILEDESC, M_WAITOK);
+
+ memcpy(newhimap, fdp->fd_himap,
+ (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)));
+ memset((char *)newhimap + i, 0,
+ NDHISLOTS(nfiles) * sizeof(u_int) - i);
+
+ memcpy(newlomap, fdp->fd_lomap,
+ (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)));
+ memset((char *)newlomap + i, 0,
+ NDLOSLOTS(nfiles) * sizeof(u_int) - i);
+
+ if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
+ free(fdp->fd_himap, M_FILEDESC);
+ free(fdp->fd_lomap, M_FILEDESC);
+ }
+ fdp->fd_himap = newhimap;
+ fdp->fd_lomap = newlomap;
+ }
+
fdp->fd_ofiles = newofile;
fdp->fd_ofileflags = newofileflags;
fdp->fd_nfiles = nfiles;
@@ -772,6 +882,7 @@
if (nfiles >= maxfiles) {
tablefull("file", "increase kern.maxfiles or MAXFILES");
simple_unlock(&filelist_slock);
+ fd_unused(p->p_fd, i);
pool_put(&file_pool, fp);
return (ENFILE);
}
@@ -927,6 +1038,9 @@
newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
newfdp->fd_fd.fd_nfiles = NDFILE;
newfdp->fd_fd.fd_knlistsize = -1;
+ newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
+ newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
+
}
/*
@@ -1008,9 +1122,23 @@
newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
}
+ if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
+ newfdp->fd_himap =
+ ((struct filedesc0 *) newfdp)->fd_dhimap;
+ newfdp->fd_lomap =
+ ((struct filedesc0 *) newfdp)->fd_dlomap;
+ } else {
+ newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(u_int),
+ M_FILEDESC, M_WAITOK);
+ newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(u_int),
+ M_FILEDESC, M_WAITOK);
+ }
+
newfdp->fd_nfiles = i;
memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
+ memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int));
+ memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int));
/*
* kq descriptors cannot be copied.
*/
@@ -1060,6 +1188,10 @@
p->p_fd = NULL;
if (fdp->fd_nfiles > NDFILE)
free(fdp->fd_ofiles, M_FILEDESC);
+ if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
+ free(fdp->fd_himap, M_FILEDESC);
+ free(fdp->fd_lomap, M_FILEDESC);
+ }
if (fdp->fd_knlist)
free(fdp->fd_knlist, M_KEVENT);
if (fdp->fd_knhash)