Subject: Re: revised two-level bitmap fdalloc diff
To: Niels Provos <provos@citi.umich.edu>
From: Jaromir Dolecek <jdolecek@NetBSD.org>
List: tech-perform
Date: 10/29/2003 12:28:38
BTW, how do we stand in the socket allocation benchmark now from
bulk.fefe.de, which measured the time to find lowest unused file
descriptor too? IIRC OpenBSD did quite bad there, compared with
FreeBSD & Linux.

Jaromir

Niels Provos wrote:
> This is the diff that I would like to commit if I can get the okay
> from someone on it.  It addresses Jason's comments and does not
> include the inline assembly ffz.  We need some include file
> infrastructure for that.
> 
> Niels.
> 
> Index: sys/filedesc.h
> ===================================================================
> RCS file: /cvsroot/src/sys/sys/filedesc.h,v
> retrieving revision 1.30
> diff -u -r1.30 filedesc.h
> --- sys/filedesc.h	7 Aug 2003 16:34:04 -0000	1.30
> +++ sys/filedesc.h	29 Oct 2003 07:47:54 -0000
> @@ -50,11 +50,18 @@
>   */
>  #define	NDFILE		20
>  #define	NDEXTENT	50		/* 250 bytes in 256-byte alloc */
> +#define	NDENTRIES	32		/* 32 fds per entry */
> +#define	NDENTRYMASK	(NDENTRIES - 1)
> +#define	NDENTRYSHIFT	5		/* bits per entry */
> +#define	NDLOSLOTS(x)	(((x) + NDENTRIES - 1) >> NDENTRYSHIFT)
> +#define	NDHISLOTS(x)	((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT)
>  
>  struct filedesc {
>  	struct file	**fd_ofiles;	/* file structures for open files */
>  	char		*fd_ofileflags;	/* per-process open file flags */
>  	int		fd_nfiles;	/* number of open files allocated */
> +	uint32_t	*fd_himap;	/* each bit points to 32 fds */
> +	uint32_t	*fd_lomap;	/* bitmap of free fds */
>  	int		fd_lastfile;	/* high-water mark of fd_ofiles */
>  	int		fd_freefile;	/* approx. next free file */
>  	int		fd_refcnt;	/* reference count */
> @@ -91,6 +98,12 @@
>  	 */
>  	struct file	*fd_dfiles[NDFILE];
>  	char		fd_dfileflags[NDFILE];
> +	/*
> +	 * These arrays are used when the number of open files is
> +	 * <= 1024, and are then pointed to by the pointers above.
> +	 */
> +	uint32_t	fd_dhimap[NDENTRIES >> NDENTRYSHIFT];
> +	uint32_t	fd_dlomap[NDENTRIES];
>  };
>  
>  /*
> Index: kern/kern_descrip.c
> ===================================================================
> RCS file: /cvsroot/src/sys/kern/kern_descrip.c,v
> retrieving revision 1.114
> diff -u -r1.114 kern_descrip.c
> --- kern/kern_descrip.c	22 Sep 2003 12:59:55 -0000	1.114
> +++ kern/kern_descrip.c	29 Oct 2003 07:47:54 -0000
> @@ -82,7 +82,9 @@
>  
>  static __inline void	fd_used(struct filedesc *, int);
>  static __inline void	fd_unused(struct filedesc *, int);
> +static __inline int	find_next_zero(uint32_t *, int, u_int);
>  int			finishdup(struct proc *, int, int, register_t *);
> +int			find_last_set(struct filedesc *, int);
>  int			fcntl_forfs(int, struct proc *, int, void *);
>  
>  dev_type_open(filedescopen);
> @@ -92,9 +94,70 @@
>  	nostop, notty, nopoll, nommap, nokqfilter,
>  };
>  
> +static __inline int
> +find_next_zero(uint32_t *bitmap, int want, u_int bits)
> +{
> +	int i, off, maxoff;
> +	uint32_t sub;
> +
> +	if (want > bits)
> +		return -1;
> +
> +	off = want >> NDENTRYSHIFT;
> +	i = want & NDENTRYMASK;
> +	if (i) {
> +		sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
> +		if (sub != ~0)
> +			goto found;
> +		off++;
> +	}
> +
> +	maxoff = NDLOSLOTS(bits);
> +	while (off < maxoff) {
> +		if ((sub = bitmap[off]) != ~0)
> +			goto found;
> +		off++;
> +	}
> +
> +	return (-1);
> +
> + found:
> +	return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
> +}
> +
> +int
> +find_last_set(struct filedesc *fd, int last)
> +{
> +	int off, i;
> +	struct file **ofiles = fd->fd_ofiles;
> +	uint32_t *bitmap = fd->fd_lomap;
> +
> +	off = (last - 1) >> NDENTRYSHIFT;
> +
> +	while (!bitmap[off] && off >= 0)
> +		off--;
> +
> +	if (off < 0)
> +		return (0);
> +       
> +	i = ((off + 1) << NDENTRYSHIFT) - 1;
> +	if (i >= last)
> +		i = last - 1;
> +
> +	while (i > 0 && ofiles[i] == NULL)
> +		i--;
> +
> +	return (i);
> +}
> +
>  static __inline void
>  fd_used(struct filedesc *fdp, int fd)
>  {
> +	u_int off = fd >> NDENTRYSHIFT;
> +
> +	fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
> +	if (fdp->fd_lomap[off] == ~0)
> +		fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
>  
>  	if (fd > fdp->fd_lastfile)
>  		fdp->fd_lastfile = fd;
> @@ -103,19 +166,21 @@
>  static __inline void
>  fd_unused(struct filedesc *fdp, int fd)
>  {
> +	u_int off = fd >> NDENTRYSHIFT;
>  
>  	if (fd < fdp->fd_freefile)
>  		fdp->fd_freefile = fd;
> +
> +	if (fdp->fd_lomap[off] == ~0)
> +		fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK));
> +	fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
> +
>  #ifdef DIAGNOSTIC
>  	if (fd > fdp->fd_lastfile)
>  		panic("fd_unused: fd_lastfile inconsistent");
>  #endif
> -	if (fd == fdp->fd_lastfile) {
> -		do {
> -			fd--;
> -		} while (fd >= 0 && fdp->fd_ofiles[fd] == NULL);
> -		fdp->fd_lastfile = fd;
> -	}
> +	if (fd == fdp->fd_lastfile)
> +		fdp->fd_lastfile = find_last_set(fdp, fd);
>  }
>  
>  /*
> @@ -646,6 +711,7 @@
>  {
>  	struct filedesc	*fdp;
>  	int i, lim, last;
> +	u_int off, new;
>  
>  	fdp = p->p_fd;
>  
> @@ -656,15 +722,32 @@
>  	 */
>  	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
>  	last = min(fdp->fd_nfiles, lim);
> + again:
>  	if ((i = want) < fdp->fd_freefile)
>  		i = fdp->fd_freefile;
> -	for (; i < last; i++) {
> -		if (fdp->fd_ofiles[i] == NULL) {
> -			fd_used(fdp, i);
> -			if (want <= fdp->fd_freefile)
> -				fdp->fd_freefile = i;
> -			*result = i;
> -			return (0);
> +	off = i >> NDENTRYSHIFT;
> +	new = find_next_zero(fdp->fd_himap, off,
> +	    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
> +	if (new != -1) {
> +		i = find_next_zero(&fdp->fd_lomap[new], 
> +		    new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
> +		if (i == -1) {
> +			/* 
> +			 * free file descriptor in this block was
> +			 * below want, try again with higher want.
> +			 */
> +			want = (new + 1) << NDENTRYSHIFT;
> +			goto again;
> +		}
> +		i += (new << NDENTRYSHIFT);
> +		if (i < last) {
> +			if (fdp->fd_ofiles[i] == NULL) {
> +				fd_used(fdp, i);
> +				if (want <= fdp->fd_freefile)
> +					fdp->fd_freefile = i;
> +				*result = i;
> +				return (0);
> +			}
>  		}
>  	}
>  
> @@ -683,6 +766,7 @@
>  	int		i, nfiles;
>  	struct file	**newofile;
>  	char		*newofileflags;
> +	uint32_t	*newhimap, *newlomap;
>  
>  	fdp = p->p_fd;
>  
> @@ -705,6 +789,31 @@
>  	memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
>  	if (fdp->fd_nfiles > NDFILE)
>  		free(fdp->fd_ofiles, M_FILEDESC);
> +
> +	if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
> +		newhimap = malloc(NDHISLOTS(nfiles) * sizeof(uint32_t),
> +		    M_FILEDESC, M_WAITOK);
> +		newlomap = malloc(NDLOSLOTS(nfiles) * sizeof(uint32_t),
> +		    M_FILEDESC, M_WAITOK);
> +
> +		memcpy(newhimap, fdp->fd_himap,
> +		    (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(uint32_t)));
> +		memset((char *)newhimap + i, 0,
> +		    NDHISLOTS(nfiles) * sizeof(uint32_t) - i);
> +
> +		memcpy(newlomap, fdp->fd_lomap,
> +		    (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(uint32_t)));
> +		memset((char *)newlomap + i, 0,
> +		    NDLOSLOTS(nfiles) * sizeof(uint32_t) - i);
> +
> +		if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
> +			free(fdp->fd_himap, M_FILEDESC);
> +			free(fdp->fd_lomap, M_FILEDESC);
> +		}
> +		fdp->fd_himap = newhimap;
> +		fdp->fd_lomap = newlomap;
> +	}
> +
>  	fdp->fd_ofiles = newofile;
>  	fdp->fd_ofileflags = newofileflags;
>  	fdp->fd_nfiles = nfiles;
> @@ -772,6 +881,7 @@
>  	if (nfiles >= maxfiles) {
>  		tablefull("file", "increase kern.maxfiles or MAXFILES");
>  		simple_unlock(&filelist_slock);
> +		fd_unused(p->p_fd, i);
>  		pool_put(&file_pool, fp);
>  		return (ENFILE);
>  	}
> @@ -927,6 +1037,8 @@
>  	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
>  	newfdp->fd_fd.fd_nfiles = NDFILE;
>  	newfdp->fd_fd.fd_knlistsize = -1;
> +	newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
> +	newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
>  }
>  
>  /*
> @@ -1008,9 +1120,23 @@
>  		newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
>  		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
>  	}
> +	if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
> +		newfdp->fd_himap =
> +		    ((struct filedesc0 *) newfdp)->fd_dhimap;
> +		newfdp->fd_lomap =
> +		    ((struct filedesc0 *) newfdp)->fd_dlomap;
> +	} else {
> +		newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(uint32_t),
> +		    M_FILEDESC, M_WAITOK);
> +		newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(uint32_t),
> +		    M_FILEDESC, M_WAITOK);
> +	}
> +
>  	newfdp->fd_nfiles = i;
>  	memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
>  	memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
> +	memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t));
> +	memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t));
>  	/*
>  	 * kq descriptors cannot be copied.
>  	 */
> @@ -1060,6 +1186,10 @@
>  	p->p_fd = NULL;
>  	if (fdp->fd_nfiles > NDFILE)
>  		free(fdp->fd_ofiles, M_FILEDESC);
> +	if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
> +		free(fdp->fd_himap, M_FILEDESC);
> +		free(fdp->fd_lomap, M_FILEDESC);
> +	}
>  	if (fdp->fd_knlist)
>  		free(fdp->fd_knlist, M_KEVENT);
>  	if (fdp->fd_knhash)
> 

-- 
Jaromir Dolecek <jdolecek@NetBSD.org>            http://www.NetBSD.cz/
-=- We should be mindful of the potential goal, but as the tantric    -=-
-=- Buddhist masters say, ``You may notice during meditation that you -=-
-=- sometimes levitate or glow.   Do not let this distract you.''     -=-