Subject: fdexpand() memory shortage check (Re: kern/14721)
To: None <tech-kern@netbsd.org>
From: None <jaromir.dolecek@artisys.cz>
List: tech-kern
Date: 12/14/2001 11:08:03
--ELM715090587-2025-0_
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset=US-ASCII

Hi,
the problem in kern/14721 happens due to fdexpand() eventually
trying to allocate bigger chunk of kernel memory for the process than
possible with kmem_map (which holds normally like 1/4 of physical
memory).
A fix here is for the fdexpand() function to fail gracefully
and for the caller to handle this appropriately. This is
implemented in the appended patch.

One possible issue I've been discussing with Soda privately is
whether or not it's suitable to panic system in such situation.
When several such programs as the one in kern/14721 run
and don't finish, they can make the kernel to allocate almost
all it's memory to file descriptor arrays, and thus no more
KVA might be available for other operation. I'm not sure
how big problem is nearly-full kmem_map. Would that cause
any problems severe enough to justify panic in such situation?

Jaromir
-- 
Jaromir Dolecek <jaromir.dolecek@artisys.cz>
ARTISYS, s.r.o., Stursova 71, 61600 Brno, Czech Republic
phone: +420-5-41224836 / fax: +420-5-41224870 / http://www.artisys.cz

--ELM715090587-2025-0_
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset=US-ASCII
Content-Disposition: attachment; filename=fdexp.diff

Index: kern/kern_descrip.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_descrip.c,v
retrieving revision 1.83
diff -u -p -r1.83 kern_descrip.c
--- kern_descrip.c	2001/12/07 07:09:29	1.83
+++ kern_descrip.c	2001/12/14 09:59:08
@@ -147,8 +147,7 @@ sys_dup(struct proc *p, void *v, registe
 	FILE_USE(fp);
 
 	if ((error = fdalloc(p, 0, &new)) != 0) {
-		if (error == ENOSPC) {
-			fdexpand(p);
+		if (error == ENOSPC && (error = fdexpand(p)) == 0) {
 			FILE_UNUSE(fp, p);
 			goto restart;
 		}
@@ -196,8 +195,7 @@ sys_dup2(struct proc *p, void *v, regist
 
 	if (new >= fdp->fd_nfiles) {
 		if ((error = fdalloc(p, new, &i)) != 0) {
-			if (error == ENOSPC) {
-				fdexpand(p);
+			if (error == ENOSPC && (error = fdexpand(p)) == 0) {
 				FILE_UNUSE(fp, p);
 				goto restart;
 			}
@@ -262,8 +260,7 @@ sys_fcntl(struct proc *p, void *v, regis
 			goto out;
 		}
 		if ((error = fdalloc(p, newmin, &i)) != 0) {
-			if (error == ENOSPC) {
-				fdexpand(p);
+			if (error == ENOSPC && (error = fdexpand(p)) == 0) {
 				FILE_UNUSE(fp, p);
 				goto restart;
 			}
@@ -635,7 +632,7 @@ fdalloc(struct proc *p, int want, int *r
 	}
 }
 
-void
+int
 fdexpand(struct proc *p)
 {
 	struct filedesc	*fdp;
@@ -649,7 +646,14 @@ fdexpand(struct proc *p)
 		nfiles = NDEXTENT;
 	else
 		nfiles = 2 * fdp->fd_nfiles;
-	newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
+	newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK|M_CANFAIL);
+	if (__predict_false(newofile == NULL)) {
+		int uid = p->p_cred && p->p_ucred ? p->p_ucred->cr_uid : -1;
+
+		log(LOG_INFO, "pid %d (%s), uid %d: fdexpand() to %d files failed ",
+				p->p_pid, p->p_comm, uid, nfiles);
+		return (ENOMEM);
+	}
 	newofileflags = (char *) &newofile[nfiles];
 	/*
 	 * Copy the existing ofile and ofileflags arrays
@@ -668,6 +672,8 @@ fdexpand(struct proc *p)
 	fdp->fd_ofileflags = newofileflags;
 	fdp->fd_nfiles = nfiles;
 	fdexpanded++;
+
+	return (0);
 }
 
 /*
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_usrreq.c,v
retrieving revision 1.53
diff -u -p -r1.53 uipc_usrreq.c
--- uipc_usrreq.c	2001/11/12 15:25:34	1.53
+++ uipc_usrreq.c	2001/12/14 09:59:44
@@ -481,7 +481,9 @@ u_long	unpst_recvspace = PIPSIZ;
 u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
 u_long	unpdg_recvspace = 4*1024;
 
+/* (maxfiles + unp_rights) MUST fit into struct file's f_count/f_msgcount */
 int	unp_rights;			/* file descriptors in flight */
+#define UNPRIGHTS_SZ	INT_MAX
 
 int
 unp_attach(so)
@@ -868,8 +870,7 @@ unp_externalize(rights)
 				fdremove(p->p_fd, fdp[i]);
 
 			if (error == ENOSPC) {
-				fdexpand(p);
-				error = 0;
+				error = fdexpand(p);
 			} else {
 				/*
 				 * This is the error that has historically
@@ -939,6 +940,10 @@ unp_internalize(control, p)
 			return (EBADF);
 	}
 
+	/* Verify that unp_rights would not overflow */
+	if (__predict_false(unp_rights > UNPRIGHTS_SZ - nfds))
+		return (EMFILE);
+
 	/* Make sure we have room for the struct file pointers */
  morespace:
 	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
@@ -957,6 +962,14 @@ unp_internalize(control, p)
 		/* copy the data to the cluster */
 		memcpy(mtod(control, char *), cm, cm->cmsg_len);
 		cm = mtod(control, struct cmsghdr *);
+
+		/*
+		 * Verify that unp_rights would not overflow again, we might
+		 * have been blocked.
+		 */
+		if (__predict_false(unp_rights > UNPRIGHTS_SZ - nfds))
+			return (EMFILE);
+
 		goto morespace;
 	}
 
Index: sys/filedesc.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/filedesc.h,v
retrieving revision 1.23
diff -u -p -r1.23 filedesc.h
--- filedesc.h	2001/06/14 20:32:49	1.23
+++ filedesc.h	2001/12/14 09:59:48
@@ -102,7 +102,7 @@ struct filedesc0 {
  */
 int	dupfdopen(struct proc *p, int indx, int dfd, int mode, int error);
 int	fdalloc(struct proc *p, int want, int *result);
-void	fdexpand(struct proc *p);
+int	fdexpand(struct proc *p);
 int	fdavail(struct proc *p, int n);
 int	falloc(struct proc *p, struct file **resultfp, int *resultfd);
 void	ffree(struct file *);

--ELM715090587-2025-0_--