Subject: Speeding up "pstat -T"
To: None <tech-kern@netbsd.org>
From: Simon Burge <simonb@wasabisystems.com>
List: tech-kern
Date: 10/02/2003 23:22:44
My server ran out of open files today, so I tried "pstat -T".  On
this box it wanted to allocate a 40-odd MB chunk of memory, copy the
complete vnode table to userland and not even use it.

The default behaviour for sysctls that return arrays, when the caller
doesn't pass any storage, is to return the size of the array needed, plus
some slop.  "pstat -T" wants an exact count.  What the following patch does
is add an optional flag to the kern.vnode sysctl that will perform an exact
count instead of an estimate.  On a 550MHz i386 with a 256k vnode limit,
this drops the process size from over 43MB to 112kB, and the run time
for 20 calls to "pstat -T" from over 13 seconds to just under a second.

Anyone see problems with this?

Simon.
--
Simon Burge                                   <simonb@wasabisystems.com>
NetBSD Development, Support and Service:   http://www.wasabisystems.com/


Index: sys/kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v
retrieving revision 1.206
diff -d -p -u -r1.206 vfs_subr.c
--- sys/kern/vfs_subr.c	14 Sep 2003 11:09:48 -0000	1.206
+++ sys/kern/vfs_subr.c	2 Oct 2003 11:52:23 -0000
@@ -2073,23 +2073,33 @@ int kinfo_vgetfailed;
  */
 /* ARGSUSED */
 int
-sysctl_vnode(where, sizep, p)
-	char *where;
+sysctl_vnode(name, namelen, vwhere, sizep, p)
+	int *name;
+	u_int namelen;
+	void *vwhere;
 	size_t *sizep;
 	struct proc *p;
 {
 	struct mount *mp, *nmp;
 	struct vnode *nvp, *vp;
-	char *bp = where, *savebp;
-	char *ewhere;
-	int error;
+	char *bp, *savebp;
+	char *where, *ewhere;
+	int error, realcount;
+
+	if (namelen == 0)
+		realcount = 0;
+	else if (namelen == 1 && name[0] == KERN_VNODE_REALCOUNT)
+		realcount = 1;
+	else
+		return EINVAL;
 
 #define VPTRSZ	sizeof(struct vnode *)
 #define VNODESZ	sizeof(struct vnode)
-	if (where == NULL) {
+	if (vwhere == NULL && realcount == 0) {
 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
-		return (0);
+		return 0;
 	}
+	where = bp = vwhere;
 	ewhere = where + *sizep;
 
 	simple_lock(&mountlist_slock);
@@ -2118,15 +2128,24 @@ again:
 				goto again;
 			}
 			nvp = LIST_NEXT(vp, v_mntvnodes);
+
+			/*
+			 * If only counting real number of used vnodes,
+			 * bump the bp pointer and continue.
+			 */
+			if (where == NULL) {
+				bp += VPTRSZ + VNODESZ;
+				continue;
+			}
 			if (bp + VPTRSZ + VNODESZ > ewhere) {
 				simple_unlock(&mntvnode_slock);
 				*sizep = bp - where;
-				return (ENOMEM);
+				return ENOMEM;
 			}
 			simple_unlock(&mntvnode_slock);
 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
-				return (error);
+				return error;
 			bp += VPTRSZ + VNODESZ;
 			simple_lock(&mntvnode_slock);
 		}
@@ -2138,7 +2157,7 @@ again:
 	simple_unlock(&mountlist_slock);
 
 	*sizep = bp - where;
-	return (0);
+	return 0;
 }
 
 /*
Index: sys/sys/sysctl.h
===================================================================
RCS file: /cvsroot/src/sys/sys/sysctl.h,v
retrieving revision 1.99
diff -d -p -u -r1.99 sysctl.h
--- sys/sys/sysctl.h	28 Sep 2003 13:02:19 -0000	1.99
+++ sys/sys/sysctl.h	2 Oct 2003 11:52:24 -0000
@@ -297,6 +297,10 @@ struct ctlname {
 #define	KERN_PROC_TTY_REVOKE	((dev_t)-2)	/* revoked tty */
 
 /*
+ * KERN_VNODE flag
+ */
+#define	KERN_VNODE_REALCOUNT	1	/* exact count, not estimate */
+/*
  * KERN_PROC subtype ops return arrays of augmented proc structures:
  */
 struct kinfo_proc {
@@ -775,7 +779,7 @@ int sysctl_rdminstruct(void *, size_t *,
 int sysctl_clockrate(void *, size_t *);
 int sysctl_disknames(void *, size_t *);
 int sysctl_diskstats(int *, u_int, void *, size_t *);
-int sysctl_vnode(char *, size_t *, struct proc *);
+int sysctl_vnode(int *, u_int, void *, size_t *, struct proc *);
 int sysctl_ntptime(void *, size_t *);
 #ifdef GPROF
 int sysctl_doprof(int *, u_int, void *, size_t *, void *, size_t);
Index: usr.sbin/pstat/pstat.c
===================================================================
RCS file: /cvsroot/src/usr.sbin/pstat/pstat.c,v
retrieving revision 1.83
diff -d -p -u -r1.83 pstat.c
--- usr.sbin/pstat/pstat.c	7 Aug 2003 11:25:38 -0000	1.83
+++ usr.sbin/pstat/pstat.c	2 Oct 2003 11:52:24 -0000
@@ -160,7 +160,7 @@ struct mount *
 char *	kinfo_vnodes __P((int *));
 void	layer_header __P((void));
 int	layer_print __P((struct vnode *, int));
-char *	loadvnodes __P((int *));
+char *	loadvnodes __P((int *, int));
 int	main __P((int, char **));
 void	mount_print __P((struct mount *));
 void	nfs_header __P((void));
@@ -283,10 +283,10 @@ vnodemode()
 	    __P((struct vnode *, int)); /* per-fs data printer */
 
 	mp = NULL;
-	e_vnodebase = loadvnodes(&numvnodes);
+	e_vnodebase = loadvnodes(&numvnodes, totalflag);
 	if (totalflag) {
 		(void)printf("%7d vnodes\n", numvnodes);
-		goto out;
+		return;
 	}
 	endvnode = e_vnodebase + numvnodes * (VPTRSZ + VNODESZ);
 	(void)printf("%d active vnodes\n", numvnodes);
@@ -335,7 +335,6 @@ vnodemode()
 		(void)printf("\n");
 	}
 
- out:
 	free(e_vnodebase);
 }
 
@@ -694,10 +693,11 @@ mount_print(mp)
 }
 
 char *
-loadvnodes(avnodes)
+loadvnodes(avnodes, totalflag)
 	int *avnodes;
+	int totalflag;
 {
-	int mib[2];
+	int mib[3];
 	size_t copysize;
 	char *vnodebase;
 
@@ -709,6 +709,13 @@ loadvnodes(avnodes)
 	}
 	mib[0] = CTL_KERN;
 	mib[1] = KERN_VNODE;
+	if (totalflag) {
+		mib[2] = KERN_VNODE_REALCOUNT;
+		if (sysctl(mib, 3, NULL, &copysize, NULL, 0) == -1)
+			err(1, "sysctl: KERN_VNODE"); 
+		*avnodes = copysize / (VPTRSZ + VNODESZ);
+		return (NULL);
+	}
 	if (sysctl(mib, 2, NULL, &copysize, NULL, 0) == -1)
 		err(1, "sysctl: KERN_VNODE");
 	if ((vnodebase = malloc(copysize)) == NULL)