tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

optimize ps(1) for slow machines



ps(1) is very slow on old machines when its output is sorted by CPU
usage. This is because it calculate CPU usage of two processes using
exp(3) and log(3) every time when it compare them in qsort(3). This
means that it carries out expensive floating-point arithmetic
O[N log(N)] times when N processes are shown.

Here, I attached a patch where

(1) CPU usage is calculated only once per process
(2) do not call log(3) in CPU usage calculations

This significantly improves performance on, e.g., a m68k box without
FPU. In addition, I did

(3) in donlist{,_sysctl}, use common default values and warn user
appropriately when an error occurs

Can I commit this? Any suggestions or comments?

Thanks,
Rin
====
--- src/bin/ps/extern.h.orig	2016-11-27 08:57:40.203612409 +0900
+++ src/bin/ps/extern.h	2016-11-27 08:58:08.290786342 +0900
@@ -36,8 +36,8 @@
  * defined the types we use.
  */
-extern double ccpu;
-extern int eval, fscale, mempages, nlistread, rawcpu, maxslp, uspace;
+extern double log_ccpu;
+extern int eval, fscale, mempages, nlistread, maxslp, uspace;
 extern int sumrusage, termwidth, totwidth;
 extern int needenv, needcomm, commandonly;
 extern uid_t myuid;
@@ -49,8 +49,8 @@
 void	 command(void *, VARENT *, enum mode);
 void	 cpuid(void *, VARENT *, enum mode);
 void	 cputime(void *, VARENT *, enum mode);
-int	 donlist(void);
-int	 donlist_sysctl(void);
+void	 donlist(void);
+void	 donlist_sysctl(void);
 void	 fmt_puts(char *, int *);
 void	 fmt_putc(int, int *);
 void	 elapsed(void *, VARENT *, enum mode);
--- src/bin/ps/nlist.c.orig	2016-11-27 08:57:40.203696767 +0900
+++ src/bin/ps/nlist.c	2016-11-27 08:58:08.291042909 +0900
@@ -97,50 +97,73 @@
 	{ .n_name = NULL }
 };
-double ccpu; /* kernel _ccpu variable */
+double	log_ccpu;			/* log of kernel _ccpu variable */
 int	nlistread;			/* if nlist already read. */
 int	mempages;			/* number of pages of phys. memory */
 int	fscale;				/* kernel _fscale variable */
 int	maxslp;				/* kernel _maxslp variable */
 int	uspace;				/* kernel USPACE value */
+/* XXX Hopefully reasonable default */
+#define MEMPAGES 	0
+#ifndef FSCALE
+#define FSCALE		(1 << 8)
+#endif
+#define LOG_CCPU	(-1.0 / 20.0)
+#ifndef MAXSLP
+#define MAXSLP		20
+#endif
+#ifndef USPACE
+#define USPACE		(getpagesize())
+#endif
+
 #define	kread(x, v) \
 	kvm_read(kd, psnl[x].n_value, (char *)&v, sizeof v) != sizeof(v)
-int
+void
 donlist(void)
 {
-	int rval;
 	fixpt_t xccpu;
- rval = 0;
 	nlistread = 1;
+
 	if (kvm_nlist(kd, psnl)) {
 		nlisterr(psnl);
 		eval = 1;
-		return (1);
+		fscale = FSCALE;
+		mempages = MEMPAGES;
+		log_ccpu = LOG_CCPU;
+		maxslp = MAXSLP;
+		return;
 	}
+
 	if (kread(X_FSCALE, fscale)) {
 		warnx("fscale: %s", kvm_geterr(kd));
-		eval = rval = 1;
+		eval = 1;
+		fscale = FSCALE;
 	}
+
 	if (kread(X_PHYSMEM, mempages)) {
 		warnx("avail_start: %s", kvm_geterr(kd));
-		eval = rval = 1;
+		eval = 1;
+		mempages = MEMPAGES;
 	}
+
 	if (kread(X_CCPU, xccpu)) {
 		warnx("ccpu: %s", kvm_geterr(kd));
-		eval = rval = 1;
-	}
+		eval = 1;
+		log_ccpu = LOG_CCPU;
+	} else
+		log_ccpu = log((double)xccpu / fscale);
+
 	if (kread(X_MAXSLP, maxslp)) {
 		warnx("maxslp: %s", kvm_geterr(kd));
-		eval = rval = 1;
+		eval = 1;
+		maxslp = MAXSLP;
 	}
-	ccpu = (double)xccpu / fscale;
-	return (rval);
 }
-int
+void
 donlist_sysctl(void)
 {
 	int mib[2];
@@ -149,49 +172,53 @@
 	uint64_t memsize;
nlistread = 1;
-	mib[0] = CTL_HW;
-	mib[1] = HW_PHYSMEM64;
-	size = sizeof(memsize);
-	if (sysctl(mib, 2, &memsize, &size, NULL, 0) == 0)
-		mempages = memsize / getpagesize();
-	else
-		mempages = 0;
mib[0] = CTL_KERN;
 	mib[1] = KERN_FSCALE;
 	size = sizeof(fscale);
-	if (sysctl(mib, 2, &fscale, &size, NULL, 0) == -1)
-		fscale = (1 << 8);	/* XXX Hopefully reasonable default */
+	if (sysctl(mib, 2, &fscale, &size, NULL, 0)) {
+		warn("fscale");
+		eval = 1;
+		fscale = FSCALE;
+	}
+
+	mib[0] = CTL_HW;
+	mib[1] = HW_PHYSMEM64;
+	size = sizeof(memsize);
+	if (sysctl(mib, 2, &memsize, &size, NULL, 0)) {
+		warn("avail_start");
+		eval = 1;
+		mempages = MEMPAGES;
+	} else
+		mempages = memsize / getpagesize();
mib[0] = CTL_KERN;
 	mib[1] = KERN_CCPU;
 	size = sizeof(xccpu);
-	if (sysctl(mib, 2, &xccpu, &size, NULL, 0) == -1)
-		ccpu = exp(-1.0 / 20.0); /* XXX Hopefully reasonable default */
-	else
-		ccpu = (double)xccpu / fscale;
+	if (sysctl(mib, 2, &xccpu, &size, NULL, 0)) {
+		warn("ccpu");
+		eval = 1;
+		log_ccpu = LOG_CCPU;
+	} else
+		log_ccpu = log((double)xccpu / fscale);
mib[0] = CTL_VM;
 	mib[1] = VM_MAXSLP;
 	size = sizeof(maxslp);
-	if (sysctl(mib, 2, &maxslp, &size, NULL, 0) == -1)
-#ifdef MAXSLP
+	if (sysctl(mib, 2, &maxslp, &size, NULL, 0)) {
+		warn("maxslp");
+		eval = 1;
 		maxslp = MAXSLP;
-#else
-		maxslp = 20;		/* XXX Hopefully reasonable default */
-#endif
+	}
mib[0] = CTL_VM;
 	mib[1] = VM_USPACE;
 	size = sizeof(uspace);
-	if (sysctl(mib, 2, &uspace, &size, NULL, 0) == -1)
-#ifdef USPACE
+	if (sysctl(mib, 2, &uspace, &size, NULL, 0)) {
+		warn("uspace");
+		eval = 1;
 		uspace = USPACE;
-#else
-		uspace = getpagesize();	/* XXX Hopefully reasonable default */
-#endif
-
-	return 0;
+	}
 }
void
--- src/bin/ps/print.c.orig	2016-11-27 08:57:40.203755706 +0900
+++ src/bin/ps/print.c	2016-11-27 08:58:08.291516516 +0900
@@ -1091,50 +1091,25 @@
 	cputime1(secs, psecs, v, mode);
 }
-double
-getpcpu(const struct kinfo_proc2 *k)
-{
-	static int failure;
-
-	if (!nlistread)
-		failure = (kd) ? donlist() : 1;
-	if (failure)
-		return (0.0);
-
-#define	fxtofl(fixpt)	((double)(fixpt) / fscale)
-
-	if (k->p_swtime == 0 || k->p_realstat == SZOMB)
-		return (0.0);
-	if (rawcpu)
-		return (100.0 * fxtofl(k->p_pctcpu));
-	return (100.0 * fxtofl(k->p_pctcpu) /
-		(1.0 - exp(k->p_swtime * log(ccpu))));
-}
-
 void
 pcpu(void *arg, VARENT *ve, enum mode mode)
 {
-	struct kinfo_proc2 *k;
 	VAR *v;
 	double dbl;
- k = arg;
 	v = ve->var;
-	dbl = getpcpu(k);
+	dbl = ((struct pinfo *)arg)->pcpu;
 	doubleprintorsetwidth(v, dbl, (dbl >= 99.95) ? 0 : 1, mode);
 }
double
 getpmem(const struct kinfo_proc2 *k)
 {
-	static int failure;
 	double fracmem;
 	int szptudot;
if (!nlistread)
-		failure = (kd) ? donlist() : 1;
-	if (failure)
-		return (0.0);
+		donlist();
/* XXX want pmap ptpages, segtab, etc. (per architecture) */
 	szptudot = uspace/getpagesize();
--- src/bin/ps/ps.c.orig	2016-11-27 08:57:40.203842089 +0900
+++ src/bin/ps/ps.c	2016-11-27 16:59:01.356325791 +0900
@@ -89,6 +89,7 @@
 #include <kvm.h>
 #include <limits.h>
 #include <locale.h>
+#include <math.h>
 #include <nlist.h>
 #include <paths.h>
 #include <pwd.h>
@@ -106,12 +107,10 @@
 #define	GETOPTSTR	"aAcCeghjk:LlM:mN:O:o:p:rSsTt:U:uvW:wx"
 #define	ARGOPTS		"kMNOopUW"
-struct kinfo_proc2 *kinfo;
 struct varlist displaylist = SIMPLEQ_HEAD_INITIALIZER(displaylist);
 struct varlist sortlist = SIMPLEQ_HEAD_INITIALIZER(sortlist);
int eval; /* exit value */
-int	rawcpu;			/* -C */
 int	sumrusage;		/* -S */
 int	termwidth;		/* width of screen (0 == infinity) */
 int	totwidth;		/* calculated width of requested variables */
@@ -124,6 +123,8 @@
 		    struct kinfo_lwp *, int);
 static struct kinfo_proc2
 		*getkinfo_kvm(kvm_t *, int, int, int *);
+static struct pinfo
+		*setpinfo(struct kinfo_proc2 *, int, int, int);
 static char	*kludge_oldps_options(char *);
 static int	 pscomp(const void *, const void *);
 static void	 scanvars(void);
@@ -197,12 +198,14 @@
 int
 main(int argc, char *argv[])
 {
+	struct kinfo_proc2 *kinfo;
+	struct pinfo *pinfo;
 	struct varent *vent;
 	struct winsize ws;
 	struct kinfo_lwp *kl, *l;
 	int ch, i, j, fmt, lineno, nentries, nlwps;
 	long long flag;
-	int prtheader, wflag, what, xflg, showlwps;
+	int calc_pcpu, prtheader, wflag, what, xflg, rawcpu, showlwps;
 	char *nlistf, *memf, *swapf, errbuf[_POSIX2_LINE_MAX];
 	char *ttname;
@@ -387,7 +390,7 @@
 	} else
 		kd = kvm_openfiles(nlistf, memf, swapf, O_RDONLY, errbuf);
- if (kd == 0)
+	if (kd == NULL)
 		errx(1, "%s", errbuf);
if (!fmt)
@@ -395,10 +398,17 @@
/* Add default sort criteria */
 	parsesort("tdev,pid");
+	calc_pcpu = 0;
 	SIMPLEQ_FOREACH(vent, &sortlist, next) {
 		if (vent->var->flag & LWP || vent->var->type == UNSPECIFIED)
 			warnx("Cannot sort on %s, sort key ignored",
 				vent->var->name);
+		if (vent->var->type == PCPU)
+			calc_pcpu = 1;
+	}
+	SIMPLEQ_FOREACH(vent, &displaylist, next) {
+		if (vent->var->type == PCPU)
+			calc_pcpu = 1;
 	}
/*
@@ -415,10 +425,12 @@
 		printheader();
 		exit(1);
 	}
+	pinfo = setpinfo(kinfo, nentries, calc_pcpu, rawcpu);
+
 	/*
 	 * sort proc list
 	 */
-	qsort(kinfo, nentries, sizeof(struct kinfo_proc2), pscomp);
+	qsort(pinfo, nentries, sizeof(struct pinfo), pscomp);
 	/*
 	 * For each proc, call each variable output function in
 	 * "setwidth" mode to determine the widest element of
@@ -426,7 +438,8 @@
 	 */
for (i = 0; i < nentries; i++) {
-		struct kinfo_proc2 *ki = &kinfo[i];
+		struct pinfo *pi = &pinfo[i];
+		struct kinfo_proc2 *ki = pi->ki;
if (xflg == 0 && (ki->p_tdev == (uint32_t)NODEV ||
 		    (ki->p_flag & P_CONTROLT) == 0))
@@ -439,7 +452,7 @@
 		if (showlwps == 0) {
 			l = pick_representative_lwp(ki, kl, nlwps);
 			SIMPLEQ_FOREACH(vent, &displaylist, next)
-				OUTPUT(vent, ki, l, WIDTHMODE);
+				OUTPUT(vent, l, pi, ki, WIDTHMODE);
 		} else {
 			/* The printing is done with the loops
 			 * reversed, but here we don't need that,
@@ -447,7 +460,7 @@
 			 */
 			SIMPLEQ_FOREACH(vent, &displaylist, next)
 				for (j = 0; j < nlwps; j++)
-					OUTPUT(vent, ki, &kl[j], WIDTHMODE);
+					OUTPUT(vent, &kl[j], pi, ki, WIDTHMODE);
 		}
 	}
 	/*
@@ -461,7 +474,8 @@
 	 * print mode.
 	 */
 	for (i = lineno = 0; i < nentries; i++) {
-		struct kinfo_proc2 *ki = &kinfo[i];
+		struct pinfo *pi = &pinfo[i];
+		struct kinfo_proc2 *ki = pi->ki;
if (xflg == 0 && (ki->p_tdev == (uint32_t)NODEV ||
 		    (ki->p_flag & P_CONTROLT ) == 0))
@@ -473,7 +487,7 @@
 		if (showlwps == 0) {
 			l = pick_representative_lwp(ki, kl, nlwps);
 			SIMPLEQ_FOREACH(vent, &displaylist, next) {
-				OUTPUT(vent, ki, l, PRINTMODE);
+				OUTPUT(vent, l, pi, ki, PRINTMODE);
 				if (SIMPLEQ_NEXT(vent, next) != NULL)
 					(void)putchar(' ');
 			}
@@ -486,7 +500,7 @@
 		} else {
 			for (j = 0; j < nlwps; j++) {
 				SIMPLEQ_FOREACH(vent, &displaylist, next) {
-					OUTPUT(vent, ki, &kl[j], PRINTMODE);
+					OUTPUT(vent, &kl[j], pi, ki, PRINTMODE);
 					if (SIMPLEQ_NEXT(vent, next) != NULL)
 						(void)putchar(' ');
 				}
@@ -499,8 +513,7 @@
 			}
 		}
 	}
-	exit(eval);
-	/* NOTREACHED */
+	return eval;
 }
static struct kinfo_lwp *
@@ -561,7 +574,6 @@
 	return kl;
 }
-
 static struct kinfo_proc2 *
 getkinfo_kvm(kvm_t *kdp, int what, int flag, int *nentriesp)
 {
@@ -570,6 +582,36 @@
 	    nentriesp));
 }
+static struct pinfo *
+setpinfo(struct kinfo_proc2 *ki, int nentries, int calc_pcpu, int rawcpu)
+{
+	struct pinfo *pi;
+	int i;
+
+	pi = malloc(nentries * sizeof(struct pinfo));
+	if (pi == NULL)
+		err(1, "malloc");
+
+	if (calc_pcpu && !nlistread)
+		donlist();
+
+	for (i = 0; i < nentries; i++) {
+		pi[i].ki = &ki[i];
+		if (!calc_pcpu)
+			continue;
+		if (ki[i].p_realstat == SZOMB ||
+		    (!rawcpu && ki[i].p_swtime == 0)) {
+			pi[i].pcpu = 0.0;
+			continue;
+		}
+		pi[i].pcpu = 100.0 * (double)ki[i].p_pctcpu / fscale;
+		if (!rawcpu)
+			pi[i].pcpu /= 1.0 - exp(ki[i].p_swtime * log_ccpu);
+	}
+
+	return pi;
+}
+
 static void
 scanvars(void)
 {
@@ -588,8 +630,10 @@
 static int
 pscomp(const void *a, const void *b)
 {
-	const struct kinfo_proc2 *ka = (const struct kinfo_proc2 *)a;
-	const struct kinfo_proc2 *kb = (const struct kinfo_proc2 *)b;
+	const struct pinfo *pa = (const struct pinfo *)a;
+	const struct pinfo *pb = (const struct pinfo *)b;
+	const struct kinfo_proc2 *ka = pa->ki;
+	const struct kinfo_proc2 *kb = pb->ki;
int i;
 	int64_t i64;
@@ -634,8 +678,8 @@
 		case UINT32:
 			RDIFF(uint32_t);
 		case SIGLIST:
-			sa = (const void *)((const char *)a + v->off);
-			sb = (const void *)((const char *)b + v->off);
+			sa = (const void *)((const char *)ka + v->off);
+			sb = (const void *)((const char *)kb + v->off);
 			i = 0;
 			do {
 				if (sa->__bits[i] > sb->__bits[i])
@@ -669,7 +713,7 @@
 				return i64 > 0 ? 1 : -1;
 			continue;
 		case PCPU:
-			i = getpcpu(kb) - getpcpu(ka);
+			i = pb->pcpu - pa->pcpu;
 			if (i != 0)
 				return i;
 			continue;
--- src/bin/ps/ps.h.orig	2016-11-27 08:57:40.203862550 +0900
+++ src/bin/ps/ps.h	2016-11-27 08:58:08.292300182 +0900
@@ -86,9 +86,16 @@
 	double	longestnd;	/* longest negative double */
 } VAR;
-#define OUTPUT(vent, ki, kl, mode) do { \
+struct pinfo {
+	struct kinfo_proc2 *ki;
+	double pcpu;
+};
+
+#define	OUTPUT(vent, kl, pi, ki, mode) do {				\
 	if ((vent)->var->flag & LWP)					\
 		((vent)->var->oproc)((void *)(kl), (vent), (mode));	\
+	else if ((vent)->var->type == PCPU)				\
+		((vent)->var->oproc)((void *)(pi), (vent), (mode));	\
 	else								\
 		((vent)->var->oproc)((void *)(ki), (vent), (mode));	\
 	} while (/*CONSTCOND*/ 0)


Home | Main Index | Thread Index | Old Index