Subject: kern/31542: scheduler doesn't work well with many runnable processes
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: None <yamt@mwd.biglobe.ne.jp>
List: netbsd-bugs
Date: 10/10/2005 11:19:00
>Number: 31542
>Category: kern
>Synopsis: scheduler doesn't work well with many runnable processes
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Mon Oct 10 11:19:00 +0000 2005
>Originator: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
>Release: NetBSD 3.99.8
>Organization:
>Environment:
System: NetBSD kaeru 3.99.8 NetBSD 3.99.8 (build.kaeru.xen.nodebug) #10: Thu Aug 25 20:00:08 JST 2005 takashi@kaeru:/home/takashi/work/kernel/build.kaeru.xen.nodebug i386
Architecture: i386
Machine: i386
>Description:
- schedcpu() decreases p_estcpu of all processes
every seconds, by at least 1 regardless of load average.
- schedclock() increases p_estcpu of curproc by 1,
at about 16 hz.
in the consequence, if a system has >16 processes
with runnable lwps, their p_estcpu are not likely increased.
>How-To-Repeat:
>Fix:
make p_estcpu fixpt_t.
Index: sys/sched.h
===================================================================
--- sys/sched.h (revision 1389)
+++ sys/sched.h (working copy)
@@ -180,7 +180,8 @@ struct schedstate_percpu {
#define PPQ (128 / RUNQUE_NQS) /* priorities per queue */
#define NICE_WEIGHT 2 /* priorities per nice level */
-#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - PPQ)
+#define ESTCPU_SHIFT 11
+#define ESTCPULIM(e) min((e), (NICE_WEIGHT * PRIO_MAX - PPQ) << ESTCPU_SHIFT)
extern int schedhz; /* ideally: 16 */
extern int rrticks; /* ticks per roundrobin() */
Index: sys/proc.h
===================================================================
--- sys/proc.h (revision 1335)
+++ sys/proc.h (working copy)
@@ -201,7 +201,7 @@ struct proc {
struct sadata *p_sa; /* Scheduler activation information */
/* scheduling */
- u_int p_estcpu; /* Time averaged value of p_cpticks XXX belongs in p_startcopy section */
+ fixpt_t p_estcpu; /* Time averaged value of p_cpticks XXX belongs in p_startcopy section */
int p_cpticks; /* Ticks of CPU time */
fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */
Index: kern/kern_synch.c
===================================================================
--- kern/kern_synch.c (revision 1390)
+++ kern/kern_synch.c (working copy)
@@ -223,8 +223,16 @@ roundrobin(struct cpu_info *ci)
/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define loadfactor(loadav) (2 * (loadav))
-#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
+static __inline fixpt_t
+decay_cpu(fixpt_t loadfac, fixpt_t estcpu)
+{
+
+ /* XXX tweak shifts to avoid 64 bit arithmetics? */
+
+ return ((uint64_t)estcpu * loadfac / (loadfac + FSCALE));
+}
+
/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
@@ -253,7 +261,6 @@ schedcpu(void *arg)
struct lwp *l;
struct proc *p;
int s, minslp;
- unsigned int newcpu;
int clkhz;
proclist_lock_read();
@@ -295,8 +302,7 @@ schedcpu(void *arg)
(p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
p->p_cpticks = 0;
- newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
- p->p_estcpu = newcpu;
+ p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
splx(s); /* Done with the process CPU ticks update */
SCHED_LOCK(s);
LIST_FOREACH(l, &p->p_lwps, l_sibling) {
@@ -325,13 +331,13 @@ schedcpu(void *arg)
/*
* Recalculate the priority of a process after it has slept for a while.
* For all load averages >= 1 and max p_estcpu of 255, sleeping for at
- * least six times the loadfactor will decay p_estcpu to zero.
+ * least six times the loadfactor will decay p_estcpu to zero. XXX XXX
*/
void
updatepri(struct lwp *l)
{
struct proc *p = l->l_proc;
- unsigned int newcpu;
+ fixpt_t newcpu;
fixpt_t loadfac;
SCHED_ASSERT_LOCKED();
@@ -344,7 +350,7 @@ updatepri(struct lwp *l)
else {
l->l_slptime--; /* the first time was done in schedcpu */
while (newcpu && --l->l_slptime)
- newcpu = (int) decay_cpu(loadfac, newcpu);
+ newcpu = decay_cpu(loadfac, newcpu);
p->p_estcpu = newcpu;
}
resetpriority(l);
@@ -1105,7 +1111,7 @@ resetpriority(struct lwp *l)
SCHED_ASSERT_LOCKED();
- newpriority = PUSER + p->p_estcpu +
+ newpriority = PUSER + (p->p_estcpu >> ESTCPU_SHIFT) +
NICE_WEIGHT * (p->p_nice - NZERO);
newpriority = min(newpriority, MAXPRI);
l->l_usrpri = newpriority;
@@ -1145,7 +1151,7 @@ schedclock(struct lwp *l)
struct proc *p = l->l_proc;
int s;
- p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
+ p->p_estcpu = ESTCPULIM(p->p_estcpu + (1 << ESTCPU_SHIFT));
SCHED_LOCK(s);
resetpriority(l);
SCHED_UNLOCK(s);
Index: kern/init_sysctl.c
===================================================================
--- kern/init_sysctl.c (revision 1392)
+++ kern/init_sysctl.c (working copy)
@@ -2782,7 +2782,7 @@ fill_kproc2(struct proc *p, struct kinfo
ki->p_tdev = NODEV;
}
- ki->p_estcpu = p->p_estcpu;
+ ki->p_estcpu = p->p_estcpu >> ESTCPU_SHIFT;
ki->p_rtime_sec = p->p_rtime.tv_sec;
ki->p_rtime_usec = p->p_rtime.tv_usec;
ki->p_cpticks = p->p_cpticks;
>Unformatted: