Subject: Further works on yamt-idlelwp
To: None <tech-kern@netbsd.org>
From: Mindaugas R. <rmind@NetBSD.org>
List: tech-kern
Date: 03/05/2007 02:27:16

Hello,
I have made a small patch for further scheduler work. In short, here is what it does:

1) Add sched_rq, sched_proc_sd and sched_lwp_sd members to struct cpu_data,
struct proc and struct lwp, respectively. These are pointers to dynamically
allocated scheduler-specific data. They are not used by SCHED_4BSD (yet).
For now, there are dummy functions in sched_4bsd.c; IMHO it would be good to
nano-optimize these useless calls away, even if they aren't expensive.

2) Add additional sched_lwp_fork()/sched_lwp_exit() hooks for allocating and
freeing per-LWP scheduler-specific data. Also, add a sched_slept() hook,
which handles an LWP going to sleep or blocking.
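
For illustration only, a scheduler module could implement these hooks along
the following lines (struct sched_lwp_data and its field are invented for
this example, and the kmem(9) usage is just my assumption):

#include <sys/kmem.h>
#include <sys/lwp.h>

/* Hypothetical per-LWP scheduler-specific data. */
struct sched_lwp_data {
	int	sld_timeslice;		/* e.g. remaining timeslice */
};

void
sched_lwp_fork(struct lwp *l)
{
	/* Allocate the scheduler-specific area for the new LWP. */
	l->sched_lwp_sd = kmem_zalloc(sizeof(struct sched_lwp_data), KM_SLEEP);
}

void
sched_lwp_exit(struct lwp *l)
{
	/* Release it when the LWP exits. */
	kmem_free(l->sched_lwp_sd, sizeof(struct sched_lwp_data));
	l->sched_lwp_sd = NULL;
}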

3) Add a general sysctl node, KERN_SCHED, under which scheduler options will
live. At the moment, SCHED_4BSD has an exponential decay value (ccpu), which
was moved to sched_4bsd.c. Any other scheduler-specific options must go into
the module as well. Please note that the userland part should change too.
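
Since the ccpu leaf is created with CTL_CREATE, its numeric ID is assigned
dynamically, so userland would look it up by name. A minimal sketch
(assuming the node ends up named kern.sched.ccpu, as in the patch):

#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int ccpu;
	size_t len = sizeof(ccpu);

	/* Resolve the node by name; the MIB number is not fixed. */
	if (sysctlbyname("kern.sched.ccpu", &ccpu, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return 1;
	}
	printf("ccpu = %d\n", ccpu);
	return 0;
}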

4) Move the enqueueing of the old LWP from mi_switch() to sched_nextlwp().
Other scheduler implementations could (and will) do this differently. In
that case, maybe it would be worth changing the name, e.g. to sched_switch()?

Any objections/comments about these?

Next, we need to solve a few issues. In essence:

1. Currently there are two general functions, sched_lock() and
sched_unlock(), for locking the runqueue (and the scheduler as a whole).
From now on there will be one runqueue per CPU, hence this should change.
a) Add a kmutex_t to struct cpu_data (which is general MI data); it would
serve as a generic lock for the runqueue.
b) Add a kmutex_t to the scheduler-specific area and move
sched_lock()/sched_unlock() into the scheduler module. This would be more
flexible, IMHO.
In any case, the prototype would probably change to:
    static inline void sched_lock(struct cpu_info *ci, const int heldmutex);
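
To make option (a) concrete, here is a rough sketch (the field name
cpu_rq_mutex, the use of a spin mutex, and the reading of heldmutex as
"caller already holds the lock" are all my assumptions):

#include <sys/mutex.h>

struct cpu_data {
	/* ... existing members ... */
	kmutex_t	cpu_rq_mutex;	/* generic runqueue lock */
};

static inline void
sched_lock(struct cpu_info *ci, const int heldmutex)
{
	/* Take the per-CPU runqueue lock unless the caller holds it. */
	if (!heldmutex)
		mutex_spin_enter(&ci->ci_data.cpu_rq_mutex);
}

static inline void
sched_unlock(struct cpu_info *ci)
{
	mutex_spin_exit(&ci->ci_data.cpu_rq_mutex);
}
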
Any other suggestions?

2. As already discussed, there is a problem with p_estcpu and p_pctcpu,
which should be scheduler-independent. As mentioned, there is also the ccpu
value, which is SCHED_4BSD-specific. These are used by ps(1), top(1), etc.
It would be good to agree on what to do with them and how. All suggestions
are welcome.

Thanks.

-- 
Best regards,
Mindaugas
www.NetBSD.org

[Attachment: sched_1.diff]

Index: kern/init_sysctl.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_sysctl.c,v
retrieving revision 1.96.2.1
diff -u -p -r1.96.2.1 init_sysctl.c
--- kern/init_sysctl.c	27 Feb 2007 16:54:16 -0000	1.96.2.1
+++ kern/init_sysctl.c	4 Mar 2007 03:46:18 -0000
@@ -328,7 +328,6 @@ SYSCTL_SETUP(sysctl_root_setup, "sysctl 
 SYSCTL_SETUP(sysctl_kern_setup, "sysctl kern subtree setup")
 {
 	extern int kern_logsigexit;	/* defined in kern/kern_sig.c */
-	extern fixpt_t ccpu;		/* defined in kern/kern_synch.c */
 	extern int dumponpanic;		/* defined in kern/subr_prf.c */
 	const struct sysctlnode *rnode;
 
@@ -644,12 +643,6 @@ SYSCTL_SETUP(sysctl_kern_setup, "sysctl 
 		       CTL_KERN, KERN_FSCALE, CTL_EOL);
 	sysctl_createv(clog, 0, NULL, NULL,
 		       CTLFLAG_PERMANENT,
-		       CTLTYPE_INT, "ccpu",
-		       SYSCTL_DESCR("Scheduler exponential decay value"),
-		       NULL, 0, &ccpu, 0,
-		       CTL_KERN, KERN_CCPU, CTL_EOL);
-	sysctl_createv(clog, 0, NULL, NULL,
-		       CTLFLAG_PERMANENT,
 		       CTLTYPE_STRUCT, "cp_time",
 		       SYSCTL_DESCR("Clock ticks spent in different CPU states"),
 		       sysctl_kern_cptime, 0, NULL, 0,
Index: kern/kern_lwp.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lwp.c,v
retrieving revision 1.55.2.5
diff -u -p -r1.55.2.5 kern_lwp.c
--- kern/kern_lwp.c	27 Feb 2007 16:54:22 -0000	1.55.2.5
+++ kern/kern_lwp.c	4 Mar 2007 03:46:18 -0000
@@ -525,6 +525,7 @@ newlwp(struct lwp *l1, struct proc *p2, 
 	l2->l_cpu = l1->l_cpu;
 	l2->l_flag = inmem ? LW_INMEM : 0;
 	lwp_initspecific(l2);
+	sched_lwp_fork(l2);
 
 	if (p2->p_flag & PK_SYSTEM) {
 		/*
@@ -582,7 +583,7 @@ newlwp(struct lwp *l1, struct proc *p2, 
 /*
  * Quit the process.
  * this can only be used meaningfully if you're willing to switch away. 
- * Calling with l!=curlwp would be weird.
+ * Calling with l != curlwp would be weird.
  */
 void
 lwp_exit(struct lwp *l)
@@ -628,6 +629,8 @@ lwp_exit(struct lwp *l)
 	/* Delete the specificdata while it's still safe to sleep. */
 	specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);
 
+	sched_lwp_exit(l);
+
 	/*
 	 * Release our cached credentials.
 	 */
Index: kern/kern_sleepq.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sleepq.c,v
retrieving revision 1.4.2.4
diff -u -p -r1.4.2.4 kern_sleepq.c
--- kern/kern_sleepq.c	27 Feb 2007 16:54:24 -0000	1.4.2.4
+++ kern/kern_sleepq.c	4 Mar 2007 03:46:18 -0000
@@ -230,6 +230,8 @@ sleepq_enqueue(sleepq_t *sq, pri_t pri, 
 	l->l_sleeperr = 0;
 	l->l_nvcsw++;
 
+	sched_slept(l);
+
 	sq->sq_waiters++;
 	sleepq_insert(sq, l, sobj);
 }
Index: kern/kern_synch.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_synch.c,v
retrieving revision 1.177.2.12
diff -u -p -r1.177.2.12 kern_synch.c
--- kern/kern_synch.c	3 Mar 2007 15:37:41 -0000	1.177.2.12
+++ kern/kern_synch.c	4 Mar 2007 03:46:19 -0000
@@ -392,19 +392,6 @@ mi_switch(struct lwp *l, struct lwp *new
 #endif
 
 	/*
-	 * If on the CPU and we have gotten this far, then we must yield.
-	 */
-	KASSERT(l->l_stat != LSRUN);
-	if (l->l_stat == LSONPROC) {
-		KASSERT(lwp_locked(l, &sched_mutex));
-		l->l_stat = LSRUN;
-		if ((l->l_flag & LW_IDLE) == 0) {
-			sched_enqueue(l);
-		}
-	}
-	uvmexp.swtch++;
-
-	/*
 	 * Process is about to yield the CPU; clear the appropriate
 	 * scheduling flags.
 	 */
@@ -427,15 +414,12 @@ mi_switch(struct lwp *l, struct lwp *new
 	}
 #endif
 
+	newl = sched_nextlwp(l, newl);
 	if (newl == NULL) {
-		newl = sched_nextlwp();
-	}
-	if (newl != NULL) {
-		KASSERT(lwp_locked(newl, &sched_mutex));
-		sched_dequeue(newl);
-	} else {
 		newl = l->l_cpu->ci_data.cpu_idlelwp;
 		KASSERT(newl != NULL);
+	} else {
+		sched_dequeue(newl);
 	}
 	KASSERT(lwp_locked(newl, &sched_mutex));
 	newl->l_stat = LSONPROC;
Index: kern/sched_4bsd.c
===================================================================
RCS file: /cvsroot/src/sys/kern/Attic/sched_4bsd.c,v
retrieving revision 1.1.2.8
diff -u -p -r1.1.2.8 sched_4bsd.c
--- kern/sched_4bsd.c	27 Feb 2007 16:54:26 -0000	1.1.2.8
+++ kern/sched_4bsd.c	4 Mar 2007 03:46:19 -0000
@@ -92,6 +92,7 @@ __KERNEL_RCSID(0, "$NetBSD: sched_4bsd.c
 #include <sys/signalvar.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
+#include <sys/sysctl.h>
 #include <sys/kauth.h>
 #include <sys/lockdebug.h>
 
@@ -668,7 +669,7 @@ checkrunqueue(int whichq, struct lwp *l)
 void
 sched_enqueue(struct lwp *l)
 {
-	struct prochd*rq;
+	struct prochd *rq;
 	struct lwp *prev;
 	const int whichq = lwp_eprio(l) / PPQ;
 
@@ -730,12 +731,25 @@ sched_dequeue(struct lwp *l)
 }
 
 struct lwp *
-sched_nextlwp(void)
+sched_nextlwp(struct lwp *l, struct lwp *newl)
 {
 	const struct prochd *rq;
-	struct lwp *l;
 	int whichq;
 
+	KASSERT(l != NULL);
+	KASSERT(l->l_stat != LSRUN);
+
+	if (l->l_stat == LSONPROC) {
+		KASSERT(lwp_locked(l, &sched_mutex));
+		l->l_stat = LSRUN;
+		if ((l->l_flag & LW_IDLE) == 0) {
+			sched_enqueue(l);
+		}
+	}
+
+	if (newl)
+		return newl;
+
 	if (sched_whichqs == 0) {
 		return NULL;
 	}
@@ -749,12 +763,43 @@ sched_nextlwp(void)
 	whichq = ffs(sched_whichqs) - 1;
 #endif
 	rq = &sched_qs[whichq];
-	l = rq->ph_link;
-	return l;
+	return rq->ph_link;
 }
 
 #endif /* !defined(__HAVE_MD_RUNQUEUE) */
 
+/* XXX: Dummy */
+void sched_lwp_fork(struct lwp *l) {
+}
+
+void sched_lwp_exit(struct lwp *l) {
+}
+
+void sched_slept(struct lwp *l) {
+}
+
+/* SysCtl */
+
+SYSCTL_SETUP(sysctl_sched_setup, "sysctl kern.sched subtree setup") {
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT,
+		CTLTYPE_NODE, "kern", NULL,
+		NULL, 0, NULL, 0,
+		CTL_KERN, CTL_EOL);
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT,
+		CTLTYPE_NODE, "sched",
+		SYSCTL_DESCR("Scheduler options"),
+		NULL, 0, NULL, 0,
+		CTL_KERN, KERN_SCHED, CTL_EOL);
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT,
+		CTLTYPE_INT, "ccpu",
+		SYSCTL_DESCR("Scheduler exponential decay value"),
+		NULL, 0, &ccpu, 0,
+		CTL_KERN, KERN_SCHED, CTL_CREATE, CTL_EOL);
+}
+
 #if defined(DDB)
 void
 sched_print_runqueue(void (*pr)(const char *, ...))
Index: sys/cpu_data.h
===================================================================
RCS file: /cvsroot/src/sys/sys/cpu_data.h,v
retrieving revision 1.7.2.1
diff -u -p -r1.7.2.1 cpu_data.h
--- sys/cpu_data.h	17 Feb 2007 10:31:01 -0000	1.7.2.1
+++ sys/cpu_data.h	4 Mar 2007 03:46:21 -0000
@@ -63,7 +63,8 @@ struct lwp;
 
 struct cpu_data {
 	struct schedstate_percpu cpu_schedstate; /* scheduler state */
-	struct lwp *cpu_idlelwp;	/* idle lwp */
+	struct lwp	*cpu_idlelwp;	/* idle lwp */
+	void		*sched_rq;	/* Scheduler-specific runqueue */
 
 	struct callout * volatile cpu_callout;	/* MP: a callout running */
 
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.48.2.4
diff -u -p -r1.48.2.4 lwp.h
--- sys/lwp.h	27 Feb 2007 16:55:15 -0000	1.48.2.4
+++ sys/lwp.h	4 Mar 2007 03:46:21 -0000
@@ -72,6 +72,7 @@ struct	lwp {
 	/* Scheduling and overall state */
 	struct lwp	*l_forw;	/* s: run queue */
 	struct lwp	*l_back;	/* s: run queue */
+	void		*sched_lwp_sd;	/* s: Scheduler-specific structure */
 	struct cpu_info *volatile l_cpu;/* s: CPU we're on if LSONPROC */
 	kmutex_t * volatile l_mutex;	/* l: ptr to mutex on sched state */
 	struct user	*l_addr;	/* l: KVA of u-area (PROC ONLY) */
Index: sys/proc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/proc.h,v
retrieving revision 1.236.2.4
diff -u -p -r1.236.2.4 proc.h
--- sys/proc.h	27 Feb 2007 16:55:16 -0000	1.236.2.4
+++ sys/proc.h	4 Mar 2007 03:46:21 -0000
@@ -266,6 +266,7 @@ struct proc {
 	struct lwp	*p_zomblwp;	/* s: detached LWP to be reaped */
 
 	/* scheduling */
+	void		*sched_proc_sd;	/* s: Scheduler-specific structure */
 	fixpt_t		p_estcpu;	/* t: Time averaged value of p_cpticks XXX belongs in p_startcopy section */
 	fixpt_t		p_estcpu_inherited;
 	unsigned int	p_forktime;
Index: sys/sched.h
===================================================================
RCS file: /cvsroot/src/sys/sys/sched.h,v
retrieving revision 1.30.2.7
diff -u -p -r1.30.2.7 sched.h
--- sys/sched.h	27 Feb 2007 17:20:53 -0000	1.30.2.7
+++ sys/sched.h	4 Mar 2007 03:46:22 -0000
@@ -174,7 +174,7 @@ void sched_clock(struct lwp *); /* Calle
 
 /* Runqueue-related functions */
 inline boolean_t sched_curcpu_runnable_p(void); /* Indicate runnable processes on current CPU */
-struct lwp * sched_nextlwp(void);		/* Select LWP to run on the CPU next */
+struct lwp *sched_nextlwp(struct lwp *, struct lwp *);	/* Select LWP to run on the CPU next */
 void sched_enqueue(struct lwp *);	/* Place a process on its runqueue */
 void sched_dequeue(struct lwp *);	/* Remove a process from its runqueue */
 
@@ -184,8 +184,11 @@ void sched_nice(struct proc *, int);		/*
 /* General helper functions */
 void sched_proc_fork(struct proc *, struct proc *);	/* Inherit scheduling history */
 void sched_proc_exit(struct proc *, struct proc *);	/* Chargeback parents */
-void sched_print_runqueue(void (*pr)(const char *, ...));	/* Print runqueues in DDB */
+void sched_lwp_fork(struct lwp *);
+void sched_lwp_exit(struct lwp *);
+void sched_slept(struct lwp *);
 void sched_setrunnable(struct lwp *);	/* Scheduler-specific actions for setrunnable() */
+void sched_print_runqueue(void (*pr)(const char *, ...));	/* Print runqueues in DDB */
 
 /* Functions common to all scheduler implementations */
 void sched_wakeup(volatile const void *);
Index: sys/sysctl.h
===================================================================
RCS file: /cvsroot/src/sys/sys/sysctl.h,v
retrieving revision 1.166.2.1
diff -u -p -r1.166.2.1 sysctl.h
--- sys/sysctl.h	27 Feb 2007 16:55:18 -0000	1.166.2.1
+++ sys/sysctl.h	4 Mar 2007 03:46:22 -0000
@@ -239,7 +239,7 @@ struct ctlname {
 #define	KERN_PROC2		47	/* struct: process entries */
 #define	KERN_PROC_ARGS		48	/* struct: process argv/env */
 #define	KERN_FSCALE		49	/* int: fixpt FSCALE */
-#define	KERN_CCPU		50	/* int: fixpt ccpu */
+#define	KERN_OLDCCPU		50	/* old: fixpt ccpu */
 #define	KERN_CP_TIME		51	/* struct: CPU time counters */
 #define	KERN_OLDSYSVIPC_INFO	52	/* old: number of valid kern ids */
 #define	KERN_MSGBUF		53	/* kernel message buffer */
@@ -273,6 +273,7 @@ struct ctlname {
 #define	KERN_ARND		81	/* void *buf, size_t siz random */
 #define	KERN_SYSVIPC		82	/* node: SysV IPC parameters */
 #define	KERN_MAXID		83	/* number of valid kern ids */
+#define	KERN_SCHED		84	/* node: Scheduler parameters */
 
 
 #define	CTL_KERN_NAMES { \
@@ -326,7 +327,7 @@ struct ctlname {
 	{ "proc2", CTLTYPE_STRUCT }, \
 	{ "proc_args", CTLTYPE_STRING }, \
 	{ "fscale", CTLTYPE_INT }, \
-	{ "ccpu", CTLTYPE_INT }, \
+	{ 0, 0 }, \
 	{ "cp_time", CTLTYPE_STRUCT }, \
 	{ 0, 0 }, \
 	{ "msgbuf", CTLTYPE_STRUCT }, \


