Subject: Re: Moving scheduler semantics from cpu_switch() to kern_synch.c
To: None <garrett_damore@tadpole.com>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 10/01/2006 20:56:19
--NextPart-20060930211225-0902601
Content-Type: Text/Plain; charset=us-ascii
> > > => idle() (written in C, in MI code) implements idle policy. This is
> > > where we can check for new processes to run, zero free pages, etc. If
> > > there is no "idle time" work to do, then cpu_idle() is called.
> > >
> > > => cpu_idle() does NOT loop! cpu_idle() simply does the truly MD
> > > things that idle would do, e.g. call the HLT instruction or do other
> > > idle-time power saving, etc. Once that special instruction has
> > > finished executing, we know that something has happened (i.e. an
> > > interrupt that may have caused an LWP to become runnable), so we
> > > return back to idle(), which loops around again (thus checking for
> > > runnable LWPs... lather, rinse, repeat).
>
> cpu "wakeup" code and the correcponding part of idle() should be MD.
> otherwise i agree.
>
> > Yes, yes yes! I agree with all of it! So when can we do it? :-)
>
> whenever we have enough volunteers for each of our too many ports.
> (as usual :-)
>
> i think i had some old (pre-lwp i guess) idle thread code for i386
> somewhere in my home dir. i'll try to dig it up unless anyone has
> a newer one.
i found and ported it to -current. (attached)
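for reference, here is a tiny user-space sketch of the idle()/cpu_idle()
split described above. it is only a model with stand-in names, not the
attached kernel code: sched_curcpu_runnable_p(), cpu_idle() and the
page-zeroing work are all simulated. the point it tries to show is that
only idle() loops; cpu_idle() blocks once and returns.

#include <stdio.h>

static int runnable;		/* stand-in for sched_curcpu_runnable_p() */
static int pages_to_zero = 2;	/* stand-in for uvm.page_idle_zero work */

static int
sched_curcpu_runnable_p(void)
{
	return runnable;
}

/*
 * MD part: does NOT loop.  execute the halt/power-save instruction once
 * and return when "something happened" (here: pretend an interrupt made
 * an lwp runnable).
 */
static void
cpu_idle(void)
{
	printf("cpu_idle: hlt (or other power saving)\n");
	runnable = 1;
}

/*
 * MI part: the idle policy.  loop, do idle-time work (page zeroing etc.)
 * and only call cpu_idle() when there is truly nothing else to do.
 */
static void
idle(void)
{
	for (;;) {
		if (sched_curcpu_runnable_p())
			break;	/* the real kernel would mi_switch() here */
		if (pages_to_zero > 0) {
			printf("idle: zero a free page\n");
			pages_to_zero--;
			continue;
		}
		cpu_idle();	/* returns after one wakeup; loop again */
	}
	printf("idle: found a runnable lwp, switching\n");
}

int
main(void)
{
	idle();
	return 0;
}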
YAMAMOTO Takashi
--NextPart-20060930211225-0902601
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="i.diff"
Index: conf/files
===================================================================
--- conf/files (revision 1799)
+++ conf/files (working copy)
@@ -1283,6 +1283,7 @@ file kern/kern_event.c
file kern/kern_exec.c
file kern/kern_exit.c
file kern/kern_fork.c
+file kern/kern_idle.c
file kern/kern_kcont.c kcont
file kern/kern_kthread.c
file kern/kern_ktrace.c
Index: kern/kern_synch.c
===================================================================
--- kern/kern_synch.c (revision 1794)
+++ kern/kern_synch.c (working copy)
@@ -127,8 +127,8 @@ int rrticks; /* number of hardclock tic
/*
* The global scheduler state.
*/
-struct prochd sched_qs[RUNQUE_NQS]; /* run queues */
-volatile uint32_t sched_whichqs; /* bitmap of non-empty queues */
+static struct prochd sched_qs[RUNQUE_NQS]; /* run queues */
+static volatile uint32_t sched_whichqs; /* bitmap of non-empty queues */
static struct slpque sched_slpque[SLPQUE_TABLESIZE]; /* sleep queues */
struct simplelock sched_lock = SIMPLELOCK_INITIALIZER;
@@ -156,7 +156,7 @@ roundrobin(struct cpu_info *ci)
spc->spc_rrticks = rrticks;
- if (curlwp != NULL) {
+ if (!CURCPU_IDLE_P()) {
if (spc->spc_flags & SPCF_SEENRR) {
/*
* The process has already been through a roundrobin
@@ -327,6 +327,8 @@ schedcpu(void *arg)
*/
minslp = 2;
LIST_FOREACH(l, &p->p_lwps, l_sibling) {
+ if ((l->l_flag & L_IDLE) != 0)
+ continue;
l->l_swtime++;
if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
l->l_stat == LSSUSPENDED) {
@@ -363,6 +365,8 @@ schedcpu(void *arg)
LIST_FOREACH(l, &p->p_lwps, l_sibling) {
if (l->l_slptime > 1)
continue;
+ if ((l->l_flag & L_IDLE) != 0)
+ continue;
resetpriority(l);
if (l->l_priority >= PUSER) {
if (l->l_stat == LSRUN &&
@@ -435,7 +439,7 @@ ltsleep(volatile const void *ident, int
volatile struct simplelock *interlock)
{
struct lwp *l = curlwp;
- struct proc *p = l ? l->l_proc : NULL;
+ struct proc *p = l->l_proc;
struct slpque *qp;
struct sadata_upcall *sau;
int sig, s;
@@ -451,7 +455,7 @@ ltsleep(volatile const void *ident, int
* in the shutdown case is disgusting but partly necessary given
* how shutdown (barely) works.
*/
- if (cold || (doing_shutdown && (panicstr || (l == NULL)))) {
+ if (cold || (doing_shutdown && (panicstr || CURCPU_IDLE_P()))) {
/*
* After a panic, or during autoconfiguration,
* just give interrupts a chance, then just return;
@@ -727,18 +731,11 @@ awaken(struct lwp *l)
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
void
-sched_unlock_idle(void)
+sched_unlock(void)
{
simple_unlock(&sched_lock);
}
-
-void
-sched_lock_idle(void)
-{
-
- simple_lock(&sched_lock);
-}
#endif /* MULTIPROCESSOR || LOCKDEBUG */
/*
@@ -889,6 +886,7 @@ preempt(int more)
struct lwp *l = curlwp;
int r, s;
+ KASSERT(!CURCPU_IDLE_P());
SCHED_LOCK(s);
l->l_priority = l->l_usrpri;
l->l_stat = LSRUN;
@@ -901,6 +899,13 @@ preempt(int more)
sa_preempt(l);
}
+boolean_t
+sched_curcpu_runnable_p(void)
+{
+
+ return sched_whichqs != 0;
+}
+
/*
* The machine independent parts of context switch.
* Must be called at splsched() (no higher!) and with
@@ -940,47 +945,49 @@ mi_switch(struct lwp *l, struct lwp *new
simple_lock_switchcheck();
#endif
- /*
- * Compute the amount of time during which the current
- * process was running.
- */
- microtime(&tv);
- u = p->p_rtime.tv_usec +
- (tv.tv_usec - spc->spc_runtime.tv_usec);
- s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
- if (u < 0) {
- u += 1000000;
- s--;
- } else if (u >= 1000000) {
- u -= 1000000;
- s++;
- }
- p->p_rtime.tv_usec = u;
- p->p_rtime.tv_sec = s;
+ if ((l->l_flag & L_IDLE) == 0) {
+ /*
+ * Compute the amount of time during which the current
+ * process was running.
+ */
+ microtime(&tv);
+ u = p->p_rtime.tv_usec +
+ (tv.tv_usec - spc->spc_runtime.tv_usec);
+ s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
+ if (u < 0) {
+ u += 1000000;
+ s--;
+ } else if (u >= 1000000) {
+ u -= 1000000;
+ s++;
+ }
+ p->p_rtime.tv_usec = u;
+ p->p_rtime.tv_sec = s;
- /*
- * Check if the process exceeds its CPU resource allocation.
- * If over max, kill it. In any case, if it has run for more
- * than 10 minutes, reduce priority to give others a chance.
- */
- rlim = &p->p_rlimit[RLIMIT_CPU];
- if (s >= rlim->rlim_cur) {
/*
- * XXXSMP: we're inside the scheduler lock perimeter;
- * use sched_psignal.
+ * Check if the process exceeds its CPU resource allocation.
+ * If over max, kill it. In any case, if it has run for more
+ * than 10 minutes, reduce priority to give others a chance.
*/
- if (s >= rlim->rlim_max)
- sched_psignal(p, SIGKILL);
- else {
- sched_psignal(p, SIGXCPU);
- if (rlim->rlim_cur < rlim->rlim_max)
- rlim->rlim_cur += 5;
+ rlim = &p->p_rlimit[RLIMIT_CPU];
+ if (s >= rlim->rlim_cur) {
+ /*
+ * XXXSMP: we're inside the scheduler lock perimeter;
+ * use sched_psignal.
+ */
+ if (s >= rlim->rlim_max)
+ sched_psignal(p, SIGKILL);
+ else {
+ sched_psignal(p, SIGXCPU);
+ if (rlim->rlim_cur < rlim->rlim_max)
+ rlim->rlim_cur += 5;
+ }
+ }
+ if (autonicetime && s > autonicetime &&
+ kauth_cred_geteuid(p->p_cred) && p->p_nice == NZERO) {
+ p->p_nice = autoniceval + NZERO;
+ resetpriority(l);
}
- }
- if (autonicetime && s > autonicetime &&
- kauth_cred_geteuid(p->p_cred) && p->p_nice == NZERO) {
- p->p_nice = autoniceval + NZERO;
- resetpriority(l);
}
/*
@@ -1006,14 +1013,30 @@ mi_switch(struct lwp *l, struct lwp *new
* Switch to the new current process. When we
* run again, we'll return back here.
*/
- uvmexp.swtch++;
if (newl == NULL) {
- retval = cpu_switch(l, NULL);
- } else {
+ newl = nextrunqueue();
+ }
+ if (newl != NULL) {
remrunqueue(newl);
+ } else {
+ newl = l->l_cpu->ci_data.cpu_idlelwp;
+ KASSERT(newl != NULL);
+ }
+ newl->l_stat = LSONPROC;
+ if (l != newl) {
+ uvmexp.swtch++;
+ pmap_deactivate(l);
+ curlwp = newl;
+ newl->l_cpu = l->l_cpu;
cpu_switchto(l, newl);
+ KASSERT(curlwp == l);
+ pmap_activate(l);
+ retval = 1;
+ } else {
+ sched_unlock();
retval = 0;
}
+ spl0();
/*
* If we are using h/w performance counters, restore context.
@@ -1037,7 +1060,9 @@ mi_switch(struct lwp *l, struct lwp *new
*/
KDASSERT(l->l_cpu != NULL);
KDASSERT(l->l_cpu == curcpu());
- microtime(&l->l_cpu->ci_schedstate.spc_runtime);
+ if ((l->l_flag & L_IDLE) == 0) {
+ microtime(&l->l_cpu->ci_schedstate.spc_runtime);
+ }
/*
* Reacquire the kernel_lock now. We do this after we've
@@ -1045,6 +1070,7 @@ mi_switch(struct lwp *l, struct lwp *new
* we reacquire the interlock.
*/
KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
+ (void)splsched(); /* XXX */
return retval;
}
@@ -1107,6 +1133,7 @@ setrunnable(struct lwp *l)
SCHED_ASSERT_LOCKED();
+ KASSERT((l->l_flag & L_IDLE) == 0);
switch (l->l_stat) {
case 0:
case LSRUN:
@@ -1205,6 +1232,7 @@ schedclock(struct lwp *l)
struct proc *p = l->l_proc;
int s;
+ KASSERT(!CURCPU_IDLE_P());
p->p_estcpu = ESTCPULIM(p->p_estcpu + (1 << ESTCPU_SHIFT));
SCHED_LOCK(s);
resetpriority(l);
@@ -1288,13 +1316,6 @@ scheduler_wait_hook(struct proc *parent,
}
/*
- * Low-level routines to access the run queue. Optimised assembler
- * routines can override these.
- */
-
-#ifndef __HAVE_MD_RUNQUEUE
-
-/*
* On some architectures, it's faster to use a MSB ordering for the priorites
* than the traditional LSB ordering.
*/
@@ -1305,6 +1326,13 @@ scheduler_wait_hook(struct proc *parent,
#endif
/*
+ * Low-level routines to access the run queue. Optimised assembler
+ * routines can override these.
+ */
+
+#ifndef __HAVE_MD_RUNQUEUE
+
+/*
* The primitives that manipulate the run queues. whichqs tells which
* of the 32 queues qs have processes in them. Setrunqueue puts processes
* into queues, remrunqueue removes them from queues. The running process is
@@ -1424,5 +1452,57 @@ remrunqueue(struct lwp *l)
#endif
}
-#undef RQMASK
+struct lwp *
+nextrunqueue(void)
+{
+ const struct prochd *rq;
+ struct lwp *l;
+ int whichq;
+
+ if (sched_whichqs == 0) {
+ return NULL;
+ }
+#ifdef __HAVE_BIGENDIAN_BITOPS
+ for (whichq = 0; ; whichq++) {
+ if ((sched_whichqs & RQMASK(whichq)) != 0) {
+ break;
+ }
+ }
+#else
+ whichq = ffs(sched_whichqs) - 1;
+#endif
+ rq = &sched_qs[whichq];
+ l = rq->ph_link;
+ return l;
+}
+
#endif /* !defined(__HAVE_MD_RUNQUEUE) */
+
+#if defined(DDB)
+void
+sched_print_runqueue(void (*pr)(const char *, ...))
+{
+ struct prochd *ph;
+ struct lwp *l;
+ int i, first;
+
+ for (i = 0; i < RUNQUE_NQS; i++)
+ {
+ first = 1;
+ ph = &sched_qs[i];
+ for (l = ph->ph_link; l != (void *)ph; l = l->l_forw) {
+ if (first) {
+ (*pr)("%c%d",
+ (sched_whichqs & RQMASK(i))
+ ? ' ' : '!', i);
+ first = 0;
+ }
+ (*pr)("\t%d.%d (%s) pri=%d usrpri=%d\n",
+ l->l_proc->p_pid,
+ l->l_lid, l->l_proc->p_comm,
+ (int)l->l_priority, (int)l->l_usrpri);
+ }
+ }
+}
+#endif /* defined(DDB) */
+#undef RQMASK
Index: kern/init_main.c
===================================================================
--- kern/init_main.c (revision 1787)
+++ kern/init_main.c (working copy)
@@ -92,6 +92,7 @@ __KERNEL_RCSID(0, "$NetBSD: init_main.c,
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/callout.h>
+#include <sys/idle.h>
#include <sys/kernel.h>
#include <sys/kcont.h>
#include <sys/kmem.h>
@@ -278,6 +279,9 @@ main(void)
/* Create process 0 (the swapper). */
proc0_init();
+ error = create_idle_lwp(curcpu());
+ KASSERT(error == 0);
+
/*
* Charge root for one process.
*/
Index: kern/kern_clock.c
===================================================================
--- kern/kern_clock.c (revision 1785)
+++ kern/kern_clock.c (working copy)
@@ -518,7 +518,7 @@ hardclock(struct clockframe *frame)
#endif /* __HAVE_TIMECOUNTER */
l = curlwp;
- if (l) {
+ if (!CURCPU_IDLE_P()) {
p = l->l_proc;
/*
* Run current process's virtual and profile time, as needed.
@@ -1221,7 +1221,7 @@ statclock(struct clockframe *frame)
if (p != NULL)
p->p_iticks++;
spc->spc_cp_time[CP_INTR]++;
- } else if (p != NULL) {
+ } else if (!CURCPU_IDLE_P()) {
p->p_sticks++;
spc->spc_cp_time[CP_SYS]++;
} else
@@ -1229,7 +1229,7 @@ statclock(struct clockframe *frame)
}
spc->spc_pscnt = psdiv;
- if (p != NULL) {
+ if (p != NULL && !CURCPU_IDLE_P()) {
++p->p_cpticks;
/*
* If no separate schedclock is provided, call it here
Index: kern/kern_lwp.c
===================================================================
--- kern/kern_lwp.c (revision 1770)
+++ kern/kern_lwp.c (working copy)
@@ -587,13 +587,13 @@ lwp_exit(struct lwp *l)
pmap_deactivate(l);
+ curlwp = curcpu()->ci_data.cpu_idlelwp;
if (l->l_flag & L_DETACHED) {
simple_lock(&p->p_lock);
LIST_REMOVE(l, l_sibling);
p->p_nlwps--;
simple_unlock(&p->p_lock);
- curlwp = NULL;
l->l_proc = NULL;
}
@@ -605,8 +605,20 @@ lwp_exit(struct lwp *l)
/* This LWP no longer needs to hold the kernel lock. */
KERNEL_PROC_UNLOCK(l);
- /* cpu_exit() will not return */
- cpu_exit(l);
+ lwp_exit_switchaway(l);
+}
+
+void
+lwp_exit_switchaway(struct lwp *l)
+{
+ struct cpu_info *ci;
+
+ uvmexp.swtch++;
+ ci = curcpu();
+ KASSERT(ci->ci_data.cpu_exitinglwp == NULL);
+ KASSERT(CURCPU_IDLE_P());
+ ci->ci_data.cpu_exitinglwp = l;
+ cpu_switchto(NULL, ci->ci_data.cpu_idlelwp);
}
/*
@@ -668,6 +680,9 @@ proc_representative_lwp(struct proc *p)
onproc = running = sleeping = stopped = suspended = NULL;
signalled = NULL;
LIST_FOREACH(l, &p->p_lwps, l_sibling) {
+ if ((l->l_flag & L_IDLE) != 0) {
+ continue;
+ }
if (l->l_lid == p->p_sigctx.ps_lwp)
signalled = l;
switch (l->l_stat) {
Index: kern/kern_proc.c
===================================================================
--- kern/kern_proc.c (revision 1747)
+++ kern/kern_proc.c (working copy)
@@ -319,6 +319,7 @@ proc0_init(void)
l->l_flag = L_INMEM;
l->l_stat = LSONPROC;
p->p_nrlwps = 1;
+ p->p_nlwpid = l->l_lid;
callout_init(&l->l_tsleep_ch);
Index: kern/kern_exit.c
===================================================================
--- kern/kern_exit.c (revision 1799)
+++ kern/kern_exit.c (working copy)
@@ -497,7 +497,7 @@ exit1(struct lwp *l, int rv)
*
* Other substructures are freed from wait().
*/
- curlwp = NULL;
+ curlwp = curcpu()->ci_data.cpu_idlelwp;
/* Delay release until after dropping the proclist lock */
plim = p->p_limit;
@@ -537,26 +537,12 @@ exit1(struct lwp *l, int rv)
/* Release cached credentials. */
kauth_cred_free(l->l_cred);
-#ifdef DEBUG
- /* Nothing should use the process link anymore */
l->l_proc = NULL;
-#endif
/* This process no longer needs to hold the kernel lock. */
KERNEL_PROC_UNLOCK(l);
- /*
- * Finally, call machine-dependent code to switch to a new
- * context (possibly the idle context). Once we are no longer
- * using the dead lwp's stack, lwp_exit2() will be called
- * to arrange for the resources to be released.
- *
- * Note that cpu_exit() will end with a call equivalent to
- * cpu_switch(), finishing our execution (pun intended).
- */
-
- uvmexp.swtch++;
- cpu_exit(l);
+ lwp_exit_switchaway(l);
}
void
Index: kern/kern_lock.c
===================================================================
--- kern/kern_lock.c (revision 1799)
+++ kern/kern_lock.c (working copy)
@@ -1424,8 +1424,8 @@ void
assert_sleepable(struct simplelock *interlock, const char *msg)
{
- if (curlwp == NULL) {
- panic("assert_sleepable: NULL curlwp");
+ if (CURCPU_IDLE_P()) {
+ panic("assert_sleepable: idle");
}
spinlock_switchcheck();
simple_lock_only_held(interlock, msg);
Index: uvm/uvm_page.c
===================================================================
--- uvm/uvm_page.c (revision 1799)
+++ uvm/uvm_page.c (working copy)
@@ -1523,8 +1523,7 @@ uvm_page_own(struct vm_page *pg, const c
*
* => try to complete one color bucket at a time, to reduce our impact
* on the CPU cache.
- * => we loop until we either reach the target or whichqs indicates that
- * there is a process ready to run.
+ * => we loop until we either reach the target or there is a lwp ready to run.
*/
void
uvm_pageidlezero(void)
@@ -1538,8 +1537,9 @@ uvm_pageidlezero(void)
s = uvm_lock_fpageq();
firstbucket = nextbucket;
do {
- if (sched_whichqs != 0)
+ if (sched_curcpu_runnable_p()) {
goto quit;
+ }
if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
uvm.page_idle_zero = FALSE;
goto quit;
@@ -1548,7 +1548,7 @@ uvm_pageidlezero(void)
pgfl = &uvm.page_free[free_list];
while ((pg = TAILQ_FIRST(&pgfl->pgfl_buckets[
nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
- if (sched_whichqs != 0)
+ if (sched_curcpu_runnable_p())
goto quit;
TAILQ_REMOVE(&pgfl->pgfl_buckets[
Index: arch/i386/include/frame.h
===================================================================
--- arch/i386/include/frame.h (revision 1485)
+++ arch/i386/include/frame.h (working copy)
@@ -139,7 +139,7 @@ struct intrframe {
};
/*
- * Stack frame inside cpu_switch()
+ * Stack frame inside cpu_switchto()
*/
struct switchframe {
int sf_edi;
Index: arch/i386/include/cpu.h
===================================================================
--- arch/i386/include/cpu.h (revision 1785)
+++ arch/i386/include/cpu.h (working copy)
@@ -101,10 +101,6 @@ struct cpu_info {
#define TLBSTATE_LAZY 1 /* tlbs are valid but won't be kept uptodate */
#define TLBSTATE_STALE 2 /* we might have stale user tlbs */
- struct pcb *ci_curpcb; /* VA of current HW PCB */
- struct pcb *ci_idle_pcb; /* VA of current PCB */
- int ci_idle_tss_sel; /* TSS selector of idle PCB */
-
struct intrsource *ci_isources[MAX_INTR_SOURCES];
uint32_t ci_ipending;
int ci_ilevel;
@@ -214,7 +210,7 @@ curcpu()
extern struct cpu_info *cpu_info[X86_MAXPROCS];
void cpu_boot_secondary_processors(void);
-void cpu_init_idle_pcbs(void);
+void cpu_init_idle_lwps(void);
/*
* Preempt the current process if in interrupt from user mode,
@@ -242,7 +238,7 @@ extern void need_resched(struct cpu_info
do { \
struct cpu_info *__ci = (ci); \
__ci->ci_want_resched = 1; \
- if (__ci->ci_curlwp != NULL) \
+ if (__ci->ci_curlwp != __ci->ci_data.cpu_idlelwp) \
aston(__ci->ci_curlwp->l_proc); \
} while (/*CONSTCOND*/0)
@@ -252,8 +248,8 @@ do { \
extern uint32_t cpus_attached;
-#define curpcb curcpu()->ci_curpcb
#define curlwp curcpu()->ci_curlwp
+#define curpcb &curlwp->l_addr->u_pcb
/*
* Arguments to hardclock, softclock and statclock
@@ -348,7 +344,6 @@ extern int i386_has_sse2;
void dumpconf(void);
int cpu_maxproc(void);
void cpu_reset(void);
-void i386_init_pcb_tss_ldt(struct cpu_info *);
void i386_proc0_tss_ldt_init(void);
/* identcpu.c */
Index: arch/i386/conf/files.i386
===================================================================
--- arch/i386/conf/files.i386 (revision 1785)
+++ arch/i386/conf/files.i386 (working copy)
@@ -73,6 +73,7 @@ file arch/i386/i386/db_memrw.c ddb | kgd
file arch/i386/i386/db_trace.c ddb
file kern/subr_disk_mbr.c disk
file arch/i386/i386/gdt.c
+file arch/i386/i386/idle_machdep.c
file arch/i386/i386/in_cksum.S inet | inet6
file arch/i386/i386/ipkdb_glue.c ipkdb
file arch/i386/i386/kgdb_machdep.c kgdb
Index: arch/i386/i386/copy.S
===================================================================
--- arch/i386/i386/copy.S (revision 1464)
+++ arch/i386/i386/copy.S (working copy)
@@ -80,7 +80,9 @@
#include <machine/frameasm.h>
#include <machine/cputypes.h>
-#define GET_CURPCB(reg) movl CPUVAR(CURPCB),reg
+#define GET_CURPCB(reg) \
+ movl CPUVAR(CURLWP), reg; \
+ movl L_ADDR(reg), reg
/*
* The following primitives are used to fill and copy regions of memory.
Index: arch/i386/i386/machdep.c
===================================================================
--- arch/i386/i386/machdep.c (revision 1799)
+++ arch/i386/i386/machdep.c (working copy)
@@ -458,6 +458,9 @@ cpu_startup()
/* Safe for i/o port / memory space allocation to use malloc now. */
x86_bus_space_mallocok();
+
+ gdt_init();
+ i386_proc0_tss_ldt_init();
}
/*
@@ -466,13 +469,12 @@ cpu_startup()
void
i386_proc0_tss_ldt_init()
{
+ struct lwp *l;
struct pcb *pcb;
int x;
- gdt_init();
-
- cpu_info_primary.ci_curpcb = pcb = &lwp0.l_addr->u_pcb;
-
+ l = &lwp0;
+ pcb = &l->l_addr->u_pcb;
pcb->pcb_tss.tss_ioopt =
((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
@@ -482,36 +484,15 @@ i386_proc0_tss_ldt_init()
pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
pcb->pcb_cr0 = rcr0();
pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
- pcb->pcb_tss.tss_esp0 = USER_TO_UAREA(lwp0.l_addr) + KSTACK_SIZE - 16;
- lwp0.l_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1;
- lwp0.l_md.md_tss_sel = tss_alloc(pcb);
+ pcb->pcb_tss.tss_esp0 = USER_TO_UAREA(l->l_addr) + KSTACK_SIZE - 16;
+ l->l_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1;
+ l->l_md.md_tss_sel = tss_alloc(pcb);
- ltr(lwp0.l_md.md_tss_sel);
+ ltr(l->l_md.md_tss_sel);
lldt(pcb->pcb_ldt_sel);
}
/*
- * Set up TSS and LDT for a new PCB.
- */
-
-void
-i386_init_pcb_tss_ldt(struct cpu_info *ci)
-{
- int x;
- struct pcb *pcb = ci->ci_idle_pcb;
-
- pcb->pcb_tss.tss_ioopt =
- ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
- for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
- pcb->pcb_iomap[x] = 0xffffffff;
-
- pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
- pcb->pcb_cr0 = rcr0();
-
- ci->ci_idle_tss_sel = tss_alloc(pcb);
-}
-
-/*
* sysctl helper routine for machdep.tm* nodes.
*/
static int
@@ -1483,7 +1464,6 @@ init386(paddr_t first_avail)
proc0paddr = UAREA_TO_USER(proc0uarea);
lwp0.l_addr = proc0paddr;
- cpu_info_primary.ci_curpcb = &lwp0.l_addr->u_pcb;
x86_bus_space_init();
consinit(); /* XXX SHOULD NOT BE DONE HERE */
@@ -2397,7 +2377,7 @@ need_resched(struct cpu_info *ci)
return;
ci->ci_want_resched = 1;
- if ((ci)->ci_curlwp != NULL)
+ if (ci->ci_curlwp != ci->ci_data.cpu_idlelwp)
aston((ci)->ci_curlwp->l_proc);
else if (ci != curcpu())
x86_send_ipi(ci, 0);
Index: arch/i386/i386/autoconf.c
===================================================================
--- arch/i386/i386/autoconf.c (revision 1664)
+++ arch/i386/i386/autoconf.c (working copy)
@@ -107,8 +107,6 @@ cpu_configure(void)
pcibios_init();
#endif
- /* kvm86 needs a TSS */
- i386_proc0_tss_ldt_init();
#ifdef KVM86
kvm86_init();
#endif
@@ -128,7 +126,7 @@ cpu_configure(void)
lwp0.l_addr->u_pcb.pcb_cr0 = rcr0();
#ifdef MULTIPROCESSOR
/* propagate this to the idle pcb's. */
- cpu_init_idle_pcbs();
+ cpu_init_idle_lwps();
#endif
#if defined(I586_CPU) || defined(I686_CPU)
Index: arch/i386/i386/cpu.c
===================================================================
--- arch/i386/i386/cpu.c (revision 1785)
+++ arch/i386/i386/cpu.c (working copy)
@@ -87,6 +87,7 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.29
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/malloc.h>
+#include <sys/idle.h>
#include <uvm/uvm_extern.h>
@@ -251,8 +252,6 @@ cpu_attach(parent, self, aux)
struct cpu_info *ci;
#if defined(MULTIPROCESSOR)
int cpunum = caa->cpu_number;
- vaddr_t kstack;
- struct pcb *pcb;
#endif
/*
@@ -304,30 +303,22 @@ cpu_attach(parent, self, aux)
#if defined(MULTIPROCESSOR)
/*
- * Allocate UPAGES contiguous pages for the idle PCB and stack.
+ * primary cpu has its idle lwp already allocated by init_main.
*/
- kstack = uvm_km_alloc(kernel_map, USPACE, 0, UVM_KMF_WIRED);
- if (kstack == 0) {
- if (caa->cpu_role != CPU_ROLE_AP) {
- printf("\n");
- panic("cpu_attach: unable to allocate idle stack for"
- " primary");
+
+ if (caa->cpu_role == CPU_ROLE_AP) {
+ int error;
+
+ error = create_idle_lwp(ci);
+ if (error != 0) {
+ aprint_normal("\n");
+ aprint_error("%s: unable to allocate idle lwp\n",
+ sc->sc_dev.dv_xname);
+ return;
}
- aprint_normal("\n");
- aprint_error("%s: unable to allocate idle stack\n",
- sc->sc_dev.dv_xname);
- return;
+ } else {
+ KASSERT(ci->ci_data.cpu_idlelwp != NULL);
}
- pcb = ci->ci_idle_pcb = (struct pcb *) kstack;
- memset(pcb, 0, USPACE);
-
- pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
- pcb->pcb_tss.tss_esp0 =
- kstack + USPACE - 16 - sizeof (struct trapframe);
- pcb->pcb_tss.tss_esp =
- kstack + USPACE - 16 - sizeof (struct trapframe);
- pcb->pcb_cr0 = rcr0();
- pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa;
#endif
pmap_reference(pmap_kernel());
ci->ci_pmap = pmap_kernel();
@@ -397,10 +388,10 @@ cpu_attach(parent, self, aux)
#if defined(MULTIPROCESSOR)
if (mp_verbose) {
- aprint_verbose("%s: kstack at 0x%lx for %d bytes\n",
- sc->sc_dev.dv_xname, kstack, USPACE);
- aprint_verbose("%s: idle pcb at %p, idle sp at 0x%x\n",
- sc->sc_dev.dv_xname, pcb, pcb->pcb_esp);
+ struct lwp *l = ci->ci_data.cpu_idlelwp;
+
+ aprint_verbose("%s: idle lwp at %p, idle sp at 0x%x\n",
+ sc->sc_dev.dv_xname, l, l->l_addr->u_pcb.pcb_esp);
}
#endif
}
@@ -493,7 +484,7 @@ cpu_boot_secondary_processors()
ci = cpu_info[i];
if (ci == NULL)
continue;
- if (ci->ci_idle_pcb == NULL)
+ if (ci->ci_data.cpu_idlelwp == NULL)
continue;
if ((ci->ci_flags & CPUF_PRESENT) == 0)
continue;
@@ -503,21 +494,30 @@ cpu_boot_secondary_processors()
}
}
+static void
+cpu_init_idle_lwp(struct cpu_info *ci)
+{
+ struct lwp *l = ci->ci_data.cpu_idlelwp;
+ struct pcb *pcb = &l->l_addr->u_pcb;
+
+ pcb->pcb_cr0 = rcr0();
+}
+
void
-cpu_init_idle_pcbs()
+cpu_init_idle_lwps()
{
struct cpu_info *ci;
u_long i;
- for (i=0; i < X86_MAXPROCS; i++) {
+ for (i = 0; i < X86_MAXPROCS; i++) {
ci = cpu_info[i];
if (ci == NULL)
continue;
- if (ci->ci_idle_pcb == NULL)
+ if (ci->ci_data.cpu_idlelwp == NULL)
continue;
if ((ci->ci_flags & CPUF_PRESENT) == 0)
continue;
- i386_init_pcb_tss_ldt(ci);
+ cpu_init_idle_lwp(ci);
}
}
@@ -525,19 +525,17 @@ void
cpu_start_secondary(ci)
struct cpu_info *ci;
{
- struct pcb *pcb;
int i;
struct pmap *kpm = pmap_kernel();
extern uint32_t mp_pdirpa;
mp_pdirpa = kpm->pm_pdirpa; /* XXX move elsewhere, not per CPU. */
- pcb = ci->ci_idle_pcb;
-
ci->ci_flags |= CPUF_AP;
aprint_normal("%s: starting\n", ci->ci_dev->dv_xname);
+ ci->ci_curlwp = ci->ci_data.cpu_idlelwp;
CPU_STARTUP(ci);
/*
@@ -613,7 +611,7 @@ cpu_hatch(void *v)
panic("%s: already running!?", ci->ci_dev->dv_xname);
#endif
- lcr0(ci->ci_idle_pcb->pcb_cr0);
+ lcr0(ci->ci_data.cpu_idlelwp->l_addr->u_pcb.pcb_cr0);
cpu_init_idt();
lapic_set_lvt();
gdt_init_cpu(ci);
Index: arch/i386/i386/pmap.c
===================================================================
--- arch/i386/i386/pmap.c (revision 1638)
+++ arch/i386/i386/pmap.c (working copy)
@@ -1757,7 +1757,7 @@ pmap_ldt_cleanup(l)
ldt_free(pmap);
pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
- if (pcb == curpcb)
+ if (l == curlwp)
lldt(pcb->pcb_ldt_sel);
old_ldt = pmap->pm_ldt;
len = pmap->pm_ldt_len * sizeof(union descriptor);
@@ -1889,8 +1889,7 @@ pmap_load()
KASSERT(pmap != pmap_kernel());
oldpmap = ci->ci_pmap;
- pcb = ci->ci_curpcb;
- KASSERT(pcb == &l->l_addr->u_pcb);
+ pcb = &l->l_addr->u_pcb;
/* loaded by pmap_activate */
KASSERT(pcb->pcb_ldt_sel == pmap->pm_ldt_sel);
@@ -2159,7 +2158,7 @@ pmap_pageidlezero(pa)
pmap_update_pg((vaddr_t)zerova); /* flush TLB */
for (ptr = (int *) zerova, ep = ptr + PAGE_SIZE / sizeof(int);
ptr < ep; ptr++) {
- if (sched_whichqs != 0) {
+ if (sched_curcpu_runnable_p()) {
/*
* A process has become ready. Abort now,
Index: arch/i386/i386/sys_machdep.c
===================================================================
--- arch/i386/i386/sys_machdep.c (revision 1799)
+++ arch/i386/i386/sys_machdep.c (working copy)
@@ -325,9 +325,7 @@ i386_set_ldt(l, args, retval)
pmap->pm_flags |= PMF_USER_LDT;
ldt_alloc(pmap, new_ldt, new_len);
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
- if (pcb == curpcb)
- lldt(pcb->pcb_ldt_sel);
-
+ lldt(pcb->pcb_ldt_sel);
}
copy:
/* Now actually replace the descriptors. */
Index: arch/i386/i386/mptramp.S
===================================================================
--- arch/i386/i386/mptramp.S (revision 1464)
+++ arch/i386/i386/mptramp.S (working copy)
@@ -221,7 +221,8 @@ _C_LABEL(cpu_spinup_trampoline_end): #en
mp_cont:
HALT(0x15)
- movl CPU_INFO_IDLE_PCB(%ecx),%esi
+ movl CPU_INFO_IDLELWP(%ecx),%esi
+ movl L_ADDR(%esi),%esi
# %esi now points at our PCB.
@@ -250,8 +251,7 @@ mp_cont:
pushl %ecx
call _C_LABEL(cpu_hatch)
HALT(0x33)
- xorl %esi,%esi
- jmp _C_LABEL(mpidle)
+ jmp _C_LABEL(idle_loop)
.data
_C_LABEL(mp_pdirpa):
Index: arch/i386/i386/locore.S
===================================================================
--- arch/i386/i386/locore.S (revision 1747)
+++ arch/i386/i386/locore.S (working copy)
@@ -107,24 +107,6 @@
#include <machine/asm.h>
-#if defined(MULTIPROCESSOR)
-
-#define SET_CURLWP(lwp,cpu) \
- movl CPUVAR(SELF),cpu ; \
- movl lwp,CPUVAR(CURLWP) ; \
- movl cpu,L_CPU(lwp)
-
-#else
-
-#define SET_CURLWP(lwp,tcpu) movl lwp,CPUVAR(CURLWP)
-#define GET_CURLWP(reg) movl CPUVAR(CURLWP),reg
-
-#endif
-
-#define SET_CURPCB(reg) movl reg,CPUVAR(CURPCB)
-
-#define CLEAR_RESCHED(reg) movl reg,CPUVAR(RESCHED)
-
/* XXX temporary kluge; these should not be here */
/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
#include <dev/isa/isareg.h>
@@ -657,10 +639,13 @@ begin:
*/
/* LINTSTUB: Func: void proc_trampoline(void) */
NENTRY(proc_trampoline)
+ movl $IPL_NONE,CPUVAR(ILEVEL)
+ pushl CPUVAR(CURLWP)
+ call _C_LABEL(pmap_activate)
+ addl $4,%esp
#ifdef MULTIPROCESSOR
call _C_LABEL(proc_trampoline_mp)
#endif
- movl $IPL_NONE,CPUVAR(ILEVEL)
pushl %ebx
call *%esi
addl $4,%esp
@@ -761,7 +746,6 @@ ENTRY(longjmp)
/*****************************************************************************/
- .globl _C_LABEL(sched_whichqs),_C_LABEL(sched_qs)
.globl _C_LABEL(uvmexp),_C_LABEL(panic)
#ifdef DIAGNOSTIC
@@ -773,200 +757,27 @@ NENTRY(switch_error)
#endif /* DIAGNOSTIC */
/*
- * void cpu_switch(struct lwp *)
- * Find a runnable lwp and switch to it. Wait if necessary. If the new
- * lwp is the same as the old one, we short-circuit the context save and
- * restore.
+ * void cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp)
+ *
+ * 1. if (oldlwp != NULL), save its context and call sched_unlock().
+ * 2. then, restore context of newlwp.
*
* Note that the stack frame layout is known to "struct switchframe"
- * in <machine/frame.h> and to the code in cpu_fork() which initializes
+ * in <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
* it for a new lwp.
*/
-ENTRY(cpu_switch)
+
+/*
+ * void cpu_switchto(struct lwp *current, struct lwp *next)
+ * Switch to the specified next LWP.
+ */
+ENTRY(cpu_switchto)
pushl %ebx
pushl %esi
pushl %edi
-#ifdef DEBUG
- cmpl $IPL_SCHED,CPUVAR(ILEVEL)
- jae 1f
- pushl $2f
- call _C_LABEL(panic)
- /* NOTREACHED */
-2: .asciz "not splsched() in cpu_switch!"
-1:
-#endif /* DEBUG */
-
movl 16(%esp),%esi # current
-
- /*
- * Clear curlwp so that we don't accumulate system time while idle.
- * This also insures that schedcpu() will move the old lwp to
- * the correct queue if it happens to get called from the spllower()
- * below and changes the priority. (See corresponding comment in
- * userret()).
- */
- movl $0,CPUVAR(CURLWP)
- /*
- * First phase: find new lwp.
- *
- * Registers:
- * %eax - queue head, scratch, then zero
- * %ebx - queue number
- * %ecx - cached value of whichqs
- * %edx - next lwp in queue
- * %esi - old lwp
- * %edi - new lwp
- */
-
- /* Look for new lwp. */
- cli # splhigh doesn't do a cli
- movl _C_LABEL(sched_whichqs),%ecx
- bsfl %ecx,%ebx # find a full q
- jnz switch_dequeue
-
- /*
- * idling: save old context.
- *
- * Registers:
- * %eax, %ecx - scratch
- * %esi - old lwp, then old pcb
- * %edi - idle pcb
- */
-
- pushl %esi
- call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc)
- addl $4,%esp
-
- movl L_ADDR(%esi),%esi
-
- /* Save stack pointers. */
- movl %esp,PCB_ESP(%esi)
- movl %ebp,PCB_EBP(%esi)
-
- /* Find idle PCB for this CPU */
-#ifndef MULTIPROCESSOR
- movl $_C_LABEL(lwp0),%ebx
- movl L_ADDR(%ebx),%edi
- movl L_MD_TSS_SEL(%ebx),%edx
-#else
- movl CPUVAR(IDLE_PCB),%edi
- movl CPUVAR(IDLE_TSS_SEL),%edx
-#endif
- movl $0,CPUVAR(CURLWP) /* In case we fault... */
-
- /* Restore the idle context (avoid interrupts) */
- cli
-
- /* Restore stack pointers. */
- movl PCB_ESP(%edi),%esp
- movl PCB_EBP(%edi),%ebp
-
- /* Switch TSS. Reset "task busy" flag before loading. */
- movl %cr3,%eax
- movl %eax,PCB_CR3(%edi)
-#ifdef MULTIPROCESSOR
- movl CPUVAR(GDT),%eax
-#else
- movl _C_LABEL(gdt),%eax
-#endif
- andl $~0x0200,4-SEL_KPL(%eax,%edx,1)
- ltr %dx
-
- /* We're always in the kernel, so we don't need the LDT. */
-
- /* Restore cr0 (including FPU state). */
- movl PCB_CR0(%edi),%ecx
- movl %ecx,%cr0
-
- /* Record new pcb. */
- SET_CURPCB(%edi)
-
- xorl %esi,%esi
- sti
-idle_unlock:
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
- call _C_LABEL(sched_unlock_idle)
-#endif
- /* Interrupts are okay again. */
- pushl $IPL_NONE # spl0()
- call _C_LABEL(Xspllower) # process pending interrupts
- addl $4,%esp
- jmp idle_start
-idle_zero:
- sti
- call _C_LABEL(uvm_pageidlezero)
- cli
- cmpl $0,_C_LABEL(sched_whichqs)
- jnz idle_exit
-idle_loop:
- /* Try to zero some pages. */
- movl _C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO,%ecx
- testl %ecx,%ecx
- jnz idle_zero
- sti
- hlt
-NENTRY(mpidle)
-idle_start:
- cli
- cmpl $0,_C_LABEL(sched_whichqs)
- jz idle_loop
-idle_exit:
- movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh
- sti
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
- call _C_LABEL(sched_lock_idle)
-#endif
- movl _C_LABEL(sched_whichqs),%ecx
- bsfl %ecx,%ebx
- jz idle_unlock
-
-switch_dequeue:
- /*
- * we're running at splhigh(), but it's otherwise okay to take
- * interrupts here.
- */
- sti
- leal _C_LABEL(sched_qs)(,%ebx,8),%eax # select q
-
- movl L_FORW(%eax),%edi # unlink from front of process q
-#ifdef DIAGNOSTIC
- cmpl %edi,%eax # linked to self (i.e. nothing queued)?
- je _C_LABEL(switch_error) # not possible
-#endif /* DIAGNOSTIC */
- movl L_FORW(%edi),%edx
- movl %edx,L_FORW(%eax)
- movl %eax,L_BACK(%edx)
-
- cmpl %edx,%eax # q empty?
- jne 3f
-
- btrl %ebx,%ecx # yes, clear to indicate empty
- movl %ecx,_C_LABEL(sched_whichqs) # update q status
-
-3: /* We just did it. */
- xorl %eax,%eax
- CLEAR_RESCHED(%eax)
-
-switch_resume:
-#ifdef DIAGNOSTIC
- cmpl %eax,L_WCHAN(%edi) # Waiting for something?
- jne _C_LABEL(switch_error) # Yes; shouldn't be queued.
- cmpb $LSRUN,L_STAT(%edi) # In run state?
- jne _C_LABEL(switch_error) # No; shouldn't be queued.
-#endif /* DIAGNOSTIC */
-
- /* Isolate lwp. XXX Is this necessary? */
- movl %eax,L_BACK(%edi)
-
- /* Record new lwp. */
- movb $LSONPROC,L_STAT(%edi) # l->l_stat = LSONPROC
- SET_CURLWP(%edi,%ecx)
-
- /* Skip context switch if same lwp. */
- xorl %ebx,%ebx
- cmpl %edi,%esi
- je switch_return
+ movl 20(%esp),%edi # next
/* If old lwp exited, don't bother. */
testl %esi,%esi
@@ -981,16 +792,16 @@ switch_resume:
* %edi - new lwp
*/
- pushl %esi
- call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc)
- addl $4,%esp
-
movl L_ADDR(%esi),%esi
/* Save stack pointers. */
movl %esp,PCB_ESP(%esi)
movl %ebp,PCB_EBP(%esi)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+ call _C_LABEL(sched_unlock)
+#endif
+
switch_exited:
/*
* Third phase: restore saved context.
@@ -1029,10 +840,6 @@ switch_exited:
andl $~0x0200,4(%eax,%edx, 1)
ltr %dx
- pushl %edi
- call _C_LABEL(pmap_activate) # pmap_activate(p)
- addl $4,%esp
-
#if 0
switch_restored:
#endif
@@ -1051,9 +858,6 @@ switch_restored:
#endif
movl %ecx,%cr0
- /* Record new pcb. */
- SET_CURPCB(%esi)
-
/* Interrupts are okay again. */
sti
@@ -1065,22 +869,6 @@ switch_restored:
cmpl $0,P_RASLIST(%esi)
jne 2f
1:
- movl $1,%ebx
-
-switch_return:
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
- call _C_LABEL(sched_unlock_idle)
-#endif
- cmpl $0,CPUVAR(IPENDING)
- jz 3f
- pushl $IPL_NONE # spl0()
- call _C_LABEL(Xspllower) # process pending interrupts
- addl $4,%esp
-3:
- movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh()
-
- movl %ebx,%eax
-
popl %edi
popl %esi
popl %ebx
@@ -1099,116 +887,6 @@ switch_return:
jmp 1b
/*
- * void cpu_switchto(struct lwp *current, struct lwp *next)
- * Switch to the specified next LWP.
- */
-ENTRY(cpu_switchto)
- pushl %ebx
- pushl %esi
- pushl %edi
-
-#ifdef DEBUG
- cmpl $IPL_SCHED,CPUVAR(ILEVEL)
- jae 1f
- pushl $2f
- call _C_LABEL(panic)
- /* NOTREACHED */
-2: .asciz "not splsched() in cpu_switchto!"
-1:
-#endif /* DEBUG */
-
- movl 16(%esp),%esi # current
- movl 20(%esp),%edi # next
-
- /*
- * Clear curlwp so that we don't accumulate system time while idle.
- * This also insures that schedcpu() will move the old process to
- * the correct queue if it happens to get called from the spllower()
- * below and changes the priority. (See corresponding comment in
- * usrret()).
- *
- * XXX Is this necessary? We know we won't go idle.
- */
- movl $0,CPUVAR(CURLWP)
-
- /*
- * We're running at splhigh(), but it's otherwise okay to take
- * interrupts here.
- */
- sti
-
- /* Jump into the middle of cpu_switch */
- xorl %eax,%eax
- jmp switch_resume
-
-/*
- * void cpu_exit(struct lwp *l)
- * Switch to the appropriate idle context (lwp0's if uniprocessor; the CPU's
- * if multiprocessor) and deallocate the address space and kernel stack for p.
- * Then jump into cpu_switch(), as if we were in the idle proc all along.
- */
-#ifndef MULTIPROCESSOR
- .globl _C_LABEL(lwp0)
-#endif
-/* LINTSTUB: Func: void cpu_exit(struct lwp *l) */
-ENTRY(cpu_exit)
- movl 4(%esp),%edi # old process
-#ifndef MULTIPROCESSOR
- movl $_C_LABEL(lwp0),%ebx
- movl L_ADDR(%ebx),%esi
- movl L_MD_TSS_SEL(%ebx),%edx
-#else
- movl CPUVAR(IDLE_PCB),%esi
- movl CPUVAR(IDLE_TSS_SEL),%edx
-#endif
- /* In case we fault... */
- movl $0,CPUVAR(CURLWP)
-
- /* Restore the idle context. */
- cli
-
- /* Restore stack pointers. */
- movl PCB_ESP(%esi),%esp
- movl PCB_EBP(%esi),%ebp
-
- /* Switch TSS. Reset "task busy" flag before loading. */
- movl %cr3,%eax
- movl %eax,PCB_CR3(%esi)
-#ifdef MULTIPROCESSOR
- movl CPUVAR(GDT),%eax
-#else
- /* Load TSS info. */
- movl _C_LABEL(gdt),%eax
-#endif
-
- andl $~0x0200,4-SEL_KPL(%eax,%edx,1)
- ltr %dx
-
- /* We're always in the kernel, so we don't need the LDT. */
-
- /* Restore cr0 (including FPU state). */
- movl PCB_CR0(%esi),%ecx
- movl %ecx,%cr0
-
- /* Record new pcb. */
- SET_CURPCB(%esi)
-
- /* Interrupts are okay again. */
- sti
-
- /*
- * Schedule the dead LWP's stack to be freed.
- */
- pushl %edi
- call _C_LABEL(lwp_exit2)
- addl $4,%esp
-
- /* Jump into cpu_switch() with the right state. */
- xorl %esi,%esi
- movl %esi,CPUVAR(CURLWP)
- jmp idle_start
-
-/*
* void savectx(struct pcb *pcb);
* Update pcb, saving current processor state.
*/
Index: arch/i386/i386/trap.c
===================================================================
--- arch/i386/i386/trap.c (revision 1799)
+++ arch/i386/i386/trap.c (working copy)
@@ -509,8 +509,10 @@ copyfault:
KERNEL_PROC_UNLOCK(l);
}
/* Allow a forced task switch. */
- if (curcpu()->ci_want_resched) /* XXX CSE me? */
+ if (curcpu()->ci_want_resched) { /* XXX CSE me? */
+ curcpu()->ci_want_resched = 0;
preempt(0);
+ }
goto out;
case T_DNA|T_USER: {
Index: arch/i386/i386/genassym.cf
===================================================================
--- arch/i386/i386/genassym.cf (revision 1728)
+++ arch/i386/i386/genassym.cf (working copy)
@@ -283,10 +283,8 @@ define CPU_INFO_WANT_PMAPLOAD offsetof(s
define CPU_INFO_TLBSTATE offsetof(struct cpu_info, ci_tlbstate)
define TLBSTATE_VALID TLBSTATE_VALID
define CPU_INFO_CURLWP offsetof(struct cpu_info, ci_curlwp)
-define CPU_INFO_CURPCB offsetof(struct cpu_info, ci_curpcb)
-define CPU_INFO_IDLE_PCB offsetof(struct cpu_info, ci_idle_pcb)
-define CPU_INFO_IDLE_TSS_SEL offsetof(struct cpu_info, ci_idle_tss_sel)
define CPU_INFO_ASTPENDING offsetof(struct cpu_info, ci_astpending)
+define CPU_INFO_IDLELWP offsetof(struct cpu_info, ci_data.cpu_idlelwp)
define CPU_INFO_LEVEL offsetof(struct cpu_info, ci_cpuid_level)
define CPU_INFO_VENDOR offsetof(struct cpu_info, ci_vendor[0])
Index: ddb/db_xxx.c
===================================================================
--- ddb/db_xxx.c (revision 1638)
+++ ddb/db_xxx.c (working copy)
@@ -292,25 +292,6 @@ db_dmesg(db_expr_t addr, int haddr, db_e
void
db_show_sched_qs(db_expr_t addr, int haddr, db_expr_t count, const char *modif)
{
- struct prochd *ph;
- struct lwp *l;
- int i, first;
- for (i = 0; i < RUNQUE_NQS; i++)
- {
- first = 1;
- ph = &sched_qs[i];
- for (l = ph->ph_link; l != (void *)ph; l = l->l_forw) {
- if (first) {
- db_printf("%c%d",
- (sched_whichqs & RQMASK(i))
- ? ' ' : '!', i);
- first = 0;
- }
- db_printf("\t%d.%d (%s) pri=%d usrpri=%d\n",
- l->l_proc->p_pid,
- l->l_lid, l->l_proc->p_comm,
- (int)l->l_priority, (int)l->l_usrpri);
- }
- }
+ sched_print_runqueue(db_printf);
}
Index: sys/lwp.h
===================================================================
--- sys/lwp.h (revision 1747)
+++ sys/lwp.h (working copy)
@@ -114,6 +114,7 @@ extern struct lwp lwp0; /* LWP for pro
#endif
/* These flags are kept in l_flag. [*] is shared with p_flag */
+#define L_IDLE 0x00000001
#define L_INMEM 0x00000004 /* [*] Loaded into memory. */
#define L_SELECT 0x00000040 /* [*] Selecting; wakeup/waiting danger. */
#define L_SINTR 0x00000080 /* [*] Sleep is interruptible. */
@@ -174,9 +175,6 @@ void setrunqueue (struct lwp *);
struct lwp *nextrunqueue(void);
#endif
void unsleep (struct lwp *);
-#ifndef cpu_switch
-int cpu_switch (struct lwp *, struct lwp *);
-#endif
#ifndef cpu_switchto
void cpu_switchto (struct lwp *, struct lwp *);
#endif
@@ -192,6 +190,7 @@ void cpu_setfunc(struct lwp *, void (*)(
void startlwp(void *);
void upcallret(struct lwp *);
void lwp_exit (struct lwp *);
+void lwp_exit_switchaway(struct lwp *);
void lwp_exit2 (struct lwp *);
struct lwp *proc_representative_lwp(struct proc *);
__inline int lwp_suspend(struct lwp *, struct lwp *);
Index: sys/cpu_data.h
===================================================================
--- sys/cpu_data.h (revision 1787)
+++ sys/cpu_data.h (working copy)
@@ -61,7 +61,9 @@ struct lwp;
*/
struct cpu_data {
- struct schedstate_percpu cpu_schedstate; /* scheduler state */
+ struct schedstate_percpu cpu_schedstate; /* scheduler state */
+ struct lwp *cpu_idlelwp; /* idle lwp */
+ struct lwp *cpu_exitinglwp;
#if defined(MULTIPROCESSOR)
int cpu_biglock_count;
Index: sys/sched.h
===================================================================
--- sys/sched.h (revision 1638)
+++ sys/sched.h (working copy)
@@ -175,17 +175,6 @@ struct schedstate_percpu {
extern int schedhz; /* ideally: 16 */
extern int rrticks; /* ticks per roundrobin() */
-/*
- * Global scheduler state. We would like to group these all together
- * in a single structure to make them easier to find, but leaving
- * whichqs and qs as independent globals makes for more efficient
- * assembly language in the low-level context switch code. So we
- * simply give them meaningful names; the globals are actually declared
- * in kern/kern_synch.c.
- */
-extern struct prochd sched_qs[];
-extern volatile uint32_t sched_whichqs;
-
struct proc;
struct cpu_info;
@@ -217,8 +206,9 @@ do { \
splx(s); \
} while (/* CONSTCOND */ 0)
-void sched_lock_idle(void);
-void sched_unlock_idle(void);
+void sched_unlock(void);
+boolean_t sched_curcpu_runnable_p(void);
+void sched_print_runqueue(void (*)(const char *, ...));
#else /* ! MULTIPROCESSOR || LOCKDEBUG */
Index: sys/proc.h
===================================================================
--- sys/proc.h (revision 1747)
+++ sys/proc.h (working copy)
@@ -410,6 +410,8 @@ extern struct lwp *curlwp; /* Current r
#endif /* MULTIPROCESSOR */
#endif /* ! curproc */
+#define CURCPU_IDLE_P() (curlwp == curcpu()->ci_data.cpu_idlelwp)
+
static struct proc *__curproc(void);
static __inline struct proc *
@@ -465,7 +467,6 @@ int inferior(struct proc *, struct proc
int leavepgrp(struct proc *);
void sessdelete(struct session *);
void yield(void);
-struct lwp *chooselwp(void);
void pgdelete(struct pgrp *);
void procinit(void);
void resetprocpriority(struct proc *);
--NextPart-20060930211225-0902601
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="kern_idle.c"
/* $NetBSD$ */
/*-
* Copyright (c)2002, 2006 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");
#include <sys/param.h>
#include <sys/idle.h>
#include <sys/lwp.h>
#include <sys/proc.h>
#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>
#define PIDLELWP (MAXPRI + 1) /* lowest priority */
void
idle_loop(void *dummy)
{
struct cpu_info *ci = curcpu();
struct lwp *l = curlwp;
(void)KERNEL_LOCK_RELEASE_ALL();
l->l_usrpri = PIDLELWP;
while (1 /* CONSTCOND */) {
struct lwp *exiting;
int s;
KERNEL_LOCK_ASSERT_UNLOCKED();
KASSERT((l->l_flag & L_IDLE) != 0);
KASSERT(ci == curcpu());
KASSERT(l == curlwp);
KASSERT(CURCPU_IDLE_P());
KASSERT(l->l_usrpri == PIDLELWP);
if (uvm.page_idle_zero) {
if (sched_curcpu_runnable_p()) {
goto schedule;
}
uvm_pageidlezero();
}
if (!sched_curcpu_runnable_p()) {
cpu_idle();
if (!sched_curcpu_runnable_p()) {
continue;
}
}
schedule:
SCHED_LOCK(s);
l->l_stat = LSRUN;
mi_switch(l, NULL);
splx(s);
exiting = ci->ci_data.cpu_exitinglwp;
if (exiting != NULL) {
ci->ci_data.cpu_exitinglwp = NULL;
lwp_exit2(exiting);
}
}
}
int
create_idle_lwp(struct cpu_info *ci)
{
struct proc *p = &proc0;
struct lwp *l;
vaddr_t uaddr;
boolean_t inmem;
int error;
/* XXX should use kthread_create1? */
KASSERT(ci->ci_data.cpu_idlelwp == NULL);
inmem = uvm_uarea_alloc(&uaddr);
if (uaddr == 0) {
return ENOMEM;
}
error = newlwp(&lwp0, p, uaddr, inmem, 0, NULL, 0, idle_loop, NULL, &l);
if (error != 0) {
panic("create_idle_lwp: newlwp failed");
}
PHOLD(l);
l->l_stat = LSRUN;
l->l_flag |= L_IDLE;
l->l_cpu = ci;
ci->ci_data.cpu_idlelwp = l;
return error;
}
--NextPart-20060930211225-0902601
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="idle_machdep.c"
/* $NetBSD$ */
/*-
* Copyright (c)2002, 2006 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");
#include <sys/param.h>
#include <sys/proc.h>
#include <machine/cpufunc.h>
void
cpu_idle(void)
{
struct cpu_info *ci = curcpu();
disable_intr();
__insn_barrier();
if (__predict_false(ci->ci_want_resched) == 0) {
__asm __volatile ("sti; hlt");
} else {
enable_intr();
}
ci->ci_want_resched = 0;
}
--NextPart-20060930211225-0902601--