Subject: lazy pmap switch
To: port-i386@netbsd.org
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: port-i386
Date: 02/07/2004 19:21:31

hi,

currently we switch between address spaces too often
(eg. every time we start idling), and on i386 that
means flushing all user tlb entries.
to resolve the issue, i'd like to postpone loading %cr3
until it's really needed.
i'll commit the attached diffs if no one objects.
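
roughly, the idea in a C sketch (just an illustration reusing
names from the attached diff, like ci_want_pmapload and
pmap_load(); it omits the LDT handling, the lazy-reference
bookkeeping, and the MP shootdown interactions):

	/* at context switch: don't touch %cr3, just record the wish. */
	void
	pmap_activate(struct lwp *l)
	{
		struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);

		/* kernel mappings are present in every pmap. */
		curcpu()->ci_want_pmapload = (pmap != pmap_kernel());
	}

	/*
	 * called on the way back to user mode, and before copyin/copyout
	 * and friends touch userspace: do the real switch.
	 */
	void
	pmap_load(void)
	{
		struct cpu_info *ci = curcpu();
		struct pmap *pmap =
		    vm_map_pmap(&ci->ci_curlwp->l_proc->p_vmspace->vm_map);

		ci->ci_want_pmapload = 0;
		if (pmap != ci->ci_pmap) {
			ci->ci_pmap = pmap;
			lcr3(pmap->pm_pdirpa);	/* pay for the tlb flush only here */
		}
	}

this way, switching to the idle lwp and back without touching
userspace never reloads %cr3, so the user tlb entries survive.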

YAMAMOTO Takashi


[attachment: i386.lazysw.diff]

Index: include/frameasm.h
===================================================================
--- include/frameasm.h	(revision 528)
+++ include/frameasm.h	(revision 551)
@@ -80,6 +80,15 @@
 	addl	$(TF_PUSHSIZE+8),%esp	; \
 	iret
 
+#define	DO_DEFERRED_SWITCH(reg) \
+	cmpl	$0, CPUVAR(WANT_PMAPLOAD)		; \
+	jz	1f					; \
+	call	_C_LABEL(pmap_load)			; \
+	1:
+
+#define	CHECK_DEFERRED_SWITCH(reg) \
+	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
+
 #define	CHECK_ASTPENDING(reg)	movl	CPUVAR(CURLWP),reg	; \
 				cmpl	$0, reg			; \
 				je	1f			; \
Index: include/pcb.h
===================================================================
--- include/pcb.h	(revision 528)
+++ include/pcb.h	(revision 551)
@@ -108,7 +108,6 @@ struct pcb {
 	int	vm86_eflags;		/* virtual eflags for vm86 mode */
 	int	vm86_flagmask;		/* flag mask for vm86 mode */
 	void	*vm86_userp;		/* XXX performance hack */
-	struct pmap *pcb_pmap;		/* back pointer to our pmap */
 	struct cpu_info *pcb_fpcpu;	/* cpu holding our fp state. */
 	u_long	pcb_iomap[NIOPORTS/32];	/* I/O bitmap */
 };
Index: include/cpu.h
===================================================================
--- include/cpu.h	(revision 528)
+++ include/cpu.h	(revision 551)
@@ -61,6 +61,7 @@
 #include <lib/libkern/libkern.h>	/* offsetof */
 
 struct intrsource;
+struct pmap;
 
 /*
  * a bunch of this belongs in cpuvar.h; move it later..
@@ -92,6 +93,8 @@ struct cpu_info {
 
 	volatile u_int32_t	ci_tlb_ipi_mask;
 
+	struct pmap *ci_pmap;		/* current pmap */
+	int ci_want_pmapload;		/* pmap_load() is needed */
 	struct pcb *ci_curpcb;		/* VA of current HW PCB */
 	struct pcb *ci_idle_pcb;	/* VA of current PCB */
 	int ci_idle_tss_sel;		/* TSS selector of idle PCB */
Index: include/pmap.h
===================================================================
--- include/pmap.h	(revision 528)
+++ include/pmap.h	(revision 551)
@@ -254,6 +254,7 @@ struct pmap {
 	int pm_ldt_len;			/* number of LDT entries */
 	int pm_ldt_sel;			/* LDT selector */
 	u_int32_t pm_cpus;		/* mask of CPUs using pmap */
+	u_int32_t pm_weakrefs;		/* mask of CPUs having lazy ref */
 };
 
 /* pm_flags */
@@ -342,12 +343,14 @@ void		pmap_activate(struct lwp *);
 void		pmap_bootstrap(vaddr_t);
 boolean_t	pmap_clear_attrs(struct vm_page *, int);
 void		pmap_deactivate(struct lwp *);
+void		pmap_deactivate2(struct lwp *);
 void		pmap_page_remove (struct vm_page *);
 void		pmap_remove(struct pmap *, vaddr_t, vaddr_t);
 boolean_t	pmap_test_attrs(struct vm_page *, int);
 void		pmap_write_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t);
 int		pmap_exec_fixup(struct vm_map *, struct trapframe *,
 		    struct pcb *);
+void		pmap_load(void);
 
 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
 
Index: i386/svr4_sigcode.S
===================================================================
--- i386/svr4_sigcode.S	(revision 528)
+++ i386/svr4_sigcode.S	(revision 551)
@@ -119,4 +119,10 @@ IDTVEC(svr4_fasttrap)
 	call	_C_LABEL(trap)
 	addl	$4,%esp
 	jmp	2b
-1:	INTRFASTEXIT
+1:	CHECK_DEFERRED_SWITCH(%eax)
+	jnz	9f
+	INTRFASTEXIT
+9:	sti
+	call	_C_LABEL(pmap_load)
+	cli
+	jmp	2b
Index: i386/spl.S
===================================================================
--- i386/spl.S	(revision 528)
+++ i386/spl.S	(revision 551)
@@ -151,13 +151,17 @@ IDTVEC(doreti)
 	jmp	*IS_RESUME(%eax)
 2:	/* Check for ASTs on exit to user mode. */
 	movl	%ebx,CPUVAR(ILEVEL)
-5:	CHECK_ASTPENDING(%eax)
-	je	3f
+5:
 	testb   $SEL_RPL,TF_CS(%esp)
+	jnz	doreti_checkast
 #ifdef VM86
-	jnz	4f
 	testl	$PSL_VM,TF_EFLAGS(%esp)
+	jz	6f
+#else
+	jmp	6f
 #endif
+doreti_checkast:
+	CHECK_ASTPENDING(%eax)
 	jz	3f
 4:	CLEAR_ASTPENDING(%eax)
 	sti
@@ -169,4 +173,12 @@ IDTVEC(doreti)
 	cli
 	jmp	5b
 3:
+	CHECK_DEFERRED_SWITCH(%eax)
+	jnz	9f
+6:
 	INTRFASTEXIT
+9:
+	sti
+	call	_C_LABEL(pmap_load)
+	cli
+	jmp	doreti_checkast	/* recheck ASTs */
Index: i386/vector.S
===================================================================
--- i386/vector.S	(revision 528)
+++ i386/vector.S	(revision 551)
@@ -860,27 +860,32 @@ calltrap:
 	pushl	%esp
 	call	_C_LABEL(trap)
 	addl	$4,%esp
-2:	/* Check for ASTs on exit to user mode. */
-	cli
-	CHECK_ASTPENDING(%eax)
-	je	1f
 	testb	$SEL_RPL,TF_CS(%esp)
+	jnz	alltraps_checkast
 #ifdef VM86
-	jnz	5f
 	testl	$PSL_VM,TF_EFLAGS(%esp)
+	jz	6f
+#else
+	jmp	6f
 #endif
-	jz	1f
+alltraps_checkast:
+	/* Check for ASTs on exit to user mode. */
+	cli
+	CHECK_ASTPENDING(%eax)
+	jz	3f
 5:	CLEAR_ASTPENDING(%eax)
 	sti
 	movl	$T_ASTFLT,TF_TRAPNO(%esp)
 	pushl	%esp
 	call	_C_LABEL(trap)
 	addl	$4,%esp
-	jmp	2b
+	jmp	alltraps_checkast	/* re-check ASTs */
+3:	CHECK_DEFERRED_SWITCH(%eax)
+	jnz	9f
 #ifndef DIAGNOSTIC
-1:	INTRFASTEXIT
+6:	INTRFASTEXIT
 #else
-1:	cmpl	CPUVAR(ILEVEL),%ebx
+6:	cmpl	CPUVAR(ILEVEL),%ebx
 	jne	3f
 	INTRFASTEXIT
 3:	sti
@@ -891,9 +896,12 @@ calltrap:
 	int	$3
 #endif /* DDB */
 	movl	%ebx,CPUVAR(ILEVEL)
-	jmp	2b
+	jmp	alltraps_checkast	/* re-check ASTs */
 4:	.asciz	"WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
 #endif /* DIAGNOSTIC */
+9:	sti
+	call	_C_LABEL(pmap_load)
+	jmp	alltraps_checkast	/* re-check ASTs */
 
 #ifdef IPKDB
 /* LINTSTUB: Ignore */
Index: i386/cpu.c
===================================================================
--- i386/cpu.c	(revision 528)
+++ i386/cpu.c	(revision 551)
@@ -314,10 +314,11 @@ cpu_attach(parent, self, aux)
 	    kstack + USPACE - 16 - sizeof (struct trapframe);
 	pcb->pcb_tss.tss_esp =
 	    kstack + USPACE - 16 - sizeof (struct trapframe);
-	pcb->pcb_pmap = pmap_kernel();
 	pcb->pcb_cr0 = rcr0();
-	pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa;
+	pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa;
 #endif
+	pmap_reference(pmap_kernel());
+	ci->ci_pmap = pmap_kernel();
 
 	/* further PCB init done later. */
 
Index: i386/pmap.c
===================================================================
--- i386/pmap.c	(revision 528)
+++ i386/pmap.c	(revision 551)
@@ -501,6 +501,8 @@ static void		 pmap_tmpunmap_pa(void);
 static void		 pmap_tmpunmap_pvepte(struct pv_entry *);
 static void		 pmap_unmap_ptes(struct pmap *);
 
+static boolean_t	 pmap_reactivate(struct pmap *);
+
 /*
  * p m a p   i n l i n e   h e l p e r   f u n c t i o n s
  */
@@ -514,8 +516,9 @@ __inline static boolean_t
 pmap_is_curpmap(pmap)
 	struct pmap *pmap;
 {
+
 	return((pmap == pmap_kernel()) ||
-	       (pmap->pm_pdirpa == (paddr_t) rcr3()));
+	       (pmap == curcpu()->ci_pmap));
 }
 
 /*
@@ -663,24 +666,33 @@ pmap_map_ptes(pmap)
 	struct pmap *pmap;
 {
 	pd_entry_t opde;
+	struct pmap *ourpmap;
+	struct cpu_info *ci;
 
 	/* the kernel's pmap is always accessible */
 	if (pmap == pmap_kernel()) {
 		return(PTE_BASE);
 	}
 
+	ci = curcpu();
+	if (ci->ci_want_pmapload &&
+	    vm_map_pmap(&ci->ci_curlwp->l_proc->p_vmspace->vm_map) == pmap)
+		pmap_load();
+
 	/* if curpmap then we are always mapped */
 	if (pmap_is_curpmap(pmap)) {
 		simple_lock(&pmap->pm_obj.vmobjlock);
 		return(PTE_BASE);
 	}
 
+	ourpmap = ci->ci_pmap;
+
 	/* need to lock both curpmap and pmap: use ordered locking */
-	if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) {
+	if ((unsigned) pmap < (unsigned) ourpmap) {
 		simple_lock(&pmap->pm_obj.vmobjlock);
-		simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
+		simple_lock(&ourpmap->pm_obj.vmobjlock);
 	} else {
-		simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
+		simple_lock(&ourpmap->pm_obj.vmobjlock);
 		simple_lock(&pmap->pm_obj.vmobjlock);
 	}
 
@@ -690,7 +702,7 @@ pmap_map_ptes(pmap)
 	if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
 		*APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V);
 		if (pmap_valid_entry(opde))
-			pmap_apte_flush(curpcb->pcb_pmap);
+			pmap_apte_flush(ourpmap);
 	}
 	return(APTE_BASE);
 }
@@ -703,19 +715,22 @@ __inline static void
 pmap_unmap_ptes(pmap)
 	struct pmap *pmap;
 {
+
 	if (pmap == pmap_kernel()) {
 		return;
 	}
 	if (pmap_is_curpmap(pmap)) {
 		simple_unlock(&pmap->pm_obj.vmobjlock);
 	} else {
+		struct pmap *ourpmap = curcpu()->ci_pmap;
+
 #if defined(MULTIPROCESSOR)
 		*APDP_PDE = 0;
-		pmap_apte_flush(curpcb->pcb_pmap);
+		pmap_apte_flush(ourpmap);
 #endif
 		COUNT(apdp_pde_unmap);
 		simple_unlock(&pmap->pm_obj.vmobjlock);
-		simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
+		simple_unlock(&ourpmap->pm_obj.vmobjlock);
 	}
 }
 
@@ -952,8 +967,6 @@ pmap_bootstrap(kva_start)
 	 * operation of the system.
 	 */
 
-	curpcb->pcb_pmap = kpm;	/* proc0's pcb */
-
 	/*
 	 * Begin to enable global TLB entries if they are supported.
 	 * The G bit has no effect until the CR4_PGE bit is set in CR4,
@@ -1903,31 +1916,24 @@ pmap_ldt_cleanup(l)
 #endif /* USER_LDT */
 
 /*
- * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info)
+ * pmap_activate: activate a process' pmap
  *
  * => called from cpu_switch()
- * => if proc is the curlwp, then load it into the MMU
+ * => if lwp is the curlwp, then set ci_want_pmapload so that the
+ *    actual MMU context switch will be done by pmap_load() later
  */
 
 void
 pmap_activate(l)
 	struct lwp *l;
 {
+	struct cpu_info *ci = curcpu();
 	struct pcb *pcb = &l->l_addr->u_pcb;
-	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
+	struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
 
-	pcb->pcb_pmap = pmap;
 	pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
 	pcb->pcb_cr3 = pmap->pm_pdirpa;
-	if (l == curlwp) {
-		lcr3(pcb->pcb_cr3);
-		lldt(pcb->pcb_ldt_sel);
-
-		/*
-		 * mark the pmap in use by this processor.
-		 */
-		x86_atomic_setbits_l(&pmap->pm_cpus, (1U << cpu_number()));
-
+	if (l == ci->ci_curlwp) {
 #ifdef KSTACK_CHECK_DR0
 		/*
 		 * setup breakpoint on the top of stack
@@ -1937,7 +1943,109 @@ pmap_activate(l)
 		else
 			dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1);
 #endif
+
+		/*
+	 * no need to switch to the kernel vmspace because
+	 * it's a subset of every vmspace.
+		 */
+
+		if (pmap == pmap_kernel()) {
+			ci->ci_want_pmapload = 0;
+			return;
+		}
+
+		ci->ci_want_pmapload = 1;
+	}
+}
+
+/*
+ * pmap_reactivate: try to regain a reference to the pmap.
+ */
+
+static boolean_t
+pmap_reactivate(struct pmap *pmap)
+{
+	struct cpu_info *ci = curcpu();
+	u_int32_t cpumask = 1U << ci->ci_cpuid;
+
+	KASSERT(pmap->pm_pdirpa == rcr3());
+
+	/*
+	 * if we still have a lazy reference to this pmap,
+	 * we can assume that there were no tlb shootdowns
+	 * for this pmap in the meantime.
+	 */
+
+	if (pmap->pm_weakrefs & cpumask) {
+		x86_atomic_clearbits_l(&pmap->pm_weakrefs, cpumask);
+
+		/* got it */
+		return TRUE;
 	}
+
+	return FALSE;
+}
+
+/*
+ * pmap_load: actually switch pmap.  (fill in %cr3 and LDT info)
+ */
+
+void
+pmap_load()
+{
+	struct cpu_info *ci = curcpu();
+	u_int32_t cpumask = 1U << ci->ci_cpuid;
+	struct pmap *pmap;
+	struct pmap *oldpmap;
+	struct lwp *l;
+
+	KASSERT(ci->ci_want_pmapload);
+	ci->ci_want_pmapload = 0;
+
+	l = ci->ci_curlwp;
+	KASSERT(l != NULL);
+	pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
+	KASSERT(pmap != pmap_kernel());
+	oldpmap = ci->ci_pmap;
+
+	KASSERT(pmap->pm_ldt_sel == l->l_addr->u_pcb.pcb_ldt_sel);
+	lldt(pmap->pm_ldt_sel);
+
+	/*
+	 * mark the pmap in use by this processor.
+	 */
+
+	x86_atomic_setbits_l(&pmap->pm_cpus, cpumask);
+
+	if (pmap == oldpmap) {
+		if (pmap_reactivate(pmap))
+			return;
+		/*
+		 * the pmap was changed while it was deactivated.
+		 * our tlb may be stale.
+		 */
+
+		tlbflush();
+		return;
+	}
+
+	x86_atomic_clearbits_l(&pmap->pm_weakrefs, cpumask);
+	KASSERT(oldpmap->pm_pdirpa == rcr3());
+
+	/*
+	 * mark the pmap no longer in use by this processor.
+	 */
+
+	x86_atomic_clearbits_l(&oldpmap->pm_cpus, cpumask);
+
+	KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
+	pmap_reference(pmap);
+	KERNEL_UNLOCK();
+	ci->ci_pmap = pmap;
+	lcr3(pmap->pm_pdirpa);
+	KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
+	pmap_destroy(oldpmap);
+	KERNEL_UNLOCK();
 }
 
 /*
@@ -1948,12 +2056,53 @@ void
 pmap_deactivate(l)
 	struct lwp *l;
 {
-	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
+
+	if (l == curlwp)
+		pmap_deactivate2(l);
+}
+
+/*
+ * pmap_deactivate2: a context switch version of pmap_deactivate.
+ * it always treats l as curlwp.
+ */
+
+void
+pmap_deactivate2(l)
+	struct lwp *l;
+{
+	struct pmap *pmap;
+	struct cpu_info *ci = curcpu();
+	u_int32_t cpumask = 1U << ci->ci_cpuid;
+
+	if (ci->ci_want_pmapload) {
+		KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
+		    != pmap_kernel());
+
+		/*
+		 * userspace has not been touched.
+		 * nothing to do here.
+		 */
+
+		ci->ci_want_pmapload = 0;
+		return;
+	}
+
+	pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
+
+	if (pmap == pmap_kernel()) {
+		return;
+	}
+
+	KASSERT(pmap->pm_pdirpa == rcr3());
+	KASSERT(ci->ci_pmap == pmap);
+
+	x86_atomic_setbits_l(&pmap->pm_weakrefs, cpumask);
 
 	/*
 	 * mark the pmap no longer in use by this processor.
 	 */
-	x86_atomic_clearbits_l(&pmap->pm_cpus, (1U << cpu_number()));
+
+	x86_atomic_clearbits_l(&pmap->pm_cpus, cpumask);
 }
 
 /*
@@ -2395,6 +2544,8 @@ pmap_do_remove(pmap, sva, eva, flags)
 	struct vm_page *ptp;
 	int32_t cpumask = 0;
 	TAILQ_HEAD(, vm_page) empty_ptps;
+	struct cpu_info *ci;
+	struct pmap *curpmap;
 
 	/*
 	 * we lock in the pmap => pv_head direction
@@ -2403,8 +2554,12 @@ pmap_do_remove(pmap, sva, eva, flags)
 	TAILQ_INIT(&empty_ptps);
 
 	PMAP_MAP_TO_HEAD_LOCK();
+
 	ptes = pmap_map_ptes(pmap);	/* locks pmap */
 
+	ci = curcpu();
+	curpmap = ci->ci_pmap;
+
 	/*
 	 * removing one page?  take shortcut function.
 	 */
@@ -2453,7 +2608,7 @@ pmap_do_remove(pmap, sva, eva, flags)
 				 * here if we're using APTE space.
 				 */
 #endif
-				pmap_tlb_shootdown(curpcb->pcb_pmap,
+				pmap_tlb_shootdown(curpmap,
 				    ((vaddr_t)ptes) + ptp->offset, opte,
 				    &cpumask);
 #if defined(MULTIPROCESSOR)
@@ -2461,8 +2616,7 @@ pmap_do_remove(pmap, sva, eva, flags)
 				 * Always shoot down the pmap's self-mapping
 				 * of the PTP.
 				 * XXXthorpej Redundant shootdown can happen
-				 * here if pmap == curpcb->pcb_pmap (not APTE
-				 * space).
+				 * here if pmap == curpmap (not APTE space).
 				 */
 				pmap_tlb_shootdown(pmap,
 				    ((vaddr_t)PTE_BASE) + ptp->offset, opte,
@@ -2552,14 +2706,14 @@ pmap_do_remove(pmap, sva, eva, flags)
 			 * if we're using APTE space.
 			 */
 #endif
-			pmap_tlb_shootdown(curpcb->pcb_pmap,
+			pmap_tlb_shootdown(curpmap,
 			    ((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
 #if defined(MULTIPROCESSOR)
 			/*
 			 * Always shoot down the pmap's self-mapping
 			 * of the PTP.
 			 * XXXthorpej Redundant shootdown can happen here
-			 * if pmap == curpcb->pcb_pmap (not APTE space).
+			 * if pmap == curpmap (not APTE space).
 			 */
 			pmap_tlb_shootdown(pmap,
 			    ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
@@ -2600,6 +2754,8 @@ pmap_page_remove(pg)
 	int32_t cpumask = 0;
 	TAILQ_HEAD(, vm_page) empty_ptps;
 	struct vm_page *ptp;
+	struct cpu_info *ci;
+	struct pmap *curpmap;
 
 #ifdef DIAGNOSTIC
 	int bank, off;
@@ -2619,6 +2775,9 @@ pmap_page_remove(pg)
 	/* set pv_head => pmap locking */
 	PMAP_HEAD_TO_MAP_LOCK();
 
+	ci = curcpu();
+	curpmap = ci->ci_pmap;
+
 	/* XXX: needed if we hold head->map lock? */
 	simple_lock(&pvh->pvh_lock);
 
@@ -2672,7 +2831,7 @@ pmap_page_remove(pg)
 				opte = x86_atomic_testset_ul(
 				    &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
 				    0);
-				pmap_tlb_shootdown(curpcb->pcb_pmap,
+				pmap_tlb_shootdown(curpmap,
 				    ((vaddr_t)ptes) + pve->pv_ptp->offset,
 				    opte, &cpumask);
 #if defined(MULTIPROCESSOR)
@@ -3602,6 +3761,8 @@ pmap_tlb_shootdown(pmap, va, pte, cpumas
 		__cpu_simple_unlock(&pq->pq_slock);
 	}
 	splx(s);
+
+	pmap->pm_weakrefs = 0; /* zap weak references */
 }
 
 /*
Index: i386/locore.S
===================================================================
--- i386/locore.S	(revision 528)
+++ i386/locore.S	(revision 551)
@@ -126,7 +126,7 @@
 
 #define GET_CURPCB(reg)			movl	CPUVAR(CURPCB),reg	
 #define SET_CURPCB(reg)			movl	reg,CPUVAR(CURPCB)
-	
+
 #define CLEAR_RESCHED(reg)		movl	reg,CPUVAR(RESCHED)
 
 /* XXX temporary kluge; these should not be here */
@@ -702,6 +702,7 @@ NENTRY(proc_trampoline)
 	pushl	%ebx
 	call	*%esi
 	addl	$4,%esp
+	DO_DEFERRED_SWITCH(%eax)
 	INTRFASTEXIT
 	/* NOTREACHED */
 
@@ -872,6 +873,7 @@ _C_LABEL(copyin_func):
  */
 /* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */
 ENTRY(copyout)
+	DO_DEFERRED_SWITCH(%eax)
 	jmp	*_C_LABEL(copyout_func)
 
 #if defined(I386_CPU)
@@ -1013,6 +1015,7 @@ ENTRY(i486_copyout)
  */
 /* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */
 ENTRY(copyin)
+	DO_DEFERRED_SWITCH(%eax)
 	jmp	*_C_LABEL(copyin_func)
 
 #if defined(I386_CPU) || defined(I486_CPU) || defined(I586_CPU) || \
@@ -1084,6 +1087,8 @@ ENTRY(copyoutstr)
 	pushl	%esi
 	pushl	%edi
 
+	DO_DEFERRED_SWITCH(%eax)
+
 	movl	12(%esp),%esi		# esi = from
 	movl	16(%esp),%edi		# edi = to
 	movl	20(%esp),%edx		# edx = maxlen
@@ -1201,6 +1206,9 @@ ENTRY(copyoutstr)
 ENTRY(copyinstr)
 	pushl	%esi
 	pushl	%edi
+	
+	DO_DEFERRED_SWITCH(%eax)
+
 	GET_CURPCB(%ecx)
 	movl	$_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
 
@@ -1312,6 +1320,7 @@ ENTRY(copystr)
  */
 /* LINTSTUB: Func: long fuword(const void *base) */
 ENTRY(fuword)
+	DO_DEFERRED_SWITCH(%eax)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-4,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1328,6 +1337,7 @@ ENTRY(fuword)
  */
 /* LINTSTUB: Func: int fusword(const void *base) */
 ENTRY(fusword)
+	DO_DEFERRED_SWITCH(%eax)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1345,6 +1355,8 @@ ENTRY(fusword)
  */
 /* LINTSTUB: Func: int fuswintr(const void *base) */
 ENTRY(fuswintr)
+	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
+	jnz	_C_LABEL(fusuaddrfault)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1362,6 +1374,7 @@ ENTRY(fuswintr)
  */
 /* LINTSTUB: Func: int fubyte(const void *base) */
 ENTRY(fubyte)
+	DO_DEFERRED_SWITCH(%eax)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1406,6 +1419,7 @@ NENTRY(fusuaddrfault)
  */
 /* LINTSTUB: Func: int suword(void *base, long c) */
 ENTRY(suword)
+	DO_DEFERRED_SWITCH(%eax)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-4,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1453,6 +1467,7 @@ ENTRY(suword)
  */
 /* LINTSTUB: Func: int susword(void *base, short c) */
 ENTRY(susword)
+	DO_DEFERRED_SWITCH(%eax)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1501,6 +1516,8 @@ ENTRY(susword)
  */
 /* LINTSTUB: Func: int suswintr(void *base, short c) */
 ENTRY(suswintr)
+	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
+	jnz	_C_LABEL(fusuaddrfault)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1538,6 +1555,7 @@ ENTRY(suswintr)
  */
 /* LINTSTUB: Func: int subyte(void *base, int c) */
 ENTRY(subyte)
+	DO_DEFERRED_SWITCH(%eax)
 	movl	4(%esp),%edx
 	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
 	ja	_C_LABEL(fusuaddrfault)
@@ -1723,7 +1741,7 @@ ENTRY(cpu_switch)
 	 */
 
 	pushl	%esi
-	call	_C_LABEL(pmap_deactivate)	# pmap_deactivate(oldproc)
+	call	_C_LABEL(pmap_deactivate2)	# pmap_deactivate2(oldproc)
 	addl	$4,%esp
 
 	movl	L_ADDR(%esi),%esi
@@ -1750,11 +1768,6 @@ ENTRY(cpu_switch)
 	movl	PCB_ESP(%edi),%esp
 	movl	PCB_EBP(%edi),%ebp
 
-
-	/* Switch address space. */
-	movl	PCB_CR3(%edi),%ecx
-	movl	%ecx,%cr3
-
 	/* Switch TSS. Reset "task busy" flag before loading. */
 #ifdef MULTIPROCESSOR
 	movl	CPUVAR(GDT),%eax
@@ -1873,7 +1886,7 @@ switch_resume:
 	 */
 
 	pushl	%esi
-	call	_C_LABEL(pmap_deactivate)	# pmap_deactivate(oldproc)
+	call	_C_LABEL(pmap_deactivate2)	# pmap_deactivate2(oldproc)
 	addl	$4,%esp
 
 	movl	L_ADDR(%esi),%esi
@@ -2067,10 +2080,6 @@ ENTRY(cpu_exit)
 	movl	_C_LABEL(gdt),%eax
 #endif
 
-	/* Switch address space. */
-	movl	PCB_CR3(%esi),%ecx
-	movl	%ecx,%cr3
-
 	/* Switch TSS. */
 	andl	$~0x0200,4-SEL_KPL(%eax,%edx,1)
 	ltr	%dx
@@ -2135,6 +2144,12 @@ syscall1:
 	INTRENTRY
 
 #ifdef DIAGNOSTIC
+	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
+	jz	1f
+	pushl	$6f
+	call	_C_LABEL(printf)
+	addl	$4, %esp
+1:
 	movl	CPUVAR(ILEVEL),%ebx
 	testl	%ebx,%ebx
 	jz	1f
@@ -2152,7 +2167,8 @@ syscall1:
 	pushl	%esp
 	call	*P_MD_SYSCALL(%edx)	# get pointer to syscall() function
 	addl	$4,%esp
-2:	/* Check for ASTs on exit to user mode. */
+syscall_checkast:
+	/* Check for ASTs on exit to user mode. */
 	cli
 	CHECK_ASTPENDING(%eax)
 	je	1f
@@ -2163,11 +2179,13 @@ syscall1:
 	pushl	%esp
 	call	_C_LABEL(trap)
 	addl	$4,%esp
-	jmp	2b
+	jmp	syscall_checkast	/* re-check ASTs */
+1:	CHECK_DEFERRED_SWITCH(%eax)
+	jnz	9f
 #ifndef DIAGNOSTIC
-1:	INTRFASTEXIT
+	INTRFASTEXIT
 #else /* DIAGNOSTIC */
-1:	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
+	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
 	jne	3f
 	INTRFASTEXIT
 3:	sti
@@ -2181,7 +2199,11 @@ syscall1:
 	jmp	2b
 4:	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n"
 5:	.asciz	"WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n"	
+6:	.asciz	"WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n"	
 #endif /* DIAGNOSTIC */
+9:	sti
+	call	_C_LABEL(pmap_load)
+	jmp	syscall_checkast	/* re-check ASTs */
 
 #if NNPX > 0
 /*
Index: i386/trap.c
===================================================================
--- i386/trap.c	(revision 528)
+++ i386/trap.c	(revision 551)
@@ -609,6 +609,8 @@ copyfault:
 				vm->vm_ssize = nss;
 
 			if (type == T_PAGEFLT) {
+				if (curcpu()->ci_want_pmapload)
+					pmap_load();
 				KERNEL_UNLOCK();
 				return;
 			}
Index: i386/mach_sigcode.S
===================================================================
--- i386/mach_sigcode.S	(revision 528)
+++ i386/mach_sigcode.S	(revision 551)
@@ -125,4 +125,10 @@ IDTVEC(mach_trap)
 	call	_C_LABEL(trap)
 	addl	$4,%esp
 	jmp	2b
-1:	INTRFASTEXIT
+1:	CHECK_DEFERRED_SWITCH(%eax)
+	jnz	9f
+	INTRFASTEXIT
+9:	sti
+	call	_C_LABEL(pmap_load)
+	cli
+	jmp	2b
Index: i386/genassym.cf
===================================================================
--- i386/genassym.cf	(revision 528)
+++ i386/genassym.cf	(revision 551)
@@ -258,6 +258,7 @@ endif
 
 define	CPU_INFO_SELF		offsetof(struct cpu_info, ci_self)
 define	CPU_INFO_RESCHED	offsetof(struct cpu_info, ci_want_resched)
+define	CPU_INFO_WANT_PMAPLOAD	offsetof(struct cpu_info, ci_want_pmapload)
 define	CPU_INFO_CURLWP		offsetof(struct cpu_info, ci_curlwp)
 define	CPU_INFO_CURPCB		offsetof(struct cpu_info, ci_curpcb)
 define	CPU_INFO_IDLE_PCB	offsetof(struct cpu_info, ci_idle_pcb)
