Subject: Re: interrupt stack
To: None <port-i386@netbsd.org, port-xen@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: port-amd64
Date: 11/25/2006 00:06:19
--NextPart-20061125000507-1487002
Content-Type: Text/Plain; charset=us-ascii

> unless anyone objects, i'll make i386 and xen use interrupt stack.
> 
> the attached patch is not tested on amd64 yet.
> (amd64 part merely changes how to pass intrframe.)

oops, the patch attached to my previous mail was an old version with known bugs.

the current version is attached to this mail.

YAMAMOTO Takashi

--NextPart-20061125000507-1487002
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="i2.diff"

Index: amd64/amd64/vector.S
===================================================================
--- amd64/amd64/vector.S	(revision 1464)
+++ amd64/amd64/vector.S	(working copy)
@@ -360,6 +360,7 @@ IDTVEC(resume_lapic_ltimer)
 	movl	$IPL_CLOCK,CPUVAR(ILEVEL)
 	sti
 	pushq	%rbx
+	movq	%rsp,%rsi
 	xorq	%rdi,%rdi
 	call	_C_LABEL(lapic_clockintr)
 	jmp	_C_LABEL(Xdoreti)
@@ -415,6 +416,7 @@ IDTVEC(intr_/**/name/**/num)						;\
 	movl	IH_LEVEL(%rbx),%r12d					;\
 	cmpl	%r13d,%r12d						;\
 	jle	7f							;\
+	movq	%rsp,%rsi						;\
 	movq	IH_ARG(%rbx),%rdi					;\
 	movl	%r12d,CPUVAR(ILEVEL)					;\
 	call	*IH_FUN(%rbx)		/* call it */			;\
Index: i386/include/frameasm.h
===================================================================
--- i386/include/frameasm.h	(revision 1407)
+++ i386/include/frameasm.h	(working copy)
@@ -100,4 +100,32 @@
 				1:
 #define	CLEAR_ASTPENDING(reg)	movl	$0, P_MD_ASTPENDING(reg)
 
+/*
+ * IDEPTH_INCR:
+ * increment ci_idepth and switch to the interrupt stack if the new value
+ * is 0.  note that the initial value of ci_idepth is -1.
+ *
+ * => should be called with interrupts disabled.
+ * => leaves the old %esp in %eax and also pushes it for IDEPTH_DECR to pop.
+ */
+
+#define	IDEPTH_INCR \
+	incl	CPUVAR(IDEPTH); \
+	movl	%esp, %eax; \
+	jne	999f; \
+	movl	CPUVAR(INTRSTACK), %esp; \
+999:	pushl	%eax; \
+
+/*
+ * IDEPTH_DECR:
+ * decrement ci_idepth and switch back to
+ * the original stack saved by IDEPTH_INCR.
+ *
+ * => should be called with interrupts disabled.
+ */
+
+#define	IDEPTH_DECR \
+	popl	%esp; \
+	decl	CPUVAR(IDEPTH)
+
 #endif /* _I386_FRAMEASM_H_ */
Index: i386/include/i82093reg.h
===================================================================
--- i386/include/i82093reg.h	(revision 1407)
+++ i386/include/i82093reg.h	(working copy)
@@ -50,7 +50,8 @@
  * XXX this is not obvious
  */
 #define ioapic_unmask(num) \
-	cmpl    $IREENT_MAGIC,(TF_ERR+4)(%esp)			;\
+	movl    (%esp),%eax					;\
+	cmpl    $IREENT_MAGIC,(TF_ERR+4)(%eax)			;\
 	jne     79f						;\
 	movl	IS_PIC(%ebp),%edi				;\
 	ioapic_asm_lock(num)					;\
Index: i386/include/param.h
===================================================================
--- i386/include/param.h	(revision 1776)
+++ i386/include/param.h	(working copy)
@@ -112,6 +112,7 @@
 #endif /*NOREDZONE */
 #endif /* !defined(UPAGES) */
 #define	USPACE		(UPAGES * NBPG)	/* total size of u-area */
+#define	INTRSTACKSIZE	8192
 
 #ifndef MSGBUFSIZE
 #define MSGBUFSIZE	4*NBPG		/* default message buffer size */
Index: i386/include/cpu.h
===================================================================
--- i386/include/cpu.h	(revision 1785)
+++ i386/include/cpu.h	(working copy)
@@ -111,6 +111,7 @@ struct cpu_info {
 	int		ci_idepth;
 	uint32_t	ci_imask[NIPL];
 	uint32_t	ci_iunmask[NIPL];
+	void *		ci_intrstack;
 
 	paddr_t ci_idle_pcb_paddr;	/* PA of idle PCB */
 	uint32_t ci_flags;		/* flags; see below */
@@ -271,7 +272,7 @@ struct clockframe {
 #define	CLKF_USERMODE(frame)	USERMODE((frame)->cf_if.if_cs, (frame)->cf_if.if_eflags)
 #define	CLKF_BASEPRI(frame)	(0)
 #define	CLKF_PC(frame)		((frame)->cf_if.if_eip)
-#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 1)
+#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 0)
 
 /*
  * This is used during profiling to integrate system time.  It can safely
Index: i386/i386/spl.S
===================================================================
--- i386/i386/spl.S	(revision 1464)
+++ i386/i386/spl.S	(working copy)
@@ -102,10 +102,17 @@ IDTVEC(spllower)
 #else /* defined(DDB) || defined(GPROF) */
 	movl	16(%esp),%ebx
 #endif /* defined(DDB) || defined(GPROF) */
-	movl	$1f,%esi		# address to resume loop at
-1:	movl	%ebx,%eax		# get cpl
-	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
+	movl	$.Lspllower_resume,%esi		# address to resume loop at
 	cli
+.Lspllower_resume:
+#if defined(DEBUG)
+	pushf
+	popl	%eax
+	testl	$PSL_I,%eax
+	jnz	.Lspllower_panic
+#endif /* defined(DEBUG) */
+	movl	%ebx,%eax		# get cpl
+	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
 	andl	CPUVAR(IPENDING),%eax		# any non-masked bits left?
 	jz	2f
 	bsrl	%eax,%eax
@@ -122,6 +129,12 @@ IDTVEC(spllower)
 	leave
 #endif /* defined(DDB) || defined(GPROF) */
 	ret
+#if defined(DEBUG)
+.Lspllower_panic:
+	pushl	$1f
+	call	_C_LABEL(panic)
+1:	.asciz	"SPLLOWER: INTERRUPT ENABLED"
+#endif /* defined(DEBUG) */
 
 /*
  * Handle return from interrupt after device handler finishes.
@@ -130,14 +143,22 @@ IDTVEC(spllower)
  *   ebx - cpl to restore
  *   esi - address to resume loop at
  *   edi - scratch for Xsoftnet
+ *
+ * called with interrupts disabled.
  */
 IDTVEC(doreti)
+	IDEPTH_DECR
 	popl	%ebx			# get previous priority
-	decl	CPUVAR(IDEPTH)
-	movl	$1f,%esi		# address to resume loop at
-1:	movl	%ebx,%eax
+	movl	$.Ldoreti_resume,%esi	# address to resume loop at
+.Ldoreti_resume:
+#if defined(DEBUG)
+	pushf
+	popl	%eax
+	testl	$PSL_I,%eax
+	jnz	.Ldoreti_panic
+#endif /* defined(DEBUG) */
+	movl	%ebx,%eax
 	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
-	cli
 	andl	CPUVAR(IPENDING),%eax
 	jz	2f
 	bsrl    %eax,%eax               # slow, but not worth optimizing
@@ -177,3 +198,9 @@ IDTVEC(doreti)
 	call	_C_LABEL(pmap_load)
 	cli
 	jmp	.Ldoreti_checkast	/* recheck ASTs */
+#if defined(DEBUG)
+.Ldoreti_panic:
+	pushl	$1f
+	call	_C_LABEL(panic)
+1:	.asciz	"DORETI: INTERRUPT ENABLED"
+#endif /* defined(DEBUG) */
Index: i386/i386/db_trace.c
===================================================================
--- i386/i386/db_trace.c	(revision 1912)
+++ i386/i386/db_trace.c	(working copy)
@@ -357,6 +357,26 @@ db_nextframe(
 	return 1;
 }
 
+static boolean_t
+db_intrstack_p(const void *vp)
+{
+	const struct cpu_info *ci;
+	CPU_INFO_ITERATOR cii;
+
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		const char *cp = ci->ci_intrstack;
+
+		if (cp == NULL) {
+			continue;
+		}
+		if ((cp - INTRSTACKSIZE + 4) <= (const char *)vp &&
+		    (const char *)vp <= cp) {
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
 void
 db_stack_trace_print(db_expr_t addr, boolean_t have_addr, db_expr_t count,
 		     const char *modif, void (*pr)(const char *, ...))
@@ -514,7 +534,10 @@ have_u:
 
 		if (INKERNEL((int)frame)) {
 			/* staying in kernel */
-			if (frame < lastframe ||
+			if (!db_intrstack_p(frame) &&
+			    db_intrstack_p(lastframe)) {
+				(*pr)("--- switch to interrupt stack ---\n");
+			} else if (frame < lastframe ||
 			    (frame == lastframe && callpc == lastcallpc)) {
 				(*pr)("Bad frame pointer: %p\n", frame);
 				break;
Index: i386/i386/vector.S
===================================================================
--- i386/i386/vector.S	(revision 1464)
+++ i386/i386/vector.S	(working copy)
@@ -165,11 +165,12 @@ IDTVEC(intr_lapic_ipi)
 	jae	2f
 IDTVEC(resume_lapic_ipi)
 1:
-	incl	CPUVAR(IDEPTH)
-	movl	$IPL_IPI,CPUVAR(ILEVEL)
-        sti
 	pushl	%ebx
+	IDEPTH_INCR
+	movl	$IPL_IPI,CPUVAR(ILEVEL)
+	sti
 	call	_C_LABEL(x86_ipi_handler)
+	cli
 	jmp	_C_LABEL(Xdoreti)
 2:
 	orl	$(1 << LIR_IPI),CPUVAR(IPENDING)
@@ -215,13 +216,14 @@ IDTVEC(intr_lapic_ltimer)
 	jae	2f
 IDTVEC(resume_lapic_ltimer)
 1:
-	incl	CPUVAR(IDEPTH)
+	pushl	%ebx
+	IDEPTH_INCR
 	movl	$IPL_CLOCK,CPUVAR(ILEVEL)
 	sti
-	pushl	%ebx
 	pushl	$0
 	call	_C_LABEL(lapic_clockintr)
 	addl	$4,%esp		
+	cli
 	jmp	_C_LABEL(Xdoreti)
 2:
 	orl	$(1 << LIR_TIMER),CPUVAR(IPENDING)
@@ -270,8 +272,8 @@ IDTVEC(intr_/**/name/**/num)						;\
 1:									\
 	pushl	%esi							;\
 	movl	%ebx,CPUVAR(ILEVEL)					;\
+	IDEPTH_INCR							;\
 	sti								;\
-	incl	CPUVAR(IDEPTH)						;\
 	movl	IS_HANDLERS(%ebp),%ebx					;\
 6:									\
 	movl	IH_LEVEL(%ebx),%edi					;\
@@ -285,29 +287,26 @@ IDTVEC(intr_/**/name/**/num)						;\
 	addl	$4,%esp			/* toss the arg */		;\
 	testl	%ebx,%ebx						;\
 	jnz	6b							;\
-5:									\
 	cli								;\
 	unmask(num)			/* unmask it in hardware */	;\
 	late_ack(num)							;\
-	sti								;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
 7:									\
 	cli								;\
 	orl     $(1 << num),CPUVAR(IPENDING)				;\
 	level_mask(num)							;\
 	late_ack(num)							;\
-	sti								;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
 10:									\
-	cli								;\
 	orl     $(1 << num),CPUVAR(IPENDING)				;\
 	level_mask(num)							;\
 	late_ack(num)							;\
-	sti								;\
 	INTRFASTEXIT							;\
 9:									\
+	pushl	%esp			/* for unmask */		;\
 	unmask(num)							;\
 	late_ack(num)							;\
+	addl	$4,%esp							;\
 	INTRFASTEXIT
 
 #define ICUADDR IO_ICU1
@@ -595,8 +594,8 @@ _C_LABEL(ioapic_level_stubs):
 
 IDTVEC(softserial)
 	movl	$IPL_SOFTSERIAL, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	sti
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -609,13 +608,14 @@ IDTVEC(softserial)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	cli
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softnet)
 	movl	$IPL_SOFTNET, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	sti
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -641,13 +641,14 @@ IDTVEC(softnet)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintunlock)	
 #endif
-	decl	CPUVAR(IDEPTH)
+	cli
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softclock)
 	movl	$IPL_SOFTCLOCK, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	sti
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -661,7 +662,8 @@ IDTVEC(softclock)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintunlock)		
 #endif
-	decl	CPUVAR(IDEPTH)
+	cli
+	IDEPTH_DECR
 	jmp	*%esi
 
 /*
@@ -766,9 +768,10 @@ IDTVEC(trap10)
 	INTRENTRY
 	pushl	CPUVAR(ILEVEL)
 	pushl	%esp
+	pushl	$0			# dummy arg
 	incl	_C_LABEL(uvmexp)+V_TRAP
 	call	_C_LABEL(npxintr)
-	addl	$8,%esp
+	addl	$12,%esp
 	INTRFASTEXIT
 #else
 	ZTRAP(T_ARITHTRAP)
Index: i386/i386/genassym.cf
===================================================================
--- i386/i386/genassym.cf	(revision 1728)
+++ i386/i386/genassym.cf	(working copy)
@@ -304,6 +304,7 @@ define	CPU_INFO_IUNMASK	offsetof(struct 
 define	CPU_INFO_ILEVEL		offsetof(struct cpu_info, ci_ilevel)
 define	CPU_INFO_IDEPTH		offsetof(struct cpu_info, ci_idepth)
 define	CPU_INFO_ISOURCES	offsetof(struct cpu_info, ci_isources)
+define	CPU_INFO_INTRSTACK	offsetof(struct cpu_info, ci_intrstack)
 
 if NIOAPIC > 0
 define		IOAPIC_SC_REG		offsetof(struct ioapic_softc, sc_reg)
@@ -366,3 +367,4 @@ define	PSL_AC			PSL_AC
 define	PSL_MBO			PSL_MBO
 define	PSL_ID			PSL_ID
 define	PSL_VM			PSL_VM
+define	PSL_I			PSL_I
Index: i386/isa/npxvar.h
===================================================================
--- i386/isa/npxvar.h	(revision 1885)
+++ i386/isa/npxvar.h	(working copy)
@@ -87,4 +87,4 @@ struct npx_softc {
 
 enum npx_type npxprobe1(bus_space_tag_t, bus_space_handle_t, int);
 void npxattach(struct npx_softc *);
-int npxintr(void *, struct intrframe);
+int npxintr(void *, struct intrframe *);
Index: i386/isa/npx.c
===================================================================
--- i386/isa/npx.c	(revision 1914)
+++ i386/isa/npx.c	(working copy)
@@ -345,12 +345,11 @@ npxattach(struct npx_softc *sc)
  * IRQ13 exception handling makes exceptions even less precise than usual.
  */
 int
-npxintr(void *arg, struct intrframe iframe)
+npxintr(void *arg, struct intrframe *frame)
 {
 	struct cpu_info *ci = curcpu();
 	struct lwp *l = ci->ci_fpcurlwp;
 	union savefpu *addr;
-	struct intrframe *frame = &iframe;
 	struct npx_softc *sc;
 	ksiginfo_t ksi;
 
Index: x86/x86/intr.c
===================================================================
--- x86/x86/intr.c	(revision 1728)
+++ x86/x86/intr.c	(working copy)
@@ -119,6 +119,8 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.2
 #include <sys/proc.h>
 #include <sys/errno.h>
 
+#include <uvm/uvm_extern.h>
+
 #include <machine/atomic.h>
 #include <machine/i8259.h>
 #include <machine/cpu.h>
@@ -818,6 +820,9 @@ cpu_intr_init(struct cpu_info *ci)
 #if NLAPIC > 0 && defined(MULTIPROCESSOR)
 	int i;
 #endif
+#if defined(INTRSTACKSIZE)
+	char *cp;
+#endif /* defined(INTRSTACKSIZE) */
 
 	MALLOC(isp, struct intrsource *, sizeof (struct intrsource), M_DEVBUF,
 	    M_WAITOK|M_ZERO);
@@ -892,6 +897,11 @@ cpu_intr_init(struct cpu_info *ci)
 
 	intr_calculatemasks(ci);
 
+#if defined(INTRSTACKSIZE)
+	cp = (char *)uvm_km_alloc(kernel_map, INTRSTACKSIZE, 0, UVM_KMF_WIRED);
+	ci->ci_intrstack = cp + INTRSTACKSIZE - sizeof(register_t);
+	ci->ci_idepth = -1;
+#endif /* defined(INTRSTACKSIZE) */
 }
 
 #ifdef MULTIPROCESSOR
Index: x86/x86/lapic.c
===================================================================
--- x86/x86/lapic.c	(revision 1915)
+++ x86/x86/lapic.c	(working copy)
@@ -78,7 +78,7 @@ __KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.
 void		lapic_delay(int);
 void		lapic_microtime(struct timeval *);
 static u_int32_t lapic_gettick(void);
-void		lapic_clockintr(void *, struct intrframe);
+void		lapic_clockintr(void *, struct intrframe *);
 static void 	lapic_map(paddr_t);
 
 static void lapic_hwmask(struct pic *, int);
@@ -238,7 +238,7 @@ u_int64_t lapic_frac_cycle_per_usec;
 u_int32_t lapic_delaytab[26];
 
 void
-lapic_clockintr(void *arg, struct intrframe frame)
+lapic_clockintr(void *arg, struct intrframe *frame)
 {
 #if defined(I586_CPU) || defined(I686_CPU) || defined(__x86_64__)
 #ifndef __HAVE_TIMECOUNTER
@@ -333,7 +333,7 @@ lapic_clockintr(void *arg, struct intrfr
 #endif /* !__HAVE_TIMECOUNTER */
 #endif /* I586_CPU || I686_CPU || __x86_64__ */
 
-	hardclock((struct clockframe *)&frame);
+	hardclock((struct clockframe *)frame);
 }
 
 #if !defined(__HAVE_TIMECOUNTER) && defined(NTP)
Index: x86/isa/clock.c
===================================================================
--- x86/isa/clock.c	(revision 1915)
+++ x86/isa/clock.c	(working copy)
@@ -192,7 +192,7 @@ int		gettick(void);
 void		sysbeep(int, int);
 static void     tickle_tc(void);
 
-static int	clockintr(void *, struct intrframe);
+static int	clockintr(void *, struct intrframe *);
 static void	rtcinit(void);
 static int	rtcget(mc_todregs *);
 static void	rtcput(mc_todregs *);
@@ -404,11 +404,11 @@ tickle_tc(void) 
 }
 
 static int
-clockintr(void *arg, struct intrframe frame)
+clockintr(void *arg, struct intrframe *frame)
 {
 	tickle_tc();
 
-	hardclock((struct clockframe *)&frame);
+	hardclock((struct clockframe *)frame);
 
 #if NMCA > 0
 	if (MCA_system) {
Index: xen/include/cpu.h
===================================================================
--- xen/include/cpu.h	(revision 1571)
+++ xen/include/cpu.h	(working copy)
@@ -114,6 +114,7 @@ struct cpu_info {
 	u_int32_t	ci_imask[NIPL];
 #endif
 	u_int32_t	ci_iunmask[NIPL];
+	void *		ci_intrstack;
 
 	paddr_t ci_idle_pcb_paddr;	/* PA of idle PCB */
 	u_int32_t ci_flags;		/* flags; see below */
@@ -271,7 +272,7 @@ struct clockframe {
 #define	CLKF_USERMODE(frame)	USERMODE((frame)->cf_if.if_cs, (frame)->cf_if.if_eflags)
 #define	CLKF_BASEPRI(frame)	(0)
 #define	CLKF_PC(frame)		((frame)->cf_if.if_eip)
-#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 1)
+#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 0)
 
 /*
  * This is used during profiling to integrate system time.  It can safely
Index: xen/include/evtchn.h
===================================================================
--- xen/include/evtchn.h	(revision 1799)
+++ xen/include/evtchn.h	(working copy)
@@ -41,6 +41,7 @@ extern struct evtsource *evtsource[];
 void events_default_setup(void);
 void init_events(void);
 unsigned int evtchn_do_event(int, struct intrframe *);
+void call_evtchn_do_event(int, struct intrframe *);
 int event_set_handler(int, int (*func)(void *), void *, int, const char *);
 int event_remove_handler(int, int (*func)(void *), void *);
 
Index: xen/x86/intr.c
===================================================================
--- xen/x86/intr.c	(revision 1848)
+++ xen/x86/intr.c	(working copy)
@@ -120,6 +120,8 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.1
 #include <sys/proc.h>
 #include <sys/errno.h>
 
+#include <uvm/uvm_extern.h>
+
 #include <machine/atomic.h>
 #include <machine/i8259.h>
 #include <machine/cpu.h>
@@ -206,6 +208,7 @@ void
 cpu_intr_init(struct cpu_info *ci)
 {
 	struct iplsource *ipl;
+	char *cp;
 	int i;
 
 	ci->ci_iunmask[0] = 0xfffffffe;
@@ -261,6 +264,10 @@ cpu_intr_init(struct cpu_info *ci)
 	evcnt_attach_dynamic(&softxenevt_evtcnt, EVCNT_TYPE_INTR, NULL,
 	    ci->ci_dev->dv_xname, "xenevt");
 #endif /* defined(DOM0OPS) */
+
+	cp = (char *)uvm_km_alloc(kernel_map, INTRSTACKSIZE, 0, UVM_KMF_WIRED);
+	ci->ci_intrstack = cp + INTRSTACKSIZE - sizeof(register_t);
+	ci->ci_idepth = -1;
 }
 
 #if NPCI > 0 || NISA > 0
Index: xen/i386/npx.c
===================================================================
--- xen/i386/npx.c	(revision 1609)
+++ xen/i386/npx.c	(working copy)
@@ -368,12 +368,11 @@ npxattach(struct npx_softc *sc)
  * IRQ13 exception handling makes exceptions even less precise than usual.
  */
 int
-npxintr(void *arg, struct intrframe iframe)
+npxintr(void *arg, struct intrframe *frame)
 {
 	struct cpu_info *ci = curcpu();
 	struct lwp *l = ci->ci_fpcurlwp;
 	union savefpu *addr;
-	struct intrframe *frame = &iframe;
 	struct npx_softc *sc;
 	ksiginfo_t ksi;
 
Index: xen/i386/spl.S
===================================================================
--- xen/i386/spl.S	(revision 1464)
+++ xen/i386/spl.S	(working copy)
@@ -105,11 +105,13 @@ IDTVEC(spllower)
 #else /* defined(DDB) || defined(GPROF) */
 	movl	20(%esp),%ebx
 #endif /* defined(DDB) || defined(GPROF) */
-	movl	$1f,%esi		# address to resume loop at
-1:	movl	%ebx,%eax		# get cpl
+	movl	$.Lspllower_resume,%esi	# address to resume loop at
+1:
+	CLI(%eax)
+.Lspllower_resume:
+	movl	%ebx,%eax		# get cpl
 	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
-	CLI(%ecx)
-	andl	CPUVAR(IPENDING),%eax		# any non-masked bits left?
+	andl	CPUVAR(IPENDING),%eax	# any non-masked bits left?
 	jz	2f
 	bsrl	%eax,%eax
 	btrl	%eax,CPUVAR(IPENDING)
@@ -138,15 +140,16 @@ IDTVEC(spllower)
  *   ebx - cpl to restore
  *   esi - address to resume loop at
  *   edi - scratch for Xsoftnet
+ *
+ * called with interrupts disabled.
  */
 IDTVEC(doreti)
+	IDEPTH_DECR
 	popl	%ebx			# get previous priority
-	decl	CPUVAR(IDEPTH)
-	movl	$1f,%esi		# address to resume loop at
-8:
-1:	movl	%ebx,%eax
+	movl	$.Ldoreti_resume,%esi	# address to resume loop at
+.Ldoreti_resume:
+	movl	%ebx,%eax
 	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
-	CLI(%ecx)
 	andl	CPUVAR(IPENDING),%eax
 	jz	2f
 	bsrl    %eax,%eax               # slow, but not worth optimizing
@@ -176,7 +179,7 @@ doreti_checkast:
 	call	_C_LABEL(trap)
 	addl	$4,%esp
 	CLI(%ecx)
-	jmp	5b
+	jmp	.Ldoreti_resume
 3:
 	CHECK_DEFERRED_SWITCH(%eax)
 	jnz	9f
@@ -184,7 +187,8 @@ doreti_checkast:
     	jz	4f
 	call	_C_LABEL(stipending)
 	testl	%eax,%eax
-	jnz	8b
+	CLI(%eax)
+	jnz	.Ldoreti_resume
 4:
 	INTRFASTEXIT
 9:
@@ -192,3 +196,19 @@ doreti_checkast:
 	call	_C_LABEL(pmap_load)
 	CLI(%ecx)
 	jmp	doreti_checkast	/* recheck ASTs */
+
+/*
+ * void call_evtchn_do_event(int evtch, struct intrframe *regs)
+ */
+
+NENTRY(call_evtchn_do_event)
+	IDEPTH_INCR
+	/*
+	 * IDEPTH_INCR leaves old %esp in %eax.
+	 */
+	pushl	8(%eax)	/* regs */
+	pushl	4(%eax)	/* evtch */
+	call	_C_LABEL(evtchn_do_event)
+	addl	$8, %esp
+	IDEPTH_DECR
+	ret
Index: xen/i386/vector.S
===================================================================
--- xen/i386/vector.S	(revision 1540)
+++ xen/i386/vector.S	(working copy)
@@ -202,15 +202,14 @@ IDTVEC(resume_/**/name/**/num)						\
 1:									\
 	pushl	%esi							;\
 	movl	$num,CPUVAR(ILEVEL)					;\
+	IDEPTH_INCR /* leaves old %esp on stack	*/			;\
 	STI(%eax)							;\
-	incl	CPUVAR(IDEPTH)						;\
 	movl	IS_HANDLERS(%ebp),%ebx					;\
 	LOCK_KERNEL							;\
 6:									\
-	pushl	%esp							;\
 	pushl	IH_ARG(%ebx)						;\
 	call	*IH_FUN(%ebx)		/* call it */			;\
-	addl	$8,%esp			/* toss the arg */		;\
+	addl	$4,%esp			/* toss the arg */		;\
 	movl	IH_IPL_NEXT(%ebx),%ebx	/* next handler in chain */	;\
 	testl	%ebx,%ebx						;\
 	jnz	6b							;\
@@ -219,7 +218,6 @@ IDTVEC(resume_/**/name/**/num)						\
 	CLI(%eax)							;\
 	unmask(num)			/* unmask it in hardware */	;\
 	late_ack(num)							;\
-	STI(%eax)							;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
 
 # Just unmasking the event isn't enouth, we also need to
@@ -306,8 +304,8 @@ _C_LABEL(xenev_stubs):
 #define EVCNTHI(x) _C_LABEL(x) + EV_EVCNTHI
 IDTVEC(softserial)
 	movl	$IPL_SOFTSERIAL, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -319,13 +317,14 @@ IDTVEC(softserial)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softnet)
 	movl	$IPL_SOFTNET, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -349,13 +348,14 @@ IDTVEC(softnet)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softclock)
 	movl	$IPL_SOFTCLOCK, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -368,14 +368,15 @@ IDTVEC(softclock)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 
 #if defined(DOM0OPS)
 IDTVEC(softxenevt)
 	movl	$IPL_SOFTXENEVT, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -385,7 +386,8 @@ IDTVEC(softxenevt)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 #endif /* defined(DOM0OPS) */
 
@@ -486,9 +488,10 @@ IDTVEC(trap10)
 	INTRENTRY
 	pushl	CPUVAR(ILEVEL)
 	pushl	%esp
+	pushl	$0			# dummy arg
 	incl	_C_LABEL(uvmexp)+V_TRAP
 	call	_C_LABEL(npxintr)
-	addl	$8,%esp
+	addl	$12,%esp
 	INTRFASTEXIT
 #else
 	ZTRAP(T_ARITHTRAP)
Index: xen/i386/hypervisor_machdep.c
===================================================================
--- xen/i386/hypervisor_machdep.c	(revision 1540)
+++ xen/i386/hypervisor_machdep.c	(working copy)
@@ -215,7 +215,7 @@ do_hypervisor_callback(struct intrframe 
 					printf("do_hypervisor_callback event %d\n", port);
 #endif
 				if (evtsource[port])
-					evtchn_do_event(port, regs);
+					call_evtchn_do_event(port, regs);
 #ifdef DOM0OPS
 				else
 					xenevt_event(port);
Index: xen/i386/genassym.cf
===================================================================
--- xen/i386/genassym.cf	(revision 1657)
+++ xen/i386/genassym.cf	(working copy)
@@ -271,6 +271,7 @@ define	CPU_INFO_IUNMASK	offsetof(struct 
 define	CPU_INFO_ILEVEL		offsetof(struct cpu_info, ci_ilevel)
 define	CPU_INFO_IDEPTH		offsetof(struct cpu_info, ci_idepth)
 define	CPU_INFO_ISOURCES	offsetof(struct cpu_info, ci_isources)
+define	CPU_INFO_INTRSTACK	offsetof(struct cpu_info, ci_intrstack)
 
 define	SIZEOF_CPU_INFO		sizeof(struct cpu_info)
 
Index: xen/xen/evtchn.c
===================================================================
--- xen/xen/evtchn.c	(revision 1799)
+++ xen/xen/evtchn.c	(working copy)
@@ -234,7 +234,6 @@ evtchn_do_event(int evtch, struct intrfr
 	ci->ci_ilevel = evtsource[evtch]->ev_maxlevel;
 	iplmask = evtsource[evtch]->ev_imask;
 	sti();
-	ci->ci_idepth++;
 #ifdef MULTIPROCESSOR
 	x86_intlock(regs);
 #endif
@@ -252,7 +251,6 @@ evtchn_do_event(int evtch, struct intrfr
 			hypervisor_set_ipending(iplmask,
 			    evtch / 32, evtch % 32);
 			/* leave masked */
-			ci->ci_idepth--;
 			splx(ilevel);
 			return 0;
 		}
@@ -267,7 +265,6 @@ evtchn_do_event(int evtch, struct intrfr
 	x86_intunlock(regs);
 #endif
 	hypervisor_enable_event(evtch);
-	ci->ci_idepth--;
 	splx(ilevel);
 
 	return 0;

--NextPart-20061125000507-1487002--