Subject: interrupt stack
To: None <port-i386@netbsd.org, port-xen@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: port-xen
Date: 11/24/2006 05:05:05
--NextPart-20061124050246-1487001
Content-Type: Text/Plain; charset=us-ascii

unless anyone objects, i'll make i386 and xen use a separate interrupt stack.

the attached patch has not been tested on amd64 yet.
(the amd64 part merely changes how the intrframe is passed to handlers.)

YAMAMOTO Takashi

--NextPart-20061124050246-1487001
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="i.diff"

Index: amd64/amd64/vector.S
===================================================================
--- amd64/amd64/vector.S	(revision 1464)
+++ amd64/amd64/vector.S	(working copy)
@@ -360,6 +360,7 @@ IDTVEC(resume_lapic_ltimer)
 	movl	$IPL_CLOCK,CPUVAR(ILEVEL)
 	sti
 	pushq	%rbx
+	movq	%rsp,%rsi
 	xorq	%rdi,%rdi
 	call	_C_LABEL(lapic_clockintr)
 	jmp	_C_LABEL(Xdoreti)
@@ -415,6 +416,7 @@ IDTVEC(intr_/**/name/**/num)						;\
 	movl	IH_LEVEL(%rbx),%r12d					;\
 	cmpl	%r13d,%r12d						;\
 	jle	7f							;\
+	movq	%rsp,%rsi						;\
 	movq	IH_ARG(%rbx),%rdi					;\
 	movl	%r12d,CPUVAR(ILEVEL)					;\
 	call	*IH_FUN(%rbx)		/* call it */			;\
Index: i386/include/frameasm.h
===================================================================
--- i386/include/frameasm.h	(revision 1407)
+++ i386/include/frameasm.h	(working copy)
@@ -100,4 +100,32 @@
 				1:
 #define	CLEAR_ASTPENDING(reg)	movl	$0, P_MD_ASTPENDING(reg)
 
+/*
+ * IDEPTH_INCR:
+ * increase ci_idepth and switch to the interrupt stack if necessary.
+ * note that the initial value of ci_idepth is -1.
+ *
+ * => should be called with interrupts disabled.
+ * => saves the old value of %esp in %eax and pushes it on the resulting stack.
+ */
+
+#define	IDEPTH_INCR \
+	incl	CPUVAR(IDEPTH); \
+	movl	%esp, %eax; \
+	jne	999f; \
+	movl	CPUVAR(INTRSTACK), %esp; \
+999:	pushl	%eax; \
+
+/*
+ * IDEPTH_DECR:
+ * decrement ci_idepth and switch back to
+ * the original stack saved by IDEPTH_INCR.
+ *
+ * => should be called with interrupts disabled.
+ */
+
+#define	IDEPTH_DECR \
+	popl	%esp; \
+	decl	CPUVAR(IDEPTH)
+
 #endif /* _I386_FRAMEASM_H_ */
Index: i386/include/param.h
===================================================================
--- i386/include/param.h	(revision 1776)
+++ i386/include/param.h	(working copy)
@@ -112,6 +112,7 @@
 #endif /*NOREDZONE */
 #endif /* !defined(UPAGES) */
 #define	USPACE		(UPAGES * NBPG)	/* total size of u-area */
+#define	INTRSTACKSIZE	8192
 
 #ifndef MSGBUFSIZE
 #define MSGBUFSIZE	4*NBPG		/* default message buffer size */
Index: i386/include/cpu.h
===================================================================
--- i386/include/cpu.h	(revision 1785)
+++ i386/include/cpu.h	(working copy)
@@ -111,6 +111,7 @@ struct cpu_info {
 	int		ci_idepth;
 	uint32_t	ci_imask[NIPL];
 	uint32_t	ci_iunmask[NIPL];
+	void *		ci_intrstack;
 
 	paddr_t ci_idle_pcb_paddr;	/* PA of idle PCB */
 	uint32_t ci_flags;		/* flags; see below */
@@ -271,7 +272,7 @@ struct clockframe {
 #define	CLKF_USERMODE(frame)	USERMODE((frame)->cf_if.if_cs, (frame)->cf_if.if_eflags)
 #define	CLKF_BASEPRI(frame)	(0)
 #define	CLKF_PC(frame)		((frame)->cf_if.if_eip)
-#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 1)
+#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 0)
 
 /*
  * This is used during profiling to integrate system time.  It can safely
Index: i386/i386/spl.S
===================================================================
--- i386/i386/spl.S	(revision 1464)
+++ i386/i386/spl.S	(working copy)
@@ -102,10 +102,11 @@ IDTVEC(spllower)
 #else /* defined(DDB) || defined(GPROF) */
 	movl	16(%esp),%ebx
 #endif /* defined(DDB) || defined(GPROF) */
-	movl	$1f,%esi		# address to resume loop at
-1:	movl	%ebx,%eax		# get cpl
-	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
+	movl	$.Lspllower_resume,%esi		# address to resume loop at
 	cli
+.Lspllower_resume:
+	movl	%ebx,%eax		# get cpl
+	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
 	andl	CPUVAR(IPENDING),%eax		# any non-masked bits left?
 	jz	2f
 	bsrl	%eax,%eax
@@ -130,14 +131,16 @@ IDTVEC(spllower)
  *   ebx - cpl to restore
  *   esi - address to resume loop at
  *   edi - scratch for Xsoftnet
+ *
+ * called with interrupts disabled.
  */
 IDTVEC(doreti)
+	IDEPTH_DECR
 	popl	%ebx			# get previous priority
-	decl	CPUVAR(IDEPTH)
-	movl	$1f,%esi		# address to resume loop at
-1:	movl	%ebx,%eax
+	movl	$.Ldoreti_resume,%esi	# address to resume loop at
+.Ldoreti_resume:
+	movl	%ebx,%eax
 	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
-	cli
 	andl	CPUVAR(IPENDING),%eax
 	jz	2f
 	bsrl    %eax,%eax               # slow, but not worth optimizing
Index: i386/i386/db_trace.c
===================================================================
--- i386/i386/db_trace.c	(revision 1912)
+++ i386/i386/db_trace.c	(working copy)
@@ -357,6 +357,26 @@ db_nextframe(
 	return 1;
 }
 
+static boolean_t
+db_intrstack_p(const void *vp)
+{
+	const struct cpu_info *ci;
+	CPU_INFO_ITERATOR cii;
+
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		const char *cp = ci->ci_intrstack;
+
+		if (cp == NULL) {
+			continue;
+		}
+		if ((cp - INTRSTACKSIZE + 4) <= (const char *)vp &&
+		    (const char *)vp <= cp) {
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
 void
 db_stack_trace_print(db_expr_t addr, boolean_t have_addr, db_expr_t count,
 		     const char *modif, void (*pr)(const char *, ...))
@@ -514,7 +534,10 @@ have_u:
 
 		if (INKERNEL((int)frame)) {
 			/* staying in kernel */
-			if (frame < lastframe ||
+			if (!db_intrstack_p(frame) &&
+			    db_intrstack_p(lastframe)) {
+				(*pr)("--- switch to interrupt stack ---\n");
+			} else if (frame < lastframe ||
 			    (frame == lastframe && callpc == lastcallpc)) {
 				(*pr)("Bad frame pointer: %p\n", frame);
 				break;
Index: i386/i386/vector.S
===================================================================
--- i386/i386/vector.S	(revision 1464)
+++ i386/i386/vector.S	(working copy)
@@ -165,11 +165,12 @@ IDTVEC(intr_lapic_ipi)
 	jae	2f
 IDTVEC(resume_lapic_ipi)
 1:
-	incl	CPUVAR(IDEPTH)
-	movl	$IPL_IPI,CPUVAR(ILEVEL)
-        sti
 	pushl	%ebx
+	IDEPTH_INCR
+	movl	$IPL_IPI,CPUVAR(ILEVEL)
+	sti
 	call	_C_LABEL(x86_ipi_handler)
+	cli
 	jmp	_C_LABEL(Xdoreti)
 2:
 	orl	$(1 << LIR_IPI),CPUVAR(IPENDING)
@@ -215,13 +216,14 @@ IDTVEC(intr_lapic_ltimer)
 	jae	2f
 IDTVEC(resume_lapic_ltimer)
 1:
-	incl	CPUVAR(IDEPTH)
+	pushl	%ebx
+	IDEPTH_INCR
 	movl	$IPL_CLOCK,CPUVAR(ILEVEL)
 	sti
-	pushl	%ebx
 	pushl	$0
 	call	_C_LABEL(lapic_clockintr)
 	addl	$4,%esp		
+	cli
 	jmp	_C_LABEL(Xdoreti)
 2:
 	orl	$(1 << LIR_TIMER),CPUVAR(IPENDING)
@@ -270,8 +272,8 @@ IDTVEC(intr_/**/name/**/num)						;\
 1:									\
 	pushl	%esi							;\
 	movl	%ebx,CPUVAR(ILEVEL)					;\
+	IDEPTH_INCR							;\
 	sti								;\
-	incl	CPUVAR(IDEPTH)						;\
 	movl	IS_HANDLERS(%ebp),%ebx					;\
 6:									\
 	movl	IH_LEVEL(%ebx),%edi					;\
@@ -289,14 +291,12 @@ IDTVEC(intr_/**/name/**/num)						;\
 	cli								;\
 	unmask(num)			/* unmask it in hardware */	;\
 	late_ack(num)							;\
-	sti								;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
 7:									\
 	cli								;\
 	orl     $(1 << num),CPUVAR(IPENDING)				;\
 	level_mask(num)							;\
 	late_ack(num)							;\
-	sti								;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
 10:									\
 	cli								;\
@@ -595,8 +595,8 @@ _C_LABEL(ioapic_level_stubs):
 
 IDTVEC(softserial)
 	movl	$IPL_SOFTSERIAL, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	sti
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -609,13 +609,14 @@ IDTVEC(softserial)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	cli
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softnet)
 	movl	$IPL_SOFTNET, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	sti
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -641,13 +642,14 @@ IDTVEC(softnet)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintunlock)	
 #endif
-	decl	CPUVAR(IDEPTH)
+	cli
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softclock)
 	movl	$IPL_SOFTCLOCK, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	sti
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -661,7 +663,8 @@ IDTVEC(softclock)
 #ifdef MULTIPROCESSOR	
 	call	_C_LABEL(x86_softintunlock)		
 #endif
-	decl	CPUVAR(IDEPTH)
+	cli
+	IDEPTH_DECR
 	jmp	*%esi
 
 /*
@@ -766,9 +769,10 @@ IDTVEC(trap10)
 	INTRENTRY
 	pushl	CPUVAR(ILEVEL)
 	pushl	%esp
+	pushl	$0			# dummy arg
 	incl	_C_LABEL(uvmexp)+V_TRAP
 	call	_C_LABEL(npxintr)
-	addl	$8,%esp
+	addl	$12,%esp
 	INTRFASTEXIT
 #else
 	ZTRAP(T_ARITHTRAP)
Index: i386/i386/genassym.cf
===================================================================
--- i386/i386/genassym.cf	(revision 1728)
+++ i386/i386/genassym.cf	(working copy)
@@ -304,6 +304,7 @@ define	CPU_INFO_IUNMASK	offsetof(struct 
 define	CPU_INFO_ILEVEL		offsetof(struct cpu_info, ci_ilevel)
 define	CPU_INFO_IDEPTH		offsetof(struct cpu_info, ci_idepth)
 define	CPU_INFO_ISOURCES	offsetof(struct cpu_info, ci_isources)
+define	CPU_INFO_INTRSTACK	offsetof(struct cpu_info, ci_intrstack)
 
 if NIOAPIC > 0
 define		IOAPIC_SC_REG		offsetof(struct ioapic_softc, sc_reg)
Index: i386/isa/npxvar.h
===================================================================
--- i386/isa/npxvar.h	(revision 1885)
+++ i386/isa/npxvar.h	(working copy)
@@ -87,4 +87,4 @@ struct npx_softc {
 
 enum npx_type npxprobe1(bus_space_tag_t, bus_space_handle_t, int);
 void npxattach(struct npx_softc *);
-int npxintr(void *, struct intrframe);
+int npxintr(void *, struct intrframe *);
Index: i386/isa/npx.c
===================================================================
--- i386/isa/npx.c	(revision 1914)
+++ i386/isa/npx.c	(working copy)
@@ -345,12 +345,11 @@ npxattach(struct npx_softc *sc)
  * IRQ13 exception handling makes exceptions even less precise than usual.
  */
 int
-npxintr(void *arg, struct intrframe iframe)
+npxintr(void *arg, struct intrframe *frame)
 {
 	struct cpu_info *ci = curcpu();
 	struct lwp *l = ci->ci_fpcurlwp;
 	union savefpu *addr;
-	struct intrframe *frame = &iframe;
 	struct npx_softc *sc;
 	ksiginfo_t ksi;
 
Index: x86/x86/intr.c
===================================================================
--- x86/x86/intr.c	(revision 1728)
+++ x86/x86/intr.c	(working copy)
@@ -119,6 +119,8 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.2
 #include <sys/proc.h>
 #include <sys/errno.h>
 
+#include <uvm/uvm_extern.h>
+
 #include <machine/atomic.h>
 #include <machine/i8259.h>
 #include <machine/cpu.h>
@@ -818,6 +820,9 @@ cpu_intr_init(struct cpu_info *ci)
 #if NLAPIC > 0 && defined(MULTIPROCESSOR)
 	int i;
 #endif
+#if defined(INTRSTACKSIZE)
+	char *cp;
+#endif /* defined(INTRSTACKSIZE) */
 
 	MALLOC(isp, struct intrsource *, sizeof (struct intrsource), M_DEVBUF,
 	    M_WAITOK|M_ZERO);
@@ -892,6 +897,11 @@ cpu_intr_init(struct cpu_info *ci)
 
 	intr_calculatemasks(ci);
 
+#if defined(INTRSTACKSIZE)
+	cp = (char *)uvm_km_alloc(kernel_map, INTRSTACKSIZE, 0, UVM_KMF_WIRED);
+	ci->ci_intrstack = cp + INTRSTACKSIZE - sizeof(register_t);
+	ci->ci_idepth = -1;
+#endif /* defined(INTRSTACKSIZE) */
 }
 
 #ifdef MULTIPROCESSOR
Index: x86/x86/lapic.c
===================================================================
--- x86/x86/lapic.c	(revision 1915)
+++ x86/x86/lapic.c	(working copy)
@@ -78,7 +78,7 @@ __KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.
 void		lapic_delay(int);
 void		lapic_microtime(struct timeval *);
 static u_int32_t lapic_gettick(void);
-void		lapic_clockintr(void *, struct intrframe);
+void		lapic_clockintr(void *, struct intrframe *);
 static void 	lapic_map(paddr_t);
 
 static void lapic_hwmask(struct pic *, int);
@@ -238,7 +238,7 @@ u_int64_t lapic_frac_cycle_per_usec;
 u_int32_t lapic_delaytab[26];
 
 void
-lapic_clockintr(void *arg, struct intrframe frame)
+lapic_clockintr(void *arg, struct intrframe *frame)
 {
 #if defined(I586_CPU) || defined(I686_CPU) || defined(__x86_64__)
 #ifndef __HAVE_TIMECOUNTER
@@ -333,7 +333,7 @@ lapic_clockintr(void *arg, struct intrfr
 #endif /* !__HAVE_TIMECOUNTER */
 #endif /* I586_CPU || I686_CPU || __x86_64__ */
 
-	hardclock((struct clockframe *)&frame);
+	hardclock((struct clockframe *)frame);
 }
 
 #if !defined(__HAVE_TIMECOUNTER) && defined(NTP)
Index: x86/isa/clock.c
===================================================================
--- x86/isa/clock.c	(revision 1915)
+++ x86/isa/clock.c	(working copy)
@@ -192,7 +192,7 @@ int		gettick(void);
 void		sysbeep(int, int);
 static void     tickle_tc(void);
 
-static int	clockintr(void *, struct intrframe);
+static int	clockintr(void *, struct intrframe *);
 static void	rtcinit(void);
 static int	rtcget(mc_todregs *);
 static void	rtcput(mc_todregs *);
@@ -404,11 +404,11 @@ tickle_tc(void) 
 }
 
 static int
-clockintr(void *arg, struct intrframe frame)
+clockintr(void *arg, struct intrframe *frame)
 {
 	tickle_tc();
 
-	hardclock((struct clockframe *)&frame);
+	hardclock((struct clockframe *)frame);
 
 #if NMCA > 0
 	if (MCA_system) {
Index: xen/include/cpu.h
===================================================================
--- xen/include/cpu.h	(revision 1571)
+++ xen/include/cpu.h	(working copy)
@@ -114,6 +114,7 @@ struct cpu_info {
 	u_int32_t	ci_imask[NIPL];
 #endif
 	u_int32_t	ci_iunmask[NIPL];
+	void *		ci_intrstack;
 
 	paddr_t ci_idle_pcb_paddr;	/* PA of idle PCB */
 	u_int32_t ci_flags;		/* flags; see below */
@@ -271,7 +272,7 @@ struct clockframe {
 #define	CLKF_USERMODE(frame)	USERMODE((frame)->cf_if.if_cs, (frame)->cf_if.if_eflags)
 #define	CLKF_BASEPRI(frame)	(0)
 #define	CLKF_PC(frame)		((frame)->cf_if.if_eip)
-#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 1)
+#define	CLKF_INTR(frame)	(curcpu()->ci_idepth > 0)
 
 /*
  * This is used during profiling to integrate system time.  It can safely
Index: xen/include/evtchn.h
===================================================================
--- xen/include/evtchn.h	(revision 1799)
+++ xen/include/evtchn.h	(working copy)
@@ -41,6 +41,7 @@ extern struct evtsource *evtsource[];
 void events_default_setup(void);
 void init_events(void);
 unsigned int evtchn_do_event(int, struct intrframe *);
+void call_evtchn_do_event(int, struct intrframe *);
 int event_set_handler(int, int (*func)(void *), void *, int, const char *);
 int event_remove_handler(int, int (*func)(void *), void *);
 
Index: xen/x86/intr.c
===================================================================
--- xen/x86/intr.c	(revision 1848)
+++ xen/x86/intr.c	(working copy)
@@ -120,6 +120,8 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.1
 #include <sys/proc.h>
 #include <sys/errno.h>
 
+#include <uvm/uvm_extern.h>
+
 #include <machine/atomic.h>
 #include <machine/i8259.h>
 #include <machine/cpu.h>
@@ -206,6 +208,7 @@ void
 cpu_intr_init(struct cpu_info *ci)
 {
 	struct iplsource *ipl;
+	char *cp;
 	int i;
 
 	ci->ci_iunmask[0] = 0xfffffffe;
@@ -261,6 +264,10 @@ cpu_intr_init(struct cpu_info *ci)
 	evcnt_attach_dynamic(&softxenevt_evtcnt, EVCNT_TYPE_INTR, NULL,
 	    ci->ci_dev->dv_xname, "xenevt");
 #endif /* defined(DOM0OPS) */
+
+	cp = (char *)uvm_km_alloc(kernel_map, INTRSTACKSIZE, 0, UVM_KMF_WIRED);
+	ci->ci_intrstack = cp + INTRSTACKSIZE - sizeof(register_t);
+	ci->ci_idepth = -1;
 }
 
 #if NPCI > 0 || NISA > 0
Index: xen/i386/npx.c
===================================================================
--- xen/i386/npx.c	(revision 1609)
+++ xen/i386/npx.c	(working copy)
@@ -368,12 +368,11 @@ npxattach(struct npx_softc *sc)
  * IRQ13 exception handling makes exceptions even less precise than usual.
  */
 int
-npxintr(void *arg, struct intrframe iframe)
+npxintr(void *arg, struct intrframe *frame)
 {
 	struct cpu_info *ci = curcpu();
 	struct lwp *l = ci->ci_fpcurlwp;
 	union savefpu *addr;
-	struct intrframe *frame = &iframe;
 	struct npx_softc *sc;
 	ksiginfo_t ksi;
 
Index: xen/i386/spl.S
===================================================================
--- xen/i386/spl.S	(revision 1464)
+++ xen/i386/spl.S	(working copy)
@@ -105,11 +105,13 @@ IDTVEC(spllower)
 #else /* defined(DDB) || defined(GPROF) */
 	movl	20(%esp),%ebx
 #endif /* defined(DDB) || defined(GPROF) */
-	movl	$1f,%esi		# address to resume loop at
-1:	movl	%ebx,%eax		# get cpl
+	movl	$.Lspllower_resume,%esi	# address to resume loop at
+1:
+	CLI(%eax)
+.Lspllower_resume:
+	movl	%ebx,%eax		# get cpl
 	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
-	CLI(%ecx)
-	andl	CPUVAR(IPENDING),%eax		# any non-masked bits left?
+	andl	CPUVAR(IPENDING),%eax	# any non-masked bits left?
 	jz	2f
 	bsrl	%eax,%eax
 	btrl	%eax,CPUVAR(IPENDING)
@@ -138,15 +140,16 @@ IDTVEC(spllower)
  *   ebx - cpl to restore
  *   esi - address to resume loop at
  *   edi - scratch for Xsoftnet
+ *
+ * called with interrupts disabled.
  */
 IDTVEC(doreti)
+	IDEPTH_DECR
 	popl	%ebx			# get previous priority
-	decl	CPUVAR(IDEPTH)
-	movl	$1f,%esi		# address to resume loop at
-8:
-1:	movl	%ebx,%eax
+	movl	$.Ldoreti_resume,%esi	# address to resume loop at
+.Ldoreti_resume:
+	movl	%ebx,%eax
 	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
-	CLI(%ecx)
 	andl	CPUVAR(IPENDING),%eax
 	jz	2f
 	bsrl    %eax,%eax               # slow, but not worth optimizing
@@ -176,7 +179,7 @@ doreti_checkast:
 	call	_C_LABEL(trap)
 	addl	$4,%esp
 	CLI(%ecx)
-	jmp	5b
+	jmp	.Ldoreti_resume
 3:
 	CHECK_DEFERRED_SWITCH(%eax)
 	jnz	9f
@@ -184,7 +187,8 @@ doreti_checkast:
     	jz	4f
 	call	_C_LABEL(stipending)
 	testl	%eax,%eax
-	jnz	8b
+	CLI(%eax)
+	jnz	.Ldoreti_resume
 4:
 	INTRFASTEXIT
 9:
@@ -192,3 +196,19 @@ doreti_checkast:
 	call	_C_LABEL(pmap_load)
 	CLI(%ecx)
 	jmp	doreti_checkast	/* recheck ASTs */
+
+/*
+ * void call_evtchn_do_event(int evtch, struct intrframe *regs)
+ */
+
+NENTRY(call_evtchn_do_event)
+	IDEPTH_INCR
+	/*
+	 * IDEPTH_INCR leaves old %esp in %eax.
+	 */
+	pushl	8(%eax)	/* regs */
+	pushl	4(%eax)	/* evtch */
+	call	_C_LABEL(evtchn_do_event)
+	addl	$8, %esp
+	IDEPTH_DECR
+	ret
Index: xen/i386/vector.S
===================================================================
--- xen/i386/vector.S	(revision 1540)
+++ xen/i386/vector.S	(working copy)
@@ -202,15 +202,14 @@ IDTVEC(resume_/**/name/**/num)						\
 1:									\
 	pushl	%esi							;\
 	movl	$num,CPUVAR(ILEVEL)					;\
+	IDEPTH_INCR /* leaves old %esp on stack	*/			;\
 	STI(%eax)							;\
-	incl	CPUVAR(IDEPTH)						;\
 	movl	IS_HANDLERS(%ebp),%ebx					;\
 	LOCK_KERNEL							;\
 6:									\
-	pushl	%esp							;\
 	pushl	IH_ARG(%ebx)						;\
 	call	*IH_FUN(%ebx)		/* call it */			;\
-	addl	$8,%esp			/* toss the arg */		;\
+	addl	$4,%esp			/* toss the arg */		;\
 	movl	IH_IPL_NEXT(%ebx),%ebx	/* next handler in chain */	;\
 	testl	%ebx,%ebx						;\
 	jnz	6b							;\
@@ -219,7 +218,6 @@ IDTVEC(resume_/**/name/**/num)						\
 	CLI(%eax)							;\
 	unmask(num)			/* unmask it in hardware */	;\
 	late_ack(num)							;\
-	STI(%eax)							;\
 	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
 
 # Just unmasking the event isn't enouth, we also need to
@@ -306,8 +304,8 @@ _C_LABEL(xenev_stubs):
 #define EVCNTHI(x) _C_LABEL(x) + EV_EVCNTHI
 IDTVEC(softserial)
 	movl	$IPL_SOFTSERIAL, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -319,13 +317,14 @@ IDTVEC(softserial)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softnet)
 	movl	$IPL_SOFTNET, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -349,13 +348,14 @@ IDTVEC(softnet)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 
 IDTVEC(softclock)
 	movl	$IPL_SOFTCLOCK, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -368,14 +368,15 @@ IDTVEC(softclock)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 
 #if defined(DOM0OPS)
 IDTVEC(softxenevt)
 	movl	$IPL_SOFTXENEVT, CPUVAR(ILEVEL)
+	IDEPTH_INCR
 	STI(%eax)
-	incl	CPUVAR(IDEPTH)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintlock)
 #endif
@@ -385,7 +386,8 @@ IDTVEC(softxenevt)
 #ifdef MULTIPROCESSOR
 	call	_C_LABEL(x86_softintunlock)
 #endif
-	decl	CPUVAR(IDEPTH)
+	CLI(%eax)
+	IDEPTH_DECR
 	jmp	*%esi
 #endif /* defined(DOM0OPS) */
 
@@ -486,9 +488,10 @@ IDTVEC(trap10)
 	INTRENTRY
 	pushl	CPUVAR(ILEVEL)
 	pushl	%esp
+	pushl	$0			# dummy arg
 	incl	_C_LABEL(uvmexp)+V_TRAP
 	call	_C_LABEL(npxintr)
-	addl	$8,%esp
+	addl	$12,%esp
 	INTRFASTEXIT
 #else
 	ZTRAP(T_ARITHTRAP)
Index: xen/i386/hypervisor_machdep.c
===================================================================
--- xen/i386/hypervisor_machdep.c	(revision 1540)
+++ xen/i386/hypervisor_machdep.c	(working copy)
@@ -215,7 +215,7 @@ do_hypervisor_callback(struct intrframe 
 					printf("do_hypervisor_callback event %d\n", port);
 #endif
 				if (evtsource[port])
-					evtchn_do_event(port, regs);
+					call_evtchn_do_event(port, regs);
 #ifdef DOM0OPS
 				else
 					xenevt_event(port);
Index: xen/i386/genassym.cf
===================================================================
--- xen/i386/genassym.cf	(revision 1657)
+++ xen/i386/genassym.cf	(working copy)
@@ -271,6 +271,7 @@ define	CPU_INFO_IUNMASK	offsetof(struct 
 define	CPU_INFO_ILEVEL		offsetof(struct cpu_info, ci_ilevel)
 define	CPU_INFO_IDEPTH		offsetof(struct cpu_info, ci_idepth)
 define	CPU_INFO_ISOURCES	offsetof(struct cpu_info, ci_isources)
+define	CPU_INFO_INTRSTACK	offsetof(struct cpu_info, ci_intrstack)
 
 define	SIZEOF_CPU_INFO		sizeof(struct cpu_info)
 
Index: xen/xen/evtchn.c
===================================================================
--- xen/xen/evtchn.c	(revision 1799)
+++ xen/xen/evtchn.c	(working copy)
@@ -234,7 +234,6 @@ evtchn_do_event(int evtch, struct intrfr
 	ci->ci_ilevel = evtsource[evtch]->ev_maxlevel;
 	iplmask = evtsource[evtch]->ev_imask;
 	sti();
-	ci->ci_idepth++;
 #ifdef MULTIPROCESSOR
 	x86_intlock(regs);
 #endif
@@ -252,7 +251,6 @@ evtchn_do_event(int evtch, struct intrfr
 			hypervisor_set_ipending(iplmask,
 			    evtch / 32, evtch % 32);
 			/* leave masked */
-			ci->ci_idepth--;
 			splx(ilevel);
 			return 0;
 		}
@@ -267,7 +265,6 @@ evtchn_do_event(int evtch, struct intrfr
 	x86_intunlock(regs);
 #endif
 	hypervisor_enable_event(evtch);
-	ci->ci_idepth--;
 	splx(ilevel);
 
 	return 0;

--NextPart-20061124050246-1487001--