Subject: Re: kernel: supervisor trap asynchronous system trap, code=0
To: None <port-xen@NetBSD.org>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: port-xen
Date: 06/28/2007 15:34:00

On Thu, Jun 28, 2007 at 12:18:44AM +0200, Manuel Bouyer wrote:
> Hi,
> I've been working on the Xen interrupt code, especially to fix the places
> where it re-enables interrupts without checking for pending interrupts
> (which could cause interrupts to be deferred until the next IRQ
> or hypercall); see the attached diff.
> 
> With this, I get a
> kernel: supervisor trap asynchronous system trap, code=0
> when running an HVM guest (it may not be related to the HVM code; it may just
> be because of the high interrupt load).
> The trap consistently happens in i386_copyin, in the loop copying the data,
> called from syscall_plain() through the uvm and ffs code.
> As I understand it, an AST should only happen in user context, and here it
> seems to occur in a kernel context. I'm not sure why this happens and

I found it: in alltraps, if there was a pending interrupt, the code would jump
back to processing ASTs without checking whether it was a user or kernel context.
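
To illustrate the fix, here is a hedged sketch in C rather than the actual i386
assembly; every helper name below is invented for illustration, and the real
change is the .Lalltraps_checkusr label in the attached diff. The point is that
after draining pending interrupts, the return path must re-check the privilege
level saved in the trap frame and only consider ASTs for user-mode returns:

/* Sketch only: trapframe_is_user() stands for the CHK_UPL test on TF_CS,
 * pending_interrupts()/process_pending_interrupts() stand for the
 * stipending()/Xspllower machinery, handle_asts() for the AST trap call. */
struct trapframe;

extern int  pending_interrupts(void);
extern void process_pending_interrupts(void);
extern int  trapframe_is_user(const struct trapframe *);
extern int  ast_pending(void);
extern void handle_asts(struct trapframe *);

void
trap_return(struct trapframe *tf)
{
	for (;;) {
		if (pending_interrupts())
			process_pending_interrupts();
		/*
		 * The bug: the old code jumped from here straight back to
		 * the AST check.  ASTs only make sense when returning to
		 * user mode, so the saved privilege level must be
		 * re-checked first.
		 */
		if (!trapframe_is_user(tf))
			break;		/* kernel context: no ASTs */
		if (!ast_pending())
			break;
		handle_asts(tf);
	}
	/* INTRFASTEXIT: restore registers and return from the trap */
}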

The attached diff works fine for me, and it includes the fix for the
read_psl() == 0 assertion failure proposed by Kazushi Marukawa (adjusted to
handle deferred interrupts).
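
The pattern the diff applies at each place that re-enables event delivery
(switch_return, the syscall return path, alltraps/trap0e) is: re-enable,
ask stipending() whether anything was deferred while delivery was off, and
if so drain it at the saved IPL before continuing.  A rough, hedged C sketch
of that pattern follows; the helper names are invented analogues, the real
code is the STIC()/CLI()/Xspllower assembly in the diff:

/* Sketch only: enable_events()/disable_events() stand for STIC()/CLI(),
 * stipending() is the real function called by the patch,
 * run_pending_interrupts() stands for the Xspllower/doreti loop, and the
 * ilevel helpers for CPUVAR(ILEVEL). */
extern void enable_events(void);
extern void disable_events(void);
extern int  stipending(void);
extern int  current_ilevel(void);
extern void run_pending_interrupts(int ilevel);
extern void restore_ilevel(int ilevel);

static void
reenable_and_drain(void)
{
	int saved;

	enable_events();
	if (!stipending())
		return;		/* nothing arrived while events were masked */

	/*
	 * Events became pending while delivery was disabled; without this
	 * they would stay deferred until the next IRQ or hypercall.
	 * Process them now at the saved IPL, then restore it.
	 */
	disable_events();
	saved = current_ilevel();
	run_pending_interrupts(saved);
	restore_ilevel(saved);
}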

I'll commit it in a few hours, but it would still be good if someone could
double-check it :)

-- 
Manuel Bouyer, LIP6, Universite Paris VI.           Manuel.Bouyer@lip6.fr
     NetBSD: 26 years of experience will always make the difference

Index: i386/locore.S
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/i386/locore.S,v
retrieving revision 1.25
diff -u -r1.25 locore.S
--- i386/locore.S	17 May 2007 14:51:35 -0000	1.25
+++ i386/locore.S	28 Jun 2007 13:27:49 -0000
@@ -663,6 +663,7 @@
 	 * Switch to newlwp's stack.
 	 */
 
+	CLI(%ebx)
 	movl	L_ADDR(%edi),%ebx
 	movl	PCB_EBP(%ebx),%ebp
 	movl	PCB_ESP(%ebx),%esp
@@ -690,6 +691,14 @@
 	jne	check_ras
 
 switch_return:
+	STIC(%ebx)
+	jz 1f
+	call	_C_LABEL(stipending)
+	testl	%eax,%eax
+	jz 1f
+	pushl  CPUVAR(ILEVEL)
+	call	_C_LABEL(Xspllower) # process pending interrupts
+1:
 	movl	%esi,%eax	# return 'oldlwp'
 	popl	%edi
 	popl	%esi
@@ -780,9 +789,29 @@
 	call	_C_LABEL(trap)
 	addl	$4,%esp
 	jmp	.Lsyscall_checkast
-1:	STI(%eax)
-	CHECK_DEFERRED_SWITCH(%eax)
+1:	CHECK_DEFERRED_SWITCH(%eax)
 	jnz	9f
+	STIC(%eax)
+	jz	14f
+	call	_C_LABEL(stipending)
+	testl	%eax,%eax
+	jz	14f
+	/* process pending interrupts */
+	CLI(%eax)
+	movl	CPUVAR(ILEVEL), %ebx
+	movl	$.Lsyscall_resume, %esi # address to resume loop at
+.Lsyscall_resume:
+	movl	%ebx,%eax		# get cpl
+	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
+	andl	CPUVAR(IPENDING),%eax	# any non-masked bits left?
+	jz	17f
+	bsrl	%eax,%eax
+	btrl	%eax,CPUVAR(IPENDING)
+	movl	CPUVAR(ISOURCES)(,%eax,4),%eax
+	jmp	*IS_RESUME(%eax)
+17:	movl	%ebx, CPUVAR(ILEVEL)	#restore cpl
+	jmp	.Lsyscall_checkast
+14:
 #ifndef DIAGNOSTIC
 	INTRFASTEXIT
 #else /* DIAGNOSTIC */
@@ -801,7 +830,8 @@
 5:	.asciz	"WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n"
 6:	.asciz	"WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n"
 #endif /* DIAGNOSTIC */
-9:	call    _C_LABEL(pmap_load)
+9:	STI(%eax)
+	call    _C_LABEL(pmap_load)
 	jmp     .Lsyscall_checkast        /* re-check ASTs */
 
 #if NNPX > 0
Index: i386/spl.S
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/i386/spl.S,v
retrieving revision 1.9
diff -u -r1.9 spl.S
--- i386/spl.S	25 Jun 2007 20:09:34 -0000	1.9
+++ i386/spl.S	28 Jun 2007 13:27:49 -0000
@@ -144,8 +144,6 @@
  * called with interrupt disabled.
  */
 IDTVEC(doreti)
-	IDEPTH_DECR
-	popl	%ebx			# get previous priority
 .Ldoreti_resume:
 	movl	$.Ldoreti_resume,%esi	# address to resume loop at
 	movl	%ebx,%eax
Index: i386/vector.S
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/i386/vector.S,v
retrieving revision 1.18
diff -u -r1.18 vector.S
--- i386/vector.S	25 Jun 2007 20:09:34 -0000	1.18
+++ i386/vector.S	28 Jun 2007 13:27:49 -0000
@@ -194,8 +194,8 @@
 	subl	$4,%esp							;\
 	pushl	$T_ASTFLT		/* trap # for doing ASTs */	;\
 	INTRENTRY							;\
+	movl	$_C_LABEL(Xdoreti), %esi; /* we now have a trap frame, so loop using doreti instead */ ;\
 IDTVEC(resume_/**/name/**/num)						\
-	/*movl	%esp,%ecx*/						;\
 	movl	$IREENT_MAGIC,TF_ERR(%esp)				;\
 	pushl	%ebx							;\
 	movl	CPUVAR(ISOURCES) + (num) * 4, %ebp			;\
@@ -216,7 +216,9 @@
 	CLI(%eax)							;\
 	unmask(num)			/* unmask it in hardware */	;\
 	late_ack(num)							;\
-	jmp	_C_LABEL(Xdoreti)	/* lower spl and do ASTs */	;\
+	IDEPTH_DECR							;\
+	popl	%ebx							;\
+	jmp	*%esi			/* lower spl and do ASTs */	;\
 
 # Just unmasking the event isn't enouth, we also need to
 # reassert the event pending bit if needed. For now just call
@@ -578,6 +580,7 @@
 	pushl	%esp
 	call	_C_LABEL(trap)
 	addl	$4,%esp
+.Lalltraps_checkusr:
 	testb	$CHK_UPL,TF_CS(%esp)
 	jnz	.Lalltraps_checkast
 #ifdef VM86
@@ -604,8 +607,24 @@
 6:	STIC(%eax)
     	jz	4f
 	call	_C_LABEL(stipending)
-	#testl	%eax,%eax		/* XXXcl */
-	#jnz	1b
+	testl	%eax,%eax
+	jz	4f
+	/* process pending interrupts */
+	CLI(%eax)
+	movl	CPUVAR(ILEVEL), %ebx
+	movl	$.Lalltraps_resume, %esi # address to resume loop at
+.Lalltraps_resume:
+	movl    %ebx,%eax		# get cpl
+	movl    CPUVAR(IUNMASK)(,%eax,4),%eax
+	andl    CPUVAR(IPENDING),%eax   # any non-masked bits left?
+	jz	7f
+	bsrl    %eax,%eax
+	btrl    %eax,CPUVAR(IPENDING)
+	movl    CPUVAR(ISOURCES)(,%eax,4),%eax
+	jmp     *IS_RESUME(%eax)
+7:	movl 	%ebx, CPUVAR(ILEVEL) #restore cpl
+	jmp 	.Lalltraps_checkusr
+	
 4:
 #ifndef DIAGNOSTIC
 	INTRFASTEXIT
@@ -644,6 +663,7 @@
 	call	_C_LABEL(trap)
 	addl	$4,%esp
 	addl	$4,%esp
+.Ltrap0e_checkusr:
 	testb	$CHK_UPL,TF_CS(%esp)
 	jnz	trap0e_checkast
 #ifdef VM86
@@ -670,8 +690,23 @@
 6:	STIC(%eax)
     	jz	4f
 	call	_C_LABEL(stipending)
-	#testl	%eax,%eax		/* XXXcl */
-	#jnz	1b
+	testl	%eax,%eax
+	jz	4f
+	/* process for pending interrupts */
+	CLI(%eax)
+	movl	CPUVAR(ILEVEL), %ebx
+	movl	$.Ltrap0e_resume, %esi # address to resume loop at
+.Ltrap0e_resume:
+	movl    %ebx,%eax		# get cpl
+	movl    CPUVAR(IUNMASK)(,%eax,4),%eax
+	andl    CPUVAR(IPENDING),%eax   # any non-masked bits left?
+	jz	7f
+	bsrl    %eax,%eax
+	btrl    %eax,CPUVAR(IPENDING)
+	movl    CPUVAR(ISOURCES)(,%eax,4),%eax
+	jmp     *IS_RESUME(%eax)
+7:	movl	%ebx, CPUVAR(ILEVEL) #restore cpl
+	jmp 	.Ltrap0e_checkusr
 4:
 #ifndef DIAGNOSTIC
 	INTRFASTEXIT
