Port-xen archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Interrupt codepath review



Cherry G. Mathew <cherry%zyx.in@localhost> writes:

> Hello,
>
> I'm wondering if there'd be any objections to the approach I'm taking in
> the attached patch. This will take us closer to unifying the interrupt
> path with native.
>
> A rationale is included in this discussion:
> http://mail-index.netbsd.org/tech-kern/2018/11/18/msg024198.html

Hello,

Here's an updated patch. Taylor asked me on ICB to come up with a
diagram - I'm a little short of time (and energy) for that, but if
needed, I will spend some time on it. Until then, here's a brief
description:

The motivation here is that on XEN we want to re-use the native
interrupt codepath, via vector.S

Since XEN's interrupt/event entry is via a named callback function, we
need a way to hook into the vector.S codepath in a way that closely
mimics native.

The obvious candidate here was spllower(9) since it sets up the stack
magic for us to emulate an architectural interrupt event, before
(conditionally) passing control to vector.S interrupts stubs.

Since spllower(9) also manages deferred interrupts, and on XEN the
callback essentially does exactly the same thing (with slightly
different semantics), I thought it was a good match to abuse spllower(9)
for this purpose.

So I decided to try the following:

Define a new super high priority IPL - IPL_HYPERVISOR

No interrupts are to be registered using this IPL. Its purpose is to
raise the current IPL, so that spllower() can then be used to crank
through all the remaining lower IPLs, starting with IPL_HIGH.

So in essence, the Xen callback handler will set the ci_ipending bit
corresponding to the pending IPLs of pending XEN events, and set the
current cpu IPL to IPL_HYPERVISOR using splraise(IPL_HYPERVISOR).

Then it will iterate through all IPLs to ensure that all ci_ipending
bits are processed by the spl.S and vector.S entry code - and that all
registered and corresponding OS interrupt handlers are called.

At the end, we reset the spl to the original value.

I tested this overnight on an amd64 DOMU. It held up to the usual ATF
tests and a full distribution build.

Please let me know if you have any comments. I am planning to use this
set of patches to inch towards normalising the rest of the intr.c code
towards native. I've posted separate patches for that earlier.

Thanks,
-- 
~cherry

Index: sys/arch/amd64/amd64/spl.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/spl.S,v
retrieving revision 1.36
diff -u -p -r1.36 spl.S
--- sys/arch/amd64/amd64/spl.S	22 Aug 2018 17:04:36 -0000	1.36
+++ sys/arch/amd64/amd64/spl.S	22 Nov 2018 03:11:50 -0000
@@ -200,6 +200,7 @@ IDTVEC(resume_preempt)
 	cli
 	jmp	*%r13			/* back to Xdoreti */
 IDTVEC_END(resume_preempt)
+#endif /* !XEN */
 
 /*
  * int splraise(int s);
@@ -212,6 +213,7 @@ ENTRY(splraise)
 	ret
 END(splraise)
 
+#ifndef XEN
 /*
  * void spllower(int s);
  *
Index: sys/arch/i386/i386/spl.S
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/spl.S,v
retrieving revision 1.43
diff -u -p -r1.43 spl.S
--- sys/arch/i386/i386/spl.S	4 Apr 2018 22:52:58 -0000	1.43
+++ sys/arch/i386/i386/spl.S	22 Nov 2018 03:11:51 -0000
@@ -44,7 +44,6 @@ __KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.43
 
 	.text
 
-#ifndef XEN
 /*
  * int splraise(int s);
  */
@@ -68,6 +67,7 @@ ENTRY(splraise)
 	ret
 END(splraise)
 
+#ifndef XEN
 /*
  * void spllower(int s);
  *
Index: sys/arch/x86/include/intrdefs.h
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/include/intrdefs.h,v
retrieving revision 1.20
diff -u -p -r1.20 intrdefs.h
--- sys/arch/x86/include/intrdefs.h	19 May 2014 22:47:54 -0000	1.20
+++ sys/arch/x86/include/intrdefs.h	22 Nov 2018 03:11:51 -0000
@@ -13,7 +13,9 @@
 #define	IPL_VM		0x6	/* low I/O, memory allocation */
 #define IPL_SCHED	0x7	/* medium I/O, scheduler, clock */
 #define	IPL_HIGH	0x8	/* high I/O, statclock, IPIs */
-#define	NIPL		9
+#define IPL_HYPERVISOR  0x9	/* Exclusively used by hypervisor callback */
+#define	NIPL		10
+
 
 /* Interrupt sharing types. */
 #define	IST_NONE	0	/* none */
Index: sys/arch/xen/x86/hypervisor_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/x86/hypervisor_machdep.c,v
retrieving revision 1.32
diff -u -p -r1.32 hypervisor_machdep.c
--- sys/arch/xen/x86/hypervisor_machdep.c	18 Nov 2018 23:50:48 -0000	1.32
+++ sys/arch/xen/x86/hypervisor_machdep.c	22 Nov 2018 03:11:51 -0000
@@ -211,6 +211,45 @@ stipending(void)
 	return (ret);
 }
 
+/* This is essentially a despatch function for queued interrupts */
+static void
+xen_despatch_events(struct cpu_info *ci)
+{
+	int i, spl;
+	u_long psl;
+
+	if (ci->ci_idepth >= NIPL) /* Shouldn't recurse more than NIPL */
+		return;
+
+	spl = splraise(IPL_HYPERVISOR);
+	/*
+	 * This bit uses spl(9) magic to brute force calling every
+	 * pending handler at every SPL level down to the interruptee.
+	 */
+
+	psl = x86_read_psl();
+	x86_enable_intr();
+	for (i = IPL_HYPERVISOR;ci->ci_ipending && i >= spl;i--) {
+		if (ci->ci_idepth < NIPL) /* Shouldn't recurse more than NIPL */
+			spllower(i);
+		else
+			break;
+	}
+
+	spllower(spl);
+	
+	x86_write_psl(psl);
+}
+
+static void
+xen_queue_event(struct cpu_info *ci, unsigned long port)
+{
+	/* Set pending bit for spl code */
+	hypervisor_set_ipending(evtsource[port]->ev_imask,
+	    port >> LONG_SHIFT, port & LONG_MASK);
+
+}
+
 /* Iterate through pending events and call the event handler */
 
 static inline void
@@ -220,7 +259,6 @@ evt_do_hypervisor_callback(unsigned int 
 	KASSERT(args != NULL);
 
 	struct cpu_info *ci = curcpu();
-	struct intrframe *regs = args;
 
 #ifdef PORT_DEBUG
 	if (port == PORT_DEBUG)
@@ -228,7 +266,8 @@ evt_do_hypervisor_callback(unsigned int 
 #endif
 	if (evtsource[port]) {
 		ci->ci_idepth++;
-		evtchn_do_event(port, regs);
+		xen_queue_event(ci, port);
+		xen_despatch_events(ci);
 		ci->ci_idepth--;
 	}
 #ifdef DOM0OPS
@@ -281,8 +320,13 @@ do_hypervisor_callback(struct intrframe 
 		evt_iterate_bits(&vci->evtchn_pending_sel,
 		    s->evtchn_pending, s->evtchn_mask,
 		    evt_do_hypervisor_callback, regs);
+
 	}
 
+	ci->ci_idepth++;
+	xen_despatch_events(ci);
+	ci->ci_idepth--;
+
 #ifdef DIAGNOSTIC
 	if (level != ci->ci_ilevel)
 		printf("hypervisor done %08x level %d/%d ipending %08x\n",
Index: sys/arch/xen/x86/xen_intr.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/x86/xen_intr.c,v
retrieving revision 1.9
diff -u -p -r1.9 xen_intr.c
--- sys/arch/xen/x86/xen_intr.c	16 Jan 2009 20:16:47 -0000	1.9
+++ sys/arch/xen/x86/xen_intr.c	22 Nov 2018 03:11:51 -0000
@@ -38,22 +38,6 @@ __KERNEL_RCSID(0, "$NetBSD: xen_intr.c,v
 #include <machine/intr.h>
 
 /*
- * Add a mask to cpl, and return the old value of cpl.
- */
-int
-splraise(int nlevel)
-{
-	int olevel;
-	struct cpu_info *ci = curcpu();
-
-	olevel = ci->ci_ilevel;
-	if (nlevel > olevel)
-		ci->ci_ilevel = nlevel;
-	__insn_barrier();
-	return (olevel);
-}
-
-/*
  * Restore a value to cpl (unmasking interrupts).  If any unmasked
  * interrupts are pending, call Xspllower() to process them.
  */
Index: sys/arch/xen/xen/evtchn.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/evtchn.c,v
retrieving revision 1.82
diff -u -p -r1.82 evtchn.c
--- sys/arch/xen/xen/evtchn.c	26 Oct 2018 05:33:21 -0000	1.82
+++ sys/arch/xen/xen/evtchn.c	22 Nov 2018 03:11:51 -0000
@@ -305,136 +305,6 @@ events_resume (void)
 	return true;
 }
 
-
-unsigned int
-evtchn_do_event(int evtch, struct intrframe *regs)
-{
-	struct cpu_info *ci;
-	int ilevel;
-	struct intrhand *ih;
-	int	(*ih_fun)(void *, void *);
-	uint32_t iplmask;
-	int i;
-	uint32_t iplbit;
-
-	KASSERTMSG(evtch >= 0, "negative evtch: %d", evtch);
-	KASSERTMSG(evtch < NR_EVENT_CHANNELS,
-	    "evtch number %d > NR_EVENT_CHANNELS", evtch);
-
-#ifdef IRQ_DEBUG
-	if (evtch == IRQ_DEBUG)
-		printf("evtchn_do_event: evtch %d\n", evtch);
-#endif
-	ci = curcpu();
-
-	/*
-	 * Shortcut for the debug handler, we want it to always run,
-	 * regardless of the IPL level.
-	 */
-	if (__predict_false(evtch == debug_port)) {
-		xen_debug_handler(NULL);
-		hypervisor_unmask_event(debug_port);
-#if NPCI > 0 || NISA > 0
-		hypervisor_ack_pirq_event(debug_port);
-#endif /* NPCI > 0 || NISA > 0 */		
-		return 0;
-	}
-
-	KASSERTMSG(evtsource[evtch] != NULL, "unknown event %d", evtch);
-	ci->ci_data.cpu_nintr++;
-	evtsource[evtch]->ev_evcnt.ev_count++;
-	ilevel = ci->ci_ilevel;
-
-	if (evtsource[evtch]->ev_cpu != ci /* XXX: get stats */) {
-		hypervisor_send_event(evtsource[evtch]->ev_cpu, evtch);
-		return 0;
-	}
-
-	if (evtsource[evtch]->ev_maxlevel <= ilevel) {
-#ifdef IRQ_DEBUG
-		if (evtch == IRQ_DEBUG)
-		    printf("evtsource[%d]->ev_maxlevel %d <= ilevel %d\n",
-		    evtch, evtsource[evtch]->ev_maxlevel, ilevel);
-#endif
-		hypervisor_set_ipending(evtsource[evtch]->ev_imask,
-					evtch >> LONG_SHIFT,
-					evtch & LONG_MASK);
-
-		/* leave masked */
-
-		return 0;
-	}
-	ci->ci_ilevel = evtsource[evtch]->ev_maxlevel;
-	iplmask = evtsource[evtch]->ev_imask;
-	sti();
-	mutex_spin_enter(&evtlock[evtch]);
-	ih = evtsource[evtch]->ev_handlers;
-	while (ih != NULL) {
-		if (ih->ih_cpu != ci) {
-			hypervisor_send_event(ih->ih_cpu, evtch);
-			iplmask &= ~IUNMASK(ci, ih->ih_level);
-			ih = ih->ih_evt_next;
-			continue;
-		}
-		if (ih->ih_level <= ilevel) {
-#ifdef IRQ_DEBUG
-		if (evtch == IRQ_DEBUG)
-		    printf("ih->ih_level %d <= ilevel %d\n", ih->ih_level, ilevel);
-#endif
-			cli();
-			hypervisor_set_ipending(iplmask,
-			    evtch >> LONG_SHIFT, evtch & LONG_MASK);
-			/* leave masked */
-			mutex_spin_exit(&evtlock[evtch]);
-			goto splx;
-		}
-		iplmask &= ~IUNMASK(ci, ih->ih_level);
-		ci->ci_ilevel = ih->ih_level;
-		ih_fun = (void *)ih->ih_fun;
-		ih_fun(ih->ih_arg, regs);
-		ih = ih->ih_evt_next;
-	}
-	mutex_spin_exit(&evtlock[evtch]);
-	cli();
-	hypervisor_unmask_event(evtch);
-#if NPCI > 0 || NISA > 0
-	hypervisor_ack_pirq_event(evtch);
-#endif /* NPCI > 0 || NISA > 0 */		
-
-splx:
-	/*
-	 * C version of spllower(). ASTs will be checked when
-	 * hypevisor_callback() exits, so no need to check here.
-	 */
-	iplmask = (IUNMASK(ci, ilevel) & ci->ci_ipending);
-	while (iplmask != 0) {
-		iplbit = 1 << (NIPL - 1);
-		i = (NIPL - 1);
-		while (iplmask != 0 && i > ilevel) {
-			while (iplmask & iplbit) {
-				ci->ci_ipending &= ~iplbit;
-				ci->ci_ilevel = i;
-				for (ih = ci->ci_isources[i]->is_handlers;
-				    ih != NULL; ih = ih->ih_next) {
-					KASSERT(ih->ih_cpu == ci);
-					sti();
-					ih_fun = (void *)ih->ih_fun;
-					ih_fun(ih->ih_arg, regs);
-					cli();
-				}
-				hypervisor_enable_ipl(i);
-				/* more pending IPLs may have been registered */
-				iplmask =
-				    (IUNMASK(ci, ilevel) & ci->ci_ipending);
-			}
-			i--;
-			iplbit >>= 1;
-		}
-	}
-	ci->ci_ilevel = ilevel;
-	return 0;
-}
-
 #define PRIuCPUID	"lu" /* XXX: move this somewhere more appropriate */
 
 /* PIC callbacks */


Home | Main Index | Thread Index | Old Index