Re: interrupt cleanup #1

To: port-xen%NetBSD.org@localhost
Subject: Re: interrupt cleanup #1
From: Cherry G.Mathew <cherry%zyx.in@localhost>
Date: Thu, 28 Jun 2018 17:54:25 +0530

Cherry G. Mathew <cherry%zyx.in@localhost> writes:

> Cherry G. Mathew <cherry%zyx.in@localhost> writes:
>
>> Hello Xen,
>>
>> I've been spending a little attention on our PV interrupt path with a
>> view to getting PVHVM etc. to work better.
>
> Hello - now that things have settled in nicely for 8.0, I'm going to
> start rocking the boat again. The next set of patches are going to be
> quite intrusive - especially wrt to interrupt paths, so I'd really like
> more than just one pair of eyeballs on them please.
>
> Sometime tomorrow I'll send a first re-org patch (no functional changes)
> followed by the actual meat later in the week.
>
> The idea with this set of reorgs is to be able to pull in the OpenBSD
> PVHVM changes with as little disruption as possible, while maintaining
> backwards compatibility to our existing features.
>
> Many Thanks,

As promised, but a little later than I thought - please find a patch
below. There are no functional changes, and this is just a first cut.

I'd appreciate testing on older machines (esp. without ACPI, i386 with
and without PAE, etc.) - dom0 mostly, and of course domU etc. on amd64.

Please let me know how it goes.

-- 
~cherry

diff --git a/sys/arch/x86/include/pic.h b/sys/arch/x86/include/pic.h
index ef7fc11..26a0033 100644
--- a/sys/arch/x86/include/pic.h
+++ b/sys/arch/x86/include/pic.h
@@ -23,6 +23,9 @@ struct pic {
 	struct intrstub *pic_edge_stubs;
 	struct ioapic_softc *pic_ioapic; /* if pic_type == PIC_IOAPIC */
 	struct msipic *pic_msipic; /* if (pic_type == PIC_MSI) || (pic_type == PIC_MSIX) */
+#if defined(XEN)
+	struct xen_pic *pic_xen;
+#endif
 };
 
 /*
diff --git a/sys/arch/x86/isa/isa_machdep.c b/sys/arch/x86/isa/isa_machdep.c
index 7d41f89..c3fe66c 100644
--- a/sys/arch/x86/isa/isa_machdep.c
+++ b/sys/arch/x86/isa/isa_machdep.c
@@ -237,31 +237,8 @@ isa_intr_establish_xname(isa_chipset_tag_t ic, int irq, int type, int level,
 			printf("isa_intr_establish: no MP mapping found\n");
 	}
 #endif
-#if defined(XEN)
-	KASSERT(APIC_IRQ_ISLEGACY(irq));
-
-	int evtch;
-	const char *intrstr;
-	char intrstr_buf[INTRIDBUF];
-
-	mpih |= APIC_IRQ_LEGACY_IRQ(irq);
-
-	evtch = xen_pirq_alloc(&mpih, type); /* XXX: legacy - xen just tosses irq back at us */
-	if (evtch == -1)
-		return NULL;
-
-	intrstr = intr_create_intrid(irq, pic, pin, intrstr_buf,
-	    sizeof(intrstr_buf));
-
-	aprint_debug("irq: %d requested on pic: %s.\n", irq, pic->pic_name);
-
-	return (void *)pirq_establish(irq, evtch, ih_fun, ih_arg, level,
-	    intrstr, xname);
-#else /* defined(XEN) */
 	return intr_establish_xname(irq, pic, pin, type, level, ih_fun, ih_arg,
 	    false, xname);
-#endif
-
 }
 
 /* Deregister an interrupt handler. */
diff --git a/sys/arch/x86/x86/intr.c b/sys/arch/x86/x86/intr.c
index 5790e80..4fa52be 100644
--- a/sys/arch/x86/x86/intr.c
+++ b/sys/arch/x86/x86/intr.c
@@ -1264,7 +1264,7 @@ intr_establish_xname(int legacy_irq, struct pic *pic, int pin,
 #if NPCI > 0 || NISA > 0
 	struct pintrhand *pih;
 	intr_handle_t irq;
-	int evtchn;
+	int evtchn, token;
 
 	KASSERTMSG(legacy_irq == -1 || (0 <= legacy_irq && legacy_irq < 16),
 	    "bad legacy IRQ value: %d", legacy_irq);
@@ -1287,10 +1287,40 @@ intr_establish_xname(int legacy_irq, struct pic *pic, int pin,
 	intrstr = intr_create_intrid(irq, pic, pin, intrstr_buf,
 	    sizeof(intrstr_buf));
 
-	evtchn = xen_pirq_alloc(&irq, type);
-	pih = pirq_establish(irq & 0xff, evtchn, handler, arg, level,
-	    intrstr, xname);
+	pih = kmem_zalloc(sizeof(struct pintrhand),
+	    cold ? KM_NOSLEEP : KM_SLEEP);
+	if (pih == NULL) {
+		printf("%s: can't allocate handler info\n", __func__);
+		return NULL;
+	}
+
+	extern struct cpu_info phycpu_info_primary; /* XXX */
+	struct cpu_info *ci = &phycpu_info_primary;	
+	xen_pic.pic_addroute(pic, &phycpu_info_primary, irq, -1, type);
+
+	token = xen_pirq_get_token_from_pirq(irq);
+	evtchn = xen_pirq_get_port_from_token(xen_pic.pic_xen, token);
+	KASSERT(evtchn < NR_EVENT_CHANNELS);
+	KASSERT(evtchn >= 0);
+
+	pih->pic = pic;
 	pih->pic_type = pic->pic_type;
+	pih->pirq = token; /* This is just a handle that xen gives us */
+	pih->ci = ci;
+	pih->evtch = evtchn;
+	pih->func = handler;
+	pih->arg = arg;
+
+	extern int pirq_interrupt(void *);
+	if (event_set_handler(evtchn, pirq_interrupt, pih, level,
+		intrstr, xname) != 0) {
+		kmem_free(pih, sizeof(struct pintrhand));
+		return NULL;
+	}
+
+	hypervisor_prime_pirq_event(irq & 0xff,evtchn);
+	hypervisor_enable_event(evtchn);
+
 	return pih;
 #endif /* NPCI > 0 || NISA > 0 */
 
diff --git a/sys/arch/x86/x86/ioapic.c b/sys/arch/x86/x86/ioapic.c
index db34dc5..5643638 100644
--- a/sys/arch/x86/x86/ioapic.c
+++ b/sys/arch/x86/x86/ioapic.c
@@ -564,28 +564,6 @@ ioapic_addroute(struct pic *pic, struct cpu_info *ci, int pin,
 	pp->ip_vector = idtvec;
 	pp->ip_cpu = ci;
 	apic_set_redir(sc, pin, idtvec, ci);
-
-#if defined(XEN)
-	/*
-	 * This is kludgy, and not the right place, but we can't bind
-	 * before the routing has been set to the appropriate 'vector'.
-	 * in x86/intr.c, this is done after idt_vec_set(), where this
-	 * would have been more appropriate to put this.
-	 */
-
-	int port, irq;
-	irq = vect2irq[idtvec];
-	KASSERT(irq != 0);
-	port = bind_pirq_to_evtch(irq);
-	KASSERT(port < NR_EVENT_CHANNELS);
-	KASSERT(port >= 0);
-
-	KASSERT(irq2port[irq] == 0);
-	irq2port[irq] = port + 1;
-
-	xen_atomic_set_bit(&ci->ci_evtmask[0], port);
-#endif
-
 }
 
 static void
@@ -594,15 +572,6 @@ ioapic_delroute(struct pic *pic, struct cpu_info *ci, int pin,
 {
 
 	ioapic_hwmask(pic, pin);
-
-#if defined(XEN)
-	int port, irq;
-	irq = vect2irq[idtvec];
-	port = unbind_pirq_from_evtch(irq);
-
-	KASSERT(port < NR_EVENT_CHANNELS);
-#endif
-
 }
 
 #ifdef DDB
diff --git a/sys/arch/xen/conf/files.xen b/sys/arch/xen/conf/files.xen
index 4296b4b..fad9108 100644
--- a/sys/arch/xen/conf/files.xen
+++ b/sys/arch/xen/conf/files.xen
@@ -103,6 +103,7 @@ file	arch/xen/xen/xen_debug.c
 file	arch/xen/xen/clock.c
 file	arch/x86/isa/rtc.c		dom0ops
 file	arch/xen/xen/evtchn.c
+file	arch/xen/x86/xen_pic.c
 
 file	arch/xen/xen/xengnt.c
 
diff --git a/sys/arch/xen/include/evtchn.h b/sys/arch/xen/include/evtchn.h
index f835a9c..53e9816 100644
--- a/sys/arch/xen/include/evtchn.h
+++ b/sys/arch/xen/include/evtchn.h
@@ -65,6 +65,8 @@ struct pintrhand {
 	int pic_type;
 	int pirq;
 	int evtch;
+	struct pic *pic;
+	struct cpu_info *ci;
 	int (*func)(void *);
 	void *arg;
 };
diff --git a/sys/arch/xen/include/hypervisor.h b/sys/arch/xen/include/hypervisor.h
index a04a8e0..1294de7 100644
--- a/sys/arch/xen/include/hypervisor.h
+++ b/sys/arch/xen/include/hypervisor.h
@@ -130,6 +130,7 @@ extern volatile shared_info_t *HYPERVISOR_shared_info;
 struct intrframe;
 struct cpu_info;
 void do_hypervisor_callback(struct intrframe *regs);
+void hypervisor_prime_pirq_event(int, unsigned int);
 void hypervisor_enable_event(unsigned int);
 
 extern int xen_version;
diff --git a/sys/arch/xen/include/intr.h b/sys/arch/xen/include/intr.h
index c368ab9..1e350bf 100644
--- a/sys/arch/xen/include/intr.h
+++ b/sys/arch/xen/include/intr.h
@@ -62,16 +62,30 @@ struct evtsource {
 };
 
 extern struct intrstub xenev_stubs[];
-extern int irq2vect[256];
-extern int vect2irq[256];
-extern int irq2port[NR_EVENT_CHANNELS]; /* actually port + 1, so that 0 is invaid */
+
+/* pirq binding related mappings */
+struct xen_pic {
+	int irq2vect[256];	/* token -> vector xen assigned; 0 if unset */
+	int vect2irq[256];	/* vector -> token; 0 if unset */
+	/*
+	 * Note: The 'irq' here comes from MP/apic tables, not xen.
+	 * Later we want this to be a lookup for MSI etc.
+	 * See intr.c:intr_establish_xname() for a usage example.
+	 */
+	int irq2port[256]; /* actually port + 1, so that 0 is invalid */
+};
 
 #ifdef MULTIPROCESSOR
 int xen_intr_biglock_wrapper(void *);
 #endif
 
 #if defined(DOM0OPS) || NPCI > 0
-int xen_pirq_alloc(intr_handle_t *, int);
+int xen_pirq_get_vector_from_token(struct xen_pic *, int);
+int xen_pirq_get_token_from_vector(struct xen_pic *, int);
+void xen_pirq_save_port_token(struct xen_pic *, int, int);
+int xen_pirq_get_port_from_token(struct xen_pic *, int);
+int xen_pirq_get_token_from_pirq(intr_handle_t);
+int xen_pirq_alloc(intr_handle_t);
 #endif /* defined(DOM0OPS) || NPCI > 0 */
 
 #ifdef MULTIPROCESSOR
diff --git a/sys/arch/xen/x86/pintr.c b/sys/arch/xen/x86/pintr.c
index 2809f2c..79a02be 100644
--- a/sys/arch/xen/x86/pintr.c
+++ b/sys/arch/xen/x86/pintr.c
@@ -142,9 +142,6 @@ struct intrstub ioapic_level_stubs[MAX_INTR_SOURCES] = {{0,0}};
 struct intrstub x2apic_edge_stubs[MAX_INTR_SOURCES] = {{0,0}};
 struct intrstub x2apic_level_stubs[MAX_INTR_SOURCES] = {{0,0}};
 #include <machine/i82093var.h>
-int irq2port[NR_EVENT_CHANNELS] = {0}; /* actually port + 1, so that 0 is invaid */
-int irq2vect[256] = {0};
-int vect2irq[256] = {0};
 #endif /* NIOAPIC */
 #if NACPICA > 0
 #include <machine/mpconfig.h>
@@ -159,13 +156,19 @@ int vect2irq[256] = {0};
 #endif
 
 #if defined(DOM0OPS) || NPCI > 0
-int
-xen_pirq_alloc(intr_handle_t *pirq, int type)
+/*
+ * Ask Xen for a physical CPU vector.
+ * Give xen a hint about our idea of what we think the irq should be
+ * if we were native.
+ *
+ */
+#if defined(NIOAPIC) 
+static int
+xen_assign_vector(int irqhint)
 {
+
 	physdev_op_t op;
-	int irq = *pirq;
-#if NIOAPIC > 0
-	extern struct cpu_info phycpu_info_primary; /* XXX */
+
 	/*
 	 * The hypervisor has already allocated vectors and IRQs for the
 	 * devices. Reusing the same IRQ doesn't work because as we bind
@@ -178,54 +181,180 @@ xen_pirq_alloc(intr_handle_t *pirq, int type)
 	 * or none is available.
 	 */
 	static int xen_next_irq = 200;
-	struct ioapic_softc *ioapic = ioapic_find(APIC_IRQ_APIC(*pirq));
-	struct pic *pic = &ioapic->sc_pic;
-	int pin = APIC_IRQ_PIN(*pirq);
 
-	if (*pirq & APIC_INT_VIA_APIC) {
-		irq = vect2irq[ioapic->sc_pins[pin].ip_vector];
-		if (ioapic->sc_pins[pin].ip_vector == 0 || irq == 0) {
-			/* allocate IRQ */
-			irq = APIC_IRQ_LEGACY_IRQ(*pirq);
-			if (irq <= 0 || irq > 15)
-				irq = xen_next_irq--;
+	if (irqhint <= 0 || irqhint > 15) {
+		irqhint = xen_next_irq--;
+	}
+
 retry:
-			/* allocate vector and route interrupt */
-			op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
-			op.u.irq_op.irq = irq;
-			if (HYPERVISOR_physdev_op(&op) < 0) {
-				irq = xen_next_irq--;
-				if (xen_next_irq == 15)
-					panic("PHYSDEVOP_ASSIGN_VECTOR irq %d", irq);
-				goto retry;
+	op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
+	op.u.irq_op.irq = irqhint;
+	if (HYPERVISOR_physdev_op(&op) < 0) {
+		irqhint = xen_next_irq--;
+		if (xen_next_irq == 15)
+			panic("PHYSDEVOP_ASSIGN_VECTOR irq %d", irqhint);
+		goto retry;
+	}
+
+	return op.u.irq_op.vector;
+}
+#endif /* NIOAPIC */
+
+static int
+xen_assign_vector_legacy(int irqhint)
+{
+	physdev_op_t op;
+	
+	KASSERT(APIC_IRQ_ISLEGACY(irqhint));	/* legacy IRQs only */
+
+	/* Ask Xen to assign a vector for irqhint; panic if it refuses. */
+	op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
+	op.u.irq_op.irq = irqhint;
+	if (HYPERVISOR_physdev_op(&op) < 0) {
+		panic("PHYSDEVOP_ASSIGN_VECTOR irq %d", irqhint);
+	}
+
+	return op.u.irq_op.vector;	/* read back from the same op */
+}
+
+static void
+xen_pirq_save_vector_token(struct xen_pic *xpic, int vector, int token)
+{
+	KASSERT(xpic->irq2vect[token] == 0);	/* token must be unused */
+	xpic->irq2vect[token] = vector;		/* token -> vector */
+
+	KASSERT(xpic->vect2irq[vector] == 0);	/* vector must be unused */
+	xpic->vect2irq[vector] = token;		/* vector -> token */
+
+	KASSERT(xpic->irq2port[token] == 0);	/* no port recorded yet */
+}
+
+int
+xen_pirq_get_vector_from_token(struct xen_pic *xpic, int token)
+ {
+	KASSERT(xpic->irq2vect[token] != 0);	/* must have been saved */
+
+	return xpic->irq2vect[token];	/* vector recorded for this token */
+
+}
+
+int
+xen_pirq_get_token_from_vector(struct xen_pic *xpic, int vector)
+{
+	KASSERT(xpic->vect2irq[vector] != 0);	/* must have been saved */
+	return xpic->vect2irq[vector];	/* token recorded for this vector */
+}
+
+void
+xen_pirq_save_port_token(struct xen_pic *xpic, int port, int token)
+{
+	KASSERT(xpic->irq2port[token] == 0);	/* token not yet bound */
+	xpic->irq2port[token] = port + 1;	/* biased so 0 means "none" */
+}
+
+int
+xen_pirq_get_port_from_token(struct xen_pic *xpic, int token)
+{
+	KASSERT(xpic->irq2port[token] > 0);	/* a port must be recorded */
+	return xpic->irq2port[token] - 1;	/* undo the +1 bias */
+}
+
+int
+xen_pirq_get_token_from_pirq(intr_handle_t pirq)
+{
+	int token = pirq;	/* legacy default: the handle is the token */
+#if NIOAPIC > 0
+	struct ioapic_softc *ioapic = ioapic_find(APIC_IRQ_APIC(pirq));
+	int pin = APIC_IRQ_PIN(pirq);
+
+	if (pirq & APIC_INT_VIA_APIC) {	/* look up by the pin's vector */
+		token = xen_pirq_get_token_from_vector(xen_pic.pic_xen, ioapic->sc_pins[pin].ip_vector);		
+		KASSERT(token != 0);
+	} else
+#endif /* NIOAPIC */
+	{
+		KASSERT(APIC_IRQ_ISLEGACY(pirq));
+		token = pirq; /* Redundant but semantically clear */
+	}
+
+	return token; 
+}
+
+int
+xen_pirq_alloc(intr_handle_t pirq)
+{
+	int irq = pirq;
+#if NIOAPIC > 0
+	struct ioapic_softc *ioapic = ioapic_find(APIC_IRQ_APIC(pirq));
+	int pin = APIC_IRQ_PIN(pirq);
+
+	if (pirq & APIC_INT_VIA_APIC) {
+		irq = xen_pic.pic_xen->vect2irq[ioapic->sc_pins[pin].ip_vector];
+		if (ioapic->sc_pins[pin].ip_vector == 0 || irq == 0) {
+			int xen_vector; /*
+					 * The allocated vector xen
+					 * gives us
+					 */
+			int token; /*
+				    * Our handle to the vector that we
+				    * got from Xen. We use this to
+				    * tell Xen about this vector, for
+				    * binding, for eg:
+				    */
+
+			/* request vector */
+			xen_vector = xen_assign_vector(APIC_IRQ_LEGACY_IRQ(pirq));
+			token = xen_vector;
+
+			/*
+			 * On Native, the first 15 IRQs are identity
+			 * mapped to CPU IDT vector numbers. Our
+			 * current implementation seems to imply that
+			 * the dom0 needs to assume that the hardware
+			 * sources of these IRQs are fixed, and as
+			 * documented in the ISA spec, whereas,
+			 * the CPU vectors for the corresponding
+			 * callbacks can be re-routed to arbitrary
+			 * vectors by Xen (presumably via APIC
+			 * magic).
+			 *
+			 * XXX: cherry - it's unclear to me if current
+			 * hypervisor implementations follow any of
+			 * the above, but I'm maintaining the current
+			 * logic for clarity and any backwards compat
+			 * that I may have missed.
+			 *
+			 */
+
+			if (APIC_IRQ_LEGACY_IRQ(pirq) > 0 &&
+			    APIC_IRQ_LEGACY_IRQ(pirq) <= 15) {
+				token = APIC_IRQ_LEGACY_IRQ(pirq);
 			}
-			KASSERT(irq2vect[irq] == 0);
-			irq2vect[irq] = op.u.irq_op.vector;
-			KASSERT(vect2irq[op.u.irq_op.vector] == 0);
-			vect2irq[op.u.irq_op.vector] = irq;
-			pic->pic_addroute(pic, &phycpu_info_primary, pin,
-			    op.u.irq_op.vector, type);
+
+			xen_pirq_save_vector_token(xen_pic.pic_xen,
+			    xen_vector, token);
+			/*XXX:*/ irq = token;
 		}
-		*pirq &= ~0xff;
-		*pirq |= irq;
 	} else
 #endif /* NIOAPIC */
 	{
-		if (irq2port[irq] == 0) {
-			op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
-			op.u.irq_op.irq = irq;
-			if (HYPERVISOR_physdev_op(&op) < 0) {
-				panic("PHYSDEVOP_ASSIGN_VECTOR irq %d", irq);
-			}
-			KASSERT(irq2vect[irq] == 0);
-			irq2vect[irq] = op.u.irq_op.vector;
-			KASSERT(vect2irq[op.u.irq_op.vector] == 0);
-			vect2irq[op.u.irq_op.vector] = irq;
-			KASSERT(irq2port[irq] == 0);
-			irq2port[irq] = bind_pirq_to_evtch(irq) + 1;
+		if (xen_pic.pic_xen->irq2port[irq] == 0) {
+			int xen_vector; /*
+					 * The allocated vector xen
+					 * gives us
+					 */
+			xen_vector = xen_assign_vector_legacy(irq);
+
+			xen_pirq_save_vector_token(xen_pic.pic_xen,
+			    xen_vector, irq);
 		}
 	}
-	KASSERT(irq2port[irq] > 0);
-	return (irq2port[irq] - 1);
+
+	/*
+	 * Although we call it 'irq', it really is just a
+	 * token
+	 */
+	return irq; 
 }
 #endif /* defined(DOM0OPS) || NPCI > 0 */
+
diff --git a/sys/arch/xen/x86/xen_pic.c b/sys/arch/xen/x86/xen_pic.c
new file mode 100644
index 0000000..2692eb6
--- /dev/null
+++ b/sys/arch/xen/x86/xen_pic.c
@@ -0,0 +1,337 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software written under contract with The NetBSD Foundation
+ * by Cherry G. Mathew <cherry%NetBSD.org@localhost>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include "opt_xen.h"
+
+#include <sys/param.h>
+#include <sys/cpu.h>
+#include <sys/kernel.h>
+#include <machine/i82093var.h>
+
+#include "ioapic.h"
+
+#include <xen/evtchn.h>
+
+extern struct cpu_info phycpu_info_primary;
+
+static void xen_pic_mask(struct pic *, int);
+static void xen_pic_unmask(struct pic *, int);
+static void xen_pic_addroute(struct pic *, struct cpu_info *, int, int, int);
+static void xen_pic_delroute(struct pic *, struct cpu_info *, int, int, int);
+static bool xen_pic_trymask(struct pic *, int);
+
+static struct xen_pic pic_xen = {	/* pirq mapping tables, all empty */
+	.irq2port = {0},
+	.irq2vect = {0},
+	.vect2irq = {0}
+};
+
+struct pic xen_pic = {
+	.pic_name = "xenev0",
+	.pic_type = PIC_XEN,
+	.pic_vecbase = 0,
+	.pic_apicid = 0,
+	.pic_lock = __SIMPLELOCK_UNLOCKED,
+	.pic_hwmask = xen_pic_mask,
+	.pic_hwunmask = xen_pic_unmask,
+	.pic_addroute = xen_pic_addroute,
+	.pic_delroute = xen_pic_delroute,
+	.pic_trymask = xen_pic_trymask,
+	.pic_level_stubs = xenev_stubs,	/* level and edge share stubs */
+	.pic_edge_stubs = xenev_stubs,
+	.pic_xen = &pic_xen,		/* the mapping tables above */
+};
+
+	
+/*
+ * We try to stick to the traditional x86 PIC semantics wrt Xen
+ * events.
+ *
+ * PIC pins exist in a global namespace which may be hierarchical, and
+ * are mapped to a cpu bus concept called 'IRQ' numbers, which are
+ * also global, but linear. Thus a PIC, pin tuple will always map to
+ * an IRQ number. These tuples can alias to the same IRQ number, thus
+ * causing IRQ "sharing". IRQ numbers can be bound to specific CPUs,
+ * and to specific callback vector indices on the CPU called idt_vec,
+ * which are aliases to handlers meant to run on destination
+ * CPUs. This binding can also happen at interrupt time and resolved
+ * 'round-robin' between all CPUs, depending on the lapic setup. In
+ * this case, all CPUs need to have identical idt_vec->handler
+ * mappings.
+ *
+ * The job of pic_addroute() is to setup the 'wiring' between the
+ * source pin, and the destination CPU callback offset, ideally on a
+ * specific CPU in MP systems (or 'round-robin').
+ *
+ * On Xen, a global namespace of 'events' exist, which are initially
+ * bound to nothing. This is similar to the relationship between
+ * realworld IRQ numbers wrt PIC pins, since before routing, IRQ
+ * numbers by themselves have no causal connection setup with the real
+ * world. (Except for the hardwired cases on the PC Architecture, 
+ * which we ignore for the purpose of this description). However the
+ * really important routing is from pin to idt_vec. On PIC_XEN, all
+ * three (pic, irq, idt_vec) belong to the same namespace and are
+ * identical. Further, the mapping between idt_vec and the actual
+ * callback handler is setup via calls to the evtchn.h api - this
+ * last bit is analogous to x86/idt.c:idt_vec_set() on real h/w
+ *
+ * For now we handle two cases:
+ * - IPC style events - eg: timer, PV devices, etc.
+ * - dom0 physical irq bound events.
+ *
+ * In the case of IPC style events, we currently externalise the
+ * event binding by using evtchn.h functions. From the POV of
+ * PIC_XEN ,  'pin' , 'irq' and 'idt_vec' are all identical to the
+ * port number of the event.
+ *
+ * In the case of dom0 physical irq bound events, we currently do
+ * event binding by exporting evtchn.h functions. From the POV of
+ * PIC_LAPIC/PIC_IOAPIC, the 'pin' is the hardware pin, the 'irq' is
+ * the x86 global irq number  - the port number is extracted out of a
+ * global array (this is currently kludgy and breaks API abstraction)
+ * and the binding happens during pic_addroute() of the ioapic.
+ *
+ * Later when we integrate more tightly with x86/intr.c, we will be
+ * able to conform better to (PIC_LAPIC/PIC_IOAPIC)->PIC_XEN
+ * cascading model.
+ */
+
+/* PIC callbacks */
+/* pic "pin"s are conceptually mapped to event port numbers */
+static void
+xen_pic_mask(struct pic *pic, int pin)
+{
+	evtchn_port_t evtchn = pin;	/* the pin is the event port */
+
+	KASSERT(pic->pic_type == PIC_XEN);
+	KASSERT(evtchn < NR_EVENT_CHANNELS);
+
+	hypervisor_mask_event(evtchn);	/* pic_hwmask hook of xen_pic */
+	
+}
+
+static void
+xen_pic_unmask(struct pic *pic, int pin)
+{
+	evtchn_port_t evtchn = pin;	/* the pin is the event port */
+
+	KASSERT(pic->pic_type == PIC_XEN);
+	KASSERT(evtchn < NR_EVENT_CHANNELS);
+
+	hypervisor_unmask_event(evtchn);	/* pic_hwunmask hook of xen_pic */
+	
+}
+
+intr_handle_t
+xen_pirq_assign(intr_handle_t );
+
+static void
+xen_pic_addroute(struct pic *pic, struct cpu_info *ci, int src, int dest, int type)
+{
+
+	/* Events are simulated as level triggered interrupts */
+	evtchn_op_t op;
+	
+	KASSERT(ci != NULL);
+	
+	switch (pic->pic_type) {
+	case PIC_XEN:
+		/* Pass through: dest is used below as the port, as given. */
+		break;
+#if defined(DOM0OPS) || NPCI > 0
+		int xen_vector, token; /* CPU vector, irq returned by xen */
+	case PIC_I8259:
+	case PIC_LAPIC:
+	case PIC_IOAPIC:
+		
+		/*
+		 * Route a hardware PIC pin binding through an event port.
+		 * Xen picks the port: the incoming dest is overwritten below.
+		 */
+
+		/*
+		 * Tell xen about our intentions.
+		 *
+		 * But first get a vector binding. What's returned to
+		 * us is a handle to that vector. Xen calls it a
+		 * "pseudo IRQ" (pirq).
+		 *
+		 * Note that in the case of ioapics, we expect src
+		 * to carry bitfielded info (see APIC_IRQ_PIN() below).
+		 */
+
+		token = xen_pirq_alloc(src);
+
+		xen_vector = xen_pirq_get_vector_from_token(xen_pic.pic_xen, token);
+
+#if NIOAPIC > 0
+		/*
+		 * Add the ioapic route - we can only route it via the BSP for
+		 * now.  NOTE(review): also reached for PIC_I8259/PIC_LAPIC.
+		 */
+
+		pic->pic_addroute(pic, &phycpu_info_primary, APIC_IRQ_PIN(src),
+			    xen_vector, type);
+
+
+#endif /* NIOAPIC > 0 */
+
+		/*
+		 * Now we can bind+alloc (no API to do this in a
+		 * degenerate fashion) this handle to a port.
+		 */
+
+		if (token >= NR_PIRQS) {
+			panic("pirq %d out of bound, increase NR_PIRQS", token);
+		}
+		
+		op.cmd = EVTCHNOP_bind_pirq;
+		op.u.bind_pirq.pirq = token;
+		op.u.bind_pirq.flags = BIND_PIRQ__WILL_SHARE;
+		if (HYPERVISOR_event_channel_op(&op) != 0)
+			panic("Failed to bind physical IRQ %d\n", token);
+		dest = op.u.bind_pirq.port;	/* port returned by the bind op */
+
+		KASSERT(dest < NR_EVENT_CHANNELS);
+		KASSERT(dest >= 0);
+		xen_pirq_save_port_token(xen_pic.pic_xen, dest, token);
+		break;
+
+	case PIC_MSI:
+	case PIC_MSIX:
+#endif /*  defined(DOM0OPS) || NPCI > 0 */		
+	default:
+		panic("/* TODO */");
+		break;
+
+	}
+
+	/* Bind dest port to the routed vcpu */
+	op.cmd = EVTCHNOP_bind_vcpu;
+	op.u.bind_vcpu.vcpu = ci->ci_cpuid;
+	op.u.bind_vcpu.port = dest;
+
+#define PRIuCPUID	"lu" /* XXX: move this somewhere more appropriate */
+
+	if (HYPERVISOR_event_channel_op(&op) != 0)
+		panic("Failed to bind port %d to VCPU %"PRIuCPUID"\n", dest, ci->ci_cpuid);
+
+	/* Give the event handler a hint about our vcpu preference */
+	xen_atomic_set_bit(&ci->ci_evtmask[0], dest);
+
+}
+
+/* pic_delroute hook: drop a route by closing its event port 'dest'. */
+static void
+xen_pic_delroute(struct pic *pic, struct cpu_info *ci, int src, int dest, int type)
+{
+	KASSERT(ci != NULL);
+
+	switch (pic->pic_type) {
+	case PIC_XEN:
+		/* Pass through! */
+		break;
+
+	case PIC_I8259:
+	case PIC_LAPIC:
+		/* NOP */
+		break;
+
+	case PIC_IOAPIC:
+		pic->pic_delroute(pic, &phycpu_info_primary, src,
+		    pic->pic_ioapic->sc_pins[src].ip_vector, type);
+		break;	/* don't fall into the panic; port is closed below */
+
+	case PIC_MSI:
+	case PIC_MSIX:
+	default:
+		panic("/* TODO */");
+		break;
+	}
+
+	/*
+	 * Unbind the destination port, so that the source no longer
+	 * "routes" to the destination.
+	 *
+	 * For a hardware PIC pin binding there currently isn't a
+	 * documented way to remove the binding itself, so we just go
+	 * ahead and close the destination port in the hope that it
+	 * will be unbound in the process.
+	 */
+	evtchn_op_t op;
+	op.cmd = EVTCHNOP_close;
+	op.u.close.port = dest;
+	if (HYPERVISOR_event_channel_op(&op) != 0)
+		panic("Failed to unbind physical IRQ %d\n", src);
+
+	xen_atomic_clear_bit(&ci->ci_evtmask[0], dest);
+}
+
+/*
+ * xen_pic_trymask(pic, pin)
+ *
+ *	Mask the event channel for pin.  If the event turns out to be
+ *	pending, unmask it again and return false.  Otherwise leave it
+ *	masked and return true.
+ */
+static bool
+xen_pic_trymask(struct pic *pic, int pin)
+{
+	volatile struct shared_info *s = HYPERVISOR_shared_info;
+	unsigned long masked __diagused;
+
+	/* Mask it, remembering whether it was masked already.  */
+	masked = xen_atomic_test_and_set_bit(&s->evtchn_mask[0], pin);
+
+	/*
+	 * Caller is responsible for calling trymask only when the
+	 * interrupt pin is not masked, and for serializing calls to
+	 * trymask.
+	 */
+	KASSERT(!masked);
+
+	/*
+	 * Check whether there were any interrupts pending when we
+	 * masked it.  If there were, unmask and abort.
+	 */
+	if (xen_atomic_test_bit(&s->evtchn_pending[0], pin)) {
+		xen_atomic_clear_bit(&s->evtchn_mask[0], pin);
+		return false;
+	}
+
+	/* Success: masked, not pending.  */
+	return true;
+}
+
diff --git a/sys/arch/xen/xen/evtchn.c b/sys/arch/xen/xen/evtchn.c
index 555681a..916eb7a 100644
--- a/sys/arch/xen/xen/evtchn.c
+++ b/sys/arch/xen/xen/evtchn.c
@@ -116,81 +116,6 @@ physdev_op_t physdev_op_notify = {
 };
 #endif
 
-static void xen_evtchn_mask(struct pic *, int);
-static void xen_evtchn_unmask(struct pic *, int);
-static void xen_evtchn_addroute(struct pic *, struct cpu_info *, int, int, int);
-static void xen_evtchn_delroute(struct pic *, struct cpu_info *, int, int, int);
-static bool xen_evtchn_trymask(struct pic *, int);
-
-
-struct pic xen_pic = {
-	.pic_name = "xenev0",
-	.pic_type = PIC_XEN,
-	.pic_vecbase = 0,
-	.pic_apicid = 0,
-	.pic_lock = __SIMPLELOCK_UNLOCKED,
-	.pic_hwmask = xen_evtchn_mask,
-	.pic_hwunmask = xen_evtchn_unmask,
-	.pic_addroute = xen_evtchn_addroute,
-	.pic_delroute = xen_evtchn_delroute,
-	.pic_trymask = xen_evtchn_trymask,
-	.pic_level_stubs = xenev_stubs,
-	.pic_edge_stubs = xenev_stubs,
-};
-	
-/*
- * We try to stick to the traditional x86 PIC semantics wrt Xen
- * events.
- *
- * PIC pins exist in a global namespace which may be hierarchical, and
- * are mapped to a cpu bus concept called 'IRQ' numbers, which are
- * also global, but linear. Thus a PIC, pin tuple will always map to
- * an IRQ number. These tuples can alias to the same IRQ number, thus
- * causing IRQ "sharing". IRQ numbers can be bound to specific CPUs,
- * and to specific callback vector indices on the CPU called idt_vec,
- * which are aliases to handlers meant to run on destination
- * CPUs. This binding can also happen at interrupt time and resolved
- * 'round-robin' between all CPUs, depending on the lapic setup. In
- * this case, all CPUs need to have identical idt_vec->handler
- * mappings.
- *
- * The job of pic_addroute() is to setup the 'wiring' between the
- * source pin, and the destination CPU handler, ideally on a specific
- * CPU in MP systems (or 'round-robin').
- *
- * On Xen, a global namespace of 'events' exist, which are initially
- * bound to nothing. This is similar to the relationship between
- * realworld realworld IRQ numbers wrt PIC pins, since before routing,
- * IRQ numbers by themselves have no causal connection setup with the
- * real world. (Except for the hardwired cases on the PC Architecture,
- * which we ignore for the purpose of this description). However the
- * really important routing is from pin to idt_vec. On PIC_XEN, all
- * three (pic, irq, idt_vec) belong to the same namespace and are
- * identical. Further, the mapping between idt_vec and the actual
- * callback handler is setup via calls to the evtchn.h api - this
- * last bit is analogous to x86/idt.c:idt_vec_set() on real h/w
- *
- * For now we handle two cases:
- * - IPC style events - eg: timer, PV devices, etc.
- * - dom0 physical irq bound events.
- *
- * In the case of IPC style events, we currently externalise the
- * event binding by using evtchn.h functions. From the POV of
- * PIC_XEN ,  'pin' , 'irq' and 'idt_vec' are all identical to the
- * port number of the event.
- *
- * In the case of dom0 physical irq bound events, we currently
- * event binding by exporting evtchn.h functions. From the POV of
- * PIC_LAPIC/PIC_IOAPIC, the 'pin' is the hardware pin, the 'irq' is
- * the x86 global irq number  - the port number is extracted out of a
- * global array (this is currently kludgy and breaks API abstraction)
- * and the binding happens during pic_addroute() of the ioapic.
- *
- * Later when we integrate more tightly with x86/intr.c, we will be
- * able to conform better to (PIC_LAPIC/PIC_IOAPIC)->PIC_XEN
- * cascading model.
- */
-
 int debug_port = -1;
 
 // #define IRQ_DEBUG 4
@@ -428,113 +353,6 @@ splx:
 
 #define PRIuCPUID	"lu" /* XXX: move this somewhere more appropriate */
 
-/* PIC callbacks */
-/* pic "pin"s are conceptually mapped to event port numbers */
-static void
-xen_evtchn_mask(struct pic *pic, int pin)
-{
-	evtchn_port_t evtchn = pin;
-
-	KASSERT(pic->pic_type == PIC_XEN);
-	KASSERT(evtchn < NR_EVENT_CHANNELS);
-
-	hypervisor_mask_event(evtchn);
-	
-}
-
-static void
-xen_evtchn_unmask(struct pic *pic, int pin)
-{
-	evtchn_port_t evtchn = pin;
-
-	KASSERT(pic->pic_type == PIC_XEN);
-	KASSERT(evtchn < NR_EVENT_CHANNELS);
-
-	hypervisor_unmask_event(evtchn);
-	
-}
-
-
-static void
-xen_evtchn_addroute(struct pic *pic, struct cpu_info *ci, int pin, int idt_vec, int type)
-{
-
-	evtchn_port_t evtchn = pin;
-
-	/* Events are simulated as level triggered interrupts */
-	KASSERT(type == IST_LEVEL); 
-
-	KASSERT(evtchn < NR_EVENT_CHANNELS);
-#if notyet
-	evtchn_port_t boundport = idt_vec;
-#endif
-	
-	KASSERT(pic->pic_type == PIC_XEN);
-
-	xen_atomic_set_bit(&ci->ci_evtmask[0], evtchn);
-
-}
-
-static void
-xen_evtchn_delroute(struct pic *pic, struct cpu_info *ci, int pin, int idt_vec, int type)
-{
-	/*
-	 * XXX: In the future, this is a great place to
-	 * 'unbind' events to underlying events and cpus.
-	 * For now, just disable interrupt servicing on this cpu for
-	 * this pin aka cpu.
-	 */
-	evtchn_port_t evtchn = pin;
-
-	/* Events are simulated as level triggered interrupts */
-	KASSERT(type == IST_LEVEL); 
-
-	KASSERT(evtchn < NR_EVENT_CHANNELS);
-#if notyet
-	evtchn_port_t boundport = idt_vec;
-#endif
-	
-	KASSERT(pic->pic_type == PIC_XEN);
-
-	xen_atomic_clear_bit(&ci->ci_evtmask[0], evtchn);
-}
-
-/*
- * xen_evtchn_trymask(pic, pin)
- *
- *	If there are interrupts pending on the bus-shared pic, return
- *	false.  Otherwise, mask interrupts on the bus-shared pic and
- *	return true.
- */
-static bool
-xen_evtchn_trymask(struct pic *pic, int pin)
-{
-	volatile struct shared_info *s = HYPERVISOR_shared_info;
-	unsigned long masked __diagused;
-
-	/* Mask it.  */
-	masked = xen_atomic_test_and_set_bit(&s->evtchn_mask[0], pin);
-
-	/*
-	 * Caller is responsible for calling trymask only when the
-	 * interrupt pin is not masked, and for serializing calls to
-	 * trymask.
-	 */
-	KASSERT(!masked);
-
-	/*
-	 * Check whether there were any interrupts pending when we
-	 * masked it.  If there were, unmask and abort.
-	 */
-	if (xen_atomic_test_bit(&s->evtchn_pending[0], pin)) {
-		xen_atomic_clear_bit(&s->evtchn_mask[0], pin);
-		return false;
-	}
-
-	/* Success: masked, not pending.  */
-	return true;
-}
-
 evtchn_port_t
 bind_vcpu_to_evtch(cpuid_t vcpu)
 {
@@ -1005,6 +823,25 @@ event_remove_handler(int evtch, int (*func)(void *), void *arg)
 	return 0;
 }
 
+void
+hypervisor_prime_pirq_event(int pirq, unsigned int evtch)
+{
+#if NPCI > 0 || NISA > 0
+	physdev_op_t physdev_op;	/* query Xen for this pirq's status */
+	physdev_op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY;
+	physdev_op.u.irq_status_query.irq = pirq;
+	if (HYPERVISOR_physdev_op(&physdev_op) < 0)
+		panic("HYPERVISOR_physdev_op(PHYSDEVOP_IRQ_STATUS_QUERY)");
+	if (physdev_op.u.irq_status_query.flags &
+	    PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY) {	/* 32 ports per word */
+		pirq_needs_unmask_notify[evtch >> 5] |= (1U << (evtch & 0x1f));
+#ifdef IRQ_DEBUG
+		printf("pirq %d needs notify\n", pirq);
+#endif
+	}
+#endif /* NPCI > 0 || NISA > 0 */
+}
+	
 void
 hypervisor_enable_event(unsigned int evtch)
 {

Follow-Ups:
- Re: interrupt cleanup #1
  - From: Taylor R Campbell

References:
- Re: interrupt cleanup #1
  - From: Cherry G . Mathew

Prev by Date: Re: interrupt cleanup #1
Next by Date: xentools seabios
Previous by Thread: Re: interrupt cleanup #1
Next by Thread: Re: interrupt cleanup #1
Indexes:

Home | Main Index | Thread Index | Old Index