Subject: Re: Netbsd DomU daily-08.01.2007-crashes
To: Anzi <anzi@dnainternet.net>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: port-xen
Date: 01/10/2007 12:57:14
--bp/iNruPH9dso1Pn
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Wed, Jan 10, 2007 at 11:10:11AM +0200, Anzi wrote:
> Manuel Bouyer wrote:
>
> >On Tue, Jan 09, 2007 at 02:01:36PM +0200, Anzi wrote:
> >
> >
> >>OK,
> >>
> >>I thought that if I download kernel from current I get the fixed version
> >>from Manuel (in xbd_xenbus.c 1.16)
> >>Apparently it is not so?
> >>
> >>
> >
> >your kernel is from 20061025, long before I commited the fix
> >
> >
> >
>
> oh, I thought that fix was for the DOMU kernel. Is the current DOM0 /
> DOMU kernel good for production use (I think server will not be under
> high load)?
> This far it has worked pretty well for with linux domU.
Yes, it's for the domU. Sorry, I misread your first mail.
Hum, there's still a possible race condition at attach time. Can you try
the attached patch ?
--
Manuel Bouyer <bouyer@antioche.eu.org>
NetBSD: 26 ans d'experience feront toujours la difference
--
--bp/iNruPH9dso1Pn
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff
Index: xbd_xenbus.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/xbd_xenbus.c,v
retrieving revision 1.15
diff -u -r1.15 xbd_xenbus.c
--- xbd_xenbus.c 1 Jan 2007 22:54:14 -0000 1.15
+++ xbd_xenbus.c 10 Jan 2007 11:56:46 -0000
@@ -388,12 +388,15 @@
xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
break;
case XenbusStateConnected:
- s = splbio();
+ /*
+ * note that xbd_backend_changed() can only be called by
+ * the xenbus thread.
+ */
+
if (sc->sc_backend_status == BLKIF_STATE_CONNECTED)
/* already connected */
return;
- sc->sc_backend_status = BLKIF_STATE_CONNECTED;
- splx(s);
+
xbd_connect(sc);
sc->sc_shutdown = 0;
hypervisor_enable_event(sc->sc_evtchn);
@@ -409,8 +412,10 @@
bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
sc->sc_dksc.sc_flags |= DKF_INITED;
-
disk_attach(&sc->sc_dksc.sc_dkdev);
+
+ sc->sc_backend_status = BLKIF_STATE_CONNECTED;
+
/* try to read the disklabel */
dk_getdisklabel(sc->sc_di, &sc->sc_dksc, 0 /* XXX ? */);
format_bytes(buf, sizeof(buf), (uint64_t)sc->sc_dksc.sc_size *
@@ -469,6 +474,8 @@
DPRINTF(("xbd_handler(%s)\n", sc->sc_dev.dv_xname));
+ if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED))
+ return 0;
again:
resp_prod = sc->sc_ring.sring->rsp_prod;
x86_lfence(); /* ensure we see replies up to resp_prod */
Index: xencons.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/xencons.c,v
retrieving revision 1.19
diff -u -r1.19 xencons.c
--- xencons.c 1 Oct 2006 19:28:43 -0000 1.19
+++ xencons.c 10 Jan 2007 11:56:46 -0000
@@ -387,6 +387,7 @@
#define XNC_OUT (xencons_interface->out)
cons = xencons_interface->out_cons;
prod = xencons_interface->out_prod;
+ x86_lfence();
while (prod != cons + sizeof(xencons_interface->out)) {
if (MASK_XENCONS_IDX(prod, XNC_OUT) <
MASK_XENCONS_IDX(cons, XNC_OUT)) {
@@ -402,9 +403,9 @@
break;
prod = prod + len;
}
- x86_lfence();
+ x86_sfence();
xencons_interface->out_prod = prod;
- x86_lfence();
+ x86_sfence();
hypervisor_notify_via_evtchn(xen_start_info.console_evtchn);
#undef XNC_OUT
#else /* XEN3 */
@@ -482,12 +483,13 @@
cons = xencons_interface->in_cons;
prod = xencons_interface->in_prod;
x86_lfence();
- } else
+ } else {
cons += len;
+ x86_sfence();
+ xencons_interface->in_cons = cons;
+ x86_sfence();
+ }
}
- x86_lfence();
- xencons_interface->in_cons = cons;
- x86_lfence();
hypervisor_notify_via_evtchn(xen_start_info.console_evtchn);
splx(s);
return 1;
--bp/iNruPH9dso1Pn--