Subject: Re: Netbsd DomU daily-08.01.2007-crashes
To: Manuel Bouyer <bouyer@antioche.eu.org>
From: Anzi <anzi@dnainternet.net>
List: port-xen
Date: 01/10/2007 14:09:11
Manuel Bouyer wrote:
>On Wed, Jan 10, 2007 at 11:10:11AM +0200, Anzi wrote:
>
>
>>Manuel Bouyer wrote:
>>
>>
>>
>>>On Tue, Jan 09, 2007 at 02:01:36PM +0200, Anzi wrote:
>>>
>>>
>>>
>>>
>>>>OK,
>>>>
>>>>I thought that if I download kernel from current I get the fixed version
>>>>
>>>>
>>>>from Manuel (in xbd_xenbus.c 1.16)
>>>
>>>
>>>>Apparently it is not so?
>>>>
>>>>
>>>>
>>>>
>>>your kernel is from 20061025, long before I commited the fix
>>>
>>>
>>>
>>>
>>>
>>oh, I thought that fix was for the DOMU kernel. Is the current DOM0 /
>>DOMU kernel good for production use (I think server will not be under
>>high load)?
>>This far it has worked pretty well for with linux domU.
>>
>>
>
>Yes, it's for the domU. Sorry, I misread your first mail.
>Hum, there's still a possible race condition at attach time. Can you try
>the attached patch ?
>
>
>
I can, after I have downloaded kernel source. You have commited version
1.20 of xencons.c and version 1.16 of xbd_xenbus.c. Are they the same
version as these diffs?
-anzi-
>------------------------------------------------------------------------
>
>Index: xbd_xenbus.c
>===================================================================
>RCS file: /cvsroot/src/sys/arch/xen/xen/xbd_xenbus.c,v
>retrieving revision 1.15
>diff -u -r1.15 xbd_xenbus.c
>--- xbd_xenbus.c 1 Jan 2007 22:54:14 -0000 1.15
>+++ xbd_xenbus.c 10 Jan 2007 11:56:46 -0000
>@@ -388,12 +388,15 @@
> xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
> break;
> case XenbusStateConnected:
>- s = splbio();
>+ /*
>+ * note that xbd_backend_changed() can only be called by
>+ * the xenbus thread.
>+ */
>+
> if (sc->sc_backend_status == BLKIF_STATE_CONNECTED)
> /* already connected */
> return;
>- sc->sc_backend_status = BLKIF_STATE_CONNECTED;
>- splx(s);
>+
> xbd_connect(sc);
> sc->sc_shutdown = 0;
> hypervisor_enable_event(sc->sc_evtchn);
>@@ -409,8 +412,10 @@
>
> bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
> sc->sc_dksc.sc_flags |= DKF_INITED;
>-
> disk_attach(&sc->sc_dksc.sc_dkdev);
>+
>+ sc->sc_backend_status = BLKIF_STATE_CONNECTED;
>+
> /* try to read the disklabel */
> dk_getdisklabel(sc->sc_di, &sc->sc_dksc, 0 /* XXX ? */);
> format_bytes(buf, sizeof(buf), (uint64_t)sc->sc_dksc.sc_size *
>@@ -469,6 +474,8 @@
>
> DPRINTF(("xbd_handler(%s)\n", sc->sc_dev.dv_xname));
>
>+ if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED))
>+ return 0;
> again:
> resp_prod = sc->sc_ring.sring->rsp_prod;
> x86_lfence(); /* ensure we see replies up to resp_prod */
>Index: xencons.c
>===================================================================
>RCS file: /cvsroot/src/sys/arch/xen/xen/xencons.c,v
>retrieving revision 1.19
>diff -u -r1.19 xencons.c
>--- xencons.c 1 Oct 2006 19:28:43 -0000 1.19
>+++ xencons.c 10 Jan 2007 11:56:46 -0000
>@@ -387,6 +387,7 @@
> #define XNC_OUT (xencons_interface->out)
> cons = xencons_interface->out_cons;
> prod = xencons_interface->out_prod;
>+ x86_lfence();
> while (prod != cons + sizeof(xencons_interface->out)) {
> if (MASK_XENCONS_IDX(prod, XNC_OUT) <
> MASK_XENCONS_IDX(cons, XNC_OUT)) {
>@@ -402,9 +403,9 @@
> break;
> prod = prod + len;
> }
>- x86_lfence();
>+ x86_sfence();
> xencons_interface->out_prod = prod;
>- x86_lfence();
>+ x86_sfence();
> hypervisor_notify_via_evtchn(xen_start_info.console_evtchn);
> #undef XNC_OUT
> #else /* XEN3 */
>@@ -482,12 +483,13 @@
> cons = xencons_interface->in_cons;
> prod = xencons_interface->in_prod;
> x86_lfence();
>- } else
>+ } else {
> cons += len;
>+ x86_sfence();
>+ xencons_interface->in_cons = cons;
>+ x86_sfence();
>+ }
> }
>- x86_lfence();
>- xencons_interface->in_cons = cons;
>- x86_lfence();
> hypervisor_notify_via_evtchn(xen_start_info.console_evtchn);
> splx(s);
> return 1;
>
>