Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/xen/xenbus Fix /kern/xen/xenbus handling. It's badl...



details:   https://anonhg.NetBSD.org/src/rev/55b28bfeef54
branches:  trunk
changeset: 822495:55b28bfeef54
user:      bouyer <bouyer%NetBSD.org@localhost>
date:      Wed Mar 22 21:21:39 2017 +0000

description:
Fix /kern/xen/xenbus handling. It's badly broken and will do bad things
if more than one thread tries to use it at the same time (hang, memory leak,
panic).
In a kernfs node, the fileops (open, close, read, write) gets a vnode,
but no way to know the file descriptor from which the request comes from.
As /kern/xen/xenbus interface is statefull (write sends a command and read
gets the response), a way to track "clients" is needed.
This commit implement states using the lwp pointer from the caller as a key.
It will fail (with an error) if a kernfs file descriptor is reused by a child
after a fork(), or if a file descriptor is shared by two threads of the same
process but fortunably the xen tools using this interface don't do this.

This should fixes occasional hangs of the Xen tools (one in "xbrd" state,
others in tstile) reported on port-xen by Alaric Snell-Pym.

diffstat:

 sys/arch/xen/xenbus/xenbus_dev.c |  262 +++++++++++++++++++++++++++-----------
 1 files changed, 187 insertions(+), 75 deletions(-)

diffs (truncated from 406 to 300 lines):

diff -r 8245a6b2d569 -r 55b28bfeef54 sys/arch/xen/xenbus/xenbus_dev.c
--- a/sys/arch/xen/xenbus/xenbus_dev.c  Wed Mar 22 21:14:11 2017 +0000
+++ b/sys/arch/xen/xenbus/xenbus_dev.c  Wed Mar 22 21:21:39 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xenbus_dev.c,v 1.10 2016/07/07 06:55:40 msaitoh Exp $ */
+/* $NetBSD: xenbus_dev.c,v 1.11 2017/03/22 21:21:39 bouyer Exp $ */
 /*
  * xenbus_dev.c
  * 
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xenbus_dev.c,v 1.10 2016/07/07 06:55:40 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xenbus_dev.c,v 1.11 2017/03/22 21:21:39 bouyer Exp $");
 
 #include "opt_xen.h"
 
@@ -61,11 +61,6 @@
 static int xenbus_dev_close(void *);
 static int xsd_port_read(void *);
 
-struct xenbus_dev_transaction {
-       SLIST_ENTRY(xenbus_dev_transaction) trans_next;
-       struct xenbus_transaction *handle;
-};
-
 #define DIR_MODE        (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
 #define PRIVCMD_MODE    (S_IRUSR | S_IWUSR)
 static const struct kernfs_fileop xenbus_fileops[] = {
@@ -80,6 +75,8 @@
     { .kf_fileop = KERNFS_FILEOP_READ, .kf_vop = xsd_port_read },
 };
 
+static kmutex_t xenbus_dev_open_mtx;
+
 void
 xenbus_kernfs_init(void)
 {
@@ -99,26 +96,52 @@
                    kfst, VREG, XSD_MODE);
                kernfs_addentry(kernxen_pkt, dkt);
        }
+       mutex_init(&xenbus_dev_open_mtx, MUTEX_DEFAULT, IPL_NONE);
 }
 
-struct xenbus_dev_data {
+/*
+ * several process may open /kern/xen/xenbus in parallel.
+ * In a transaction one or more write is followed by one or more read.
+ * Unfortunably we don't get a file descriptor identifier down there,
+ * which we could use to link a read() to a transaction started in a write().
+ * To work around this we keep a list of lwp that opended the xenbus file.
+ * This assumes that a single lwp won't open /kern/xen/xenbus more
+ * than once, and that a transaction started by one lwp won't be ended
+ * by another.
+ * because of this, we also assume that we always got the data before
+ * the read() syscall.
+ */
+
+struct xenbus_dev_transaction {
+       SLIST_ENTRY(xenbus_dev_transaction) trans_next;
+       struct xenbus_transaction *handle;
+};
+
+struct xenbus_dev_lwp {
+       SLIST_ENTRY(xenbus_dev_lwp) lwp_next;
+       SLIST_HEAD(, xenbus_dev_transaction) transactions;
+       lwp_t *lwp;
+       /* Response queue. */
 #define BUFFER_SIZE (PAGE_SIZE)
 #define MASK_READ_IDX(idx) ((idx)&(BUFFER_SIZE-1))
-       /* In-progress transaction. */
-       SLIST_HEAD(, xenbus_dev_transaction) transactions;
-
+       char read_buffer[BUFFER_SIZE];
+       unsigned int read_cons, read_prod;
        /* Partial request. */
        unsigned int len;
        union {
                struct xsd_sockmsg msg;
                char buffer[BUFFER_SIZE];
        } u;
+       kmutex_t mtx;
+};
 
-       /* Response queue. */
-       char read_buffer[BUFFER_SIZE];
-       unsigned int read_cons, read_prod;
+struct xenbus_dev_data {
+       /* lwps which opended this device */
+       SLIST_HEAD(, xenbus_dev_lwp) lwps;
+       kmutex_t mtx;
 };
 
+
 static int
 xenbus_dev_read(void *v)
 {
@@ -131,47 +154,59 @@
        struct kernfs_node *kfs = VTOKERN(ap->a_vp);
        struct uio *uio = ap->a_uio;
        struct xenbus_dev_data *u = kfs->kfs_v;
+       struct xenbus_dev_lwp *xlwp;
        int err;
        off_t offset;
-       int s = spltty();
 
-       while (u->read_prod == u->read_cons) {
-               err = tsleep(u, PRIBIO | PCATCH, "xbrd", 0);
+       KASSERT(u != NULL);
+       mutex_enter(&u->mtx);
+       SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
+               if (xlwp->lwp == curlwp) {
+                       break;
+               }
+       }
+       if (xlwp == NULL) {
+               mutex_exit(&u->mtx);
+               return EBADF;
+       }
+       mutex_enter(&xlwp->mtx);
+       mutex_exit(&u->mtx);
+
+       if (xlwp->read_prod == xlwp->read_cons) {
+               err = EWOULDBLOCK;
+               goto end;
+       }
+
+       offset = uio->uio_offset;
+       if (xlwp->read_cons > xlwp->read_prod) {
+               err = uiomove(
+                   &xlwp->read_buffer[MASK_READ_IDX(xlwp->read_cons)],
+                   0U - xlwp->read_cons, uio);
                if (err)
                        goto end;
-       }
-       offset = uio->uio_offset;
-
-       if (u->read_cons > u->read_prod) {
-               err = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)],
-                   0U - u->read_cons, uio);
-               if (err)
-                       goto end;
-               u->read_cons += (uio->uio_offset - offset);
+               xlwp->read_cons += (uio->uio_offset - offset);
                offset = uio->uio_offset;
        }
-       err = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)],
-           u->read_prod - u->read_cons, uio);
+       err = uiomove(&xlwp->read_buffer[MASK_READ_IDX(xlwp->read_cons)],
+           xlwp->read_prod - xlwp->read_cons, uio);
        if (err == 0)
-               u->read_cons += (uio->uio_offset - offset);
+               xlwp->read_cons += (uio->uio_offset - offset);
 
 end:
-       splx(s);
+       mutex_exit(&xlwp->mtx);
        return err;
 }
 
 static void
-queue_reply(struct xenbus_dev_data *u,
+queue_reply(struct xenbus_dev_lwp *xlwp,
                        char *data, unsigned int len)
 {
        int i;
-
-       for (i = 0; i < len; i++, u->read_prod++)
-               u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
+       KASSERT(mutex_owned(&xlwp->mtx));
+       for (i = 0; i < len; i++, xlwp->read_prod++)
+               xlwp->read_buffer[MASK_READ_IDX(xlwp->read_prod)] = data[i];
 
-       KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer));
-
-       wakeup(&u);
+       KASSERT((xlwp->read_prod - xlwp->read_cons) <= sizeof(xlwp->read_buffer));
 }
 
 static int
@@ -187,27 +222,47 @@
        struct uio *uio = ap->a_uio;
 
        struct xenbus_dev_data *u = kfs->kfs_v;
+       struct xenbus_dev_lwp *xlwp;
        struct xenbus_dev_transaction *trans;
        void *reply;
        int err;
        size_t size;
 
-       if (uio->uio_offset < 0)
-               return EINVAL;
+       KASSERT(u != NULL);
+       mutex_enter(&u->mtx);
+       SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
+               if (xlwp->lwp == curlwp) {
+                       break;
+               }
+       }
+       if (xlwp == NULL) {
+               mutex_exit(&u->mtx);
+               return EBADF;
+       }
+       mutex_enter(&xlwp->mtx);
+       mutex_exit(&u->mtx);
+
+       if (uio->uio_offset < 0) {
+               err = EINVAL;
+               goto end;
+       }
        size = uio->uio_resid;
 
-       if ((size + u->len) > sizeof(u->u.buffer))
-               return EINVAL;
+       if ((size + xlwp->len) > sizeof(xlwp->u.buffer)) {
+               err = EINVAL;
+               goto end;
+       }
 
-       err = uiomove(u->u.buffer + u->len, sizeof(u->u.buffer) -  u->len, uio);
-       if (err)
-               return err;
+       err = uiomove(xlwp->u.buffer + xlwp->len,
+                     sizeof(xlwp->u.buffer) -  xlwp->len, uio);
+       if (err) 
+               goto end;
 
-       u->len += size;
-       if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
-               return 0;
+       xlwp->len += size;
+       if (xlwp->len < (sizeof(xlwp->u.msg) + xlwp->u.msg.len))
+               goto end;
 
-       switch (u->u.msg.type) {
+       switch (xlwp->u.msg.type) {
        case XS_TRANSACTION_START:
        case XS_TRANSACTION_END:
        case XS_DIRECTORY:
@@ -219,29 +274,33 @@
        case XS_MKDIR:
        case XS_RM:
        case XS_SET_PERMS:
-               err = xenbus_dev_request_and_reply(&u->u.msg, &reply);
+               err = xenbus_dev_request_and_reply(&xlwp->u.msg, &reply);
                if (err == 0) {
-                       if (u->u.msg.type == XS_TRANSACTION_START) {
+                       if (xlwp->u.msg.type == XS_TRANSACTION_START) {
                                trans = malloc(sizeof(*trans), M_DEVBUF,
                                    M_WAITOK);
                                trans->handle = (struct xenbus_transaction *)
                                        strtoul(reply, NULL, 0);
-                               SLIST_INSERT_HEAD(&u->transactions,
+                               SLIST_INSERT_HEAD(&xlwp->transactions,
                                    trans, trans_next);
-                       } else if (u->u.msg.type == XS_TRANSACTION_END) {
-                               SLIST_FOREACH(trans, &u->transactions,
+                       } else if (xlwp->u.msg.type == XS_TRANSACTION_END) {
+                               SLIST_FOREACH(trans, &xlwp->transactions,
                                                    trans_next) {
                                        if ((unsigned long)trans->handle ==
-                                           (unsigned long)u->u.msg.tx_id)
+                                           (unsigned long)xlwp->u.msg.tx_id)
                                                break;
                                }
-                               KASSERT(trans != NULL);
-                               SLIST_REMOVE(&u->transactions, trans, 
+                               if (trans == NULL) {
+                                       err = EINVAL;
+                                       goto end;
+                               }
+                               SLIST_REMOVE(&xlwp->transactions, trans, 
                                    xenbus_dev_transaction, trans_next);
                                free(trans, M_DEVBUF);
                        }
-                       queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
-                       queue_reply(u, (char *)reply, u->u.msg.len);
+                       queue_reply(xlwp, (char *)&xlwp->u.msg,
+                                               sizeof(xlwp->u.msg));
+                       queue_reply(xlwp, (char *)reply, xlwp->u.msg.len);
                        free(reply, M_DEVBUF);
                }
                break;
@@ -252,9 +311,10 @@
        }
 
        if (err == 0) {
-               u->len = 0;
+               xlwp->len = 0;
        }
-
+end:
+       mutex_exit(&xlwp->mtx);
        return err;
 }
 
@@ -267,21 +327,46 @@
                struct ucred *a_cred;



Home | Main Index | Thread Index | Old Index