Port-xen archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[PATCH] xen: add gntdev
This is a basic (and experimental) gntdev implementation for NetBSD.
The gnt device allows usermode applications to map grant references in
userspace. It is mainly used by Qemu to implement a Xen backend (that
runs in userspace).
Due to the fact that qemu-upstream is not yet functional in NetBSD,
the only way to try this gntdev is to use the old qemu
(qemu-traditional).
Performance is not that bad (given that we are using qemu-traditional
and running a backend in userspace), the throughput of write
operations is 64.7 MB/s, while in the Dom0 it is 104.6 MB/s. Regarding
read operations, the throughput inside the DomU is 76.0 MB/s, while on
the Dom0 it is 108.8 MB/s.
Patches to libxc and libxl are also coming soon.
Cc: xen-devel%lists.xen.org@localhost
---
etc/MAKEDEV.tmpl | 5 +
etc/etc.amd64/MAKEDEV.conf | 2 +-
etc/etc.i386/MAKEDEV.conf | 2 +-
sys/arch/amd64/conf/XEN3_DOM0 | 1 +
sys/arch/amd64/conf/majors.amd64 | 1 +
sys/arch/i386/conf/XEN3_DOM0 | 1 +
sys/arch/i386/conf/majors.i386 | 1 +
sys/arch/xen/conf/files.xen | 2 +
sys/arch/xen/include/xen_shm.h | 2 +
sys/arch/xen/include/xenio.h | 76 +++++
sys/arch/xen/x86/xen_shm_machdep.c | 18 +-
sys/arch/xen/xen/gntdev.c | 618 ++++++++++++++++++++++++++++++++++++
sys/dev/DEVNAMES | 1 +
sys/rump/librump/rumpkern/devsw.c | 1 +
14 files changed, 728 insertions(+), 3 deletions(-)
create mode 100644 sys/arch/xen/xen/gntdev.c
diff --git a/etc/MAKEDEV.tmpl b/etc/MAKEDEV.tmpl
index 21b0568..00029c6 100644
--- a/etc/MAKEDEV.tmpl
+++ b/etc/MAKEDEV.tmpl
@@ -289,6 +289,7 @@
# wsfont* console font control
# wsmux* wscons event multiplexor
# xenevt Xen event interface
+# gntdev Xen grant table interface
#
# iSCSI communication devices
# iscsi* iSCSI driver and /sbin/iscsid communication
@@ -1020,6 +1021,10 @@ xsd_kva)
mkdev xsd_kva c %xenevt_chr% 1
;;
+gntdev)
+ mkdev gntdev c %gntdev_chr% 0
+ ;;
+
xencons)
mkdev xencons c %xencons_chr% 0
;;
diff --git a/etc/etc.amd64/MAKEDEV.conf b/etc/etc.amd64/MAKEDEV.conf
index a4a831c..5e2098c 100644
--- a/etc/etc.amd64/MAKEDEV.conf
+++ b/etc/etc.amd64/MAKEDEV.conf
@@ -44,5 +44,5 @@ all_md)
;;
xen)
- makedev xenevt xencons xsd_kva
+ makedev xenevt xencons xsd_kva gntdev
;;
diff --git a/etc/etc.i386/MAKEDEV.conf b/etc/etc.i386/MAKEDEV.conf
index ba3e2cc..bd38673 100644
--- a/etc/etc.i386/MAKEDEV.conf
+++ b/etc/etc.i386/MAKEDEV.conf
@@ -48,7 +48,7 @@ all_md)
;;
xen)
- makedev xenevt xencons xsd_kva
+ makedev xenevt xencons xsd_kva gntdev
;;
floppy)
diff --git a/sys/arch/amd64/conf/XEN3_DOM0 b/sys/arch/amd64/conf/XEN3_DOM0
index e5f9f1f..1807dd2 100644
--- a/sys/arch/amd64/conf/XEN3_DOM0
+++ b/sys/arch/amd64/conf/XEN3_DOM0
@@ -838,6 +838,7 @@ pseudo-device wsfont
pseudo-device drvctl
# xen pseudo-devices
+pseudo-device gntdev
pseudo-device xenevt
pseudo-device xvif
pseudo-device xbdback
diff --git a/sys/arch/amd64/conf/majors.amd64 b/sys/arch/amd64/conf/majors.amd64
index 9e6b1ac..cf15f7d 100644
--- a/sys/arch/amd64/conf/majors.amd64
+++ b/sys/arch/amd64/conf/majors.amd64
@@ -96,6 +96,7 @@ device-major nsmb char 98 nsmb
# - they appear in the i386 MAKEDEV
#
+device-major gntdev char 140 gntdev
device-major xenevt char 141 xenevt
device-major xbd char 142 block 142 xbd
device-major xencons char 143 xencons
diff --git a/sys/arch/i386/conf/XEN3_DOM0 b/sys/arch/i386/conf/XEN3_DOM0
index 8b5cf99..be28bbc 100644
--- a/sys/arch/i386/conf/XEN3_DOM0
+++ b/sys/arch/i386/conf/XEN3_DOM0
@@ -820,6 +820,7 @@ pseudo-device wsfont
pseudo-device drvctl
# xen pseudo-devices
+pseudo-device gntdev
pseudo-device xenevt
pseudo-device xvif
pseudo-device xbdback
diff --git a/sys/arch/i386/conf/majors.i386 b/sys/arch/i386/conf/majors.i386
index 38c043f..9aab728 100644
--- a/sys/arch/i386/conf/majors.i386
+++ b/sys/arch/i386/conf/majors.i386
@@ -111,6 +111,7 @@ device-major mt char 107 block 24
mt
# - they appear in the i386 MAKEDEV
#
+device-major gntdev char 140 gntdev
device-major xenevt char 141 xenevt
device-major xbd char 142 block 142 xbd
device-major xencons char 143 xencons
diff --git a/sys/arch/xen/conf/files.xen b/sys/arch/xen/conf/files.xen
index e022db5..91ff858 100644
--- a/sys/arch/xen/conf/files.xen
+++ b/sys/arch/xen/conf/files.xen
@@ -198,6 +198,7 @@ attach xencons at xendevbus
file arch/xen/xen/xencons.c xencons needs-flag
# Xen event peudo-device
+defpseudo gntdev
defpseudo xenevt
defpseudo xvif
defpseudo xbdback
@@ -390,6 +391,7 @@ include "dev/pcmcia/files.pcmcia"
# Domain-0 operations
defflag opt_xen.h DOM0OPS
file arch/xen/xen/privcmd.c dom0ops
+file arch/xen/xen/gntdev.c dom0ops
file arch/xen/x86/xen_shm_machdep.c dom0ops
file arch/x86/pci/pci_machdep.c hypervisor & pci & dom0ops
file arch/xen/xen/pci_intr_machdep.c hypervisor & pci
diff --git a/sys/arch/xen/include/xen_shm.h b/sys/arch/xen/include/xen_shm.h
index e2d89d0..a796572 100644
--- a/sys/arch/xen/include/xen_shm.h
+++ b/sys/arch/xen/include/xen_shm.h
@@ -37,6 +37,8 @@
*/
int xen_shm_map(int, int, grant_ref_t *, vaddr_t *, grant_handle_t *, int);
+int xen_shm_map_multidom(int , int *, grant_ref_t *, vaddr_t *,
+ grant_handle_t *, int);
void xen_shm_unmap(vaddr_t, int, grant_handle_t *);
int xen_shm_callback(int (*)(void *), void *);
diff --git a/sys/arch/xen/include/xenio.h b/sys/arch/xen/include/xenio.h
index 6b25733..87cd376 100644
--- a/sys/arch/xen/include/xenio.h
+++ b/sys/arch/xen/include/xenio.h
@@ -122,4 +122,80 @@ typedef struct oprivcmd_hypercall
/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */
#define EVTCHN_UNBIND _IOW('E', 3, unsigned long)
+/* Interface to /dev/gntdev */
+
+typedef struct ioctl_gntdev_grant_ref {
+ /* The domain ID of the grant to be mapped. */
+ uint32_t domid;
+ /* The grant reference of the grant to be mapped. */
+ uint32_t ref;
+} ioctl_gntdev_grant_ref;
+
+typedef struct ioctl_gntdev_map_grant_ref { /* Argument of IOCTL_GNTDEV_MAP_GRANT_REF. */
+ /* IN parameters */
+ /* The number of grants to be mapped. */
+ uint32_t count;
+ uint32_t pad; /* Explicit padding; the driver never reads it. */
+ uint64_t vaddr; /* User virtual address at which the grants are mapped. */
+ /* OUT parameters */
+ /* Page index of the new mapping shifted left by PAGE_SHIFT; hand it back to UNMAP_GRANT_REF. */
+ uint64_t index;
+ /* Variable IN parameter. */
+ /* Array of grant references, of size @count (read with copyin(), one entry at a time). */
+ ioctl_gntdev_grant_ref *refs;
+} ioctl_gntdev_map_grant_ref;
+
+typedef struct ioctl_gntdev_unmap_grant_ref {
+ /* IN parameters */
+ /* The offset was returned by the corresponding map operation. */
+ uint64_t index;
+ /* The number of pages to be unmapped. */
+ uint32_t count;
+ uint32_t pad;
+} ioctl_gntdev_unmap_grant_ref;
+
+typedef struct ioctl_gntdev_get_offset_for_vaddr {
+ /* IN parameters */
+ /* The virtual address of the first mapped page in a range. */
+ uint64_t vaddr;
+ /* OUT parameters */
+ /* The offset that was used in the initial mmap() operation. */
+ uint64_t offset;
+ /* The number of pages mapped in the VM area that begins at @vaddr. */
+ uint32_t count;
+ uint32_t pad;
+} ioctl_gntdev_get_offset_for_vaddr;
+
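+/*
+ * Maps the grant references in @refs at the user address @vaddr and
+ * inserts them into the mapping table of an instance of gntdev.
+ * N.B. The mapping is established by the ioctl itself; the device does
+ * not implement mmap(). @index is returned so that the mapping can later
+ * be released with UNMAP_GRANT_REF.
+ */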
+#define IOCTL_GNTDEV_MAP_GRANT_REF \
+ _IOWR('G', 0, ioctl_gntdev_map_grant_ref)
+
+/*
+ * Removes the grant references from the mapping table of an instance
+ * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
+ * before this ioctl is called, or an error will result.
+ */
+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
+ _IOW('G', 1, ioctl_gntdev_unmap_grant_ref)
+
+/*
+ * Returns the offset in the driver's address space that corresponds
+ * to @vaddr. This can be used to perform a munmap(), followed by an
+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
+ * the caller. The number of pages that were allocated at the same time as
+ * @vaddr is returned in @count.
+ *
+ * N.B. Where more than one page has been mapped into a contiguous range, the
+ * supplied @vaddr must correspond to the start of the range; otherwise
+ * an error will result. It is only possible to munmap() the entire
+ * contiguously-allocated range at once, and not any subrange thereof.
+ */
+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
+ _IOWR('G', 2, ioctl_gntdev_get_offset_for_vaddr)
+
+
#endif /* __XEN_XENIO_H__ */
diff --git a/sys/arch/xen/x86/xen_shm_machdep.c
b/sys/arch/xen/x86/xen_shm_machdep.c
index d47745c..b47cc54 100644
--- a/sys/arch/xen/x86/xen_shm_machdep.c
+++ b/sys/arch/xen/x86/xen_shm_machdep.c
@@ -35,6 +35,7 @@ __KERNEL_RCSID(0, "$NetBSD: xen_shm_machdep.c,v 1.10
2011/09/02 22:25:08 dyoung
#include <sys/queue.h>
#include <sys/vmem.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <uvm/uvm.h>
#include <machine/pmap.h>
@@ -120,6 +121,21 @@ int
xen_shm_map(int nentries, int domid, grant_ref_t *grefp, vaddr_t *vap,
grant_handle_t *handlep, int flags)
{
+ int i, rc;
+ int *domids = malloc(sizeof(domid) * nentries, M_DEVBUF,
+ M_WAITOK | M_ZERO);
+ for(i = 0; i < nentries; i++) {
+ domids[i] = domid;
+ }
+ rc = xen_shm_map_multidom(nentries, domids, grefp, vap, handlep, flags);
+ free(domids, M_DEVBUF);
+ return rc;
+}
+
+int
+xen_shm_map_multidom(int nentries, int *domid, grant_ref_t *grefp,
+ vaddr_t *vap, grant_handle_t *handlep, int flags)
+{
int s, i;
vaddr_t new_va;
vmem_addr_t new_va_pg;
@@ -168,7 +184,7 @@ xen_shm_map(int nentries, int domid, grant_ref_t *grefp,
vaddr_t *vap,
new_va = new_va_pg << PAGE_SHIFT;
for (i = 0; i < nentries; i++) {
op[i].host_addr = new_va + i * PAGE_SIZE;
- op[i].dom = domid;
+ op[i].dom = domid[i];
op[i].ref = grefp[i];
op[i].flags = GNTMAP_host_map |
((flags & XSHM_RO) ? GNTMAP_readonly : 0);
diff --git a/sys/arch/xen/xen/gntdev.c b/sys/arch/xen/xen/gntdev.c
new file mode 100644
index 0000000..85dd8ec
--- /dev/null
+++ b/sys/arch/xen/xen/gntdev.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright (c) 2012 Roger Pau Monné.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/cdefs.h>
+
+#include "opt_xen.h"
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/conf.h>
+
+#include <uvm/uvm.h>
+
+#include <xen/xen_shm.h>
+#include <xen/xenio.h>
+
+void gntdevattach(int n);
+
+#define freem(va) \
+ if (va) free(va, M_DEVBUF)
+
+#undef GNTDEBUG
+#ifdef GNTDEBUG
+ #define debug(M, ...) \
+ printf("gntdev:%d: " M "\n", __LINE__, ##__VA_ARGS__)
+#else
+ #define debug(M, ...)
+#endif
+
+#define VA_FREE 0
+
+static int gntdev_fioctl(struct file *, u_long, void *);
+static int gntdev_fclose(struct file *);
+
+static const struct fileops gntdev_fileops = {
+ .fo_read = fbadop_read,
+ .fo_write = fbadop_write,
+ .fo_ioctl = gntdev_fioctl,
+ .fo_fcntl = fnullop_fcntl,
+ .fo_poll = fnullop_poll,
+ .fo_stat = fbadop_stat,
+ .fo_close = gntdev_fclose,
+ .fo_kqfilter = fnullop_kqfilter,
+ .fo_restart = fnullop_restart,
+};
+
+dev_type_open(gntdev_open);
+
+const struct cdevsw gntdev_cdevsw = {
+ gntdev_open, nullclose, noread, nowrite, noioctl,
+ nostop, notty, nopoll, nommap, nokqfilter, D_OTHER
+};
+
+struct gntmap { /* One set of grant references mapped into a user process. */
+ struct uvm_object uobj; /* Must stay first: the pager handlers cast the uvm_object pointer back to struct gntmap. */
+ pmap_t pmap; /* pmap of the vm_map the object was installed in; used by gntmap_detach(). */
+ LIST_ENTRY(gntmap) next_map; /* Linkage on gntproc.maps. */
+ int index; /* Slot (in pages) chosen by insert_map(); reported to userland shifted by PAGE_SHIFT. */
+ int count; /* Number of grant pages in this mapping. */
+ grant_ref_t *grants; /* Array of count grant references. */
+ int *domids; /* Array of count domain ids, one per grant reference. */
+ vaddr_t va; /* User virtual address of the mapping. */
+ vaddr_t kernel_va; /* Kernel VA returned by xen_shm_map_multidom(), or VA_FREE when not mapped. */
+ grant_handle_t *handles; /* Array of count handles filled by xen_shm_map_multidom(), needed by xen_shm_unmap(). */
+ bool ro; /* True when the grants are mapped read-only. */
+};
+
+struct gntproc { /* Per-open state, stored in the cloned file's f_data. */
+ LIST_HEAD(,gntmap) maps; /* Mappings created through this descriptor. */
+ kmutex_t lock; /* Taken around accesses to maps and num_maps. */
+ struct lwp *lwp; /* LWP that opened the device; its vmspace receives the mappings. */
+ unsigned int num_maps; /* Number of entries currently on maps. */
+};
+
+struct gntdev { /* Driver-wide state (single instance: priv). */
+ kcondvar_t wait_mem; /* Waited on in map_grant_ref() after ENOMEM; broadcast by gntdev_mem_callback(). */
+ kmutex_t lock; /* Interlock for wait_mem; also guards callback_set. */
+ bool callback_set; /* True while a xen_shm_callback() registration is pending. */
+};
+
+struct gntdev priv;
+
+/* --- UVM handlers prototypes --- */
+
+static int
+gntmap_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
+ int, int, vm_prot_t, int);
+static void
+gntmap_reference(struct uvm_object *);
+static void
+gntmap_detach(struct uvm_object *);
+static int
+map_grant_ref(struct gntmap *map);
+
+static struct uvm_pagerops gntops = {
+ .pgo_reference = gntmap_reference,
+ .pgo_detach = gntmap_detach,
+ .pgo_fault = gntmap_fault,
+};
+
+/* --- Helper prototypes --- */
+
+static int
+gntdev_mem_callback(void *arg);
+static void
+insert_map(struct gntproc *proc, struct gntmap *map);
+static struct gntmap *
+find_map(struct gntproc *proc, int index, int count);
+static struct gntmap *
+find_vaddr(struct gntproc *proc, vaddr_t va);
+static void
+remove_map(struct gntproc *proc, struct gntmap *map);
+
+/* --- UVM handlers --- */
+
+/*
+ * Replace whatever is mapped at [start, start + map->count pages) in
+ * vmap with a mapping backed by map->uobj, so that faults in that range
+ * are served by gntmap_fault().
+ *
+ * Returns 0 on success or an errno value.  When uvm_map() fails the UVM
+ * object initialised here is destroyed again; the gntmap itself always
+ * remains owned by the caller.
+ */
+static int
+gntdev_install_handler(struct vm_map *vmap, vaddr_t start,
+    struct gntmap *map)
+{
+	int rc;
+	uvm_flag_t uvmflag;
+	vaddr_t newstart = start;
+	vm_prot_t prot;
+	off_t size = ((off_t)map->count << PGSHIFT);
+
+	/*
+	 * gntmap_detach() dereferences map->pmap, so it has to be valid
+	 * before the object can possibly be detached.  It used to be set
+	 * only after a successful uvm_map(), leaving it NULL on the
+	 * failure path below.
+	 */
+	map->pmap = vm_map_pmap(vmap);
+
+	/* remove current entries */
+	uvm_unmap(vmap, start, start + size);
+
+	uvm_obj_init(&map->uobj, &gntops, true, 1);
+	prot = map->ro ? VM_PROT_READ : VM_PROT_READ | VM_PROT_WRITE;
+	uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL,
+	    UVM_FLAG_FIXED | UVM_FLAG_NOMERGE);
+	rc = uvm_map(vmap, &newstart, size, &map->uobj, 0, 0, uvmflag);
+	if (rc) {
+		debug("uvm_map failed");
+		/*
+		 * Nothing was mapped, so there is nothing to remove from
+		 * the pmap; just undo uvm_obj_init() (releases the lock
+		 * it allocated), the same way remove_map() does.
+		 */
+		uvm_obj_destroy(&map->uobj, true);
+		return rc;
+	}
+	if (newstart != start) {
+		/*
+		 * NOTE(review): not expected with UVM_FLAG_FIXED.  The
+		 * entry at newstart is left in place here -- confirm
+		 * whether it should be unmapped.
+		 */
+		debug("uvm_map didn't give us back our vm space");
+		return EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * pgo_fault handler for gntdev mappings.
+ *
+ * For every page requested by the fault (only the centre page unless
+ * PGO_ALLPAGES is set, and skipping PGO_DONTCARE slots) look up the
+ * machine address behind the kernel mapping of the grant and enter it
+ * into the faulting pmap at the corresponding user address.
+ *
+ * Every exit path goes through uvmfault_unlockall(); the early error
+ * returns used to skip it and left the fault locks held.
+ *
+ * Returns 0 on success, EINVAL when the fault lies outside the mapping,
+ * EFAULT when the kernel mapping cannot be resolved, ERESTART (after
+ * waiting for memory) when pmap_enter_ma() ran out of memory, or the
+ * error of the last pmap_enter_ma() call.
+ */
+static int
+gntmap_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
+    int npages, int centeridx, vm_prot_t access_type, int flags)
+{
+	struct vm_map_entry *entry = ufi->entry;
+	struct uvm_object *uobj = entry->object.uvm_obj;
+	struct gntmap *map = (struct gntmap *)uobj;
+	pmap_t pmap = ufi->orig_map->pmap;
+	int index, i, rc = 0;
+	vaddr_t u_va, k_va;
+	vm_prot_t prot;
+	paddr_t ma;
+
+	/* compute offset from start of map */
+	index = (entry->offset + (vaddr - entry->start)) >> PAGE_SHIFT;
+	if (index + npages > map->count) {
+		rc = EINVAL;
+		goto out;
+	}
+
+	prot = map->ro ? VM_PROT_READ : (VM_PROT_READ | VM_PROT_WRITE);
+
+	for (i = 0; i < npages; i++, index++) {
+		if ((flags & PGO_ALLPAGES) == 0 && i != centeridx)
+			continue;
+		if (pps[i] == PGO_DONTCARE)
+			continue;
+
+		u_va = map->va + (index * PAGE_SIZE);
+		k_va = map->kernel_va + (index * PAGE_SIZE);
+		if (pmap_extract_ma(pmap_kernel(), k_va, &ma) == false) {
+			debug("unable to extract kernel MA");
+			rc = EFAULT;
+			break;
+		}
+		rc = pmap_enter_ma(pmap, u_va, ma, 0, prot, PMAP_CANFAIL,
+		    map->domids[index]);
+		if (rc == ENOMEM) {
+			debug("pmap_enter_ma: ENOMEM");
+			rc = ERESTART;
+			break;
+		}
+		if (rc) {
+			/* XXX for proper ptp accountings */
+			debug("pmap_enter_ma: failed");
+			pmap_remove(pmap, u_va, u_va + PAGE_SIZE);
+		}
+	}
+	pmap_update(pmap);
+
+out:
+	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
+
+	if (rc == ERESTART) {
+		/* wait message names this handler, not privcmd's */
+		uvm_wait("gntmap_fault");
+	}
+	return rc;
+}
+
+/*
+ * pgo_reference handler: account for one more reference to the
+ * object, under the object lock.
+ */
+static void
+gntmap_reference(struct uvm_object *uobj)
+{
+	kmutex_t *objlock = uobj->vmobjlock;
+
+	mutex_enter(objlock);
+	uobj->uo_refs += 1;
+	mutex_exit(objlock);
+}
+
+/*
+ * pgo_detach handler: drop one reference.  When no reference is left,
+ * remove the user translations of the whole mapping from map->pmap.
+ * The gntmap itself is not freed here; remove_map() does that.
+ */
+static void
+gntmap_detach(struct uvm_object *uobj)
+{
+	struct gntmap *map = (struct gntmap *)uobj;
+	kmutex_t *objlock = uobj->vmobjlock;
+	bool still_referenced;
+
+	mutex_enter(objlock);
+	uobj->uo_refs -= 1;
+	still_referenced = (uobj->uo_refs > 0);
+	mutex_exit(objlock);
+
+	if (still_referenced)
+		return;
+
+	/* last reference gone: tear down the user translations */
+	pmap_remove(map->pmap, map->va, map->va + (map->count * PAGE_SIZE));
+	pmap_update(map->pmap);
+}
+
+/* --- Internal Helpers --- */
+
+/*
+ * Callback registered with xen_shm_callback(): clear the "callback
+ * pending" flag and wake every thread sleeping on priv.wait_mem.
+ * The argument is unused.  Always returns 0.
+ */
+static int
+gntdev_mem_callback(void *arg)
+{
+	mutex_enter(&priv.lock);
+	priv.callback_set = false;
+	cv_broadcast(&priv.wait_mem);
+	mutex_exit(&priv.lock);
+
+	return 0;
+}
+
+/*
+ * Link map into proc->maps and pick its index.
+ *
+ * The list is walked from the head: map is placed in front of the
+ * first entry it fits below (map->index + map->count < entry index);
+ * otherwise its index is moved just past that entry and the walk
+ * continues, ending with an insertion after the last entry.
+ */
+static void
+insert_map(struct gntproc *proc, struct gntmap *map)
+{
+	struct gntmap *cur, *next;
+
+	mutex_enter(&proc->lock);
+	proc->num_maps++;
+
+	cur = LIST_FIRST(&proc->maps);
+	if (cur == NULL) {
+		/* first mapping of this descriptor */
+		LIST_INSERT_HEAD(&proc->maps, map, next_map);
+		mutex_exit(&proc->lock);
+		return;
+	}
+
+	for (;; cur = next) {
+		if (map->index + map->count < cur->index) {
+			LIST_INSERT_BEFORE(cur, map, next_map);
+			break;
+		}
+		/* does not fit here: try right behind this entry */
+		map->index = cur->index + cur->count;
+		next = LIST_NEXT(cur, next_map);
+		if (next == NULL) {
+			LIST_INSERT_AFTER(cur, map, next_map);
+			break;
+		}
+	}
+
+	mutex_exit(&proc->lock);
+}
+
+/*
+ * Look up the mapping with the given index.  A non-zero count must
+ * match the mapping's count as well; a count of 0 matches any size.
+ * Returns the mapping or NULL.  proc->lock is only held during the
+ * search, not when the result is returned.
+ */
+static struct gntmap *
+find_map(struct gntproc *proc, int index, int count)
+{
+	struct gntmap *map;
+
+	mutex_enter(&proc->lock);
+	LIST_FOREACH(map, &proc->maps, next_map) {
+		if (map->index == index &&
+		    (count == 0 || map->count == count))
+			break;
+	}
+	/* map is NULL here when the list is empty or nothing matched */
+	mutex_exit(&proc->lock);
+
+	return map;
+}
+
+/*
+ * Look up the mapping whose user range [va, va + count pages)
+ * contains the given address.  Returns the mapping or NULL.
+ * proc->lock is only held during the search.
+ */
+static struct gntmap *
+find_vaddr(struct gntproc *proc, vaddr_t va)
+{
+	struct gntmap *map;
+
+	mutex_enter(&proc->lock);
+	LIST_FOREACH(map, &proc->maps, next_map) {
+		if (map->va <= va &&
+		    va < (map->va + (map->count * PAGE_SIZE)))
+			break;
+	}
+	/* map is NULL here when the list is empty or nothing matched */
+	mutex_exit(&proc->lock);
+
+	return map;
+}
+
+/*
+ * Unlink map from proc->maps and release everything it owns: the
+ * kernel mapping of the grants and the UVM object (when the grants
+ * are still mapped), the three per-grant arrays and the gntmap itself.
+ * The caller must not touch map afterwards.
+ */
+static void
+remove_map(struct gntproc *proc, struct gntmap *map)
+{
+	mutex_enter(&proc->lock);
+	proc->num_maps--;
+	LIST_REMOVE(map, next_map);
+	mutex_exit(&proc->lock);
+
+	if (map->kernel_va != VA_FREE) {
+		xen_shm_unmap(map->kernel_va, map->count, map->handles);
+		map->kernel_va = VA_FREE;
+		uvm_obj_destroy(&map->uobj, true);
+	}
+
+	free(map->domids, M_DEVBUF);
+	free(map->handles, M_DEVBUF);
+	free(map->grants, M_DEVBUF);
+	free(map, M_DEVBUF);
+}
+
+/*
+ * Map the grant references of map into kernel virtual address space
+ * and record the resulting address in map->kernel_va.
+ *
+ * When xen_shm_map_multidom() reports ENOMEM the caller sleeps on
+ * priv.wait_mem (registering gntdev_mem_callback() first if no
+ * registration is pending) and the mapping is then retried.  Any other
+ * error is returned to the caller.
+ *
+ * Returns 0 on success or the errno from xen_shm_map_multidom().
+ */
+static int
+map_grant_ref(struct gntmap *map)
+{
+	int rc;
+	vaddr_t k_va;
+
+	do {
+		debug("mapping grefs with index %d", map->index);
+		rc = xen_shm_map_multidom(map->count, map->domids,
+		    map->grants, &k_va, map->handles,
+		    map->ro ? XSHM_RO : 0);
+		switch (rc) {
+		case 0:
+			/* All good */
+			map->kernel_va = k_va;
+			break;
+		case ENOMEM:
+			mutex_enter(&priv.lock);
+			debug("xen_shm_map_multidom out of memory");
+			if (!priv.callback_set) {
+				/* Register callback */
+				if (xen_shm_callback(gntdev_mem_callback,
+				    NULL) != 0) {
+					mutex_exit(&priv.lock);
+					panic("ioctl_map_grant_ref: xen_shm_callback failed");
+				}
+				priv.callback_set = true;
+			}
+			cv_wait(&priv.wait_mem, &priv.lock);
+			mutex_exit(&priv.lock);
+			/*
+			 * Retry.  Without this break the case fell
+			 * through to "default" and the loop never
+			 * actually retried after waiting.
+			 */
+			break;
+		default:
+			debug("xen_shm_map_multidom failed");
+			return rc;
+		}
+	} while (rc == ENOMEM);
+
+	return 0;
+}
+
+/* --- ioctl handlers --- */
+
+/*
+ * IOCTL_GNTDEV_MAP_GRANT_REF: map map_grants->count grant references
+ * at the user address map_grants->vaddr.
+ *
+ * The (domid, ref) pairs are copied in from map_grants->refs, the
+ * grants are mapped into the kernel with map_grant_ref(), and a UVM
+ * object is installed over the user range so that faults enter the
+ * granted pages.  The mapping is read/write when the existing user
+ * range is writable, read-only when it is only readable.
+ *
+ * On success the mapping is linked into proc and its index (shifted by
+ * PAGE_SHIFT) is stored in map_grants->index.
+ *
+ * Returns 0 on success; EINVAL for an empty request or a vaddr that is
+ * already part of a mapping; EFAULT when the user range is neither
+ * readable nor writable; otherwise the error from copyin(),
+ * map_grant_ref() or gntdev_install_handler().  On error nothing
+ * allocated here is left behind.
+ */
+static int
+ioctl_map_grant_ref(struct gntproc *proc,
+    ioctl_gntdev_map_grant_ref *map_grants)
+{
+	grant_ref_t *refs = NULL;
+	grant_handle_t *handles = NULL;
+	int *domids = NULL;
+	struct gntmap *map = NULL;
+	struct vm_map *vmm;
+	ioctl_gntdev_grant_ref ioctl_map;
+	int i, rc;
+	vaddr_t va0, va_last;
+
+	if (map_grants->count == 0) {
+		/* nothing to map; avoid zero-sized allocations/mappings */
+		debug("empty grant ref list");
+		rc = EINVAL;
+		goto error;
+	}
+
+	if (find_vaddr(proc, map_grants->vaddr)) {
+		debug("memory area already in use");
+		rc = EINVAL;
+		goto error;
+	}
+
+	debug("mapping %d refs", map_grants->count);
+
+	refs = malloc(sizeof(*refs) * map_grants->count, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+	handles = malloc(sizeof(*handles) * map_grants->count, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+	domids = malloc(sizeof(*domids) * map_grants->count, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+
+	for (i = 0; i < map_grants->count; i++) {
+		rc = copyin(&map_grants->refs[i], &ioctl_map,
+		    sizeof(ioctl_map));
+		if (rc != 0) {
+			debug("unable to copyin grant ref info %d", i);
+			goto error;
+		}
+		debug("mapping ref: %u Dom: %u", ioctl_map.ref,
+		    ioctl_map.domid);
+		refs[i] = ioctl_map.ref;
+		domids[i] = ioctl_map.domid;
+	}
+	map = malloc(sizeof(*map), M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+	vmm = &proc->lwp->l_proc->p_vmspace->vm_map;
+	va0 = map_grants->vaddr & ~PAGE_MASK;
+	/*
+	 * Widen before shifting: count is 32 bits wide and the size
+	 * is computed in a wide type in gntdev_install_handler() too.
+	 */
+	va_last = va0 + ((vaddr_t)map_grants->count << PGSHIFT) - 1;
+	vm_map_lock_read(vmm);
+	if (uvm_map_checkprot(vmm, va0, va_last, VM_PROT_WRITE)) {
+		map->ro = false;
+		debug("map RW");
+	} else if (uvm_map_checkprot(vmm, va0, va_last, VM_PROT_READ)) {
+		map->ro = true;
+		debug("map RO");
+	} else {
+		debug("unable check protection");
+		vm_map_unlock_read(vmm);
+		/* rc used to be left unset here, reporting success */
+		rc = EFAULT;
+		goto error;
+	}
+	vm_map_unlock_read(vmm);
+	map->grants = refs;
+	map->handles = handles;
+	map->domids = domids;
+	map->va = map_grants->vaddr;
+	map->kernel_va = VA_FREE;
+	map->count = map_grants->count;
+	map->index = 0;
+
+	rc = map_grant_ref(map);
+	if (rc) {
+		debug("map_grant_ref failed");
+		goto error;
+	}
+
+	rc = gntdev_install_handler(vmm, map->va, map);
+	if (rc) {
+		debug("gntdev_install_handler failed");
+		xen_shm_unmap(map->kernel_va, map->count, map->handles);
+		map->kernel_va = VA_FREE;
+		goto error;
+	}
+	insert_map(proc, map);
+	map_grants->index = map->index << PAGE_SHIFT;
+	debug("gntrefs mapped at index %" PRIu64 "", map_grants->index);
+	return 0;
+
+error:
+	freem(refs);
+	freem(handles);
+	freem(domids);
+	freem(map);
+	debug("unable to map grant refs");
+	return rc;
+}
+
+/*
+ * IOCTL_GNTDEV_UNMAP_GRANT_REF: release the mapping identified by
+ * unmap_grants->index (as returned by the map ioctl) and
+ * unmap_grants->count.
+ *
+ * Returns 0 on success, or EINVAL when no such mapping exists or its
+ * UVM object is still referenced.
+ */
+static int
+ioctl_unmap_grant_ref(struct gntproc *proc,
+    ioctl_gntdev_unmap_grant_ref *unmap_grants)
+{
+	uint64_t index = unmap_grants->index >> PAGE_SHIFT;
+	struct gntmap *map;
+
+	debug("unmapping index %" PRIu64, index);
+
+	map = find_map(proc, index, unmap_grants->count);
+	if (map == NULL) {
+		debug("unable to find index %" PRIu64, index);
+		return EINVAL;
+	}
+
+	/* refuse while the object is still referenced */
+	mutex_enter(map->uobj.vmobjlock);
+	if (map->uobj.uo_refs) {
+		debug("trying to remove a referenced map");
+		mutex_exit(map->uobj.vmobjlock);
+		return EINVAL;
+	}
+	mutex_exit(map->uobj.vmobjlock);
+
+	remove_map(proc, map);
+	return 0;
+}
+
+/*
+ * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: find the mapping that contains
+ * offset_vaddr->vaddr and report its index (shifted by PAGE_SHIFT) in
+ * ->offset and its page count in ->count.
+ *
+ * Returns 0 on success or EINVAL when the address is not part of any
+ * mapping of this descriptor.
+ */
+static int
+ioctl_get_offset_vaddr(struct gntproc *proc,
+    ioctl_gntdev_get_offset_for_vaddr *offset_vaddr)
+{
+	struct gntmap *map;
+
+	debug("find offset va: %p", (void *)offset_vaddr->vaddr);
+
+	map = find_vaddr(proc, offset_vaddr->vaddr);
+	if (map == NULL) {
+		debug("unable to find vaddr");
+		return EINVAL;
+	}
+
+	offset_vaddr->offset = map->index << PAGE_SHIFT;
+	offset_vaddr->count = map->count;
+	return 0;
+}
+
+/* --- Device ops handlers --- */
+
+/*
+ * fo_ioctl entry point of a gntdev descriptor: hand the request to the
+ * matching handler.  addr points at the argument structure of the
+ * request.  Unknown commands yield EINVAL.
+ */
+static int
+gntdev_fioctl(struct file *fp, u_long cmd, void *addr)
+{
+	struct gntproc *proc = fp->f_data;
+
+	switch (cmd) {
+	case IOCTL_GNTDEV_MAP_GRANT_REF:
+		return ioctl_map_grant_ref(proc,
+		    (ioctl_gntdev_map_grant_ref *)addr);
+	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+		return ioctl_unmap_grant_ref(proc,
+		    (ioctl_gntdev_unmap_grant_ref *)addr);
+	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+		return ioctl_get_offset_vaddr(proc,
+		    (ioctl_gntdev_get_offset_for_vaddr *)addr);
+	default:
+		return EINVAL;
+	}
+}
+
+/*
+ * Open entry point of the character device.  Every open gets its own
+ * file with gntdev_fileops and a fresh struct gntproc as private data,
+ * handed over through fd_clone().
+ *
+ * Returns the error of fd_allocfile() if no file can be allocated,
+ * otherwise the result of fd_clone().
+ */
+int
+gntdev_open(dev_t dev, int flags, int mode, struct lwp *l)
+{
+	struct file *fp;
+	struct gntproc *proc;
+	int fd, error;
+
+	error = fd_allocfile(&fp, &fd);
+	if (error != 0)
+		return error;
+
+	proc = malloc(sizeof(*proc), M_DEVBUF, M_WAITOK | M_ZERO);
+	mutex_init(&proc->lock, MUTEX_DEFAULT, IPL_NONE);
+	LIST_INIT(&proc->maps);
+	proc->num_maps = 0;
+	proc->lwp = l;
+	debug("opened for proc %p", l);
+
+	return fd_clone(fp, fd, flags, &gntdev_fileops, proc);
+}
+
+/*
+ * fo_close entry point: release every mapping still linked to the
+ * descriptor, then the per-open state itself.  Always returns 0.
+ *
+ * proc->lock is dropped around remove_map() because remove_map()
+ * takes it itself.
+ */
+static int
+gntdev_fclose(struct file *fp)
+{
+	struct gntproc *proc = fp->f_data;
+	struct gntmap *map;
+
+	mutex_enter(&proc->lock);
+	while ((map = LIST_FIRST(&proc->maps)) != NULL) {
+		mutex_exit(&proc->lock);
+		remove_map(proc, map);
+		mutex_enter(&proc->lock);
+	}
+	KASSERT(proc->num_maps == 0);
+	mutex_exit(&proc->lock);
+
+	mutex_destroy(&proc->lock);
+	debug("closed device for proc %p", proc->lwp);
+	free(proc, M_DEVBUF);
+
+	return 0;
+}
+
+/*
+ * Pseudo-device attach routine: set up the driver-wide state in priv
+ * (lock, condition variable, callback flag).  The argument is unused.
+ */
+void
+gntdevattach(int n)
+{
+	priv.callback_set = false;
+	mutex_init(&priv.lock, MUTEX_DEFAULT, IPL_VM);
+	cv_init(&priv.wait_mem, "gntdev");
+	debug("attached");
+}
diff --git a/sys/dev/DEVNAMES b/sys/dev/DEVNAMES
index 45cf018..765fe45 100644
--- a/sys/dev/DEVNAMES
+++ b/sys/dev/DEVNAMES
@@ -1517,6 +1517,7 @@ xdc MI
xdc sun3
xe next68k
xel x68k
+gntdev xen
xencons xen
xenevt xen
xennet xen
diff --git a/sys/rump/librump/rumpkern/devsw.c
b/sys/rump/librump/rumpkern/devsw.c
index 5a1af01..e513885 100644
--- a/sys/rump/librump/rumpkern/devsw.c
+++ b/sys/rump/librump/rumpkern/devsw.c
@@ -134,6 +134,7 @@ struct devsw_conv devsw_conv0[] = {
{ "rd", 22, 105, DEVNODE_DONTBOTHER, 0, { 0, 0 }},
{ "ct", 23, 106, DEVNODE_DONTBOTHER, 0, { 0, 0 }},
{ "mt", 24, 107, DEVNODE_DONTBOTHER, 0, { 0, 0 }},
+ { "gntdev", -1, 140, DEVNODE_DONTBOTHER, 0, { 0, 0 }},
{ "xenevt", -1, 141, DEVNODE_DONTBOTHER, 0, { 0, 0 }},
{ "xbd", 142, 142, DEVNODE_DONTBOTHER, 0, { 0, 0 }},
{ "xencons", -1, 143, DEVNODE_DONTBOTHER, 0, { 0, 0 }},
--
1.7.7.5 (Apple Git-26)
Home |
Main Index |
Thread Index |
Old Index