Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch Work in progress on dom0 PVH support: ioctl support...



details:   https://anonhg.NetBSD.org/src/rev/37b5f4dcd5d4
branches:  trunk
changeset: 369778:37b5f4dcd5d4
user:      bouyer <bouyer%NetBSD.org@localhost>
date:      Wed Aug 31 12:51:56 2022 +0000

description:
Work in progress on dom0 PVH support: ioctl support for tools.
Basically, in PVH mode (where XENFEAT_auto_translated_physmap is enabled),
the hypervisor will not map foreign resources in our virtual address
space for us. Instead, we have to pass it an address in our physical
address space (but not mapped to some RAM) where the resource will show up
and then enter this PA in our page table.

For this, introduce xenmem_*, which manages the PA space. In PVH mode this
is just allocated from the iomem_ex extent.

With this, I can start a PV domU, and the guest's kernel boots (and
the console works). It hangs because the backend driver can't map the
frontend resources (yet).

Note that, per https://xenbits.xen.org/docs/unstable/support-matrix.html,
dom0 PVH support is still considered experimental by Xen.

diffstat:

 sys/arch/x86/x86/pmap.c       |   40 +++++++++---
 sys/arch/xen/conf/files.xen   |    3 +-
 sys/arch/xen/include/xenmem.h |   42 +++++++++++++
 sys/arch/xen/xen/privcmd.c    |  132 +++++++++++++++++++++++++++++++++++++++--
 sys/arch/xen/xen/xenmem.c     |  108 ++++++++++++++++++++++++++++++++++
 5 files changed, 304 insertions(+), 21 deletions(-)

diffs (truncated from 582 to 300 lines):

diff -r bc4ae69eaa1c -r 37b5f4dcd5d4 sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c   Wed Aug 31 12:34:04 2022 +0000
+++ b/sys/arch/x86/x86/pmap.c   Wed Aug 31 12:51:56 2022 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.420 2022/08/20 23:49:31 riastradh Exp $     */
+/*     $NetBSD: pmap.c,v 1.421 2022/08/31 12:51:56 bouyer Exp $        */
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.420 2022/08/20 23:49:31 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.421 2022/08/31 12:51:56 bouyer Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -5229,6 +5229,9 @@
 {
        struct pmap_data_gnt *pgnt;
        pt_entry_t *ptes, opte;
+#ifndef XENPV
+       pt_entry_t npte;
+#endif
        pt_entry_t *ptep;
        pd_entry_t * const *pdes;
        struct vm_page *ptp;
@@ -5305,8 +5308,13 @@
        idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE;
        op = &pgnt->pd_gnt_ops[idx];
 
-#ifdef XENPV /* XXX */
+#ifdef XENPV
+       KASSERT(op->flags & GNTMAP_contains_pte);
        op->host_addr = xpmap_ptetomach(ptep);
+#else
+       KASSERT((op->flags & GNTMAP_contains_pte) == 0);
+       KASSERT(op->flags != 0);
+       KASSERT(op->host_addr != 0);
 #endif
        op->dev_bus_addr = 0;
        op->status = GNTST_general_error;
@@ -5328,10 +5336,18 @@
        if (__predict_false(op->status != GNTST_okay)) {
                printf("%s: GNTTABOP_map_grant_ref status: %d\n",
                    __func__, op->status);
-               if (have_oldpa) {
+               if (have_oldpa) { /* XXX did the pte really change if XENPV  ?*/
                        ptp->wire_count--;
                }
        } else {
+#ifndef XENPV
+               npte = op->host_addr | pmap_pg_nx | PTE_U | PTE_P;
+               if ((op->flags & GNTMAP_readonly) == 0)
+                       npte |= PTE_W;
+               do {
+                       opte = *ptep;
+               } while (pmap_pte_cas(ptep, opte, npte) != opte);
+#endif
                pgnt->pd_gnt_refs++;
                if (!have_oldpa) {
                        ptp->wire_count++;
@@ -5417,7 +5433,6 @@
                idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE;
                op = &pgnt->pd_gnt_ops[idx];
                KASSERT(lvl == 1);
-               KASSERT(op->status == GNTST_okay);
 
                /* Get PTP if non-kernel mapping. */
                ptp = pmap_find_ptp(pmap, va, 1);
@@ -5426,11 +5441,14 @@
 
                if (op->status == GNTST_okay)  {
                        KASSERT(pmap_valid_entry(ptes[pl1_i(va)]));
+#ifdef XENPV 
+                       unmap_op.host_addr = xpmap_ptetomach(&ptes[pl1_i(va)]);
+#else
+                       unmap_op.host_addr = op->host_addr;
+                       pmap_pte_testset(&ptes[pl1_i(va)], 0);
+#endif
                        unmap_op.handle = op->handle;
                        unmap_op.dev_bus_addr = 0;
-#ifdef XENPV /* XXX */
-                       unmap_op.host_addr = xpmap_ptetomach(&ptes[pl1_i(va)]);
-#endif
                        ret = HYPERVISOR_grant_table_op(
                            GNTTABOP_unmap_grant_ref, &unmap_op, 1);
                        if (ret) {
@@ -5440,9 +5458,9 @@
 
                        ptp->wire_count--;
                        pgnt->pd_gnt_refs--;
-                       if (pgnt->pd_gnt_refs == 0) {
-                               pmap_free_gnt(pmap, pgnt);
-                       }
+               }
+               if (pgnt->pd_gnt_refs == 0) {
+                       pmap_free_gnt(pmap, pgnt);
                }
                /*
                 * if mapping removed and the PTP is no longer
diff -r bc4ae69eaa1c -r 37b5f4dcd5d4 sys/arch/xen/conf/files.xen
--- a/sys/arch/xen/conf/files.xen       Wed Aug 31 12:34:04 2022 +0000
+++ b/sys/arch/xen/conf/files.xen       Wed Aug 31 12:51:56 2022 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: files.xen,v 1.186 2020/05/26 10:37:24 bouyer Exp $
+#      $NetBSD: files.xen,v 1.187 2022/08/31 12:51:56 bouyer Exp $
 
 defflag        opt_xen.h                       XEN XENPVH XENPVHVM PAE DOM0OPS
 
@@ -7,6 +7,7 @@
 file   arch/xen/xen/xen_machdep.c              xen
 file   arch/xen/xen/evtchn.c                   xen
 file   arch/xen/xen/xengnt.c                   xen
+file   arch/xen/xen/xenmem.c                   xen & !xenpv
 file   arch/xen/x86/xen_mainbus.c              xen
 file   arch/xen/xen/xen_clock.c                xen
 file   arch/xen/x86/xen_bus_dma.c              xen
diff -r bc4ae69eaa1c -r 37b5f4dcd5d4 sys/arch/xen/include/xenmem.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/xen/include/xenmem.h     Wed Aug 31 12:51:56 2022 +0000
@@ -0,0 +1,42 @@
+/* $NetBSD: xenmem.h,v 1.1 2022/08/31 12:51:56 bouyer Exp $ */
+/*
+ * Copyright (c) 2022 Manuel Bouyer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* Xen physical space management */
+
+#ifndef _XEN_XENMEM_H_
+#define _XEN_XENMEM_H_
+#include "opt_xen.h"
+#ifdef XENPV
+extern paddr_t pmap_pa_start; /* PA of first physical page for this domain */
+extern paddr_t pmap_pa_end;   /* PA of last physical page for this domain */
+
+void xenmem_pa_register(paddr_t, paddr_t);
+#endif /* XENPV */
+
+paddr_t xenmem_alloc_pa(u_long, u_long, bool);
+void xenmem_free_pa(paddr_t, u_long);
+
+#endif /* _XEN_XENMEM_H_ */
diff -r bc4ae69eaa1c -r 37b5f4dcd5d4 sys/arch/xen/xen/privcmd.c
--- a/sys/arch/xen/xen/privcmd.c        Wed Aug 31 12:34:04 2022 +0000
+++ b/sys/arch/xen/xen/privcmd.c        Wed Aug 31 12:51:56 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: privcmd.c,v 1.61 2021/12/10 20:36:03 andvar Exp $ */
+/* $NetBSD: privcmd.c,v 1.62 2022/08/31 12:51:56 bouyer Exp $ */
 
 /*-
  * Copyright (c) 2004 Christian Limpach.
@@ -27,7 +27,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.61 2021/12/10 20:36:03 andvar Exp $");
+__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.62 2022/08/31 12:51:56 bouyer Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -47,6 +47,7 @@
 #include <xen/hypervisor.h>
 #include <xen/xen.h>
 #include <xen/xenio.h>
+#include <xen/xenmem.h>
 #include <xen/xenpmap.h>
 #include <xen/granttables.h>
 
@@ -60,17 +61,20 @@
 
 typedef enum _privcmd_type {
        PTYPE_PRIVCMD,
+       PTYPE_PRIVCMD_PHYSMAP,
        PTYPE_GNTDEV_REF,
        PTYPE_GNTDEV_ALLOC
 } privcmd_type;
 
 struct privcmd_object_privcmd {
+       paddr_t base_paddr; /* base address of physical space */
         paddr_t *maddr; /* array of machine address to map */
         int     domid;
         bool    no_translate;
 };
 
 struct privcmd_object_gntref {
+       paddr_t base_paddr; /* base address of physical space */
         struct ioctl_gntdev_grant_notify notify;
        struct gnttab_map_grant_ref ops[1]; /* variable length */
 };
@@ -316,6 +320,10 @@
 static int
 privcmd_mmap(struct vop_ioctl_args *ap)
 {
+#ifndef XENPV
+       printf("IOCTL_PRIVCMD_MMAP not supported\n");
+       return EINVAL;
+#else
        int i, j;
        privcmd_mmap_t *mcmd = ap->a_data;
        privcmd_mmap_entry_t mentry;
@@ -357,11 +365,16 @@
                        return error;
        }
        return 0;
+#endif
 }
 
 static int
 privcmd_mmapbatch(struct vop_ioctl_args *ap)
 {
+#ifndef XENPV
+       printf("IOCTL_PRIVCMD_MMAPBATCH not supported\n");
+       return EINVAL;
+#else
        int i;
        privcmd_mmapbatch_t* pmb = ap->a_data;
        vaddr_t va0;
@@ -432,6 +445,7 @@
        error = privcmd_map_obj(vmm, va0, obj, prot);
 
        return error;
+#endif
 }
 
 static int
@@ -446,6 +460,7 @@
        struct privcmd_object *obj;
        vm_prot_t prot;
        int error;
+       paddr_t base_paddr = 0;
 
        vmm = &curlwp->l_proc->p_vmspace->vm_map;
        va0 = pmb->addr & ~PAGE_MASK;
@@ -461,10 +476,16 @@
        if (prot == UVM_PROT_NONE)
                return EINVAL;
        
+#ifndef XENPV
+       KASSERT(xen_feature(XENFEAT_auto_translated_physmap));
+       base_paddr = xenmem_alloc_pa(pmb->num * PAGE_SIZE, PAGE_SIZE, true);
+       KASSERT(base_paddr != 0);
+#endif
        maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
        obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
-       obj->type = PTYPE_PRIVCMD;
+       obj->type = PTYPE_PRIVCMD_PHYSMAP;
        obj->u.pc.maddr = maddr;
+       obj->u.pc.base_paddr = base_paddr;
        obj->u.pc.no_translate = false;
        obj->npages = pmb->num;
        obj->u.pc.domid = pmb->dom;
@@ -473,9 +494,18 @@
                error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
                if (error != 0) {
                        kmem_free(maddr, sizeof(paddr_t) * pmb->num);
+                       kmem_free(obj, sizeof(*obj));
+#ifndef XENPV
+                       xenmem_free_pa(base_paddr, pmb->num * PAGE_SIZE);
+#endif
                        return error;
                }
+#ifdef XENPV
                maddr[i] = ((paddr_t)mfn) << PGSHIFT;
+#else
+               maddr[i] = mfn; /* TMP argument for XENMEM_add_to_physmap */
+#endif
+
        }
        error = privcmd_map_obj(vmm, va0, obj, prot);
        if (error)
@@ -487,6 +517,7 @@
         */
        for(i = 0; i < pmb->num; i++, va0 += PAGE_SIZE) {
                int err, cerr;
+#ifdef XENPV
                for (int j = 0 ; j < 10; j++) {
                        err = pmap_enter_ma(vmm->pmap, va0, maddr[i], 0, 



Home | Main Index | Thread Index | Old Index