Source-Changes-HG archive


[src/trunk]: src/sys/arch/sparc64 many improvements:



details:   https://anonhg.NetBSD.org/src/rev/9597f99f3e66
branches:  trunk
changeset: 536798:9597f99f3e66
user:      chs <chs%NetBSD.org@localhost>
date:      Sun Sep 22 07:19:43 2002 +0000

description:
many improvements:
 - use struct vm_page_md for attaching pv entries to struct vm_page
   (see sketch 1 below).
 - change pseg_set()'s return value to indicate whether the spare page
   was used as an L2 or L3 PTP.
 - use a pool for pv entries instead of malloc() (see sketch 2 below).
 - put PTPs on a list attached to the pmap so we can free them
   more efficiently (by just walking the list) in pmap_destroy()
   (see sketch 3 below).
 - use the new pmap_remove_all() interface to avoid flushing the cache and TLB
   for each pmap_remove() that's done as we are tearing down an address
   space (see sketch 4 below).
 - in pmap_enter(), handle replacing an existing mapping more efficiently
   than just calling pmap_remove() on it.  also, skip flushing the
   TSB and TLB if there was no previous mapping, since there can't be
   anything we need to flush.  also, preload the TSB if we're pre-setting
   the mod/ref bits.
 - allocate hardware contexts like the MIPS pmap:
   allocate them all sequentially without reuse, then once we run out
   just invalidate all user TLB entries and flush the entire L1 dcache
   (see sketch 5 below).
 - fix pmap_extract() for the case where the va is not page-aligned and
   nothing is mapped there.
 - fix calculation of TSB size.  it was comparing physmem (which is
   in units of pages) to constants that only make sense if they are
   in units of bytes (see sketch 6 below).
 - avoid sleeping in pmap_enter(), instead let the caller do it.
 - use pmap_kenter_pa() instead of pmap_enter() where appropriate.
 - remove code to handle impossible cases in various functions.
 - tweak asm code to pipeline a little better.
 - remove many unnecessary spls and membars.
 - lots of code cleanup.
 - no doubt other stuff that I've forgotten.

the result of all this is that a fork+exit microbenchmark is 34% faster
and a fork+exec+exit microbenchmark is 28% faster.
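
The sketches below are editorial illustrations of the techniques listed
above; all names, fields and constants in them are assumptions, not the
code actually committed.

Sketch 1: hanging pv entries off struct vm_page through a machine-dependent
struct vm_page_md.  The member and macro names here are made up for
illustration.

struct pv_entry {
        struct pv_entry *pv_next;       /* next mapping of the same page */
        struct pmap     *pv_pmap;       /* pmap that owns this mapping */
        vaddr_t         pv_va;          /* virtual address of the mapping */
};

struct vm_page_md {                     /* embedded in struct vm_page */
        struct pv_entry *mdpg_pvhead;   /* head of this page's pv list */
        unsigned int    mdpg_attrs;     /* cached modified/referenced bits */
};

#define VM_MDPAGE_INIT(pg) do {                                         \
        (pg)->mdpage.mdpg_pvhead = NULL;                                \
        (pg)->mdpage.mdpg_attrs = 0;                                    \
} while (/*CONSTCOND*/ 0)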
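
Sketch 2: allocating pv entries from a pool(9) instead of malloc(9).  The
pool and function names are illustrative, and pool_init() is shown with the
seven-argument form in use around this time, which may differ in other
releases.

static struct pool pmap_pv_pool;

void
pmap_pv_pool_init(void)
{
        pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0,
            "pv_entry", NULL);
}

static struct pv_entry *
pmap_pv_alloc(void)
{
        /* PR_NOWAIT: pmap_enter() must not sleep (see above). */
        return pool_get(&pmap_pv_pool, PR_NOWAIT);
}

static void
pmap_pv_free(struct pv_entry *pv)
{
        pool_put(&pmap_pv_pool, pv);
}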
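
Sketch 3: keeping page table pages on a per-pmap list so pmap_destroy() can
free them by walking the list instead of scanning the address space.  The
list head and field names, and the reuse of the vm_page pageq linkage, are
assumptions.

struct pmap {
        /* ... existing fields ... */
        TAILQ_HEAD(, vm_page) pm_ptps;  /* every PTP backing this pmap */
};

/* called from pmap_destroy() once the pmap has no more users */
static void
pmap_free_ptps(struct pmap *pm)
{
        struct vm_page *ptp;

        while ((ptp = TAILQ_FIRST(&pm->pm_ptps)) != NULL) {
                TAILQ_REMOVE(&pm->pm_ptps, ptp, pageq);
                uvm_pagefree(ptp);
        }
}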
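
Sketch 4: the general pattern behind pmap_remove_all(): mark the pmap as
being torn down, do the expensive cache and TLB work once, and let the
pmap_remove() calls that follow skip their per-range flushes.  The flag and
helper names are hypothetical.

void
pmap_remove_all(struct pmap *pm)
{
        pm->pm_tearing_down = 1;        /* hypothetical flag */
        dcache_flush_all();             /* hypothetical: one dcache flush */
        tlb_flush_ctx(pm->pm_ctx);      /* hypothetical: one context flush */
}

/* later, in pmap_remove(): */
        if (!pm->pm_tearing_down)
                tlb_flush_range(pm, sva, eva);  /* skip while tearing down */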
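
Sketch 5: MIPS-style sequential context allocation: hand out context numbers
without reuse and, on wrap-around, invalidate all user TLB entries and the
whole L1 dcache in one go.  The constants, per-pmap fields and helpers are
illustrative.

static u_int ctx_next = 1;              /* context 0 is the kernel's */
static u_int ctx_gen = 1;               /* bumped on every wrap-around */

static u_int
ctx_alloc(struct pmap *pm)
{
        if (ctx_next == NUMCTX) {       /* NUMCTX: hw context count */
                /*
                 * Out of contexts: flush all user TLB entries and the
                 * entire L1 dcache once, invalidate everyone's old
                 * context via the generation number, and start over.
                 */
                tlb_flush_all_user();   /* hypothetical helper */
                dcache_flush_all();     /* hypothetical helper */
                ctx_gen++;
                ctx_next = 1;
        }
        pm->pm_ctxgen = ctx_gen;        /* hypothetical per-pmap fields */
        pm->pm_ctx = ctx_next++;
        return pm->pm_ctx;
}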
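
Sketch 6: sizing the TSB from the amount of physical memory with consistent
units.  physmem counts pages, so it is converted to bytes with ptoa() before
being compared against byte thresholds; the thresholds and return values
below are made up for illustration.

static int
pmap_tsbsize(void)
{
        psize_t bytes = ptoa((psize_t)physmem); /* pages -> bytes */

        if (bytes >= 512UL * 1024 * 1024)
                return 3;               /* largest TSB */
        else if (bytes >= 64UL * 1024 * 1024)
                return 2;
        return 1;
}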

diffstat:

 sys/arch/sparc64/dev/iommu.c          |   275 +--
 sys/arch/sparc64/include/pmap.h       |    70 +-
 sys/arch/sparc64/include/pte.h        |     9 +-
 sys/arch/sparc64/include/vmparam.h    |    28 +-
 sys/arch/sparc64/sparc64/cache.h      |     5 +-
 sys/arch/sparc64/sparc64/cpu.c        |    16 +-
 sys/arch/sparc64/sparc64/locore.s     |   370 +++-
 sys/arch/sparc64/sparc64/machdep.c    |    68 +-
 sys/arch/sparc64/sparc64/mem.c        |     4 +-
 sys/arch/sparc64/sparc64/pmap.c       |  2384 ++++++++++++++------------------
 sys/arch/sparc64/sparc64/trap.c       |   366 ++--
 sys/arch/sparc64/sparc64/vm_machdep.c |    25 +-
 12 files changed, 1698 insertions(+), 1922 deletions(-)

diffs (truncated from 6257 to 300 lines):

diff -r 9fa6c1a624d0 -r 9597f99f3e66 sys/arch/sparc64/dev/iommu.c
--- a/sys/arch/sparc64/dev/iommu.c      Sun Sep 22 07:17:08 2002 +0000
+++ b/sys/arch/sparc64/dev/iommu.c      Sun Sep 22 07:19:43 2002 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: iommu.c,v 1.57 2002/08/29 04:43:43 chs Exp $   */
+/*     $NetBSD: iommu.c,v 1.58 2002/09/22 07:19:43 chs Exp $   */
 
 /*
  * Copyright (c) 2001, 2002 Eduardo Horvath
@@ -87,8 +87,8 @@
        psize_t size;
        vaddr_t va;
        paddr_t pa;
-       struct vm_page *m;
-       struct pglist mlist;
+       struct vm_page *pg;
+       struct pglist pglist;
 
        /*
         * Setup the iommu.
@@ -121,9 +121,9 @@
         * contiguous.
         */
 
-       size = NBPG<<(is->is_tsbsize);
+       size = NBPG << is->is_tsbsize;
        if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
-               (paddr_t)NBPG, (paddr_t)0, &mlist, 1, 0) != 0)
+               (paddr_t)NBPG, (paddr_t)0, &pglist, 1, 0) != 0)
                panic("iommu_init: no memory");
 
        va = uvm_km_valloc(kernel_map, size);
@@ -131,19 +131,16 @@
                panic("iommu_init: no memory");
        is->is_tsb = (int64_t *)va;
 
-       m = TAILQ_FIRST(&mlist);
-       is->is_ptsb = VM_PAGE_TO_PHYS(m);
+       is->is_ptsb = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist));
 
        /* Map the pages */
-       for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
-               pa = VM_PAGE_TO_PHYS(m);
-               pmap_enter(pmap_kernel(), va, pa | PMAP_NVC,
-                       VM_PROT_READ|VM_PROT_WRITE,
-                       VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED);
+       TAILQ_FOREACH(pg, &pglist, pageq) {
+               pa = VM_PAGE_TO_PHYS(pg);
+               pmap_kenter_pa(va, pa | PMAP_NVC, VM_PROT_READ | VM_PROT_WRITE);
                va += NBPG;
        }
        pmap_update(pmap_kernel());
-       bzero(is->is_tsb, size);
+       memset(is->is_tsb, 0, size);
 
 #ifdef DEBUG
        if (iommudebug & IDB_INFO)
@@ -164,7 +161,7 @@
                        (unsigned long long)bus_space_read_8(is->is_bustag,
                                is->is_iommu,
                                offsetof (struct iommureg, iommu_tsb)));
-               printf("TSB base %p phys %llx\n", (void *)is->is_tsb, 
+               printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
                        (unsigned long long)is->is_ptsb);
                delay(1000000); /* 1 s */
        }
@@ -178,10 +175,10 @@
        /*
         * Now all the hardware's working we need to allocate a dvma map.
         */
-       printf("DVMA map: %x to %x\n", 
+       printf("DVMA map: %x to %x\n",
                (unsigned int)is->is_dvmabase,
                (unsigned int)is->is_dvmaend);
-       printf("IOTSB: %llx to %llx\n", 
+       printf("IOTSB: %llx to %llx\n",
                (unsigned long long)is->is_ptsb,
                (unsigned long long)(is->is_ptsb + size));
        is->is_dvmamap = extent_create(name,
@@ -202,26 +199,27 @@
        struct strbuf_ctl *sb;
 
        /* Need to do 64-bit stores */
-       bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb), 
+       bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb),
                is->is_ptsb);
 
        /* Enable IOMMU in diagnostic mode */
        bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_cr),
                is->is_cr|IOMMUCR_DE);
 
-       for (i=0; i<2; i++) {
+       for (i = 0; i < 2; i++) {
                if ((sb = is->is_sb[i])) {
 
                        /* Enable diagnostics mode? */
-                       bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb, 
+                       bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb,
                                STRBUFREG(strbuf_ctl), STRBUF_EN);
 
                        /* No streaming buffers? Disable them */
-                       if (bus_space_read_8(is->is_bustag, 
-                               is->is_sb[i]->sb_sb, 
+                       if (bus_space_read_8(is->is_bustag,
+                               is->is_sb[i]->sb_sb,
                                STRBUFREG(strbuf_ctl)) == 0) {
                                is->is_sb[i]->sb_flush = NULL;
                        } else {
+
                                /*
                                 * locate the pa of the flush buffer.
                                 */
@@ -234,7 +232,7 @@
 }
 
 /*
- * Here are the iommu control routines. 
+ * Here are the iommu control routines.
  */
 void
 iommu_enter(sb, va, pa, flags)
@@ -260,16 +258,16 @@
                /* If we can't flush the strbuf don't enable it. */
                strbuf = 0;
 
-       tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE), 
+       tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
                !(flags & BUS_DMA_NOCACHE), (strbuf));
 #ifdef DEBUG
        tte |= (flags & 0xff000LL)<<(4*8);
 #endif
-       
-       DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n", 
+
+       DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
                       (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
        is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] = tte;
-       bus_space_write_8(is->is_bustag, is->is_iommu, 
+       bus_space_write_8(is->is_bustag, is->is_iommu,
                IOMMUREG(iommu_flush), va);
        DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
                va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
@@ -277,7 +275,6 @@
                (u_long)tte));
 }
 
-
 /*
  * Find the value of a DVMA address (debug routine).
  */
@@ -287,7 +284,7 @@
        vaddr_t dva;
 {
        int64_t tte = 0;
-       
+
        if (dva >= is->is_dvmabase && dva < is->is_dvmaend)
                tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
 
@@ -314,9 +311,9 @@
        if (va < is->is_dvmabase || va > is->is_dvmaend)
                panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
        if ((long)(va + len) < (long)va)
-               panic("iommu_remove: va 0x%lx + len 0x%lx wraps", 
+               panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
                      (long) va, (long) len);
-       if (len & ~0xfffffff) 
+       if (len & ~0xfffffff)
                panic("iommu_remove: rediculous len 0x%lx", (u_long)len);
 #endif
 
@@ -336,13 +333,13 @@
 
                /* XXX Zero-ing the entry would not require RMW */
                is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] &= ~IOTTE_V;
-               bus_space_write_8(is->is_bustag, is->is_iommu, 
+               bus_space_write_8(is->is_bustag, is->is_iommu,
                        IOMMUREG(iommu_flush), va);
                va += NBPG;
        }
 }
 
-static int 
+static int
 iommu_strbuf_flush_done(sb)
        struct strbuf_ctl *sb;
 {
@@ -362,10 +359,10 @@
 
        if (!sb->sb_flush)
                return (0);
-                               
+
        /*
         * Streaming buffer flushes:
-        * 
+        *
         *   1 Tell strbuf to flush by storing va to strbuf_pgflush.  If
         *     we're not on a cache line boundary (64-bits):
         *   2 Store 0 in flag
@@ -377,16 +374,16 @@
         */
 
        *sb->sb_flush = 0;
-       bus_space_write_8(is->is_bustag, sb->sb_sb, 
+       bus_space_write_8(is->is_bustag, sb->sb_sb,
                STRBUFREG(strbuf_flushsync), sb->sb_flushpa);
 
-       microtime(&flushtimeout); 
+       microtime(&flushtimeout);
        cur = flushtimeout;
        BUMPTIME(&flushtimeout, 500000); /* 1/2 sec */
-       
+
        DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flush = %lx "
                "at va = %lx pa = %lx now=%lx:%lx until = %lx:%lx\n",
-               (long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa, 
+               (long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa,
                cur.tv_sec, cur.tv_usec,
                flushtimeout.tv_sec, flushtimeout.tv_usec));
 
@@ -432,7 +429,7 @@
        bus_size_t align, boundary;
        vaddr_t vaddr = (vaddr_t)buf;
        int seg;
-       pmap_t pmap;
+       struct pmap *pmap;
 
        if (map->dm_nsegs) {
                /* Already in use?? */
@@ -441,11 +438,11 @@
 #endif
                bus_dmamap_unload(t, map);
        }
+
        /*
         * Make sure that on error condition we return "no valid mappings".
         */
        map->dm_nsegs = 0;
-
        if (buflen > map->_dm_size) {
                DPRINTF(IDB_BUSDMA,
                    ("iommu_dvmamap_load(): error %d > %d -- "
@@ -462,26 +459,27 @@
        if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
                boundary = map->_dm_boundary;
        align = max(map->dm_segs[0]._ds_align, NBPG);
-       s = splhigh();
-       /* 
-        * If our segment size is larger than the boundary we need to 
+
+       /*
+        * If our segment size is larger than the boundary we need to
         * split the transfer up int little pieces ourselves.
         */
-       err = extent_alloc(is->is_dvmamap, sgsize, align, 
-               (sgsize > boundary) ? 0 : boundary, 
+       s = splhigh();
+       err = extent_alloc(is->is_dvmamap, sgsize, align,
+               (sgsize > boundary) ? 0 : boundary,
                EX_NOWAIT|EX_BOUNDZERO, &dvmaddr);
        splx(s);
 
 #ifdef DEBUG
-       if (err || (dvmaddr == (bus_addr_t)-1)) 
-       { 
+       if (err || (dvmaddr == (bus_addr_t)-1))
+       {
                printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
                    (int)sgsize, flags);
 #ifdef DDB
                Debugger();
 #endif
-       }               
-#endif 
+       }
+#endif
        if (err != 0)
                return (err);
 
@@ -506,7 +504,7 @@
                /* Oops.  We crossed a boundary.  Split the xfer. */
                DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
                        "seg %d start %lx size %lx\n", seg,
-                       (long)map->dm_segs[seg].ds_addr, 
+                       (long)map->dm_segs[seg].ds_addr,
                        map->dm_segs[seg].ds_len));
                map->dm_segs[seg].ds_len =
                    boundary - (sgstart & (boundary - 1));
@@ -539,6 +537,7 @@
                pmap = pmap_kernel();
 
        for (; buflen > 0; ) {
+
                /*
                 * Get the physical address for this page.
                 */
@@ -561,7 +560,7 @@
                            (long)(curaddr & ~(NBPG-1))));
                iommu_enter(sb, trunc_page(dvmaddr), trunc_page(curaddr),


