tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

rfc: vmem(9) API/implementation changes



I'm working some on the vmem(9) implementation and API.  Please send
me your feedback on the attached patch and my API proposal.

I've attached a patch that makes vmem_xalloc(..., minaddr, maxaddr, ...)
allocate from the interval [minaddr, maxaddr] instead of from [minaddr,
maxaddr - 2] like it used to.  The previous behavior didn't correspond
with the documentation, which I have updated (see patch), and it stopped
callers from allocating the maximum vmem_addr_t using vmem_xalloc(9).
The new behavior is closer to extent_alloc_subregion(9).

There are a couple of changes to the API that I would like to make.
First, I don't think that vmem_addr_t 0 should be reserved for error
indications (0 == VMEM_ADDR_NULL), but the API should change from
this:

     vmem_addr_t
     vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size,
         vm_flag_t flags);

     vmem_addr_t
     vmem_xalloc(vmem_t *vm, vmem_size_t size, vmem_size_t align,
         vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
         vmem_addr_t maxaddr, vm_flag_t flags);

     vmem_addr_t
     vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags);

to this:

     /* vmem_add() returns 0 on success, ENOMEM on failure. */
     int
     vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size,
         vm_flag_t flags);

     /* vmem_xalloc() returns 0 on success, ENOMEM on failure;  On success,
      * the resource is stored in *addrp; on failure, *addrp is undefined.
      */
     int
     vmem_xalloc(vmem_t *vm, vmem_size_t size, vmem_size_t align,
         vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
         vmem_addr_t maxaddr, vm_flag_t flags, vmem_addr_t *addrp);

     /* vmem_alloc() returns 0 on success, ENOMEM on failure;  On success,
      * the resource is stored in *addrp; on failure, *addrp is undefined.
      */
     int
     vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
         vmem_addr_t *addrp);

Likewise, the callback registered with vmem_create(9),

    vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),

ought to be:

    int (*allocfn)(vmem_t *source, vmem_size_t size, vmem_size_t *sizep,
        vm_flag_t flags, vmem_addr_t *addrp),

The automatic-import callback, allocfn, is not useful for
importing resources to satisfy a vmem_xalloc(9) call.  I propose to
add a third callback, xallocfn, that applies the same constraints as
vmem_xalloc(9) does:

    int (*xallocfn)(vmem_t *source, vmem_size_t size, vmem_size_t *sizep,
        vmem_size_t align, vmem_size_t phase, vmem_size_t nocross,
        vmem_addr_t minaddr, vmem_addr_t maxaddr, vm_flag_t flags,
        vmem_addr_t *addrp),

Finally, I am considering adding a routine that tells the lowest
and highest vmem_addr_t's that are busy (i.e., vmem_{x,}alloc()'d but
not vmem_{x,}free()'d) in the arena,

    void vmem_busy_bounds(vmem_t *vm, vmem_addr_t *lbusy, vmem_addr_t *rbusy);

and/or routines that iterate over all of the busy spans in the arena,

    vmem_size_t vmem_first_busy(vmem_t *vm, vmem_busy_iter_t *it,
        vmem_addr_t *addrp);
    vmem_size_t vmem_next_busy(vmem_busy_iter_t *it, vmem_addr_t *addrp);

I would use those routines to help me manage PCI/CardBus bus-bus bridge
windows.

Dave

-- 
David Young             OJC Technologies
dyoung%ojctech.com@localhost      Urbana, IL * (217) 344-0444 x24
Index: sys/kern/subr_vmem.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_vmem.c,v
retrieving revision 1.58
diff -u -p -r1.58 subr_vmem.c
--- sys/kern/subr_vmem.c        17 Dec 2010 22:24:11 -0000      1.58
+++ sys/kern/subr_vmem.c        27 Jul 2011 16:08:43 -0000
@@ -171,7 +171,7 @@ struct vmem_btag {
 #define        BT_TYPE_BUSY            4
 #define        BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
 
-#define        BT_END(bt)      ((bt)->bt_start + (bt)->bt_size)
+#define        BT_END(bt)      ((bt)->bt_start + (bt)->bt_size - 1)
 
 typedef struct vmem_btag bt_t;
 
@@ -705,12 +705,14 @@ vmem_rehash(vmem_t *vm, size_t newhashsi
  */
 
 static vmem_addr_t
-vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t 
phase,
-    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
+vmem_fit(const bt_t const *bt, vmem_size_t size, vmem_size_t align,
+    vmem_size_t phase, vmem_size_t nocross,
+    vmem_addr_t minaddr, vmem_addr_t maxaddr)
 {
        vmem_addr_t start;
        vmem_addr_t end;
 
+       KASSERT(size > 0);
        KASSERT(bt->bt_size >= size);
 
        /*
@@ -723,10 +725,10 @@ vmem_fit(const bt_t *bt, vmem_size_t siz
                start = minaddr;
        }
        end = BT_END(bt);
-       if (end > maxaddr - 1) {
-               end = maxaddr - 1;
+       if (end > maxaddr) {
+               end = maxaddr;
        }
-       if (start >= end) {
+       if (start > end) {
                return VMEM_ADDR_NULL;
        }
 
@@ -738,13 +740,13 @@ vmem_fit(const bt_t *bt, vmem_size_t siz
                KASSERT(align < nocross);
                start = VMEM_ALIGNUP(start - phase, nocross) + phase;
        }
-       if (start < end && end - start >= size) {
+       if (start <= end && end - start >= size - 1) {
                KASSERT((start & (align - 1)) == phase);
                KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
                KASSERT(minaddr <= start);
-               KASSERT(maxaddr == 0 || start + size <= maxaddr);
+               KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr);
                KASSERT(bt->bt_start <= start);
-               KASSERT(start + size <= BT_END(bt));
+               KASSERT(BT_END(bt) - start >= size - 1);
                return start;
        }
        return VMEM_ADDR_NULL;
@@ -875,13 +877,13 @@ vmem_alloc(vmem_t *vm, vmem_size_t size,
        }
 #endif /* defined(QCACHE) */
 
-       return vmem_xalloc(vm, size, 0, 0, 0, 0, 0, flags);
+       return vmem_xalloc(vm, size, 0, 0, 0, 0, ~(vmem_addr_t)0, flags);
 }
 
 vmem_addr_t
-vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t 
phase,
-    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
-    vm_flag_t flags)
+vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
+    const vmem_size_t phase, const vmem_size_t nocross,
+    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t 
flags)
 {
        struct vmem_freelist *list;
        struct vmem_freelist *first;
@@ -906,7 +908,7 @@ vmem_xalloc(vmem_t *vm, vmem_size_t size
        KASSERT((nocross & (nocross - 1)) == 0);
        KASSERT((align == 0 && phase == 0) || phase < align);
        KASSERT(nocross == 0 || nocross >= size);
-       KASSERT(maxaddr == 0 || minaddr < maxaddr);
+       KASSERT(minaddr <= maxaddr);
        KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
 
        if (align == 0) {
@@ -961,7 +963,7 @@ retry:
        }
 #endif
        if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
-           nocross != 0 || minaddr != 0 || maxaddr != 0) {
+           nocross != 0) {
 
                /*
                 * XXX should try to import a region large enough to
@@ -970,6 +972,7 @@ retry:
 
                goto fail;
        }
+       /* XXX eeek, minaddr & maxaddr not respected */
        if (vmem_import(vm, size, flags) == 0) {
                goto retry;
        }
@@ -1074,7 +1077,7 @@ vmem_xfree(vmem_t *vm, vmem_addr_t addr,
        /* coalesce */
        t = CIRCLEQ_NEXT(bt, bt_seglist);
        if (t != NULL && t->bt_type == BT_TYPE_FREE) {
-               KASSERT(BT_END(bt) == t->bt_start);
+               KASSERT(BT_END(bt) < t->bt_start);      /* YYY */
                bt_remfree(vm, t);
                bt_remseg(vm, t);
                bt->bt_size += t->bt_size;
@@ -1082,7 +1085,7 @@ vmem_xfree(vmem_t *vm, vmem_addr_t addr,
        }
        t = CIRCLEQ_PREV(bt, bt_seglist);
        if (t != NULL && t->bt_type == BT_TYPE_FREE) {
-               KASSERT(BT_END(t) == bt->bt_start);
+               KASSERT(BT_END(t) < bt->bt_start);      /* YYY */
                bt_remfree(vm, t);
                bt_remseg(vm, t);
                bt->bt_size += t->bt_size;
@@ -1277,7 +1280,7 @@ vmem_whatis_lookup(vmem_t *vm, uintptr_t
                if (BT_ISSPAN_P(bt)) {
                        continue;
                }
-               if (bt->bt_start <= addr && addr < BT_END(bt)) {
+               if (bt->bt_start <= addr && addr <= BT_END(bt)) {
                        return bt;
                }
        }
@@ -1337,7 +1340,7 @@ vmem_check_sanity(vmem_t *vm)
        KASSERT(vm != NULL);
 
        CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
-               if (bt->bt_start >= BT_END(bt)) {
+               if (bt->bt_start > BT_END(bt)) {
                        printf("corrupted tag\n");
                        bt_dump(bt, (void *)printf);
                        return false;
@@ -1351,8 +1354,8 @@ vmem_check_sanity(vmem_t *vm)
                        if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
                                continue;
                        }
-                       if (bt->bt_start < BT_END(bt2) &&
-                           bt2->bt_start < BT_END(bt)) {
+                       if (bt->bt_start <= BT_END(bt2) &&
+                           bt2->bt_start <= BT_END(bt)) {
                                printf("overwrapped tags\n");
                                bt_dump(bt, (void *)printf);
                                bt_dump(bt2, (void *)printf);
@@ -1405,10 +1408,29 @@ main(void)
        vmem_dump(vm, (void *)printf);
 
        p = vmem_add(vm, 100, 200, VM_SLEEP);
+       assert(p != VMEM_ADDR_NULL);
        p = vmem_add(vm, 2000, 1, VM_SLEEP);
+       assert(p != VMEM_ADDR_NULL);
        p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
+       assert(p != VMEM_ADDR_NULL);
        p = vmem_add(vm, 10000, 10000, VM_SLEEP);
+       assert(p != VMEM_ADDR_NULL);
        p = vmem_add(vm, 500, 1000, VM_SLEEP);
+       assert(p != VMEM_ADDR_NULL);
+       p = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP);
+       assert(p != VMEM_ADDR_NULL);
+       p = vmem_xalloc(vm, 0x101, 0, 0, 0,
+           0xffffff00, 0xffffffff, strat|VM_SLEEP);
+       assert(p == VMEM_ADDR_NULL);
+       p = vmem_xalloc(vm, 0x100, 0, 0, 0,
+           0xffffff01, 0xffffffff, strat|VM_SLEEP);
+       assert(p == VMEM_ADDR_NULL);
+       p = vmem_xalloc(vm, 0x100, 0, 0, 0,
+           0xffffff00, 0xfffffffe, strat|VM_SLEEP);
+       assert(p == VMEM_ADDR_NULL);
+       p = vmem_xalloc(vm, 0x100, 0, 0, 0,
+           0xffffff00, 0xffffffff, strat|VM_SLEEP);
+       assert(p != VMEM_ADDR_NULL);
        vmem_dump(vm, (void *)printf);
        for (;;) {
                struct reg *r;
@@ -1434,12 +1456,10 @@ main(void)
                                    nocross)) {
                                        nocross = 0;
                                }
-                               minaddr = rand() % 50000;
-                               maxaddr = rand() % 70000;
-                               if (minaddr > maxaddr) {
-                                       minaddr = 0;
-                                       maxaddr = 0;
-                               }
+                               do {
+                                       minaddr = rand() % 50000;
+                                       maxaddr = rand() % 70000;
+                               } while (minaddr > maxaddr);
                                printf("=== xalloc %" PRIu64
                                    " align=%" PRIu64 ", phase=%" PRIu64
                                    ", nocross=%" PRIu64 ", min=%" PRIu64
Index: sys/sys/vmem.h
===================================================================
RCS file: /cvsroot/src/sys/sys/vmem.h,v
retrieving revision 1.12
diff -u -p -r1.12 vmem.h
--- sys/sys/vmem.h      18 Feb 2009 13:31:59 -0000      1.12
+++ sys/sys/vmem.h      27 Jul 2011 16:08:43 -0000
@@ -42,6 +42,8 @@ typedef unsigned int vm_flag_t;
 typedef        uintptr_t vmem_addr_t;
 typedef size_t vmem_size_t;
 #define        VMEM_ADDR_NULL  0
+#define        VMEM_ADDR_MIN   0
+#define        VMEM_ADDR_MAX   (~(vmem_addr_t)0)
 
 vmem_t *vmem_create(const char *, vmem_addr_t, vmem_size_t, vmem_size_t,
     vmem_addr_t (*)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
Index: share/man/man9/vmem.9
===================================================================
RCS file: /cvsroot/src/share/man/man9/vmem.9,v
retrieving revision 1.9
diff -u -p -r1.9 vmem.9
--- share/man/man9/vmem.9       2 Dec 2010 12:54:13 -0000       1.9
+++ share/man/man9/vmem.9       27 Jul 2011 16:08:43 -0000
@@ -228,9 +228,13 @@ Request a resource which doesn't cross
 .Fa nocross
 aligned boundary.
 .It Fa minaddr
-If non-zero, specify the minimum address which can be allocated.
+Specify the minimum address which can be allocated, or
+.Dv VMEM_ADDR_MIN
+if the caller does not care.
 .It Fa maxaddr
-If non-zero, specify the maximum address + 1 which can be allocated.
+Specify the maximum address which can be allocated, or
+.Dv VMEM_ADDR_MAX
+if the caller does not care.
 .It Fa flags
 A bitwise OR of an allocation strategy and a sleep flag.
 .Pp
Index: sys/rump/net/lib/libshmif/if_shmem.c
===================================================================
RCS file: /cvsroot/src/sys/rump/net/lib/libshmif/if_shmem.c,v
retrieving revision 1.39
diff -u -p -r1.39 if_shmem.c
--- sys/rump/net/lib/libshmif/if_shmem.c        21 Mar 2011 16:41:09 -0000      
1.39
+++ sys/rump/net/lib/libshmif/if_shmem.c        27 Jul 2011 16:08:44 -0000
@@ -293,7 +293,7 @@ rump_shmif_create(const char *path, int 
                        return error;
        }
 
-       unit = vmem_xalloc(shmif_units, 1, 0, 0, 0, 0, 0,
+       unit = vmem_xalloc(shmif_units, 1, 0, 0, 0, 0, ~(vmem_addr_t)0,
            VM_INSTANTFIT | VM_SLEEP) - 1;
 
        if ((error = allocif(unit, &sc)) != 0) {
@@ -332,11 +332,9 @@ shmif_clone(struct if_clone *ifc, int un
         * Otherwise the wildcard-side of things might get the same one.
         * This is slightly offset-happy due to vmem.  First, we offset
         * the range of unit numbers by +1 since vmem cannot deal with
-        * ranges starting from 0.  Second, since vmem_xalloc() allocates
-        * from [min,max) (half-*open* interval), we need to add one extra
-        * to the one extra we add to maxaddr.  Talk about uuuh.
+        * ranges starting from 0.  Talk about uuuh.
         */
-       unit2 = vmem_xalloc(shmif_units, 1, 0, 0, 0, unit+1, unit+3,
+       unit2 = vmem_xalloc(shmif_units, 1, 0, 0, 0, unit+1, unit+1,
            VM_SLEEP | VM_INSTANTFIT);
        KASSERT(unit2-1 == unit);
 


Home | Main Index | Thread Index | Old Index