Source-Changes-HG archive


[src/trunk]: src/external/cddl/osnet/dist/uts/common/fs/zfs Do reference counting for zfs range lock waiters



details:   https://anonhg.NetBSD.org/src/rev/bd2aabf021b1
branches:  trunk
changeset: 782094:bd2aabf021b1
user:      riastradh <riastradh@NetBSD.org>
date:      Mon Oct 15 14:15:59 2012 +0000

description:
Do reference counting for zfs range lock waiters.

Avoid the cv_broadcast(&cv); cv_destroy(&cv); pattern, which works on
Solaris only by abuse of the condvar abstraction.
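
For context, the unsafe teardown looks roughly like this (a minimal
sketch, not a verbatim excerpt from the old code):

        /*
         * Solaris-style teardown: broadcast, then immediately destroy
         * and free.  A woken waiter may still be inside cv_wait() when
         * the cv is destroyed and the memory freed; NetBSD's condvar
         * implementation does not tolerate destroying a cv that may
         * still have waiters.
         */
        cv_broadcast(&rl->r_wr_cv);
        cv_destroy(&rl->r_wr_cv);
        kmem_free(rl, sizeof (rl_t));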

Parts of this code, mainly range lock allocation and initialization,
ought to be factored into smaller subroutines, but that would make it
harder to merge newer versions of zfs, so for now I've just expanded
those parts further inline.
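
The resulting discipline, sketched from the hunks below: each waiter
takes a reference across its sleep, and whoever drops the last
reference destroys the condvars and frees the record.

        /*
         * Waiter side: pin the record so the unlocker cannot free it
         * out from under cv_wait().
         */
        if (zfs_range_lock_hold(rl) != 0)
                panic("too many waiters on zfs range lock %p", rl);
        cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
        zfs_range_lock_rele(rl);        /* last reference frees rl */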

diffstat:

 external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h |   1 +
 external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c     |  77 ++++++++++---
 2 files changed, 61 insertions(+), 17 deletions(-)

diffs (219 lines):

diff -r cfdf8fb30623 -r bd2aabf021b1 external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h        Mon Oct 15 14:03:06 2012 +0000
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h        Mon Oct 15 14:15:59 2012 +0000
@@ -54,6 +54,7 @@
        uint8_t r_proxy;        /* acting for original range */
        uint8_t r_write_wanted; /* writer wants to lock this range */
        uint8_t r_read_wanted;  /* reader wants to lock this range */
+       unsigned long r_refcnt; /* reference count for cv waits */
 } rl_t;
 
 /*
diff -r cfdf8fb30623 -r bd2aabf021b1 external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c    Mon Oct 15 14:03:06 2012 +0000
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c    Mon Oct 15 14:15:59 2012 +0000
@@ -94,6 +94,33 @@
 
 #include <sys/zfs_rlock.h>
 
+static int
+zfs_range_lock_hold(rl_t *rl)
+{
+
+       KASSERT(mutex_owned(&rl->r_zp->z_range_lock));
+
+       if (rl->r_refcnt >= ULONG_MAX)
+               return (ENFILE); /* XXX What to do?  */
+
+       rl->r_refcnt++;
+       return (0);
+}
+
+static void
+zfs_range_lock_rele(rl_t *rl)
+{
+
+       KASSERT(mutex_owned(&rl->r_zp->z_range_lock));
+       KASSERT(rl->r_refcnt > 0);
+
+       if (--rl->r_refcnt == 0) {
+               cv_destroy(&rl->r_wr_cv);
+               cv_destroy(&rl->r_rd_cv);
+               kmem_free(rl, sizeof (rl_t));
+       }
+}
+
 /*
  * Check if a write lock can be grabbed, or wait and recheck until available.
  */
@@ -169,10 +196,12 @@
                return;
 wait:
                if (!rl->r_write_wanted) {
-                       cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
                        rl->r_write_wanted = B_TRUE;
                }
+               if (zfs_range_lock_hold(rl) != 0)
+                       panic("too many waiters on zfs range lock %p", rl);
                cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
+               zfs_range_lock_rele(rl);
 
                /* reset to original */
                new->r_off = off;
@@ -205,8 +234,11 @@
        proxy->r_cnt = 1;
        proxy->r_type = RL_READER;
        proxy->r_proxy = B_TRUE;
+       cv_init(&proxy->r_wr_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&proxy->r_rd_cv, NULL, CV_DEFAULT, NULL);
        proxy->r_write_wanted = B_FALSE;
        proxy->r_read_wanted = B_FALSE;
+       proxy->r_refcnt = 1;
        avl_add(tree, proxy);
 
        return (proxy);
@@ -234,6 +266,9 @@
        rear->r_cnt = rl->r_cnt;
        rear->r_type = RL_READER;
        rear->r_proxy = B_TRUE;
+       cv_init(&rear->r_wr_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&rear->r_rd_cv, NULL, CV_DEFAULT, NULL);
+       rear->r_refcnt = 1;
        rear->r_write_wanted = B_FALSE;
        rear->r_read_wanted = B_FALSE;
 
@@ -259,8 +294,11 @@
        rl->r_cnt = 1;
        rl->r_type = RL_READER;
        rl->r_proxy = B_TRUE;
+       cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&rl->r_rd_cv, NULL, CV_DEFAULT, NULL);
        rl->r_write_wanted = B_FALSE;
        rl->r_read_wanted = B_FALSE;
+       rl->r_refcnt = 1;
        avl_add(tree, rl);
 }
 
@@ -372,10 +410,13 @@
        if (prev && (off < prev->r_off + prev->r_len)) {
                if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
                        if (!prev->r_read_wanted) {
-                               cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
                                prev->r_read_wanted = B_TRUE;
                        }
+                       if (zfs_range_lock_hold(prev) != 0)
+                               panic("too many waiters on zfs range lock %p",
+                                   prev);
                        cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
+                       zfs_range_lock_rele(prev);
                        goto retry;
                }
                if (off + len < prev->r_off + prev->r_len)
@@ -395,10 +436,13 @@
                        goto got_lock;
                if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
                        if (!next->r_read_wanted) {
-                               cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
                                next->r_read_wanted = B_TRUE;
                        }
+                       if (zfs_range_lock_hold(next) != 0)
+                               panic("too many waiters on zfs range lock %p",
+                                   next);
                        cv_wait(&next->r_rd_cv, &zp->z_range_lock);
+                       zfs_range_lock_rele(next);
                        goto retry;
                }
                if (off + len <= next->r_off + next->r_len)
@@ -435,20 +479,25 @@
        new->r_cnt = 1; /* assume it's going to be in the tree */
        new->r_type = type;
        new->r_proxy = B_FALSE;
+       cv_init(&new->r_wr_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&new->r_rd_cv, NULL, CV_DEFAULT, NULL);
        new->r_write_wanted = B_FALSE;
        new->r_read_wanted = B_FALSE;
+       new->r_refcnt = 1;
 
        mutex_enter(&zp->z_range_lock);
        if (type == RL_READER) {
                /*
                 * First check for the usual case of no locks
                 */
-               if (avl_numnodes(&zp->z_range_avl) == 0)
+               if (avl_numnodes(&zp->z_range_avl) == 0) {
                        avl_add(&zp->z_range_avl, new);
-               else
+               } else {
                        zfs_range_lock_reader(zp, new);
-       } else
+               }
+       } else {
                zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */
+       }
        mutex_exit(&zp->z_range_lock);
        return (new);
 }
@@ -474,11 +523,9 @@
                avl_remove(tree, remove);
                if (remove->r_write_wanted) {
                        cv_broadcast(&remove->r_wr_cv);
-                       cv_destroy(&remove->r_wr_cv);
                }
                if (remove->r_read_wanted) {
                        cv_broadcast(&remove->r_rd_cv);
-                       cv_destroy(&remove->r_rd_cv);
                }
        } else {
                ASSERT3U(remove->r_cnt, ==, 0);
@@ -507,17 +554,15 @@
                                avl_remove(tree, rl);
                                if (rl->r_write_wanted) {
                                        cv_broadcast(&rl->r_wr_cv);
-                                       cv_destroy(&rl->r_wr_cv);
                                }
                                if (rl->r_read_wanted) {
                                        cv_broadcast(&rl->r_rd_cv);
-                                       cv_destroy(&rl->r_rd_cv);
                                }
-                               kmem_free(rl, sizeof (rl_t));
+                               zfs_range_lock_rele(rl);
                        }
                }
        }
-       kmem_free(remove, sizeof (rl_t));
+       zfs_range_lock_rele(remove);
 }
 
 /*
@@ -536,16 +581,14 @@
        if (rl->r_type == RL_WRITER) {
                /* writer locks can't be shared or split */
                avl_remove(&zp->z_range_avl, rl);
-               mutex_exit(&zp->z_range_lock);
                if (rl->r_write_wanted) {
                        cv_broadcast(&rl->r_wr_cv);
-                       cv_destroy(&rl->r_wr_cv);
                }
                if (rl->r_read_wanted) {
                        cv_broadcast(&rl->r_rd_cv);
-                       cv_destroy(&rl->r_rd_cv);
                }
-               kmem_free(rl, sizeof (rl_t));
+               zfs_range_lock_rele(rl);
+               mutex_exit(&zp->z_range_lock);
        } else {
                /*
                 * lock may be shared, let zfs_range_unlock_reader()
@@ -577,11 +620,11 @@
        mutex_enter(&zp->z_range_lock);
        rl->r_off = off;
        rl->r_len = len;
-       mutex_exit(&zp->z_range_lock);
        if (rl->r_write_wanted)
                cv_broadcast(&rl->r_wr_cv);
        if (rl->r_read_wanted)
                cv_broadcast(&rl->r_rd_cv);
+       mutex_exit(&zp->z_range_lock);
 }
 
 /*


