Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/external/cddl/osnet/dist/uts/common/fs/zfs Fix various issue...



details:   https://anonhg.NetBSD.org/src/rev/4b8ef90f44a0
branches:  trunk
changeset: 782101:4b8ef90f44a0
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Mon Oct 15 23:08:19 2012 +0000

description:
Fix various issues in zfs life cycle, locking, and vop protocol.

- Restore some zfs locking and unlocking that got lost randomly.

- Enable use of the BSD vnode lock.  Lock order: all BSD vnode locks
are taken before all zfs internal locks.  There remains an issue with
O_EXCL, to be solved later (famous last words).  KASSERT the locking
scheme up the wazoo.

- Take our cruft out of zfs_lookup and move it to zfs_netbsd_lookup.
Restore much of the way zfs_lookup looked to make merging future
versions easier.  Disable use of the namecache for now because its
locking dance is too scary to contemplate.

- Implement BSD semantics for rename, to appease our tests.  This is
a provisional kludge; eventually we need VOP_RENAME to take a flag
specifying whether to use BSD semantics or POSIX semantics.

- Simplify zfs_netbsd_reclaim and make it work.  Now that getnewvnode
never tries to vclean anything itself, we need not worry about
recursion of ZFS_OBJ_MUTEX locks.

- Clarify and fix genfs node initialization and destruction.

zfs passes most of our atf vfs tests now, including the rename races.

Still to do:

- fix the impedance mismatch between our permissions model and zfs's;
- fix O_EXCL (nontrivial);
- throw dirconc at it and see how badly it explodes;
- find why zpool sometimes wedges itself during mkfs; and
- find why pool caches sometimes seem to get corrupted.

diffstat:

 external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c |    4 +
 external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c  |  719 +++++++++++----
 external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c  |   30 +-
 3 files changed, 549 insertions(+), 204 deletions(-)

diffs (truncated from 1027 to 300 lines):

diff -r 2f9455180ea6 -r 4b8ef90f44a0 external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c   Mon Oct 15 22:50:25 2012 +0000
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c   Mon Oct 15 23:08:19 2012 +0000
@@ -1811,6 +1811,10 @@
                *vpp = ZTOV(rootzp);
        dprintf("vpp -> %d, error %d -- %p\n", (*vpp)->v_type, error, *vpp);
        ZFS_EXIT(zfsvfs);
+       if (error == 0)
+               vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+       KASSERT((error != 0) || (*vpp != NULL));
+       KASSERT((error != 0) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
        return (error);
 }
 
diff -r 2f9455180ea6 -r 4b8ef90f44a0 external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c    Mon Oct 15 22:50:25 2012 +0000
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c    Mon Oct 15 23:08:19 2012 +0000
@@ -1197,13 +1197,12 @@
  */
 /* ARGSUSED */
 static int
-zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
-    int nameiop, cred_t *cr, int flags)
+zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+    int flags, vnode_t *rdir, cred_t *cr,  caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
 {
        znode_t *zdp = VTOZ(dvp);
        zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
-       int *direntflags = NULL;
-       void *realpnp = NULL;
        int     error = 0;
 
        /* fast path */
@@ -1249,7 +1248,7 @@
        ZFS_VERIFY_ZP(zdp);
 
        *vpp = NULL;
-       dprintf("zfs_lookup called %s\n", nm);
+
        if (flags & LOOKUP_XATTR) {
 #ifdef TODO
                /*
@@ -1301,17 +1300,6 @@
                return (error);
        }
 
-       /*
-        * Before tediously performing a linear scan of the directory,
-        * check the name cache to see if the directory/name pair
-        * we are looking for is known already.
-        */
-
-       if ((error = cache_lookup(dvp, vpp, cnp)) >= 0) {
-               ZFS_EXIT(zfsvfs);
-               return (error);
-       }
-
        if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
            NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
                ZFS_EXIT(zfsvfs);
@@ -1323,56 +1311,6 @@
                error = specvp_check(vpp, cr);
 
        ZFS_EXIT(zfsvfs);
-       
-       /* Translate errors and add SAVENAME when needed. */
-       if (cnp->cn_flags & ISLASTCN) {
-               switch (nameiop) {
-               case CREATE:
-               case RENAME:
-                       if (error == ENOENT) {
-                               error = EJUSTRETURN;
-                               break;
-                       }
-                       /* FALLTHROUGH */
-               case DELETE:
-                       break;
-               }
-       }
-
-       if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
-               int ltype = 0;
-
-               if (cnp->cn_flags & ISDOTDOT) {
-                       ltype = VOP_ISLOCKED(dvp);
-                       VOP_UNLOCK(dvp);
-               }
-               error = vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
-               if (cnp->cn_flags & ISDOTDOT)
-                       vn_lock(dvp, ltype | LK_RETRY);
-               if (error != 0) {
-                       VN_RELE(*vpp);
-                       *vpp = NULL;
-                       return (error);
-               }
-       }
-
-       /*
-        * Insert name into cache if appropriate.
-        */
-       if ((cnp->cn_flags & MAKEENTRY) == 0){
-               return (error);
-       }
-       switch (error) {
-       case 0:
-               cache_enter(dvp, *vpp, cnp);
-               break;
-       case ENOENT:
-               if (nameiop != CREATE)
-                       cache_enter(dvp, *vpp, cnp);
-               break;
-       default:
-               break;
-       }
        return (error);
 }
 
@@ -2033,6 +1971,12 @@
        vnevent_rmdir(vp, dvp, name, ct);
 
        /*
+        * Grab a lock on the directory to make sure that noone is
+        * trying to add (or lookup) entries while we are removing it.
+        */
+       rw_enter(&zp->z_name_lock, RW_WRITER);
+
+       /*
         * Grab a lock on the parent pointer to make sure we play well
         * with the treewalk and directory rename code.
         */
@@ -3571,10 +3515,20 @@
                 * entries refer to the same file object, rename
                 * must do nothing and exit without error.
                 */
+#ifndef __NetBSD__
+               /*
+                * But on NetBSD we have a different system call to do
+                * this, posix_rename, which sorta kinda handles this
+                * case (modulo races), and our tests expect BSD
+                * semantics for rename, so we'll do that until we can
+                * push the choice between BSD and POSIX semantics into
+                * the VOP_RENAME protocol as a flag.
+                */
                if (szp->z_id == tzp->z_id) {
                        error = 0;
                        goto out;
                }
+#endif
        }
 
        vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
@@ -3623,15 +3577,20 @@
                return (error);
        }
 
-       if (tzp)        /* Attempt to remove the existing target */
+       if (tzp && (tzp->z_id != szp->z_id))
+               /* Attempt to remove the existing target */
                error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
 
        if (error == 0) {
-               error = zfs_link_create(tdl, szp, tx, ZRENAMING);
+               if (!tzp || (tzp->z_id != szp->z_id))
+                       error = zfs_link_create(tdl, szp, tx, ZRENAMING);
                if (error == 0) {
                        szp->z_phys->zp_flags |= ZFS_AV_MODIFIED;
 
-                       error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
+                       error = zfs_link_destroy(sdl, szp, tx,
+                           /* Kludge for BSD rename semantics.  */
+                           ((tzp && (tzp->z_id == szp->z_id)) ?
+                               zflg : ZRENAMING), NULL);
                        ASSERT(error == 0);
 
                        zfs_log_rename(zilog, tx,
@@ -4779,13 +4738,8 @@
 zfs_netbsd_open(void *v)
 {
        struct vop_open_args *ap = v;
-       vnode_t *vp = ap->a_vp;
-       znode_t *zp = VTOZ(vp);
-       int error;
-
-       error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL);
-
-       return (error);
+
+       return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL));
 }
 
 static int
@@ -4846,63 +4800,346 @@
 static int
 zfs_netbsd_lookup(void *v)
 {
-       struct vop_lookup_args *ap = v;
+       struct vop_lookup_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+       } */ *ap = v;
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode **vpp = ap->a_vpp;
        struct componentname *cnp = ap->a_cnp;
        char nm[NAME_MAX + 1];
-       int err;
-       
-       ASSERT(cnp->cn_namelen < sizeof(nm));
-       strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm)));
-
-       err = zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
-           cnp->cn_cred, 0);
-       
-       return err;
+       int error;
+
+       KASSERT(dvp != NULL);
+       KASSERT(vpp != NULL);
+       KASSERT(cnp != NULL);
+       KASSERT(cnp->cn_nameptr != NULL);
+       KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+       KASSERT(cnp->cn_namelen < sizeof nm);
+
+#if 0                          /* Namecache too scary to contemplate.  */
+       *vpp = NULL;
+
+       /*
+        * Do an access check before the cache lookup.  zfs_lookup does
+        * an access check too, but it's too scary to contemplate
+        * injecting our namecache stuff into zfs internals.
+        *
+        * XXX Is this the correct access check?
+        */
+       if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0)
+               goto out;
+
+       /*
+        * Check the namecache before entering zfs_lookup.
+        * cache_lookup does the locking dance for us.
+        */
+       if ((error = cache_lookup(dvp, vpp, cnp)) >= 0)
+               goto out;
+#endif
+
+       /*
+        * zfs_lookup wants a null-terminated component name, but namei
+        * gives us a pointer into the full pathname.
+        */
+       (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
+
+       error = zfs_lookup(dvp, nm, vpp, NULL, 0, NULL, cnp->cn_cred, NULL,
+           NULL, NULL);
+
+       /*
+        * Translate errors to match our namei insanity.
+        */
+       if (cnp->cn_flags & ISLASTCN) {
+               switch (cnp->cn_nameiop) {
+               case CREATE:
+               case RENAME:
+                       if (error == ENOENT) {
+                               error = EJUSTRETURN;
+                               break;
+                       }
+                       /* FALLTHROUGH */
+               case DELETE:
+                       break;
+               }
+       }
+
+       if (error) {
+               KASSERT(*vpp == NULL);
+               goto out;
+       }
+       KASSERT(*vpp != NULL);  /* XXX Correct?  */
+
+       /*
+        * Do a locking dance in conformance to the VOP_LOOKUP protocol.
+        */
+       if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
+               KASSERT(!(cnp->cn_flags & ISDOTDOT));
+               KASSERT(dvp == *vpp);
+       } else if ((cnp->cn_namelen == 2) &&
+           (cnp->cn_nameptr[0] == '.') &&
+           (cnp->cn_nameptr[1] == '.')) {
+               KASSERT(cnp->cn_flags & ISDOTDOT);
+               VOP_UNLOCK(dvp);
+               vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+               vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+       } else {
+               KASSERT(!(cnp->cn_flags & ISDOTDOT));
+               vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+       }
+
+out:
+       KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
+       KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
+
+#if 0                          /* Namecache too scary to contemplate.  */



Home | Main Index | Thread Index | Old Index