tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

rename(), wapbl and deadlock



Hi,
I got a look again at the ufs_rename patch that David Holland sent
at the end of september, and managed to get it working on netbsd-5.
Changes from his version (apart from the backport) that got it working:
- VOP_UNLOCK(ap->a_tvp) on entry, if not NULL. it's not used and we'll
  do a lookup() to find it again.
- clear SAVESTART from from_name and to_name. otherwise relookup() will
  VREF() the vnodes and we'll have a refcount leak (and so a vnode
  leak).
- make sure to always vrele(illegal_vnode).

With these changes, I couldn't crash or hang a kernel using rsync
on a WAPBL filesystem. However, the system doen't come up multiuser
if / is not logged:
Building databases: devpanic: kernel diagnostic assertion "bn >= NDADDR" 
failed: file "/dsk/l1/misc/bouyer/netbsd-5/src/sys/ufs/ufs/ufs_bmap.c", line 349
fatal breakpoint trap in supervisor mode
trap type 1 code 0 eip c05b947c cs 8 eflags 246 cr2 bbb74e84 ilevel 0
Stopped in pid 114.1 (dev_mkdb) at      netbsd:breakpoint+0x4:  popl    %ebp
db{1}> tr
breakpoint(c0af5b1a,cc5f6768,c208d800,c04bee0e,0,0,c0457448,0,0,0) at 
netbsd:breakpoint+0x4
panic(c0b03610,c0a675f1,c0a9f2ce,c0a9f2dc,15d,0,cc5f67dc,c044413d,c0a675f1,c0a9f2dc)
 at netbsd:panic+0x1b0
__kernassert(c0a675f1,c0a9f2dc,15d,c0a9f2ce,c1f84d60,1,cc5f67bc,c0850696,ffffffff,ffffffff)
 at netbsd:__kernassert+0x39
ufs_getlbns(cc61c968,ffffffff,ffffffff,cc5f6878,cc5f68b8,4,4,c04bee0e,0,0) at 
netbsd:ufs_getlbns+0x1fd
ufs_bmaparray(cc61c968,ffffffff,ffffffff,c278ad18,0,0,0,c0443ef0,cc61c968,c278acec)
 at netbsd:ufs_bmaparray+0x257
ufs_bmap(cc5f691c,cc5280e0,0,c0bf0a00,0,2000,c08a5280,cc61c968,ffffffff,ffffffff)
 at netbsd:ufs_bmap+0x85
VOP_BMAP(cc61c968,ffffffff,ffffffff,0,c278ad18,0,cc681edc,cc61c968,cc61c968,0) 
at netbsd:VOP_BMAP+0x7f
ufs_strategy(cc5f698c,2000,0,c04bee0e,0,0,c08a52c0,cc61c968,c278acec,c278acec) 
at netbsd:ufs_strategy+0xb3
VOP_STRATEGY(cc61c968,c278acec,ffffffff,2000,0,0,0,cc5f6aa8,cc61c968,0) at 
netbsd:VOP_STRATEGY+0x66
bio_doread(2000,ffffffff,0,0,cc681edc,d,1,ffffffff,cb2eee68,cb2ee8d0) at 
netbsd:bio_doread+0x73
breadn(cc61c968,ffffffff,ffffffff,2000,cb2ee8d0,cb2eee68,0,ffffffff,1,cc5f6aa8) 
at netbsd:breadn+0x47
ufs_blkatoff(cc61c968,fffffe30,ffffffff,cc5f6aec,cc5f6ae8,1,cc61c968,c04bf3da,8,c044b96d)
 at netbsd:ufs_blkatoff+0x242
ufs_dirremove(cc61c968,cc69a9cc,8418,0,0,0,c04bcde3,cc681edc,cc681edc,8000) at 
netbsd:ufs_dirremove+0x13b
ufs_rename(cc5f6bac,cc5280e0,6,c0be5d60,c27677e0,c0b23ec0,c08a4fc0,cc61c968,cc69bc48,cc5f6c90)
 at netbsd:ufs_rename+0xb45
VOP_RENAME(cc61c968,cc69bc48,cc5f6c90,cc61c968,0,cc5f6c48,cc5f6c0c,c0850696,cc544354,0)
 at netbsd:VOP_RENAME+0x7c
do_sys_rename(bfbfda5b,bfbfea5f,0,0,0,c0b22410,cc5f6d3c,c05c2798,cc5280e0,cc5f6d00)
 at netbsd:do_sys_rename+0x5a9
sys_rename(cc5280e0,cc5f6d00,cc5f6d28,bbb74000,cb31fc6c,cb31fc6c,1,bfbfda5b,bfbfea5f,401)
 at netbsd:sys_rename+0x26


but using the wapbl rename version for the non-wapbl seems to run stable. 
I've not analysed whenever this is safe in case of crash though (and
won't say I can do it, I don't understand all what's going on in there).

Does anyone see a problem with this, or otherwise has comments ?
For now I only tested this on test boxes, I probably won't have time to do
more checks for the next 2 weeks.

-- 
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
     NetBSD: 26 ans d'experience feront toujours la difference
--
Index: ufs_extern.h
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_extern.h,v
retrieving revision 1.60
diff -u -p -u -r1.60 ufs_extern.h
--- ufs_extern.h        31 May 2008 21:37:08 -0000      1.60
+++ ufs_extern.h        17 Dec 2009 21:15:23 -0000
@@ -129,7 +129,8 @@ int ufs_direnter(struct vnode *, struct 
 int    ufs_dirremove(struct vnode *, struct inode *, int, int);
 int    ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int, int);
 int    ufs_dirempty(struct inode *, ino_t, kauth_cred_t);
-int    ufs_checkpath(struct inode *, struct inode *, kauth_cred_t);
+int    ufs_parentcheck(struct vnode *, struct vnode *, kauth_cred_t,
+                       int *, struct vnode **);
 int    ufs_blkatoff(struct vnode *, off_t, char **, struct buf **, bool);
 
 /* ufs_quota.c */
Index: ufs_lookup.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_lookup.c,v
retrieving revision 1.99
diff -u -p -u -r1.99 ufs_lookup.c
--- ufs_lookup.c        31 Jul 2008 05:38:06 -0000      1.99
+++ ufs_lookup.c        17 Dec 2009 21:15:23 -0000
@@ -1248,6 +1248,7 @@ ufs_dirempty(struct inode *ip, ino_t par
        return (1);
 }
 
+#if 0 /* XXX remove ufs_checkpath() before commit */
 /*
  * Check if source directory is in the path of the target directory.
  * Target is supplied locked, source is unlocked.
@@ -1320,6 +1321,127 @@ out:
                vput(vp);
        return (error);
 }
+#endif
+
+/*
+ * Extract the inode number of ".." from a directory.
+ * Helper for ufs_parentcheck.
+ */
+static int
+ufs_readdotdot(struct vnode *vp, int needswap, kauth_cred_t cred, ino_t 
*result)
+{
+       struct dirtemplate dirbuf;
+       int namlen, error;
+
+       error = vn_rdwr(UIO_READ, vp, &dirbuf,
+                   sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
+                   IO_NODELOCKED, cred, NULL, NULL);
+       if (error) {
+               return error;
+       }
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+       if (FSFMT(vp) && needswap == 0)
+               namlen = dirbuf.dotdot_type;
+       else
+               namlen = dirbuf.dotdot_namlen;
+#else
+       if (FSFMT(vp) && needswap != 0)
+               namlen = dirbuf.dotdot_type;
+       else
+               namlen = dirbuf.dotdot_namlen;
+#endif
+       if (namlen != 2 ||
+           dirbuf.dotdot_name[0] != '.' ||
+           dirbuf.dotdot_name[1] != '.') {
+               printf("ufs_readdotdot: directory %llu contains "
+                      "garbage instead of ..\n",
+                      (unsigned long long) VTOI(vp)->i_number);
+               return ENOTDIR;
+       }
+       *result = ufs_rw32(dirbuf.dotdot_ino, needswap);
+       return 0;
+}
+
+/*
+ * Check if LOWER is a descendent of UPPER. If we find UPPER, return
+ * nonzero in FOUND and return a reference to the immediate descendent
+ * of UPPER in UPPERCHILD. If we don't find UPPER (that is, if we
+ * reach the volume root and that isn't UPPER), return zero in FOUND
+ * and null in UPPERCHILD.
+ *
+ * Neither UPPER nor LOWER should be locked.
+ *
+ * On error (such as a permissions error checking up the directory
+ * tree) fail entirely.
+ */
+int
+ufs_parentcheck(struct vnode *upper, struct vnode *lower, kauth_cred_t cred,
+               int *found_ret, struct vnode **upperchild_ret)
+{
+       ino_t upper_ino, found_ino;
+       struct vnode *current, *next;
+       int error;
+
+       if (upper == lower) {
+               VREF(upper);
+               *found_ret = 1;
+               *upperchild_ret = upper;
+               return 0;
+       }
+       if (VTOI(lower)->i_number == ROOTINO) {
+               *found_ret = 0;
+               *upperchild_ret = NULL;
+               return 0;
+       }
+
+       upper_ino = VTOI(upper)->i_number;
+
+       current = lower;
+       VREF(current);
+       vn_lock(current, LK_EXCLUSIVE | LK_RETRY);
+
+       for (;;) {
+               const int needswap = UFS_MPNEEDSWAP(VTOI(current)->i_ump);
+               error = ufs_readdotdot(current, needswap, cred, &found_ino);
+               if (error) {
+                       vput(current);
+                       return error;
+               }
+               if (found_ino == upper_ino) {
+                       VOP_UNLOCK(current, 0);
+                       *found_ret = 1;
+                       *upperchild_ret = current;
+                       return 0;
+               }
+               if (found_ino == ROOTINO) {
+                       vput(current);
+                       *found_ret = 0;
+                       *upperchild_ret = NULL;
+                       return 0;
+               }
+               VOP_UNLOCK(current, 0);
+               error = VFS_VGET(current->v_mount, found_ino, &next);
+               if (error) {
+                       vrele(current);
+                       return error;
+               }
+               KASSERT(VOP_ISLOCKED(next));
+               if (next->v_type != VDIR) {
+                       printf("ufs_parentcheck: inode %llu reached via .. of "
+                              "inode %llu is not a directory\n",
+                           (unsigned long long)VTOI(next)->i_number,
+                           (unsigned long long)VTOI(current)->i_number);
+                       vput(next);
+                       vrele(current);
+                       return ENOTDIR;
+               }
+               vrele(current);
+               current = next;
+       }
+
+       return 0;
+}
 
 #define        UFS_DIRRABLKS 0
 int ufs_dirrablks = UFS_DIRRABLKS;
Index: ufs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_vnops.c,v
retrieving revision 1.169
diff -u -p -u -r1.169 ufs_vnops.c
--- ufs_vnops.c 14 Aug 2008 16:19:25 -0000      1.169
+++ ufs_vnops.c 17 Dec 2009 21:15:23 -0000
@@ -974,126 +974,489 @@ ufs_rename(void *v)
                struct vnode            *a_tvp;
                struct componentname    *a_tcnp;
        } */ *ap = v;
-       struct vnode            *tvp, *tdvp, *fvp, *fdvp;
-       struct componentname    *tcnp, *fcnp;
-       struct inode            *ip, *xp, *dp;
+       struct vnode            *fromparent_vnode;
+       struct inode            *fromparent_i;
+       struct componentname    *from_name;
+       struct vnode            *fromchild_vnode;
+       struct inode            *fromchild_i;
+
+       struct vnode            *toparent_vnode;
+       struct inode            *toparent_i;
+       struct componentname    *to_name;
+       struct vnode            *tochild_vnode;
+       struct inode            *tochild_i;
+
+       struct vnode            *illegal_vnode;
        struct mount            *mp;
        struct direct           *newdir;
-       int                     doingdirectory, oldparent, newparent, error;
+       int                     error;
+       int                     oldparent, newparent;
+       int                     doingdirectory;
+       int                     foundfromparent;
+       int                     do_race_condition;
+       int                     noabort = 0;
 
 #ifdef WAPBL
-       if (ap->a_tdvp->v_mount->mnt_wapbl)
+       if ( /* ap->a_tdvp->v_mount->mnt_wapbl */ 1)
                return wapbl_ufs_rename(v);
 #endif
 
-       tvp = ap->a_tvp;
-       tdvp = ap->a_tdvp;
-       fvp = ap->a_fvp;
-       fdvp = ap->a_fdvp;
-       tcnp = ap->a_tcnp;
-       fcnp = ap->a_fcnp;
-       doingdirectory = oldparent = newparent = error = 0;
+       /*
+        * Due to oddities in namei and in the fs-independent code,
+        * currently we are called with references to all four vnodes,
+        * but with only toparent_vnode locked.
+        *
+        * In a sane world we'd be passed just the two parent vnodes,
+        * both unlocked, and two (string) names.
+        *
+        * For now we will begin by setting up to mimic the sane world.
+        */
+
+       fromparent_vnode = ap->a_fdvp;
+       fromparent_i = VTOI(fromparent_vnode);
+
+       from_name = ap->a_fcnp;
+       fromchild_vnode = NULL;
+
+       toparent_vnode = ap->a_tdvp;
+       toparent_i = VTOI(toparent_vnode);
+
+       to_name = ap->a_tcnp;
+       tochild_vnode = NULL;
+
+       VOP_UNLOCK(toparent_vnode, 0);
+       if (ap->a_tvp)
+               VOP_UNLOCK(ap->a_tvp, 0);
+
+       illegal_vnode = NULL;
+       do_race_condition = 0;
+
+       /*
+        * Part 1: check for broken args and illegal operations.
+        */
 
 #ifdef DIAGNOSTIC
-       if ((tcnp->cn_flags & HASBUF) == 0 ||
-           (fcnp->cn_flags & HASBUF) == 0)
+       /* We expect SAVESTART to have been set. */
+       if ((to_name->cn_flags & HASBUF) == 0 ||
+           (from_name->cn_flags & HASBUF) == 0)
                panic("ufs_rename: no name");
 #endif
-       /*
-        * Check for cross-device rename.
-        */
-       if ((fvp->v_mount != tdvp->v_mount) ||
-           (tvp && (fvp->v_mount != tvp->v_mount))) {
+
+       /* Cross-device rename is prohibited. */
+       if (fromparent_vnode->v_mount != toparent_vnode->v_mount) {
                error = EXDEV;
- abortit:
-               VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
-               if (tdvp == tvp)
-                       vrele(tdvp);
-               else
-                       vput(tdvp);
-               if (tvp)
-                       vput(tvp);
-               VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
-               vrele(fdvp);
-               vrele(fvp);
-               return (error);
+               goto fail_early;
+       }
+
+       /* If either parent isn't a directory, just give up now. */
+       if (fromparent_vnode->v_type != VDIR ||
+           toparent_vnode->v_type != VDIR) {
+               error = ENOTDIR;
+               goto fail_early;
+       }
+
+       /* The "." and ".." entries may not be renamed. */
+       if ((from_name->cn_namelen == 1 && from_name->cn_nameptr[0] == '.') ||
+           (from_name->cn_flags & ISDOTDOT) ||
+           (to_name->cn_flags & ISDOTDOT)) {
+               error = EINVAL;
+               goto fail_early;
        }
 
        /*
-        * Check if just deleting a link name.
+        * Part 2: get locks.
+        *
+        * We lock parent vnodes before child vnodes. This means in
+        * particular that if A is above B in the directory tree then
+        * A must be locked before B. (This is true regardless of how
+        * many steps appear in between, because an arbitrary number
+        * of other processes could lock parent/child in between and
+        * establish a lock cycle and deadlock.)
+        *
+        * Therefore, if TOPARENT is above FROMPARENT we must lock
+        * TOPARENT first; if FROMPARENT is above TOPARENT we must
+        * lock FROMPARENT first; and if they're incommensurate it
+        * doesn't matter. (But, we rely on the fact that there's a
+        * whole-volume rename lock to prevent deadlock among groups
+        * of renames upon overlapping sets of incommensurate vnodes.)
+        *
+        * In addition to establishing lock ordering the parent check
+        * also serves to rule out cases where someone tries to move a
+        * directory underneath itself, e.g. rename("a/b", "a/b/c").
+        * If allowed to proceed such renames would detach portions of
+        * the directory tree and make fsck very unhappy.
+        *
+        * Note that it is an error for *FROMCHILD* to be above
+        * TOPARENT; however, *FROMPARENT* can be above TOPARENT, as
+        * in rename("a/b", "a/c/d").
+        *
+        * The parent check searches up the tree from TOPARENT until
+        * it either finds FROMPARENT or the root of the volume. It
+        * also returns the vnode it saw immediately before TOPARENT,
+        * if any. Later on (after looking up FROMCHILD) we will check
+        * to see if this *is* FROMCHILD and if so fail.
+        *
+        * If the parent check finds FROMPARENT, it means FROMPARENT
+        * is above TOPARENT, so we lock FROMPARENT first and then
+        * TOPARENT. Otherwise, either TOPARENT is above FROMPARENT or
+        * they're incommensurate and we lock TOPARENT first.
+        *
+        * In either case each of the child vnodes has to be looked up
+        * and locked immediately after its parent. The two cases
+        *
+        *       FROMPARENT/FROMCHILD/TOPARENT/TOCHILD
+        *       TOPARENT/TOCHILD/FROMPARENT/FROMCHILD
+        *
+        * can cause deadlock otherwise. (Note that both of these are
+        * error cases; the first fails the parent check and the
+        * second fails because TOCHILD isn't empty. The parent check
+        * case can be handled without attempting to lock FROMCHILD,
+        * (XXX or at least it could be in a saner world of namei),
+        * but the nonempty case requires locking TOCHILD to test.
+        *
+        * Therefore the procedure is either
+        *
+        *   lock FROMPARENT
+        *   lookup FROMCHILD
+        *   lock FROMCHILD
+        *   lock TOPARENT
+        *   lookup TOCHILD
+        *   lock TOCHILD
+        *
+        * or
+        *
+        *   lock TOPARENT
+        *   lookup TOCHILD
+        *   lock TOCHILD
+        *   lock FROMPARENT
+        *   lookup FROMCHILD
+        *   lock FROMCHILD
+        *
+        * In a saner namei world we could simplify this (some) by always
+        * handling FROMCHILD last, but that isn't currently possible.
+        *
+        * Note that for now we aren't doing lookup so much as relookup()
+        * and checking what we find against what was passed down from the
+        * fs-independent code.
+        *
+        * On top of all the above, just to make everything more
+        * exciting, any two of the vnodes might end up being the same.
+        *
+        * FROMPARENT == FROMCHILD      mv a/. foo      is an error.
+        * FROMPARENT == TOPARENT       mv a/b a/c      is ok.
+        * FROMPARENT == TOCHILD        mv a/b/c a/b    will give ENOTEMPTY.
+        * FROMCHILD == TOPARENT        mv a/b a/b/c    fails the parent check.
+        * FROMCHILD == TOCHILD         mv a/b a/b      is ok.
+        * TOPARENT == TOCHILD          mv foo a/.      is an error.
+        *
+        * This introduces more cases in the locking, because each
+        * distinct vnode must be locked exactly once.
+        *
+        * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we
+        * assume it doesn't matter what order the children are locked
+        * in, because the per-volume rename lock excludes other
+        * renames and no other operation locks two files in the same
+        * directory at once. (Note: if it turns out that link() does,
+        * link() is wrong.)
         */
-       if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
-           (VTOI(tdvp)->i_flags & APPEND))) {
-               error = EPERM;
-               goto abortit;
+
+       /*
+        * XXX until such time as we can do lookups without the namei
+        * and lookup machinery "helpfully" locking the result vnode
+        * for us, we can't avoid tripping on cases where FROMCHILD ==
+        * TOCHILD. The right way to do this is to check after looking
+        * the second one up and only lock it if it's different. Easy,
+        * but requires having the right underlying abstractions.
+        *
+        * Instead, we'll have to check the child vnode passed down in
+        * the argument block. For the branch where we look up
+        * FROMCHILD first we do this:
+        *
+        *     - check if FROMCHILD is the same as the TOCHILD passed
+        *       in (ap->a_tvp)
+        *     - if so, unlock FROMCHILD before looking up TOCHILD
+        *       and set do_race_condition
+        *     - afterward if they aren't the same lock FROMCHILD again.
+        *
+        * Eww.
+        *
+        * On the plus side, this method at least shouldn't be able to
+        * deadlock when relocking FROMCHILD.
+        *
+        * Needless to say this is a race condition and should be
+        * fixed up once namei has been rendered sufficiently sane.
+        */
+
+       // XXX do we need this?
+       from_name->cn_flags &= ~SAVESTART;
+       to_name->cn_flags &= ~SAVESTART;
+       // XXX or this form instead?
+       //from_name->cn_flags &= ~(MODMASK | SAVESTART);
+       //from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+
+       if (fromparent_vnode == toparent_vnode) {
+               vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+               error = relookup(fromparent_vnode, &fromchild_vnode,
+                                from_name);
+               if (error) {
+                       VOP_UNLOCK(fromparent_vnode, 0);
+                       goto fail_nolocks;
+               }
+               /* XXX see note above */
+               if (fromchild_vnode == ap->a_tvp) {
+                       VOP_UNLOCK(fromchild_vnode, 0);
+                       do_race_condition = 1;
+               }
+               error = relookup(toparent_vnode, &tochild_vnode,
+                                to_name);
+               if (error && error != ENOENT) {
+                       /*VOP_UNLOCK(toparent_vnode); -- same as fromparent */
+                       VOP_UNLOCK(fromchild_vnode, 0);
+                       VOP_UNLOCK(fromparent_vnode, 0);
+                       goto fail_nolocks;
+               }
+               if (error == ENOENT) {
+                       /*
+                        * Note: currently in this case relookup() succeeds
+                        * and sets tochild_vnode = NULL, but this piece of
+                        * logic will be wanted in the (saner) future.
+                        * (XXX: remove this comment when appropriate)
+                        */
+                       tochild_vnode = NULL;
+               }
+               if (do_race_condition) {
+                       if (tochild_vnode != fromchild_vnode) {
+                               vn_lock(fromchild_vnode,
+                                       LK_EXCLUSIVE | LK_RETRY);
+                       }
+               }
        }
-       if (fvp == tvp) {
-               if (fvp->v_type == VDIR) {
-                       error = EINVAL;
-                       goto abortit;
+       else {
+               error = ufs_parentcheck(fromparent_vnode, toparent_vnode,
+                           from_name->cn_cred,
+                           &foundfromparent, &illegal_vnode);
+               if (error) {
+                       goto fail_nolocks;
+               }
+               if (foundfromparent) {
+                       vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       error = relookup(fromparent_vnode, &fromchild_vnode,
+                                        from_name);
+                       if (error) {
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (fromchild_vnode == toparent_vnode) {
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               VOP_UNLOCK(fromchild_vnode, 0);
+                               error = EINVAL;
+                               goto fail_nolocks;
+                       }
+                       vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       /* XXX see note above */
+                       if (fromchild_vnode == ap->a_tvp) {
+                               VOP_UNLOCK(fromchild_vnode, 0);
+                               do_race_condition = 1;
+                       }
+                       error = relookup(toparent_vnode, &tochild_vnode,
+                                        to_name);
+                       if (error && error != ENOENT) {
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               VOP_UNLOCK(fromchild_vnode, 0);
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (error == ENOENT) {
+                               tochild_vnode = NULL;
+                       }
+                       if (do_race_condition) {
+                               if (tochild_vnode != fromchild_vnode) {
+                                       vn_lock(fromchild_vnode,
+                                               LK_EXCLUSIVE | LK_RETRY);
+                               }
+                       }
+               }
+               else {
+                       vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       error = relookup(toparent_vnode, &tochild_vnode,
+                                        to_name);
+                       if (error && error != ENOENT) {
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (error == ENOENT) {
+                               tochild_vnode = NULL;
+                       }
+                       if (fromparent_vnode == tochild_vnode) {
+                               VOP_UNLOCK(tochild_vnode, 0);
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               error = ENOTEMPTY;
+                               goto fail_nolocks;
+                       }
+                       vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       /* XXX see note above */
+                       if (tochild_vnode && tochild_vnode == ap->a_fvp) {
+                               VOP_UNLOCK(tochild_vnode, 0);
+                               do_race_condition = 1;
+                       }
+                       error = relookup(fromparent_vnode, &fromchild_vnode,
+                                        from_name);
+                       if (error) {
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               VOP_UNLOCK(tochild_vnode, 0);
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (do_race_condition) {
+                               if (fromchild_vnode != tochild_vnode) {
+                                       vn_lock(tochild_vnode,
+                                               LK_EXCLUSIVE | LK_RETRY);
+                               }
+                       }
                }
+       }
 
-               /* Release destination completely. */
-               VOP_ABORTOP(tdvp, tcnp);
-               vput(tdvp);
-               vput(tvp);
+       /*
+        * Note: after relookup to get fromchild_vnode and
+        * tochild_vnode, we have extra references to (what may or may
+        * not be the same) vnodes in ap->a_fvp and ap->a_tvp. These
+        * are cleaned up at function exit below.
+        */
 
-               /* Delete source. */
-               vrele(fvp);
-               fcnp->cn_flags &= ~(MODMASK | SAVESTART);
-               fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
-               fcnp->cn_nameiop = DELETE;
-               vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
-               if ((error = relookup(fdvp, &fvp, fcnp))) {
-                       vput(fdvp);
-                       return (error);
-               }
-               return (VOP_REMOVE(fdvp, fvp, fcnp));
+       /* Now that we have fromchild, make sure it passes the parent check */
+       if (fromchild_vnode == illegal_vnode) {
+               error = EINVAL;
+               goto fail_withlocks;
        }
-       if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
-               goto abortit;
-       dp = VTOI(fdvp);
-       ip = VTOI(fvp);
-       if ((nlink_t) ip->i_nlink >= LINK_MAX) {
-               VOP_UNLOCK(fvp, 0);
-               error = EMLINK;
-               goto abortit;
+       if (illegal_vnode) 
+               vrele(illegal_vnode);
+       illegal_vnode = NULL;
+
+       /*
+        * All four vnodes are now locked, and we can proceed to do
+        * the real work.
+        */
+
+       KASSERT(VOP_ISLOCKED(fromparent_vnode));
+       KASSERT(VOP_ISLOCKED(fromchild_vnode));
+       KASSERT(VOP_ISLOCKED(toparent_vnode));
+       KASSERT(tochild_vnode == NULL || VOP_ISLOCKED(tochild_vnode));
+
+       /* Get the rest of the inodes. */
+       fromchild_i = VTOI(fromchild_vnode);
+       tochild_i = tochild_vnode ? VTOI(tochild_vnode) : NULL;
+
+       KASSERT(fromparent_i != fromchild_i);
+       KASSERT(tochild_i == NULL || toparent_i != tochild_i);
+
+       /*
+        * Part 3. More checks.
+        */
+
+       /* More cross-directory cases. */
+       if (fromparent_vnode->v_mount != fromchild_vnode->v_mount) {
+               error = EXDEV;
+               goto fail_withlocks;
+       }
+       if (tochild_vnode != NULL &&
+           toparent_vnode->v_mount != tochild_vnode->v_mount) {
+               error = EXDEV;
+               goto fail_withlocks;
        }
-       if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
-               (dp->i_flags & APPEND)) {
-               VOP_UNLOCK(fvp, 0);
+
+       /* Check flags. */
+       if ((fromparent_i->i_flags & APPEND) != 0) {
                error = EPERM;
-               goto abortit;
+               goto fail_withlocks;
        }
-       if ((ip->i_mode & IFMT) == IFDIR) {
-               /*
-                * Avoid ".", "..", and aliases of "." for obvious reasons.
-                */
-               if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
-                   dp == ip ||
-                   (fcnp->cn_flags & ISDOTDOT) ||
-                   (tcnp->cn_flags & ISDOTDOT) ||
-                   (ip->i_flag & IN_RENAME)) {
-                       VOP_UNLOCK(fvp, 0);
-                       error = EINVAL;
-                       goto abortit;
-               }
-               ip->i_flag |= IN_RENAME;
-               oldparent = dp->i_number;
-               doingdirectory = 1;
+       if ((fromchild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+               error = EPERM;
+               goto fail_withlocks;
+       }
+       if ((toparent_i->i_flags & APPEND) != 0) {
+               error = EPERM;
+               goto fail_withlocks;
+       }
+       if (tochild_vnode != NULL &&
+           (tochild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+               error = EPERM;
+               goto fail_withlocks;
+       }
+
+       /* Moving a directory over itself is not allowed. */
+       if (fromchild_vnode == tochild_vnode &&
+           fromchild_vnode->v_type == VDIR) {
+               error = EINVAL;
+               goto fail_withlocks;
+       }
+
+       /* Make sure bumping the target's link count won't overflow. */
+       if ((nlink_t) fromchild_i->i_nlink >= LINK_MAX) {
+               error = EMLINK;
+               goto fail_withlocks;
+       }
+
+       /* This flag is now useless and should be removed. */
+       if (fromchild_i->i_flag & IN_RENAME) {
+               error = EINVAL;
+               goto fail_withlocks;
        }
-       VN_KNOTE(fdvp, NOTE_WRITE);             /* XXXLUKEM/XXX: right place? */
 
        /*
-        * When the target exists, both the directory
-        * and target vnodes are returned locked.
+        * Part 4. Rename logic.
         */
-       dp = VTOI(tdvp);
-       xp = NULL;
-       if (tvp)
-               xp = VTOI(tvp);
 
-       mp = fdvp->v_mount;
+       /* If moving a file over itself, just unlink the from end. */
+       if (fromchild_vnode == tochild_vnode) {
+               /* This case was ruled out above. */
+               KASSERT(fromchild_vnode->v_type != VDIR);
+
+               /* Release to-side completely. */
+               VOP_ABORTOP(toparent_vnode, to_name);
+               if (toparent_vnode != fromparent_vnode) {
+                       VOP_UNLOCK(toparent_vnode, 0);
+               }
+               vrele(toparent_vnode);
+               vrele(tochild_vnode);
+
+               /* Clean up a_fvp/a_tvp. */
+               vrele(ap->a_fvp);
+               if (ap->a_tvp != NULL) {
+                       vrele(ap->a_tvp);
+               }
+
+               /* Now unlink the from-side. */
+               from_name->cn_flags &= ~(MODMASK | SAVESTART);
+               from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+               from_name->cn_nameiop = DELETE;
+               return VOP_REMOVE(fromparent_vnode, fromchild_vnode, from_name);
+       }
+
+       oldparent = fromparent_i->i_number;
+       newparent = toparent_i->i_number;
+       doingdirectory = (fromchild_i->i_mode & IFMT) == IFDIR;
+
+       /* XXX update the logic below to not test "newparent!=0" */
+       if (oldparent == newparent) {
+               newparent = 0;
+       }
+
+       /* Check permissions. */
+       error = VOP_ACCESS(fromchild_vnode, VWRITE, to_name->cn_cred);
+       if (error) {
+               goto fail_withlocks;
+       }
+
+       noabort = 1;
+
+       if (doingdirectory) {
+               fromchild_i->i_flag |= IN_RENAME;
+       }
+
+       /* XXXLUKEM/XXX: right place? */
+       VN_KNOTE(fromparent_vnode, NOTE_WRITE);
+
+       mp = fromparent_vnode->v_mount;
        fstrans_start(mp, FSTRANS_SHARED);
 
        /*
@@ -1102,62 +1465,27 @@ ufs_rename(void *v)
         *    completing our work, the link count
         *    may be wrong, but correctable.
         */
-       ip->i_ffs_effnlink++;
-       ip->i_nlink++;
-       DIP_ASSIGN(ip, nlink, ip->i_nlink);
-       ip->i_flag |= IN_CHANGE;
-       if (DOINGSOFTDEP(fvp))
-               softdep_change_linkcnt(ip);
-       if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
-               VOP_UNLOCK(fvp, 0);
-               goto bad;
+       fromchild_i->i_ffs_effnlink++;
+       fromchild_i->i_nlink++;
+       DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+       fromchild_i->i_flag |= IN_CHANGE;
+       if (DOINGSOFTDEP(fromchild_vnode))
+               softdep_change_linkcnt(fromchild_i);
+       error = UFS_UPDATE(fromchild_vnode, NULL, NULL, UPDATE_DIROP);
+       if (error) {
+               goto fail_withfstrans;
        }
 
        /*
-        * If ".." must be changed (ie the directory gets a new
-        * parent) then the source directory must not be in the
-        * directory hierarchy above the target, as this would
-        * orphan everything below the source directory. Also
-        * the user must have write permission in the source so
-        * as to be able to change "..". We must repeat the call
-        * to namei, as the parent directory is unlocked by the
-        * call to checkpath().
-        */
-       error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
-       VOP_UNLOCK(fvp, 0);
-       if (oldparent != dp->i_number)
-               newparent = dp->i_number;
-       if (doingdirectory && newparent) {
-               if (error)      /* write access check above */
-                       goto bad;
-               if (xp != NULL)
-                       vput(tvp);
-               vref(tdvp);     /* compensate for the ref checkpath loses */
-               if ((error = ufs_checkpath(ip, dp, tcnp->cn_cred)) != 0) {
-                       vrele(tdvp);
-                       goto out;
-               }
-               tcnp->cn_flags &= ~SAVESTART;
-               vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
-               error = relookup(tdvp, &tvp, tcnp);
-               if (error != 0) {
-                       vput(tdvp);
-                       goto out;
-               }
-               dp = VTOI(tdvp);
-               xp = NULL;
-               if (tvp)
-                       xp = VTOI(tvp);
-       }
-       /*
         * 2) If target doesn't exist, link the target
         *    to the source and unlink the source.
         *    Otherwise, rewrite the target directory
         *    entry to reference the source inode and
         *    expunge the original entry's existence.
         */
-       if (xp == NULL) {
-               if (dp->i_dev != ip->i_dev)
+
+       if (tochild_i == NULL) {
+               if (toparent_i->i_dev != fromchild_i->i_dev)
                        panic("rename: EXDEV");
                /*
                 * Account for ".." in new directory.
@@ -1165,53 +1493,53 @@ ufs_rename(void *v)
                 * parent we don't fool with the link count.
                 */
                if (doingdirectory && newparent) {
-                       if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+                       if ((nlink_t)toparent_i->i_nlink >= LINK_MAX) {
                                error = EMLINK;
-                               goto bad;
+                               goto fail_withlinkcount;
                        }
-                       dp->i_ffs_effnlink++;
-                       dp->i_nlink++;
-                       DIP_ASSIGN(dp, nlink, dp->i_nlink);
-                       dp->i_flag |= IN_CHANGE;
-                       if (DOINGSOFTDEP(tdvp))
-                               softdep_change_linkcnt(dp);
-                       if ((error = UFS_UPDATE(tdvp, NULL, NULL,
+                       toparent_i->i_ffs_effnlink++;
+                       toparent_i->i_nlink++;
+                       DIP_ASSIGN(toparent_i, nlink, toparent_i->i_nlink);
+                       toparent_i->i_flag |= IN_CHANGE;
+                       if (DOINGSOFTDEP(toparent_vnode))
+                               softdep_change_linkcnt(toparent_i);
+                       if ((error = UFS_UPDATE(toparent_vnode, NULL, NULL,
                            UPDATE_DIROP)) != 0) {
-                               dp->i_ffs_effnlink--;
-                               dp->i_nlink--;
-                               DIP_ASSIGN(dp, nlink, dp->i_nlink);
-                               dp->i_flag |= IN_CHANGE;
-                               if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(dp);
-                               goto bad;
+                               toparent_i->i_ffs_effnlink--;
+                               toparent_i->i_nlink--;
+                               DIP_ASSIGN(toparent_i, nlink, 
toparent_i->i_nlink);
+                               toparent_i->i_flag |= IN_CHANGE;
+                               if (DOINGSOFTDEP(toparent_vnode))
+                                       softdep_change_linkcnt(toparent_i);
+                               goto fail_withlinkcount;
                        }
                }
                newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
-               ufs_makedirentry(ip, tcnp, newdir);
-               error = ufs_direnter(tdvp, NULL, newdir, tcnp, NULL);
+               ufs_makedirentry(fromchild_i, to_name, newdir);
+               error = ufs_direnter(toparent_vnode, NULL, newdir, to_name, 
NULL);
                pool_cache_put(ufs_direct_cache, newdir);
                if (error != 0) {
                        if (doingdirectory && newparent) {
-                               dp->i_ffs_effnlink--;
-                               dp->i_nlink--;
-                               DIP_ASSIGN(dp, nlink, dp->i_nlink);
-                               dp->i_flag |= IN_CHANGE;
-                               if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(dp);
-                               (void)UFS_UPDATE(tdvp, NULL, NULL,
-                                                UPDATE_WAIT|UPDATE_DIROP);
+                               toparent_i->i_ffs_effnlink--;
+                               toparent_i->i_nlink--;
+                               DIP_ASSIGN(toparent_i, nlink, 
toparent_i->i_nlink);
+                               toparent_i->i_flag |= IN_CHANGE;
+                               if (DOINGSOFTDEP(toparent_vnode))
+                                       softdep_change_linkcnt(toparent_i);
+                               (void)UFS_UPDATE(toparent_vnode, NULL, NULL,
+                                                UPDATE_WAIT | UPDATE_DIROP);
                        }
-                       goto bad;
+                       goto fail_withlinkcount;
                }
-               VN_KNOTE(tdvp, NOTE_WRITE);
-               vput(tdvp);
+               VN_KNOTE(toparent_vnode, NOTE_WRITE);
        } else {
-               if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+               if (tochild_i->i_dev != toparent_i->i_dev ||
+                   tochild_i->i_dev != fromchild_i->i_dev)
                        panic("rename: EXDEV");
                /*
                 * Short circuit rename(foo, foo).
                 */
-               if (xp->i_number == ip->i_number)
+               if (tochild_i->i_number == fromchild_i->i_number)
                        panic("rename: same file");
                /*
                 * If the parent directory is "sticky", then the user must
@@ -1219,49 +1547,51 @@ ufs_rename(void *v)
                 * otherwise the destination may not be changed (except by
                 * root). This implements append-only directories.
                 */
-               if ((dp->i_mode & S_ISTXT) &&
-                   kauth_authorize_generic(tcnp->cn_cred,
+               if ((toparent_i->i_mode & S_ISTXT) &&
+                   kauth_authorize_generic(to_name->cn_cred,
                     KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
-                   kauth_cred_geteuid(tcnp->cn_cred) != dp->i_uid &&
-                   xp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
+                   kauth_cred_geteuid(to_name->cn_cred) != toparent_i->i_uid &&
+                   tochild_i->i_uid != kauth_cred_geteuid(to_name->cn_cred)) {
                        error = EPERM;
-                       goto bad;
+                       goto fail_withlinkcount;
                }
                /*
                 * Target must be empty if a directory and have no links
                 * to it. Also, ensure source and target are compatible
                 * (both directories, or both not directories).
                 */
-               if ((xp->i_mode & IFMT) == IFDIR) {
-                       if (xp->i_ffs_effnlink > 2 ||
-                           !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
+               if ((tochild_i->i_mode & IFMT) == IFDIR) {
+                       if (tochild_i->i_ffs_effnlink > 2 ||
+                           !ufs_dirempty(tochild_i, toparent_i->i_number,
+                                         to_name->cn_cred)) {
                                error = ENOTEMPTY;
-                               goto bad;
+                               goto fail_withlinkcount;
                        }
                        if (!doingdirectory) {
                                error = ENOTDIR;
-                               goto bad;
+                               goto fail_withlinkcount;
                        }
-                       cache_purge(tdvp);
+                       cache_purge(toparent_vnode);
                } else if (doingdirectory) {
                        error = EISDIR;
-                       goto bad;
+                       goto fail_withlinkcount;
                }
-               if ((error = ufs_dirrewrite(dp, xp, ip->i_number,
-                   IFTODT(ip->i_mode), doingdirectory && newparent ?
+               if ((error = ufs_dirrewrite(toparent_i, tochild_i,
+                   fromchild_i->i_number,
+                   IFTODT(fromchild_i->i_mode), doingdirectory && newparent ?
                    newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
-                       goto bad;
+                       goto fail_withlinkcount;
                if (doingdirectory) {
                        if (!newparent) {
-                               dp->i_ffs_effnlink--;
-                               if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(dp);
-                       }
-                       xp->i_ffs_effnlink--;
-                       if (DOINGSOFTDEP(tvp))
-                               softdep_change_linkcnt(xp);
+                               toparent_i->i_ffs_effnlink--;
+                               if (DOINGSOFTDEP(toparent_vnode))
+                                       softdep_change_linkcnt(toparent_i);
+                       }
+                       tochild_i->i_ffs_effnlink--;
+                       if (DOINGSOFTDEP(tochild_vnode))
+                               softdep_change_linkcnt(tochild_i);
                }
-               if (doingdirectory && !DOINGSOFTDEP(tvp)) {
+               if (doingdirectory && !DOINGSOFTDEP(tochild_vnode)) {
                        /*
                         * Truncate inode. The only stuff left in the directory
                         * is "." and "..". The "." reference is inconsequential
@@ -1274,48 +1604,24 @@ ufs_rename(void *v)
                         * them now.
                         */
                        if (!newparent) {
-                               dp->i_nlink--;
-                               DIP_ASSIGN(dp, nlink, dp->i_nlink);
-                               dp->i_flag |= IN_CHANGE;
-                       }
-                       xp->i_nlink--;
-                       DIP_ASSIGN(xp, nlink, xp->i_nlink);
-                       xp->i_flag |= IN_CHANGE;
-                       if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
-                           tcnp->cn_cred)))
-                               goto bad;
-               }
-               VN_KNOTE(tdvp, NOTE_WRITE);
-               vput(tdvp);
-               VN_KNOTE(tvp, NOTE_DELETE);
-               vput(tvp);
-               xp = NULL;
+                               toparent_i->i_nlink--;
+                               DIP_ASSIGN(toparent_i, nlink, 
toparent_i->i_nlink);
+                               toparent_i->i_flag |= IN_CHANGE;
+                       }
+                       tochild_i->i_nlink--;
+                       DIP_ASSIGN(tochild_i, nlink, tochild_i->i_nlink);
+                       tochild_i->i_flag |= IN_CHANGE;
+                       if ((error = UFS_TRUNCATE(tochild_vnode, (off_t)0,
+                           IO_SYNC, to_name->cn_cred)))
+                               goto fail_withlinkcount;
+               }
+               VN_KNOTE(toparent_vnode, NOTE_WRITE);
+               VN_KNOTE(tochild_vnode, NOTE_DELETE);
        }
 
        /*
         * 3) Unlink the source.
         */
-       fcnp->cn_flags &= ~(MODMASK | SAVESTART);
-       fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
-       vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
-       if ((error = relookup(fdvp, &fvp, fcnp))) {
-               vput(fdvp);
-               vrele(ap->a_fvp);
-               goto out2;
-       }
-       if (fvp != NULL) {
-               xp = VTOI(fvp);
-               dp = VTOI(fdvp);
-       } else {
-               /*
-                * From name has disappeared.
-                */
-               if (doingdirectory)
-                       panic("rename: lost dir entry");
-               vrele(ap->a_fvp);
-               error = 0;
-               goto out2;
-       }
        /*
         * Ensure that the directory entry still exists and has not
         * changed while the new name has been entered. If the source is
@@ -1325,58 +1631,89 @@ ufs_rename(void *v)
         * flag ensures that it cannot be moved by another rename or removed
         * by a rmdir.
         */
-       if (xp != ip) {
-               if (doingdirectory)
-                       panic("rename: lost dir entry");
-       } else {
-               /*
-                * If the source is a directory with a
-                * new parent, the link count of the old
-                * parent directory must be decremented
-                * and ".." set to point to the new parent.
-                */
-               if (doingdirectory && newparent) {
-                       KASSERT(dp != NULL);
-                       xp->i_offset = mastertemplate.dot_reclen;
-                       ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0, IN_CHANGE);
-                       cache_purge(fdvp);
-               }
-               error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
-               xp->i_flag &= ~IN_RENAME;
-       }
-       VN_KNOTE(fvp, NOTE_RENAME);
-       if (dp)
-               vput(fdvp);
-       if (xp)
-               vput(fvp);
-       vrele(ap->a_fvp);
-       goto out2;
 
-       /* exit routines from steps 1 & 2 */
- bad:
-       if (xp)
-               vput(ITOV(xp));
-       vput(ITOV(dp));
- out:
-       if (doingdirectory)
-               ip->i_flag &= ~IN_RENAME;
-       if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
-               ip->i_ffs_effnlink--;
-               ip->i_nlink--;
-               DIP_ASSIGN(ip, nlink, ip->i_nlink);
-               ip->i_flag |= IN_CHANGE;
-               ip->i_flag &= ~IN_RENAME;
-               if (DOINGSOFTDEP(fvp))
-                       softdep_change_linkcnt(ip);
-               vput(fvp);
-       } else
-               vrele(fvp);
-       vrele(fdvp);
+       /*
+        * If the source is a directory with a
+        * new parent, the link count of the old
+        * parent directory must be decremented
+        * and ".." set to point to the new parent.
+        */
+       if (doingdirectory && newparent) {
+               KASSERT(toparent_i != NULL);
+               fromchild_i->i_offset = mastertemplate.dot_reclen;
+               ufs_dirrewrite(fromchild_i, toparent_i,
+                   newparent, DT_DIR, 0, IN_CHANGE);
+               cache_purge(fromparent_vnode);
+       }
+       error = ufs_dirremove(fromparent_vnode, fromchild_i,
+           from_name->cn_flags, 0);
+       VN_KNOTE(fromchild_vnode, NOTE_RENAME);
+
+       if (error) {
+               goto fail_withfstrans;
+       }
+       goto succeed;
 
-       /* exit routines from step 3 */
- out2:
+fail_withlinkcount:
+       fromchild_i->i_nlink--;
+       fromchild_i->i_ffs_effnlink--;
+       DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+       fromchild_i->i_flag |= IN_CHANGE;
+       if (DOINGSOFTDEP(fromchild_vnode))
+               softdep_change_linkcnt(fromchild_i);
+
+fail_withfstrans:
+succeed:
        fstrans_done(mp);
-       return (error);
+       fromchild_i->i_flag &= ~IN_RENAME;
+
+fail_withlocks:
+       /* See comment up top about which of these can legally be the same. */
+       if (fromparent_vnode == toparent_vnode) {
+               VOP_UNLOCK(fromparent_vnode, 0);
+       } else {
+               VOP_UNLOCK(fromparent_vnode, 0);
+               VOP_UNLOCK(toparent_vnode, 0);
+       }
+       if (fromchild_vnode == tochild_vnode) {
+               VOP_UNLOCK(fromchild_vnode, 0);
+       } else {
+               VOP_UNLOCK(fromchild_vnode, 0);
+               if (tochild_vnode) {
+                       VOP_UNLOCK(tochild_vnode, 0);
+               }
+       }
+
+fail_nolocks:
+       if (fromchild_vnode != NULL) {
+               vrele(fromchild_vnode);
+               fromchild_vnode = NULL;
+       }
+       if (tochild_vnode != NULL) {
+               vrele(tochild_vnode);
+               tochild_vnode = NULL;
+       }
+       if (illegal_vnode) {
+               vrele(illegal_vnode);
+               illegal_vnode = NULL;
+       }
+fail_early:
+       if (!noabort) {
+               /* XXX, why not in NFS? */
+               VOP_ABORTOP(fromparent_vnode, from_name);
+               VOP_ABORTOP(toparent_vnode, to_name);
+       }
+       vrele(fromparent_vnode);
+       vrele(toparent_vnode);
+
+       KASSERT(fromchild_vnode == NULL);
+       KASSERT(tochild_vnode == NULL);
+
+       vrele(ap->a_fvp);
+       if (ap->a_tvp != NULL)
+               vrele(ap->a_tvp);
+
+       return error;
 }
 
 int
Index: ufs_wapbl.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_wapbl.c,v
retrieving revision 1.2.8.1
diff -u -p -u -r1.2.8.1 ufs_wapbl.c
--- ufs_wapbl.c 14 Dec 2008 11:56:04 -0000      1.2.8.1
+++ ufs_wapbl.c 17 Dec 2009 21:15:23 -0000
@@ -158,12 +158,27 @@ wapbl_ufs_rename(void *v)
                struct vnode            *a_tvp;
                struct componentname    *a_tcnp;
        } */ *ap = v;
-       struct vnode            *tvp, *tdvp, *fvp, *fdvp;
-       struct componentname    *tcnp, *fcnp;
-       struct inode            *ip, *txp, *fxp, *tdp, *fdp;
+       struct vnode            *fromparent_vnode;
+       struct inode            *fromparent_i;
+       struct componentname    *from_name;
+       struct vnode            *fromchild_vnode;
+       struct inode            *fromchild_i;
+
+       struct vnode            *toparent_vnode;
+       struct inode            *toparent_i;
+       struct componentname    *to_name;
+       struct vnode            *tochild_vnode;
+       struct inode            *tochild_i;
+
+       struct vnode            *illegal_vnode;
        struct mount            *mp;
        struct direct           *newdir;
-       int                     doingdirectory, oldparent, newparent, error;
+       int                     error;
+       int                     oldparent, newparent;
+       int                     doingdirectory;
+       int                     foundfromparent;
+       int                     do_race_condition;
+       int                     noabort = 0;
 
        int32_t   saved_f_count;
        doff_t    saved_f_diroff;
@@ -175,224 +190,494 @@ wapbl_ufs_rename(void *v)
        doff_t    saved_t_offset;
        u_int32_t saved_t_reclen;
 
-       tvp = ap->a_tvp;
-       tdvp = ap->a_tdvp;
-       fvp = ap->a_fvp;
-       fdvp = ap->a_fdvp;
-       tcnp = ap->a_tcnp;
-       fcnp = ap->a_fcnp;
-       doingdirectory = oldparent = newparent = error = 0;
+       /*
+        * Due to oddities in namei and in the fs-independent code,
+        * currently we are called with references to all four vnodes,
+        * but with only toparent_vnode locked.
+        * In a sane world we'd be passed just the two parent vnodes,
+        * both unlocked, and two (string) names.
+        *
+        * For now we will begin by setting up to mimic the sane world.
+        */
+       fromparent_vnode = ap->a_fdvp;
+       fromparent_i = VTOI(fromparent_vnode);
+       from_name = ap->a_fcnp;
+       fromchild_vnode = NULL;
+
+       toparent_vnode = ap->a_tdvp;
+       toparent_i = VTOI(toparent_vnode);
+
+       to_name = ap->a_tcnp;
+       tochild_vnode = NULL;
+
+       VOP_UNLOCK(toparent_vnode, 0);
+       if (ap->a_tvp)
+               VOP_UNLOCK(ap->a_tvp, 0);
+
+       illegal_vnode = NULL;
+       do_race_condition = 0;
+
+       /* Because gcc 4.1 is not being real smart, zero all this */
+       saved_f_count = 0;
+       saved_f_diroff = 0;
+       saved_f_offset = 0;
+       saved_f_reclen = 0;
+       saved_t_count = 0;
+       saved_t_endoff = 0;
+       saved_t_diroff = 0;
+       saved_t_offset = 0;
+       saved_t_reclen = 0;
+
+       /*
+        * Part 1: check for broken args and illegal operations.
+        */
 
 #ifdef DIAGNOSTIC
-       if ((tcnp->cn_flags & HASBUF) == 0 ||
-           (fcnp->cn_flags & HASBUF) == 0)
+       /* We expect SAVESTART to have been set. */
+       if ((to_name->cn_flags & HASBUF) == 0 ||
+           (from_name->cn_flags & HASBUF) == 0)
                panic("ufs_rename: no name");
 #endif
+       /* Cross-device rename is prohibited. */
+       if (fromparent_vnode->v_mount != toparent_vnode->v_mount) {
+               error = EXDEV;
+               goto fail_early;
+       }
+
+       /* If either parent isn't a directory, just give up now. */
+       if (fromparent_vnode->v_type != VDIR ||
+           toparent_vnode->v_type != VDIR) {
+               error = ENOTDIR;
+               goto fail_early;
+       }
+
+       /* The "." and ".." entries may not be renamed. */
+       if ((from_name->cn_namelen == 1 && from_name->cn_nameptr[0] == '.') ||
+           (from_name->cn_flags & ISDOTDOT) ||
+           (to_name->cn_flags & ISDOTDOT)) {
+               error = EINVAL;
+               goto fail_early;
+       }
+
+       /*
+        * Part 2: get locks.
+        *
+        * We lock parent vnodes before child vnodes. This means in
+        * particular that if A is above B in the directory tree then
+        * A must be locked before B. (This is true regardless of how
+        * many steps appear in between, because an arbitrary number
+        * of other processes could lock parent/child in between and
+        * establish a lock cycle and deadlock.)
+        *
+        * Therefore, if TOPARENT is above FROMPARENT we must lock
+        * TOPARENT first; if FROMPARENT is above TOPARENT we must
+        * lock FROMPARENT first; and if they're incommensurate it
+        * doesn't matter. (But, we rely on the fact that there's a
+        * whole-volume rename lock to prevent deadlock among groups
+        * of renames upon overlapping sets of incommensurate vnodes.)
+        *
+        * In addition to establishing lock ordering the parent check
+        * also serves to rule out cases where someone tries to move a
+        * directory underneath itself, e.g. rename("a/b", "a/b/c").
+        * If allowed to proceed such renames would detach portions of
+        * the directory tree and make fsck very unhappy.
+        *
+        * Note that it is an error for *FROMCHILD* to be above
+        * TOPARENT; however, *FROMPARENT* can be above TOPARENT, as
+        * in rename("a/b", "a/c/d").
+        *
+        * The parent check searches up the tree from TOPARENT until
+        * it either finds FROMPARENT or the root of the volume. It
+        * also returns the vnode it saw immediately before TOPARENT,
+        * if any. Later on (after looking up FROMCHILD) we will check
+        * to see if this *is* FROMCHILD and if so fail.
+        *
+        * If the parent check finds FROMPARENT, it means FROMPARENT
+        * is above TOPARENT, so we lock FROMPARENT first and then
+        * TOPARENT. Otherwise, either TOPARENT is above FROMPARENT or
+        * they're incommensurate and we lock TOPARENT first.
+        *
+        * In either case each of the child vnodes has to be looked up
+        * and locked immediately after its parent. The two cases
+        *
+        *       FROMPARENT/FROMCHILD/TOPARENT/TOCHILD
+        *       TOPARENT/TOCHILD/FROMPARENT/FROMCHILD
+        *
+        * can cause deadlock otherwise. (Note that both of these are
+        * error cases; the first fails the parent check and the
+        * second fails because TOCHILD isn't empty. The parent check
+        * case can be handled without attempting to lock FROMCHILD,
+        * (XXX or at least it could be in a saner world of namei),
+        * but the nonempty case requires locking TOCHILD to test.
+        * Therefore the procedure is either
+        *
+        *   lock FROMPARENT
+        *   lookup FROMCHILD
+        *   lock FROMCHILD
+        *   lock TOPARENT
+        *   lookup TOCHILD
+        *   lock TOCHILD
+        *
+        * or
+        *
+        *   lock TOPARENT
+        *   lookup TOCHILD
+        *   lock TOCHILD
+        *   lock FROMPARENT
+        *   lookup FROMCHILD
+        *   lock FROMCHILD
+        * In a saner namei world we could simplify this (some) by always
+        * handling FROMCHILD last, but that isn't currently possible.
+        *
+        * Note that for now we aren't doing lookup so much as relookup()
+        * and checking what we find against what was passed down from the
+        * fs-independent code.
+        * On top of all the above, just to make everything more
+        * exciting, any two of the vnodes might end up being the same.
+        *
+        * FROMPARENT == FROMCHILD      mv a/. foo      is an error.
+        * FROMPARENT == TOPARENT       mv a/b a/c      is ok.
+        * FROMPARENT == TOCHILD        mv a/b/c a/b    will give ENOTEMPTY.
+        * FROMCHILD == TOPARENT        mv a/b a/b/c    fails the parent check.
+        * FROMCHILD == TOCHILD         mv a/b a/b      is ok.
+        * TOPARENT == TOCHILD          mv foo a/.      is an error.
+        * This introduces more cases in the locking, because each
+        * distinct vnode must be locked exactly once.
+        *
+        * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we
+        * assume it doesn't matter what order the children are locked
+        * in, because the per-volume rename lock excludes other
+        * renames and no other operation locks two files in the same
+        * directory at once. (Note: if it turns out that link() does,
+        * link() is wrong.)
+        */
+
+       /*
+        * XXX until such time as we can do lookups without the namei
+        * and lookup machinery "helpfully" locking the result vnode
+        * for us, we can't avoid tripping on cases where FROMCHILD ==
+        * TOCHILD. The right way to do this is to check after looking
+        * the second one up and only lock it if it's different. Easy,
+        * but requires having the right underlying abstractions.
+        *
+        * Instead, we'll have to check the child vnode passed down in
+        * the argument block. For the branch where we look up
+        * FROMCHILD first we do this:
+        *
+        *     - check if FROMCHILD is the same as the TOCHILD passed
+        *       in (ap->a_tvp)
+        *     - if so, unlock FROMCHILD before looking up TOCHILD
+        *       and set do_race_condition
+        *     - afterward if they aren't the same lock FROMCHILD again.
+        *
+        * Eww.
+        *
+        * On the plus side, this method at least shouldn't be able to
+        * deadlock when relocking FROMCHILD.
+        *
+        * Needless to say this is a race condition and should be
+        * fixed up once namei has been rendered sufficiently sane.
+        */
+       // XXX do we need this?
+       from_name->cn_flags &= ~SAVESTART;
+       to_name->cn_flags &= ~SAVESTART;
+       // XXX or this form instead?
+       //from_name->cn_flags &= ~(MODMASK | SAVESTART);
+       //from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+
+       if (fromparent_vnode == toparent_vnode) {
+               vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+               error = relookup(fromparent_vnode, &fromchild_vnode,
+                                from_name);
+               if (error) {
+                       VOP_UNLOCK(fromparent_vnode, 0);
+                       goto fail_nolocks;
+               }
+
+               /* 
+                * save directory lookup information
+                */
+               saved_f_count  = fromparent_i->i_count;
+               saved_f_diroff = fromparent_i->i_diroff;
+               saved_f_offset = fromparent_i->i_offset;
+               saved_f_reclen = fromparent_i->i_reclen;
+
+               /* XXX see note above */
+               if (fromchild_vnode == ap->a_tvp) {
+                       VOP_UNLOCK(fromchild_vnode, 0);
+                       do_race_condition = 1;
+               }
+               error = relookup(toparent_vnode, &tochild_vnode,
+                                to_name);
+               if (error && error != ENOENT) {
+                       /*VOP_UNLOCK(toparent_vnode); -- same as fromparent */
+                       VOP_UNLOCK(fromchild_vnode, 0);
+                       VOP_UNLOCK(fromparent_vnode, 0);
+                       goto fail_nolocks;
+               }
+               if (error == ENOENT) {
+                       /*
+                        * Note: currently in this case relookup() succeeds
+                        * and sets tochild_vnode = NULL, but this piece of
+                        * logic will be wanted in the (saner) future.
+                        * (XXX: remove this comment when appropriate)
+                        */
+                       tochild_vnode = NULL;
+               }
+               if (do_race_condition) {
+                       if (tochild_vnode != fromchild_vnode) {
+                               vn_lock(fromchild_vnode,
+                                       LK_EXCLUSIVE | LK_RETRY);
+                       }
+               }
+               /* 
+                * save directory lookup information
+                */
+               saved_t_count  = toparent_i->i_count;
+               saved_t_endoff = toparent_i->i_endoff;
+               saved_t_diroff = toparent_i->i_diroff;
+               saved_t_offset = toparent_i->i_offset;
+               saved_t_reclen = toparent_i->i_reclen;
+       }
+       else {
+               error = ufs_parentcheck(fromparent_vnode, toparent_vnode,
+                           from_name->cn_cred,
+                           &foundfromparent, &illegal_vnode);
+               if (error) {
+                       goto fail_nolocks;
+               }
+               if (foundfromparent) {
+                       vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       error = relookup(fromparent_vnode, &fromchild_vnode,
+                                        from_name);
+                       if (error) {
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (fromchild_vnode == toparent_vnode) {
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               VOP_UNLOCK(fromchild_vnode, 0);
+                               error = EINVAL;
+                               goto fail_nolocks;
+                       }
+                       vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       /* XXX see note above */
+                       if (fromchild_vnode == ap->a_tvp) {
+                               VOP_UNLOCK(fromchild_vnode, 0);
+                               do_race_condition = 1;
+                       }
+                       error = relookup(toparent_vnode, &tochild_vnode,
+                                        to_name);
+                       if (error && error != ENOENT) {
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               VOP_UNLOCK(fromchild_vnode, 0);
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (error == ENOENT) {
+                               tochild_vnode = NULL;
+                       }
+                       if (do_race_condition) {
+                               if (tochild_vnode != fromchild_vnode) {
+                                       vn_lock(fromchild_vnode,
+                                               LK_EXCLUSIVE | LK_RETRY);
+                               }
+                       }
+               }
+               else {
+                       vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       error = relookup(toparent_vnode, &tochild_vnode,
+                                        to_name);
+                       if (error && error != ENOENT) {
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (error == ENOENT) {
+                               tochild_vnode = NULL;
+                       }
+                       if (fromparent_vnode == tochild_vnode) {
+                               VOP_UNLOCK(tochild_vnode, 0);
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               error = ENOTEMPTY;
+                               goto fail_nolocks;
+                       }
+                       vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+                       /* XXX see note above */
+                       if (tochild_vnode && tochild_vnode == ap->a_fvp) {
+                               VOP_UNLOCK(tochild_vnode, 0);
+                               do_race_condition = 1;
+                       }
+                       error = relookup(fromparent_vnode, &fromchild_vnode,
+                                        from_name);
+                       if (error) {
+                               VOP_UNLOCK(fromparent_vnode, 0);
+                               VOP_UNLOCK(tochild_vnode, 0);
+                               VOP_UNLOCK(toparent_vnode, 0);
+                               goto fail_nolocks;
+                       }
+                       if (do_race_condition) {
+                               if (fromchild_vnode != tochild_vnode) {
+                                       vn_lock(tochild_vnode,
+                                               LK_EXCLUSIVE | LK_RETRY);
+                               }
+                       }
+               }
+       }
        /*
-        * Check for cross-device rename.
+        * Note: after relookup to get fromchild_vnode and
+        * tochild_vnode, we have extra references to (what may or may
+        * not be the same) vnodes in ap->a_fvp and ap->a_tvp. These
+        * are cleaned up at function exit below.
         */
-       if ((fvp->v_mount != tdvp->v_mount) ||
-           (tvp && (fvp->v_mount != tvp->v_mount))) {
-               error = EXDEV;
- abortit:
-               VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
-               if (tdvp == tvp)
-                       vrele(tdvp);
-               else
-                       vput(tdvp);
-               if (tvp)
-                       vput(tvp);
-               VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
-               vrele(fdvp);
-               vrele(fvp);
-               return (error);
+
+       /* Now that we have fromchild, make sure it passes the parent check */
+       if (fromchild_vnode == illegal_vnode) {
+               error = EINVAL;
+               goto fail_withlocks;
        }
+       if (illegal_vnode)
+               vrele(illegal_vnode);
+       illegal_vnode = NULL;
 
        /*
-        * Check if just deleting a link name.
+        * All four vnodes are now locked, and we can proceed to do
+        * the real work.
         */
-       if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
-           (VTOI(tdvp)->i_flags & APPEND))) {
+
+       KASSERT(VOP_ISLOCKED(fromparent_vnode));
+       KASSERT(VOP_ISLOCKED(fromchild_vnode));
+       KASSERT(VOP_ISLOCKED(toparent_vnode));
+       KASSERT(tochild_vnode == NULL || VOP_ISLOCKED(tochild_vnode));
+
+       /* Get the rest of the inodes. */
+       fromchild_i = VTOI(fromchild_vnode);
+       tochild_i = tochild_vnode ? VTOI(tochild_vnode) : NULL;
+
+       KASSERT(fromparent_i != fromchild_i);
+       KASSERT(tochild_i == NULL || toparent_i != tochild_i);
+       /*
+        * Part 3. More checks.
+        */
+
+       /* More cross-directory cases. */
+       if (fromparent_vnode->v_mount != fromchild_vnode->v_mount) {
+               error = EXDEV;
+               goto fail_withlocks;
+       }
+       if (tochild_vnode != NULL &&
+           toparent_vnode->v_mount != tochild_vnode->v_mount) {
+               error = EXDEV;
+               goto fail_withlocks;
+       }
+
+       /* Check flags. */
+       if ((fromparent_i->i_flags & APPEND) != 0) {
                error = EPERM;
-               goto abortit;
+               goto fail_withlocks;
        }
-       if (fvp == tvp) {
-               if (fvp->v_type == VDIR) {
-                       error = EINVAL;
-                       goto abortit;
-               }
-
-               /* Release destination completely. */
-               VOP_ABORTOP(tdvp, tcnp);
-               vput(tdvp);
-               vput(tvp);
-
-               /* Delete source. */
-               vrele(fvp);
-               fcnp->cn_flags &= ~(MODMASK | SAVESTART);
-               fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
-               fcnp->cn_nameiop = DELETE;
-               vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
-               if ((error = relookup(fdvp, &fvp, fcnp))) {
-                       vput(fdvp);
-                       return (error);
-               }
-               return (VOP_REMOVE(fdvp, fvp, fcnp));
-       }
-       if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
-               goto abortit;
-       fdp = VTOI(fdvp);
-       ip = VTOI(fvp);
-       if ((nlink_t) ip->i_nlink >= LINK_MAX) {
-               VOP_UNLOCK(fvp, 0);
-               error = EMLINK;
-               goto abortit;
+       if ((fromchild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+               error = EPERM;
+               goto fail_withlocks;
        }
-       if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
-               (fdp->i_flags & APPEND)) {
-               VOP_UNLOCK(fvp, 0);
+       if ((toparent_i->i_flags & APPEND) != 0) {
                error = EPERM;
-               goto abortit;
+               goto fail_withlocks;
+       }
+       if (tochild_vnode != NULL &&
+           (tochild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+               error = EPERM;
+               goto fail_withlocks;
        }
-       if ((ip->i_mode & IFMT) == IFDIR) {
-               /*
-                * Avoid ".", "..", and aliases of "." for obvious reasons.
-                */
-               if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
-                   fdp == ip ||
-                   (fcnp->cn_flags & ISDOTDOT) ||
-                   (tcnp->cn_flags & ISDOTDOT) ||
-                   (ip->i_flag & IN_RENAME)) {
-                       VOP_UNLOCK(fvp, 0);
-                       error = EINVAL;
-                       goto abortit;
-               }
-               ip->i_flag |= IN_RENAME;
-               doingdirectory = 1;
-       }
-       oldparent = fdp->i_number;
-       VN_KNOTE(fdvp, NOTE_WRITE);             /* XXXLUKEM/XXX: right place? */
-
-       /*
-        * When the target exists, both the directory
-        * and target vnodes are returned locked.
-        */
-       tdp = VTOI(tdvp);
-       txp = NULL;
-       if (tvp)
-               txp = VTOI(tvp);
 
-       mp = fdvp->v_mount;
-       fstrans_start(mp, FSTRANS_SHARED);
+       /* Moving a directory over itself is not allowed. */
+       if (fromchild_vnode == tochild_vnode &&
+           fromchild_vnode->v_type == VDIR) {
+               error = EINVAL;
+               goto fail_withlocks;
+       }
 
-       /*
-        * If ".." must be changed (ie the directory gets a new
-        * parent) then the source directory must not be in the
-        * directory hierarchy above the target, as this would
-        * orphan everything below the source directory. Also
-        * the user must have write permission in the source so
-        * as to be able to change "..". We must repeat the call 
-        * to namei, as the parent directory is unlocked by the
-        * call to checkpath().
-        */
-       error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
-       VOP_UNLOCK(fvp, 0);
-       if (oldparent != tdp->i_number)
-               newparent = tdp->i_number;
-       if (doingdirectory && newparent) {
-               if (error)      /* write access check above */
-                       goto out;
-               if (txp != NULL)
-                       vput(tvp);
-               txp = NULL;
-               vref(tdvp);     /* compensate for the ref checkpath loses */
-               if ((error = ufs_checkpath(ip, tdp, tcnp->cn_cred)) != 0) {
-                       vrele(tdvp);
-                       tdp = NULL;
-                       goto out;
-               }
-               tcnp->cn_flags &= ~SAVESTART;
-               tdp = NULL;
-               vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
-               error = relookup(tdvp, &tvp, tcnp);
-               if (error != 0) {
-                       vput(tdvp);
-                       goto out;
-               }
-               tdp = VTOI(tdvp);
-               if (tvp)
-                       txp = VTOI(tvp);
+       /* Make sure bumping the target's link count won't overflow. */
+       if ((nlink_t) fromchild_i->i_nlink >= LINK_MAX) {
+               error = EMLINK;
+               goto fail_withlocks;
+       }
+
+       /* This flag is now useless and should be removed. */
+       if (fromchild_i->i_flag & IN_RENAME) {
+               error = EINVAL;
+               goto fail_withlocks;
        }
 
        /*
-        * XXX handle case where fdvp is parent of tdvp,
-        * by unlocking tdvp and regrabbing it with vget after?
+        * Part 4. Rename logic.
         */
 
-       /* save directory lookup information in case tdvp == fdvp */
-       saved_t_count  = tdp->i_count;
-       saved_t_endoff = tdp->i_endoff;
-       saved_t_diroff = tdp->i_diroff;
-       saved_t_offset = tdp->i_offset;
-       saved_t_reclen = tdp->i_reclen;
+       /* If moving a file over itself, just unlink the from end. */
+       if (fromchild_vnode == tochild_vnode) {
+               /* This case was ruled out above. */
+               KASSERT(fromchild_vnode->v_type != VDIR);
 
-       /*
-        * This was moved up to before the journal lock to
-        * avoid potential deadlock
-        */
-       fcnp->cn_flags &= ~(MODMASK | SAVESTART);
-       fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
-       if (newparent) {
-               /* Check for the rename("foo/foo", "foo") case. */
-               if (fdvp == tvp) {
-                       error = doingdirectory ? ENOTEMPTY : EISDIR;
-                       goto out;
+               /* Release to-side completely. */
+               VOP_ABORTOP(toparent_vnode, to_name);
+               if (toparent_vnode != fromparent_vnode) {
+                       VOP_UNLOCK(toparent_vnode, 0);
                }
-               vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
-               if ((error = relookup(fdvp, &fvp, fcnp))) {
-                       vput(fdvp);
-                       vrele(ap->a_fvp);
-                       goto out2;
-               }
-       } else {
-               error = VOP_LOOKUP(fdvp, &fvp, fcnp);
-               if (error && (error != EJUSTRETURN)) {
-                       vrele(ap->a_fvp);
-                       goto out2;
-               }
-               error = 0;
-       }
-       if (fvp != NULL) {
-               fxp = VTOI(fvp);
-               fdp = VTOI(fdvp);
-       } else {
-               /*
-                * From name has disappeared.
-                */
-               if (doingdirectory)
-                       panic("rename: lost dir entry");
+               vrele(toparent_vnode);
+               vrele(tochild_vnode);
+
+               /* Clean up a_fvp/a_tvp. */
                vrele(ap->a_fvp);
-               error = ENOENT; /* XXX ufs_rename sets "0" here */
-               goto out2;
+               if (ap->a_tvp != NULL) {
+                       vrele(ap->a_tvp);
+               }
+
+               /* Now unlink the from-side. */
+               from_name->cn_flags &= ~(MODMASK | SAVESTART);
+               from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+               from_name->cn_nameiop = DELETE;
+               return VOP_REMOVE(fromparent_vnode, fromchild_vnode, from_name);
+       }
+
+       oldparent = fromparent_i->i_number;
+       newparent = toparent_i->i_number;
+       doingdirectory = (fromchild_i->i_mode & IFMT) == IFDIR;
+
+       /* XXX update the logic below to not test "newparent!=0" */
+       if (oldparent == newparent) {
+               newparent = 0;
+       }
+       /* Check permissions. */
+       error = VOP_ACCESS(fromchild_vnode, VWRITE, to_name->cn_cred);
+       if (error) {
+               goto fail_withlocks;
        }
-       vrele(ap->a_fvp);
 
-       /* save directory lookup information in case tdvp == fdvp */
-       saved_f_count  = fdp->i_count;
-       saved_f_diroff = fdp->i_diroff;
-       saved_f_offset = fdp->i_offset;
-       saved_f_reclen = fdp->i_reclen;
-
-       /* restore directory lookup information in case tdvp == fdvp */
-       tdp->i_offset = saved_t_offset;
-       tdp->i_reclen = saved_t_reclen;
-       tdp->i_count  = saved_t_count;
-       tdp->i_endoff = saved_t_endoff;
-       tdp->i_diroff = saved_t_diroff;
+       noabort = 1;
 
-       error = UFS_WAPBL_BEGIN(fdvp->v_mount);
+       if (doingdirectory) {
+               fromchild_i->i_flag |= IN_RENAME;
+       }
+       /* XXXLUKEM/XXX: right place? */
+       VN_KNOTE(fromparent_vnode, NOTE_WRITE);
+
+       mp = fromparent_vnode->v_mount;
+       fstrans_start(mp, FSTRANS_SHARED);
+
+       error = UFS_WAPBL_BEGIN(fromparent_vnode->v_mount);
        if (error)
-               goto out2;
+               goto fail_withfstrans;
+
+       /*
+        * restore directory lookup information in case toparent_vnode
+        * == fromparent_vnode
+        */
+       if (fromparent_vnode == toparent_vnode) {
+               toparent_i->i_offset = saved_t_offset;
+               toparent_i->i_reclen = saved_t_reclen;
+               toparent_i->i_count  = saved_t_count;
+               toparent_i->i_endoff = saved_t_endoff;
+               toparent_i->i_diroff = saved_t_diroff;
+       }
 
        /*
         * 1) Bump link count while we're moving stuff
@@ -400,14 +685,15 @@ wapbl_ufs_rename(void *v)
         *    completing our work, the link count
         *    may be wrong, but correctable.
         */
-       ip->i_ffs_effnlink++;
-       ip->i_nlink++;
-       DIP_ASSIGN(ip, nlink, ip->i_nlink);
-       ip->i_flag |= IN_CHANGE;
-       if (DOINGSOFTDEP(fvp))
-               softdep_change_linkcnt(ip);
-       if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
-               goto bad;
+       fromchild_i->i_ffs_effnlink++;
+       fromchild_i->i_nlink++;
+       DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+       fromchild_i->i_flag |= IN_CHANGE;
+       if (DOINGSOFTDEP(fromchild_vnode))
+               softdep_change_linkcnt(fromchild_i);
+       error = UFS_UPDATE(fromchild_vnode, NULL, NULL, UPDATE_DIROP);
+       if (error) {
+               goto fail_withwapbl;
        }
 
        /*
@@ -417,8 +703,9 @@ wapbl_ufs_rename(void *v)
         *    entry to reference the source inode and
         *    expunge the original entry's existence.
         */
-       if (txp == NULL) {
-               if (tdp->i_dev != ip->i_dev)
+
+       if (tochild_i == NULL) {
+               if (toparent_i->i_dev != fromchild_i->i_dev)
                        panic("rename: EXDEV");
                /*
                 * Account for ".." in new directory.
@@ -426,52 +713,53 @@ wapbl_ufs_rename(void *v)
                 * parent we don't fool with the link count.
                 */
                if (doingdirectory && newparent) {
-                       if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
+                       if ((nlink_t)toparent_i->i_nlink >= LINK_MAX) {
                                error = EMLINK;
-                               goto bad;
+                               goto fail_withlinkcount;
                        }
-                       tdp->i_ffs_effnlink++;
-                       tdp->i_nlink++;
-                       DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
-                       tdp->i_flag |= IN_CHANGE;
-                       if (DOINGSOFTDEP(tdvp))
-                               softdep_change_linkcnt(tdp);
-                       if ((error = UFS_UPDATE(tdvp, NULL, NULL,
+                       toparent_i->i_ffs_effnlink++;
+                       toparent_i->i_nlink++;
+                       DIP_ASSIGN(toparent_i, nlink, toparent_i->i_nlink);
+                       toparent_i->i_flag |= IN_CHANGE;
+                       if (DOINGSOFTDEP(toparent_vnode))
+                               softdep_change_linkcnt(toparent_i);
+                       if ((error = UFS_UPDATE(toparent_vnode, NULL, NULL,
                            UPDATE_DIROP)) != 0) {
-                               tdp->i_ffs_effnlink--;
-                               tdp->i_nlink--;
-                               DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
-                               tdp->i_flag |= IN_CHANGE;
-                               if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(tdp);
-                               goto bad;
+                               toparent_i->i_ffs_effnlink--;
+                               toparent_i->i_nlink--;
+                               DIP_ASSIGN(toparent_i, nlink, 
toparent_i->i_nlink);
+                               toparent_i->i_flag |= IN_CHANGE;
+                               if (DOINGSOFTDEP(toparent_vnode))
+                                       softdep_change_linkcnt(toparent_i);
+                               goto fail_withlinkcount;
                        }
                }
                newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
-               ufs_makedirentry(ip, tcnp, newdir);
-               error = ufs_direnter(tdvp, NULL, newdir, tcnp, NULL);
+               ufs_makedirentry(fromchild_i, to_name, newdir);
+               error = ufs_direnter(toparent_vnode, NULL, newdir, to_name, 
NULL);
                pool_cache_put(ufs_direct_cache, newdir);
                if (error != 0) {
                        if (doingdirectory && newparent) {
-                               tdp->i_ffs_effnlink--;
-                               tdp->i_nlink--;
-                               DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
-                               tdp->i_flag |= IN_CHANGE;
-                               if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(tdp);
-                               (void)UFS_UPDATE(tdvp, NULL, NULL,
+                               toparent_i->i_ffs_effnlink--;
+                               toparent_i->i_nlink--;
+                               DIP_ASSIGN(toparent_i, nlink, 
toparent_i->i_nlink);
+                               toparent_i->i_flag |= IN_CHANGE;
+                               if (DOINGSOFTDEP(toparent_vnode))
+                                       softdep_change_linkcnt(toparent_i);
+                               (void)UFS_UPDATE(toparent_vnode, NULL, NULL,
                                                 UPDATE_WAIT | UPDATE_DIROP);
                        }
-                       goto bad;
+                       goto fail_withlinkcount;
                }
-               VN_KNOTE(tdvp, NOTE_WRITE);
+               VN_KNOTE(toparent_vnode, NOTE_WRITE);
        } else {
-               if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
+               if (tochild_i->i_dev != toparent_i->i_dev ||
+                   tochild_i->i_dev != fromchild_i->i_dev)
                        panic("rename: EXDEV");
                /*
                 * Short circuit rename(foo, foo).
                 */
-               if (txp->i_number == ip->i_number)
+               if (tochild_i->i_number == fromchild_i->i_number)
                        panic("rename: same file");
                /*
                 * If the parent directory is "sticky", then the user must
@@ -479,49 +767,51 @@ wapbl_ufs_rename(void *v)
                 * otherwise the destination may not be changed (except by
                 * root). This implements append-only directories.
                 */
-               if ((tdp->i_mode & S_ISTXT) &&
-                   kauth_authorize_generic(tcnp->cn_cred,
+               if ((toparent_i->i_mode & S_ISTXT) &&
+                   kauth_authorize_generic(to_name->cn_cred,
                     KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
-                   kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
-                   txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
+                   kauth_cred_geteuid(to_name->cn_cred) != toparent_i->i_uid &&
+                   tochild_i->i_uid != kauth_cred_geteuid(to_name->cn_cred)) {
                        error = EPERM;
-                       goto bad;
+                       goto fail_withlinkcount;
                }
                /*
                 * Target must be empty if a directory and have no links
                 * to it. Also, ensure source and target are compatible
                 * (both directories, or both not directories).
                 */
-               if ((txp->i_mode & IFMT) == IFDIR) {
-                       if (txp->i_ffs_effnlink > 2 ||
-                           !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
+               if ((tochild_i->i_mode & IFMT) == IFDIR) {
+                       if (tochild_i->i_ffs_effnlink > 2 ||
+                           !ufs_dirempty(tochild_i, toparent_i->i_number,
+                                         to_name->cn_cred)) {
                                error = ENOTEMPTY;
-                               goto bad;
+                               goto fail_withlinkcount;
                        }
                        if (!doingdirectory) {
                                error = ENOTDIR;
-                               goto bad;
+                               goto fail_withlinkcount;
                        }
-                       cache_purge(tdvp);
+                       cache_purge(toparent_vnode);
                } else if (doingdirectory) {
                        error = EISDIR;
-                       goto bad;
+                       goto fail_withlinkcount;
                }
-               if ((error = ufs_dirrewrite(tdp, txp, ip->i_number,
-                   IFTODT(ip->i_mode), doingdirectory && newparent ?
+               if ((error = ufs_dirrewrite(toparent_i, tochild_i,
+                   fromchild_i->i_number,
+                   IFTODT(fromchild_i->i_mode), doingdirectory && newparent ?
                    newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
-                       goto bad;
+                       goto fail_withlinkcount;
                if (doingdirectory) {
                        if (!newparent) {
-                               tdp->i_ffs_effnlink--;
-                               if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(tdp);
-                       }
-                       txp->i_ffs_effnlink--;
-                       if (DOINGSOFTDEP(tvp))
-                               softdep_change_linkcnt(txp);
+                               toparent_i->i_ffs_effnlink--;
+                               if (DOINGSOFTDEP(toparent_vnode))
+                                       softdep_change_linkcnt(toparent_i);
+                       }
+                       tochild_i->i_ffs_effnlink--;
+                       if (DOINGSOFTDEP(tochild_vnode))
+                               softdep_change_linkcnt(tochild_i);
                }
-               if (doingdirectory && !DOINGSOFTDEP(tvp)) {
+               if (doingdirectory && !DOINGSOFTDEP(tochild_vnode)) {
                        /*
                         * Truncate inode. The only stuff left in the directory
                         * is "." and "..". The "." reference is inconsequential
@@ -534,34 +824,39 @@ wapbl_ufs_rename(void *v)
                         * them now.
                         */
                        if (!newparent) {
-                               tdp->i_nlink--;
-                               DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
-                               tdp->i_flag |= IN_CHANGE;
-                               UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
-                       }
-                       txp->i_nlink--;
-                       DIP_ASSIGN(txp, nlink, txp->i_nlink);
-                       txp->i_flag |= IN_CHANGE;
-                       if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
-                           tcnp->cn_cred)))
-                               goto bad;
-               }
-               VN_KNOTE(tdvp, NOTE_WRITE);
-               VN_KNOTE(tvp, NOTE_DELETE);
-       }
-
-       /* restore directory lookup information in case tdvp == fdvp */
-       fdp->i_offset = saved_f_offset;
-       fdp->i_reclen = saved_f_reclen;
-       fdp->i_count  = saved_f_count;
-       fdp->i_diroff = saved_f_diroff;
+                               toparent_i->i_nlink--;
+                               DIP_ASSIGN(toparent_i, nlink, 
toparent_i->i_nlink);
+                               toparent_i->i_flag |= IN_CHANGE;
+                               UFS_WAPBL_UPDATE(toparent_vnode, NULL, NULL, 0);
+                       }
+                       tochild_i->i_nlink--;
+                       DIP_ASSIGN(tochild_i, nlink, tochild_i->i_nlink);
+                       tochild_i->i_flag |= IN_CHANGE;
+                       if ((error = UFS_TRUNCATE(tochild_vnode, (off_t)0,
+                           IO_SYNC, to_name->cn_cred)))
+                               goto fail_withlinkcount;
+               }
+               VN_KNOTE(toparent_vnode, NOTE_WRITE);
+               VN_KNOTE(tochild_vnode, NOTE_DELETE);
+       }
+
+       /*
+        * restore directory lookup information in case toparent_vnode
+        * == fromparent_vnode
+        */
+       if (fromparent_vnode == toparent_vnode) {
+               fromparent_i->i_offset = saved_f_offset;
+               fromparent_i->i_reclen = saved_f_reclen;
+               fromparent_i->i_count  = saved_f_count;
+               fromparent_i->i_diroff = saved_f_diroff;
+       }
 
        /*
         * Handle case where the directory we need to remove may have
         * been moved when the directory insertion above performed compaction.
         * or when i_count may be wrong due to insertion before this entry.
         */
-       if ((tdp->i_number == fdp->i_number) &&
+       if ((toparent_i->i_number == fromparent_i->i_number) &&
                (((saved_f_offset >= saved_t_offset) &&
                        (saved_f_offset < saved_t_offset + saved_t_count)) ||
                ((saved_f_offset - saved_f_count >= saved_t_offset) &&
@@ -569,7 +864,7 @@ wapbl_ufs_rename(void *v)
                         saved_t_offset + saved_t_count)))) {
                struct buf *bp;
                struct direct *ep;
-               struct ufsmount *ump = fdp->i_ump;
+               struct ufsmount *ump = fromparent_i->i_ump;
                doff_t endsearch;       /* offset to end directory search */
                int dirblksiz = ump->um_dirblksiz;
                const int needswap = UFS_MPNEEDSWAP(ump);
@@ -577,37 +872,38 @@ wapbl_ufs_rename(void *v)
                int namlen, entryoffsetinblock;
                char *dirbuf;
 
-               bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;
+               bmask = fromparent_vnode->v_mount->mnt_stat.f_iosize - 1;
 
                /*
-                * the fcnp entry will be somewhere between the start of
+                * the from_name entry will be somewhere between the start of
                 * compaction and the original location.
                 */
-               fdp->i_offset = saved_t_offset;
-               error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset, &dirbuf, &bp,
-                   false);
+               fromparent_i->i_offset = saved_t_offset;
+               error = ufs_blkatoff(fromparent_vnode,
+                   (off_t)fromparent_i->i_offset, &dirbuf, &bp, false);
                if (error)
-                       goto bad;
+                       goto fail_withlinkcount;
 
                /*
-                * keep existing fdp->i_count in case
-                * compaction started at the same location as the fcnp entry.
+                * keep existing fromparent_i->i_count in case
+                * compaction started at the same location as the from_name 
entry.
                 */
                endsearch = saved_f_offset + saved_f_reclen;
                entryoffsetinblock = 0;
-               while (fdp->i_offset < endsearch) {
+               while (fromparent_i->i_offset < endsearch) {
                        int reclen;
 
                        /*
                         * If necessary, get the next directory block.
                         */
-                       if ((fdp->i_offset & bmask) == 0) {
+                       if ((fromparent_i->i_offset & bmask) == 0) {
                                if (bp != NULL)
                                        brelse(bp, 0);
-                               error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset,
-                                   &dirbuf, &bp, false);
+                               error = ufs_blkatoff(fromparent_vnode,
+                                   (off_t)fromparent_i->i_offset, &dirbuf, &bp,
+                                   false);
                                if (error)
-                                       goto bad;
+                                       goto fail_withlinkcount;
                                entryoffsetinblock = 0;
                        }
 
@@ -616,37 +912,38 @@ wapbl_ufs_rename(void *v)
                        reclen = ufs_rw16(ep->d_reclen, needswap);
 
 #if (BYTE_ORDER == LITTLE_ENDIAN)
-                       if (FSFMT(fdvp) && needswap == 0)
+                       if (FSFMT(fromparent_vnode) && needswap == 0)
                                namlen = ep->d_type;
                        else
                                namlen = ep->d_namlen;
 #else
-                       if (FSFMT(fdvp) && needswap != 0)
+                       if (FSFMT(fromparent_vnode) && needswap != 0)
                                namlen = ep->d_type;
                        else
                                namlen = ep->d_namlen;
 #endif
                        if ((ep->d_ino != 0) &&
                            (ufs_rw32(ep->d_ino, needswap) != WINO) &&
-                           (namlen == fcnp->cn_namelen) &&
-                           memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
-                               fdp->i_reclen = reclen;
+                           (namlen == from_name->cn_namelen) &&
+                           memcmp(ep->d_name, from_name->cn_nameptr, namlen) 
== 0) {
+                               fromparent_i->i_reclen = reclen;
                                break;
                        }
-                       fdp->i_offset += reclen;
-                       fdp->i_count = reclen;
+                       fromparent_i->i_offset += reclen;
+                       fromparent_i->i_count = reclen;
                        entryoffsetinblock += reclen;
                }
 
-               KASSERT(fdp->i_offset <= endsearch);
+               KASSERT(fromparent_i->i_offset <= endsearch);
 
                /*
-                * If fdp->i_offset points to start of a directory block,
-                * set fdp->i_count so ufs_dirremove() doesn't compact over
-                * a directory block boundary.
+                * If fromparent_i->i_offset points to start of a
+                * directory block, set fromparent_i->i_count so
+                * ufs_dirremove() doesn't compact over a directory
+                * block boundary.
                 */
-               if ((fdp->i_offset & (dirblksiz - 1)) == 0)
-                       fdp->i_count = 0;
+               if ((fromparent_i->i_offset & (dirblksiz - 1)) == 0)
+                       fromparent_i->i_count = 0;
 
                brelse(bp, 0);
        }
@@ -663,68 +960,96 @@ wapbl_ufs_rename(void *v)
         * flag ensures that it cannot be moved by another rename or removed
         * by a rmdir.
         */
-       if (fxp != ip) {
-               if (doingdirectory)
-                       panic("rename: lost dir entry");
+
+       /*
+        * If the source is a directory with a
+        * new parent, the link count of the old
+        * parent directory must be decremented
+        * and ".." set to point to the new parent.
+        */
+       if (doingdirectory && newparent) {
+               KASSERT(toparent_i != NULL);
+               fromchild_i->i_offset = mastertemplate.dot_reclen;
+               ufs_dirrewrite(fromchild_i, toparent_i,
+                   newparent, DT_DIR, 0, IN_CHANGE);
+               cache_purge(fromparent_vnode);
+       }
+       error = ufs_dirremove(fromparent_vnode, fromchild_i,
+           from_name->cn_flags, 0);
+       VN_KNOTE(fromchild_vnode, NOTE_RENAME);
+
+       if (error) {
+               goto fail_withwapbl;
+       }
+       goto succeed;
+
+fail_withlinkcount:
+       fromchild_i->i_ffs_effnlink--;
+       fromchild_i->i_nlink--;
+       DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+       fromchild_i->i_flag |= IN_CHANGE;
+       UFS_WAPBL_UPDATE(fromchild_vnode, NULL, NULL, 0);
+       if (DOINGSOFTDEP(fromchild_vnode))
+               softdep_change_linkcnt(fromchild_i);
+
+fail_withwapbl:
+succeed:
+       UFS_WAPBL_END(fromparent_vnode->v_mount);
+
+fail_withfstrans:
+       fstrans_done(mp);
+       fromchild_i->i_flag &= ~IN_RENAME;
+
+fail_withlocks:
+       /* See comment up top about which of these can legally be the same. */
+       if (fromparent_vnode == toparent_vnode) {
+               VOP_UNLOCK(fromparent_vnode, 0);
        } else {
-               /*
-                * If the source is a directory with a
-                * new parent, the link count of the old
-                * parent directory must be decremented
-                * and ".." set to point to the new parent.
-                */
-               if (doingdirectory && newparent) {
-                       KASSERT(fdp != NULL);
-                       fxp->i_offset = mastertemplate.dot_reclen;
-                       ufs_dirrewrite(fxp, fdp, newparent, DT_DIR, 0, 
IN_CHANGE);
-                       cache_purge(fdvp);
-               }
-               error = ufs_dirremove(fdvp, fxp, fcnp->cn_flags, 0);
-               fxp->i_flag &= ~IN_RENAME;
-       }
-       VN_KNOTE(fvp, NOTE_RENAME);
-       goto done;
-
- out:
-       vrele(fvp);
-       vrele(fdvp);
-       goto out2;
-
-       /* exit routines from steps 1 & 2 */
- bad:
-       if (doingdirectory)
-               ip->i_flag &= ~IN_RENAME;
-       ip->i_ffs_effnlink--;
-       ip->i_nlink--;
-       DIP_ASSIGN(ip, nlink, ip->i_nlink);
-       ip->i_flag |= IN_CHANGE;
-       ip->i_flag &= ~IN_RENAME;
-       UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
-       if (DOINGSOFTDEP(fvp))
-               softdep_change_linkcnt(ip);
- done:
-       UFS_WAPBL_END(fdvp->v_mount);
-       vput(fdvp);
-       vput(fvp);
- out2:
-       /*
-        * clear IN_RENAME - some exit paths happen too early to go
-        * through the cleanup done in the "bad" case above, so we
-        * always do this mini-cleanup here.
-        */
-       ip->i_flag &= ~IN_RENAME;
-
-       if (txp)
-               vput(ITOV(txp));
-       if (tdp) {
-               if (newparent)
-                       vput(ITOV(tdp));
-               else
-                       vrele(ITOV(tdp));
+               VOP_UNLOCK(fromparent_vnode, 0);
+               VOP_UNLOCK(toparent_vnode, 0);
+       }
+       if (fromchild_vnode == tochild_vnode) {
+               VOP_UNLOCK(fromchild_vnode, 0);
+       } else {
+               VOP_UNLOCK(fromchild_vnode, 0);
+               if (tochild_vnode) {
+                       VOP_UNLOCK(tochild_vnode, 0);
+               }
+       }
+fail_nolocks:
+       if (fromchild_vnode != NULL) {
+               vrele(fromchild_vnode);
+               fromchild_vnode = NULL;
+       }
+       if (tochild_vnode != NULL) {
+               vrele(tochild_vnode);
+               tochild_vnode = NULL;
+       }
+       if (illegal_vnode) {
+               vrele(illegal_vnode);
+               illegal_vnode = NULL;
        }
 
-       fstrans_done(mp);
-       return (error);
+fail_early:
+       if (!noabort) {
+               /* XXX, why not in NFS? */
+               VOP_ABORTOP(fromparent_vnode, from_name);
+               VOP_ABORTOP(toparent_vnode, to_name);
+       }
+
+       vrele(fromparent_vnode);
+       vrele(toparent_vnode);
+
+       KASSERT(fromchild_vnode == NULL);
+       KASSERT(tochild_vnode == NULL);
+       KASSERT(illegal_vnode == NULL);
+
+       vrele(ap->a_fvp);
+       if (ap->a_tvp != NULL) {
+               vrele(ap->a_tvp);
+       }
+
+       return error;
 }
 
 #ifdef WAPBL_DEBUG_INODES


Home | Main Index | Thread Index | Old Index