Subject: kern/11547: various fixes in the lfs syscalls and lfs_cleanerd
To: None <gnats-bugs@gnats.netbsd.org>
From: None <joff@gci-net.com>
List: netbsd-bugs
Date: 11/21/2000 21:13:16
>Number:         11547
>Category:       kern
>Synopsis:       various fixes in the lfs syscalls and lfs_cleanerd
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Nov 21 21:13:00 PST 2000
>Closed-Date:
>Last-Modified:
>Originator:     Jesse Off
>Release:        1.5BETA2
>Organization:
>Environment:
NetBSD construct.home 1.5_BETA NetBSD 1.5_BETA (CONSTRUCT) #30: Tue Nov 21 19:51:41 MST 2000     root@construct.home:/usr/src/sys/arch/i386/compile/CONSTRUCT i386

>Description:
The other day, I got a panic:

lfs_unmount: still dirty blocks on ifile vnode

on filesystem unmount time.  Upon reboot, the number of segs clean and
lfs_avail was wrong.  What I think happened was the cleaner made a call to
lfs_segclean during the unmounting, dirtying blocks on the ifile segment
usage table and incrementing lfs_avail.  

This got me to looking at the lfs_syscalls.c and I noticed the other
system calls could do bad things like this too.  I've included a patch
to wrap all the system calls with vfs_busy() calls to delay unmounts
until the syscall exits.  Also, while I was at it, I fixed a few other
things I found fishy in lfs_syscalls.c:

  *) Another lfs_avail leak when cleaning up from an error in lfs_markv
  *) A potential missing VOP_UNLOCK while cleaning up after a error in markv
  *) Change the return value for an outdated/bad/unmounted fsid_t from
     EINVAL to ENOENT which is consistent with the way vfs_busy() works.
     What do you think about this? 
  *) wrapping lfs_markv, lfs_bmapv, and lfs_segclean in vfs_(un)busy()
     calls (already mentioned above)

Then, while testing these fixes, I found and fixed some other potential 
problems in lfs_cleanerd:

  *) We do a superfluous free() when we error out of add_segment().  The
     calling code expects there to still be a valid (unfreed)
     SEGS_AND_BLOCKS struct even if add_segment returns error.  The 
     effect of this is that we can pass bad addresses to markv().
  *) If our filesystem is unmounted in add_segment() and all of a sudden
     lfs_bmapv() starts returning ENOENT, we get stuck in a loop where
     every segment has to be read in until we get into a context where we
     _do_ check to see if the filesystem is unmounted and exit.  I've 
     fixed this with a check to errno for ENOENT after add_segment fails.
     If it is ENOENT, we break out of the loop. 
  *) There is potential in clean_segments() for an infinite loop if a
     single lfs_markv() call fails.  This is because the exit condition
     for the loop is only satisfied when a variable that is only 
     decremented on lfs_markv() successes reaches 0.  Again, the fix
     is trivial.  I also included a similar ENOENT short circuit if
     lfs_markv() errs because the filesystem has been unmounted.


Note that messages will still appear in syslog from lfs_cleanerd about
lfs_bmapv, markv, segclean, etc failing with "No such file or directory",
when the fs is unmounted in the middle of a cleaning pass, but these
can be safely ignored.  
>How-To-Repeat:
Well, not all these problems were repeatable.  But I did my
testing by filling up a filesystem as root with 

dd if=/dev/zero of=out bs=64k  (Surprising, this works now without a panic!!)

then, when dd finally returns ENOSPC and the cleaner starts grinding
away endlessly, I unmount.  Each time I mount, the cleaner automatically
restarts in a bmapv/markv cycle that I can then interrupt with an unmount.
Repeat this many times, trying to uninterrupt in different phases in
the cleaning cycle.

>Fix:
Two patches are inclosed below, one for /libexec/lfs_cleanerd/cleanerd.c
and one for /sys/ufs/lfs/lfs_syscalls.c

RCS file: /cvsroot/basesrc/libexec/lfs_cleanerd/cleanerd.c,v
retrieving revision 1.25
diff -u -r1.25 cleanerd.c
--- cleanerd.c  2000/11/13 22:12:50     1.25
+++ cleanerd.c  2000/11/22 04:24:38
@@ -61,6 +61,7 @@
 #include <time.h>
 #include <unistd.h>
 #include <util.h>
+#include <errno.h>
 
 #include <syslog.h>
 
@@ -527,7 +528,7 @@
                                if (add_segment(fsp, sp, sbp) < 0) {
                                        syslog(LOG_NOTICE,"add_segment failed"
                                               " segment %ld: %m", sp->sl_id);
-                                       if (sbp->nsegs == 0)
+                                       if (sbp->nsegs == 0 && errno != ENOENT)
                                                continue;
                                        else
                                                break;
@@ -543,7 +544,7 @@
                                if (add_segment(fsp, sp, sbp) != 0) {
                                        syslog(LOG_NOTICE,"add_segment failed"
                                               " segment %ld: %m", sp->sl_id);
-                                       if (sbp->nsegs == 0)
+                                       if (sbp->nsegs == 0 && errno != ENOENT)
                                                continue;
                                        else
                                                break;
@@ -846,8 +847,6 @@
        --sbp->nsegs;
        if (tba)
                free(tba);
-       if (sbp->ba)
-               free(sbp->ba);
        if (error) {
                sp->su_flags |= SEGUSE_ERROR;
                ++cleaner_stats.segs_error;
@@ -886,10 +885,9 @@
                if ((error = lfs_markv(&fsp->fi_statfsp->f_fsid,
                                       bp, clean_blocks)) < 0) {
                        syslog(LOG_WARNING,"clean_segment: lfs_markv failed: %m");
-                       ++cleaner_stats.segs_error;
+                       if (errno == ENOENT) break;
                }
-               else
-                       sbp->nb -= clean_blocks;
+               sbp->nb -= clean_blocks;
        }
 
        /* Clean up */



Index: lfs_syscalls.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/lfs/lfs_syscalls.c,v
retrieving revision 1.41.4.5
diff -u -r1.41.4.5 lfs_syscalls.c
--- lfs_syscalls.c      2000/11/01 03:53:38     1.41.4.5
+++ lfs_syscalls.c      2000/11/22 04:21:53
@@ -175,12 +175,13 @@
                return (error);
 
        if ((mntp = vfs_getvfs(&fsid)) == NULL)
-               return (EINVAL);
+               return (ENOENT);
 
        fs = VFSTOUFS(mntp)->um_lfs;
 
        if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
                return (error);
+
 
        maxino = (dbtofsb(fs, VTOI(fs->lfs_ivnode)->i_ffs_blocks) -
                      fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;
@@ -191,6 +192,9 @@
        if (error)
                goto err1;
 
+       if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
+               return (error);
+
        /*
         * This seglock is just to prevent the fact that we might have to sleep
         * from allowing the possibility that our blocks might become
@@ -472,6 +476,7 @@
        }
 #endif
 
+       vfs_unbusy(mntp);
        if(error)
                return (error);
        else if(do_again)
@@ -481,7 +486,13 @@
 
  err2:
        printf("lfs_markv err2\n");
+       if(lfs_fastvget_unlock) {
+               VOP_UNLOCK(vp, 0);
+               --numlocked;
+       }
        lfs_vunref(vp);
+       --numrefed;
+
        /* Free up fakebuffers -- have to take these from the LOCKED list */
  again:
        s = splbio();
@@ -494,6 +505,8 @@
                                splx(s);
                                goto again;
                        }
+                       if(bp->b_flags & B_DELWRI) 
+                               fs->lfs_avail += btodb(bp->b_bcount);
                        bremfree(bp);
                        splx(s);
                        brelse(bp);
@@ -504,6 +517,11 @@
        free(start, M_SEGMENT);
        lfs_segunlock(fs);
        vfs_unbusy(mntp);
+#ifdef DEBUG_LFS
+       if(numlocked != 0 || numrefed != 0) {
+               panic("lfs_markv: numlocked=%d numrefed=%d", numlocked, numrefed);
+       }
+#endif
        return (error);
 
  err1:
@@ -558,15 +576,18 @@
        if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
                return (error);
        if ((mntp = vfs_getvfs(&fsid)) == NULL)
-               return (EINVAL);
+               return (ENOENT);
 
        ump = VFSTOUFS(mntp);
+       if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
+               return (error);
 
        origcnt = cnt = SCARG(uap, blkcnt);
        start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
        error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO));
        if (error) {
                free(start, M_SEGMENT);
+               vfs_unbusy(mntp);
                return (error);
        }
 
@@ -584,6 +605,7 @@
                        printf("lfs_bmapv: attempt to clean current segment? (#%d)\n",
                               datosn(fs, fs->lfs_curseg));
                        free(start,M_SEGMENT);
+                       vfs_unbusy(mntp);
                        return (EBUSY);
                }
 #endif /* DEBUG */
@@ -600,6 +622,7 @@
                                printf("lfs_bmapv: attempt to clean pending segment? (#%d)\n",
                                       datosn(fs, fs->lfs_pending[j]));
                                free(start,M_SEGMENT);
+                               vfs_unbusy(mntp);
                                return (EBUSY);
                        }
                }
@@ -736,6 +759,7 @@
 
        copyout(start, SCARG(uap, blkiov), origcnt * sizeof(BLOCK_INFO));
        free(start, M_SEGMENT);
+       vfs_unbusy(mntp);
 
        return 0;
 }
@@ -772,16 +796,19 @@
        if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
                return (error);
        if ((mntp = vfs_getvfs(&fsid)) == NULL)
-               return (EINVAL);
+               return (ENOENT);
 
        fs = VFSTOUFS(mntp)->um_lfs;
 
        if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
                return (EBUSY);
 
+       if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0) 
+               return (error);
        LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
        if (sup->su_flags & SEGUSE_ACTIVE) {
                brelse(bp);
+               vfs_unbusy(mntp);
                return (EBUSY);
        }
 
@@ -805,6 +832,7 @@
        cip->avail = fs->lfs_avail - fs->lfs_ravail;
        (void) VOP_BWRITE(bp);
        wakeup(&fs->lfs_avail);
+       vfs_unbusy(mntp);
 
        return (0);
 }

>Release-Note:
>Audit-Trail:
>Unformatted: