Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys reorganize ffs_truncate()/ffs_indirtrunc() to be able to...



details:   https://anonhg.NetBSD.org/src/rev/756f1a4d59a3
branches:  trunk
changeset: 818748:756f1a4d59a3
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Fri Oct 28 20:38:12 2016 +0000

description:
reorganize ffs_truncate()/ffs_indirtrunc() to be able to partially
succeed; change wapbl_register_deallocation() to return EAGAIN
rather than panic when code hits the limit

callers changed to either loop calling ffs_truncate() using the new
utility ufs_truncate_retry() if their semantics require it, or
just ignore the failure; remove ufs_wapbl_truncate()

this fixes a possible user-triggerable panic during truncate, and
resolves a WAPBL performance issue with truncates of large files

PR kern/47146 and kern/49175

diffstat:

 sys/kern/vfs_wapbl.c       |   44 +++++++++++-----
 sys/sys/wapbl.h            |    4 +-
 sys/ufs/ffs/ffs_alloc.c    |   15 ++++-
 sys/ufs/ffs/ffs_inode.c    |  117 +++++++++++++++++++++++++--------------------
 sys/ufs/ffs/ffs_snapshot.c |   17 +++++-
 sys/ufs/ufs/ufs_extern.h   |    4 +-
 sys/ufs/ufs/ufs_inode.c    |   76 +++++++++++++----------------
 sys/ufs/ufs/ufs_rename.c   |    8 +-
 sys/ufs/ufs/ufs_vnops.c    |   10 ++-
 sys/ufs/ufs/ufs_wapbl.h    |   14 ++++-
 10 files changed, 180 insertions(+), 129 deletions(-)

diffs (truncated from 685 to 300 lines):

diff -r 2995916ee6af -r 756f1a4d59a3 sys/kern/vfs_wapbl.c
--- a/sys/kern/vfs_wapbl.c      Fri Oct 28 20:30:37 2016 +0000
+++ b/sys/kern/vfs_wapbl.c      Fri Oct 28 20:38:12 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vfs_wapbl.c,v 1.84 2016/10/02 16:52:27 jdolecek Exp $  */
+/*     $NetBSD: vfs_wapbl.c,v 1.85 2016/10/28 20:38:12 jdolecek Exp $  */
 
 /*-
  * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
 #define WAPBL_INTERNAL
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.84 2016/10/02 16:52:27 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.85 2016/10/28 20:38:12 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/bitops.h>
@@ -1929,22 +1929,35 @@
 
 /****************************************************************/
 
-void
-wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len)
+int
+wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len, bool force)
 {
        struct wapbl_dealloc *wd;
+       int error = 0;
 
        wapbl_jlock_assert(wl);
 
        mutex_enter(&wl->wl_mtx);
-       /* XXX should eventually instead tie this into resource estimation */
-       /*
-        * XXX this panic needs locking/mutex analysis and the
-        * ability to cope with the failure.
-        */
-       /* XXX this XXX doesn't have enough XXX */
-       if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim))
-               panic("wapbl_register_deallocation: out of resources");
+
+       if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim)) {
+               if (!force) {
+                       error = EAGAIN;
+                       goto out;
+               }
+
+               /*
+                * Forced registration can only be used when:
+                * 1) the caller can't cope with failure
+                * 2) the path can be triggered only a small, bounded
+                *    number of times per transaction
+                * If this is not fulfilled, and the path would be triggered
+                * many times, this could overflow the maximum transaction size
+                * and panic later.
+                */
+               printf("%s: forced dealloc registration over limit: %d >= %d\n",
+                       wl->wl_mount->mnt_stat.f_mntonname,
+                       wl->wl_dealloccnt, wl->wl_dealloclim);
+       }
 
        wl->wl_dealloccnt++;
        mutex_exit(&wl->wl_mtx);
@@ -1955,10 +1968,15 @@
 
        mutex_enter(&wl->wl_mtx);
        SIMPLEQ_INSERT_TAIL(&wl->wl_dealloclist, wd, wd_entries);
+
+ out:
        mutex_exit(&wl->wl_mtx);
 
        WAPBL_PRINTF(WAPBL_PRINT_ALLOC,
-           ("wapbl_register_deallocation: blk=%"PRId64" len=%d\n", blk, len));
+           ("wapbl_register_deallocation: blk=%"PRId64" len=%d error=%d\n",
+           blk, len, error));
+
+       return error;
 }
 
 /****************************************************************/
diff -r 2995916ee6af -r 756f1a4d59a3 sys/sys/wapbl.h
--- a/sys/sys/wapbl.h   Fri Oct 28 20:30:37 2016 +0000
+++ b/sys/sys/wapbl.h   Fri Oct 28 20:38:12 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: wapbl.h,v 1.18 2016/10/01 13:15:45 jdolecek Exp $      */
+/*     $NetBSD: wapbl.h,v 1.19 2016/10/28 20:38:12 jdolecek Exp $      */
 
 /*-
  * Copyright (c) 2003,2008 The NetBSD Foundation, Inc.
@@ -173,7 +173,7 @@
  * the corresponding blocks from being reused as data
  * blocks until the log is on disk.
  */
-void   wapbl_register_deallocation(struct wapbl *, daddr_t, int);
+int    wapbl_register_deallocation(struct wapbl *, daddr_t, int, bool);
 
 void   wapbl_jlock_assert(struct wapbl *wl);
 void   wapbl_junlock_assert(struct wapbl *wl);
diff -r 2995916ee6af -r 756f1a4d59a3 sys/ufs/ffs/ffs_alloc.c
--- a/sys/ufs/ffs/ffs_alloc.c   Fri Oct 28 20:30:37 2016 +0000
+++ b/sys/ufs/ffs/ffs_alloc.c   Fri Oct 28 20:38:12 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ffs_alloc.c,v 1.152 2016/09/25 17:14:59 jdolecek Exp $ */
+/*     $NetBSD: ffs_alloc.c,v 1.153 2016/10/28 20:38:12 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -70,7 +70,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.152 2016/09/25 17:14:59 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.153 2016/10/28 20:38:12 jdolecek Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -478,11 +478,20 @@
        }
        bno = ffs_hashalloc(ip, cg, bpref, request, nsize, 0, ffs_alloccg);
        if (bno > 0) {
+               /*
+                * Use forced deallocation registration, as we can't handle
+                * failure here. This is safe, as this place is hit at most
+                * once per write operation, when a fragment is extended
+                * to a longer fragment or a full block.
+                */
                if ((ip->i_ump->um_mountp->mnt_wapbl) &&
                    (ITOV(ip)->v_type != VREG)) {
-                       UFS_WAPBL_REGISTER_DEALLOCATION(
+                       /* this should never fail */
+                       error = UFS_WAPBL_REGISTER_DEALLOCATION_FORCE(
                            ip->i_ump->um_mountp, FFS_FSBTODB(fs, bprev),
                            osize);
+                       if (error)
+                               panic("ffs_realloccg: dealloc registration failed");
                } else {
                        ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
                            ip->i_number);
diff -r 2995916ee6af -r 756f1a4d59a3 sys/ufs/ffs/ffs_inode.c
--- a/sys/ufs/ffs/ffs_inode.c   Fri Oct 28 20:30:37 2016 +0000
+++ b/sys/ufs/ffs/ffs_inode.c   Fri Oct 28 20:38:12 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ffs_inode.c,v 1.117 2015/03/28 19:24:04 maxv Exp $     */
+/*     $NetBSD: ffs_inode.c,v 1.118 2016/10/28 20:38:12 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.117 2015/03/28 19:24:04 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.118 2016/10/28 20:38:12 jdolecek Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -212,13 +212,15 @@
        daddr_t blks[UFS_NDADDR + UFS_NIADDR];
        struct fs *fs;
        int offset, pgoffset, level;
-       int64_t count, blocksreleased = 0;
+       int64_t blocksreleased = 0;
        int i, aflag, nblocks;
        int error, allerror = 0;
        off_t osize;
        int sync;
        struct ufsmount *ump = oip->i_ump;
 
+       UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);
+
        if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
            ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
                KASSERT(oip->i_size == 0);
@@ -418,19 +420,22 @@
                        bn = ufs_rw64(oip->i_ffs2_ib[level],UFS_FSNEEDSWAP(fs));
                if (bn != 0) {
                        error = ffs_indirtrunc(oip, indir_lbn[level],
-                           FFS_FSBTODB(fs, bn), lastiblock[level], level, &count);
+                           FFS_FSBTODB(fs, bn), lastiblock[level], level,
+                           &blocksreleased);
                        if (error)
-                               allerror = error;
-                       blocksreleased += count;
+                               goto out;
+
                        if (lastiblock[level] < 0) {
-                               DIP_ASSIGN(oip, ib[level], 0);
                                if (oip->i_ump->um_mountp->mnt_wapbl) {
-                                       UFS_WAPBL_REGISTER_DEALLOCATION(
+                                       error = UFS_WAPBL_REGISTER_DEALLOCATION(
                                            oip->i_ump->um_mountp,
                                            FFS_FSBTODB(fs, bn), fs->fs_bsize);
+                                       if (error)
+                                               goto out;
                                } else
                                        ffs_blkfree(fs, oip->i_devvp, bn,
                                            fs->fs_bsize, oip->i_number);
+                               DIP_ASSIGN(oip, ib[level], 0);
                                blocksreleased += nblocks;
                        }
                }
@@ -450,14 +455,18 @@
                        bn = ufs_rw64(oip->i_ffs2_db[i], UFS_FSNEEDSWAP(fs));
                if (bn == 0)
                        continue;
-               DIP_ASSIGN(oip, db[i], 0);
+
                bsize = ffs_blksize(fs, oip, i);
                if ((oip->i_ump->um_mountp->mnt_wapbl) &&
                    (ovp->v_type != VREG)) {
-                       UFS_WAPBL_REGISTER_DEALLOCATION(oip->i_ump->um_mountp,
+                       error = UFS_WAPBL_REGISTER_DEALLOCATION(
+                           oip->i_ump->um_mountp,
                            FFS_FSBTODB(fs, bn), bsize);
+                       if (error)
+                               goto out;
                } else
                        ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number);
+               DIP_ASSIGN(oip, db[i], 0);
                blocksreleased += btodb(bsize);
        }
        if (lastblock < 0)
@@ -493,9 +502,11 @@
                        bn += ffs_numfrags(fs, newspace);
                        if ((oip->i_ump->um_mountp->mnt_wapbl) &&
                            (ovp->v_type != VREG)) {
-                               UFS_WAPBL_REGISTER_DEALLOCATION(
+                               error = UFS_WAPBL_REGISTER_DEALLOCATION(
                                    oip->i_ump->um_mountp, FFS_FSBTODB(fs, bn),
                                    oldspace - newspace);
+                               if (error)
+                                       goto out;
                        } else
                                ffs_blkfree(fs, oip->i_devvp, bn,
                                    oldspace - newspace, oip->i_number);
@@ -515,6 +526,17 @@
            (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
                panic("itrunc3");
 #endif /* DIAGNOSTIC */
+
+out:
+       /*
+        * Set length back to old size if deallocation failed. Some indirect
+        * blocks were deallocated creating a hole, but that is okay.
+        */
+       if (error == EAGAIN) {
+               length = osize;
+               uvm_vnp_setsize(ovp, length);
+       }
+
        /*
         * Put back the real size.
         */
@@ -551,11 +573,11 @@
        int64_t *bap2 = NULL;
        struct vnode *vp;
        daddr_t nb, nlbn, last;
-       char *copy = NULL;
-       int64_t blkcount, factor, blocksreleased = 0;
-       int nblocks;
-       int error = 0, allerror = 0;
+       int64_t factor;
+       int64_t nblocks;
+       int error = 0;
        const int needswap = UFS_FSNEEDSWAP(fs);
+
 #define RBAP(ip, i) (((ip)->i_ump->um_fstype == UFS1) ? \
            ufs_rw32(bap1[i], needswap) : ufs_rw64(bap2[i], needswap))
 #define BAP_ASSIGN(ip, i, value)                                       \
@@ -580,7 +602,7 @@
        nblocks = btodb(fs->fs_bsize);
        /*
         * Get buffer of block pointers, zero those entries corresponding
-        * to blocks to be free'd, and update on disk copy first.  Since
+        * to blocks to be free'd, and update on disk copy.  Since
         * double(triple) indirect before single(double) indirect, calls
         * to bmap on these blocks will fail.  However, we already have
         * the on disk address, so we have to set the b_blkno field
@@ -588,10 +610,9 @@
         */
        vp = ITOV(ip);
        error = ffs_getblk(vp, lbn, FFS_NOBLK, fs->fs_bsize, false, &bp);
-       if (error) {
-               *countp = 0;
+       if (error)
                return error;
-       }
+
        if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
                /* Braces must be here in case trace evaluates to nothing. */
                trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn);
@@ -611,77 +632,69 @@
        }
        if (error) {
                brelse(bp, 0);
-               *countp = 0;
-               return (error);
+               return error;
        }



Home | Main Index | Thread Index | Old Index