NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: kern/49175
The following reply was made to PR kern/49175; it has been noted by GNATS.
From: "Sergio L. Pascual" <slp%sinrega.org@localhost>
To: gnats-bugs%netbsd.org@localhost
Cc:
Subject: Re: kern/49175
Date: Fri, 09 Jan 2015 09:38:01 +0100
This is a multi-part message in MIME format.
--nextPart23712401.KJzYPYpgBM
Content-Transfer-Encoding: 7Bit
Content-Type: text/plain; charset="us-ascii"
The problem comes from ufs_inode.c:284, where indirect blocks are being freed
one by one, apparently to avoid generating a large transaction and/or
exhausting WABPL resources. This is extremely expensive, for both sparse and
regular files. I think, at the very least, we should be able to free those
blocks in reasonably sized chunks.
I've attached here a reimplementation of my original patch over the changes
applied by Christos over the related code. It addresses this issue by checking
the maximum the size for a WAPBL transaction, blocking others from using WAPBL
resources by taking a RW_WRITER lock on wl_rwlock, and then issuing such
transaction.
Additionally, it yield()s a little every 10 batches (an arbitrary number I've
just made up, but sounds reasonable to me), to allow other tasks to use some
CPU time.
Sergio.
--nextPart23712401.KJzYPYpgBM
Content-Disposition: attachment; filename="wapbl_batch_dealloc_v3.patch"
Content-Transfer-Encoding: 7Bit
Content-Type: text/x-patch; charset="UTF-8"; name="wapbl_batch_dealloc_v3.patch"
diff --git a/sys/kern/vfs_wapbl.c b/sys/kern/vfs_wapbl.c
index 2448053..a005db9 100644
--- a/sys/kern/vfs_wapbl.c
+++ b/sys/kern/vfs_wapbl.c
@@ -683,7 +683,7 @@ wapbl_stop(struct wapbl *wl, int force)
int error;
WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_stop called\n"));
- error = wapbl_flush(wl, 1);
+ error = wapbl_flush(wl, WAPBL_WAITFOR);
if (error) {
if (force)
wapbl_discard(wl);
@@ -911,7 +911,7 @@ wapbl_circ_write(struct wapbl *wl, void *data, size_t len, off_t *offp)
/****************************************************************/
int
-wapbl_begin(struct wapbl *wl, const char *file, int line)
+wapbl_begin(struct wapbl *wl, int *deallocmax, const char *file, int line)
{
int doflush;
unsigned lockcount;
@@ -950,9 +950,15 @@ wapbl_begin(struct wapbl *wl, const char *file, int line)
return error;
}
- rw_enter(&wl->wl_rwlock, RW_READER);
+ if (deallocmax)
+ rw_enter(&wl->wl_rwlock, RW_WRITER);
+ else
+ rw_enter(&wl->wl_rwlock, RW_READER);
+
mutex_enter(&wl->wl_mtx);
wl->wl_lock_count++;
+ if (deallocmax)
+ *deallocmax = wl->wl_dealloclim - wl->wl_dealloccnt;
mutex_exit(&wl->wl_mtx);
#if defined(WAPBL_DEBUG_PRINT)
@@ -967,8 +973,10 @@ wapbl_begin(struct wapbl *wl, const char *file, int line)
}
void
-wapbl_end(struct wapbl *wl)
+wapbl_end(struct wapbl *wl, int dealloc)
{
+ int doflush = 0;
+ unsigned lockcount;
#if defined(WAPBL_DEBUG_PRINT)
WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
@@ -990,12 +998,28 @@ wapbl_end(struct wapbl *wl)
}
#endif
+ if (dealloc) {
+ mutex_enter(&wl->wl_mtx);
+ lockcount = wl->wl_lock_count;
+ doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) >
+ wl->wl_bufbytes_max / 2) ||
+ ((wl->wl_bufcount + (lockcount * 10)) >
+ wl->wl_bufcount_max / 2) ||
+ (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) ||
+ (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2));
+ mutex_exit(&wl->wl_mtx);
+
+ if (doflush)
+ wapbl_flush(wl, WAPBL_WAITFOR | WAPBL_LOCKTAKEN);
+ }
+
mutex_enter(&wl->wl_mtx);
KASSERT(wl->wl_lock_count > 0);
wl->wl_lock_count--;
mutex_exit(&wl->wl_mtx);
- rw_exit(&wl->wl_rwlock);
+ if (!doflush)
+ rw_exit(&wl->wl_rwlock);
}
void
@@ -1435,7 +1459,7 @@ wapbl_biodone(struct buf *bp)
* Write transactions to disk + start I/O for contents
*/
int
-wapbl_flush(struct wapbl *wl, int waitfor)
+wapbl_flush(struct wapbl *wl, int flags)
{
struct buf *bp;
struct wapbl_entry *we;
@@ -1452,7 +1476,7 @@ wapbl_flush(struct wapbl *wl, int waitfor)
* This assumes that the flush callback does not need to be called
* unless there are other outstanding bufs.
*/
- if (!waitfor) {
+ if (!(flags & WAPBL_WAITFOR)) {
size_t nbufs;
mutex_enter(&wl->wl_mtx); /* XXX need mutex here to
protect the KASSERTS */
@@ -1468,7 +1492,8 @@ wapbl_flush(struct wapbl *wl, int waitfor)
* XXX we may consider using LK_UPGRADE here
* if we want to call flush from inside a transaction
*/
- rw_enter(&wl->wl_rwlock, RW_WRITER);
+ if (!(flags & WAPBL_LOCKTAKEN))
+ rw_enter(&wl->wl_rwlock, RW_WRITER);
wl->wl_flush(wl->wl_mount, wl->wl_deallocblks, wl->wl_dealloclens,
wl->wl_dealloccnt);
@@ -1639,7 +1664,7 @@ wapbl_flush(struct wapbl *wl, int waitfor)
* If the waitfor flag is set, don't return until everything is
* fully flushed and the on disk log is empty.
*/
- if (waitfor) {
+ if (flags & WAPBL_WAITFOR) {
error = wapbl_truncate(wl, wl->wl_circ_size -
wl->wl_reserved_bytes, wapbl_lazy_truncate);
}
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 048fcdd..13c4993 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -312,8 +312,8 @@ struct wapbl_ops {
void (*wo_wapbl_add_buf)(struct wapbl *, struct buf *);
void (*wo_wapbl_remove_buf)(struct wapbl *, struct buf *);
void (*wo_wapbl_resize_buf)(struct wapbl *, struct buf *, long, long);
- int (*wo_wapbl_begin)(struct wapbl *, const char *, int);
- void (*wo_wapbl_end)(struct wapbl *);
+ int (*wo_wapbl_begin)(struct wapbl *, int *, const char *, int);
+ void (*wo_wapbl_end)(struct wapbl *, int);
void (*wo_wapbl_junlock_assert)(struct wapbl *);
void (*wo_wapbl_biodone)(struct buf *);
};
@@ -336,9 +336,9 @@ struct wapbl_ops {
(OLDSZ), (OLDCNT))
#define WAPBL_BEGIN(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_begin)((MP)->mnt_wapbl, \
- __FILE__, __LINE__)
+ NULL, __FILE__, __LINE__)
#define WAPBL_END(MP) \
- (*(MP)->mnt_wapbl_op->wo_wapbl_end)((MP)->mnt_wapbl)
+ (*(MP)->mnt_wapbl_op->wo_wapbl_end)((MP)->mnt_wapbl, 0)
#define WAPBL_JUNLOCK_ASSERT(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_junlock_assert)((MP)->mnt_wapbl)
diff --git a/sys/sys/wapbl.h b/sys/sys/wapbl.h
index 735ec26..f64a1d3 100644
--- a/sys/sys/wapbl.h
+++ b/sys/sys/wapbl.h
@@ -125,11 +125,11 @@ int wapbl_stop(struct wapbl *, int);
* level if called while a transaction is already in progress
* by the current process.
*/
-int wapbl_begin(struct wapbl *, const char *, int);
+int wapbl_begin(struct wapbl *, int *, const char *, int);
/* End a transaction or decrement the transaction recursion level */
-void wapbl_end(struct wapbl *);
+void wapbl_end(struct wapbl *, int);
/*
* Add a new buffer to the current transaction. The buffers
@@ -144,6 +144,10 @@ void wapbl_remove_buf(struct wapbl *, struct buf *);
void wapbl_resize_buf(struct wapbl *, struct buf *, long, long);
+/* Flags for wapbl_flush */
+#define WAPBL_WAITFOR 0x1 /* don't return until everything is flushed */
+#define WAPBL_LOCKTAKEN 0x2 /* a writer lock for wl_rwlock is already taken */
+
/*
* This will flush all completed transactions to disk and
* start asynchronous writes on the associated buffers
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 1d1e32a..c2f45a6 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1634,7 +1634,7 @@ ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
if (error)
return error;
if (mp->mnt_wapbl) {
- error = wapbl_flush(mp->mnt_wapbl, 1);
+ error = wapbl_flush(mp->mnt_wapbl, WAPBL_WAITFOR);
if (flags & FORCECLOSE)
error = 0;
}
@@ -2087,7 +2087,7 @@ ffs_suspendctl(struct mount *mp, int cmd)
error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
#ifdef WAPBL
if (error == 0 && mp->mnt_wapbl)
- error = wapbl_flush(mp->mnt_wapbl, 1);
+ error = wapbl_flush(mp->mnt_wapbl, WAPBL_WAITFOR);
#endif
if (error != 0) {
(void) fstrans_setstate(mp, FSTRANS_NORMAL);
diff --git a/sys/ufs/ffs/ffs_wapbl.c b/sys/ufs/ffs/ffs_wapbl.c
index 37deeec..a42fc31 100644
--- a/sys/ufs/ffs/ffs_wapbl.c
+++ b/sys/ufs/ffs/ffs_wapbl.c
@@ -360,7 +360,7 @@ ffs_wapbl_start(struct mount *mp)
goto out;
}
UFS_WAPBL_END(mp);
- error = wapbl_flush(mp->mnt_wapbl, 1);
+ error = wapbl_flush(mp->mnt_wapbl, WAPBL_WAITFOR);
if (error)
goto out;
}
@@ -409,7 +409,7 @@ ffs_wapbl_stop(struct mount *mp, int force)
* as the only change in the final flush since otherwise
* a transaction may reorder writes.
*/
- error = wapbl_flush(mp->mnt_wapbl, 1);
+ error = wapbl_flush(mp->mnt_wapbl, WAPBL_WAITFOR);
if (error && !force)
return error;
if (error && force)
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index db38d6e..4a8e7a2 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -277,25 +277,64 @@ ufs_wapbl_truncate(struct vnode *vp, uint64_t newsize, kauth_cred_t cred)
{
struct inode *ip = VTOI(vp);
int error = 0;
- uint64_t base, incr;
+ int count = 0;
+ int maxblks;
+ int blksize = 1 << vp->v_mount->mnt_fs_bshift;
- base = UFS_NDADDR << vp->v_mount->mnt_fs_bshift;
- incr = MNINDIR(ip->i_ump) << vp->v_mount->mnt_fs_bshift;/* Power of 2 */
- while (ip->i_size > base + incr &&
- (newsize == 0 || ip->i_size > newsize + incr)) {
+ error = UFS_WAPBL_BEGIN_DEALLOC(vp->v_mount, &maxblks);
+ if (error)
+ return error;
+
+ while (ip->i_size > newsize) {
/*
* round down to next full indirect
* block boundary.
*/
- uint64_t nsize = base + ((ip->i_size - base - 1) & ~(incr - 1));
+ uint64_t nsize;
+ uint64_t allocsize;
+ uint64_t maxbytes = maxblks << vp->v_mount->mnt_fs_bshift;
+
+ /*
+ * Non-allocated blocks do not require a transaction, so
+ * limit based on the number of actually allocated blocks.
+ */
+ if (ip->i_ump->um_fstype == UFS1) {
+ allocsize = dbtob((u_quad_t)ip->i_ffs1_blocks);
+ } else {
+ allocsize = dbtob(ip->i_ffs2_blocks);
+ }
+
+ if (allocsize > maxbytes &&
+ ip->i_size - newsize > maxbytes) {
+ /*
+ * Truncate to the minimum block aligned offset
+ * maxbytes allows us to target.
+ */
+ nsize = ip->i_size - maxbytes;
+ nsize -= nsize & (blksize - 1);
+ } else {
+ nsize = newsize;
+ }
+
error = UFS_TRUNCATE(vp, nsize, 0, cred);
if (error)
break;
- UFS_WAPBL_END(vp->v_mount);
- error = UFS_WAPBL_BEGIN(vp->v_mount);
+ UFS_WAPBL_END_DEALLOC(vp->v_mount);
+ if (count > 10) {
+ /*
+ * If this is an unusually large operation,
+ * be a good neighbor and rest a little.
+ */
+ yield();
+ count = 0;
+ } else {
+ count++;
+ }
+ error = UFS_WAPBL_BEGIN_DEALLOC(vp->v_mount, &maxblks);
if (error)
return error;
}
+ UFS_WAPBL_END_DEALLOC(vp->v_mount);
return error;
}
@@ -304,16 +343,10 @@ ufs_truncate(struct vnode *vp, uint64_t newsize, kauth_cred_t cred)
{
int error;
- error = UFS_WAPBL_BEGIN(vp->v_mount);
- if (error)
- return error;
-
if (vp->v_mount->mnt_wapbl)
error = ufs_wapbl_truncate(vp, newsize, cred);
-
- if (error == 0)
+ else
error = UFS_TRUNCATE(vp, newsize, 0, cred);
- UFS_WAPBL_END(vp->v_mount);
return error;
}
diff --git a/sys/ufs/ufs/ufs_wapbl.h b/sys/ufs/ufs/ufs_wapbl.h
index 1eab045..47a379f 100644
--- a/sys/ufs/ufs/ufs_wapbl.h
+++ b/sys/ufs/ufs/ufs_wapbl.h
@@ -95,7 +95,8 @@ void ufs_wapbl_verify_inodes(struct mount *, const char *);
#endif
static __inline int
-ufs_wapbl_begin2(struct mount *mp, struct vnode *vp1, struct vnode *vp2,
+ufs_wapbl_begin2(struct mount *mp, int *dealloc,
+ struct vnode *vp1, struct vnode *vp2,
const char *file, int line)
{
if (mp->mnt_wapbl) {
@@ -105,7 +106,7 @@ ufs_wapbl_begin2(struct mount *mp, struct vnode *vp1, struct vnode *vp2,
vref(vp1);
if (vp2)
vref(vp2);
- error = wapbl_begin(mp->mnt_wapbl, file, line);
+ error = wapbl_begin(mp->mnt_wapbl, dealloc, file, line);
if (error)
return error;
#ifdef WAPBL_DEBUG_INODES
@@ -117,14 +118,14 @@ ufs_wapbl_begin2(struct mount *mp, struct vnode *vp1, struct vnode *vp2,
}
static __inline void
-ufs_wapbl_end2(struct mount *mp, struct vnode *vp1, struct vnode *vp2)
+ufs_wapbl_end2(struct mount *mp, int dealloc, struct vnode *vp1, struct vnode *vp2)
{
if (mp->mnt_wapbl) {
#ifdef WAPBL_DEBUG_INODES
if (mp->mnt_wapbl->wl_lock.lk_exclusivecount == 1)
ufs_wapbl_verify_inodes(mp, "wapbl_end");
#endif
- wapbl_end(mp->mnt_wapbl);
+ wapbl_end(mp->mnt_wapbl, dealloc);
if (vp2)
vrele(vp2);
if (vp1)
@@ -133,11 +134,14 @@ ufs_wapbl_end2(struct mount *mp, struct vnode *vp1, struct vnode *vp2)
}
#define UFS_WAPBL_BEGIN(mp) \
- ufs_wapbl_begin2(mp, NULL, NULL, __FUNCTION__, __LINE__)
+ ufs_wapbl_begin2(mp, NULL, NULL, NULL, __FUNCTION__, __LINE__)
#define UFS_WAPBL_BEGIN1(mp, v1) \
- ufs_wapbl_begin2(mp, v1, NULL, __FUNCTION__, __LINE__)
-#define UFS_WAPBL_END(mp) ufs_wapbl_end2(mp, NULL, NULL)
-#define UFS_WAPBL_END1(mp, v1) ufs_wapbl_end2(mp, v1, NULL)
+ ufs_wapbl_begin2(mp, NULL, v1, NULL, __FUNCTION__, __LINE__)
+#define UFS_WAPBL_BEGIN_DEALLOC(mp, dealloc) \
+ ufs_wapbl_begin2(mp, dealloc, NULL, NULL, __FUNCTION__, __LINE__)
+#define UFS_WAPBL_END(mp) ufs_wapbl_end2(mp, 0, NULL, NULL)
+#define UFS_WAPBL_END1(mp, v1) ufs_wapbl_end2(mp, 0, v1, NULL)
+#define UFS_WAPBL_END_DEALLOC(mp) ufs_wapbl_end2(mp, 1, NULL, NULL)
#define UFS_WAPBL_UPDATE(vp, access, modify, flags) \
if ((vp)->v_mount->mnt_wapbl) { \
@@ -165,8 +169,10 @@ ufs_wapbl_end2(struct mount *mp, struct vnode *vp1, struct vnode *vp2)
#else /* ! WAPBL */
#define UFS_WAPBL_BEGIN(mp) (__USE(mp), 0)
#define UFS_WAPBL_BEGIN1(mp, v1) 0
+#define UFS_WAPBL_BEGIN_DEALLOC(mp, dealloc) (__USE(mp), __USE(dealloc), 0)
#define UFS_WAPBL_END(mp) do { } while (0)
#define UFS_WAPBL_END1(mp, v1)
+#define UFS_WAPBL_END_DEALLOC(mp, dealloc)
#define UFS_WAPBL_UPDATE(vp, access, modify, flags) do { } while (0)
#define UFS_WAPBL_JLOCK_ASSERT(mp)
#define UFS_WAPBL_JUNLOCK_ASSERT(mp)
--nextPart23712401.KJzYPYpgBM--
Home |
Main Index |
Thread Index |
Old Index