tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
ffs snapshots patch
Hello,
attached is a work in progress on ffs snapshot (as it's work in progress,
some debug and instrumentation code is still present in the
patch, no need to comment on this part :).
This work started when, while working on quota, I noticed that
taking a snapshot on a 500GB filesystem takes several minutes, and that
the time is O(n) in the number of persistent snapshots.
Here are some timings on an otherwise idle 500GB filesystem (it's some brand of
SATA2 3.5" drive attached to an AHCI controller, so it's a reasonable test
bed for today):
java# /usr/bin/time fssconfig fss0 /home /home/snaps/snap0
260.53 real 0.00 user 1.15 sys
/home: suspended 77.873 sec, redo 1184 of 2556
java# /usr/bin/time fssconfig fss1 /home /home/snaps/snap1
377.87 real 0.00 user 2.53 sys
/home: suspended 206.078 sec, redo 1184 of 2556
java# /usr/bin/time fssconfig fss2 /home /home/snaps/snap2
508.23 real 0.00 user 4.28 sys
/home: suspended 338.534 sec, redo 1184 of 2556
java# /usr/bin/time fssconfig fss3 /home /home/snaps/snap3
621.40 real 0.00 user 5.50 sys
/home: suspended 431.154 sec, redo 1183 of 2556
Suspending a filesystem for more than 7 minutes to take a snapshot makes
persistent snapshots quite useless to me. I wonder how it would behave
on a multi-terabyte filesystem.
I looked at where the time is spent and found 2 major issues:
1 cgaccount() works in 2 passes: first it copies cg before suspending the
filesystem; then it is called again to copy only the cg that have been
modified between copy and filesystem suspend.
The problem is that to copy a cg we need to allocate blocks for the snapshot
file, which may be in a cg we just copied. This is the cause of the high
number of cg copies (almost half of them) with the filesystem suspended.
2 while the filesystem is suspended, we want to expunge the snapshot files
from the snapshot view (make them appear as a 0-length file).
With ~500GB sparse files this is a lot of work.
I fixed 1) by preallocating the needed blocks in snapshot_setup().
Fixing 2) is trickier. To avoid the heavy writes to the snapshot file
with the fs suspended, the snapshot appears with its real length and
blocks at the time of creation, but is marked invalid (only the
inode block needs to be copied, and this can be done before suspending
the fs). Now BLK_SNAP should never be seen as a block number, and we skip
ffs_copyonwrite() if the write is to a snapshot inode.
With these changes the times are much more reasonable:
/usr/bin/time fssconfig fss0 /home /home/snaps/snap0
299.68 real 0.00 user 1.10 sys
/home: suspended 0.310 sec, redo 0 of 2556
/usr/bin/time fssconfig fss1 /home /home/snaps/snap1
188.10 real 0.00 user 0.86 sys
/home: suspended 0.270 sec, redo 0 of 2556
/usr/bin/time fssconfig fss2 /home /home/snaps/snap2
169.78 real 0.00 user 0.95 sys
/home: suspended 0.450 sec, redo 0 of 2556
/usr/bin/time fssconfig fss3 /home /home/snaps/snap3
172.39 real 0.00 user 0.99 sys
/home: suspended 0.300 sec, redo 0 of 2556
This seems to work; one issue with this patch is that the block
count for the snapshot inode and the block summary information (the
second probably being a consequence of the first) appear wrong when
running fsck against a snapshot. I believe this is fixable, but
I've not yet found where the information mismatch is coming from.
comments ?
PS: I'm away from computers for one week, so don't expect replies to
your comments before next sunday.
--
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
NetBSD: 26 ans d'experience feront toujours la difference
--
Index: ffs/ffs_snapshot.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_snapshot.c,v
retrieving revision 1.111
diff -u -p -u -r1.111 ffs_snapshot.c
--- ffs/ffs_snapshot.c 6 Mar 2011 17:08:38 -0000 1.111
+++ ffs/ffs_snapshot.c 16 Apr 2011 19:07:31 -0000
@@ -109,6 +109,8 @@ static int snapacct(struct vnode *, void
daddr_t, int);
static int mapacct(struct vnode *, void *, int, int, struct fs *,
daddr_t, int);
+static int snapcount(struct vnode *, void *, int, int, struct fs *,
+ daddr_t, int);
#endif /* !defined(FFS_NO_SNAPSHOT) */
static int ffs_copyonwrite(void *, struct buf *, bool);
@@ -190,7 +192,7 @@ ffs_snapshot(struct mount *mp, struct vn
struct timespec ts;
struct timeval starttime;
#ifdef DEBUG
- struct timeval endtime;
+ struct timeval endtime, parttime;
#endif
struct vnode *devvp = ip->i_devvp;
@@ -250,6 +252,8 @@ ffs_snapshot(struct mount *mp, struct vn
/*
* All allocations are done, so we can now suspend the filesystem.
*/
+ printf("%s: before suspend size %qd %qd\n",
+ mp->mnt_stat.f_mntonname, (long long int)ip->i_size, (long long
int)DIP(ip, size));
error = vfs_suspend(vp->v_mount, 0);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (error)
@@ -262,6 +266,14 @@ ffs_snapshot(struct mount *mp, struct vn
error = cgaccount(vp, 2, &redo);
if (error)
goto out;
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &starttime, &parttime);
+ printf("%s: suspended for cgaccount %lld.%03d sec, redo %d of %d size
%qd %qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size, (long long int)DIP(ip, size));
+ parttime = endtime;
+#endif
/*
* Create a copy of the superblock and its summary information.
*/
@@ -269,12 +281,28 @@ ffs_snapshot(struct mount *mp, struct vn
copy_fs = (struct fs *)((char *)sbbuf + blkoff(fs, fs->fs_sblockloc));
if (error)
goto out;
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for snapshot_copyfs %lld.%03d sec, redo %d of %d
size %qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
/*
* Expunge unlinked files from our view.
*/
error = snapshot_expunge(mp, vp, copy_fs, &snaplistsize, &snaplist);
if (error)
goto out;
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for snapshot_expunge %lld.%03d sec, redo %d of %d
size %qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
/*
* Record snapshot inode. Since this is the newest snapshot,
* it must be placed at the end of the list.
@@ -293,6 +321,14 @@ ffs_snapshot(struct mount *mp, struct vn
*/
si->si_snapblklist = snaplist;
fscow_establish(mp, ffs_copyonwrite, devvp);
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for fscow_establish %lld.%03d sec, redo %d of %d
size %qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
}
si->si_gen++;
mutex_exit(&si->si_lock);
@@ -308,24 +344,47 @@ ffs_snapshot(struct mount *mp, struct vn
DIP_ASSIGN(ip, mtimensec, ts.tv_nsec);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
/*
- * Copy allocation information from all snapshots and then
- * expunge them from our view.
+ * Copy allocation information from all snapshots
*/
error = snapshot_expunge_snap(mp, vp, copy_fs, snaplistsize);
if (error)
goto out;
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for snapshot_expunge_snap %lld.%03d sec, redo %d
of %d size %qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
/*
* Write the superblock and its summary information to the snapshot.
*/
error = snapshot_writefs(mp, vp, sbbuf);
if (error)
goto out;
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for snapshot_writefs %lld.%03d sec, redo %d of %d
size %qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
/*
* We're nearly done, ensure that the snapshot is completely on disk.
*/
error = VOP_FSYNC(vp, l->l_cred, FSYNC_WAIT, 0, 0);
if (error)
goto out;
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for VOP_FSYNC %lld.%03d sec, redo %d of %d size
%qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
/*
* Invalidate and free all pages on the snapshot vnode.
* We will read and write through the buffercache.
@@ -335,6 +394,14 @@ ffs_snapshot(struct mount *mp, struct vn
PGO_ALLPAGES | PGO_CLEANIT | PGO_SYNCIO | PGO_FREE);
if (error)
goto out;
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for VOP_PUTPAGES %lld.%03d sec, redo %d of %d
size %qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
/*
* Invalidate short ( < fs_bsize ) buffers. We will always read
* full size buffers later.
@@ -350,6 +417,14 @@ ffs_snapshot(struct mount *mp, struct vn
}
}
mutex_exit(&bufcache_lock);
+#ifdef DEBUG
+ getmicrotime(&endtime);
+ timersub(&endtime, &parttime, &parttime);
+ printf("%s: suspended for brelsel %lld.%03d sec, redo %d of %d size
%qd\n",
+ mp->mnt_stat.f_mntonname, (long long)parttime.tv_sec,
+ parttime.tv_usec / 1000, redo, fs->fs_ncg, (long long
int)ip->i_size);
+ parttime = endtime;
+#endif
out:
if (sbbuf != NULL) {
@@ -399,19 +474,58 @@ out:
return (error);
}
+/* copy and mark a snapshot inode as invalid in a snapshot */
+static int
+snapshot_inval_snap(struct mount *mp, struct vnode *vp, struct inode *ip,
+ struct lwp *l)
+{
+ daddr_t lbn;
+ struct buf *bp;
+ struct ufs1_dinode *dip1;
+ struct ufs2_dinode *dip2;
+ struct fs *fs = VFSTOUFS(mp)->um_fs;
+ int error;
+
+ lbn = fragstoblks(fs, ino_to_fsba(fs, ip->i_number));
+ error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
+ fs->fs_bsize, l->l_cred, 0, &bp);
+ if (error)
+ return error;
+ error = rwfsblk(vp, B_READ, bp->b_data, lbn);
+ if (error)
+ return error;
+ if (fs->fs_magic == FS_UFS1_MAGIC) {
+ dip1 = (struct ufs1_dinode *)bp->b_data +
+ ino_to_fsbo(fs, ip->i_number);
+ dip1->di_flags =
+ ufs_rw32(ufs_rw32(dip1->di_flags, ns) |
+ SF_SNAPINVAL, ns);
+ } else {
+ dip2 = (struct ufs2_dinode *)bp->b_data +
+ ino_to_fsbo(fs, ip->i_number);
+ dip2->di_flags =
+ ufs_rw32(ufs_rw32(dip2->di_flags, ns) |
+ SF_SNAPINVAL, ns);
+ }
+ bawrite(bp);
+ return 0;
+}
+
/*
* Prepare vnode to become a snapshot.
*/
static int
snapshot_setup(struct mount *mp, struct vnode *vp)
{
- int error, n, len, loc;
+ int error, n, len, loc, cg;
daddr_t blkno, numblks;
struct buf *ibp, *nbp;
struct fs *fs = VFSTOUFS(mp)->um_fs;
struct lwp *l = curlwp;
const int wbreak = blocks_in_journal(fs)/8;
struct inode *ip = VTOI(vp);
+ struct inode *xp;
+ struct snap_info *si = VFSTOUFS(mp)->um_snapinfo;
/*
* Check mount, exclusive reference and owner.
@@ -485,6 +599,20 @@ snapshot_setup(struct mount *mp, struct
return error;
}
}
+ /* allocate copies for the cylinder group maps */
+ for (cg = 0; cg < fs->fs_ncg; cg++) {
+ error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
+ fs->fs_bsize, l->l_cred, 0, &nbp);
+ if (error)
+ goto out;
+ bawrite(nbp);
+ if (wbreak > 0 && (cg % wbreak) == 0) {
+ UFS_WAPBL_END(mp);
+ error = UFS_WAPBL_BEGIN(mp);
+ if (error)
+ return error;
+ }
+ }
/*
* Allocate copies for the superblock and its summary information.
*/
@@ -502,7 +630,29 @@ snapshot_setup(struct mount *mp, struct
goto out;
bawrite(nbp);
}
+ /*
+ * allocate inode blocks for all exising snapshot inodes,
+ * and mark the snapshot as invalid.
+ */
+ TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap) {
+ error = snapshot_inval_snap(mp, vp, xp, l);
+ if (error)
+ goto out;
+ }
+ /* allocate inode block for ourself */
+ KASSERT(lfragtosize(fs, ino_to_fsba(fs, ip->i_number)) ==
+ lblktosize(fs, fragstoblks(fs, ino_to_fsba(fs, ip->i_number))));
+ error = ffs_balloc(vp, lfragtosize(fs, ino_to_fsba(fs, ip->i_number)),
+ fs->fs_bsize, l->l_cred, 0, &nbp);
+ if (error)
+ goto out;
+ bawrite(nbp);
+ /* ensure inode block is up to date before copying it */
+ error = VOP_FSYNC(vp, l->l_cred, FSYNC_WAIT, 0, 0);
+ if (error)
+ goto out;
+ error = snapshot_inval_snap(mp, vp, ip, l);
out:
UFS_WAPBL_END(mp);
return error;
@@ -581,6 +731,7 @@ static int
snapshot_expunge(struct mount *mp, struct vnode *vp, struct fs *copy_fs,
daddr_t *snaplistsize, daddr_t **snaplist)
{
+ struct ufsmount *ump = VFSTOUFS(mp);
int cg, error = 0, len, loc;
daddr_t blkno, *blkp;
struct fs *fs = VFSTOUFS(mp)->um_fs;
@@ -644,6 +795,19 @@ snapshot_expunge(struct mount *mp, struc
vprint("ffs_snapshot: busy vnode", xvp);
#endif
xp = VTOI(xvp);
+#if 0
+ if (xvp == logvp) {
+ mutex_enter(&mntvnode_lock);
+ continue;
+ }
+#endif
+ if (ump->um_flags & UFS_QUOTA2) {
+ if (xvp == ump->um_quotas[USRQUOTA] ||
+ xvp == ump->um_quotas[GRPQUOTA]) {
+ mutex_enter(&mntvnode_lock);
+ continue;
+ }
+ }
if (xvp != logvp) {
if (VOP_GETATTR(xvp, &vat, l->l_cred) == 0 &&
vat.va_nlink > 0) {
@@ -738,39 +902,18 @@ snapshot_expunge_snap(struct mount *mp,
int error = 0, i;
daddr_t numblks, *snaplist = NULL;
struct fs *fs = VFSTOUFS(mp)->um_fs;
- struct inode *ip = VTOI(vp), *xp;
+ struct inode *ip = VTOI(vp);
struct lwp *l = curlwp;
- struct snap_info *si = VFSTOUFS(mp)->um_snapinfo;
- TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap) {
- if (xp != ip) {
- error = expunge(vp, xp, fs, snapacct, BLK_SNAP);
- if (error)
- break;
- }
- if (xp->i_nlink != 0)
- continue;
- error = UFS_WAPBL_BEGIN(mp);
- if (error)
- break;
- error = ffs_freefile_snap(copy_fs, vp, xp->i_number,
xp->i_mode);
- UFS_WAPBL_END(mp);
- if (error)
- break;
- }
- if (error)
- goto out;
/*
* Allocate space for the full list of preallocated snapshot blocks.
*/
snaplist = malloc(snaplistsize * sizeof(daddr_t), M_UFSMNT, M_WAITOK);
ip->i_snapblklist = &snaplist[1];
/*
- * Expunge the blocks used by the snapshots from the set of
- * blocks marked as used in the snapshot bitmaps. Also, collect
- * the list of allocated blocks in i_snapblklist.
+ * collect the list of allocated blocks in i_snapblklist.
*/
- error = expunge(vp, ip, copy_fs, mapacct, BLK_SNAP);
+ error = expunge(vp, ip, copy_fs, snapcount, BLK_SNAP);
if (error)
goto out;
if (snaplistsize < ip->i_snapblklist - snaplist)
@@ -899,15 +1042,15 @@ cgaccount(struct vnode *vp, int passno,
*redo += 1;
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
- return error;
- error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
- fs->fs_bsize, curlwp->l_cred, 0, &nbp);
+ break;
+ error = bread(vp, fragstoblks(fs, cgtod(fs, cg)),
+ fs->fs_bsize, curlwp->l_cred, B_MODIFY, &nbp);
if (error) {
UFS_WAPBL_END(vp->v_mount);
break;
}
error = cgaccount1(cg, vp, nbp->b_data, passno);
- bawrite(nbp);
+ bdwrite(nbp);
UFS_WAPBL_END(vp->v_mount);
if (error)
break;
@@ -1023,63 +1166,66 @@ expunge(struct vnode *snapvp, struct ino
ns = UFS_FSNEEDSWAP(fs);
mp = snapvp->v_mount;
- error = UFS_WAPBL_BEGIN(mp);
- if (error)
- return error;
- /*
- * Prepare to expunge the inode. If its inode block has not
- * yet been copied, then allocate and fill the copy.
- */
- lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
- error = snapblkaddr(snapvp, lbn, &blkno);
- if (error)
- return error;
- if (blkno != 0) {
- error = bread(snapvp, lbn, fs->fs_bsize, l->l_cred,
- B_MODIFY, &bp);
- } else {
- error = ffs_balloc(snapvp, lblktosize(fs, (off_t)lbn),
- fs->fs_bsize, l->l_cred, 0, &bp);
- if (! error)
- error = rwfsblk(snapvp, B_READ, bp->b_data, lbn);
- }
- if (error) {
+ if (expungetype != BLK_SNAP) {
+ error = UFS_WAPBL_BEGIN(mp);
+ if (error)
+ return error;
+ /*
+ * Prepare to expunge the inode. If its inode block has not
+ * yet been copied, then allocate and fill the copy.
+ */
+ printf("expunge %d\n", (int)cancelip->i_number);
+ lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
+ error = snapblkaddr(snapvp, lbn, &blkno);
+ if (error)
+ return error;
+ if (blkno != 0) {
+ error = bread(snapvp, lbn, fs->fs_bsize, l->l_cred,
+ B_MODIFY, &bp);
+ } else {
+ error = ffs_balloc(snapvp, lblktosize(fs, (off_t)lbn),
+ fs->fs_bsize, l->l_cred, 0, &bp);
+ if (! error)
+ error = rwfsblk(snapvp, B_READ, bp->b_data,
lbn);
+ }
+ if (error) {
+ UFS_WAPBL_END(mp);
+ return error;
+ }
+ /*
+ * Set a snapshot inode to be a zero length file, regular files
+ * or unlinked snapshots to be completely unallocated.
+ */
+ if (fs->fs_magic == FS_UFS1_MAGIC) {
+ dip1 = (struct ufs1_dinode *)bp->b_data +
+ ino_to_fsbo(fs, cancelip->i_number);
+ if (cancelip->i_flags & SF_SNAPSHOT) {
+ KASSERT((ufs_rw32(dip1->di_flags, ns) &
+ SF_SNAPINVAL) != 0);
+ }
+ if (expungetype == BLK_NOCOPY || cancelip->i_nlink == 0)
+ dip1->di_mode = 0;
+ dip1->di_size = 0;
+ dip1->di_blocks = 0;
+ memset(&dip1->di_db[0], 0,
+ (NDADDR + NIADDR) * sizeof(int32_t));
+ } else {
+ dip2 = (struct ufs2_dinode *)bp->b_data +
+ ino_to_fsbo(fs, cancelip->i_number);
+ if (cancelip->i_flags & SF_SNAPSHOT) {
+ KASSERT((ufs_rw32(dip2->di_flags, ns) &
+ SF_SNAPINVAL) != 0);
+ }
+ if (expungetype == BLK_NOCOPY || cancelip->i_nlink == 0)
+ dip2->di_mode = 0;
+ dip2->di_size = 0;
+ dip2->di_blocks = 0;
+ memset(&dip2->di_db[0], 0,
+ (NDADDR + NIADDR) * sizeof(int64_t));
+ }
+ bdwrite(bp);
UFS_WAPBL_END(mp);
- return error;
- }
- /*
- * Set a snapshot inode to be a zero length file, regular files
- * or unlinked snapshots to be completely unallocated.
- */
- if (fs->fs_magic == FS_UFS1_MAGIC) {
- dip1 = (struct ufs1_dinode *)bp->b_data +
- ino_to_fsbo(fs, cancelip->i_number);
- if (cancelip->i_flags & SF_SNAPSHOT) {
- dip1->di_flags =
- ufs_rw32(ufs_rw32(dip1->di_flags, ns) |
- SF_SNAPINVAL, ns);
- }
- if (expungetype == BLK_NOCOPY || cancelip->i_nlink == 0)
- dip1->di_mode = 0;
- dip1->di_size = 0;
- dip1->di_blocks = 0;
- memset(&dip1->di_db[0], 0, (NDADDR + NIADDR) * sizeof(int32_t));
- } else {
- dip2 = (struct ufs2_dinode *)bp->b_data +
- ino_to_fsbo(fs, cancelip->i_number);
- if (cancelip->i_flags & SF_SNAPSHOT) {
- dip2->di_flags =
- ufs_rw32(ufs_rw32(dip2->di_flags, ns) |
- SF_SNAPINVAL, ns);
- }
- if (expungetype == BLK_NOCOPY || cancelip->i_nlink == 0)
- dip2->di_mode = 0;
- dip2->di_size = 0;
- dip2->di_blocks = 0;
- memset(&dip2->di_db[0], 0, (NDADDR + NIADDR) * sizeof(int64_t));
}
- bdwrite(bp);
- UFS_WAPBL_END(mp);
/*
* Now go through and expunge all the blocks in the file
* using the function requested.
@@ -1198,6 +1344,8 @@ fullacct(struct vnode *vp, void *bap, in
{
int error;
+ KASSERT(exptype == BLK_NOCOPY);
+
if ((error = snapacct(vp, bap, oldblkp, lastblkp, fs, lblkno, exptype)))
return (error);
return (mapacct(vp, bap, oldblkp, lastblkp, fs, lblkno, exptype));
@@ -1220,12 +1368,13 @@ snapacct(struct vnode *vp, void *bap, in
int error, n;
const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8;
+ KASSERT(expungetype == BLK_NOCOPY);
error = UFS_WAPBL_BEGIN(mp);
if (error)
return error;
for ( n = 0; oldblkp < lastblkp; oldblkp++) {
blkno = idb_get(ip, bap, oldblkp);
- if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
+ if (blkno == 0 || blkno == BLK_NOCOPY)
continue;
lbn = fragstoblks(fs, blkno);
if (lbn < NDADDR) {
@@ -1284,6 +1433,7 @@ mapacct(struct vnode *vp, void *bap, int
int acctit, error, n;
const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8;
+ KASSERT(expungetype == BLK_NOCOPY);
error = UFS_WAPBL_BEGIN(mp);
if (error)
return error;
@@ -1313,6 +1463,32 @@ mapacct(struct vnode *vp, void *bap, int
return (0);
}
+/* count the number of blocks in a snapshot */
+static int
+snapcount(struct vnode *vp, void *bap, int oldblkp, int lastblkp,
+ struct fs *fs, daddr_t lblkno, int expungetype)
+{
+ daddr_t blkno;
+ struct inode *ip;
+ ino_t inum;
+ int acctit, n;
+
+ KASSERT(expungetype == BLK_SNAP);
+ ip = VTOI(vp);
+ inum = ip->i_number;
+ if (lblkno == -1)
+ acctit = 0;
+ else
+ acctit = 1;
+ for ( n = 0; oldblkp < lastblkp; oldblkp++, lblkno++) {
+ blkno = idb_get(ip, bap, oldblkp);
+ if (blkno == 0 || blkno == BLK_NOCOPY)
+ continue;
+ if (acctit && expungetype == BLK_SNAP)
+ *ip->i_snapblklist++ = lblkno;
+ }
+ return (0);
+}
/*
* Number of blocks that fit into the journal or zero if not logging.
*/
@@ -1746,6 +1922,20 @@ ffs_snapshot_mount(struct mount *mp)
snaploc--;
continue;
}
+ if (is_active_snapshot(si, ip)) {
+ printf("ffs_snapshot_mount: duplicate snapshot "
+ "inode %d\n", fs->fs_snapinum[snaploc]);
+ vput(vp);
+ vp = NULL;
+ for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
+ if (fs->fs_snapinum[loc] == 0)
+ break;
+ fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
+ }
+ fs->fs_snapinum[loc - 1] = 0;
+ snaploc--;
+ continue;
+ }
/*
* Read the block hints list. Use an empty list on
@@ -1859,6 +2049,10 @@ ffs_copyonwrite(void *v, struct buf *bp,
uint32_t gen;
int lower, upper, mid, snapshot_locked = 0, error = 0;
+ /* skip COW if we're writing to a snapshot inode */
+ ip = VTOI(bp->b_vp);
+ if (ip != NULL && (ip->i_flags & SF_SNAPSHOT) != 0)
+ return 0;
/*
* Check for valid snapshots.
*/
Home |
Main Index |
Thread Index |
Old Index