NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: kern/58111: Tracking issue for potential ZFS data corruption
> >Number: 58111
> >Category: kern
> >Synopsis: Tracking issue for potential ZFS data corruption
Attached is a reproducer for the ZFS corruption bug. It includes a
coreutils patch that makes "cp" use the FIOSEEKHOLE and FIOSEEKDATA
ioctls. Coreutils "cp" and "dd" should be installed on the target as
"/usr/bin/gcp" and "gdd" (the latter anywhere in your PATH), and the
reproducer needs bash installed as /bin/bash. It should probably use
NetBSD's dd with msgfmt=quiet and NetBSD's /bin/sh with some tweaks,
but I was lazy.
Running this reproducer on an 8-CPU qemu NetBSD amd64 VM with an 8GB ZFS
pool produced 11 groups of failures over 8 hours on an unpatched host.
Cheers,
Simon.
--
# This is a shell archive. Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file". Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
# reproducer.sh
# repro10.sh
# coreutils-copy.c.diff
#
# Extract reproducer.sh (bash). The here-doc delimiter is quoted, so the
# payload is written out literally; sed strips the leading "X" guard.
echo x - 'reproducer.sh'
sed 's/^X//' >'reproducer.sh' << 'END-of-reproducer.sh'
X#!/bin/bash
X#
X# Reproducer for https://github.com/openzfs/zfs/issues/15526.
X#
X# Run this script multiple times in parallel inside your pool's mount,
X# like:
X#
X# ./reproducer.sh & ./reproducer.sh & ./reproducer.sh & ./reproducer.sh & wait
X#
X# Each instance creates a 1M random seed file, makes 500 copies of it
X# (every copy is made from an earlier copy) and then diffs each copy
X# against the seed. Any diff output means a copy does not match.
X#
X# Requires "gdd" in PATH and a cp binary at ${CP} (default /usr/bin/gcp;
X# set CP in the environment to point at a different binary).
X#
X# Exit status: 0 if every copy matched the seed, 1 if the seed could not
X# be created or any diff failed.
X
X#if [ $(cat /sys/module/zfs/parameters/zfs_bclone_enabled) != "1" ] ; then
X# echo "please set /sys/module/zfs/parameters/zfs_bclone_enabled = 1"
X# exit
X#fi
X
X#CP=/home/rich/coreutils-9.1/src/cp
X#CP=/home/rich/coreutils-9.3/src/cp
X#CP=/home/rich/coreutils/src/cp
XCP=${CP:-/usr/bin/gcp}
X
Xprefix="reproducer_${BASHPID}_"
X
X# Without the seed file every later cp/diff would only produce noise, so
X# stop right away if it cannot be created.
Xif ! gdd if=/dev/urandom of="${prefix}0" bs=1M count=1 status=none; then
X  echo "reproducer: cannot create seed file ${prefix}0" >&2
X  exit 1
Xfi
X
X##### echo "writing files"
Xend=500
Xh=0
X# i walks the odd numbers; each round writes files i and i+1, reading
X# from file h, which was written in an earlier round (or is the seed).
Xfor ((i = 1; i <= end; i += 2)); do
X  j=$((i + 1))
X  "${CP}" "${prefix}${h}" "${prefix}${i}"
X  "${CP}" "${prefix}${i}" "${prefix}${j}"
X  h=$((h + 1))
Xdone
X
X##### echo "checking files"
X# Keep checking after a mismatch so every bad copy is reported.
Xfailed=0
Xfor ((i = 1; i <= end; i++)); do
X  diff "${prefix}0" "${prefix}${i}" || failed=1
Xdone
X
Xexit "${failed}"
END-of-reproducer.sh
# Extract repro10.sh (POSIX sh). The here-doc delimiter is quoted, so the
# payload is written out literally; sed strips the leading "X" guard.
echo x - 'repro10.sh'
sed 's/^X//' >'repro10.sh' << 'END-of-repro10.sh'
X#!/bin/sh
X#
X# Run several copies of reproducer.sh in parallel in the current
X# directory and wait for all of them to finish.
X#
X# usage: repro10.sh [count]
X#   count  number of parallel reproducer.sh instances (default: 10)
X#
X# reproducer.sh is looked up in the directory this script lives in.
X
Xnjobs=${1:-10}
Xcase ${njobs} in
X  *[!0-9]*)
X    echo "usage: $0 [count]" >&2
X    exit 2
X    ;;
Xesac
X
Xdate
X# echo cleaning up previous test
Xrm -f reproducer_* 2> /dev/null
X
X# Quote $0 so a script path containing spaces still resolves.
Xscriptdir=$(dirname "$0")
X
Xi=0
Xwhile [ "${i}" -lt "${njobs}" ]; do
X  "${scriptdir}/reproducer.sh" &
X  i=$((i + 1))
Xdone
Xwait
END-of-repro10.sh
# Extract the coreutils patch for src/copy.c. The here-doc delimiter is
# quoted, so the payload is written out literally; sed strips the leading
# "X" guard from every line. As the hunks below show, the patch:
#   - defines SEEK_HOLE unconditionally ahead of the "#ifdef SEEK_HOLE"
#     section,
#   - replaces the lseek() SEEK_HOLE / SEEK_DATA calls with the
#     FIOSEEKHOLE / FIOSEEKDATA ioctls, setting the offset to -1 when the
#     ioctl fails,
#   - wraps the "file looks sparse" heuristic in "#if 0" so the early
#     return of PLAIN_SCANTYPE is skipped.
echo x - 'coreutils-copy.c.diff'
sed 's/^X//' >'coreutils-copy.c.diff' << 'END-of-coreutils-copy.c.diff'
X--- src/copy.c.orig 2023-08-29 21:39:27.000000000 +1000
X+++ src/copy.c 2024-04-05 02:48:41.652462664 +1100
X@@ -534,6 +534,7 @@
X return true;
X }
X
X+#define SEEK_HOLE // XXX netbsd
X #ifdef SEEK_HOLE
X /* Perform an efficient extent copy, if possible. This avoids
X the overhead of detecting holes in hole-introducing/preserving
X@@ -562,7 +563,10 @@
X
X while (0 <= ext_start)
X {
X- off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
X+ //XXX off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
X+ off_t ext_end = ext_start;
X+ if (ioctl(src_fd, FIOSEEKHOLE, &ext_end) < 0)
X+ ext_end = -1;
X if (ext_end < 0)
X {
X if (errno != ENXIO)
X@@ -641,7 +645,10 @@
X break;
X }
X
X- ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
X+ //XXX ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
X+ ext_start = dest_pos;
X+ if (ioctl(src_fd, FIOSEEKDATA, &ext_start) < 0)
X+ ext_start = -1;
X if (ext_start < 0 && errno != ENXIO)
X goto cannot_lseek;
X }
X@@ -1141,13 +1148,19 @@
X
X /* Only attempt SEEK_HOLE if this heuristic
X suggests the file is sparse. */
X+#if 0 // XXX skip this check!
X if (! (HAVE_STRUCT_STAT_ST_BLOCKS
X && S_ISREG (sb->st_mode)
X && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE))
X return PLAIN_SCANTYPE;
X+#endif // XXX
X
X #ifdef SEEK_HOLE
X- off_t ext_start = lseek (fd, 0, SEEK_DATA);
X+ //XXX off_t ext_start = lseek (fd, 0, SEEK_DATA);
X+ off_t ext_start = 0;
X+ if (ioctl(fd, FIOSEEKDATA, &ext_start) < 0)
X+ ext_start = -1;
X+
X if (0 <= ext_start || errno == ENXIO)
X {
X scan_inference->ext_start = ext_start;
END-of-coreutils-copy.c.diff
# End of the archive: stop here so the shell does not try to run any text
# that follows it in the saved file.
exit
Home |
Main Index |
Thread Index |
Old Index