NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/58111: Tracking issue for potential ZFS data corruption



> >Number:         58111
> >Category:       kern
> >Synopsis:       Tracking issue for potential ZFS data corruption

Attached is a reproducer for the ZFS corruption bug.  There's a
coreutils patch to make "cp" use the FIOSEEKHOLE and FIOSEEKDATA
ioctls.  Coreutils "cp" and "dd" should be installed on the target as
"/usr/bin/gcp" and "gdd" (in your path), and the reproducer needs bash
as /bin/bash.  It should probably use NetBSD's dd with msgfmt=quiet and
NetBSD's /bin/sh with some tweaks, but I was lazy.

Running this reproducer on an 8 CPU qemu NetBSD amd64 VM with an 8GB ZFS
pool produced 11 groups of failures over 8 hours on an unpatched host.

Cheers,
Simon.
--
# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	reproducer.sh
#	repro10.sh
#	coreutils-copy.c.diff
#
# Extract reproducer.sh.  sed strips the leading 'X' guard character from
# every line of the here-document before writing it out.
echo x - 'reproducer.sh'
sed 's/^X//' >'reproducer.sh' << 'END-of-reproducer.sh'
X#!/bin/bash
X#
X# Run this script multiple times in parallel inside your pool's mount
X# to reproduce https://github.com/openzfs/zfs/issues/15526.  Like:
X#
X# ./reproducer.sh & ./reproducer.sh & ./reproducer.sh & ./reproducer.sh & wait
X#
X# Any difference between a copy and the seed file is printed by diff.
X# Exit status: 0 if every copy matches the seed file, 1 otherwise.
X#
X
X#if [ $(cat /sys/module/zfs/parameters/zfs_bclone_enabled) != "1" ] ; then
X#	echo "please set /sys/module/zfs/parameters/zfs_bclone_enabled = 1"
X#	exit
X#fi
X
X#CP=/home/rich/coreutils-9.1/src/cp
X#CP=/home/rich/coreutils-9.3/src/cp
X#CP=/home/rich/coreutils/src/cp
XCP=/usr/bin/gcp
X
X# Per-process file name prefix so parallel runs do not share files.
Xprefix="reproducer_${BASHPID}_"
X
X# Seed file: 1 MiB of random data that every later copy must match.
X# Without it none of the copies below can succeed, so give up early.
Xgdd if=/dev/urandom of="${prefix}0" bs=1M count=1 status=none || exit 1
X
X##### echo "writing files"
Xend=500
Xh=0
X# Each pass copies an earlier file (h) to file i and then immediately
X# copies file i to file j, so the second cp reads a file that has only
X# just been written.
Xfor i in $(seq 1 2 "$end") ; do
X	j=$((i + 1))
X	"${CP}" "${prefix}${h}" "${prefix}${i}"
X	"${CP}" "${prefix}${i}" "${prefix}${j}"
X	h=$((h + 1))
Xdone
X
X##### echo "checking files"
X# Remember any mismatch so the exit status covers all files, not just
X# the last one compared.
Xfailed=0
Xfor i in $(seq 1 "$end") ; do
X	diff "${prefix}0" "${prefix}${i}" || failed=1
Xdone
Xexit "$failed"
END-of-reproducer.sh
# Extract repro10.sh.  sed strips the leading 'X' guard character from
# every line of the here-document before writing it out.
echo x - 'repro10.sh'
sed 's/^X//' >'repro10.sh' << 'END-of-repro10.sh'
X#!/bin/sh
X#
X# Start ten copies of reproducer.sh (taken from the directory this
X# script lives in) in parallel and wait for all of them to finish.
X
Xdate
X# echo cleaning up previous test
Xrm -f reproducer_* 2> /dev/null
X
X# "$0" and the script path are quoted so that a directory name
X# containing whitespace is not split into several words.
Xscriptdir=$(dirname "$0")
X
Xfor n in 1 2 3 4 5 6 7 8 9 10 ; do
X	"${scriptdir}/reproducer.sh" &
Xdone
Xwait
END-of-repro10.sh
# Extract coreutils-copy.c.diff, a patch against coreutils src/copy.c.
# Going by its hunks, it forces SEEK_HOLE to be defined, swaps the
# lseek() SEEK_HOLE / SEEK_DATA calls for FIOSEEKHOLE / FIOSEEKDATA
# ioctls, and disables the "file looks sparse" heuristic with #if 0.
# The payload must stay byte-for-byte intact: sed only removes the
# leading 'X' guard character from each line.
printf '%s\n' 'x - coreutils-copy.c.diff'
sed -e 's/^X//' << 'END-of-coreutils-copy.c.diff' > 'coreutils-copy.c.diff'
X--- src/copy.c.orig	2023-08-29 21:39:27.000000000 +1000
X+++ src/copy.c	2024-04-05 02:48:41.652462664 +1100
X@@ -534,6 +534,7 @@
X   return true;
X }
X 
X+#define SEEK_HOLE	// XXX netbsd
X #ifdef SEEK_HOLE
X /* Perform an efficient extent copy, if possible.  This avoids
X    the overhead of detecting holes in hole-introducing/preserving
X@@ -562,7 +563,10 @@
X 
X   while (0 <= ext_start)
X     {
X-      off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
X+      //XXX off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
X+      off_t ext_end = ext_start;
X+      if (ioctl(src_fd, FIOSEEKHOLE, &ext_end) < 0)
X+	ext_end = -1;
X       if (ext_end < 0)
X         {
X           if (errno != ENXIO)
X@@ -641,7 +645,10 @@
X           break;
X         }
X 
X-      ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
X+      //XXX ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
X+      ext_start = dest_pos;
X+      if (ioctl(src_fd, FIOSEEKDATA, &ext_start) < 0)
X+	ext_start = -1;
X       if (ext_start < 0 && errno != ENXIO)
X         goto cannot_lseek;
X     }
X@@ -1141,13 +1148,19 @@
X 
X   /* Only attempt SEEK_HOLE if this heuristic
X      suggests the file is sparse.  */
X+#if 0	// XXX skip this check!
X   if (! (HAVE_STRUCT_STAT_ST_BLOCKS
X          && S_ISREG (sb->st_mode)
X          && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE))
X     return PLAIN_SCANTYPE;
X+#endif	// XXX
X 
X #ifdef SEEK_HOLE
X-  off_t ext_start = lseek (fd, 0, SEEK_DATA);
X+  //XXX off_t ext_start = lseek (fd, 0, SEEK_DATA);
X+  off_t ext_start = 0;
X+  if (ioctl(fd, FIOSEEKDATA, &ext_start) < 0)
X+    ext_start = -1;
X+
X   if (0 <= ext_start || errno == ENXIO)
X     {
X       scan_inference->ext_start = ext_start;
END-of-coreutils-copy.c.diff
# Stop here so that any text following the archive (for example a mail
# or web page footer) is not interpreted by sh.
exit



Home | Main Index | Thread Index | Old Index