NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/60286 (zfs sparse zvol reports wrong volume size)



Em sex, 2026-05-22 às 16:47 +0000, riastradh%NetBSD.org@localhost escreveu:
Synopsis: zfs sparse zvol reports wrong volume size

Responsible-Changed-From-To: kern-bug-people->mlelstv
Responsible-Changed-By: riastradh%NetBSD.org@localhost
Responsible-Changed-When: Fri, 22 May 2026 16:47:19 +0000
Responsible-Changed-Why:
Can you take a look?  Looks like you added the original dkw_size =
dg_secperunit assignment.



I have also been playing with zfs, and I found this patch very useful because it
keeps the ARC from taking all available memory.
Are there any zfs gurus out there?
Claude Code pointed out these changes. I have been using this patch for a long time, and my zfs sometimes performs better than FreeBSD's.



Index: usr/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c,v
retrieving revision 1.22
diff -u -r1.22 arc.c
--- usr/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c	3 Aug 2022 01:53:06 -0000	1.22
+++ usr/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c	12 Sep 2023 16:44:48 -0000
@@ -380,8 +380,16 @@
 /*
  * These tunables are for performance analysis.
  */
+/*
+ * zfs_arc_max and zfs_arc_min control arc_c_max and arc_c_min at
+ * initialization and are then set to the computed values.
+ */
 uint64_t zfs_arc_max;
 uint64_t zfs_arc_min;
+/*
+ * zfs_arc_meta_{limit,min} control arc_meta_* at initialization but
+ * for unclear reasons are not set to the computed values.
+ */
 uint64_t zfs_arc_meta_limit = 0;
 uint64_t zfs_arc_meta_min = 0;
 int zfs_arc_grow_retry = 0;
@@ -757,11 +765,32 @@
 	kstat_named_t arcstat_l2_write_buffer_list_iter;
 	kstat_named_t arcstat_l2_write_buffer_list_null_iter;
 	kstat_named_t arcstat_memory_throttle_count;
+	/*
+	 * Number of bytes of metadata buffers in ARC.
+	 */
 	kstat_named_t arcstat_meta_used;
+	/*
+	 * Number of bytes to which that meta usage will be reduced
+	 * during routine adjustment.
+	 */
 	kstat_named_t arcstat_meta_limit;
+	/*
+	 * Maximum size (bytes) of stored meta data for this
+	 * instantiation of zfs.  This is a measurement, not control,
+	 * and probably should be named _hiwat instead.
+	 */
 	kstat_named_t arcstat_meta_max;
+	/*
+	 * Number of bytes of metadata in ARC below which data is
+	 * preferentially evicted.
+	 */
 	kstat_named_t arcstat_meta_min;
+	/* Apparently unused. */
 	kstat_named_t arcstat_sync_wait_for_async;
+	/*
+	 * Count of reads that succeed because a prior predictive
+	 * prefetch has already completed.
+	 */
 	kstat_named_t arcstat_demand_hit_predictive_prefetch;
 } arc_stats_t;
 
@@ -3582,6 +3611,13 @@
 
 	total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
 
+#if 0
+	if (total_evicted != 0) {
+		printf("arc_adjust_meta: evicted %" PRIu64 " with target %" PRId64 "\n",
+		       total_evicted, target);
+	}
+#endif
+
 	return (total_evicted);
 }
 
@@ -3753,7 +3789,7 @@
 
 		/*
 		 * If we couldn't evict our target number of bytes from
-		 * data, we try to get the rest from data.
+		 * data, we try to get the rest from metadata.
 		 */
 		target -= bytes;
 
@@ -3802,6 +3838,11 @@
 	total_evicted +=
 	    arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_METADATA);
 
+	/* Log if likely to be interesting.
+	if (total_evicted >= 4 * 1024 * 1024) {
+		printf("arc_adjust evicted %" PRIu64 "\n", total_evicted);
+	}
+	*/
 	return (total_evicted);
 }
 
@@ -3831,6 +3872,8 @@
 
 	(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_DATA, retry);
 	(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry);
+
+	//printf("arc_flush completed\n");
 }
 
 void
@@ -4158,6 +4201,9 @@
 			arc_no_grow = B_TRUE;
 			arc_warm = B_TRUE;
 
+			/*printf("arc_reclaim_thread: negative free_memory %" PRId64 "\n",
+			       free_memory);
+			*/
 			/*
 			 * Wait at least zfs_grow_retry (default 60) seconds
 			 * before considering growing.
@@ -6081,6 +6127,9 @@
 
 	/* Start out with 1/8 of all memory */
 	arc_c = kmem_size() / 8;
+#if 0
+	printf("ARCI 001 arc_c %" PRIu64 "\n", arc_c);
+#endif
 
 #ifdef illumos
 #ifdef _KERNEL
@@ -6094,12 +6143,32 @@
 #endif	/* illumos */
 	/* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */
 	arc_c_min = MAX(arc_c / 4, arc_abs_min);
+	printf("ARCI 002 arc_abs_min %" PRIu64 "\n", arc_abs_min);
+	printf("ARCI 002 arc_c_min %" PRIu64 "\n", arc_c_min);
+
+#if defined(__NetBSD__) && defined(_KERNEL)
+	/*
+	 * Because NetBSD/zfs lacks an effective mechanism for memory
+	 * pressure to reclaim from ARC, keep arc_c_max moderate
+	 * rather than allowing ARC to consume nearly all memory.
+	 * XXX Revisit when reclaim works.
+	 */
+	arc_c_max = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
+	printf("ARCI 005 arc_c_max %" PRIu64 "\n", arc_c_max);
+#else
+	/* XXX
+	 * This comment does not match the code at all!
+	 * Plus, it's dangerous to assume that arc_c is still 1/8 of RAM at this point.
+	 */
 	/* set max to 1/2 of all memory, or all but 1GB, whichever is more */
 	if (arc_c * 8 >= 1 << 30)
 		arc_c_max = (arc_c * 8) - (1 << 30);
 	else
 		arc_c_max = arc_c_min;
+	printf("ARCI 003 arc_c_max %" PRIu64 "\n", arc_c_max);
 	arc_c_max = MAX(arc_c * 5, arc_c_max);
+	printf("ARCI 004 arc_c_max %" PRIu64 "\n", arc_c_max);
+#endif
 
 	/*
 	 * In userland, there's only the memory pressure that we artificially
@@ -6124,10 +6193,24 @@
 		arc_c_min = zfs_arc_min;
 #endif
 
+	/*
+	 * Start out with the target usage for ARC as high as we are
+	 * willing to go.  (This likely relies on some memory pressure
+	 * mechanism to reduce it when freeing is requested.)
+	 */
 	arc_c = arc_c_max;
+
+	/*
+	 * Compute the MRU's portion of target (and implicitly MFU's).
+	 */
 	arc_p = (arc_c >> 1);
 	arc_size = 0;
 
+	printf("ARCI 010 arc_c_min %" PRIu64 "\n", arc_c_min);
+	printf("ARCI 010 arc_p     %" PRIu64 "\n", arc_p);
+	printf("ARCI 010 arc_c     %" PRIu64 "\n", arc_c);
+	printf("ARCI 010 arc_c_max %" PRIu64 "\n", arc_c_max);
+
 	/* limit meta-data to 1/4 of the arc capacity */
 	arc_meta_limit = arc_c_max / 4;
 
@@ -6135,9 +6218,23 @@
 	if (zfs_arc_meta_limit > 0 && zfs_arc_meta_limit <= arc_c_max)
 		arc_meta_limit = zfs_arc_meta_limit;
 
+	/*printf("ARCI 011 arc_meta_limit %" PRIu64 "\n", arc_meta_limit);*/
+
+#if defined(__NetBSD__)
+	/*
+	 * XXX Explain why this is reasonable; it appears to protect
+	 * the cache from going below half the max allowed amount of
+	 * metadata, and that is not obviously sensible.
+	 */
+#else
 	if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0)
 		arc_c_min = arc_meta_limit / 2;
+#endif
 
+	/*
+	 * Respect tunable, and default meta minimum to half the
+	 * overall minimum.
+	 */
 	if (zfs_arc_meta_min > 0) {
 		arc_meta_min = zfs_arc_meta_min;
 	} else {
@@ -6163,6 +6260,7 @@
 		zfs_arc_num_sublists_per_state = MAX(max_ncpus, 1);
 
 	/* if kmem_flags are set, lets try to use less memory */
+	/* XXX Should this change arc_c_max?  Explain yes/no. */
 	if (kmem_debugging())
 		arc_c = arc_c / 2;
 	if (arc_c < arc_c_min)
@@ -6170,6 +6268,7 @@
 
 	zfs_arc_min = arc_c_min;
 	zfs_arc_max = arc_c_max;
+	/* Why are zfs_arc_meta_limit and zfs_arc_meta_min not written? */
 
 	arc_state_init();
 	buf_init();
@@ -6219,6 +6318,12 @@
 		    zfs_dirty_data_max_max);
 	}
 
+#if defined(__NetBSD__)
+	/*
+	 * XXX Disable prefetch if RAM is low.
+	 */
+#endif
+
 #ifdef _KERNEL
 #ifdef __FreeBSD__
 	if (TUNABLE_INT_FETCH("vfs.zfs.prefetch_disable", &zfs_prefetch_disable))


Home | Main Index | Thread Index | Old Index