NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: kern/60286 (zfs sparse zvol reports wrong volume size)
Em sex, 2026-05-22 às 16:47 +0000, riastradh%NetBSD.org@localhost escreveu:
Synopsis: zfs sparse zvol reports wrong volume size
Responsible-Changed-From-To: kern-bug-people->mlelstv
Responsible-Changed-By: riastradh%NetBSD.org@localhost
Responsible-Changed-When: Fri, 22 May 2026 16:47:19 +0000
Responsible-Changed-Why:
Can you take a look? Looks like you added the original dkw_size =
dg_secperunit assignment.
I have also been playing with zfs... and I found this patch very useful because it keeps
the ARC from taking all available memory....
Are there any zfs gurus out there???
Claude-code pointed out these changes... I have been using it for a long time and my zfs sometimes performs better than FreeBSD...
Index: usr/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c,v
retrieving revision 1.22
diff -u -r1.22 arc.c
--- usr/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c 3 Aug 2022 01:53:06 -0000 1.22
+++ usr/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c 12 Sep 2023 16:44:48 -0000
@@ -380,8 +380,16 @@
/*
* These tunables are for performance analysis.
*/
+/*
+ * zfs_arc_max and zfs_arc_min control arc_c_max and arc_c_min at
+ * initialization and are then set to the computed values.
+ */
uint64_t zfs_arc_max;
uint64_t zfs_arc_min;
+/*
+ * zfs_arc_meta_{limit,min} control arc_meta_* at initialization but
+ * for unclear reasons are not set to the computed values.
+ */
uint64_t zfs_arc_meta_limit = 0;
uint64_t zfs_arc_meta_min = 0;
int zfs_arc_grow_retry = 0;
@@ -757,11 +765,32 @@
kstat_named_t arcstat_l2_write_buffer_list_iter;
kstat_named_t arcstat_l2_write_buffer_list_null_iter;
kstat_named_t arcstat_memory_throttle_count;
+ /*
+ * Number of bytes of metadata buffers in ARC.
+ */
kstat_named_t arcstat_meta_used;
+ /*
+ * Number of bytes to which that meta usage will be reduced
+ * during routine adjustment.
+ */
kstat_named_t arcstat_meta_limit;
+ /*
+ * Maximum size (bytes) of stored meta data for this
+ * instantiation of zfs. This is a measurement, not control,
+ * and probably should be named _hiwat instead.
+ */
kstat_named_t arcstat_meta_max;
+ /*
+ * Number of bytes of metadata in ARC below which data is
+ * preferentially evicted.
+ */
kstat_named_t arcstat_meta_min;
+ /* Apparently unused. */
kstat_named_t arcstat_sync_wait_for_async;
+ /*
+ * Count of reads that succeed because a prior predictive
+ * prefetch has already completed.
+ */
kstat_named_t arcstat_demand_hit_predictive_prefetch;
} arc_stats_t;
@@ -3582,6 +3611,13 @@
total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
+#if 0
+ if (total_evicted != 0) {
+ printf("arc_adjust_meta: evicted %" PRIu64 " with target %" PRId64 "\n",
+ total_evicted, target);
+ }
+#endif
+
return (total_evicted);
}
@@ -3753,7 +3789,7 @@
/*
* If we couldn't evict our target number of bytes from
- * data, we try to get the rest from data.
+ * data, we try to get the rest from metadata.
*/
target -= bytes;
@@ -3802,6 +3838,11 @@
total_evicted +=
arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_METADATA);
+ /* Log if likely to be interesting.
+ if (total_evicted >= 4 * 1024 * 1024) {
+ printf("arc_adjust evicted %" PRIu64 "\n", total_evicted);
+ }
+ */
return (total_evicted);
}
@@ -3831,6 +3872,8 @@
(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_DATA, retry);
(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry);
+
+ //printf("arc_flush completed\n");
}
void
@@ -4158,6 +4201,9 @@
arc_no_grow = B_TRUE;
arc_warm = B_TRUE;
+ /*printf("arc_reclaim_thread: negative free_memory %" PRId64 "\n",
+ free_memory);
+ */
/*
* Wait at least zfs_grow_retry (default 60) seconds
* before considering growing.
@@ -6081,6 +6127,9 @@
/* Start out with 1/8 of all memory */
arc_c = kmem_size() / 8;
+#if 0
+ printf("ARCI 001 arc_c %" PRIu64 "\n", arc_c);
+#endif
#ifdef illumos
#ifdef _KERNEL
@@ -6094,12 +6143,32 @@
#endif /* illumos */
/* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */
arc_c_min = MAX(arc_c / 4, arc_abs_min);
+ printf("ARCI 002 arc_abs_min %" PRIu64 "\n", arc_abs_min);
+ printf("ARCI 002 arc_c_min %" PRIu64 "\n", arc_c_min);
+
+#if defined(__NetBSD__) && defined(_KERNEL)
+ /*
+ * Because NetBSD/zfs lacks an effective mechanism for memory
+ * pressure to reclaim from ARC, keep arc_c_max moderate
+ * rather than allowing ARC to consume nearly all memory.
+ * XXX Revisit when reclaim works.
+ */
+ arc_c_max = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
+ printf("ARCI 005 arc_c_max %" PRIu64 "\n", arc_c_max);
+#else
+ /* XXX
+ * This comment does not match the code at all!
+ * Plus, it's dangerous to assume that arc_c is still 1/8 of RAM at this point.
+ */
/* set max to 1/2 of all memory, or all but 1GB, whichever is more */
if (arc_c * 8 >= 1 << 30)
arc_c_max = (arc_c * 8) - (1 << 30);
else
arc_c_max = arc_c_min;
+ printf("ARCI 003 arc_c_max %" PRIu64 "\n", arc_c_max);
arc_c_max = MAX(arc_c * 5, arc_c_max);
+ printf("ARCI 004 arc_c_max %" PRIu64 "\n", arc_c_max);
+#endif
/*
* In userland, there's only the memory pressure that we artificially
@@ -6124,10 +6193,24 @@
arc_c_min = zfs_arc_min;
#endif
+ /*
+ * Start out with the target usage for ARC as high as we are
+ * willing to go. (This likely relies on some memory pressure
+ * mechanism to reduce it when freeing is requested.)
+ */
arc_c = arc_c_max;
+
+ /*
+ * Compute the MRU's portion of target (and implicitly MFU's).
+ */
arc_p = (arc_c >> 1);
arc_size = 0;
+ printf("ARCI 010 arc_c_min %" PRIu64 "\n", arc_c_min);
+ printf("ARCI 010 arc_p %" PRIu64 "\n", arc_p);
+ printf("ARCI 010 arc_c %" PRIu64 "\n", arc_c);
+ printf("ARCI 010 arc_c_max %" PRIu64 "\n", arc_c_max);
+
/* limit meta-data to 1/4 of the arc capacity */
arc_meta_limit = arc_c_max / 4;
@@ -6135,9 +6218,23 @@
if (zfs_arc_meta_limit > 0 && zfs_arc_meta_limit <= arc_c_max)
arc_meta_limit = zfs_arc_meta_limit;
+ /*printf("ARCI 011 arc_meta_limit %" PRIu64 "\n", arc_meta_limit);*/
+
+#if defined(__NetBSD__)
+ /*
+ * XXX Explain why this is reasonable; it appears to protect
+ * the cache from going below half the max allowed amount of
+ * metadata, and that is not obviously sensible.
+ */
+#else
if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0)
arc_c_min = arc_meta_limit / 2;
+#endif
+ /*
+ * Respect tunable, and default meta minimum to half the
+ * overall minimum.
+ */
if (zfs_arc_meta_min > 0) {
arc_meta_min = zfs_arc_meta_min;
} else {
@@ -6163,6 +6260,7 @@
zfs_arc_num_sublists_per_state = MAX(max_ncpus, 1);
/* if kmem_flags are set, lets try to use less memory */
+ /* XXX Should this change arc_c_max? Explain yes/no. */
if (kmem_debugging())
arc_c = arc_c / 2;
if (arc_c < arc_c_min)
@@ -6170,6 +6268,7 @@
zfs_arc_min = arc_c_min;
zfs_arc_max = arc_c_max;
+ /* Why are zfs_arc_meta_limit and zfs_arc_meta_min not written? */
arc_state_init();
buf_init();
@@ -6219,6 +6318,12 @@
zfs_dirty_data_max_max);
}
+#if defined(__NetBSD__)
+ /*
+ * XXX Disable prefetch if RAM is low.
+ */
+#endif
+
#ifdef _KERNEL
#ifdef __FreeBSD__
if (TUNABLE_INT_FETCH("vfs.zfs.prefetch_disable", &zfs_prefetch_disable))
Home |
Main Index |
Thread Index |
Old Index