tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: ZFS tunable parameters
sotiris%lamprinidis.com@localhost (Sotiris Lamprinidis) writes:
>I looked into exposing ZFS tunables through sysctl and it seems
>straightforward. Despite being under "dist" I am not sure if this is true
>upstream, so the patch is directly on arc.c (and I think it'd be tricky to
>implement otherwise).
> vfs.zfs.arc_min = 392903680
> vfs.zfs.arc_max = 3143229440
> vfs.zfs.arc_average_blocksize = 8192
> vfs.zfs.arc_shrink_shift = 7
> vfs.zfs.compressed_arc_enable = 1
> vfs.zfs.arc_meta_limit = 785807360
> vfs.zfs.arc_meta_min = 0
> vfs.zfs.arc_free_target = 682
Yes. I'm using a similar patch, but I haven't yet found which knobs
are actually useful. I do have some other adjustments that help
with memory usage.
Index: arc.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c,v
retrieving revision 1.23
diff -p -u -r1.23 arc.c
--- arc.c 3 May 2026 22:41:40 -0000 1.23
+++ arc.c 24 Jun 2026 16:25:58 -0000
@@ -275,7 +275,7 @@ int arc_procfd;
#endif
#endif /* illumos */
-#ifdef __NetBSD__
+#if defined(__NetBSD__) && defined(_KERNEL)
#include <uvm/uvm.h>
#ifndef btop
#define btop(x) ((x) / PAGE_SIZE)
@@ -288,21 +288,30 @@ int arc_procfd;
#define freemem uvm_availmem(false)
#define minfree uvmexp.freemin
#define desfree uvmexp.freetarg
-#define zfs_arc_free_target desfree
+//#define zfs_arc_free_target desfree
#define lotsfree (desfree * 2)
+#define maxfree uvmexp.npages
#define availrmem desfree
#define swapfs_minfree 0
#define swapfs_reserve 0
#undef curproc
#define curproc curlwp
+u_int zfs_arc_free_target;
+/*
+ * Seed zfs_arc_free_target with the current value of desfree
+ * (uvmexp.freetarg on NetBSD).
+ */
+static void
+arc_free_target_init(void)
+{
+	zfs_arc_free_target = desfree;
+}
+
static void *zio_arena;
#include <sys/callback.h>
/* Structures used for memory and kva space reclaim. */
static struct callback_entry arc_kva_reclaim_entry;
-#endif /* __NetBSD__ */
+#endif /* __NetBSD__ && _KERNEL */
static kmutex_t arc_reclaim_lock;
static kcondvar_t arc_reclaim_thread_cv;
@@ -467,6 +476,68 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_meta
"ARC metadata limit");
#endif
+#if defined(__NetBSD__) && defined(_KERNEL)
+
+static int sysctl_vfs_zfs_arc_meta_limit(SYSCTLFN_PROTO);
+static int sysctl_vfs_zfs_arc_max(SYSCTLFN_PROTO);
+static int sysctl_vfs_zfs_arc_min(SYSCTLFN_PROTO);
+static int sysctl_vfs_zfs_arc_free_target(SYSCTLFN_PROTO);
+
+/*
+ * Register the ARC tunables with sysctl.
+ *
+ * Creates the node "zfs_arc" directly under CTL_VFS and attaches one
+ * read-write leaf per tunable.  Leaves whose new value has to be
+ * validated go through a sysctl_vfs_zfs_arc_*() handler; the others
+ * are exported as plain variables.
+ */
+SYSCTL_SETUP(sysctl_vfs_zfs_arc_setup, "sysctl vfs.zfs_arc subtree setup")
+{
+	const struct sysctlnode *rnode = NULL;
+
+	/*
+	 * vfs.zfs is created in zfs_ioctl.c; the ARC knobs live in their
+	 * own vfs.zfs_arc node, which is created here.
+	 */
+	if (sysctl_createv(clog, 0, NULL, &rnode,
+	    CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "zfs_arc",
+	    SYSCTL_DESCR("zfs"),
+	    NULL, 0, NULL, 0,
+	    CTL_VFS, CTL_CREATE, CTL_EOL) != 0) {
+		/* Without the parent node there is nothing to attach to. */
+		return;
+	}
+
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "meta_limit", SYSCTL_DESCR("ARC metadata limit"),
+	    sysctl_vfs_zfs_arc_meta_limit, 0,
+	    &zfs_arc_meta_limit, sizeof(zfs_arc_meta_limit),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "meta_min", SYSCTL_DESCR("ARC metadata minimum"),
+	    NULL, 0, &zfs_arc_meta_min, sizeof(zfs_arc_meta_min),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
+	    "shrink_shift", SYSCTL_DESCR("ARC shrink shift"),
+	    NULL, 0, &zfs_arc_shrink_shift, sizeof(zfs_arc_shrink_shift),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "max", SYSCTL_DESCR("Maximum ARC size"),
+	    sysctl_vfs_zfs_arc_max, 0,
+	    &zfs_arc_max, sizeof(zfs_arc_max),
+	    CTL_CREATE, CTL_EOL);
+	/* The description used to read "Maximum ARC size" (copy-paste). */
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "min", SYSCTL_DESCR("Minimum ARC size"),
+	    sysctl_vfs_zfs_arc_min, 0,
+	    &zfs_arc_min, sizeof(zfs_arc_min),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
+	    "compressed", SYSCTL_DESCR("ARC compression"),
+	    NULL, 0, &zfs_compressed_arc_enabled, sizeof(zfs_compressed_arc_enabled),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
+	    "free_target", SYSCTL_DESCR("Desired number of free pages below which ARC triggers reclaim"),
+	    sysctl_vfs_zfs_arc_free_target, 0,
+	    &zfs_arc_free_target, sizeof(zfs_arc_free_target),
+	    CTL_CREATE, CTL_EOL);
+}
+
+#endif
+
/*
* Note that buffers can be in one of 6 states:
* ARC_anon - anonymous (discussed below)
@@ -1168,6 +1239,151 @@ sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_AR
}
#endif
+#if defined(__NetBSD__) && defined(_KERNEL)
+/*
+ * sysctl handler for the "meta_limit" leaf.
+ *
+ * A read reports the value stored behind the node.  A write is accepted
+ * only when the new value is non-zero and does not exceed arc_c_max; it
+ * is then copied to arc_meta_limit and stored back through the node's
+ * data pointer.
+ *
+ * Returns 0 on success, the error from sysctl_lookup(), or EINVAL for
+ * an out-of-range value.
+ */
+static int
+sysctl_vfs_zfs_arc_meta_limit(SYSCTLFN_ARGS)
+{
+	struct sysctlnode scratch;
+	uint64_t limit;
+	int rc;
+
+	/* Let sysctl_lookup() work on a private copy of the value. */
+	limit = *(uint64_t *)rnode->sysctl_data;
+	scratch = *rnode;
+	scratch.sysctl_data = &limit;
+
+	rc = sysctl_lookup(SYSCTLFN_CALL(&scratch));
+	if (rc != 0)
+		return (rc);
+	if (newp == NULL) {
+		/* Read-only access: nothing to validate or store. */
+		return (0);
+	}
+
+	if (limit == 0 || limit > arc_c_max)
+		return (EINVAL);
+
+	arc_meta_limit = limit;
+	*(uint64_t *)rnode->sysctl_data = limit;
+
+	return (0);
+}
+
+/*
+ * sysctl handler for the "max" leaf.
+ *
+ * A read reports the value stored behind the node.  A write is rejected
+ * with EINVAL unless the new value lies within [arc_abs_min,
+ * kmem_size()], is at least arc_c_min and, when zfs_arc_meta_limit is
+ * non-zero, at least zfs_arc_meta_limit.
+ *
+ * On success arc_c_max and arc_c are set to the new value, arc_p to half
+ * of arc_c, and, if zfs_arc_meta_limit is 0, arc_meta_limit to a quarter
+ * of arc_c_max.  The resulting arc_c is stored back through the node's
+ * data pointer.
+ *
+ * Returns 0 on success, the error from sysctl_lookup(), or EINVAL.
+ */
+static int
+sysctl_vfs_zfs_arc_max(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	uint64_t val;
+	int error;
+
+	/* Let sysctl_lookup() work on a private copy of the value. */
+	val = *(uint64_t *)rnode->sysctl_data;
+
+	node = *rnode;
+	node.sysctl_data = &val;
+
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return (error);
+
+#if 0
+	if (zfs_arc_max == 0) {
+		/* Loader tunable so blindly set */
+		zfs_arc_max = val;
+		return (0);
+	}
+#endif
+
+	if (val < arc_abs_min || val > kmem_size())
+		return (EINVAL);
+	if (val < arc_c_min)
+		return (EINVAL);
+	if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit)
+		return (EINVAL);
+
+	arc_c_max = val;
+
+	arc_c = arc_c_max;
+	arc_p = (arc_c >> 1);
+
+	if (zfs_arc_meta_limit == 0) {
+		/* limit meta-data to 1/4 of the arc capacity */
+		arc_meta_limit = arc_c_max / 4;
+	}
+
+#if 0
+	/* if kmem_flags are set, lets try to use less memory */
+	if (kmem_debugging())
+		arc_c = arc_c / 2;
+#endif
+
+	*(uint64_t *)rnode->sysctl_data = arc_c;
+	return (0);
+}
+
+/*
+ * sysctl handler for the "min" leaf.
+ *
+ * A read reports the value stored behind the node.  A write must lie
+ * within [arc_abs_min, arc_c_max], otherwise EINVAL is returned.  On
+ * success arc_c_min takes the new value, arc_meta_min becomes half of
+ * it when zfs_arc_meta_min is 0, and arc_c is raised to arc_c_min if it
+ * was below.  arc_c_min is stored back through the node's data pointer.
+ *
+ * Returns 0 on success, the error from sysctl_lookup(), or EINVAL.
+ */
+static int
+sysctl_vfs_zfs_arc_min(SYSCTLFN_ARGS)
+{
+	struct sysctlnode scratch;
+	uint64_t newmin;
+	int rc;
+
+	/* Let sysctl_lookup() work on a private copy of the value. */
+	newmin = *(uint64_t *)rnode->sysctl_data;
+	scratch = *rnode;
+	scratch.sysctl_data = &newmin;
+
+	rc = sysctl_lookup(SYSCTLFN_CALL(&scratch));
+	if (rc != 0)
+		return (rc);
+	if (newp == NULL) {
+		/* Read-only access: nothing to validate or store. */
+		return (0);
+	}
+
+#if 0
+	if (zfs_arc_min == 0) {
+		/* Loader tunable so blindly set */
+		zfs_arc_min = val;
+		return (0);
+	}
+#endif
+
+	if (newmin < arc_abs_min || newmin > arc_c_max)
+		return (EINVAL);
+
+	arc_c_min = newmin;
+
+	if (zfs_arc_meta_min == 0) {
+		arc_meta_min = arc_c_min / 2;
+	}
+
+	/* The current target must never sit below the new minimum. */
+	if (arc_c < arc_c_min) {
+		arc_c = arc_c_min;
+	}
+
+	*(uint64_t *)rnode->sysctl_data = arc_c_min;
+
+	return (0);
+}
+
+/*
+ * sysctl handler for the "free_target" leaf.
+ *
+ * A read reports the value stored behind the node.  A write is accepted
+ * only when the new value lies within [minfree, maxfree]; it is then
+ * stored back through the node's data pointer.
+ *
+ * Returns 0 on success, the error from sysctl_lookup(), or EINVAL for
+ * an out-of-range value.
+ */
+static int
+sysctl_vfs_zfs_arc_free_target(SYSCTLFN_ARGS)
+{
+	struct sysctlnode scratch;
+	u_int target;
+	int rc;
+
+	/* Let sysctl_lookup() work on a private copy of the value. */
+	target = *(u_int *)rnode->sysctl_data;
+	scratch = *rnode;
+	scratch.sysctl_data = &target;
+
+	rc = sysctl_lookup(SYSCTLFN_CALL(&scratch));
+	if (rc != 0)
+		return (rc);
+	if (newp == NULL) {
+		/* Read-only access: nothing to validate or store. */
+		return (0);
+	}
+
+	if (target < minfree || target > maxfree)
+		return (EINVAL);
+
+	*(u_int *)rnode->sysctl_data = target;
+
+	return (0);
+}
+#endif
+
#define GHOST_STATE(state) \
((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \
(state) == arc_l2c_only)
@@ -3902,6 +4118,34 @@ arc_available_memory(void)
free_memory_reason_t r = FMR_UNKNOWN;
#ifdef _KERNEL
+#ifdef __NetBSD__
+ vmem_size_t totalpercent;
+ vmem_size_t free;
+
+ /*
+ * PR kern/57558:
+ *
+ * Do not let the pagedaemon get stuck in the uvm_km_va_starved_p()
+ * state. It spins in a tight loop while in the uvm_km_va_starved
+ * state, and ZFS was not freeing any pool pages because it started
+ * freeing only when falling below uvmexp.freetarg.
+ * Now we start freeing when falling below 10% free KVA or below
+ * uvmexp.freetarg.
+ * The 10% magic is shamelessly copied from uvm_km_va_starved_p().
+ * The interface to the pagedaemon has room for improvement.
+ */
+
+ totalpercent = vmem_size(heap_arena, VMEM_ALLOC|VMEM_FREE) / 10;
+ free = vmem_size(heap_arena, VMEM_FREE);
+
+ if (free < totalpercent) {
+ needfree = btop(totalpercent - free);
+ }
+ if (free < uvmexp.freetarg && needfree < uvmexp.freetarg) {
+ needfree = uvmexp.freetarg;
+ }
+#endif
+
if (needfree > 0) {
n = PAGESIZE * (-needfree);
if (n < lowest) {
@@ -5924,8 +6168,8 @@ arc_state_multilist_index_func(multilist
multilist_get_num_sublists(ml));
}
-#ifdef _KERNEL
#ifdef __FreeBSD__
+#ifdef _KERNEL
static eventhandler_tag arc_event_lowmem = NULL;
#endif
@@ -6074,6 +6318,9 @@ arc_init(void)
mutex_init(&arc_dnlc_evicts_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&arc_dnlc_evicts_cv, NULL, CV_DEFAULT, NULL);
#endif
+#if defined(__NetBSD__) && defined(_KERNEL)
+ arc_free_target_init();
+#endif
/* Convert seconds to clock ticks */
arc_min_prefetch_lifespan = 1 * hz;
@@ -6093,11 +6340,17 @@ arc_init(void)
#endif /* illumos */
/* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */
arc_c_min = MAX(arc_c / 4, arc_abs_min);
+#if 0
/* set max to 1/2 of all memory, or all but 1GB, whichever is more */
if (arc_c * 8 >= 1 << 30)
arc_c_max = (arc_c * 8) - (1 << 30);
else
arc_c_max = arc_c_min;
+#else
+ /* set max to 1/2 of all memory, but at least the minimum */
+ arc_c_max = MAX(arc_c_min, arc_c * 4);
+#endif
+ /* increase max to at least 5 times the default size */
arc_c_max = MAX(arc_c * 5, arc_c_max);
/*
Home |
Main Index |
Thread Index |
Old Index