Re: Likely lock botch in NPF firewall



Try the attached patch?
From 89b144a95e698df4a844157c55b70cef488548ba Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Sun, 22 Jan 2023 15:56:55 +0000
Subject: [PATCH] npf(9): Don't hold table lock across copyout.

Instead, block npf_table_gc during copyout of anything that might be
GC'd, so it can't be freed during copyout, but then drop the lock
across copyout.
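
In case the pattern isn't familiar: it's just a counted hold on the
table, guarded by a mutex, with a condvar for the GC side to wait on
until the count drains.  A standalone sketch with made-up names
(gcgate &c.) -- in the patch the counter shares the table's existing
t_lock and lives inline as t_gclock/t_gclockcv:

/*
 * Sketch only, not the patch itself.
 */
#include <sys/mutex.h>
#include <sys/condvar.h>

struct gcgate {
	kmutex_t	gg_lock;
	kcondvar_t	gg_cv;
	uint64_t	gg_holds;	/* copyouts in flight */
};

static void
gcgate_init(struct gcgate *gg)
{
	mutex_init(&gg->gg_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&gg->gg_cv, "gcgate");
	gg->gg_holds = 0;
}

/* Copyout path: take a hold before dropping the table lock. */
static void
gcgate_hold(struct gcgate *gg)
{
	mutex_enter(&gg->gg_lock);
	gg->gg_holds++;
	mutex_exit(&gg->gg_lock);
}

/* Copyout path: drop the hold; wake the GC if we were the last. */
static void
gcgate_release(struct gcgate *gg)
{
	mutex_enter(&gg->gg_lock);
	if (--gg->gg_holds == 0)
		cv_broadcast(&gg->gg_cv);
	mutex_exit(&gg->gg_lock);
}

/* GC path: wait until no copyout can be looking at a dying entry. */
static void
gcgate_wait(struct gcgate *gg)
{
	mutex_enter(&gg->gg_lock);
	while (gg->gg_holds > 0)
		cv_wait(&gg->gg_cv, &gg->gg_lock);
	mutex_exit(&gg->gg_lock);
}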

Make LPM-type tables use the same GC list as thmap-type (`ipset')
tables, since copyout must block GC the same way.

Note that copyout may block indefinitely, so npf_table_gc may also
block indefinitely as a consequence.  If npf_table_gc is required to
return in bounded time, it can't wait for all copyouts to finish, so
if copyouts are in progress we need to find some way to retry the GC
later -- maybe just issue npf_table_gc when npf_table_list returns.
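
Something like this hypothetical wrapper, say -- not part of this
patch, just the shape of the idea:

/*
 * Hypothetical follow-up: once our own copyout has finished and
 * released its hold, kick the GC, so that a bounded-time
 * npf_table_gc that had to skip work while copyouts were in
 * flight still gets done eventually.
 */
static int
npf_table_list_then_gc(npf_t *npf, npf_table_t *t, void *ubuf, size_t len)
{
	int error;

	error = npf_table_list(t, ubuf, len);
	npf_table_gc(npf, t);	/* may still wait out concurrent copyouts */
	return error;
}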

PR kern/57136
PR kern/57181
---
 sys/net/npf/npf_tableset.c | 69 ++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/sys/net/npf/npf_tableset.c b/sys/net/npf/npf_tableset.c
index 51d6268143ce..d67f3dee8fef 100644
--- a/sys/net/npf/npf_tableset.c
+++ b/sys/net/npf/npf_tableset.c
@@ -80,10 +80,7 @@ struct npf_table {
 	 * There are separate trees for IPv4 and IPv6.
 	 */
 	union {
-		struct {
-			thmap_t *	t_map;
-			LIST_HEAD(, npf_tblent) t_gc;
-		};
+		thmap_t *		t_map;
 		lpm_t *			t_lpm;
 		struct {
 			void *		t_blob;
@@ -97,6 +94,7 @@ struct npf_table {
 		};
 	} /* C11 */;
 	LIST_HEAD(, npf_tblent)		t_list;
+	LIST_HEAD(, npf_tblent)		t_gc;
 	unsigned			t_nitems;
 
 	/*
@@ -107,6 +105,9 @@ struct npf_table {
 	unsigned		t_id;
 	kmutex_t		t_lock;
 
+	uint64_t		t_gclock;
+	kcondvar_t		t_gclockcv;
+
 	/* Reference count and table name. */
 	unsigned		t_refcnt;
 	char			t_name[NPF_TABLE_MAXNAMELEN];
@@ -413,6 +414,8 @@ npf_table_create(const char *name, u_int tid, int type,
 	mutex_init(&t->t_lock, MUTEX_DEFAULT, IPL_NET);
 	t->t_type = type;
 	t->t_id = tid;
+	t->t_gclock = 0;
+	cv_init(&t->t_gclockcv, "npftabgc");
 	return t;
 out:
 	kmem_free(t, sizeof(npf_table_t));
@@ -447,6 +450,8 @@ npf_table_destroy(npf_table_t *t)
 	default:
 		KASSERT(false);
 	}
+	KASSERTMSG(t->t_gclock == 0, "t_gclock=%"PRIu64, t->t_gclock);
+	cv_destroy(&t->t_gclockcv);
 	mutex_destroy(&t->t_lock);
 	kmem_free(t, sizeof(npf_table_t));
 }
@@ -615,7 +620,7 @@ int
 npf_table_remove(npf_table_t *t, const int alen,
     const npf_addr_t *addr, const npf_netmask_t mask)
 {
-	npf_tblent_t *ent = NULL;
+	npf_tblent_t *ent;
 	int error;
 
 	error = npf_netmask_check(alen, mask);
@@ -630,7 +635,6 @@ npf_table_remove(npf_table_t *t, const int alen,
 		if (__predict_true(ent != NULL)) {
 			LIST_REMOVE(ent, te_listent);
 			LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
-			ent = NULL; // to be G/C'ed
 			t->t_nitems--;
 		} else {
 			error = ENOENT;
@@ -640,6 +644,7 @@ npf_table_remove(npf_table_t *t, const int alen,
 		ent = lpm_lookup(t->t_lpm, addr, alen);
 		if (__predict_true(ent != NULL)) {
 			LIST_REMOVE(ent, te_listent);
+			LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
 			lpm_remove(t->t_lpm, &ent->te_addr,
 			    ent->te_alen, ent->te_preflen);
 			t->t_nitems--;
@@ -653,13 +658,9 @@ npf_table_remove(npf_table_t *t, const int alen,
 		break;
 	default:
 		KASSERT(false);
-		ent = NULL;
 	}
 	mutex_exit(&t->t_lock);
 
-	if (ent) {
-		pool_cache_put(tblent_cache, ent);
-	}
 	return error;
 }
 
@@ -762,18 +763,25 @@ table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
 }
 
 static int
-table_generic_list(const npf_table_t *t, void *ubuf, size_t len)
+table_generic_list(npf_table_t *t, void *ubuf, size_t len)
 {
 	npf_tblent_t *ent;
 	size_t off = 0;
 	int error = 0;
 
+	mutex_enter(&t->t_lock);
+	t->t_gclock++;
 	LIST_FOREACH(ent, &t->t_list, te_listent) {
+		mutex_exit(&t->t_lock);
 		error = table_ent_copyout(&ent->te_addr,
 		    ent->te_alen, ent->te_preflen, ubuf, len, &off);
+		mutex_enter(&t->t_lock);
 		if (error)
 			break;
 	}
+	if (--t->t_gclock == 0)
+		cv_broadcast(&t->t_gclockcv);
+	mutex_exit(&t->t_lock);
 	return error;
 }
 
@@ -803,7 +811,6 @@ npf_table_list(npf_table_t *t, void *ubuf, size_t len)
 {
 	int error = 0;
 
-	mutex_enter(&t->t_lock);
 	switch (t->t_type) {
 	case NPF_TABLE_IPSET:
 		error = table_generic_list(t, ubuf, len);
@@ -820,7 +827,6 @@ npf_table_list(npf_table_t *t, void *ubuf, size_t len)
 	default:
 		KASSERT(false);
 	}
-	mutex_exit(&t->t_lock);
 
 	return error;
 }
@@ -855,22 +861,33 @@ npf_table_flush(npf_table_t *t)
 void
 npf_table_gc(npf_t *npf, npf_table_t *t)
 {
-	npf_tblent_t *ent;
-	void *ref;
 
-	if (t->t_type != NPF_TABLE_IPSET || LIST_EMPTY(&t->t_gc)) {
-		return;
-	}
+	if (t->t_type == NPF_TABLE_IPSET) {
+		void *ref = thmap_stage_gc(t->t_map);
 
-	ref = thmap_stage_gc(t->t_map);
-	if (npf) {
-		npf_config_locked_p(npf);
-		npf_config_sync(npf);
+		if (npf) {
+			npf_config_locked_p(npf);
+			npf_config_sync(npf);
+		}
+		thmap_gc(t->t_map, ref);
 	}
-	thmap_gc(t->t_map, ref);
 
-	while ((ent = LIST_FIRST(&t->t_gc)) != NULL) {
-		LIST_REMOVE(ent, te_listent);
-		pool_cache_put(tblent_cache, ent);
+	if (t->t_type == NPF_TABLE_IPSET || t->t_type == NPF_TABLE_LPM) {
+		npf_tblent_t *ent;
+
+		mutex_enter(&t->t_lock);
+		for (;;) {
+			if (t->t_gclock > 0) {
+				cv_wait(&t->t_gclockcv, &t->t_lock);
+				continue;
+			}
+			if ((ent = LIST_FIRST(&t->t_gc)) == NULL)
+				break;
+			LIST_REMOVE(ent, te_listent);
+			mutex_exit(&t->t_lock);
+			pool_cache_put(tblent_cache, ent);
+			mutex_enter(&t->t_lock);
+		}
+		mutex_exit(&t->t_lock);
 	}
 }
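
For anyone skimming without the tree handy, the shape of the bug
being fixed -- reconstructed from the removed lines above -- was
roughly:

/*
 * Before: t_lock held across copyout, which may fault on the
 * user's buffer and sleep while the lock is held.
 */
mutex_enter(&t->t_lock);
switch (t->t_type) {
case NPF_TABLE_IPSET:
	error = table_generic_list(t, ubuf, len);	/* copyout under lock */
	break;
/* ... */
}
mutex_exit(&t->t_lock);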

