tech-kern archive
Re: Likely lock botch in NPF firewall
Try the attached patch?
From 89b144a95e698df4a844157c55b70cef488548ba Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Sun, 22 Jan 2023 15:56:55 +0000
Subject: [PATCH] npf(9): Don't hold table lock across copyout.
Instead, make npf_table_gc block while anything that might be GC'd is
being copied out, so entries can't be freed mid-copyout, and drop the
table lock across the copyout itself.
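
The synchronization is a simple gate: a count of copyouts in flight,
plus a condvar for the GC side to wait on.  A minimal sketch of the
pattern, with generic names rather than the actual npf_tableset.c
identifiers:

	struct gate {
		kmutex_t	g_lock;
		kcondvar_t	g_cv;
		uint64_t	g_nreaders;	/* copyouts in flight */
	};

	/* Reader side: hold the gate open across the unlocked copyout. */
	mutex_enter(&g->g_lock);
	g->g_nreaders++;
	mutex_exit(&g->g_lock);
	error = copyout(kbuf, ubuf, len);	/* may sleep indefinitely */
	mutex_enter(&g->g_lock);
	if (--g->g_nreaders == 0)
		cv_broadcast(&g->g_cv);
	mutex_exit(&g->g_lock);

	/* GC side: wait until no copyout is in flight before freeing. */
	mutex_enter(&g->g_lock);
	while (g->g_nreaders > 0)
		cv_wait(&g->g_cv, &g->g_lock);
	/* ...now safe to free anything on the deferred GC list... */
	mutex_exit(&g->g_lock);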
Make LPM-type tables use the same GC list as thmap-type (`ipset')
tables, since copyout must block GC the same way.
Note that copyout may block indefinitely, so npf_table_gc may also
block indefinitely as a consequence. If npf_table_gc is required to
return in bounded time, it can't wait for all copyouts to finish, so
if copyouts are in progress we need to find some way to retry the GC
later -- maybe just issue npf_table_gc when npf_table_list returns.
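
One possible shape for that retry, sketched with a hypothetical
caller -- the ioctl path that invokes npf_table_list -- rather than
actual code:

	error = npf_table_list(t, ubuf, len);
	npf_table_gc(npf, t);	/* reap entries deferred during copyout */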
PR kern/57136
PR kern/57181
---
sys/net/npf/npf_tableset.c | 69 +++++++++++++++++++++++---------------
1 file changed, 43 insertions(+), 26 deletions(-)
diff --git a/sys/net/npf/npf_tableset.c b/sys/net/npf/npf_tableset.c
index 51d6268143ce..d67f3dee8fef 100644
--- a/sys/net/npf/npf_tableset.c
+++ b/sys/net/npf/npf_tableset.c
@@ -80,10 +80,7 @@ struct npf_table {
* There are separate trees for IPv4 and IPv6.
*/
union {
- struct {
- thmap_t * t_map;
- LIST_HEAD(, npf_tblent) t_gc;
- };
+ thmap_t * t_map;
lpm_t * t_lpm;
struct {
void * t_blob;
@@ -97,6 +94,7 @@ struct npf_table {
};
} /* C11 */;
LIST_HEAD(, npf_tblent) t_list;
+ LIST_HEAD(, npf_tblent) t_gc;
unsigned t_nitems;
/*
@@ -107,6 +105,9 @@ struct npf_table {
unsigned t_id;
kmutex_t t_lock;
+ uint64_t t_gclock;
+ kcondvar_t t_gclockcv;
+
/* Reference count and table name. */
unsigned t_refcnt;
char t_name[NPF_TABLE_MAXNAMELEN];
@@ -413,6 +414,8 @@ npf_table_create(const char *name, u_int tid, int type,
mutex_init(&t->t_lock, MUTEX_DEFAULT, IPL_NET);
t->t_type = type;
t->t_id = tid;
+ t->t_gclock = 0;
+ cv_init(&t->t_gclockcv, "npftabgc");
return t;
out:
kmem_free(t, sizeof(npf_table_t));
@@ -447,6 +450,8 @@ npf_table_destroy(npf_table_t *t)
default:
KASSERT(false);
}
+ KASSERTMSG(t->t_gclock == 0, "t_gclock=%"PRIu64, t->t_gclock);
+ cv_destroy(&t->t_gclockcv);
mutex_destroy(&t->t_lock);
kmem_free(t, sizeof(npf_table_t));
}
@@ -615,7 +620,7 @@ int
npf_table_remove(npf_table_t *t, const int alen,
const npf_addr_t *addr, const npf_netmask_t mask)
{
- npf_tblent_t *ent = NULL;
+ npf_tblent_t *ent;
int error;
error = npf_netmask_check(alen, mask);
@@ -630,7 +635,6 @@ npf_table_remove(npf_table_t *t, const int alen,
if (__predict_true(ent != NULL)) {
LIST_REMOVE(ent, te_listent);
LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
- ent = NULL; // to be G/C'ed
t->t_nitems--;
} else {
error = ENOENT;
@@ -640,6 +644,7 @@ npf_table_remove(npf_table_t *t, const int alen,
ent = lpm_lookup(t->t_lpm, addr, alen);
if (__predict_true(ent != NULL)) {
LIST_REMOVE(ent, te_listent);
+ LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
lpm_remove(t->t_lpm, &ent->te_addr,
ent->te_alen, ent->te_preflen);
t->t_nitems--;
@@ -653,13 +658,9 @@ npf_table_remove(npf_table_t *t, const int alen,
break;
default:
KASSERT(false);
- ent = NULL;
}
mutex_exit(&t->t_lock);
- if (ent) {
- pool_cache_put(tblent_cache, ent);
- }
return error;
}
@@ -762,18 +763,25 @@ table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
}
static int
-table_generic_list(const npf_table_t *t, void *ubuf, size_t len)
+table_generic_list(npf_table_t *t, void *ubuf, size_t len)
{
npf_tblent_t *ent;
size_t off = 0;
int error = 0;
+ mutex_enter(&t->t_lock);
+ t->t_gclock++;
LIST_FOREACH(ent, &t->t_list, te_listent) {
+ mutex_exit(&t->t_lock);
error = table_ent_copyout(&ent->te_addr,
ent->te_alen, ent->te_preflen, ubuf, len, &off);
+ mutex_enter(&t->t_lock);
if (error)
break;
}
+ if (--t->t_gclock == 0)
+ cv_broadcast(&t->t_gclockcv);
+ mutex_exit(&t->t_lock);
return error;
}
@@ -803,7 +811,6 @@ npf_table_list(npf_table_t *t, void *ubuf, size_t len)
{
int error = 0;
- mutex_enter(&t->t_lock);
switch (t->t_type) {
case NPF_TABLE_IPSET:
error = table_generic_list(t, ubuf, len);
@@ -820,7 +827,6 @@ npf_table_list(npf_table_t *t, void *ubuf, size_t len)
default:
KASSERT(false);
}
- mutex_exit(&t->t_lock);
return error;
}
@@ -855,22 +861,33 @@ npf_table_flush(npf_table_t *t)
void
npf_table_gc(npf_t *npf, npf_table_t *t)
{
- npf_tblent_t *ent;
- void *ref;
- if (t->t_type != NPF_TABLE_IPSET || LIST_EMPTY(&t->t_gc)) {
- return;
- }
+ if (t->t_type == NPF_TABLE_IPSET) {
+ void *ref = thmap_stage_gc(t->t_map);
- ref = thmap_stage_gc(t->t_map);
- if (npf) {
- npf_config_locked_p(npf);
- npf_config_sync(npf);
+ if (npf) {
+ npf_config_locked_p(npf);
+ npf_config_sync(npf);
+ }
+ thmap_gc(t->t_map, ref);
}
- thmap_gc(t->t_map, ref);
- while ((ent = LIST_FIRST(&t->t_gc)) != NULL) {
- LIST_REMOVE(ent, te_listent);
- pool_cache_put(tblent_cache, ent);
+ if (t->t_type == NPF_TABLE_IPSET || t->t_type == NPF_TABLE_LPM) {
+ npf_tblent_t *ent;
+
+ mutex_enter(&t->t_lock);
+ for (;;) {
+ if (t->t_gclock > 0) {
+ cv_wait(&t->t_gclockcv, &t->t_lock);
+ continue;
+ }
+ if ((ent = LIST_FIRST(&t->t_gc)) == NULL)
+ break;
+			LIST_REMOVE(ent, te_listent);
+			mutex_exit(&t->t_lock);
+ pool_cache_put(tblent_cache, ent);
+ mutex_enter(&t->t_lock);
+ }
+ mutex_exit(&t->t_lock);
}
}