NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/59411 (deadlock on mbuf pool)



The following reply was made to PR kern/59411; it has been noted by GNATS.

From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
To: Manuel Bouyer <bouyer%antioche.eu.org@localhost>
Cc: gnats-bugs%netbsd.org@localhost, kern-bug-people%netbsd.org@localhost,
	netbsd-bugs%netbsd.org@localhost, gnats-admin%netbsd.org@localhost,
	mrg%NetBSD.org@localhost, chs%NetBSD.org@localhost, christos%NetBSD.org@localhost, tnn%NetBSD.org@localhost
Subject: Re: kern/59411 (deadlock on mbuf pool)
Date: Sat, 17 May 2025 00:51:26 +0000

 This is a multi-part message in MIME format.
 --=_uuZz9P26KhzVTydfohaOkWcMHct8nEA0
 
 Can you try the attached patch?
 
 And, can you record some dtrace output to see where this busy-wait
 logic takes effect?
 
 dtrace -n '
 	sdt:::pool_grow* { @[probename, stack()] = count() }
 	tick-10s { printa(@) }
 '
 
 --=_uuZz9P26KhzVTydfohaOkWcMHct8nEA0
 Content-Type: text/plain; charset="ISO-8859-1"; name="pr59411-mbufpooldeadlockpoolgrowbusywaitintr"
 Content-Transfer-Encoding: quoted-printable
 Content-Disposition: attachment; filename="pr59411-mbufpooldeadlockpoolgrowbusywaitintr.patch"
 
 # HG changeset patch
 # User Taylor R Campbell <riastradh%NetBSD.org@localhost>
 # Date 1747442650 0
 #      Sat May 17 00:44:10 2025 +0000
 # Branch trunk
 # Node ID f8a9f7969e74c93df131b0d1c86775697fa66e62
 # Parent  e65a3a130b666dadcc0a084f312f8bb66063fa65
 # EXP-Topic riastradh-pr59411-mbufpooldeadlock
 pool(9): Don't busy-wait in pool_grow with PR_NOWAIT from (soft)intr.
 
 PR kern/59411: deadlock on mbuf pool
 
 diff -r e65a3a130b66 -r f8a9f7969e74 sys/kern/subr_pool.c
 --- a/sys/kern/subr_pool.c	Fri May 16 19:01:50 2025 +0000
 +++ b/sys/kern/subr_pool.c	Sat May 17 00:44:10 2025 +0000
 @@ -60,6 +60,7 @@
  #include <sys/asan.h>
  #include <sys/msan.h>
  #include <sys/fault.h>
 +#include <sys/sdt.h>
 =20
  #include <uvm/uvm_extern.h>
 =20
 @@ -1406,14 +1407,22 @@ pool_grow(struct pool *pp, int flags)
  			} while (pp->pr_flags & PR_GROWING);
  			return ERESTART;
  		} else {
 -			if (pp->pr_flags & PR_GROWINGNOWAIT) {
 +			if ((pp->pr_flags & PR_GROWINGNOWAIT) &&
 +			    !cpu_intr_p() && !cpu_softintr_p()) {
  				/*
  				 * This needs an unlock/relock dance so
  				 * that the other caller has a chance to
  				 * run and actually do the thing.  Note
  				 * that this is effectively a busy-wait.
 +				 *
 +				 * Of course, busy-wait doesn't work if
 +				 * we're interrupting the thread in the
 +				 * middle of pool_grow.  So only do
 +				 * this if we're not in interrupt or
 +				 * soft interrupt context.
  				 */
  				mutex_exit(&pp->pr_lock);
 +				DTRACE_PROBE(pool_grow_busywait);
  				mutex_enter(&pp->pr_lock);
  				return ERESTART;
  			}
 @@ -1421,10 +1430,11 @@ pool_grow(struct pool *pp, int flags)
  		}
  	}
  	pp->pr_flags |=3D PR_GROWING;
 -	if (flags & PR_WAITOK)
 -		mutex_exit(&pp->pr_lock);
 -	else
 +	if ((flags & PR_WAITOK) =3D=3D 0) {
  		pp->pr_flags |=3D PR_GROWINGNOWAIT;
 +		DTRACE_PROBE(pool_growingnowait);
 +	}
 +	mutex_exit(&pp->pr_lock);
 =20
  	storage =3D pool_allocator_alloc(pp, flags);
  	if (__predict_false(storage =3D=3D NULL))
 @@ -1436,8 +1446,7 @@ pool_grow(struct pool *pp, int flags)
  		goto out;
  	}
 =20
 -	if (flags & PR_WAITOK)
 -		mutex_enter(&pp->pr_lock);
 +	mutex_enter(&pp->pr_lock);
  	pool_prime_page(pp, storage, ph);
  	pp->pr_npagealloc++;
  	KASSERT(pp->pr_flags & PR_GROWING);
 @@ -1449,8 +1458,7 @@ pool_grow(struct pool *pp, int flags)
  	cv_broadcast(&pp->pr_cv);
  	return 0;
  out:
 -	if (flags & PR_WAITOK)
 -		mutex_enter(&pp->pr_lock);
 +	mutex_enter(&pp->pr_lock);
  	KASSERT(pp->pr_flags & PR_GROWING);
  	pp->pr_flags &=3D ~(PR_GROWING|PR_GROWINGNOWAIT);
  	return ENOMEM;
 
 --=_uuZz9P26KhzVTydfohaOkWcMHct8nEA0--
 


Home | Main Index | Thread Index | Old Index