Subject: Re: high input packet rate can lead to process starvation
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Tad Hunt <tad@entrisphere.com>
List: tech-net
Date: 09/25/2006 12:09:25
What are the chances of something like this making it into NetBSD in the 
near future?

-Tad

YAMAMOTO Takashi wrote:
>> As long as there are ip packets in the queue, ipintr() will continue 
>> happily processing them.
>>
>> This is really a problem not just with the ipintrq, but with all of the 
>> software-interrupt protocol input routines.
>>
>> My solution (this is a hack) is to put a time-limit on how long ipintr() 
>> is allowed to run.  If it runs longer than this without emptying the 
>> queue, I set a global "ipdisabled" flag and start a callout ticking that 
>> will wakeup, clear the flag, and setsoftnet() (code attached).  In 
>> addition the ethernet driver drops all packets as long as ipdisabled is 
>> true.  (See the attachment for code)
> 
> it reminds me the attached code.
> (i thought i've filed a PR but couldn't find a number.)
> the idea was let a thread process the queue when it's somewhat busy.
> 
> YAMAMOTO Takashi
> 
> 
> ------------------------------------------------------------------------
> 
> 
> #include <sys/param.h>
> #include <sys/systm.h>
> #include <sys/mbuf.h>
> #include <sys/kthread.h>
> #include <sys/lock.h>
> #include <sys/proc.h>
> 
> #include <net/if.h>
> #include <net/netisr.h>
> 
> #if 1
> /* XXX should be per netisr */
> #define NETISR_COUNTER_DEFINE(name)   \
> struct evcnt netisr_stat_##name       \
>     = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "netisr", #name); \
> EVCNT_ATTACH_STATIC(netisr_stat_##name)
> #define NETISR_COUNTER_INCL(name)     (netisr_stat_##name).ev_count++
> #else
> #define NETISR_COUNTER_DEFINE(name)   /* nothing */
> #define NETISR_COUNTER_INCL(name)     /* nothing */
> #endif
> 
> NETISR_COUNTER_DEFINE(sched);
> NETISR_COUNTER_DEFINE(thread);
> NETISR_COUNTER_DEFINE(intr);
> NETISR_COUNTER_DEFINE(transit);
> NETISR_COUNTER_DEFINE(yield);
> NETISR_COUNTER_DEFINE(sleep);
> NETISR_COUNTER_DEFINE(throttle);
> 
> static void netisr_create_thread(void *);
> static void netisr_thread(void *);
> static __inline void netisr_schedule(struct netisr *);
> static __inline void netisr_wakeup_thread(struct netisr *);
> static __inline boolean_t netisr_empty(const struct netisr *) __unused;
> static __inline struct mbuf *netisr_dequeue(struct netisr *);
> 
> int netisr_dothrottle = 1;
> int netisr_maxrun = 10240;
> 
> static __inline boolean_t
> netisr_empty(const struct netisr *ni)
> {
> 
> 	return ni->ni_queue.ifq_len == 0;
> }
> 
> static __inline void
> netisr_wakeup_thread(struct netisr *ni)
> {
> 
> 	if (ni->ni_threadsleeping) {
> 		wakeup(ni);
> 	}
> }
> 
> static __inline void
> netisr_schedule(struct netisr *ni)
> {
> 
> 	NETISR_COUNTER_INCL(sched);
> 
> 	if (__predict_false(ni->ni_usethread)) {
> 		netisr_wakeup_thread(ni);
> 	} else {
> 		schednetisr(ni->ni_isr);
> 	}
> }
> 
> /*
>  * netisr_enqueue:
>  *
>  * => called at splnet.
>  */
> 
> int
> netisr_enqueue(struct netisr *ni, struct mbuf *m)
> {
> 	struct ifqueue *q = &ni->ni_queue;
> 
> 	simple_lock(&ni->ni_lock);
> 	if (IF_QFULL(q) || ni->ni_throttle) {
> 		IF_DROP(q);
> 		if (!ni->ni_throttle && netisr_dothrottle) {
> 			ni->ni_throttle = TRUE;
> 			NETISR_COUNTER_INCL(throttle);
> 		}
> 		simple_unlock(&ni->ni_lock);
> 		m_freem(m);
> 		return ENOBUFS;
> 	}
> 
> 	if (netisr_empty(ni)) {
> 		netisr_schedule(ni);
> 	}
> 
> 	IF_ENQUEUE(q, m);
> 
> 	simple_unlock(&ni->ni_lock);
> 
> 	return 0;
> }
> 
> /*
>  * netisr_dequeue:
>  *
>  * => called with ni_lock locked.
>  */
> 
> static __inline struct mbuf *
> netisr_dequeue(struct netisr *ni)
> {
> 	struct mbuf *m;
> 
> 	IF_DEQUEUE(&ni->ni_queue, m);
> 	if (m == NULL) {
> 		ni->ni_throttle = FALSE;
> 	}
> 
> 	return m;
> }
> 
> void
> netisr_init(struct netisr *ni, const char *name, int isr,
>     void (*handler)(struct mbuf *), int maxqlen)
> {
> 	struct ifqueue *q;
> 
> 	q = &ni->ni_queue;
> 	memset(q, 0, sizeof(*q));
> 	q->ifq_maxlen = maxqlen;
> 
> 	ni->ni_isr = isr;
> 	ni->ni_name = name;
> 	ni->ni_handler = handler;
> 	ni->ni_maxrun = 64; /* XXX */
> 	ni->ni_minrun = 16; /* XXX */
> 	ni->ni_usethread = FALSE;
> 
> 	simple_lock_init(&ni->ni_lock);
> 
> 	kthread_create(netisr_create_thread, ni);
> }
> 
> static void
> netisr_create_thread(void *arg)
> {
> 	struct netisr *ni = arg;
> 	int error;
> 
> 	error = kthread_create1(netisr_thread, ni, &ni->ni_proc, ni->ni_name);
> 	if (error) {
> 		panic("netisr_create_thread: %s: %d", ni->ni_name, error);
> 	}
> }
> 
> /*
>  * netisr_run:
>  *
>  * => called at splsoftnet.
>  */
> 
> void
> netisr_run(struct netisr *ni)
> {
> //	int rest = ni->ni_maxrun;
> 	int rest = netisr_maxrun;
> 	int s;
> 
> 	while ((rest--) > 0) {
> 		struct mbuf *m;
> 
> 		s = splnet();
> 		simple_lock(&ni->ni_lock);
> 		m = netisr_dequeue(ni);
> 		simple_unlock(&ni->ni_lock);
> 		splx(s);
> 		if (m == NULL) {
> 			return;
> 		}
> 
> 		NETISR_COUNTER_INCL(intr);
> 
> 		ni->ni_handler(m);
> 	}
> 
> 	NETISR_COUNTER_INCL(transit);
> 	s = splnet();
> 	simple_lock(&ni->ni_lock);
> 	ni->ni_usethread = TRUE;
> 	netisr_wakeup_thread(ni);
> 	simple_unlock(&ni->ni_lock);
> 	splx(s);
> }
> 
> static void
> netisr_thread(void *arg)
> {
> 	struct netisr *ni = arg;
> 	int s;
> 
> 	s = splnet();
> 	simple_lock(&ni->ni_lock);
> again:
> 	while (!ni->ni_usethread) {
> 		ni->ni_threadsleeping = TRUE;
> 		ltsleep(ni, PUSER, ni->ni_name, 0, &ni->ni_lock);
> 	}
> 	ni->ni_threadsleeping = FALSE;
> 	simple_unlock(&ni->ni_lock);
> 	splx(s);
> 
> 	while (/* CONSTCOND */ 1) {
> 		struct mbuf *m;
> 
> 		s = splnet();
> 		simple_lock(&ni->ni_lock);
> 		m = netisr_dequeue(ni);
> 		if (m == NULL) {
> 			break;
> 		}
> 		simple_unlock(&ni->ni_lock);
> 		splx(s);
> 
> 		NETISR_COUNTER_INCL(thread);
> 
> 		s = splsoftnet();
> 		ni->ni_handler(m);
> 		splx(s);
> 
> 		if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)) {
> 			NETISR_COUNTER_INCL(yield);
> 			preempt(1);
> 		}
> 	}
> 
> 	NETISR_COUNTER_INCL(yield);
> 	ni->ni_usethread = FALSE;
> 	ni->ni_throttle = FALSE;
> 	goto again;
> }