Subject: Callouts for TCP timers
To: None <tech-net@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 09/10/2001 15:08:12
--pWyiEgJYm5f9v55/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Okay, building on all the previous stuff I've sent here (plus a
few other commits I've made today)...

The following diff makes TCP timers use callouts, rather than
the tcp_slowtimo() path.

The benefit of this is obvious -- no more need to traverse every
TCP connection state 2x per second (which is Really Bad if
you want to scale to thousands of connections).

The current timers still use PR_SLOWHZ granularity.  This is something
I am planning on addressing in a future revision (make them hz granularity,
plus some random jitter to emulate the "may not actually wait a full 500ms"
behavior that is present with tcp_slowtimo()-based timers).

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--pWyiEgJYm5f9v55/
Content-Type: text/plain; charset=us-ascii
Content-Description: tcp_timer_callout.diff
Content-Disposition: attachment; filename=foo

Index: tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.128
diff -c -r1.128 tcp_input.c
*** tcp_input.c	2001/09/10 15:23:09	1.128
--- tcp_input.c	2001/09/10 21:39:00
***************
*** 140,145 ****
--- 140,146 ----
  #include <sys/syslog.h>
  #include <sys/pool.h>
  #include <sys/domain.h>
+ #include <sys/kernel.h>
  
  #include <net/if.h>
  #include <net/route.h>
Index: tcp_output.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_output.c,v
retrieving revision 1.73
diff -c -r1.73 tcp_output.c
*** tcp_output.c	2001/09/10 15:23:10	1.73
--- tcp_output.c	2001/09/10 21:39:01
***************
*** 127,132 ****
--- 127,133 ----
  #include <sys/socketvar.h>
  #include <sys/errno.h>
  #include <sys/domain.h>
+ #include <sys/kernel.h>
  
  #include <net/if.h>
  #include <net/route.h>
Index: tcp_subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_subr.c,v
retrieving revision 1.117
diff -c -r1.117 tcp_subr.c
*** tcp_subr.c	2001/09/10 20:36:43	1.117
--- tcp_subr.c	2001/09/10 21:39:02
***************
*** 1021,1026 ****
--- 1021,1027 ----
  	(void) tcp_freeq(tp);
  	TCP_REASS_UNLOCK(tp);
  
+ 	tcp_canceltimers(tp);
  	TCP_CLEAR_DELACK(tp);
  	syn_cache_cleanup(tp);
  
Index: tcp_timer.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_timer.c,v
retrieving revision 1.52
diff -c -r1.52 tcp_timer.c
*** tcp_timer.c	2001/09/10 20:36:43	1.52
--- tcp_timer.c	2001/09/10 21:39:03
***************
*** 221,307 ****
  void
  tcp_slowtimo()
  {
- 	struct inpcb *inp, *ninp;
- 	struct tcpcb *tp;
- #ifdef INET6
- 	struct in6pcb *in6p, *nin6p;
- #endif
- 	int s;
- 	long i;
  	static int syn_cache_last = 0;
! 	int skip, mask;
! 
! 	skip = mask = 0;
  
  	s = splsoftnet();
  	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
- 	/*
- 	 * Search through tcb's and update active timers.
- 	 */
- 	mask |= 1;
- 	inp = tcbtable.inpt_queue.cqh_first;
- 	if (inp == (struct inpcb *)0) {				/* XXX */
- 		skip |= 1;
- 		goto dotcb6;
- 	}
- 	for (; inp != (struct inpcb *)&tcbtable.inpt_queue; inp = ninp) {
- 		ninp = inp->inp_queue.cqe_next;
- 		tp = intotcpcb(inp);
- 		if (tp == 0 || tp->t_state == TCPS_LISTEN)
- 			continue;
- 		for (i = 0; i < TCPT_NTIMERS; i++) {
- 			if (TCP_TIMER_ISEXPIRED(tp, i)) {
- 				TCP_TIMER_DISARM(tp, i);
- 				(*(tcp_timer_funcs[i]))(tp);
- 				/* XXX NOT MP SAFE */
- 				if ((ninp == (void *)&tcbtable.inpt_queue &&
- 				    tcbtable.inpt_queue.cqh_last != inp) ||
- 				    ninp->inp_queue.cqe_prev != inp)
- 					goto tpgone;
- 			}
- 		}
- tpgone:
- 		;
- 	}
- dotcb6:
- #ifdef INET6
- 	mask |= 2;
- 	in6p = tcb6.in6p_next;
- 	if (in6p == (struct in6pcb *)0) {			/* XXX */
- 		skip |= 2;
- 		goto doiss;
- 	}
- 	for (; in6p != (struct in6pcb *)&tcb6; in6p = nin6p) {
- 		nin6p = in6p->in6p_next;
- 		tp = in6totcpcb(in6p);
- 		if (tp == 0 || tp->t_state == TCPS_LISTEN)
- 			continue;
- 		for (i = 0; i < TCPT_NTIMERS; i++) {
- 			if (TCP_TIMER_ISEXPIRED(tp, i)) {
- 				TCP_TIMER_DISARM(tp, i);
- 				(*(tcp_timer_funcs[i]))(tp);
- 				/* XXX NOT MP SAFE */
- 				if ((nin6p == (void *)&tcb6 &&
- 				    tcb6.in6p_prev != in6p) ||
- 				    nin6p->in6p_prev != in6p)
- 					goto tp6gone;
- 			}
- 		}
- tp6gone:
- 		;
- 	}
- 
- doiss:
- #endif
- 	if (mask == skip)
- 		goto done;
  	tcp_iss_seq += TCP_ISSINCR;			/* increment iss */
  	tcp_now++;					/* for timestamps */
  	if (++syn_cache_last >= tcp_syn_cache_interval) {
  		syn_cache_timer();
  		syn_cache_last = 0;
  	}
- done:
  	splx(s);
  }
  
--- 221,237 ----
  void
  tcp_slowtimo()
  {
  	static int syn_cache_last = 0;
! 	int s;
  
  	s = splsoftnet();
  	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
  	tcp_iss_seq += TCP_ISSINCR;			/* increment iss */
  	tcp_now++;					/* for timestamps */
  	if (++syn_cache_last >= tcp_syn_cache_interval) {
  		syn_cache_timer();
  		syn_cache_last = 0;
  	}
  	splx(s);
  }
  
***************
*** 340,345 ****
--- 270,277 ----
  
  	s = splsoftnet();
  
+ 	callout_deactivate(&tp->t_timer[TCPT_REXMT]);
+ 
  #ifdef TCP_DEBUG
  #ifdef INET
  	if (tp->t_inpcb)
***************
*** 485,490 ****
--- 417,424 ----
  
  	s = splsoftnet();
  
+ 	callout_deactivate(&tp->t_timer[TCPT_PERSIST]);
+ 
  #ifdef INET
  	if (tp->t_inpcb)
  		so = tp->t_inpcb->inp_socket;
***************
*** 547,552 ****
--- 481,488 ----
  
  	s = splsoftnet();
  
+ 	callout_deactivate(&tp->t_timer[TCPT_KEEP]);
+ 
  #ifdef TCP_DEBUG
  	ostate = tp->t_state;
  #endif /* TCP_DEBUG */
***************
*** 628,633 ****
--- 564,571 ----
  #endif
  
  	s = splsoftnet();
+ 
+ 	callout_deactivate(&tp->t_timer[TCPT_2MSL]);
  
  #ifdef INET
  	if (tp->t_inpcb)
Index: tcp_timer.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_timer.h,v
retrieving revision 1.14
diff -c -r1.14 tcp_timer.h
*** tcp_timer.h	2001/09/10 20:36:43	1.14
--- tcp_timer.h	2001/09/10 21:39:03
***************
*** 118,137 ****
  /*
   * Init, arm, disarm, and test TCP timers.
   */
! #define	TCP_TIMER_INIT(tp, timer) \
! 	/* Nothing. */
  
! #define	TCP_TIMER_ARM(tp, timer, nticks) \
! 	PRT_SLOW_ARM((tp)->t_timer[(timer)], (nticks))
  
! #define	TCP_TIMER_DISARM(tp, timer) \
! 	PRT_SLOW_DISARM((tp)->t_timer[(timer)])
  
! #define	TCP_TIMER_ISARMED(tp, timer) \
! 	PRT_SLOW_ISARMED((tp)->t_timer[(timer)])
! 
! #define	TCP_TIMER_ISEXPIRED(tp, timer) \
! 	PRT_SLOW_ISEXPIRED((tp)->t_timer[(timer)])
  
  /*
   * Force a time value to be in a certain range.
--- 118,135 ----
  /*
   * Init, arm, disarm, and test TCP timers.
   */
! #define	TCP_TIMER_INIT(tp, timer)					\
! 	callout_init(&(tp)->t_timer[(timer)])
  
! #define	TCP_TIMER_ARM(tp, timer, nticks)				\
! 	callout_reset(&(tp)->t_timer[(timer)],				\
! 	    (nticks) * (hz / PR_SLOWHZ), tcp_timer_funcs[(timer)], tp)
  
! #define	TCP_TIMER_DISARM(tp, timer)					\
! 	callout_stop(&(tp)->t_timer[(timer)])
  
! #define	TCP_TIMER_ISARMED(tp, timer)					\
! 	callout_active(&(tp)->t_timer[(timer)])
  
  /*
   * Force a time value to be in a certain range.
Index: tcp_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_var.h,v
retrieving revision 1.85
diff -c -r1.85 tcp_var.h
*** tcp_var.h	2001/09/10 20:15:15	1.85
--- tcp_var.h	2001/09/10 21:39:03
***************
*** 133,139 ****
  struct tcpcb {
  	int	t_family;		/* address family on the wire */
  	struct ipqehead segq;		/* sequencing queue */
! 	u_int	t_timer[TCPT_NTIMERS];	/* tcp timers */
  	short	t_state;		/* state of this connection */
  	short	t_rxtshift;		/* log(2) of rexmt exp. backoff */
  	uint32_t t_rxtcur;		/* current retransmit value */
--- 133,139 ----
  struct tcpcb {
  	int	t_family;		/* address family on the wire */
  	struct ipqehead segq;		/* sequencing queue */
! 	struct callout t_timer[TCPT_NTIMERS];/* tcp timers */
  	short	t_state;		/* state of this connection */
  	short	t_rxtshift;		/* log(2) of rexmt exp. backoff */
  	uint32_t t_rxtcur;		/* current retransmit value */

--pWyiEgJYm5f9v55/--