Subject: Use callouts for SYN cache timers
To: None <tech-net@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 09/11/2001 12:19:59
--32u276st3Jlj2kUU
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
The following diff changes the TCP SYN cache to use callouts for
the timers, rather than multiple lists traversed via tcp_slowtimo().
--
-- Jason R. Thorpe <thorpej@wasabisystems.com>
--32u276st3Jlj2kUU
Content-Type: text/plain; charset=us-ascii
Content-Description: syn_cache_timer.diff
Content-Disposition: attachment; filename=foo
Index: tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.129
diff -c -r1.129 tcp_input.c
*** tcp_input.c 2001/09/10 22:14:26 1.129
--- tcp_input.c 2001/09/11 19:15:10
***************
*** 2578,2595 ****
default: \
hash = 0; \
} \
! } while (0)
#endif /* INET6 */
#define SYN_CACHE_RM(sc) \
do { \
! LIST_REMOVE((sc), sc_bucketq); \
(sc)->sc_tp = NULL; \
LIST_REMOVE((sc), sc_tpq); \
tcp_syn_cache[(sc)->sc_bucketidx].sch_length--; \
! TAILQ_REMOVE(&tcp_syn_cache_timeq[(sc)->sc_rxtshift], (sc), sc_timeq); \
syn_cache_count--; \
! } while (0)
#define SYN_CACHE_PUT(sc) \
do { \
--- 2578,2596 ----
default: \
hash = 0; \
} \
! } while (/*CONSTCOND*/0)
#endif /* INET6 */
#define SYN_CACHE_RM(sc) \
do { \
! TAILQ_REMOVE(&tcp_syn_cache[(sc)->sc_bucketidx].sch_bucket, \
! (sc), sc_bucketq); \
(sc)->sc_tp = NULL; \
LIST_REMOVE((sc), sc_tpq); \
tcp_syn_cache[(sc)->sc_bucketidx].sch_length--; \
! callout_stop(&(sc)->sc_timer); \
syn_cache_count--; \
! } while (/*CONSTCOND*/0)
#define SYN_CACHE_PUT(sc) \
do { \
***************
*** 2598,2622 ****
if ((sc)->sc_route4.ro_rt != NULL) \
RTFREE((sc)->sc_route4.ro_rt); \
pool_put(&syn_cache_pool, (sc)); \
! } while (0)
struct pool syn_cache_pool;
/*
* We don't estimate RTT with SYNs, so each packet starts with the default
! * RTT and each timer queue has a fixed timeout value. This allows us to
! * optimize the timer queues somewhat.
*/
#define SYN_CACHE_TIMER_ARM(sc) \
do { \
TCPT_RANGESET((sc)->sc_rxtcur, \
TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN, \
TCPTV_REXMTMAX); \
! PRT_SLOW_ARM((sc)->sc_rexmt, (sc)->sc_rxtcur); \
! } while (0)
- TAILQ_HEAD(, syn_cache) tcp_syn_cache_timeq[TCP_MAXRXTSHIFT + 1];
-
#define SYN_CACHE_TIMESTAMP(sc) (tcp_now - (sc)->sc_timebase)
void
--- 2599,2621 ----
if ((sc)->sc_route4.ro_rt != NULL) \
RTFREE((sc)->sc_route4.ro_rt); \
pool_put(&syn_cache_pool, (sc)); \
! } while (/*CONSTCOND*/0)
struct pool syn_cache_pool;
/*
* We don't estimate RTT with SYNs, so each packet starts with the default
! * RTT and each timer step has a fixed timeout value.
*/
#define SYN_CACHE_TIMER_ARM(sc) \
do { \
TCPT_RANGESET((sc)->sc_rxtcur, \
TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN, \
TCPTV_REXMTMAX); \
! callout_reset(&(sc)->sc_timer, \
! (sc)->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, (sc)); \
! } while (/*CONSTCOND*/0)
#define SYN_CACHE_TIMESTAMP(sc) (tcp_now - (sc)->sc_timebase)
void
***************
*** 2626,2636 ****
/* Initialize the hash buckets. */
for (i = 0; i < tcp_syn_cache_size; i++)
! LIST_INIT(&tcp_syn_cache[i].sch_bucket);
!
! /* Initialize the timer queues. */
! for (i = 0; i <= TCP_MAXRXTSHIFT; i++)
! TAILQ_INIT(&tcp_syn_cache_timeq[i]);
/* Initialize the syn cache pool. */
pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
--- 2625,2631 ----
/* Initialize the hash buckets. */
for (i = 0; i < tcp_syn_cache_size; i++)
! TAILQ_INIT(&tcp_syn_cache[i].sch_bucket);
/* Initialize the syn cache pool. */
pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
***************
*** 2644,2650 ****
{
struct syn_cache_head *scp;
struct syn_cache *sc2;
! int s, i;
/*
* If there are no entries in the hash table, reinitialize
--- 2639,2645 ----
{
struct syn_cache_head *scp;
struct syn_cache *sc2;
! int s;
/*
* If there are no entries in the hash table, reinitialize
***************
*** 2670,2741 ****
tcpstat.tcps_sc_bucketoverflow++;
/*
* The bucket is full. Toss the oldest element in the
! * bucket. This will be the entry with our bucket
! * index closest to the front of the timer queue with
! * the largest timeout value.
! *
! * Note: This timer queue traversal may be expensive, so
! * we hope that this doesn't happen very often. It is
! * much more likely that we'll overflow the entire
! * cache, which is much easier to handle; see below.
*/
! for (i = TCP_MAXRXTSHIFT; i >= 0; i--) {
! for (sc2 = TAILQ_FIRST(&tcp_syn_cache_timeq[i]);
! sc2 != NULL;
! sc2 = TAILQ_NEXT(sc2, sc_timeq)) {
! if (sc2->sc_bucketidx == sc->sc_bucketidx) {
! SYN_CACHE_RM(sc2);
! SYN_CACHE_PUT(sc2);
! goto insert; /* 2 level break */
! }
! }
! }
#ifdef DIAGNOSTIC
/*
* This should never happen; we should always find an
* entry in our bucket.
*/
! panic("syn_cache_insert: bucketoverflow: impossible");
#endif
} else if (syn_cache_count >= tcp_syn_cache_limit) {
tcpstat.tcps_sc_overflowed++;
/*
* The cache is full. Toss the oldest entry in the
! * entire cache. This is the front entry in the
! * first non-empty timer queue with the largest
! * timeout value.
*/
! for (i = TCP_MAXRXTSHIFT; i >= 0; i--) {
! sc2 = TAILQ_FIRST(&tcp_syn_cache_timeq[i]);
! if (sc2 == NULL)
! continue;
! SYN_CACHE_RM(sc2);
! SYN_CACHE_PUT(sc2);
! goto insert; /* symmetry with above */
! }
#ifdef DIAGNOSTIC
! /*
! * This should never happen; we should always find an
! * entry in the cache.
! */
! panic("syn_cache_insert: cache overflow: impossible");
#endif
}
- insert:
/*
* Initialize the entry's timer.
*/
sc->sc_rxttot = 0;
sc->sc_rxtshift = 0;
SYN_CACHE_TIMER_ARM(sc);
- TAILQ_INSERT_TAIL(&tcp_syn_cache_timeq[sc->sc_rxtshift], sc, sc_timeq);
/* Link it from tcpcb entry */
LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
/* Put it into the bucket. */
! LIST_INSERT_HEAD(&scp->sch_bucket, sc, sc_bucketq);
scp->sch_length++;
syn_cache_count++;
--- 2665,2731 ----
tcpstat.tcps_sc_bucketoverflow++;
/*
* The bucket is full. Toss the oldest element in the
! * bucket. This will be the first entry in the bucket.
*/
! sc2 = TAILQ_FIRST(&scp->sch_bucket);
#ifdef DIAGNOSTIC
/*
* This should never happen; we should always find an
* entry in our bucket.
*/
! if (sc2 == NULL)
! panic("syn_cache_insert: bucketoverflow: impossible");
#endif
+ SYN_CACHE_RM(sc2);
+ SYN_CACHE_PUT(sc2);
} else if (syn_cache_count >= tcp_syn_cache_limit) {
+ struct syn_cache_head *scp2, *sce;
+
tcpstat.tcps_sc_overflowed++;
/*
* The cache is full. Toss the oldest entry in the
! * first non-empty bucket we can find.
! *
! * XXX We would really like to toss the oldest
! * entry in the cache, but we hope that this
! * condition doesn't happen very often.
*/
! scp2 = scp;
! if (TAILQ_EMPTY(&scp2->sch_bucket)) {
! sce = &tcp_syn_cache[tcp_syn_cache_size];
! for (++scp2; scp2 != scp; scp2++) {
! if (scp2 >= sce)
! scp2 = &tcp_syn_cache[0];
! if (! TAILQ_EMPTY(&scp2->sch_bucket))
! break;
! }
#ifdef DIAGNOSTIC
! /*
! * This should never happen; we should always find a
! * non-empty bucket.
! */
! if (scp2 == scp)
! panic("syn_cache_insert: cacheoverflow: "
! "impossible");
#endif
+ }
+ sc2 = TAILQ_FIRST(&scp2->sch_bucket);
+ SYN_CACHE_RM(sc2);
+ SYN_CACHE_PUT(sc2);
}
/*
* Initialize the entry's timer.
*/
sc->sc_rxttot = 0;
sc->sc_rxtshift = 0;
SYN_CACHE_TIMER_ARM(sc);
/* Link it from tcpcb entry */
LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
/* Put it into the bucket. */
! TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
scp->sch_length++;
syn_cache_count++;
***************
*** 2749,2808 ****
* that entry.
*/
void
! syn_cache_timer()
{
! struct syn_cache *sc, *nsc;
! int i, s;
s = splsoftnet();
/*
! * First, get all the entries that need to be retransmitted, or
! * must be expired due to exceeding the initial keepalive time.
*/
! for (i = 0; i < TCP_MAXRXTSHIFT; i++) {
! for (sc = TAILQ_FIRST(&tcp_syn_cache_timeq[i]);
! sc != NULL && PRT_SLOW_ISEXPIRED(sc->sc_rexmt);
! sc = nsc) {
! nsc = TAILQ_NEXT(sc, sc_timeq);
! /*
! * Compute the total amount of time this entry has
! * been on a queue. If this entry has been on longer
! * than the keep alive timer would allow, expire it.
! */
! sc->sc_rxttot += sc->sc_rxtcur;
! if (sc->sc_rxttot >= TCPTV_KEEP_INIT) {
! tcpstat.tcps_sc_timed_out++;
! SYN_CACHE_RM(sc);
! SYN_CACHE_PUT(sc);
! continue;
! }
! tcpstat.tcps_sc_retransmitted++;
! (void) syn_cache_respond(sc, NULL);
! /* Advance this entry onto the next timer queue. */
! TAILQ_REMOVE(&tcp_syn_cache_timeq[i], sc, sc_timeq);
! sc->sc_rxtshift = i + 1;
! SYN_CACHE_TIMER_ARM(sc);
! TAILQ_INSERT_TAIL(&tcp_syn_cache_timeq[sc->sc_rxtshift],
! sc, sc_timeq);
! }
! }
! /*
! * Now get all the entries that are expired due to too many
! * retransmissions.
! */
! for (sc = TAILQ_FIRST(&tcp_syn_cache_timeq[TCP_MAXRXTSHIFT]);
! sc != NULL && PRT_SLOW_ISEXPIRED(sc->sc_rexmt);
! sc = nsc) {
! nsc = TAILQ_NEXT(sc, sc_timeq);
! tcpstat.tcps_sc_timed_out++;
! SYN_CACHE_RM(sc);
! SYN_CACHE_PUT(sc);
! }
splx(s);
}
--- 2739,2779 ----
* that entry.
*/
void
! syn_cache_timer(void *arg)
{
! struct syn_cache *sc = arg;
! int s;
s = splsoftnet();
+ if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
+ /* Drop it -- too many retransmissions. */
+ goto dropit;
+ }
+
/*
! * Compute the total amount of time this entry has
! * been on a queue. If this entry has been on longer
! * than the keep alive timer would allow, expire it.
*/
! sc->sc_rxttot += sc->sc_rxtcur;
! if (sc->sc_rxttot >= TCPTV_KEEP_INIT)
! goto dropit;
! tcpstat.tcps_sc_retransmitted++;
! (void) syn_cache_respond(sc, NULL);
! /* Advance the timer back-off. */
! sc->sc_rxtshift++;
! SYN_CACHE_TIMER_ARM(sc);
! splx(s);
! return;
! dropit:
! tcpstat.tcps_sc_timed_out++;
! SYN_CACHE_RM(sc);
! SYN_CACHE_PUT(sc);
splx(s);
}
***************
*** 2855,2862 ****
scp = &tcp_syn_cache[hash % tcp_syn_cache_size];
*headp = scp;
s = splsoftnet();
! for (sc = LIST_FIRST(&scp->sch_bucket); sc != NULL;
! sc = LIST_NEXT(sc, sc_bucketq)) {
if (sc->sc_hash != hash)
continue;
if (!bcmp(&sc->sc_src, src, src->sa_len) &&
--- 2826,2833 ----
scp = &tcp_syn_cache[hash % tcp_syn_cache_size];
*headp = scp;
s = splsoftnet();
! for (sc = TAILQ_FIRST(&scp->sch_bucket); sc != NULL;
! sc = TAILQ_NEXT(sc, sc_bucketq)) {
if (sc->sc_hash != hash)
continue;
if (!bcmp(&sc->sc_src, src, src->sa_len) &&
***************
*** 3347,3352 ****
--- 3318,3325 ----
* Fill in the cache, and put the necessary IP and TCP
* options into the reply.
*/
bzero(sc, sizeof(struct syn_cache));
+ /* Initialize the timer only after zeroing, so bzero() cannot wipe it. */
+ callout_init(&sc->sc_timer);
bcopy(src, &sc->sc_src, src->sa_len);
bcopy(dst, &sc->sc_dst, dst->sa_len);
Index: tcp_subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_subr.c,v
retrieving revision 1.118
diff -c -r1.118 tcp_subr.c
*** tcp_subr.c 2001/09/10 22:14:27 1.118
--- tcp_subr.c 2001/09/11 19:15:11
***************
*** 194,200 ****
int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
- int tcp_syn_cache_interval = 1; /* runs timer twice a second */
int tcp_freeq __P((struct tcpcb *));
--- 194,199 ----
Index: tcp_timer.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_timer.c,v
retrieving revision 1.54
diff -c -r1.54 tcp_timer.c
*** tcp_timer.c 2001/09/10 22:45:46 1.54
--- tcp_timer.c 2001/09/11 19:15:11
***************
*** 221,237 ****
void
tcp_slowtimo()
{
- static int syn_cache_last = 0;
int s;
s = splsoftnet();
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
tcp_iss_seq += TCP_ISSINCR; /* increment iss */
tcp_now++; /* for timestamps */
- if (++syn_cache_last >= tcp_syn_cache_interval) {
- syn_cache_timer();
- syn_cache_last = 0;
- }
splx(s);
}
--- 221,232 ----
Index: tcp_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_var.h,v
retrieving revision 1.86
diff -c -r1.86 tcp_var.h
*** tcp_var.h 2001/09/10 22:14:28 1.86
--- tcp_var.h 2001/09/11 19:15:12
***************
*** 352,359 ****
};
struct syn_cache {
! LIST_ENTRY(syn_cache) sc_bucketq; /* link on bucket list */
! TAILQ_ENTRY(syn_cache) sc_timeq; /* link on timer queue */
union { /* cached route */
struct route route4;
#ifdef INET6
--- 352,359 ----
};
struct syn_cache {
! TAILQ_ENTRY(syn_cache) sc_bucketq; /* link on bucket list */
! struct callout sc_timer; /* rexmt timer */
union { /* cached route */
struct route route4;
#ifdef INET6
***************
*** 373,379 ****
union syn_cache_sa sc_dst;
tcp_seq sc_irs;
tcp_seq sc_iss;
- u_int sc_rexmt; /* retransmit timer */
u_int sc_rxtcur; /* current rxt timeout */
u_int sc_rxttot; /* total time spend on queues */
u_short sc_rxtshift; /* for computing backoff */
--- 373,378 ----
***************
*** 393,399 ****
};
struct syn_cache_head {
! LIST_HEAD(, syn_cache) sch_bucket; /* bucket entries */
u_short sch_length; /* # entries in bucket */
};
--- 392,398 ----
};
struct syn_cache_head {
! TAILQ_HEAD(, syn_cache) sch_bucket; /* bucket entries */
u_short sch_length; /* # entries in bucket */
};
***************
*** 537,543 ****
--- 536,544 ----
#define TCPCTL_MSSDFLT 4 /* default seg size */
#define TCPCTL_SYN_CACHE_LIMIT 5 /* max size of comp. state engine */
#define TCPCTL_SYN_BUCKET_LIMIT 6 /* max size of hash bucket */
+ #if 0 /*obsoleted*/
#define TCPCTL_SYN_CACHE_INTER 7 /* interval of comp. state timer */
+ #endif
#define TCPCTL_INIT_WIN 8 /* initial window */
#define TCPCTL_MSS_IFMTU 9 /* mss from interface, not in_maxmtu */
#define TCPCTL_SACK 10 /* RFC2018 selective acknowledgement */
***************
*** 568,574 ****
{ "mssdflt", CTLTYPE_INT }, \
{ "syn_cache_limit", CTLTYPE_INT }, \
{ "syn_bucket_limit", CTLTYPE_INT }, \
! { "syn_cache_interval", CTLTYPE_INT },\
{ "init_win", CTLTYPE_INT }, \
{ "mss_ifmtu", CTLTYPE_INT }, \
{ "sack", CTLTYPE_INT }, \
--- 569,575 ----
{ "mssdflt", CTLTYPE_INT }, \
{ "syn_cache_limit", CTLTYPE_INT }, \
{ "syn_bucket_limit", CTLTYPE_INT }, \
! { 0, 0 },\
{ "init_win", CTLTYPE_INT }, \
{ "mss_ifmtu", CTLTYPE_INT }, \
{ "sack", CTLTYPE_INT }, \
***************
*** 610,616 ****
extern int tcp_ack_on_push; /* ACK immediately on PUSH */
extern int tcp_syn_cache_limit; /* max entries for compressed state engine */
extern int tcp_syn_bucket_limit;/* max entries per hash bucket */
- extern int tcp_syn_cache_interval; /* compressed state timer */
extern int tcp_log_refused; /* log refused connections */
extern int tcp_rst_ppslim;
--- 611,616 ----
***************
*** 627,633 ****
{ 1, 0, &tcp_mssdflt }, \
{ 1, 0, &tcp_syn_cache_limit }, \
{ 1, 0, &tcp_syn_bucket_limit }, \
! { 1, 0, &tcp_syn_cache_interval }, \
{ 1, 0, &tcp_init_win }, \
{ 1, 0, &tcp_mss_ifmtu }, \
{ 1, 0, &tcp_do_sack }, \
--- 627,633 ----
{ 1, 0, &tcp_mssdflt }, \
{ 1, 0, &tcp_syn_cache_limit }, \
{ 1, 0, &tcp_syn_bucket_limit }, \
! { 0 }, \
{ 1, 0, &tcp_init_win }, \
{ 1, 0, &tcp_mss_ifmtu }, \
{ 1, 0, &tcp_do_sack }, \
***************
*** 720,726 ****
void syn_cache_reset __P((struct sockaddr *, struct sockaddr *,
struct tcphdr *));
int syn_cache_respond __P((struct syn_cache *, struct mbuf *));
! void syn_cache_timer __P((void));
void syn_cache_cleanup __P((struct tcpcb *));
int tcp_newreno __P((struct tcpcb *, struct tcphdr *));
--- 720,726 ----
void syn_cache_reset __P((struct sockaddr *, struct sockaddr *,
struct tcphdr *));
int syn_cache_respond __P((struct syn_cache *, struct mbuf *));
! void syn_cache_timer __P((void *));
void syn_cache_cleanup __P((struct tcpcb *));
int tcp_newreno __P((struct tcpcb *, struct tcphdr *));
--32u276st3Jlj2kUU--