Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/netinet Change struct ipqe to use TAILQ's instead of LIS...



details:   https://anonhg.NetBSD.org/src/rev/d21dc7b5045a
branches:  trunk
changeset: 526619:d21dc7b5045a
user:      matt <matt%NetBSD.org@localhost>
date:      Tue May 07 02:59:38 2002 +0000

description:
Change struct ipqe to use TAILQ's instead of LIST's (primarily for TCP's
benefit currently).  Rework tcp_reass code to optimize the 4 most likely causes
of out-of-order packets: first OoO pkt, next OoO pkt in seq, OoO pkt is part
of new chunk of OoO packets, and the OoO pkt fills the first hole.  Add evcnts
to instrument tcp_reass (enabled by the options TCP_REASS_COUNTERS).  This is
part 1/2 of tcp_reass changes.

diffstat:

 sys/netinet/ip_input.c  |   34 ++++++------
 sys/netinet/ip_var.h    |   11 ++-
 sys/netinet/tcp_input.c |  135 +++++++++++++++++++++++++++++++++++++++++------
 sys/netinet/tcp_subr.c  |   77 ++++++++++++++++++++++++--
 4 files changed, 211 insertions(+), 46 deletions(-)

diffs (truncated from 553 to 300 lines):

diff -r 199c2c991168 -r d21dc7b5045a sys/netinet/ip_input.c
--- a/sys/netinet/ip_input.c    Tue May 07 02:40:55 2002 +0000
+++ b/sys/netinet/ip_input.c    Tue May 07 02:59:38 2002 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ip_input.c,v 1.147 2002/04/18 22:33:21 matt Exp $      */
+/*     $NetBSD: ip_input.c,v 1.148 2002/05/07 02:59:38 matt Exp $      */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -102,7 +102,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.147 2002/04/18 22:33:21 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.148 2002/05/07 02:59:38 matt Exp $");
 
 #include "opt_gateway.h"
 #include "opt_pfil_hooks.h"
@@ -858,7 +858,7 @@
                fp->ipq_ttl = IPFRAGTTL;
                fp->ipq_p = ipqe->ipqe_ip->ip_p;
                fp->ipq_id = ipqe->ipqe_ip->ip_id;
-               LIST_INIT(&fp->ipq_fragq);
+               TAILQ_INIT(&fp->ipq_fragq);
                fp->ipq_src = ipqe->ipqe_ip->ip_src;
                fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
                p = NULL;
@@ -868,8 +868,8 @@
        /*
         * Find a segment which begins after this one does.
         */
-       for (p = NULL, q = LIST_FIRST(&fp->ipq_fragq); q != NULL;
-           p = q, q = LIST_NEXT(q, ipqe_q))
+       for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
+           p = q, q = TAILQ_NEXT(q, ipqe_q))
                if (q->ipqe_ip->ip_off > ipqe->ipqe_ip->ip_off)
                        break;
 
@@ -904,9 +904,9 @@
                        m_adj(q->ipqe_m, i);
                        break;
                }
-               nq = LIST_NEXT(q, ipqe_q);
+               nq = TAILQ_NEXT(q, ipqe_q);
                m_freem(q->ipqe_m);
-               LIST_REMOVE(q, ipqe_q);
+               TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
                pool_put(&ipqent_pool, q);
        }
 
@@ -916,13 +916,13 @@
         * check for complete reassembly.
         */
        if (p == NULL) {
-               LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
+               TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
        } else {
-               LIST_INSERT_AFTER(p, ipqe, ipqe_q);
+               TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
        }
        next = 0;
-       for (p = NULL, q = LIST_FIRST(&fp->ipq_fragq); q != NULL;
-           p = q, q = LIST_NEXT(q, ipqe_q)) {
+       for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
+           p = q, q = TAILQ_NEXT(q, ipqe_q)) {
                if (q->ipqe_ip->ip_off != next)
                        return (0);
                next += q->ipqe_ip->ip_len;
@@ -934,7 +934,7 @@
         * Reassembly is complete.  Check for a bogus message size and
         * concatenate fragments.
         */
-       q = LIST_FIRST(&fp->ipq_fragq);
+       q = TAILQ_FIRST(&fp->ipq_fragq);
        ip = q->ipqe_ip;
        if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
                ipstat.ips_toolong++;
@@ -945,11 +945,11 @@
        t = m->m_next;
        m->m_next = 0;
        m_cat(m, t);
-       nq = LIST_NEXT(q, ipqe_q);
+       nq = TAILQ_NEXT(q, ipqe_q);
        pool_put(&ipqent_pool, q);
        for (q = nq; q != NULL; q = nq) {
                t = q->ipqe_m;
-               nq = LIST_NEXT(q, ipqe_q);
+               nq = TAILQ_NEXT(q, ipqe_q);
                pool_put(&ipqent_pool, q);
                m_cat(m, t);
        }
@@ -996,10 +996,10 @@
 
        IPQ_LOCK_CHECK();
 
-       for (q = LIST_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
-               p = LIST_NEXT(q, ipqe_q);
+       for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
+               p = TAILQ_NEXT(q, ipqe_q);
                m_freem(q->ipqe_m);
-               LIST_REMOVE(q, ipqe_q);
+               TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
                pool_put(&ipqent_pool, q);
        }
        LIST_REMOVE(fp, ipq_q);
diff -r 199c2c991168 -r d21dc7b5045a sys/netinet/ip_var.h
--- a/sys/netinet/ip_var.h      Tue May 07 02:40:55 2002 +0000
+++ b/sys/netinet/ip_var.h      Tue May 07 02:59:38 2002 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ip_var.h,v 1.46 2001/12/21 02:51:47 itojun Exp $       */
+/*     $NetBSD: ip_var.h,v 1.47 2002/05/07 02:59:38 matt Exp $ */
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -63,19 +63,20 @@
  * port numbers (which are no longer needed once we've located the
  * tcpcb) are overlayed with an mbuf pointer.
  */
-LIST_HEAD(ipqehead, ipqent);
+TAILQ_HEAD(ipqehead, ipqent);
 struct ipqent {
-       LIST_ENTRY(ipqent) ipqe_q;
+       TAILQ_ENTRY(ipqent) ipqe_q;
        union {
                struct ip       *_ip;
                struct tcpiphdr *_tcp;
        } _ipqe_u1;
-       struct mbuf     *ipqe_m;        /* mbuf contains packet */
+       struct mbuf     *ipqe_m;        /* point to first mbuf */
+       struct mbuf     *ipre_mlast;    /* point to last mbuf */
        u_int8_t        ipqe_mff;       /* for IP fragmentation */
        /*
         * The following are used in TCP reassembly
         */
-       LIST_ENTRY(ipqent) ipqe_timeq;
+       TAILQ_ENTRY(ipqent) ipqe_timeq;
        u_int32_t ipqe_seq;
        u_int32_t ipqe_len;
        u_int32_t ipqe_flags;
diff -r 199c2c991168 -r d21dc7b5045a sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c   Tue May 07 02:40:55 2002 +0000
+++ b/sys/netinet/tcp_input.c   Tue May 07 02:59:38 2002 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tcp_input.c,v 1.140 2002/03/24 17:09:01 christos Exp $ */
+/*     $NetBSD: tcp_input.c,v 1.141 2002/05/07 02:59:38 matt Exp $     */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -152,7 +152,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.140 2002/03/24 17:09:01 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.141 2002/05/07 02:59:38 matt Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -292,6 +292,31 @@
 
 #endif /* TCP_CSUM_COUNTERS */
 
+#ifdef TCP_REASS_COUNTERS
+#include <sys/device.h>
+
+extern struct evcnt tcp_reass_;
+extern struct evcnt tcp_reass_empty;
+extern struct evcnt tcp_reass_iteration[8];
+extern struct evcnt tcp_reass_prependfirst;
+extern struct evcnt tcp_reass_prepend;
+extern struct evcnt tcp_reass_insert;
+extern struct evcnt tcp_reass_inserttail;
+extern struct evcnt tcp_reass_append;
+extern struct evcnt tcp_reass_appendtail;
+extern struct evcnt tcp_reass_overlaptail;
+extern struct evcnt tcp_reass_overlapfront;
+extern struct evcnt tcp_reass_segdup;
+extern struct evcnt tcp_reass_fragdup;
+
+#define        TCP_REASS_COUNTER_INCR(ev)      (ev)->ev_count++
+
+#else
+
+#define        TCP_REASS_COUNTER_INCR(ev)      /* nothing */
+
+#endif /* TCP_REASS_COUNTERS */
+
 int
 tcp_reass(tp, th, m, tlen)
        struct tcpcb *tp;
@@ -306,6 +331,9 @@
        unsigned pkt_len;
        u_long rcvpartdupbyte = 0;
        u_long rcvoobyte;
+#ifdef TCP_REASS_COUNTERS
+       u_int count = 0;
+#endif
 
        if (tp->t_inpcb)
                so = tp->t_inpcb->inp_socket;
@@ -331,11 +359,64 @@
        pkt_seq = th->th_seq;
        pkt_len = *tlen;
        pkt_flags = th->th_flags;
+
+       TCP_REASS_COUNTER_INCR(&tcp_reass_);
+
+       if ((p = TAILQ_LAST(&tp->segq, ipqehead)) != NULL) {
+               /*
+                * When we miss a packet, the vast majority of time we get
+                * packets that follow it in order.  So optimize for that.
+                */
+               if (pkt_seq == p->ipqe_seq + p->ipqe_len) {
+                       p->ipqe_len += pkt_len;
+                       p->ipqe_flags |= pkt_flags;
+                       m_cat(p->ipqe_m, m);
+                       tiqe = p;
+                       TAILQ_REMOVE(&tp->timeq, p, ipqe_timeq);
+                       TCP_REASS_COUNTER_INCR(&tcp_reass_appendtail);
+                       goto skip_replacement;
+               }
+               /*
+                * While we're here, if the pkt is completely beyond
+                * anything we have, just insert it at the tail.
+                */
+               if (SEQ_GT(pkt_seq, p->ipqe_seq + p->ipqe_len)) {
+                       TCP_REASS_COUNTER_INCR(&tcp_reass_inserttail);
+                       goto insert_it;
+               }
+       }
+
+       q = TAILQ_FIRST(&tp->segq);
+
+       if (q != NULL) {
+               /*
+                * If this segment immediately precedes the first out-of-order
+                * block, simply slap the segment in front of it and (mostly)
+                * skip the complicated logic.
+                */
+               if (pkt_seq + pkt_len == q->ipqe_seq) {
+                       q->ipqe_seq = pkt_seq;
+                       q->ipqe_len += pkt_len;
+                       q->ipqe_flags |= pkt_flags;
+                       m_cat(m, q->ipqe_m);
+                       q->ipqe_m = m;
+                       tiqe = q;
+                       TAILQ_REMOVE(&tp->timeq, q, ipqe_timeq);
+                       TCP_REASS_COUNTER_INCR(&tcp_reass_prependfirst);
+                       goto skip_replacement;
+               }
+       } else {
+               TCP_REASS_COUNTER_INCR(&tcp_reass_empty);
+       }
+
        /*
         * Find a segment which begins after this one does.
         */
-       for (p = NULL, q = LIST_FIRST(&tp->segq); q != NULL; q = nq) {
-               nq = LIST_NEXT(q, ipqe_q);
+       for (p = NULL; q != NULL; q = nq) {
+               nq = TAILQ_NEXT(q, ipqe_q);
+#ifdef TCP_REASS_COUNTERS
+               count++;
+#endif
                /*
                 * If the received segment is just right after this
                 * fragment, merge the two together and then check
@@ -352,6 +433,7 @@
                        pkt_seq = q->ipqe_seq;
                        m_cat(q->ipqe_m, m);
                        m = q->ipqe_m;
+                       TCP_REASS_COUNTER_INCR(&tcp_reass_append);
                        goto free_ipqe;
                }
                /*
@@ -366,8 +448,11 @@
                 * If the fragment is past the received segment, 
                 * it (or any following) can't be concatenated.
                 */
-               if (SEQ_GT(q->ipqe_seq, pkt_seq + pkt_len))
+               if (SEQ_GT(q->ipqe_seq, pkt_seq + pkt_len)) {
+                       TCP_REASS_COUNTER_INCR(&tcp_reass_insert);
                        break;
+               }
+
                /*
                 * We've received all the data in this segment before.
                 * mark it as a duplicate and return.
@@ -379,6 +464,7 @@
                        m_freem(m);
                        if (tiqe != NULL)
                                pool_put(&ipqent_pool, tiqe);
+                       TCP_REASS_COUNTER_INCR(&tcp_reass_segdup);
                        return (0);
                }
                /*
@@ -390,6 +476,7 @@
                    SEQ_LEQ(q->ipqe_seq + q->ipqe_len, pkt_seq + pkt_len)) {
                        rcvpartdupbyte += q->ipqe_len;
                        m_freem(q->ipqe_m);
+                       TCP_REASS_COUNTER_INCR(&tcp_reass_fragdup);



Home | Main Index | Thread Index | Old Index