Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys Split off IPv4 re-assembly mechanism into a separate module



details:   https://anonhg.NetBSD.org/src/rev/cc6a4148cfed
branches:  trunk
changeset: 756355:cc6a4148cfed
user:      rmind <rmind%NetBSD.org@localhost>
date:      Tue Jul 13 22:16:10 2010 +0000

description:
Split off the IPv4 re-assembly mechanism into a separate module.  Abstract
it into ip_reass_init(), ip_reass_lookup(), etc. (note: the abstraction is
not yet complete).  No functional changes to the actual mechanism.

OK matt@

diffstat:

 sys/netinet/files.netinet                |    3 +-
 sys/netinet/in_var.h                     |    3 +-
 sys/netinet/ip_input.c                   |  571 +------------------------
 sys/netinet/ip_reass.c                   |  677 +++++++++++++++++++++++++++++++
 sys/netinet/ip_var.h                     |   16 +-
 sys/rump/net/lib/libnetinet/Makefile.inc |    6 +-
 6 files changed, 726 insertions(+), 550 deletions(-)

diffs (truncated from 1503 to 300 lines):

diff -r a05e5e2b2e45 -r cc6a4148cfed sys/netinet/files.netinet
--- a/sys/netinet/files.netinet Tue Jul 13 22:13:18 2010 +0000
+++ b/sys/netinet/files.netinet Tue Jul 13 22:16:10 2010 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: files.netinet,v 1.20 2008/01/25 21:12:14 joerg Exp $
+#      $NetBSD: files.netinet,v 1.21 2010/07/13 22:16:10 rmind Exp $
 
 defflag opt_tcp_debug.h                TCP_DEBUG
 defparam opt_tcp_debug.h       TCP_NDEBUG
@@ -29,6 +29,7 @@
 file   netinet/ip_input.c      inet
 file   netinet/ip_mroute.c     inet & mrouting
 file   netinet/ip_output.c     inet
+file   netinet/ip_reass.c      inet
 file   netinet/raw_ip.c        inet
 
 file   netinet/tcp_debug.c     (inet | inet6) & tcp_debug
diff -r a05e5e2b2e45 -r cc6a4148cfed sys/netinet/in_var.h
--- a/sys/netinet/in_var.h      Tue Jul 13 22:13:18 2010 +0000
+++ b/sys/netinet/in_var.h      Tue Jul 13 22:16:10 2010 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: in_var.h,v 1.62 2008/04/28 20:24:09 martin Exp $       */
+/*     $NetBSD: in_var.h,v 1.63 2010/07/13 22:16:10 rmind Exp $        */
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -300,6 +300,7 @@
            struct lwp *);
 void   in_purgeaddr(struct ifaddr *);
 void   in_purgeif(struct ifnet *);
+void   ip_reass_init(void);
 void   ip_input(struct mbuf *);
 int    ipflow_fastforward(struct mbuf *);
 void   ip_initid(void);
diff -r a05e5e2b2e45 -r cc6a4148cfed sys/netinet/ip_input.c
--- a/sys/netinet/ip_input.c    Tue Jul 13 22:13:18 2010 +0000
+++ b/sys/netinet/ip_input.c    Tue Jul 13 22:16:10 2010 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ip_input.c,v 1.287 2010/07/09 18:42:46 rmind Exp $     */
+/*     $NetBSD: ip_input.c,v 1.288 2010/07/13 22:16:10 rmind Exp $     */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.287 2010/07/09 18:42:46 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.288 2010/07/13 22:16:10 rmind Exp $");
 
 #include "opt_inet.h"
 #include "opt_compat_netbsd.h"
@@ -104,7 +104,6 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
@@ -240,105 +239,7 @@
 struct pfil_head inet_pfil_hook;
 #endif
 
-/*
- * Cached copy of nmbclusters. If nbclusters is different,
- * recalculate IP parameters derived from nmbclusters.
- */
-static int     ip_nmbclusters;                 /* copy of nmbclusters */
-static void    ip_nmbclusters_changed(void);   /* recalc limits */
-
-#define CHECK_NMBCLUSTER_PARAMS()                              \
-do {                                                           \
-       if (__predict_false(ip_nmbclusters != nmbclusters))     \
-               ip_nmbclusters_changed();                       \
-} while (/*CONSTCOND*/0)
-
-/* IP datagram reassembly queues (hashed) */
-#define IPREASS_NHASH_LOG2      6
-#define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
-#define IPREASS_HMASK           (IPREASS_NHASH - 1)
-#define IPREASS_HASH(x,y) \
-       (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
-struct ipqhead ipq[IPREASS_NHASH];
-int    ipq_locked;
-static int     ip_nfragpackets;        /* packets in reass queue */
-static int     ip_nfrags;              /* total fragments in reass queues */
-
-int    ip_maxfragpackets = 200;        /* limit on packets. XXX sysctl */
-int    ip_maxfrags;                    /* limit on fragments. XXX sysctl */
-
-
-/*
- * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for
- * IP reassembly queue buffer managment.
- *
- * We keep a count of total IP fragments (NB: not fragmented packets!)
- * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments.
- * If ip_nfrags exceeds ip_maxfrags the limit, we drop half the
- * total fragments in  reassembly queues.This AIMD policy avoids
- * repeatedly deleting single packets under heavy fragmentation load
- * (e.g., from lossy NFS peers).
- */
-static u_int   ip_reass_ttl_decr(u_int ticks);
-static void    ip_reass_drophalf(void);
-
-
-static inline int ipq_lock_try(void);
-static inline void ipq_unlock(void);
-
-static inline int
-ipq_lock_try(void)
-{
-       int s;
-
-       /*
-        * Use splvm() -- we're blocking things that would cause
-        * mbuf allocation.
-        */
-       s = splvm();
-       if (ipq_locked) {
-               splx(s);
-               return (0);
-       }
-       ipq_locked = 1;
-       splx(s);
-       return (1);
-}
-
-static inline void
-ipq_unlock(void)
-{
-       int s;
-
-       s = splvm();
-       ipq_locked = 0;
-       splx(s);
-}
-
-#ifdef DIAGNOSTIC
-#define        IPQ_LOCK()                                                      \
-do {                                                                   \
-       if (ipq_lock_try() == 0) {                                      \
-               printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
-               panic("ipq_lock");                                      \
-       }                                                               \
-} while (/*CONSTCOND*/ 0)
-#define        IPQ_LOCK_CHECK()                                                \
-do {                                                                   \
-       if (ipq_locked == 0) {                                          \
-               printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
-               panic("ipq lock check");                                \
-       }                                                               \
-} while (/*CONSTCOND*/ 0)
-#else
-#define        IPQ_LOCK()              (void) ipq_lock_try()
-#define        IPQ_LOCK_CHECK()        /* nothing */
-#endif
-
-#define        IPQ_UNLOCK()            ipq_unlock()
-
 struct pool inmulti_pool;
-struct pool ipqent_pool;
 
 #ifdef INET_CSUM_COUNTERS
 #include <sys/device.h>
@@ -387,16 +288,6 @@
 static void sysctl_net_inet_ip_setup(struct sysctllog **);
 
 /*
- * Compute IP limits derived from the value of nmbclusters.
- */
-static void
-ip_nmbclusters_changed(void)
-{
-       ip_maxfrags = nmbclusters / 4;
-       ip_nmbclusters =  nmbclusters;
-}
-
-/*
  * IP initialization: fill in IP protocol switch table.
  * All protocols not implemented in kernel go to raw IP protocol handler.
  */
@@ -410,8 +301,6 @@
 
        pool_init(&inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl",
            NULL, IPL_SOFTNET);
-       pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl",
-           NULL, IPL_VM);
 
        pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
        if (pr == 0)
@@ -424,14 +313,12 @@
                    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
                        ip_protox[pr->pr_protocol] = pr - inetsw;
 
-       for (i = 0; i < IPREASS_NHASH; i++)
-               LIST_INIT(&ipq[i]);
+       ip_reass_init();
 
        ip_initid();
        ip_id = time_second & 0xfffff;
 
        ipintrq.ifq_maxlen = ipqmaxlen;
-       ip_nmbclusters_changed();
 
        TAILQ_INIT(&in_ifaddrhead);
        in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
@@ -515,16 +402,12 @@
 ip_input(struct mbuf *m)
 {
        struct ip *ip = NULL;
-       struct ipq *fp;
        struct in_ifaddr *ia;
        struct ifaddr *ifa;
-       struct ipqent *ipqe;
-       int hlen = 0, mff, len;
+       int hlen = 0, len;
        int downmatch;
        int checkif;
        int srcrt = 0;
-       int s;
-       u_int hash;
 #ifdef FAST_IPSEC
        struct m_tag *mtag;
        struct tdb_ident *tdbi;
@@ -924,13 +807,12 @@
 ours:
        /*
         * If offset or IP_MF are set, must reassemble.
-        * Otherwise, nothing need be done.
-        * (We could look in the reassembly queue to see
-        * if the packet was previously fragmented,
-        * but it's not worth the time; just let them time out.)
         */
        if (ip->ip_off & ~htons(IP_DF|IP_RF)) {
-               u_int off;
+               struct ipq *fp;
+               u_int off, hash;
+               bool mff;
+
                /*
                 * Prevent TCP blind data attacks by not allowing non-initial
                 * fragments to start at less than 68 bytes (minimal fragment
@@ -944,16 +826,16 @@
                }
 
                /*
-                * Adjust ip_len to not reflect header,
-                * set ipqe_mff if more fragments are expected,
-                * convert offset of this to bytes.
+                * Adjust total IP length to not reflect header.  Set 'mff'
+                * indicator, if more fragments are expected.  Convert offset
+                * of this to bytes.
                 */
                ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
                mff = (ip->ip_off & htons(IP_MF)) != 0;
                if (mff) {
                        /*
                         * Make sure that fragments have a data length
-                        * that's a non-zero multiple of 8 bytes.
+                        * which is non-zero and multiple of 8 bytes.
                         */
                        if (ntohs(ip->ip_len) == 0 ||
                            (ntohs(ip->ip_len) & 0x7) != 0) {
@@ -963,29 +845,14 @@
                }
                ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3);
 
-               /*
-                * Look for queue of fragments of this datagram.
-                */
-               IPQ_LOCK();
-               hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
-               LIST_FOREACH(fp, &ipq[hash], ipq_q) {
-                       if (ip->ip_id != fp->ipq_id)
-                               continue;
-                       if (!in_hosteq(ip->ip_src, fp->ipq_src))
-                               continue;
-                       if (!in_hosteq(ip->ip_dst, fp->ipq_dst))
-                               continue;
-                       if (ip->ip_p != fp->ipq_p)
-                               continue;
-                       /*
-                        * Make sure the TOS is matches previous fragments.
-                        */
-                       if (ip->ip_tos != fp->ipq_tos) {
-                               IP_STATINC(IP_STAT_BADFRAGS);
-                               IPQ_UNLOCK();
-                               goto bad;
-                       }
-                       break;
+               /* Look for queue of fragments of this datagram. */
+               fp = ip_reass_lookup(ip, &hash);
+
+               /* Make sure the TOS matches previous fragments. */
+               if (fp && fp->ipq_tos != ip->ip_tos) {
+                       IP_STATINC(IP_STAT_BADFRAGS);
+                       ip_reass_unlock();
+                       goto bad;



Home | Main Index | Thread Index | Old Index