tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: [patch] bug fix & TCP networking performance improvements



On Wed, Apr 20, 2011 at 08:39:08PM +0200, Matthias Drochner wrote:
> 
> [initialization crash]
> dyoung%pobox.com@localhost said:
> > Looks easy to fix.
> 
> With the appended hack the box seems to survive.
> Didn't do anything benchmark-like yet.
> Only netstat(1) isn't helpful -- started as root and it
> got into an endless loop printing
> tcp   0   0   *.*  *.*   TIME_WAIT   -1303323789977.711ms
> until I killed it.

I don't understand why I did not see that myself until today, but I
fixed the obvious netstat bug.  I have attached a patch that should fix
all of the issues that you've mentioned, too.  It needs to be applied
after my previous patch.

Thanks for the feedback.

Dave

-- 
David Young             OJC Technologies
dyoung%ojctech.com@localhost      Urbana, IL * (217) 344-0444 x24
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c      (revision 7181)
+++ sys/netinet/tcp_subr.c      (working copy)
@@ -144,6 +144,7 @@
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_vtw.h>
 #include <netinet/tcp_private.h>
 #include <netinet/tcp_congctl.h>
 #include <netinet/tcpip.h>
@@ -437,6 +438,8 @@
        MOWNER_ATTACH(&tcp_mowner);
 
        tcpstat_percpu = percpu_alloc(sizeof(uint64_t) * TCP_NSTATS);
+
+       vtw_earlyinit();
 }
 
 /*
Index: sys/netinet/tcp_vtw.c
===================================================================
--- sys/netinet/tcp_vtw.c       (revision 7215)
+++ sys/netinet/tcp_vtw.c       (working copy)
@@ -175,8 +175,8 @@
 
        KASSERT(n <= FATP_MAX / 2);
 
-       fat->hash   = kmem_alloc(2*m * sizeof (fatp_t *), KM_NOSLEEP);
-       fat->base   = kmem_alloc(2*n * sizeof (fatp_t), KM_NOSLEEP);
+       fat->hash   = kmem_alloc(2*m * sizeof (fatp_t *), KM_SLEEP);
+       fat->base   = kmem_alloc(2*n * sizeof (fatp_t), KM_SLEEP);
 
        if (!fat->base) {
                if (fat->hash)
@@ -1248,7 +1248,7 @@
        int i;
        int sz = (ctl->is_v4 ? sizeof (vtw_v4_t) : sizeof (vtw_v6_t));
 
-       ctl->base.v4 = kmem_alloc(n * sz, KM_NOSLEEP);
+       ctl->base.v4 = kmem_alloc(n * sz, KM_SLEEP);
        if (ctl->base.v4) {
                vtw_t   *base;
                int     class_n;
@@ -1749,48 +1749,42 @@
        .lookup6        = tcp_lookup_v6,
 };
 
-/*!\brief      select controlling instance
- */
-static vtw_ctl_t *
-vtw_control(int af, uint32_t msl)
+static bool
+vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
 {
        fatp_ctl_t      *fat;
        vtw_ctl_t       *ctl;
-       int             class   = msl_to_class(msl);
-       int             i;
 
-       if (!tcp_vtw_was_enabled) {
-               /* This guarantees is timer ticks until we no longer need them.
-                */
-               tcp_vtw_was_enabled = 1;
-
-               callout_init(&vtw_cs, 0);
-               callout_setfunc(&vtw_cs, vtw_tick, 0);
-               callout_schedule(&vtw_cs, hz / 5);
-
-               for (i = 0; i < VTW_NCLASS; ++i) {
-                       vtw_tcpv4[i].is_v4 = 1;
-                       vtw_tcpv6[i].is_v6 = 1;
-               }
-
-               tcbtable.vestige = &tcp_hooks;
-       }
-
        switch (af) {
        case AF_INET:
                fat = &fat_tcpv4;
                ctl = &vtw_tcpv4[0];
                break;
-
        case AF_INET6:
                fat = &fat_tcpv6;
                ctl = &vtw_tcpv6[0];
                break;
-
        default:
-               return 0;
+               return false;
        }
+       if (fatp != NULL)
+               *fatp = fat;
+       if (ctlp != NULL)
+               *ctlp = ctl;
+       return true;
+}
 
+/*!\brief      initialize controlling instance
+ */
+static int
+vtw_control_init(int af)
+{
+       fatp_ctl_t      *fat;
+       vtw_ctl_t       *ctl;
+
+       if (!vtw_select(af, &fat, &ctl))
+               return EAFNOSUPPORT;
+
        if (!fat->base) {
                uint32_t        n, m;
 
@@ -1807,14 +1801,34 @@
                fatp_init(fat, n, m);
 
                if (!fat->base)
-                       return 0;
+                       return ENOMEM;
        }
+
        if (!ctl->base.v) {
+
                vtw_init(fat, ctl, tcp_vtw_entries);
                if (!ctl->base.v)
-                       return 0;
+                       return ENOMEM;
        }
 
+       return 0;
+}
+
+/*!\brief      select controlling instance
+ */
+static vtw_ctl_t *
+vtw_control(int af, uint32_t msl)
+{
+       fatp_ctl_t      *fat;
+       vtw_ctl_t       *ctl;
+       int             class   = msl_to_class(msl);
+
+       if (!vtw_select(af, &fat, &ctl))
+               return NULL;
+
+       if (!fat->base || !ctl->base.v)
+               return NULL;
+
        return ctl + class;
 }
 
@@ -2106,6 +2120,37 @@
                vtw_restart_v6(vp);
 }
 
+int
+vtw_earlyinit(void)
+{
+       int rc;
+ 
+       if (!tcp_vtw_was_enabled) {
+               int i;
+
+               /* This guarantees timer ticks until we no longer need them.
+                */
+               tcp_vtw_was_enabled = 1;
+
+               callout_init(&vtw_cs, 0);
+               callout_setfunc(&vtw_cs, vtw_tick, 0);
+               callout_schedule(&vtw_cs, hz / 5);
+
+               for (i = 0; i < VTW_NCLASS; ++i) {
+                       vtw_tcpv4[i].is_v4 = 1;
+                       vtw_tcpv6[i].is_v6 = 1;
+               }
+
+               tcbtable.vestige = &tcp_hooks;
+       }
+
+       if ((rc = vtw_control_init(AF_INET)) != 0 || 
+           (rc = vtw_control_init(AF_INET6)) != 0)
+               return rc;
+
+       return 0;
+}
+
 #ifdef VTW_DEBUG
 #include <sys/syscallargs.h>
 #include <sys/sysctl.h>
Index: sys/netinet/tcp_vtw.h
===================================================================
--- sys/netinet/tcp_vtw.h       (revision 7213)
+++ sys/netinet/tcp_vtw.h       (working copy)
@@ -123,6 +123,7 @@
 
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/sysctl.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
@@ -391,6 +392,7 @@
 } vestigial_inpcb_t;
 
 void vtw_restart(vestigial_inpcb_t*);
+int vtw_earlyinit(void);
 
 #ifdef VTW_DEBUG
 typedef struct sin_either {
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h    (revision 7058)
+++ sys/sys/socket.h    (working copy)
@@ -422,18 +422,6 @@
 #define ki_src ki_s._kis_src
 #define ki_dst ki_d._kid_dst
 
-/*
- * structure to get pcb info, this also gives
- * data buffered by upper layer than TCP module
- */
-struct kinfo_pcb2 {
-       struct kinfo_pcb pcb;
-
-
-       __uint64_t      ki_rcvq;        /* U_LONG: receive queue len */
-       __uint64_t      ki_sndq;        /* U_LONG: send queue len */
-};
-
 #define PCB_SLOP               20
 #define PCB_ALL                        0
 
Index: usr.bin/netstat/inet.c
===================================================================
--- usr.bin/netstat/inet.c      (revision 7189)
+++ usr.bin/netstat/inet.c      (working copy)
@@ -202,7 +202,9 @@
                    ,buf[1], ntohs(v4->fport));
                if (!(Vflag && vflag))
                        return;
-       } else if (vtw->expire.tv_sec != 0) {
+       } else if (vtw->expire.tv_sec == 0)
+               return;
+       else {
                dbg_printf("%15.15s:%d %15.15s:%d expires in %.3fms\n"
                    ,buf[0], ntohs(v4->lport)
                    ,buf[1], ntohs(v4->fport)
Index: usr.bin/netstat/vtw.c
===================================================================
--- usr.bin/netstat/vtw.c       (revision 7189)
+++ usr.bin/netstat/vtw.c       (working copy)
@@ -234,7 +234,8 @@
                n = (klim - kbase + 1);
 
                if (!i) {
-                       ubase = malloc(n * sizeof(*kbase));
+                       if ((ubase = malloc(n * sizeof(*kbase))) == NULL)
+                               err(EXIT_FAILURE, NULL);
                        ulim = ubase + n - 1;
 
                        snarf(kbase, ubase, n * sizeof(*ubase));
@@ -267,6 +268,8 @@
        mem += (lim - base + 1) * sizeof(*base);
 
        fat_tcpv4.base = malloc((lim - base + 1) * sizeof(*base));
+       if (fat_tcpv4.base == NULL)
+               err(EXIT_FAILURE, NULL);
        fat_tcpv4.lim = fat_tcpv4.base + (lim - base);
 
        snarf(base, fat_tcpv4.base, sizeof(*base) * (lim - base + 1));
@@ -280,6 +283,8 @@
 
        fat_tcpv4.hash = malloc(n * sizeof(*hash));
        fat_tcpv4.port = malloc(n * sizeof(*port));
+       if (fat_tcpv4.hash == NULL || fat_tcpv4.port == NULL)
+               err(EXIT_FAILURE, NULL);
 
        snarf(hash, fat_tcpv4.hash, n * sizeof(*hash));
        snarf(port, fat_tcpv4.port, n * sizeof(*port));
@@ -332,7 +337,8 @@
                n = (klim - kbase + 1);
 
                if (!i) {
-                       ubase = malloc(n * sizeof(*kbase));
+                       if ((ubase = malloc(n * sizeof(*kbase))) == NULL)
+                               err(EXIT_FAILURE, NULL);
                        ulim = ubase + n - 1;
 
                        snarf(kbase, ubase, n * sizeof(*ubase));
@@ -363,6 +369,8 @@
        mem += (lim - base + 1) * sizeof(*base);
 
        fat_tcpv6.base = malloc((lim - base + 1) * sizeof(*base));
+       if (fat_tcpv6.base == NULL)
+               err(EXIT_FAILURE, NULL);
        fat_tcpv6.lim = fat_tcpv6.base + (lim - base);
 
        snarf(base, fat_tcpv6.base, sizeof(*base) * (lim - base + 1));
@@ -376,6 +384,8 @@
 
        fat_tcpv6.hash = malloc(n * sizeof(*hash));
        fat_tcpv6.port = malloc(n * sizeof(*port));
+       if (fat_tcpv6.hash == NULL || fat_tcpv6.port == NULL)
+               err(EXIT_FAILURE, NULL);
 
        snarf(hash, fat_tcpv6.hash, n * sizeof(*hash));
        snarf(port, fat_tcpv6.port, n * sizeof(*port));
Index: usr.bin/netstat/main.c
===================================================================
--- usr.bin/netstat/main.c      (revision 7189)
+++ usr.bin/netstat/main.c      (working copy)
@@ -389,7 +389,7 @@
 
                /* If we have -M and -N, we're not dealing with live memory. */
                use_sysctl = 0;
-       } else if (qflag ||
+       } else if (true || qflag ||
                   rflag ||
                   iflag ||
 #ifndef SMALL
Index: usr.bin/netstat/inet6.c
===================================================================
--- usr.bin/netstat/inet6.c     (revision 7189)
+++ usr.bin/netstat/inet6.c     (working copy)
@@ -253,7 +253,9 @@
                    ,buf[1], ntohs(v6->fport));
                if (!(Vflag && vflag))
                        return;
-       } else if (vtw->expire.tv_sec) {
+       } else if (vtw->expire.tv_sec == 0)
+               return;
+       else {
                dbg_printf("%15.15s:%d %15.15s:%d expires in %.3fms\n"
                    ,buf[0], ntohs(v6->lport)
                    ,buf[1], ntohs(v6->fport)


Home | Main Index | Thread Index | Old Index