Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Port over the TCP_INFO socket option from FreeBSD, originall...



details:   https://anonhg.NetBSD.org/src/rev/bda78a1ad8d7
branches:  trunk
changeset: 336141:bda78a1ad8d7
user:      he <he%NetBSD.org@localhost>
date:      Sat Feb 14 12:57:52 2015 +0000

description:
Port over the TCP_INFO socket option from FreeBSD, originally from
the Linux 2.6 TCP API.  This permits the caller to query certain information
about a TCP connection, and is used by pkgsrc's net/iperf3 test program
if available.

This extends struct tcpcb with three fields to count retransmits,
out-of-sequence receives and zero window announcements, and will
therefore warrant a kernel revision bump (done separately).

diffstat:

 share/man/man4/tcp.4     |  19 +++++++++++-
 sys/netinet/tcp.h        |  75 +++++++++++++++++++++++++++++++++++++++++++++++-
 sys/netinet/tcp_input.c  |   5 +-
 sys/netinet/tcp_output.c |   8 +++-
 sys/netinet/tcp_subr.c   |   7 +++-
 sys/netinet/tcp_usrreq.c |  69 ++++++++++++++++++++++++++++++++++++++++++-
 sys/netinet/tcp_var.h    |   7 +++-
 7 files changed, 179 insertions(+), 11 deletions(-)

diffs (truncated from 345 to 300 lines):

diff -r d66971c76620 -r bda78a1ad8d7 share/man/man4/tcp.4
--- a/share/man/man4/tcp.4      Sat Feb 14 10:21:29 2015 +0000
+++ b/share/man/man4/tcp.4      Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-.\"    $NetBSD: tcp.4,v 1.29 2013/10/10 12:28:10 christos Exp $
+.\"    $NetBSD: tcp.4,v 1.30 2015/02/14 12:57:52 he Exp $
 .\"    $FreeBSD: tcp.4,v 1.11.2.16 2004/02/16 22:21:47 bms Exp $
 .\"
 .\" Copyright (c) 1983, 1991, 1993
@@ -243,6 +243,23 @@
 This option takes an
 .Vt "unsigned int"
 value, with a value greater than 0.
+.It Dv TCP_INFO
+Information about a socket's underlying TCP session may be retrieved
+by passing the read-only option
+.Dv TCP_INFO
+to 
+.Xr getsockopt 2 .
+It accepts a single argument: a pointer to an instance of
+.Vt "struct tcp_info" .
+.Pp
+This API is subject to change; consult the source to determine
+which fields are currently filled out by this option.
+.Nx
+specific additions include
+send window size,
+receive window size,
+and
+bandwidth-controlled window space.
 .\" range of 0 to N (where N is the
 .\" .Xr sysctl 8
 .\" variable
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp.h
--- a/sys/netinet/tcp.h Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp.h Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tcp.h,v 1.30 2012/01/07 20:20:22 christos Exp $        */
+/*     $NetBSD: tcp.h,v 1.31 2015/02/14 12:57:53 he Exp $      */
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -127,7 +127,80 @@
 #ifdef notyet
 #define        TCP_NOOPT       8       /* reserved for FreeBSD compat */
 #endif
+#define        TCP_INFO        9       /* retrieve tcp_info structure */
 #define        TCP_MD5SIG      0x10    /* use MD5 digests (RFC2385) */
 #define        TCP_CONGCTL     0x20    /* selected congestion control */
 
+#define        TCPI_OPT_TIMESTAMPS     0x01
+#define        TCPI_OPT_SACK           0x02
+#define        TCPI_OPT_WSCALE         0x04
+#define        TCPI_OPT_ECN            0x08
+#define        TCPI_OPT_TOE            0x10
+
+/*
+ * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
+ * the caller to query certain information about the state of a TCP
+ * connection.  We provide an overlapping set of fields with the Linux
+ * implementation, but since this is a fixed size structure, room has been
+ * left for growth.  In order to maximize potential future compatibility with
+ * the Linux API, the same variable names and order have been adopted, and
+ * padding left to make room for omitted fields in case they are added later.
+ *
+ * XXX: This is currently an unstable ABI/API, in that it is expected to
+ * change.
+ */
+struct tcp_info {
+       uint8_t         tcpi_state; /* TCP FSM state. */
+       uint8_t         __tcpi_ca_state;
+       uint8_t         __tcpi_retransmits;     /* Copy of tcpi_snd_rexmitpack; only 8 bits, so large counts wrap. */
+       uint8_t         __tcpi_probes;
+       uint8_t         __tcpi_backoff;
+       uint8_t         tcpi_options;          /* Options enabled on conn. (TCPI_OPT_* bits). */
+       uint8_t         tcpi_snd_wscale:4,      /* RFC1323 send shift value. */
+                       tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */
+
+       uint32_t        tcpi_rto;               /* Retransmission timeout (usec). */
+       uint32_t        __tcpi_ato;
+       uint32_t        tcpi_snd_mss;           /* Max segment size for send. */
+       uint32_t        tcpi_rcv_mss;           /* Max segment size for receive. */
+
+       uint32_t        __tcpi_unacked;
+       uint32_t        __tcpi_sacked;
+       uint32_t        __tcpi_lost;
+       uint32_t        __tcpi_retrans;         /* Copy of tcpi_snd_rexmitpack. */
+       uint32_t        __tcpi_fackets;
+
+       /* Times; measurements in usecs. */
+       uint32_t        __tcpi_last_data_sent;
+       uint32_t        __tcpi_last_ack_sent;   /* Also unimpl. on Linux? */
+       uint32_t        tcpi_last_data_recv;    /* Time since last recv data. */
+       uint32_t        __tcpi_last_ack_recv;
+
+       /* Metrics; variable units. */
+       uint32_t        __tcpi_pmtu;
+       uint32_t        __tcpi_rcv_ssthresh;
+       uint32_t        tcpi_rtt;               /* Smoothed RTT in usecs. */
+       uint32_t        tcpi_rttvar;            /* RTT variance in usecs. */
+       uint32_t        tcpi_snd_ssthresh;      /* Slow start threshold. */
+       uint32_t        tcpi_snd_cwnd;          /* Send congestion window, in segments. */
+       uint32_t        __tcpi_advmss;
+       uint32_t        __tcpi_reordering;
+
+       uint32_t        __tcpi_rcv_rtt;
+       uint32_t        tcpi_rcv_space;         /* Advertised recv window. */
+
+       /* FreeBSD/NetBSD extensions to tcp_info. */
+       uint32_t        tcpi_snd_wnd;           /* Advertised send window; NOTE(review): filled as snd_wnd / t_segsz, i.e. segments. */
+       uint32_t        tcpi_snd_bwnd;          /* No longer used; always reported as 0. */
+       uint32_t        tcpi_snd_nxt;           /* Next egress seqno */
+       uint32_t        tcpi_rcv_nxt;           /* Next ingress seqno */
+       uint32_t        tcpi_toe_tid;           /* HWTID for TOE endpoints */
+       uint32_t        tcpi_snd_rexmitpack;    /* Retransmitted packets */
+       uint32_t        tcpi_rcv_ooopack;       /* Out-of-order packets */
+       uint32_t        tcpi_snd_zerowin;       /* Zero-sized windows sent */
+       
+       /* Padding to grow without breaking ABI. */
+       uint32_t        __tcpi_pad[26];         /* Padding. */
+};
+
 #endif /* !_NETINET_TCP_H_ */
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c   Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_input.c   Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $ */
+/*     $NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $       */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -148,7 +148,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -738,6 +738,7 @@
        /*
         * Update the counters.
         */
+       tp->t_rcvoopack++;
        tcps = TCP_STAT_GETREF();
        tcps[TCP_STAT_RCVOOPACK]++;
        tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte;
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c  Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_output.c  Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $    */
+/*     $NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $      */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -135,7 +135,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -439,6 +439,7 @@
        if (tp->t_force && len == 1)
                tcps[TCP_STAT_SNDPROBE]++;
        else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+               tp->t_sndrexmitpack++;
                tcps[TCP_STAT_SNDREXMITPACK]++;
                tcps[TCP_STAT_SNDREXMITBYTE] += len;
        } else {
@@ -1401,6 +1402,9 @@
        if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt))
                win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt);
        th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));
+       if (th->th_win == 0) {
+               tp->t_sndzerowin++;
+       }
        if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
                u_int32_t urp = tp->snd_up - tp->snd_nxt;
                if (urp > IP_MAXPACKET)
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c    Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_subr.c    Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $      */
+/*     $NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $        */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -980,6 +980,9 @@
 
        .t_partialacks = -1,
        .t_bytes_acked = 0,
+       .t_sndrexmitpack = 0,
+       .t_rcvoopack = 0,
+       .t_sndzerowin = 0,
 };
 
 /*
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c  Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_usrreq.c  Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $    */
+/*     $NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $      */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -99,7 +99,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -119,6 +119,7 @@
 #include <sys/domain.h>
 #include <sys/sysctl.h>
 #include <sys/kauth.h>
+#include <sys/kernel.h>
 #include <sys/uidinfo.h>
 
 #include <net/if.h>
@@ -271,6 +272,65 @@
                TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
 }
 
+/*
+ * Export TCP internal state information via a struct tcp_info, based on the
+ * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
+ * (TCP state machine, etc).  We export all information using FreeBSD-native
+ * constants -- for example, the numeric values for tcpi_state will differ
+ * from Linux.
+ */
+static void
+tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
+{
+
+       bzero(ti, sizeof(*ti));
+
+       ti->tcpi_state = tp->t_state;
+       if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
+               ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+       if (tp->t_flags & TF_SACK_PERMIT)
+               ti->tcpi_options |= TCPI_OPT_SACK;
+       if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
+               ti->tcpi_options |= TCPI_OPT_WSCALE;
+               ti->tcpi_snd_wscale = tp->snd_scale;
+               ti->tcpi_rcv_wscale = tp->rcv_scale;
+       }
+       if (tp->t_flags & TF_ECN_PERMIT) {
+               ti->tcpi_options |= TCPI_OPT_ECN;
+       }
+
+       ti->tcpi_rto = tp->t_rxtcur * tick;
+       ti->tcpi_last_data_recv = (long)(hardclock_ticks -
+                                        (int)tp->t_rcvtime) * tick;
+       ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
+       ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
+
+       ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
+       /* Linux API wants these in # of segments, apparently */
+       ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz;
+       ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz;
+
+       /*
+        * FreeBSD-specific extension fields for tcp_info.
+        */
+       ti->tcpi_rcv_space = tp->rcv_wnd;
+       ti->tcpi_rcv_nxt = tp->rcv_nxt;
+       ti->tcpi_snd_bwnd = 0;          /* Unused, kept for compat. */
+       ti->tcpi_snd_nxt = tp->snd_nxt;
+       ti->tcpi_snd_mss = tp->t_segsz;
+       ti->tcpi_rcv_mss = tp->t_segsz;
+#ifdef TF_TOE
+       if (tp->t_flags & TF_TOE)
+               ti->tcpi_options |= TCPI_OPT_TOE;
+#endif
+       /* From the redundant department of redundancies... */
+       ti->__tcpi_retransmits = ti->__tcpi_retrans =
+               ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
+
+       ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
+       ti->tcpi_snd_zerowin = tp->t_sndzerowin;



Home | Main Index | Thread Index | Old Index