tech-net archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Trimming TCP options
On 01/04/11 11:48, Mihai Chelaru wrote:
> I'll keep an eye on the last counter in the following days.
I need to reboot this one, so I have attached the latest stats and the
sys/netinet diff. I haven't seen anything strange during usage so far.
--
Mihai
tcp:
46317610 packets sent
36988340 data packets (27717538314 bytes)
195387 data packets (159339998 bytes) retransmitted
6355078 ack-only packets (7820889 delayed)
0 URG only packets
1396 window probe packets
2420913 window update packets
356502 control packets
0 send attempts resulted in self-quench
40556719 packets received
27918469 acks (for 27628909944 bytes)
0 duplicate acks
229 acks for unsent data
11497073 packets (10555150094 bytes) received in-sequence
677061 completely duplicate packets (36428938 bytes)
473 old duplicate packets
33390 packets with some dup. data (22800833 bytes duped)
673787 out-of-order packets (194682332 bytes)
3 packets (1 byte) of data after window
1 window probe
27609 window update packets
174177 packets received after close
48 discarded for bad checksums
0 discarded for bad header offset fields
0 discarded because packet too short
63831 connection requests
208515 connection accepts
250098 connections established (including accepts)
274252 connections closed (including 3614 drops)
6689 embryonic connections dropped
15832 SYN options degraded
161 connected with no options
0 delayed frees of tcpcb
22994926 segments updated rtt (of 21916325 attempts)
174389 retransmit timeouts
1215 connections dropped by rexmit timeout
1773 persist timeouts (resulting in 2 dropped connections)
1337 keepalive timeouts
77 keepalive probes sent
1228 connections dropped by keepalive
1842450 correct ACK header predictions
9056728 correct data packet header predictions
779054 PCB hash misses
17220 dropped due to no socket
167 connections drained due to memory shortage
3847 PMTUD blackholes detected
319271 bad connection attempts
210313 SYN cache entries added
0 hash collisions
208515 completed
0 aborted (no space to build PCB)
1436 timed out
0 dropped due to overflow
0 dropped due to bucket overflow
360 dropped due to RST
0 dropped due to ICMP unreachable
208938 delayed free of SYN cache entries
9849 SYN,ACKs retransmitted
3891 duplicate SYNs received for entries already in the cache
61 SYNs dropped (no route or no space)
0 packets with bad signature
137693 packets with good signature
0 sucessful ECN handshakes
0 packets with ECN CE bit
0 packets ECN ECT(0) bit
Index: tcp_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v
retrieving revision 1.306
diff -u -p -r1.306 tcp_input.c
--- tcp_input.c 2 Dec 2010 19:07:27 -0000 1.306
+++ tcp_input.c 6 Jan 2011 17:22:27 -0000
@@ -1987,6 +1987,8 @@ after_listen:
tcp_rmx_rtt(tp);
if (tiflags & TH_ACK) {
TCP_STATINC(TCP_STAT_CONNECTS);
+ if (tp->t_rxtshift > TCP_SYN_RET_NOOPT)
+ TCP_STATINC(TCP_STAT_NOOPT_CON);
soisconnected(so);
tcp_established(tp);
/* Do window scaling on this connection? */
@@ -2312,6 +2314,8 @@ after_listen:
SEQ_GT(th->th_ack, tp->snd_max))
goto dropwithreset;
TCP_STATINC(TCP_STAT_CONNECTS);
+ if (tp->t_rxtshift > TCP_SYN_RET_NOOPT)
+ TCP_STATINC(TCP_STAT_NOOPT_CON);
soisconnected(so);
tcp_established(tp);
/* Do window scaling? */
@@ -4347,30 +4351,24 @@ syn_cache_respond(struct syn_cache *sc,
*optp++ = sc->sc_ourmaxseg & 0xff;
if (sc->sc_request_r_scale != 15) {
- *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
- sc->sc_request_r_scale);
- optp += 4;
+ *((u_int32_t *)optp) = htonl(TCPOPT_WINDOW << 24 |
+ TCPOLEN_WINDOW << 16 | sc->sc_request_r_scale << 8);
+ optp += 3;
}
if (sc->sc_flags & SCF_TIMESTAMP) {
+ *optp++ = TCPOPT_TIMESTAMP;
+ *optp++ = TCPOLEN_TIMESTAMP;
u_int32_t *lp = (u_int32_t *)(optp);
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
*lp++ = htonl(SYN_CACHE_TIMESTAMP(sc));
*lp = htonl(sc->sc_timestamp);
- optp += TCPOLEN_TSTAMP_APPA;
+ optp += TCPOLEN_TIMESTAMP - 2;
}
if (sc->sc_flags & SCF_SACK_PERMIT) {
- u_int8_t *p = optp;
-
/* Let the peer know that we will SACK. */
- p[0] = TCPOPT_SACK_PERMITTED;
- p[1] = 2;
- p[2] = TCPOPT_NOP;
- p[3] = TCPOPT_NOP;
- optp += 4;
+ *optp++ = TCPOPT_SACK_PERMITTED;
+ *optp++ = 2;
}
/*
@@ -4440,8 +4438,6 @@ syn_cache_respond(struct syn_cache *sc,
sigp = optp;
memset(optp, 0, TCP_SIGLEN);
optp += TCP_SIGLEN;
- *optp++ = TCPOPT_NOP;
- *optp++ = TCPOPT_EOL;
(void)tcp_signature(m, th, hlen, sav, sigp);
@@ -4453,7 +4449,12 @@ syn_cache_respond(struct syn_cache *sc,
#endif
}
#endif
-
+ /* Align options to 32-bit boundary */
+ if ((optp - (u_int8_t *)(th + 1)) % 4) {
+ *optp++ = TCPOPT_EOL;
+ while((optp - (u_int8_t *)(th + 1)) % 4)
+ *optp++ = 0;
+ }
/* Compute the packet's checksum. */
switch (sc->sc_src.sa.sa_family) {
case AF_INET:
Index: tcp_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_output.c,v
retrieving revision 1.169
diff -u -p -r1.169 tcp_output.c
--- tcp_output.c 26 Jan 2010 18:09:08 -0000 1.169
+++ tcp_output.c 6 Jan 2011 17:22:27 -0000
@@ -1134,28 +1134,31 @@ send:
opt[3] = tp->t_ourmss & 0xff;
optlen = 4;
+ if (tp->t_rxtshift > TCP_SYN_RET_NOOPT) {
+ tp->t_flags |= TF_NOOPT;
+ TCP_STATINC(TCP_STAT_SYN_NOOPT);
+ goto outopts;
+ }
if ((tp->t_flags & TF_REQ_SCALE) &&
((flags & TH_ACK) == 0 ||
(tp->t_flags & TF_RCVD_SCALE))) {
*((u_int32_t *) (opt + optlen)) = htonl(
- TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 |
- TCPOLEN_WINDOW << 8 |
- tp->request_r_scale);
- optlen += 4;
+ TCPOPT_WINDOW << 24 |
+ TCPOLEN_WINDOW << 16 |
+ tp->request_r_scale << 8);
+ optlen += 3;
}
if (tcp_do_sack) {
u_int8_t *cp = (u_int8_t *)(opt + optlen);
cp[0] = TCPOPT_SACK_PERMITTED;
cp[1] = 2;
- cp[2] = TCPOPT_NOP;
- cp[3] = TCPOPT_NOP;
- optlen += 4;
+ optlen += 2;
}
}
}
+outopts:
/*
* Send a timestamp and echo-reply if this is a SYN and our side
* wants to use timestamps (TF_REQ_TSTMP is set) or both our side
@@ -1165,13 +1168,24 @@ send:
(flags & TH_RST) == 0 &&
((flags & (TH_SYN|TH_ACK)) == TH_SYN ||
(tp->t_flags & TF_RCVD_TSTMP))) {
- u_int32_t *lp = (u_int32_t *)(opt + optlen);
-
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
+ u_int32_t *lp;
+ if ((flags & (TH_SYN|TH_ACK)) == TH_SYN) {
+ u_char *bp = (u_char *)(opt + optlen);
+ bp[0] = TCPOPT_TIMESTAMP;
+ bp[1] = TCPOLEN_TIMESTAMP;
+ lp = (u_int32_t *)(opt + optlen + 2);
+ optlen += TCPOLEN_TIMESTAMP;
+ } else {
+ /*
+ * Form timestamp option as shown in
+ * appendix A of RFC 1323.
+ */
+ lp = (u_int32_t *)(opt + optlen);
+ *lp++ = htonl(TCPOPT_TSTAMP_HDR);
+ optlen += TCPOLEN_TSTAMP_APPA;
+ }
*lp++ = htonl(TCP_TIMESTAMP(tp));
*lp = htonl(tp->ts_recent);
- optlen += TCPOLEN_TSTAMP_APPA;
/* Set receive buffer autosizing timestamp. */
if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
@@ -1184,14 +1198,12 @@ send:
if (sack_numblks) {
int sack_len;
u_char *bp = (u_char *)(opt + optlen);
- u_int32_t *lp = (u_int32_t *)(bp + 4);
+ u_int32_t *lp = (u_int32_t *)(bp + 2);
struct ipqent *tiqe;
sack_len = sack_numblks * 8 + 2;
- bp[0] = TCPOPT_NOP;
- bp[1] = TCPOPT_NOP;
- bp[2] = TCPOPT_SACK;
- bp[3] = sack_len;
+ bp[0] = TCPOPT_SACK;
+ bp[1] = sack_len;
if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) {
sack_numblks--;
*lp++ = htonl(tp->rcv_dsack_block.left);
@@ -1206,32 +1218,39 @@ send:
*lp++ = htonl(tiqe->ipqe_seq + tiqe->ipqe_len +
((tiqe->ipqe_flags & TH_FIN) != 0 ? 1 : 0));
}
- optlen += sack_len + 2;
+ optlen += sack_len;
}
TCP_REASS_UNLOCK(tp);
#ifdef TCP_SIGNATURE
if (tp->t_flags & TF_SIGNATURE) {
u_char *bp;
+ if (optlen + TCPOLEN_SIGNATURE > MAX_TCPOPTLEN)
+ return EINVAL;
/*
* Initialize TCP-MD5 option (RFC2385)
*/
- bp = (u_char *)opt + optlen;
+ bp = (u_char *)(opt + optlen);
*bp++ = TCPOPT_SIGNATURE;
*bp++ = TCPOLEN_SIGNATURE;
sigoff = optlen + 2;
memset(bp, 0, TCP_SIGLEN);
- bp += TCP_SIGLEN;
optlen += TCPOLEN_SIGNATURE;
- /*
- * Terminate options list and maintain 32-bit alignment.
- */
- *bp++ = TCPOPT_NOP;
- *bp++ = TCPOPT_EOL;
- optlen += 2;
}
#endif /* TCP_SIGNATURE */
+ /*
+ * Terminate options list and maintain 32-bit alignment.
+ */
+ if (optlen % 4) {
+ u_char *bp;
+ bp = (u_char*)opt + optlen;
+ *bp++ = TCPOPT_EOL;
+ optlen++;
+ for (; optlen % 4; optlen++, bp++)
+ *bp = 0;
+ }
+
hdrlen += optlen;
#ifdef DIAGNOSTIC
Index: tcp_subr.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.238
diff -u -p -r1.238 tcp_subr.c
--- tcp_subr.c 16 Sep 2009 15:23:05 -0000 1.238
+++ tcp_subr.c 6 Jan 2011 17:22:28 -0000
@@ -731,6 +731,13 @@ tcp_respond(struct tcpcb *tp, struct mbu
/* clear h/w csum flags inherited from rx packet */
m->m_pkthdr.csum_flags = 0;
+#ifdef TCP_SIGNATURE
+ if (tp != NULL && (tp->t_flags & TF_SIGNATURE) != 0 &&
+ ((flags & TH_SYN) == 0 ||
+ sizeof(*th0) + TCPOLEN_SIGNATURE > (th0->th_off << 2)))
+ tlen = sizeof(*th0) + TCPOLEN_SIGNATURE;
+ else
+#endif
if ((flags & TH_SYN) == 0 || sizeof(*th0) > (th0->th_off << 2))
tlen = sizeof(*th0);
else
@@ -773,6 +780,16 @@ tcp_respond(struct tcpcb *tp, struct mbu
m = n;
n = NULL;
}
+#ifdef TCP_SIGNATURE
+ if (tp != NULL && (tp->t_flags & TF_SIGNATURE) != 0) {
+ u_char *sigplace;
+ sigplace = mtod(m, u_char *);
+ sigplace += hlen + tlen - TCPOLEN_SIGLEN;
+ memset(sigplace, 0, TCPOLEN_SIGLEN);
+ sigplace[0] = TCPOPT_SIGNATURE;
+ sigplace[1] = TCPOLEN_SIGNATURE;
+ }
+#endif
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
switch (family) {
@@ -814,6 +831,10 @@ tcp_respond(struct tcpcb *tp, struct mbu
th->th_win = htons((u_int16_t)win);
th->th_off = sizeof (struct tcphdr) >> 2;
tlen += sizeof(*th);
+#ifdef TCP_SIGNATURE
+ if (tp != NULL && (tp->t_flags & TF_SIGNATURE) != 0)
+ tlen += TCPOLEN_SIGNATURE;
+#endif
} else
tlen += th->th_off << 2;
m->m_len = hlen + tlen;
Index: tcp_var.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_var.h,v
retrieving revision 1.162
diff -u -p -r1.162 tcp_var.h
--- tcp_var.h 16 Sep 2009 15:23:05 -0000 1.162
+++ tcp_var.h 6 Jan 2011 17:22:28 -0000
@@ -643,8 +643,17 @@ struct syn_cache_head {
#define TCP_STAT_ECN_SHS 73 /* # of successful ECN handshakes */
#define TCP_STAT_ECN_CE 74 /* # of packets with CE bit */
#define TCP_STAT_ECN_ECT 75 /* # of packets with ECT(0) bit */
+#define TCP_STAT_SYN_NOOPT 76 /* SYN options downgrades */
+#define TCP_STAT_NOOPT_CON 77 /* Conns established after RET_NOOPT */
-#define TCP_NSTATS 76
+#define TCP_NSTATS 78
+
+/*
+ * Two SYNs sent with full options
+ * The next one only with MSS
+ * The following sent with no options at all
+ */
+#define TCP_SYN_RET_NOOPT 1
/*
* Names for TCP sysctl objects.
Home |
Main Index |
Thread Index |
Old Index