tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Trimming TCP options



Hi,

I'd like to drop paddings for wscale, sack, tcpsig and timestamps in syn
options.

Currently our TCP code is padding every option to 32bit, although this
is not required by RFC793 or other standards except a special case
for timestamps as far as I know. For example on NetBSD 5.1(RC2) at a
simple telnet we end up sending SYNs with 4 consecutive nop options and
for 4 options we also send 5 nops:

<mss 1300,nop,wscale 3,sackOK,nop,nop,nop,nop,timestamp 1 0>

After this change, options will look like:
<mss 1300,wscale 3,sackOK,timestamp 1 0,eol>

Also, as an immediate result TCP_SIGNATURE started to work instead on
panicing kernel after overflowing the 40 bytes opt buffer.

Patch is attached, opinions ?

-- 
Mihai

Index: tcp_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_output.c,v
retrieving revision 1.169
diff -u -p -r1.169 tcp_output.c
--- tcp_output.c        26 Jan 2010 18:09:08 -0000      1.169
+++ tcp_output.c        29 Dec 2010 21:44:08 -0000
@@ -1138,20 +1138,17 @@ send:
                            ((flags & TH_ACK) == 0 ||
                            (tp->t_flags & TF_RCVD_SCALE))) {
                                *((u_int32_t *) (opt + optlen)) = htonl(
-                                       TCPOPT_NOP << 24 |
-                                       TCPOPT_WINDOW << 16 |
-                                       TCPOLEN_WINDOW << 8 |
-                                       tp->request_r_scale);
-                               optlen += 4;
+                                       TCPOPT_WINDOW << 24 |
+                                       TCPOLEN_WINDOW << 16 |
+                                       tp->request_r_scale << 8);
+                               optlen += 3;
                        }
                        if (tcp_do_sack) {
                                u_int8_t *cp = (u_int8_t *)(opt + optlen);
 
                                cp[0] = TCPOPT_SACK_PERMITTED;
                                cp[1] = 2;
-                               cp[2] = TCPOPT_NOP;
-                               cp[3] = TCPOPT_NOP;
-                               optlen += 4;
+                               optlen += 2;
                        }
                }
        }
@@ -1165,13 +1162,24 @@ send:
             (flags & TH_RST) == 0 &&
            ((flags & (TH_SYN|TH_ACK)) == TH_SYN ||
             (tp->t_flags & TF_RCVD_TSTMP))) {
-               u_int32_t *lp = (u_int32_t *)(opt + optlen);
-
-               /* Form timestamp option as shown in appendix A of RFC 1323. */
-               *lp++ = htonl(TCPOPT_TSTAMP_HDR);
+               u_int32_t *lp;
+               if ((flags & (TH_SYN|TH_ACK)) == TH_SYN) {
+                       u_char *bp = (u_char *)(opt + optlen);
+                       bp[0] = TCPOPT_TIMESTAMP;
+                       bp[1] = TCPOLEN_TIMESTAMP;
+                       lp = (u_int32_t *)(opt + optlen + 2);
+                       optlen += TCPOLEN_TIMESTAMP;
+               } else {
+                       /*
+                        * Form timestamp option as shown in
+                        * appendix A of RFC 1323.
+                        */
+                       lp = (u_int32_t *)(opt + optlen);
+                       *lp++ = htonl(TCPOPT_TSTAMP_HDR);
+                       optlen += TCPOLEN_TSTAMP_APPA;
+               }
                *lp++ = htonl(TCP_TIMESTAMP(tp));
                *lp   = htonl(tp->ts_recent);
-               optlen += TCPOLEN_TSTAMP_APPA;
 
                /* Set receive buffer autosizing timestamp. */
                if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
@@ -1184,14 +1192,12 @@ send:
        if (sack_numblks) {
                int sack_len;
                u_char *bp = (u_char *)(opt + optlen);
-               u_int32_t *lp = (u_int32_t *)(bp + 4);
+               u_int32_t *lp = (u_int32_t *)(bp + 2);
                struct ipqent *tiqe;
 
                sack_len = sack_numblks * 8 + 2;
-               bp[0] = TCPOPT_NOP;
-               bp[1] = TCPOPT_NOP;
-               bp[2] = TCPOPT_SACK;
-               bp[3] = sack_len;
+               bp[0] = TCPOPT_SACK;
+               bp[1] = sack_len;
                if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) {
                        sack_numblks--;
                        *lp++ = htonl(tp->rcv_dsack_block.left);
@@ -1206,32 +1212,39 @@ send:
                        *lp++ = htonl(tiqe->ipqe_seq + tiqe->ipqe_len +
                            ((tiqe->ipqe_flags & TH_FIN) != 0 ? 1 : 0));
                }
-               optlen += sack_len + 2;
+               optlen += sack_len;
        }
        TCP_REASS_UNLOCK(tp);
 
 #ifdef TCP_SIGNATURE
        if (tp->t_flags & TF_SIGNATURE) {
                u_char *bp;
+               if (optlen + TCPOLEN_SIGNATURE > MAX_TCPOPTLEN)
+                       return EINVAL;
                /*
                 * Initialize TCP-MD5 option (RFC2385)
                 */
-               bp = (u_char *)opt + optlen;
+               bp = (u_char *)(opt + optlen);
                *bp++ = TCPOPT_SIGNATURE;
                *bp++ = TCPOLEN_SIGNATURE;
                sigoff = optlen + 2;
                memset(bp, 0, TCP_SIGLEN);
-               bp += TCP_SIGLEN;
                optlen += TCPOLEN_SIGNATURE;
-               /*
-                * Terminate options list and maintain 32-bit alignment.
-                */
-               *bp++ = TCPOPT_NOP;
-               *bp++ = TCPOPT_EOL;
-               optlen += 2;
        }
 #endif /* TCP_SIGNATURE */
 
+       /*
+        * Terminate options list and maintain 32-bit alignment.
+        */
+       if (optlen % 4) {
+               u_char *bp;
+               bp = (u_char*)opt + optlen;
+               *bp++ = TCPOPT_EOL;
+               optlen++;
+               for (; optlen % 4; optlen++, bp++)
+                       *bp = 0;
+       }
+
        hdrlen += optlen;
 
 #ifdef DIAGNOSTIC



Home | Main Index | Thread Index | Old Index