tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

syn/ack ecn patch



Currently, NetBSD sets ECT(0) on SYN/ACKs if ECN is enabled.
However, it neither echoes a congestion notification (CE mark) seen on a
received SYN/ACK, nor reacts to an ECE received due to congestion caused
by a SYN/ACK it sent.

The attached patch brings the ECN code more in line with RFC 5562.
If the stack receives a CE mark on a SYN/ACK, it will reply with ECE.
If the stack receives an ECE in the ACK of a SYN/ACK, the SYN/ACK will
be resent without ECT(0) set, and the congestion window (snd_cwnd) will
be set to one segment.
Retransmitted SYN/ACKs won't have ECT(0) set.

br
Karl Knutsson

Index: tcp_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v
retrieving revision 1.316
diff -u -r1.316 tcp_input.c
--- tcp_input.c 31 Aug 2011 18:31:03 -0000      1.316
+++ tcp_input.c 30 Oct 2011 15:07:04 -0000
@@ -1233,6 +1233,7 @@
        uint8_t iptos;
        uint64_t *tcps;
        vestigial_inpcb_t vestige;
+       bool synack_ece = false;
 
        vestige.valid = 0;
 
@@ -2213,6 +2214,19 @@
                        if ((tiflags & TH_ECE) && tcp_do_ecn) {
                                tp->t_flags |= TF_ECN_PERMIT;
                                TCP_STATINC(TCP_STAT_ECN_SHS);
+                               switch (iptos & IPTOS_ECN_MASK) {
+                               case IPTOS_ECN_CE:
+                                       tp->t_flags |= TF_ECN_SND_ECE;
+                                       synack_ece = true;
+                                       TCP_STATINC(TCP_STAT_ECN_CE);
+                                       break;
+                               case IPTOS_ECN_ECT0:
+                                       TCP_STATINC(TCP_STAT_ECN_ECT);
+                                       break;
+                               case IPTOS_ECN_ECT1:
+                                       /* XXX */
+                                       break;
+                               }
                        }
 
                }
@@ -3000,6 +3014,12 @@
        if (needoutput || (tp->t_flags & TF_ACKNOW)) {
                KERNEL_LOCK(1, NULL);
                (void) tcp_output(tp);
+               /* 
+                * Clear send ece flag since the peer won't respond to an ece for a
+                * syn/ack with a cwr
+                */
+               if (synack_ece)
+                       tp->t_flags &= ~TF_ECN_SND_ECE;
                KERNEL_UNLOCK_ONE(NULL);
        }
        if (tcp_saveti)
@@ -3834,10 +3854,10 @@
                goto dropit;
 
        TCP_STATINC(TCP_STAT_SC_RETRANSMITTED);
-       (void) syn_cache_respond(sc, NULL);
-
        /* Advance the timer back-off. */
        sc->sc_rxtshift++;
+       (void) syn_cache_respond(sc, NULL);
+
        SYN_CACHE_TIMER_ARM(sc);
 
        KERNEL_UNLOCK_ONE(NULL);
@@ -3967,7 +3987,10 @@
         */
        if ((th->th_ack != sc->sc_iss + 1) ||
            SEQ_LEQ(th->th_seq, sc->sc_irs) ||
-           SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
+           SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win) ||
+               (th->th_flags & TH_ECE) == TH_ECE) {
+               if ((th->th_flags & TH_ECE) == TH_ECE)
+                       sc->sc_flags |= SCF_ECE;
                (void) syn_cache_respond(sc, m);
                splx(s);
                return ((struct socket *)(-1));
@@ -4195,7 +4218,7 @@
         * had to retransmit the SYN,ACK, we must initialize cwnd
         * to 1 segment (i.e. the Loss Window).
         */
-       if (sc->sc_rxtshift)
+       if (sc->sc_rxtshift || (sc->sc_flags & SCF_ECE) != 0)
                tp->snd_cwnd = tp->t_peermss;
        else {
                int ss = tcp_init_win;
@@ -4724,17 +4747,20 @@
                 * Page 4 and 6, January 2006.
                 */
 
-               switch (sc->sc_src.sa.sa_family) {
+
+               if ((sc->sc_flags & SCF_ECE) == 0 && !sc->sc_rxtshift) {
+                       switch (sc->sc_src.sa.sa_family) {
 #ifdef INET
-               case AF_INET:
-                       ip->ip_tos |= IPTOS_ECN_ECT0;
-                       break;
+                       case AF_INET:
+                               ip->ip_tos |= IPTOS_ECN_ECT0;
+                               break;
 #endif
 #ifdef INET6
-               case AF_INET6:
-                       ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
-                       break;
+                       case AF_INET6:
+                               ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+                               break;
 #endif
+                       }
                }
                TCP_STATINC(TCP_STAT_ECN_ECT);
        }
Index: tcp_var.h
===================================================================
RCS file: /cvsroot/netbsd/src/sys/netinet/tcp_var.h,v
retrieving revision 1.167
diff -u -r1.167 tcp_var.h
--- tcp_var.h   25 May 2011 23:17:44 -0000      1.167
+++ tcp_var.h   30 Oct 2011 09:51:23 -0000
@@ -518,6 +518,7 @@
 #define SCF_SACK_PERMIT                0x0008          /* peer will do SACK */
 #define SCF_ECN_PERMIT         0x0010          /* peer will do ECN */
 #define SCF_SIGNATURE  0x40                    /* send MD5 digests */
+#define SCF_ECE                        0x0080          /* ECE received */
 
        struct mbuf *sc_ipopts;                 /* IP options */
        u_int16_t sc_peermaxseg;


Home | Main Index | Thread Index | Old Index