Subject: Re: Appropriate byte counting, revisited.
To: None <kurahone@sigusr1.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 10/12/2006 07:21:14
--NextPart-20061012071023-0604200
Content-Type: Text/Plain; charset=us-ascii

> > > With that said, it may be
> > > beneficial to provide the option for using L=2*SMSS and have a sysctl
> > > for that.
> 
> New and improved version of the patch available at the same location[0].
> It fixes most of the nits in the earlier version, and adds a sysctl that
> will set L=2*SMSS (on by default).
> 
> One other difference to the previous incarnation of the patch is that
> the ack prediction code will only grow snd_cwnd if it's < snd_wnd.  I'm
> not sure if this is entirely correct, but it does preserve the existing
> behavior.
> 
> Unless anyone has any objections I'm going to commit this.

What's the status of this?
I've adapted it to -current (attached).

YAMAMOTO Takashi

--NextPart-20061012071023-0604200
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: tcp_input.c
===================================================================
--- tcp_input.c	(revision 1823)
+++ tcp_input.c	(working copy)
@@ -1685,7 +1685,7 @@ after_listen:
 			/* Ack prediction. */
 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
-			    tp->snd_cwnd >= tp->snd_wnd &&
+			    tp->snd_cwnd >= tp->snd_ssthresh &&
 			    tp->t_partialacks < 0) {
 				/*
 				 * this is a pure ack for outstanding data.
@@ -1702,6 +1702,19 @@ after_listen:
 				tcpstat.tcps_rcvackbyte += acked;
 				ND6_HINT(tp);
 
+				/*
+				 * Grow the congestion window, if the
+				 * connection is cwnd bound.
+				 */
+			    	if (tp->snd_cwnd < tp->snd_wnd) {
+					tp->t_bytes_acked += acked;
+					if (tp->t_bytes_acked > tp->snd_cwnd) {
+						tp->t_bytes_acked -=
+						    tp->snd_cwnd;
+						tp->snd_cwnd += tp->t_segsz;
+					}
+				}
+
 				if (acked > (tp->t_lastoff - tp->t_inoff))
 					tp->t_lastm = NULL;
 				sbdrop(&so->so_snd, acked);
Index: tcp_subr.c
===================================================================
--- tcp_subr.c	(revision 1817)
+++ tcp_subr.c	(working copy)
@@ -202,6 +202,7 @@ int	tcp_compat_42 = 0;
 int	tcp_rst_ppslim = 100;	/* 100pps */
 int	tcp_ackdrop_ppslim = 100;	/* 100pps */
 int	tcp_do_loopback_cksum = 0;
+int	tcp_do_rfc3465 = 1;	/* RFC3465 Appropriate byte counting. */
 int	tcp_sack_tp_maxholes = 32;
 int	tcp_sack_globalmaxholes = 1024;
 int	tcp_sack_globalholes = 0;
@@ -935,6 +936,7 @@ static struct tcpcb tcpcb_template = {
 	.snd_numholes = 0,
 
 	.t_partialacks = -1,
+	.t_bytes_acked = 0,
 };
 
 /*
@@ -1647,8 +1649,10 @@ tcp_quench(struct inpcb *inp, int errno)
 {
 	struct tcpcb *tp = intotcpcb(inp);
 
-	if (tp)
+	if (tp) {
 		tp->snd_cwnd = tp->t_segsz;
+		tp->t_bytes_acked = 0;
+	}
 }
 #endif
 
@@ -1658,8 +1662,10 @@ tcp6_quench(struct in6pcb *in6p, int err
 {
 	struct tcpcb *tp = in6totcpcb(in6p);
 
-	if (tp)
+	if (tp) {
 		tp->snd_cwnd = tp->t_segsz;
+		tp->t_bytes_acked = 0;
+	}
 }
 #endif
 
Index: tcp_var.h
===================================================================
--- tcp_var.h	(revision 1817)
+++ tcp_var.h	(working copy)
@@ -290,6 +290,9 @@ struct tcpcb {
 	u_int32_t ts_timebase;		/* our timebase */
 	tcp_seq	last_ack_sent;
 
+/* RFC 3465 variables */
+	u_long	t_bytes_acked;		/* ABC "bytes_acked" parameter */
+
 /* SACK stuff */
 #define TCP_SACK_MAX 3
 #define TCPSACK_NONE 0
@@ -754,6 +757,7 @@ extern	int tcp_ecn_maxretries;	/* Max EC
 extern int tcp_sack_tp_maxholes;	/* Max holes per connection. */
 extern int tcp_sack_globalmaxholes;	/* Max holes per system. */
 extern int tcp_sack_globalholes;	/* Number of holes present. */
+extern int tcp_do_rfc3465;		/* RFC3465 ABC enabled/disabled? */
 
 extern	int tcp_rst_ppslim;
 extern	int tcp_ackdrop_ppslim;
Index: tcp_usrreq.c
===================================================================
--- tcp_usrreq.c	(revision 1817)
+++ tcp_usrreq.c	(working copy)
@@ -1735,7 +1735,12 @@ sysctl_net_inet_tcp_setup2(struct sysctl
 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX,
 		       CTL_EOL);
 #endif
-
+	sysctl_createv(clog, 0, NULL, NULL,
+		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+		       CTLTYPE_INT, "rfc3465",
+		       SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"),
+		       NULL, 0, &tcp_do_rfc3465, 0,
+		       CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
 }
 
 /*
Index: tcp_congctl.c
===================================================================
--- tcp_congctl.c	(revision 1823)
+++ tcp_congctl.c	(working copy)
@@ -465,6 +465,7 @@ tcp_reno_slow_retransmit(struct tcpcb *t
 	tp->snd_ssthresh = win * tp->t_segsz;
 	tp->t_partialacks = -1;
 	tp->t_dupacks = 0;
+	tp->t_bytes_acked = 0;
 }
 
 static void
@@ -485,6 +486,7 @@ tcp_reno_fast_retransmit_newack(struct t
 			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->t_partialacks = -1;
 		tp->t_dupacks = 0;
+		tp->t_bytes_acked = 0;
 	}
 }
 
@@ -501,9 +503,30 @@ tcp_reno_newack(struct tcpcb *tp, struct
 
 	u_int cw = tp->snd_cwnd;
 	u_int incr = tp->t_segsz;
+	int acked = th->th_ack - tp->snd_una;
 
-	if (cw >= tp->snd_ssthresh)
-		incr = incr * incr / cw;
+	if (cw >= tp->snd_ssthresh) {
+		tp->t_bytes_acked += acked;
+		if (tp->t_bytes_acked >= cw) {
+			/* Time to increase the window. */
+			tp->t_bytes_acked -= cw;
+		} else {
+			/* No need to increase yet. */
+			incr = 0;
+		}
+	} else {
+		/*
+		 * If the user explicitly enables RFC3465
+		 * use 2*SMSS for the "L" param.  Otherwise
+		 * use the more conservative 1*SMSS.
+		 *
+		 * (See RFC 3465 2.3 Choosing the Limit)
+		 */
+		u_int abc_lim;
+
+		abc_lim = (tcp_do_rfc3465 == 0) ? incr : incr * 2;
+		incr = min(acked, abc_lim);
+	}
 
 	tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
 }
@@ -601,6 +624,7 @@ tcp_newreno_fast_retransmit_newack(struc
 			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->t_partialacks = -1;
 		tp->t_dupacks = 0;
+		tp->t_bytes_acked = 0;
 	}
 }
 

--NextPart-20061012071023-0604200--