Subject: Re: Appropriate byte counting, revisited.
To: None <tech-net@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 10/17/2006 07:08:22
--NextPart-20061017070648-2015000
Content-Type: Text/Plain; charset=us-ascii

> > Please, just use tcp.abc and be done with it!  Not every objection should
> > be taken seriously.
> 
> ok, maybe i'll do so...
> if anyone has any serious problem with "tcp.abc" beyond tastes,
> please speak up (again).

Here's a patch.
I omitted the ACK-prediction part of the original patch because
it's a separate change.

YAMAMOTO Takashi

--NextPart-20061017070648-2015000
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: tcp_subr.c
===================================================================
--- tcp_subr.c	(revision 1830)
+++ tcp_subr.c	(revision 1838)
@@ -202,6 +202,8 @@ int	tcp_compat_42 = 0;
 int	tcp_rst_ppslim = 100;	/* 100pps */
 int	tcp_ackdrop_ppslim = 100;	/* 100pps */
 int	tcp_do_loopback_cksum = 0;
+int	tcp_do_abc = 1;		/* RFC3465 Appropriate byte counting. */
+int	tcp_abc_aggressive = 1;	/* 1: L=2*SMSS  0: L=1*SMSS */
 int	tcp_sack_tp_maxholes = 32;
 int	tcp_sack_globalmaxholes = 1024;
 int	tcp_sack_globalholes = 0;
@@ -935,6 +937,7 @@ static struct tcpcb tcpcb_template = {
 	.snd_numholes = 0,
 
 	.t_partialacks = -1,
+	.t_bytes_acked = 0,
 };
 
 /*
@@ -1647,8 +1650,10 @@ tcp_quench(struct inpcb *inp, int errno 
 {
 	struct tcpcb *tp = intotcpcb(inp);
 
-	if (tp)
+	if (tp) {
 		tp->snd_cwnd = tp->t_segsz;
+		tp->t_bytes_acked = 0;
+	}
 }
 #endif
 
@@ -1658,8 +1663,10 @@ tcp6_quench(struct in6pcb *in6p, int err
 {
 	struct tcpcb *tp = in6totcpcb(in6p);
 
-	if (tp)
+	if (tp) {
 		tp->snd_cwnd = tp->t_segsz;
+		tp->t_bytes_acked = 0;
+	}
 }
 #endif
 
Index: tcp_var.h
===================================================================
--- tcp_var.h	(revision 1830)
+++ tcp_var.h	(revision 1838)
@@ -290,6 +290,9 @@ struct tcpcb {
 	u_int32_t ts_timebase;		/* our timebase */
 	tcp_seq	last_ack_sent;
 
+/* RFC 3465 variables */
+	u_long	t_bytes_acked;		/* ABC "bytes_acked" parameter */
+
 /* SACK stuff */
 #define TCP_SACK_MAX 3
 #define TCPSACK_NONE 0
@@ -754,6 +757,8 @@ extern	int tcp_ecn_maxretries;	/* Max EC
 extern int tcp_sack_tp_maxholes;	/* Max holes per connection. */
 extern int tcp_sack_globalmaxholes;	/* Max holes per system. */
 extern int tcp_sack_globalholes;	/* Number of holes present. */
+extern int tcp_do_abc;			/* RFC3465 ABC enabled/disabled? */
+extern int tcp_abc_aggressive;		/* 1: L=2*SMSS  0: L=1*SMSS */
 
 extern	int tcp_rst_ppslim;
 extern	int tcp_ackdrop_ppslim;
Index: tcp_usrreq.c
===================================================================
--- tcp_usrreq.c	(revision 1830)
+++ tcp_usrreq.c	(revision 1838)
@@ -1431,6 +1431,7 @@ sysctl_net_inet_tcp_setup2(struct sysctl
 {
 	int ecn_node, congctl_node;
 	const struct sysctlnode *sack_node, *node;
+	const struct sysctlnode *abc_node;
 #ifdef TCP_DEBUG
 	extern struct tcp_debug tcp_debug[TCP_NDEBUG];
 	extern int tcp_debx;
@@ -1736,6 +1737,23 @@ sysctl_net_inet_tcp_setup2(struct sysctl
 		       CTL_EOL);
 #endif
 
+	/* ABC subtree */
+
+	sysctl_createv(clog, 0, NULL, &abc_node,
+		       CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc",
+		       SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"),
+		       NULL, 0, NULL, 0,
+		       CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &abc_node, NULL,
+		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+		       CTLTYPE_INT, "enable",
+		       SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"),
+		       NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &abc_node, NULL,
+		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+		       CTLTYPE_INT, "aggressive",
+		       SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"),
+		       NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL);
 }
 
 /*
Index: tcp_congctl.c
===================================================================
--- tcp_congctl.c	(revision 1830)
+++ tcp_congctl.c	(revision 1838)
@@ -465,6 +465,7 @@ tcp_reno_slow_retransmit(struct tcpcb *t
 	tp->snd_ssthresh = win * tp->t_segsz;
 	tp->t_partialacks = -1;
 	tp->t_dupacks = 0;
+	tp->t_bytes_acked = 0;
 }
 
 static void
@@ -485,6 +486,7 @@ tcp_reno_fast_retransmit_newack(struct t
 			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->t_partialacks = -1;
 		tp->t_dupacks = 0;
+		tp->t_bytes_acked = 0;
 	}
 }
 
@@ -493,17 +495,53 @@ tcp_reno_newack(struct tcpcb *tp, struct
 {
 	/*
 	 * When new data is acked, open the congestion window.
-	 * If the window gives us less than ssthresh packets
-	 * in flight, open exponentially (segsz per packet).
-	 * Otherwise open linearly: segsz per window
-	 * (segsz^2 / cwnd per packet).
 	 */
 
 	u_int cw = tp->snd_cwnd;
 	u_int incr = tp->t_segsz;
 
-	if (cw >= tp->snd_ssthresh)
-		incr = incr * incr / cw;
+	if (tcp_do_abc) {
+
+		/*
+		 * RFC 3465 Appropriate Byte Counting (ABC)
+		 */
+
+		int acked = th->th_ack - tp->snd_una;
+
+		if (cw >= tp->snd_ssthresh) {
+			tp->t_bytes_acked += acked;
+			if (tp->t_bytes_acked >= cw) {
+				/* Time to increase the window. */
+				tp->t_bytes_acked -= cw;
+			} else {
+				/* No need to increase yet. */
+				incr = 0;
+			}
+		} else {
+			/*
+			 * use 2*SMSS or 1*SMSS for the "L" param,
+			 * depending on sysctl setting.
+			 *
+			 * (See RFC 3465 2.3 Choosing the Limit)
+			 */
+			u_int abc_lim;
+
+			abc_lim = (tcp_abc_aggressive == 0) ? incr : incr * 2;
+			incr = min(acked, abc_lim);
+		}
+	} else {
+
+		/*
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (segsz per packet).
+		 * Otherwise open linearly: segsz per window
+		 * (segsz^2 / cwnd per packet).
+		 */
+
+		if (cw >= tp->snd_ssthresh) {
+			incr = incr * incr / cw;
+		}
+	}
 
 	tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
 }
@@ -601,6 +639,7 @@ tcp_newreno_fast_retransmit_newack(struc
 			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->t_partialacks = -1;
 		tp->t_dupacks = 0;
+		tp->t_bytes_acked = 0;
 	}
 }
 

--NextPart-20061017070648-2015000--