Subject: Re: Appropriate byte counting, revisited.
To: None <kurahone@sigusr1.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 10/12/2006 07:21:14
--NextPart-20061012071023-0604200
Content-Type: Text/Plain; charset=us-ascii
> > > With that said, it may be
> > > beneficial to provide the option for using L=2*SMSS and have a sysctl
> > > for that.
>
> New and improved version of the patch available at the same location[0].
> It fixes most of the nits in the earlier version, and adds a sysctl that
> will set L=2*SMSS (on by default).
>
> One other difference to the previous incarnation of the patch is that
> the ack prediction code will only grow snd_cwnd if it's < snd_wnd. I'm
> not sure if this is entirely correct, but it does preserve the existing
> behavior.
>
> Unless anyone has any objections I'm going to commit this.
What's the status of this?
I've adapted it to -current (patch attached).
YAMAMOTO Takashi
--NextPart-20061012071023-0604200
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"
Index: tcp_input.c
===================================================================
--- tcp_input.c (revision 1823)
+++ tcp_input.c (working copy)
@@ -1685,7 +1685,7 @@ after_listen:
/* Ack prediction. */
if (SEQ_GT(th->th_ack, tp->snd_una) &&
SEQ_LEQ(th->th_ack, tp->snd_max) &&
- tp->snd_cwnd >= tp->snd_wnd &&
+ tp->snd_cwnd >= tp->snd_ssthresh &&
tp->t_partialacks < 0) {
/*
* this is a pure ack for outstanding data.
@@ -1702,6 +1702,19 @@ after_listen:
tcpstat.tcps_rcvackbyte += acked;
ND6_HINT(tp);
+ /*
+ * Grow the congestion window, if the
+ * connection is cwnd bound.
+ */
+ if (tp->snd_cwnd < tp->snd_wnd) {
+ tp->t_bytes_acked += acked;
+ if (tp->t_bytes_acked > tp->snd_cwnd) {
+ tp->t_bytes_acked -=
+ tp->snd_cwnd;
+ tp->snd_cwnd += tp->t_segsz;
+ }
+ }
+
if (acked > (tp->t_lastoff - tp->t_inoff))
tp->t_lastm = NULL;
sbdrop(&so->so_snd, acked);
Index: tcp_subr.c
===================================================================
--- tcp_subr.c (revision 1817)
+++ tcp_subr.c (working copy)
@@ -202,6 +202,7 @@ int tcp_compat_42 = 0;
int tcp_rst_ppslim = 100; /* 100pps */
int tcp_ackdrop_ppslim = 100; /* 100pps */
int tcp_do_loopback_cksum = 0;
+int tcp_do_rfc3465 = 1; /* RFC3465 Appropriate byte counting. */
int tcp_sack_tp_maxholes = 32;
int tcp_sack_globalmaxholes = 1024;
int tcp_sack_globalholes = 0;
@@ -935,6 +936,7 @@ static struct tcpcb tcpcb_template = {
.snd_numholes = 0,
.t_partialacks = -1,
+ .t_bytes_acked = 0,
};
/*
@@ -1647,8 +1649,10 @@ tcp_quench(struct inpcb *inp, int errno)
{
struct tcpcb *tp = intotcpcb(inp);
- if (tp)
+ if (tp) {
tp->snd_cwnd = tp->t_segsz;
+ tp->t_bytes_acked = 0;
+ }
}
#endif
@@ -1658,8 +1662,10 @@ tcp6_quench(struct in6pcb *in6p, int err
{
struct tcpcb *tp = in6totcpcb(in6p);
- if (tp)
+ if (tp) {
tp->snd_cwnd = tp->t_segsz;
+ tp->t_bytes_acked = 0;
+ }
}
#endif
Index: tcp_var.h
===================================================================
--- tcp_var.h (revision 1817)
+++ tcp_var.h (working copy)
@@ -290,6 +290,9 @@ struct tcpcb {
u_int32_t ts_timebase; /* our timebase */
tcp_seq last_ack_sent;
+/* RFC 3465 variables */
+ u_long t_bytes_acked; /* ABC "bytes_acked" parameter */
+
/* SACK stuff */
#define TCP_SACK_MAX 3
#define TCPSACK_NONE 0
@@ -754,6 +757,7 @@ extern int tcp_ecn_maxretries; /* Max EC
extern int tcp_sack_tp_maxholes; /* Max holes per connection. */
extern int tcp_sack_globalmaxholes; /* Max holes per system. */
extern int tcp_sack_globalholes; /* Number of holes present. */
+extern int tcp_do_rfc3465; /* RFC3465 ABC enabled/disabled? */
extern int tcp_rst_ppslim;
extern int tcp_ackdrop_ppslim;
Index: tcp_usrreq.c
===================================================================
--- tcp_usrreq.c (revision 1817)
+++ tcp_usrreq.c (working copy)
@@ -1735,7 +1735,12 @@ sysctl_net_inet_tcp_setup2(struct sysctl
CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX,
CTL_EOL);
#endif
-
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+ CTLTYPE_INT, "rfc3465",
+ SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"),
+ NULL, 0, &tcp_do_rfc3465, 0,
+ CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
}
/*
Index: tcp_congctl.c
===================================================================
--- tcp_congctl.c (revision 1823)
+++ tcp_congctl.c (working copy)
@@ -465,6 +465,7 @@ tcp_reno_slow_retransmit(struct tcpcb *t
tp->snd_ssthresh = win * tp->t_segsz;
tp->t_partialacks = -1;
tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
static void
@@ -485,6 +486,7 @@ tcp_reno_fast_retransmit_newack(struct t
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_partialacks = -1;
tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
}
@@ -501,9 +503,30 @@ tcp_reno_newack(struct tcpcb *tp, struct
u_int cw = tp->snd_cwnd;
u_int incr = tp->t_segsz;
+ int acked = th->th_ack - tp->snd_una;
- if (cw >= tp->snd_ssthresh)
- incr = incr * incr / cw;
+ if (cw >= tp->snd_ssthresh) {
+ tp->t_bytes_acked += acked;
+ if (tp->t_bytes_acked >= cw) {
+ /* Time to increase the window. */
+ tp->t_bytes_acked -= cw;
+ } else {
+ /* No need to increase yet. */
+ incr = 0;
+ }
+ } else {
+ /*
+ * If the user explicitly enables RFC3465
+ * use 2*SMSS for the "L" param. Otherwise
+ * use the more conservative 1*SMSS.
+ *
+ * (See RFC 3465 2.3 Choosing the Limit)
+ */
+ u_int abc_lim;
+
+ abc_lim = (tcp_do_rfc3465 == 0) ? incr : incr * 2;
+ incr = min(acked, abc_lim);
+ }
tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
}
@@ -601,6 +624,7 @@ tcp_newreno_fast_retransmit_newack(struc
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_partialacks = -1;
tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
}
--NextPart-20061012071023-0604200--