Subject: icmp patches
To: None <tech-net@netbsd.org>
From: Christos Zoulas <christos@zoulas.com>
List: tech-net
Date: 07/08/2005 23:32:14
Hi,

I ported the ICMP patches from OpenBSD that fix the problems described in:

        http://www.gont.com.ar/drafts/icmp-attacks-against-tcp.html

Please let me know what you think.

christos

Index: files.netinet
===================================================================
RCS file: /cvsroot/src/sys/netinet/files.netinet,v
retrieving revision 1.10
diff -u -u -r1.10 files.netinet
--- files.netinet	28 Feb 2005 16:20:59 -0000	1.10
+++ files.netinet	8 Jul 2005 21:14:42 -0000
@@ -4,6 +4,7 @@
 defparam opt_tcp_debug.h	TCP_NDEBUG
 defflag opt_inet.h		INET INET6 INET6_MD_CKSUM TCP_SIGNATURE
 				TCP_OUTPUT_COUNTERS TCP_REASS_COUNTERS
+				STRICT_ICMP
 defparam opt_inet_conf.h	SUBNETSARELOCAL HOSTZEROBROADCAST
 
 defflag				MROUTING
Index: ip_icmp.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.92
diff -u -u -r1.92 ip_icmp.c
--- ip_icmp.c	29 Apr 2005 10:39:09 -0000	1.92
+++ ip_icmp.c	8 Jul 2005 21:14:43 -0000
@@ -361,7 +361,7 @@
 	m_freem(n);
 }
 
-static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
+struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
 struct sockaddr_in icmpmask = { 8, 0 };
Index: tcp_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v
retrieving revision 1.230
diff -u -u -r1.230 tcp_input.c
--- tcp_input.c	30 Jun 2005 02:58:28 -0000	1.230
+++ tcp_input.c	8 Jul 2005 21:14:45 -0000
@@ -276,6 +276,29 @@
 		TCP_SET_DELACK(tp); \
 } while (/*CONSTCOND*/ 0)
 
+#ifdef STRICT_ICMP
+#define ICMP_CHECK(tp, th, acked) \
+do { \
+	/* \
+	 * If we have a pending ICMP message that \
+	 * refers to data that has just been \
+	 * acknowledged, disregard the recorded ICMP \
+	 * message. \
+	 */ \
+	if (((tp)->t_flags & TF_PMTUD_PEND) && \
+	    SEQ_GT((th)->th_ack, (tp)->t_pmtud_th_seq)) \
+		(tp)->t_flags &= ~TF_PMTUD_PEND; \
+\
+	/* \
+	 * Keep track of the largest chunk of data \
+	 * acknowledged since the last PMTU update. \
+	 */ \
+	if ((tp)->t_pmtud_mss_acked < (acked)) \
+		(tp)->t_pmtud_mss_acked = (acked); \
+} while (/*CONSTCOND*/ 0)
+#else
+#define ICMP_CHECK(tp, th, acked)
+#endif
 /*
  * Convert TCP protocol fields to host order for easier processing.
  */
@@ -1620,6 +1643,8 @@
 				sbdrop(&so->so_snd, acked);
 				tp->t_lastoff -= acked;
 
+				ICMP_CHECK(tp, th, acked);
+
 				tp->snd_una = th->th_ack;
 				tp->snd_fack = tp->snd_una;
 				if (SEQ_LT(tp->snd_high, tp->snd_una))
@@ -2293,6 +2318,9 @@
 			ourfinisacked = 0;
 		}
 		sowwakeup(so);
+
+		ICMP_CHECK(tp, th, acked);
+
 		tp->snd_una = th->th_ack;
 		if (SEQ_GT(tp->snd_una, tp->snd_fack))
 			tp->snd_fack = tp->snd_una;
Index: tcp_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_output.c,v
retrieving revision 1.136
diff -u -u -r1.136 tcp_output.c
--- tcp_output.c	28 Jun 2005 19:16:02 -0000	1.136
+++ tcp_output.c	8 Jul 2005 21:14:47 -0000
@@ -550,7 +550,7 @@
 #endif
 	struct tcphdr *th;
 	u_char opt[MAX_TCPOPTLEN];
-	unsigned optlen, hdrlen;
+	unsigned optlen, hdrlen, packetlen;
 	unsigned int sack_numblks;
 	int idle, sendalot, txsegsize, rxsegsize;
 	int txsegsize_nosack;
@@ -1441,6 +1441,7 @@
 #ifdef INET
 	case AF_INET:
 		ip->ip_len = htons(m->m_pkthdr.len);
+		packetlen = m->m_pkthdr.len;
 		if (tp->t_inpcb) {
 			ip->ip_ttl = tp->t_inpcb->inp_ip.ip_ttl;
 			ip->ip_tos = tp->t_inpcb->inp_ip.ip_tos;
@@ -1455,6 +1456,7 @@
 #endif
 #ifdef INET6
 	case AF_INET6:
+		packetlen = m->m_pkthdr.len;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		if (tp->t_in6pcb) {
 			/*
@@ -1470,6 +1472,9 @@
 		/* ip6_plen will be filled in ip6_output(). */
 		break;
 #endif
+	default:	/*pacify gcc*/
+		packetlen = 0;
+		break;
 	}
 
 	switch (af) {
@@ -1537,6 +1542,12 @@
 
 		return (error);
 	}
+
+#ifdef STRICT_ICMP
+	if (packetlen > tp->t_pmtud_mtu_sent)
+		tp->t_pmtud_mtu_sent = packetlen;
+#endif
+	
 	tcpstat.tcps_sndtotal++;
 	if (tp->t_flags & TF_DELACK)
 		tcpstat.tcps_delack++;
Index: tcp_subr.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.191
diff -u -u -r1.191 tcp_subr.c
--- tcp_subr.c	29 May 2005 21:41:23 -0000	1.191
+++ tcp_subr.c	8 Jul 2005 21:14:48 -0000
@@ -228,7 +228,6 @@
 void	tcp6_mtudisc_callback(struct in6_addr *);
 #endif
 
-void	tcp_mtudisc(struct inpcb *, int);
 #ifdef INET6
 void	tcp6_mtudisc(struct in6pcb *, int);
 #endif
@@ -1371,8 +1370,16 @@
 	if ((unsigned)cmd >= PRC_NCMDS)
 		return;
 	else if (cmd == PRC_QUENCH) {
+#ifdef STRICT_ICMP
+		/* 
+		 * Don't honor ICMP Source Quench messages meant for
+		 * TCP connections.
+		 */
+		return;
+#else
 		/* XXX there's no PRC_QUENCH in IPv6 */
 		notify = tcp6_quench;
+#endif
 	} else if (PRC_IS_REDIRECT(cmd))
 		notify = in6_rtchange, d = NULL;
 	else if (cmd == PRC_MSGSIZE)
@@ -1468,7 +1475,14 @@
 	void (*notify)(struct inpcb *, int) = tcp_notify;
 	int errno;
 	int nmatch;
+#ifdef STRICT_ICMP
+	struct tcpcb *tp;
+	u_int mtu;
+	tcp_seq seq;
+#endif
+	struct inpcb *inp;
 #ifdef INET6
+	struct in6pcb *in6p;
 	struct in6_addr src6, dst6;
 #endif
 
@@ -1479,7 +1493,15 @@
 		return NULL;
 	errno = inetctlerrmap[cmd];
 	if (cmd == PRC_QUENCH)
+#ifdef STRICT_ICMP
+		/* 
+		 * Don't honor ICMP Source Quench messages meant for
+		 * TCP connections.
+		 */
+		return NULL;
+#else
 		notify = tcp_quench;
+#endif
 	else if (PRC_IS_REDIRECT(cmd))
 		notify = in_rtchange, ip = 0;
 	else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) {
@@ -1498,12 +1520,12 @@
 		memcpy(&src6.s6_addr32[3], &ip->ip_src, sizeof(struct in_addr));
 		memcpy(&dst6.s6_addr32[3], &ip->ip_dst, sizeof(struct in_addr));
 #endif
-		if (in_pcblookup_connect(&tcbtable, ip->ip_dst, th->th_dport,
-		    ip->ip_src, th->th_sport) != NULL)
-			;
+		if ((inp = in_pcblookup_connect(&tcbtable, ip->ip_dst,
+		    th->th_dport, ip->ip_src, th->th_sport)) != NULL)
+			in6p = NULL;
 #ifdef INET6
-		else if (in6_pcblookup_connect(&tcbtable, &dst6,
-		    th->th_dport, &src6, th->th_sport, 0) != NULL)
+		else if ((in6p = in6_pcblookup_connect(&tcbtable, &dst6,
+		    th->th_dport, &src6, th->th_sport, 0)) != NULL)
 			;
 #endif
 		else
@@ -1517,8 +1539,58 @@
 		 */
 		icp = (struct icmp *)((caddr_t)ip -
 		    offsetof(struct icmp, icmp_ip));
+#ifdef STRICT_ICMP
+		if (inp) {
+			if ((tp = intotcpcb(inp)) == NULL)
+				return NULL;
+		}
+#ifdef INET6
+		else if (in6p) {
+			if ((tp = in6totcpcb(in6p)) == NULL)
+				return NULL;
+		}
+#endif
+		else
+			return NULL;
+		seq = ntohl(th->th_seq);
+		if (SEQ_LT(seq, tp->snd_una) || SEQ_GEQ(seq, tp->snd_max))
+			return NULL;
+		/* 
+		 * If the ICMP message advertises a Next-Hop MTU
+		 * equal to or larger than the maximum packet size we have
+		 * ever sent, drop the message.
+		 */
+		mtu = (u_int)ntohs(icp->icmp_nextmtu);
+		if (mtu >= tp->t_pmtud_mtu_sent)
+			return NULL;
+		if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) {
+			/* 
+			 * Calculate new MTU, and create corresponding
+			 * route (traditional PMTUD).
+			 */
+			tp->t_flags &= ~TF_PMTUD_PEND;
+			icmp_mtudisc(icp, ip->ip_dst);
+		} else {
+			/*
+			 * Record the information received in the ICMP
+			 * message; act on it later.
+			 * If we had already recorded an ICMP message,
+			 * replace the old one only if the new message
+			 * refers to an older TCP segment.
+			 */
+			if (tp->t_flags & TF_PMTUD_PEND) {
+				if (SEQ_LT(tp->t_pmtud_th_seq, seq))
+					return NULL;
+			} else
+				tp->t_flags |= TF_PMTUD_PEND;
+			tp->t_pmtud_th_seq = seq;
+			tp->t_pmtud_nextmtu = icp->icmp_nextmtu;
+			tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
+			tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
+		}
+#else
 		icmp_mtudisc(icp, ip->ip_dst);
-
+#endif
 		return NULL;
 	} else if (cmd == PRC_HOSTDEAD)
 		ip = 0;
@@ -2203,3 +2275,35 @@
 
 	return optlen;
 }
+
+#ifdef STRICT_ICMP
+u_int
+tcp_hdrsz(struct tcpcb *tp)
+{
+	u_int hlen;
+
+	switch (tp->t_family) {
+#ifdef INET6
+	case AF_INET6:
+		hlen = sizeof(struct ip6_hdr);
+		break;
+#endif
+	case AF_INET:
+		hlen = sizeof(struct ip);
+		break;
+	default:
+		hlen = 0;
+		break;
+	}
+	hlen += sizeof(struct tcphdr);
+
+	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+	    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
+		hlen += TCPOLEN_TSTAMP_APPA;
+#ifdef TCP_SIGNATURE
+	if (tp->t_flags & TF_SIGNATURE)
+		hlen += TCPOLEN_SIGLEN;
+#endif
+	return hlen;
+ }
+#endif
Index: tcp_timer.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.71
diff -u -u -r1.71 tcp_timer.c
--- tcp_timer.c	2 Mar 2005 10:20:18 -0000	1.71
+++ tcp_timer.c	8 Jul 2005 21:14:48 -0000
@@ -123,6 +123,7 @@
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
 
 #ifdef INET6
 #ifndef INET
@@ -232,6 +233,32 @@
 		splx(s);
 		return;
 	}
+#ifdef STRICT_ICMP
+	if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
+	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
+	    SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_ourmss))) {
+		extern struct sockaddr_in icmpsrc;
+		struct icmp icmp;
+
+		tp->t_flags &= ~TF_PMTUD_PEND;
+
+		/* XXX create fake icmp message with relevant entries */
+		icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
+		icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
+		icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
+		icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
+		icmp_mtudisc(&icmp, icmpsrc.sin_addr);
+
+		/*
+		 * Notify all connections to the same peer about
+		 * the new MSS and trigger a retransmit.
+		 */
+		in_pcbnotifyall(&tcbtable, icmpsrc.sin_addr, EMSGSIZE,
+		    tcp_mtudisc);
+ 		splx(s);
+ 		return;
+ 	}
+#endif
 
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
Index: tcp_var.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_var.h,v
retrieving revision 1.126
diff -u -u -r1.126 tcp_var.h
--- tcp_var.h	29 May 2005 21:41:23 -0000	1.126
+++ tcp_var.h	8 Jul 2005 21:14:49 -0000
@@ -216,6 +216,7 @@
 #define	TF_WILL_SACK	0x0800		/* try to use SACK */
 #define	TF_REASSEMBLING	0x1000		/* we're busy reassembling */
 #define	TF_DEAD		0x2000		/* dead and to-be-released */
+#define	TF_PMTUD_PEND	0x4000		/* Path MTU Discovery pending */
 #define	TF_SIGNATURE	0x400000	/* require MD5 digests (RFC2385) */
 
 
@@ -301,9 +302,6 @@
 	tcp_seq snd_fack;		/* FACK TCP.  Forward-most data held by
 					   peer. */
 
-/* path MTU discovery blackhole detection */
-	int t_mtudisc;			/* perform mtudisc for this tcb */
-
 /* pointer for syn cache entries*/
 	LIST_HEAD(, syn_cache) t_sc;	/* list of entries by this tcb */
 
@@ -312,6 +310,18 @@
 	int	t_inoff;		/* data offset in previous mbuf */
 	int	t_lastoff;		/* last data address in mbuf chain */
 	int	t_lastlen;		/* last length read from mbuf chain */
+
+/* Path-MTU discovery blackhole detection */
+	int t_mtudisc;			/* perform mtudisc for this tcb */
+#ifdef STRICT_ICMP
+/* Path-MTU Discovery Information */
+	u_int	t_pmtud_mss_acked;	/* MSS acked, lower bound for MTU */
+	u_int	t_pmtud_mtu_sent;	/* MTU used, upper bound for MTU */
+	tcp_seq	t_pmtud_th_seq;		/* TCP SEQ from ICMP payload */
+	u_int	t_pmtud_nextmtu;	/* Advertised Next-Hop MTU from ICMP */
+	u_short	t_pmtud_ip_len;		/* IP length from ICMP payload */
+	u_short	t_pmtud_ip_hl;		/* IP header length from ICMP payload */
+#endif
 };
 
 /*
@@ -806,6 +816,9 @@
 int	 tcp6_input(struct mbuf **, int *, int);
 #endif
 void	 tcp_input(struct mbuf *, ...);
+#ifdef STRICT_ICMP
+u_int	 tcp_hdrsz(struct tcpcb *);
+#endif
 u_long	 tcp_mss_to_advertise(const struct ifnet *, int);
 void	 tcp_mss_from_peer(struct tcpcb *, int);
 void	 tcp_tcpcb_template(void);
@@ -823,6 +836,7 @@
 #ifdef INET6
 void	 tcp6_quench(struct in6pcb *, int);
 #endif
+void	 tcp_mtudisc(struct inpcb *, int);
 
 struct ipqent *tcpipqent_alloc(void);
 void	 tcpipqent_free(struct ipqent *);