Subject: kern/14269: ip over gre and TTL
To: None <gnats-bugs@gnats.netbsd.org>
From: None <jasper@pointless.net>
List: netbsd-bugs
Date: 10/16/2001 21:13:12
>Number:         14269
>Category:       kern
>Synopsis:       gre(4) broken with multicast packets with a TTL of 1
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Oct 16 13:21:00 PDT 2001
>Closed-Date:
>Last-Modified:
>Originator:     Jasper Wallace
>Release:        NetBSD 1.5.3
>Organization:
not very much
>Environment:
	
System: NetBSD jellycat 1.5.3_ALPHA NetBSD 1.5.3_ALPHA (JELLYCAT) #0: Fri Oct 5 19:11:14 BST 2001 jasper@jellycat:/usr/src/sys/arch/i386/compile/JELLYCAT i386


>Description:

	GRE packets have their TTL set to the TTL of the payload packets.

	When you're trying to use OSPF across a gre link, the TTL of the OSPF
	HELLO packets is 1. The TTL of the GRE packets is then also set to 1,
	which makes them less than useless when trying to get them to go across
	the 'net.

	Also, gre(4) doesn't change the mbuf flags, so a multicast payload
	packet (with M_MCAST) will, once encapsulated, get sent out to a
	multicast Ethernet address (assuming you're on Ethernet)!

>How-To-Repeat:

	Try to run zebra/ospf across a gre link.

	Observe that it doesn't work.

	Run tcpdump:

20:02:50.258861 212.25.240.34 > 217.204.252.18: \
gre 212.25.240.34 > 217.204.252.18: [] \
10.1.12.33 > 224.0.0.5:  OSPFv2-hello 44: backbone \
auth MD5 E mask 0.0.0.0 int 10 pri 1 dead 40 nbrs \
[ttl 1] (id 5608) [ttl 1] (id 5609)
                   ^^^^^
                     `---- Ahem.

	(Also run tcpdump -e, but I didn't save a copy of the output.)

>Fix:
	
	The RFC says that an implementation MAY copy the TTL (along with
the TOS and any security options) from the payload packet into the delivery
(outer) packet.

http://rfc.net/rfc1702.html

Attached is a patch to add two sysctls: one to set the default TTL (like
gif(4)), and the other to make copying the TTL from the payload packet
optional.

Note that this patch (as is) changes the default behaviour from copying the
TTL to NOT copying the TTL.

I felt it was less surprising for people that way, but you may object to
having a default changed.

It also clears M_MCAST and M_BCAST from the mbuf flags.

A patch to sysctl(3) is included.

Index: sys/netinet/in.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/in.h,v
retrieving revision 1.47.4.2
diff -u -r1.47.4.2 in.h
--- sys/netinet/in.h	2001/04/24 22:21:35	1.47.4.2
+++ sys/netinet/in.h	2001/10/14 01:01:49
@@ -348,7 +348,9 @@
 #define IPCTL_LOWPORTMIN       16	/* minimum reserved port */
 #define IPCTL_LOWPORTMAX       17	/* maximum reserved port */
 #define IPCTL_MAXFRAGPACKETS   18	/* max packets in reassembly queue */
-#define	IPCTL_MAXID	       19
+#define IPCTL_GRE_TTL	       19	/* default TTL for gre encap packet */
+#define IPCTL_GRE_COPY_TTL     20   /* get the ttl from the payload packet */
+#define	IPCTL_MAXID	       21
 
 #define	IPCTL_NAMES { \
 	{ 0, 0 }, \
@@ -370,6 +372,8 @@
 	{ "lowportmin", CTLTYPE_INT }, \
 	{ "lowportmax", CTLTYPE_INT }, \
 	{ "maxfragpackets", CTLTYPE_INT }, \
+	{ "grettl", CTLTYPE_INT }, \
+	{ "grecopyttl", CTLTYPE_INT }, \
 }
 #endif /* !_XOPEN_SOURCE */
 
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_input.c,v
retrieving revision 1.114.4.6
diff -u -r1.114.4.6 ip_input.c
--- sys/netinet/ip_input.c	2001/04/24 22:21:20	1.114.4.6
+++ sys/netinet/ip_input.c	2001/10/14 01:01:50
@@ -138,6 +138,9 @@
 /* just for gif_ttl */
 #include <netinet/in_gif.h>
 #include "gif.h"
+/* just for gre_ttl and gre_copy_ttl */
+#include <net/if_gre.h>
+#include "gre.h"
 
 #ifdef MROUTING
 #include <netinet/ip_mroute.h>
@@ -1768,6 +1771,14 @@
 	case IPCTL_GIF_TTL:
 		return(sysctl_int(oldp, oldlenp, newp, newlen,
 				  &ip_gif_ttl));
+#endif
+#if NGRE > 0
+	case IPCTL_GRE_TTL:
+		return(sysctl_int(oldp, oldlenp, newp, newlen,
+				  &ip_gre_ttl));
+	case IPCTL_GRE_COPY_TTL:
+		return(sysctl_int(oldp, oldlenp, newp, newlen,
+				  &ip_gre_copy_ttl));
 #endif
 
 #ifndef IPNOPRIVPORTS
Index: sys/net/if_gre.h
===================================================================
RCS file: /cvsroot/syssrc/sys/net/if_gre.h,v
retrieving revision 1.5
diff -u -r1.5 if_gre.h
--- sys/net/if_gre.h	1999/11/19 20:41:19	1.5
+++ sys/net/if_gre.h	2001/10/14 01:01:51
@@ -132,6 +132,12 @@
 #define MOB_H_SIZ_L		(sizeof(struct mobile_h))
 #define MOB_H_SBIT	0x0080
 
+/* for sysctl */
+#define GRE_TTL		30
+extern int ip_gre_ttl;
+
+#define GRE_COPY_TTL	0
+extern int ip_gre_copy_ttl;
 
 /* 
  * ioctls needed to manipulate the interface 
Index: sys/net/if_gre.c
===================================================================
RCS file: /cvsroot/syssrc/sys/net/if_gre.c,v
retrieving revision 1.9.6.2
diff -u -r1.9.6.2 if_gre.c
--- sys/net/if_gre.c	2000/11/19 20:21:50	1.9.6.2
+++ sys/net/if_gre.c	2001/10/14 01:01:51
@@ -116,6 +116,8 @@
 
 struct gre_softc gre_softc[NGRE];
 
+int ip_gre_ttl = GRE_TTL;
+int ip_gre_copy_ttl = GRE_COPY_TTL;
 
 void gre_compute_route(struct gre_softc *sc);
 #ifdef DIAGNOSTIC
@@ -196,7 +198,9 @@
 	}
 #endif
 
-	ttl = 255;
+	ttl = ip_gre_ttl;
+	m->m_flags &= ~(M_BCAST|M_MCAST); /* we don't output multicast or
+						broadcast packets */
 
 	if (sc->g_proto == IPPROTO_MOBILE) {
 		if (dst->sa_family == AF_INET) {
@@ -263,7 +267,8 @@
 		switch(dst->sa_family) {
 		case AF_INET:
 			inp = mtod(m, struct ip *);
-			ttl = inp->ip_ttl;
+			if (ip_gre_copy_ttl)
+				ttl = inp->ip_ttl;
 			etype = ETHERTYPE_IP;
 			break;
 #ifdef NETATALK
@@ -577,5 +582,9 @@
 }
 
 #endif
-#endif
+
+#else  /* NGRE > 0 */
+int ip_gre_copy_ttl = 0;
+int ip_gif_ttl = 0;
+#endif /* NGRE > 0 */
 
--- lib/libc/gen/sysctl.3.orig	Tue Oct 16 20:55:25 2001
+++ lib/libc/gen/sysctl.3	Tue Oct 16 21:00:22 2001
@@ -669,6 +669,8 @@
 .It ip	lowportmin	integer	yes
 .It ip	lowportmax	integer	yes
 .It ip	maxfragpacket	integer	yes
+.It ip	grettl	integer	yes
+.It ip	grecopyttl	integer	yes
 .It icmp	maskrepl	integer	yes
 .It icmp	errppslimit	integer	yes
 .It tcp	rfc1323	integer	yes
@@ -755,6 +757,15 @@
 0 means that the node will not accept any fragmented packets.
 -1 means that the node will accept as many fragmented packets as it receives.
 The flag is provided basically for avoiding possible DoS attacks.
+.It Li ip.grettl
+The maximum time-to-live (hop count) value for an IPv4 packet generated by
+.Xr gre 4
+tunnel interface.
+.It Li ip.grecopyttl
+Set to 1 to have
+.Xr gre 4
+copy the maximum time-to-live (hop count) of the payload (inner) packet to
+the transport (outer) packet as it's sent out over the tunnel.
 .It Li icmp.maskrepl
 Returns 1 if ICMP network mask requests are to be answered.
 .It Li icmp.errppslimit
>Release-Note:
>Audit-Trail:
>Unformatted: