tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

route(4) RTM_MISS DST address filtering



IPv6 RFCs mention router reachability - if a router is unreachable then we should prefer a reachable one.

BSD signals that a node is unreachable by sending RTM_MISS with a DST sockaddr of the unreachable address to route(4). If the node later becomes reachable then RTM_NEWADDR is sent with the hardware address of the node. RTM_DELADDR is sent when the knowledge of the hardware address is lost - this does not mean it's unreachable, it could just mean the entry has been garbage collected.

Now, if you have an IPv6 router it will generate a lot of RTM_MISS messages when an upstream entity tries to probe a non-existent node on the downstream network. Even after I introduced both route(4) message filtering and socket overflow detection, dhcpcd is still overflowing very frequently on my ERLITE router with the maximum possible receive buffer. Re-learning system state when this happens also burns a lot of CPU time.

One possible solution is to filter RTM_MISS messages by the DST sockaddr. Attached is a patch to do just this - it allows an array of sockaddrs, spaced by RT_ROUNDUP just like in other route(4) messages, to be set via setsockopt(2). It mirrors the existing approach for setting the route message type filter. dhcpcd can then use this to listen only for RTM_MISS messages for any gateway that dhcpcd could use as a default route, so it can prioritise a reachable router.

I've been running this patch on my router for a few days now and dhcpcd has gone from one of the top CPU burning programs to barely registering any CPU.

Commentary welcome, especially on the allocation/free of memory as I've never had to do that before in the kernel. Do we need to limit it by an arbitrary number, and if so, what? The API itself is only limited by what you can send via setsockopt(2).

Roy
Index: sys/net/route.h
===================================================================
RCS file: /cvsroot/src/sys/net/route.h,v
retrieving revision 1.125
diff -u -p -r1.125 route.h
--- sys/net/route.h	19 Sep 2019 04:08:29 -0000	1.125
+++ sys/net/route.h	4 Feb 2020 10:00:22 -0000
@@ -270,6 +270,7 @@ struct rt_msghdr {
  * setsockopt defines used for the filtering.
  */
 #define	RO_MSGFILTER	1	/* array of which rtm_type to send to client */
+#define	RO_MISSFILTER	2	/* array of sockaddrs to match miss dst */
 
 #define RTV_MTU		0x1	/* init or lock _mtu */
 #define RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
Index: sys/net/rtsock_shared.c
===================================================================
RCS file: /cvsroot/src/sys/net/rtsock_shared.c,v
retrieving revision 1.12
diff -u -p -r1.12 rtsock_shared.c
--- sys/net/rtsock_shared.c	29 Jan 2020 04:35:13 -0000	1.12
+++ sys/net/rtsock_shared.c	4 Feb 2020 10:00:22 -0000
@@ -169,6 +169,8 @@ struct routecb {
 	struct rawcb	rocb_rcb;
 	unsigned int	rocb_msgfilter;
 #define	RTMSGFILTER(m)	(1U << (m))
+	char		*rocb_missfilter;
+	size_t		rocb_missfilterlen;
 };
 #define sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
 
@@ -218,7 +220,7 @@ COMPATNAME(route_filter)(struct mbuf *m,
 		return ENOPROTOOPT;
 
 	/* If no filter set, just return. */
-	if (rop->rocb_msgfilter == 0)
+	if (rop->rocb_msgfilter == 0 && rop->rocb_missfilterlen == 0)
 		return 0;
 
 	/* Ensure we can access rtm_type */
@@ -230,9 +232,27 @@ COMPATNAME(route_filter)(struct mbuf *m,
 	if (rtm->rtm_type >= sizeof(rop->rocb_msgfilter) * CHAR_BIT)
 		return EINVAL;
 	/* If the rtm type is filtered out, return a positive. */
-	if (!(rop->rocb_msgfilter & RTMSGFILTER(rtm->rtm_type)))
+	if (rop->rocb_msgfilter != 0 &&
+	    !(rop->rocb_msgfilter & RTMSGFILTER(rtm->rtm_type)))
 		return EEXIST;
 
+	if (rop->rocb_missfilterlen != 0 && rtm->rtm_type == RTM_MISS) {
+		__CTASSERT(RTA_DST == 1);
+		struct sockaddr *sa, *dst = (struct sockaddr *)(rtm + 1);
+		char *cp = rop->rocb_missfilter;
+		char *ep = cp + rop->rocb_missfilterlen;
+
+		while (cp < ep) {
+			sa = (struct sockaddr *)cp;
+			if (sa->sa_len == dst->sa_len &&
+			    memcmp(sa, dst, sa->sa_len) == 0)
+				break;
+			cp += RT_XROUNDUP(sa->sa_len);
+		}
+		if (cp == ep)
+			return EEXIST;
+	}
+
 	/* Passed the filter. */
 	return 0;
 }
@@ -291,12 +311,15 @@ static void
 COMPATNAME(route_detach)(struct socket *so)
 {
 	struct rawcb *rp = sotorawcb(so);
+	struct routecb *rop = (struct routecb *)rp;
 	int s;
 
 	KASSERT(rp != NULL);
 	KASSERT(solocked(so));
 
 	s = splsoftnet();
+	if (rop->rocb_missfilterlen != 0)
+		kmem_free(rop->rocb_missfilter, rop->rocb_missfilterlen);
 	rt_adjustcount(rp->rcb_proto.sp_protocol, -1);
 	raw_detach(so);
 	splx(s);
@@ -980,9 +1003,10 @@ route_ctloutput(int op, struct socket *s
 {
 	struct routecb *rop = sotoroutecb(so);
 	int error = 0;
-	unsigned char *rtm_type;
+	unsigned char *rtm_type, *cp, *ep;
 	size_t len;
 	unsigned int msgfilter;
+	struct sockaddr *sa;
 
 	KASSERT(solocked(so));
 
@@ -1007,6 +1031,40 @@ route_ctloutput(int op, struct socket *s
 			if (error == 0)
 				rop->rocb_msgfilter = msgfilter;
 			break;
+		case RO_MISSFILTER:
+			/* Validate the data */
+			cp = sopt->sopt_data;
+			ep = cp + sopt->sopt_size;
+			while (cp < ep) {
+				if (ep - cp <
+				    offsetof(struct sockaddr, sa_len) +
+				    sizeof(sa->sa_len))
+					break;
+				sa = (struct sockaddr *)cp;
+				cp += RT_XROUNDUP(sa->sa_len);
+			}
+			if (cp != ep) {
+				error = EINVAL;
+				break;
+			}
+			if (rop->rocb_missfilterlen != 0)
+				kmem_free(rop->rocb_missfilter,
+				    rop->rocb_missfilterlen);
+			if (sopt->sopt_size != 0) {
+				rop->rocb_missfilter =
+				    kmem_alloc(sopt->sopt_size, KM_SLEEP);
+				if (rop->rocb_missfilter == NULL) {
+					rop->rocb_missfilterlen = 0;
+					error = ENOBUFS;
+					break;
+				}
+			} else
+				rop->rocb_missfilter = NULL;
+			rop->rocb_missfilterlen = sopt->sopt_size;
+			if (rop->rocb_missfilterlen != 0)
+				memcpy(rop->rocb_missfilter, sopt->sopt_data,
+				    rop->rocb_missfilterlen);
+			break;
 		default:
 			error = ENOPROTOOPT;
 			break;
Index: share/man/man4/route.4
===================================================================
RCS file: /cvsroot/src/share/man/man4/route.4,v
retrieving revision 1.32
diff -u -p -r1.32 route.4
--- share/man/man4/route.4	1 Jul 2018 22:27:43 -0000	1.32
+++ share/man/man4/route.4	4 Feb 2020 10:00:22 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)route.4	8.6 (Berkeley) 4/19/94
 .\"
-.Dd July 11, 2018
+.Dd February 4, 2020
 .Dt ROUTE 4
 .Os
 .Sh NAME
@@ -191,6 +191,35 @@ if (setsockopt(routefd, PF_ROUTE, RO_MSG
 	err(1, "setsockopt(RO_MSGFILTER)");
 .Ed
 .Pp
+A process can specify which RTM_MISS destination addresses it's interested in
+by passing an array of struct sockaddr to the
+.Xr setsockopt 2
+call with the
+.Dv RO_MISSFILTER
+option at the
+.Dv PF_ROUTE
+level.
+For example, to only get RTM_MISS messages for specific destinations:
+.Bd -literal -offset indent
+char buf[1024] = { '\\0' }, *cp = buf;
+struct sockaddr_in sin = {
+	.sin_family = AF_INET,
+	.sin_len = sizeof(sin),
+};
+
+inet_aton("192.168.0.1", &sin.sin_addr);
+memcpy(cp, &sin, sin.sin_len);
+cp += RT_ROUNDUP(sin.sin_len);
+
+inet_aton("192.168.0.2", &sin.sin_addr);
+memcpy(cp, &sin, sin.sin_len);
+cp += RT_ROUNDUP(sin.sin_len);
+
+if (setsockopt(routefd, PF_ROUTE, RO_MISSFILTER,
+    buf, (socklen_t)(cp - buf)) == -1)
+	err(1, "setsockopt(RO_MISSFILTER)");
+.Ed
+.Pp
 If a route is in use when it is deleted,
 the routing entry will be marked down and removed from the routing table,
 but the resources associated with it will not


Home | Main Index | Thread Index | Old Index