tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: route(4): Adding ROUTE_MSGFILTER socket option



On 05/04/2017 15:43, Robert Elz wrote:
>     Date:        Wed, 5 Apr 2017 13:25:44 +0100
>     From:        Roy Marples <roy%marples.name@localhost>
>     Message-ID:  <7b696d45-0fee-7b0a-ce73-e8daf3120097%marples.name@localhost>
> 
> 
>   | Right now I chose the same API, just so I can test and validate it works
>   | the same on both OS's.
> 
> That's reaosnable for testing, but preferably not for comitting.
> 
>   | Also, when NetBSD started it has 11 types. Now it has 21, so that's an
>   | extra 10 in 22 years, so at a rate of 1 every 2 years.
> 
> Most likely they come in bursts.
> 
>   | Strictly speaking, the types are beholden to the version so we could
>   | just bump the version and remove the O (ie old) compat vars and bury
>   | them to keep inside 32-bits. Thus would mean re-working how we handle
>   | compat in rtsock though.
> 
> We could, but let's not - it is a 256 value field (or 255 since 0 appears
> to not be used) and we should be able to cope with all values, and without
> needing hackery any time in the future.
> 
>   | Well yes, but the point of the API is that we don't want them by
>   | accident,
> 
> The scheme I proposed was intended to allow the kernel coder (when/if
> it is required) to arrange to map the message types so that accidents
> would be rare (that is combining in the bitmask values that tend to
> be enabled together, and excluding the types that are not.)
> 
> But...
> 
>   | If avoiding a variable length bitmask is needed then we just need
>   | something similar to poll
> 
> Yes, that's another option - anything to avoid 1 << 80 and similar...
> 
> The mapping scheme (I suggested) is likely cheaper in the kernel (just
> one extra mem reference compared to your/OpenBSD's original during
> packet processing) but a poll style interface is cleaner, and could
> avoid all unwanted packets being delivered.
> 
> With the API you proposed in this message, either internel implementation
> could be used, which is good - what matters most is that the application
> sends the actual rtm_type values to the kernel, rather than calculating a
> bitmask and sending that (as unless we do a select type interface, that
> bitmask is of fixed length, and I don't think we want to make it a 32 byte
> array to fit all possible bits in).
> 
> For now the kernel (packet processing) code could be just as you showed it
> initially (just a 32 bit mask operation) just as long as the API does not
> expose that.

These are all valid points.
v2 of the patch attached to address.
I changed the socket option name so apps can tell the two
implementations apart.

More comments are welcome.

Roy
Index: share/man/man4/route.4
===================================================================
RCS file: /cvsroot/src/share/man/man4/route.4,v
retrieving revision 1.28
diff -u -p -r1.28 route.4
--- share/man/man4/route.4	21 Sep 2016 10:50:23 -0000	1.28
+++ share/man/man4/route.4	5 Apr 2017 16:44:08 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)route.4	8.6 (Berkeley) 4/19/94
 .\"
-.Dd September 15, 2016
+.Dd March 5, 2017
 .Dt ROUTE 4
 .Os
 .Sh NAME
@@ -174,6 +174,23 @@ by doing a
 .Xr shutdown 2
 system call for further input.
 .Pp
+A process can specify which route message types it's interested in by passing
+an array of route messsage types to the
+.Xr setsockopt 2
+call with the
+.Dv RO_MSGFILTER
+option at the
+.Dv PF_ROUTE
+level.
+For example, to only get specific messages:
+.Bd -literal -offset indent
+unsigned char rtfilter[] = { RTM_IFINFO, RTM_IFANNOUNCE };
+
+if (setsockopt(routefd, PF_ROUTE, RO_MSGFILTER,
+    &rtfilter, sizeof(rtfilter)) == -1)
+	err(1, "setsockopt(RO_MSGFILTER)");
+.Ed
+.Pp
 If a route is in use when it is deleted,
 the routing entry will be marked down and removed from the routing table,
 but the resources associated with it will not
Index: sys/net/raw_cb.h
===================================================================
RCS file: /cvsroot/src/sys/net/raw_cb.h,v
retrieving revision 1.26
diff -u -p -r1.26 raw_cb.h
--- sys/net/raw_cb.h	20 Jan 2016 21:43:59 -0000	1.26
+++ sys/net/raw_cb.h	5 Apr 2017 16:44:08 -0000
@@ -46,6 +46,8 @@ struct rawcb {
 	struct	sockaddr *rcb_faddr;	/* destination address */
 	struct	sockaddr *rcb_laddr;	/* socket's address */
 	struct	sockproto rcb_proto;	/* protocol family, protocol */
+	int	(*rcb_filter)(struct mbuf *, struct sockproto *,
+		              struct rawcb *);
 	size_t	rcb_len;
 };
 
Index: sys/net/raw_usrreq.c
===================================================================
RCS file: /cvsroot/src/sys/net/raw_usrreq.c,v
retrieving revision 1.55
diff -u -p -r1.55 raw_usrreq.c
--- sys/net/raw_usrreq.c	20 Jan 2016 21:43:59 -0000	1.55
+++ sys/net/raw_usrreq.c	5 Apr 2017 16:44:08 -0000
@@ -107,6 +107,9 @@ raw_input(struct mbuf *m0, ...)
 			continue;
 		if (rp->rcb_faddr && !equal(rp->rcb_faddr, src))
 			continue;
+		/* Run any filtering that may have been installed. */
+		if (rp->rcb_filter != NULL && rp->rcb_filter(m, proto, rp) != 0)
+			continue;
 		if (last != NULL) {
 			struct mbuf *n;
 			if ((n = m_copy(m, 0, M_COPYALL)) == NULL)
Index: sys/net/route.h
===================================================================
RCS file: /cvsroot/src/sys/net/route.h,v
retrieving revision 1.111
diff -u -p -r1.111 route.h
--- sys/net/route.h	19 Dec 2016 11:17:00 -0000	1.111
+++ sys/net/route.h	5 Apr 2017 16:44:08 -0000
@@ -250,6 +250,11 @@ struct rt_msghdr {
 #define RTM_DELADDR	0x17	/* address being removed from iface */
 #define RTM_CHGADDR	0x18	/* address properties changed */
 
+/*
+ * setsockopt defines used for the filtering.
+ */
+#define	RO_MSGFILTER	1	/* array of which rtm_type to send to client */
+
 #define RTV_MTU		0x1	/* init or lock _mtu */
 #define RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
 #define RTV_EXPIRE	0x4	/* init or lock _expire */
Index: sys/net/rtsock.c
===================================================================
RCS file: /cvsroot/src/sys/net/rtsock.c,v
retrieving revision 1.211
diff -u -p -r1.211 rtsock.c
--- sys/net/rtsock.c	24 Mar 2017 03:45:02 -0000	1.211
+++ sys/net/rtsock.c	5 Apr 2017 16:44:09 -0000
@@ -177,6 +177,12 @@ static void rt_adjustcount(int, int);
 
 static const struct protosw COMPATNAME(route_protosw)[];
 
+struct routecb {
+	struct rawcb	rocb_rcb;
+	unsigned int	rocb_msgfilter;
+};
+#define sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
+
 static void
 rt_adjustcount(int af, int cnt)
 {
@@ -200,14 +206,48 @@ rt_adjustcount(int af, int cnt)
 }
 
 static int
+COMPATNAME(route_filter)(struct mbuf *m, struct sockproto *proto,
+    struct rawcb *rp)
+{
+	struct routecb *rop = (struct routecb *)rp;
+	struct rt_xmsghdr *rtm;
+
+	KASSERT(m != NULL);
+	KASSERT(proto != NULL);
+	KASSERT(rp != NULL);
+
+	/* Wrong family for this socket. */
+	if (proto->sp_family != PF_ROUTE)
+		return ENOPROTOOPT;
+
+	/* If no filter set, just return. */
+	if (rop->rocb_msgfilter == 0)
+		return 0;
+
+	/* Ensure we can access rtm_type */
+	if (m->m_len < offsetof(struct rt_xmsghdr, rtm_type) + 1)
+		return EINVAL;
+
+	rtm = mtod(m, struct rt_xmsghdr *);
+	/* If the rtm type is filtered out, return a positive. */
+	if (!(rop->rocb_msgfilter & (1U << rtm->rtm_type)))
+		return EEXIST;
+
+	/* Passed the filter. */
+	return 0;
+}
+
+static int
 COMPATNAME(route_attach)(struct socket *so, int proto)
 {
 	struct rawcb *rp;
+	struct routecb *rop;
 	int s, error;
 
 	KASSERT(sotorawcb(so) == NULL);
-	rp = kmem_zalloc(sizeof(*rp), KM_SLEEP);
-	rp->rcb_len = sizeof(*rp);
+	rop = kmem_zalloc(sizeof(*rop), KM_SLEEP);
+	rp = &rop->rocb_rcb;
+	rp->rcb_len = sizeof(*rop);
 	so->so_pcb = rp;
 
 	s = splsoftnet();
@@ -215,11 +255,12 @@ COMPATNAME(route_attach)(struct socket *
 		rt_adjustcount(rp->rcb_proto.sp_protocol, 1);
 		rp->rcb_laddr = &COMPATNAME(route_info).ri_src;
 		rp->rcb_faddr = &COMPATNAME(route_info).ri_dst;
+		rp->rcb_filter = COMPATNAME(route_filter);
 	}
 	splx(s);
 
 	if (error) {
-		kmem_free(rp, sizeof(*rp));
+		kmem_free(rop, sizeof(*rop));
 		so->so_pcb = NULL;
 		return error;
 	}
@@ -981,6 +1022,56 @@ out:
 	return error;
 }
 
+static int
+route_ctloutput(int op, struct socket *so, struct sockopt *sopt)
+{
+	struct routecb *rop = sotoroutecb(so);
+	int error = 0;
+	unsigned char *rtm_type;
+	size_t len;
+	unsigned int msgfilter;
+
+	KASSERT(solocked(so));
+
+	if (sopt->sopt_level != AF_ROUTE) {
+		error = ENOPROTOOPT;
+	} else switch (op) {
+	case PRCO_SETOPT:
+		switch (sopt->sopt_name) {
+		case RO_MSGFILTER:
+			msgfilter = 0;
+			for (rtm_type = sopt->sopt_data, len = sopt->sopt_size;
+			     len != 0;
+			     rtm_type++, len -= sizeof(*rtm_type))
+			{
+				/* Guard against overflowing our storage. */
+				if (*rtm_type >= sizeof(msgfilter) * CHAR_BIT) {
+					error = EOVERFLOW;
+					break;
+				}
+				msgfilter |= (1U << *rtm_type);
+			}
+			if (error == 0)
+				rop->rocb_msgfilter = msgfilter;
+			break;
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	case PRCO_GETOPT:
+		switch (sopt->sopt_name) {
+		case RO_MSGFILTER:
+			error = ENOTSUP;
+			break;
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+	}
+	return error;
+}
+
 static void
 rt_setmetrics(int which, const struct rt_xmsghdr *in, struct rtentry *out)
 {
@@ -1946,6 +2037,7 @@ static const struct protosw COMPATNAME(r
 		.pr_flags = PR_ATOMIC|PR_ADDR,
 		.pr_input = raw_input,
 		.pr_ctlinput = raw_ctlinput,
+		.pr_ctloutput = route_ctloutput,
 		.pr_usrreqs = &route_usrreqs,
 		.pr_init = raw_init,
 	},


Home | Main Index | Thread Index | Old Index