Subject: independant setting of ifp/ifa on routes
To: None <tech-net@netbsd.org>
From: Bill Sommerfeld <sommerfeld@orchard.arlington.ma.us>
List: tech-net
Date: 06/25/1999 10:14:44
Last night I coded up some changes to the networking code to allow you
to independently set the interface address and interface pointer of
routes being added through the routing socket.

A common hack used with OSPF is for routers to have their OSPF "router
id" (their primary IP address) independent of any of their interfaces;
it gets added as an alias on a loopback interface, and distributed
through the OSPF cloud as a host route.

This sort of thing is also used for critical servers; for instance,
"ftp.netbsd.org" resolves to an address which is on the FTP server's
lo0 address.  One problem with this in our current stack is that
unconnected traffic as well as connected traffic originating from the
system uses an address from one of the physical interfaces rather than
the system's "primary" address from the loopback... so connections inbound
to the system are independent of the topology used to connect it, but
not so for outbound connections.

It turns out that this is astoundingly easy to fix.

Routes have both an if-address pointer and an interface pointer.

Our code chooses which address to use on outgoing traffic by looking
up a route to the destination, and then using the address from
rt->rt_ifa, and then sending it out the interface pointed at by
rt->rt_ifp.

So, if somehow, rt->rt_ifa got set to the primary address, the "right
thing" would happen with respect to choosing the source address.  This
does mean that an invariant was broken: (formerly, rt->rt_ifa->ifa_ifp
== rt->rt_ifp); however, on a quick glance, nothing seems to depend on
this, and ifaddrs are independently refcounted.

Now, amazingly enough, the "route add" command already takes "-ifa"
and "-ifp" values.  These make it as far as route_output() in
sys/net/rtsock.c, where they are discarded.  Anyhow, route_output()
calls rtrequest(RTM_ADD, ...); rtrequest() then looks up the ifa/ifp
based on the gateway and destination of the new route.

If we add two new (struct sockaddr *) parameters to rtrequest, and
have rtsock pass them down, we can allow the user to specify ifa/ifp
independently.

A minor additional change (not yet done) would allow you to specify an
interface name rather than an IP address for the "-ifp" option (it
would be passed down in a sockaddr_dl).

Here's the core part of the change; I've omitted the ~twenty other
files where I just added two more NULL pointers as parameters to
rtrequest().

Comments/thoughts?

					- Bill

Index: route.h
===================================================================
RCS file: /cvsroot/syssrc/sys/net/route.h,v
retrieving revision 1.17
diff -u -r1.17 route.h
--- route.h	1998/12/27 18:27:48	1.17
+++ route.h	1999/06/25 14:03:37
@@ -303,7 +303,8 @@
 int	 rtioctl __P((u_long, caddr_t, struct proc *));
 void	 rtredirect __P((struct sockaddr *, struct sockaddr *,
 	    struct sockaddr *, int, struct sockaddr *, struct rtentry **));
-int	 rtrequest __P((int, struct sockaddr *,
-	    struct sockaddr *, struct sockaddr *, int, struct rtentry **));
+int	 rtrequest __P((int, struct sockaddr *, struct sockaddr *,
+	    struct sockaddr *, struct sockaddr *, struct sockaddr *,
+            int, struct rtentry **));
 #endif /* _KERNEL */
 #endif /* _NET_ROUTE_H_ */
Index: route.c
===================================================================
RCS file: /cvsroot/syssrc/sys/net/route.c,v
retrieving revision 1.24
diff -u -r1.24 route.c
--- route.c	1998/12/27 18:27:48	1.24
+++ route.c	1999/06/25 14:03:37
@@ -155,8 +155,8 @@
 	    ((rn->rn_flags & RNF_ROOT) == 0)) {
 		newrt = rt = (struct rtentry *)rn;
 		if (report && (rt->rt_flags & RTF_CLONING)) {
-			err = rtrequest(RTM_RESOLVE, dst, SA(0),
-					      SA(0), 0, &newrt);
+			err = rtrequest(RTM_RESOLVE, dst, SA(0), SA(0),
+					      SA(0), SA(0), 0, &newrt);
 			if (err) {
 				newrt = rt;
 				rt->rt_refcnt++;
@@ -278,7 +278,7 @@
 		create:
 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
 			error = rtrequest((int)RTM_ADD, dst, gateway,
-				    netmask, flags,
+				    netmask, SA(0), SA(0), flags,
 				    (struct rtentry **)0);
 			stat = &rtstat.rts_dynamic;
 		} else {
@@ -371,12 +371,35 @@
 	return (ifa);
 }
 
+/*
+ * find the interface we'd use to talk to "gateway"
+ */
+struct ifnet *
+ifp_ifwithroute (flags, dst, gateway)
+	int flags;
+	struct sockaddr *dst, *gateway;
+{
+	struct rtentry *rt;
+
+	rt = rtalloc1(gateway, 0);
+	if (rt == 0)
+		rt = rtalloc1(dst, 0);
+
+	if (rt == 0)
+		return 0;
+
+	rt->rt_refcnt--;
+	return rt->rt_ifp;
+}
+
+	
+
 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
 
 int
-rtrequest(req, dst, gateway, netmask, flags, ret_nrt)
+rtrequest(req, dst, gateway, netmask, ifaaddr, ifpaddr, flags, ret_nrt)
 	int req, flags;
-	struct sockaddr *dst, *gateway, *netmask;
+	struct sockaddr *dst, *gateway, *netmask, *ifaaddr, *ifpaddr;
 	struct rtentry **ret_nrt;
 {
 	int s = splsoftnet(); int error = 0;
@@ -384,6 +407,7 @@
 	register struct radix_node *rn;
 	register struct radix_node_head *rnh;
 	struct ifaddr *ifa;
+	struct ifnet *ifp;
 	struct sockaddr *ndst;
 #define senderr(x) { error = x ; goto bad; }
 
@@ -418,6 +442,7 @@
 		if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
 			senderr(EINVAL);
 		ifa = rt->rt_ifa;
+		ifp = rt->rt_ifp;
 		flags = rt->rt_flags & ~RTF_CLONING;
 		gateway = rt->rt_gateway;
 		if ((netmask = rt->rt_genmask) == 0)
@@ -425,8 +450,35 @@
 		goto makeroute;
 
 	case RTM_ADD:
-		if ((ifa = ifa_ifwithroute(flags, dst, gateway)) == 0)
+		/*
+		 * ifa/ifp selection:
+		 *
+		 * if ifp specified, use that to identify the interface.
+		 * otherwise use ifa_ifwithroute to pick it.
+		 *
+		 * if ifa specified, use that as ifa, otherwise use
+		 * ifa_ifwithroute to pick one.
+		 *
+		 * Note that ifa doesn't have to be on ifp's interface.
+		 */
+		if (ifaaddr)
+			ifa = ifa_ifwithaddr(ifaaddr);
+		else
+			ifa = ifa_ifwithroute(flags, dst, gateway);
+
+		if (!ifa)
 			senderr(ENETUNREACH);
+
+		if (ifpaddr)
+			ifp = ifp_ifwithaddr(ifpaddr);
+		else if (!ifaaddr)
+			ifp = ifa->ifa_ifp;
+		else 
+			ifp = ifp_ifwithroute(flags, dst, gateway);
+		 
+		if (!ifp)
+			senderr(ENETUNREACH);
+
 	makeroute:
 		rt = pool_get(&rtentry_pool, PR_NOWAIT);
 		if (rt == 0)
@@ -454,7 +506,7 @@
 		}
 		ifa->ifa_refcnt++;
 		rt->rt_ifa = ifa;
-		rt->rt_ifp = ifa->ifa_ifp;
+		rt->rt_ifp = ifp;
 		if (req == RTM_RESOLVE)
 			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
 		if (ifa->ifa_rtrequest)
@@ -559,7 +611,7 @@
 		}
 	}
 	error = rtrequest(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask,
-			flags | ifa->ifa_flags, &nrt);
+	    SA(0), SA(0), flags | ifa->ifa_flags, &nrt);
 	if (m)
 		(void) m_free(m);
 	if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) {
@@ -606,7 +658,7 @@
 	} else {						\
 		rtrequest((int) RTM_DELETE,			\
 			  (struct sockaddr *)rt_key(r->rtt_rt),	\
-			  0, 0, 0, 0);				\
+			  0, 0, 0, 0, 0, 0);			\
 	}							\
 }

Index: rtsock.c
===================================================================
RCS file: /cvsroot/syssrc/sys/net/rtsock.c,v
retrieving revision 1.29
diff -u -r1.29 rtsock.c
--- rtsock.c	1999/04/02 17:22:21	1.29
+++ rtsock.c	1999/06/25 14:05:36
@@ -234,6 +234,8 @@
 		if (gate == 0)
 			senderr(EINVAL);
 		error = rtrequest(RTM_ADD, dst, gate, netmask,
+		    (rtm->rtm_addrs & RTA_IFA) ? ifaaddr : 0,
+		    (rtm->rtm_addrs & RTA_IFP) ? ifpaddr : 0,		    
 		    rtm->rtm_flags, &saved_nrt);
 		if (error == 0 && saved_nrt) {
 			rt_setmetrics(rtm->rtm_inits,
@@ -244,7 +246,7 @@
 		break;
 
 	case RTM_DELETE:
-		error = rtrequest(RTM_DELETE, dst, gate, netmask,
+		error = rtrequest(RTM_DELETE, dst, gate, netmask, 0, 0,
 		    rtm->rtm_flags, &saved_nrt);
 		if (error == 0) {
 			(rt = saved_nrt)->rt_refcnt++;



Index: if.c
===================================================================
RCS file: /cvsroot/syssrc/sys/net/if.c,v
retrieving revision 1.48
diff -u -r1.48 if.c
--- if.c	1998/12/10 15:10:48	1.48
+++ if.c	1999/06/25 14:04:39
@@ -162,6 +162,33 @@
 	}
 	return ((struct ifaddr *)0);
 }
+
+/*
+ * Locate an interface based on a complete interface address.
+ */
+/*ARGSUSED*/
+struct ifnet *
+ifp_ifwithaddr(addr)
+	register struct sockaddr *addr;
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+
+#define	equal(a1, a2) \
+  (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
+	for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next)
+	    for (ifa = ifp->if_addrlist.tqh_first; ifa != 0; ifa = ifa->ifa_list.tqe_next) {
+		if (ifa->ifa_addr->sa_family != addr->sa_family)
+			continue;
+		if (equal(addr, ifa->ifa_addr))
+			return (ifp);
+		if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr &&
+		    equal(ifa->ifa_broadaddr, addr))
+			return (ifp);
+	}
+	return ((struct ifnet *)0);
+}
+
 /*
  * Locate the point to point interface with a given destination address.
  */