Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys Stop using rt_gwroute on packet sending paths



details:   https://anonhg.NetBSD.org/src/rev/b1be312200af
branches:  trunk
changeset: 344889:b1be312200af
user:      ozaki-r <ozaki-r%NetBSD.org@localhost>
date:      Tue Apr 26 09:30:01 2016 +0000

description:
Stop using rt_gwroute on packet sending paths

The rt_gwroute member of an rtentry is a reference to the rtentry of
the gateway, for an rtentry with RTF_GATEWAY. It was used by L2 (arp
and ndp) to look up L2 addresses. Since L2 nexthop caches are now
separate, we no longer need a route for that purpose and can stop
using rt_gwroute. By doing so, we reduce the referencing and modifying
of rtentries, which makes it easier to apply a lock (and/or psref) to
the routing table and rtentries.

One difficulty in doing this is preserving the RTF_REJECT behavior. It
seems it was broken when we moved the rtalloc1 handling from the L2
output routines (e.g., ether_output) to ip_hresolv_output, but
(fortunately?) it kept working by accident. The mistakes were:
- RTF_REJECT was checked for any routes in L2 output routines,
  but in ip_hresolv_output it is checked only when the route
  is RTF_GATEWAY
- The RTF_REJECT check wasn't copied to IPv6 (nd6_output)

It seems that the rt_gwroute checks hid these mistakes, so things
appeared to work (unexpectedly); removing the rt_gwroute checks exposes
the issue. So we need to fix the RTF_REJECT checks in ip_hresolv_output
and also add them to nd6_output.

One more point we have to take care of is which errno to return; we
need to mimic the looutput behavior. Originally the RTF_REJECT check
was done either in the L2 output routines or in looutput. The latter
applies when a reject route points to a loopback interface.
However, the RTF_REJECT check is now done before looutput, so to keep
the original behavior we need to return the errno that looutput would
choose. The newly added rt_check_reject_route handles this.

diffstat:

 sys/net/if_mpls.c        |    6 +-
 sys/net/route.c          |   22 +++++++-
 sys/net/route.h          |   20 +------
 sys/netinet/in_offload.c |    6 +-
 sys/netinet/ip_output.c  |  130 ++++++++--------------------------------------
 sys/netinet/ip_var.h     |    4 +-
 sys/netinet6/nd6.c       |  119 +++++++++++++------------------------------
 7 files changed, 90 insertions(+), 217 deletions(-)

diffs (truncated from 500 to 300 lines):

diff -r 6bc073e21d98 -r b1be312200af sys/net/if_mpls.c
--- a/sys/net/if_mpls.c Tue Apr 26 09:24:57 2016 +0000
+++ b/sys/net/if_mpls.c Tue Apr 26 09:30:01 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $ */
+/*     $NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $ */
 
 /*
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -473,7 +473,7 @@
        case IFT_TUNNEL:
        case IFT_LOOP:
 #ifdef INET
-               ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt);
+               ret = ip_if_output(ifp, m, rt->rt_gateway, rt);
 #else
                KERNEL_LOCK(1, NULL);
                ret =  (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
diff -r 6bc073e21d98 -r b1be312200af sys/net/route.c
--- a/sys/net/route.c   Tue Apr 26 09:24:57 2016 +0000
+++ b/sys/net/route.c   Tue Apr 26 09:30:01 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $      */
+/*     $NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $      */
 
 /*-
  * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
@@ -96,7 +96,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #include <sys/param.h>
 #ifdef RTFLUSH_DEBUG
@@ -1506,6 +1506,24 @@
        return rt->rt_tag;
 }
 
+int
+rt_check_reject_route(struct rtentry *rt, struct ifnet *ifp)
+{
+
+       if ((rt->rt_flags & RTF_REJECT) != 0) {
+               /* Mimic looutput */
+               if (ifp->if_flags & IFF_LOOPBACK)
+                       return (rt->rt_flags & RTF_HOST) ?
+                           EHOSTUNREACH : ENETUNREACH;
+               else if (rt->rt_rmx.rmx_expire == 0 ||
+                   time_uptime < rt->rt_rmx.rmx_expire)
+                       return (rt->rt_flags & RTF_GATEWAY) ?
+                           EHOSTUNREACH : EHOSTDOWN;
+       }
+
+       return 0;
+}
+
 #ifdef DDB
 
 #include <machine/db_machdep.h>
diff -r 6bc073e21d98 -r b1be312200af sys/net/route.h
--- a/sys/net/route.h   Tue Apr 26 09:24:57 2016 +0000
+++ b/sys/net/route.h   Tue Apr 26 09:30:01 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: route.h,v 1.99 2016/04/11 09:21:18 ozaki-r Exp $       */
+/*     $NetBSD: route.h,v 1.100 2016/04/26 09:30:01 ozaki-r Exp $      */
 
 /*
  * Copyright (c) 1980, 1986, 1993
@@ -405,23 +405,7 @@
 struct sockaddr *
        rt_gettag(struct rtentry *);
 
-static inline struct rtentry *
-rt_get_gwroute(struct rtentry *rt)
-{
-       if (rt->rt_gwroute == NULL)
-               return NULL;
-       rt->rt_gwroute->rt_refcnt++;
-       return rt->rt_gwroute;
-}
-
-static inline void
-rt_set_gwroute(struct rtentry *rt, struct rtentry *gwrt)
-{
-
-       rt->rt_gwroute = gwrt;
-       if (rt->rt_gwroute != NULL)
-               rt->rt_gwroute->rt_refcnt++;
-}
+int    rt_check_reject_route(struct rtentry *, struct ifnet *);
 
 static inline void
 rt_assert_referenced(const struct rtentry *rt)
diff -r 6bc073e21d98 -r b1be312200af sys/netinet/in_offload.c
--- a/sys/netinet/in_offload.c  Tue Apr 26 09:24:57 2016 +0000
+++ b/sys/netinet/in_offload.c  Tue Apr 26 09:30:01 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $   */
+/*     $NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $   */
 
 /*-
  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -55,7 +55,7 @@
        struct ip_tso_output_args *args = vp;
        struct ifnet *ifp = args->ifp;
 
-       return ip_hresolv_output(ifp, m, args->sa, args->rt);
+       return ip_if_output(ifp, m, args->sa, args->rt);
 }
 
 int
diff -r 6bc073e21d98 -r b1be312200af sys/netinet/ip_output.c
--- a/sys/netinet/ip_output.c   Tue Apr 26 09:24:57 2016 +0000
+++ b/sys/netinet/ip_output.c   Tue Apr 26 09:30:01 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $  */
+/*     $NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $  */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -157,44 +157,6 @@
 
 int    ip_do_loopback_cksum = 0;
 
-static bool
-ip_hresolv_needed(const struct ifnet * const ifp)
-{
-       switch (ifp->if_type) {
-       case IFT_ARCNET:
-       case IFT_ATM:
-       case IFT_ECONET:
-       case IFT_ETHER:
-       case IFT_FDDI:
-       case IFT_HIPPI:
-       case IFT_IEEE1394:
-       case IFT_ISO88025:
-       case IFT_SLIP:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static int
-klock_if_output(struct ifnet * const ifp, struct mbuf * const m,
-    const struct sockaddr * const dst, struct rtentry *rt)
-{
-       int error;
-
-#ifndef NET_MPSAFE
-       KERNEL_LOCK(1, NULL);
-#endif
-
-       error = (*ifp->if_output)(ifp, m, dst, rt);
-
-#ifndef NET_MPSAFE
-       KERNEL_UNLOCK_ONE(NULL);
-#endif
-
-       return error;
-}
-
 static int
 ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m, struct rtentry *rt)
 {
@@ -228,81 +190,37 @@
 
 /*
  * Send an IP packet to a host.
- *
- * If necessary, resolve the arbitrary IP route, rt0, to an IP host route before
- * calling ifp's output routine.
  */
 int
-ip_hresolv_output(struct ifnet * const ifp, struct mbuf * const m,
-    const struct sockaddr * const dst, struct rtentry *rt0)
+ip_if_output(struct ifnet * const ifp, struct mbuf * const m,
+    const struct sockaddr * const dst, struct rtentry *rt)
 {
        int error = 0;
-       struct rtentry *rt = rt0, *gwrt;
 
-#define RTFREE_IF_NEEDED(_rt) \
-       if ((_rt) != NULL && (_rt) != rt0) \
-               rtfree((_rt));
-
-       if (!ip_hresolv_needed(ifp))
-               goto out;
-
-       if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) == 0)
-               goto out;
-
-       gwrt = rt_get_gwroute(rt);
-       RTFREE_IF_NEEDED(rt);
-       rt = gwrt;
-       if (rt == NULL || (rt->rt_flags & RTF_UP) == 0) {
-               if (rt != NULL) {
-                       RTFREE_IF_NEEDED(rt);
-                       rt = rt0;
-               }
-               if (rt == NULL) {
-                       error = EHOSTUNREACH;
-                       goto bad;
-               }
-               gwrt = rtalloc1(rt->rt_gateway, 1);
-               rt_set_gwroute(rt, gwrt);
-               RTFREE_IF_NEEDED(rt);
-               rt = gwrt;
-               if (rt == NULL) {
-                       error = EHOSTUNREACH;
-                       goto bad;
-               }
-               /* the "G" test below also prevents rt == rt0 */
-               if ((rt->rt_flags & RTF_GATEWAY) != 0 || rt->rt_ifp != ifp) {
-                       if (rt0->rt_gwroute != NULL)
-                               rtfree(rt0->rt_gwroute);
-                       rt0->rt_gwroute = NULL;
-                       error = EHOSTUNREACH;
-                       goto bad;
-               }
-       }
-       if ((rt->rt_flags & RTF_REJECT) != 0) {
-               if (rt->rt_rmx.rmx_expire == 0 ||
-                   time_uptime < rt->rt_rmx.rmx_expire) {
-                       error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
-                       goto bad;
+       if (rt != NULL) {
+               error = rt_check_reject_route(rt, ifp);
+               if (error != 0) {
+                       m_freem(m);
+                       return error;
                }
        }
 
-out:
-       error = ip_mark_mpls(ifp, m, rt0);
-       if (error != 0)
-               goto bad;
-
-       error = klock_if_output(ifp, m, dst, rt);
-       goto exit;
+       error = ip_mark_mpls(ifp, m, rt);
+       if (error != 0) {
+               m_freem(m);
+               return error;
+       }
 
-bad:
-       if (m != NULL)
-               m_freem(m);
-exit:
-       RTFREE_IF_NEEDED(rt);
+#ifndef NET_MPSAFE
+       KERNEL_LOCK(1, NULL);
+#endif
 
-       return error;
+       error = (*ifp->if_output)(ifp, m, dst, rt);
 
-#undef RTFREE_IF_NEEDED
+#ifndef NET_MPSAFE
+       KERNEL_UNLOCK_ONE(NULL);
+#endif
+       return error;
 }
 
 /*
@@ -715,7 +633,7 @@
                if (__predict_true(
                    (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 ||
                    (ifp->if_capenable & IFCAP_TSOv4) != 0)) {



Home | Main Index | Thread Index | Old Index