Subject: Splitting ip{,6}_output
To: None <tech-net@netbsd.org>
From: DEGROOTE Arnaud <degroote@NetBSD.org>
List: tech-net
Date: 03/02/2007 22:46:04
--+QahgC5+KEYLbs62
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In order to better integrate the fast_ipsec with our ipv{4,6} processing,
we need to make some changes to the current way to deal in the output
processing. Currently, we do something like that
ip6_output calls ipsec6_process_packet which process the ipsec
transformation on an asynchronous way. When it has finished,
ipsec_process_done is called and the packet is reinjected in ip6_output
with dummy arguments.
There is two problems here :
- we lose the current argument of ip6_output ( all the options in
particulary )
- we process some things that we already have processed on the first
pass
The situation is quite the same on the v4 side, maybe worse because when we
call ipsec4_process_packet, we have already process most of the ip_output
function.
The solution I have previously proposed for the v6 side, during the SoC is
to split the stack in two ip6_output and ip6_output2, and the
ipsec_process_done will callback in the ip6 output code with ip6_output2.
I propose to do the same thing for the netinet stack. I don't expect
any performance decrease.
You can find the both patchs attached.
Can people comment it ? I'm interested too if you have any better ideas.
If nobody has strong objections, I will commit it next week.
Take cares
--
Arnaud Degroote
--+QahgC5+KEYLbs62
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="ip6_output.c.diff"
Index: ip6_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.116
diff -u -r1.116 ip6_output.c
--- ip6_output.c 21 Feb 2007 23:00:08 -0000 1.116
+++ ip6_output.c 2 Mar 2007 22:37:49 -0000
@@ -141,6 +141,10 @@
const struct in6_addr *, u_long *, int *);
static int copypktopts __P((struct ip6_pktopts *, struct ip6_pktopts *, int));
+static int ip6_output2 (struct mbuf *, struct ip6_pktopts *,
+ struct route_in6 *, int, struct ip6_moptions*, struct socket *,
+ struct ifnet **, void *, struct ip6_exthdrs *, struct in6_addr *);
+
#ifdef RFC2292
static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
struct socket *));
@@ -173,23 +177,12 @@
struct ifnet **ifpp /* XXX: just for statistics */
)
{
- struct ip6_hdr *ip6, *mhip6;
- struct ifnet *ifp, *origifp;
+ struct ip6_hdr *ip6;
struct mbuf *m = m0;
- int hlen, tlen, len, off;
- bool tso;
- struct route_in6 ip6route;
- struct rtentry *rt = NULL;
- struct sockaddr_in6 *dst, src_sa, dst_sa;
int error = 0;
- struct in6_ifaddr *ia = NULL;
- u_long mtu;
- int alwaysfrag, dontfrag;
- u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
+ u_int32_t optlen = 0, plen = 0;
struct ip6_exthdrs exthdrs;
- struct in6_addr finaldst, src0, dst0;
- u_int32_t zone;
- struct route_in6 *ro_pmtu = NULL;
+ struct in6_addr finaldst;
int hdrsplit = 0;
int needipsec = 0;
#ifdef IPSEC
@@ -312,7 +305,6 @@
if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
- unfragpartlen = optlen + sizeof(struct ip6_hdr);
/* NOTE: we don't add AH/ESP length here. do that later. */
if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
@@ -564,18 +556,6 @@
ip6stat.ip6s_localout++;
- /*
- * Route packet.
- */
- /* initialize cached route */
- if (ro == NULL) {
- memset(&ip6route, 0, sizeof(ip6route));
- ro = &ip6route;
- }
- ro_pmtu = ro;
- if (opt && opt->ip6po_rthdr)
- ro = &opt->ip6po_route;
- dst = (struct sockaddr_in6 *)&ro->ro_dst;
/*
* if specified, try to fill in the traffic class field.
@@ -603,8 +583,105 @@
ip6->ip6_hlim = ip6_defmcasthlim;
}
+#ifdef FAST_IPSEC
+ if (needipsec) {
+ s = splsoftnet();
+ error = ipsec6_process_packet(m,sp->req);
+
+ /*
+ * Preserve KAME behaviour: ENOENT can be returned
+ * when an SA acquire is in progress. Don't propagate
+ * this to user-level; it confuses applications.
+ * XXX this will go away when the SADB is redone.
+ */
+ if (error == ENOENT)
+ error = 0;
+ splx(s);
+ goto done;
+ }
+#endif /* FAST_IPSEC */
+
+#ifdef IPSEC
+ if (needipsectun)
+ error = ip6_output2(m, opt, ro, flags, im6o, so,
+ ifpp, sp, &exthdrs, &finaldst);
+ else
+#endif
+ error = ip6_output2(m, opt, ro, flags, im6o, so,
+ ifpp, NULL, &exthdrs, &finaldst);
+
+done:
+#ifdef IPSEC
+ if (sp != NULL)
+ key_freesp(sp);
+#endif /* IPSEC */
+#ifdef FAST_IPSEC
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+#endif /* FAST_IPSEC */
+
+ return (error);
+
+freehdrs:
+ m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
+ m_freem(exthdrs.ip6e_dest1);
+ m_freem(exthdrs.ip6e_rthdr);
+ m_freem(exthdrs.ip6e_dest2);
+ /* FALLTHROUGH */
+bad:
+ m_freem(m);
+ goto done;
+}
+
+static int
+ip6_output2(
+ struct mbuf *m0,
+ struct ip6_pktopts *opt,
+ struct route_in6 *ro,
+ int flags,
+ struct ip6_moptions *im6o,
+ struct socket *so,
+ struct ifnet **ifpp, /* XXX: just for statistics */
+ void * spp,
+ struct ip6_exthdrs * exthdrs,
+ struct in6_addr * finaldst)
+{
+ struct ip6_hdr *ip6, *mhip6;
+ struct ifnet *ifp, *origifp;
+ struct mbuf *m = m0;
+ int hlen, tlen, len, off;
+ bool tso;
+ struct route_in6 ip6route;
+ struct rtentry *rt = NULL;
+ struct sockaddr_in6 *dst, src_sa, dst_sa;
+ int error = 0;
+ struct in6_ifaddr *ia = NULL;
+ u_long mtu;
+ int alwaysfrag, dontfrag;
+ u_int32_t unfragpartlen = 0;
+ struct in6_addr src0, dst0;
+ u_int32_t zone;
+ struct route_in6 *ro_pmtu = NULL;
+#ifdef IPSEC
+ struct secpolicy * sp = spp;
+#endif
+
+
+ /*
+ * Route packet.
+ */
+ /* initialize cached route */
+ if (ro == NULL) {
+ memset(&ip6route, 0, sizeof(ip6route));
+ ro = &ip6route;
+ }
+ ro_pmtu = ro;
+ if (opt && opt->ip6po_rthdr)
+ ro = &opt->ip6po_route;
+ dst = (struct sockaddr_in6 *)&ro->ro_dst;
+
#ifdef IPSEC
- if (needipsec && needipsectun) {
+ if (sp) {
struct ipsec_output_state state;
/*
@@ -615,8 +692,8 @@
*
* IPv6 [ESP|AH] IPv6 [extension headers] payload
*/
- bzero(&exthdrs, sizeof(exthdrs));
- exthdrs.ip6e_ip6 = m;
+ bzero(exthdrs, sizeof(*exthdrs));
+ exthdrs->ip6e_ip6 = m;
bzero(&state, sizeof(state));
state.m = m;
@@ -650,28 +727,9 @@
goto bad;
}
- exthdrs.ip6e_ip6 = m;
+ exthdrs->ip6e_ip6 = m;
}
#endif /* IPSEC */
-#ifdef FAST_IPSEC
- if (needipsec) {
- s = splsoftnet();
- error = ipsec6_process_packet(m,sp->req);
-
- /*
- * Preserve KAME behaviour: ENOENT can be returned
- * when an SA acquire is in progress. Don't propagate
- * this to user-level; it confuses applications.
- * XXX this will go away when the SADB is redone.
- */
- if (error == ENOENT)
- error = 0;
- splx(s);
- goto done;
- }
-#endif /* FAST_IPSEC */
-
-
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
@@ -842,11 +900,11 @@
*ifpp = ifp;
/* Determine path MTU. */
- if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
+ if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, finaldst, &mtu,
&alwaysfrag)) != 0)
goto bad;
#ifdef IPSEC
- if (needipsectun)
+ if (sp)
mtu = IPV6_MMTU;
#endif
@@ -886,8 +944,8 @@
* it must be examined and processed even by the source node.
* (RFC 2460, section 4.)
*/
- if (exthdrs.ip6e_hbh) {
- struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
+ if (exthdrs->ip6e_hbh) {
+ struct ip6_hbh *hbh = mtod(exthdrs->ip6e_hbh, struct ip6_hbh *);
u_int32_t dummy1; /* XXX unused */
u_int32_t dummy2; /* XXX unused */
@@ -1038,6 +1096,11 @@
u_int32_t mtu32;
#endif
+ unfragpartlen = sizeof(struct ip6_hdr);
+ if (exthdrs->ip6e_hbh) unfragpartlen += exthdrs->ip6e_hbh->m_len;
+ if (exthdrs->ip6e_dest1) unfragpartlen += exthdrs->ip6e_dest1->m_len;
+ if (exthdrs->ip6e_rthdr) unfragpartlen += exthdrs->ip6e_rthdr->m_len;
+
/*
* Too large for the destination or interface;
* fragment if possible.
@@ -1079,15 +1142,15 @@
* Change the next header field of the last header in the
* unfragmentable part.
*/
- if (exthdrs.ip6e_rthdr) {
- nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
- *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
- } else if (exthdrs.ip6e_dest1) {
- nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
- *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
- } else if (exthdrs.ip6e_hbh) {
- nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
- *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
+ if (exthdrs->ip6e_rthdr) {
+ nextproto = *mtod(exthdrs->ip6e_rthdr, u_char *);
+ *mtod(exthdrs->ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
+ } else if (exthdrs->ip6e_dest1) {
+ nextproto = *mtod(exthdrs->ip6e_dest1, u_char *);
+ *mtod(exthdrs->ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
+ } else if (exthdrs->ip6e_hbh) {
+ nextproto = *mtod(exthdrs->ip6e_hbh, u_char *);
+ *mtod(exthdrs->ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
} else {
nextproto = ip6->ip6_nxt;
ip6->ip6_nxt = IPPROTO_FRAGMENT;
@@ -1192,34 +1255,17 @@
if (error == 0)
ip6stat.ip6s_fragmented++;
+bad:
+ m_freem(m);
done:
+
/* XXX Second if is invariant? */
if (ro == &ip6route)
rtcache_free((struct route *)ro);
else if (ro_pmtu == &ip6route)
rtcache_free((struct route *)ro_pmtu);
-#ifdef IPSEC
- if (sp != NULL)
- key_freesp(sp);
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif /* FAST_IPSEC */
-
-
- return (error);
-
-freehdrs:
- m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
- m_freem(exthdrs.ip6e_dest1);
- m_freem(exthdrs.ip6e_rthdr);
- m_freem(exthdrs.ip6e_dest2);
- /* FALLTHROUGH */
-bad:
- m_freem(m);
- goto done;
+ return error;
}
static int
--+QahgC5+KEYLbs62
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="ip_output.c.diff"
Index: ip_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_output.c,v
retrieving revision 1.178
diff -u -r1.178 ip_output.c
--- ip_output.c 17 Feb 2007 22:34:11 -0000 1.178
+++ ip_output.c 28 Feb 2007 22:13:02 -0000
@@ -153,6 +153,20 @@
#include <netinet/udp.h>
#endif
+struct ip_output_args {
+ struct mbuf * opt;
+ struct ip_moptions * imo;
+ struct socket * so;
+ int flags;
+ int * mtu_p;
+#ifdef IPSEC_NAT_T
+ int natt_frag;
+#endif
+};
+
+static int ip_output2(struct mbuf *, struct ifnet *, struct sockaddr_in *,
+ struct route *, u_long, struct ip_output_args *);
+
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);
@@ -190,12 +204,13 @@
struct ifaddr *xifa;
struct mbuf *opt;
struct route *ro;
- int flags, sw_csum;
+ int flags;
int *mtu_p;
u_long mtu;
struct ip_moptions *imo;
struct socket *so;
va_list ap;
+ struct ip_output_args args;
#ifdef IPSEC_NAT_T
int natt_frag = 0;
#endif
@@ -209,7 +224,6 @@
struct tdb_ident *tdbi;
int s;
#endif
- u_int16_t ip_len;
len = 0;
va_start(ap, m0);
@@ -281,10 +295,9 @@
}
ip->ip_hl = hlen >> 2;
ipstat.ips_localout++;
- } else {
- hlen = ip->ip_hl << 2;
}
- /*
+
+ /*
* Route packet.
*/
memset(&iproute, 0, sizeof(iproute));
@@ -507,9 +520,6 @@
(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
ip->ip_off |= htons(IP_DF);
- /* Remember the current ip_len */
- ip_len = ntohs(ip->ip_len);
-
#ifdef IPSEC
/* get SP for this packet */
if (so == NULL)
@@ -639,11 +649,6 @@
goto bad;
}
- /* be sure to update variables that are affected by ipsec4_output() */
- ip = mtod(m, struct ip *);
- hlen = ip->ip_hl << 2;
- ip_len = ntohs(ip->ip_len);
-
if (ro->ro_rt == NULL) {
if ((flags & IP_ROUTETOIF) == 0) {
printf("ip_output: "
@@ -806,6 +811,49 @@
}
spd_done:
#endif /* FAST_IPSEC */
+
+ args.opt = opt;
+ args.imo = imo;
+ args.so = so;
+ args.flags = flags;
+ args.mtu_p = mtu_p;
+#ifdef IPSEC_NAT_T
+ args.natt_frag = natt_frag;
+#endif
+
+ error = ip_output2(m,ifp,dst,ro,mtu,&args);
+
+done:
+ rtcache_free(&iproute);
+
+#ifdef IPSEC
+ if (sp != NULL) {
+ KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
+ printf("DP ip_output call free SP:%p\n", sp));
+ key_freesp(sp);
+ }
+#endif /* IPSEC */
+#ifdef FAST_IPSEC
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+#endif /* FAST_IPSEC */
+
+ return (error);
+bad:
+ m_freem(m);
+ goto done;
+}
+
+static int
+ip_output2(struct mbuf * m, struct ifnet * ifp, struct sockaddr_in * dst,
+ struct route * ro, u_long mtu, struct ip_output_args * args)
+{
+ struct ip * ip;
+ struct mbuf * m0;
+ int hlen;
+ int error;
+ int sw_csum;
+ uint16_t ip_len;
#ifdef PFIL_HOOKS
/*
@@ -815,11 +863,11 @@
goto done;
if (m == NULL)
goto done;
+#endif /* PFIL_HOOKS */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
ip_len = ntohs(ip->ip_len);
-#endif /* PFIL_HOOKS */
m->m_pkthdr.csum_data |= hlen << 16;
@@ -911,8 +959,8 @@
* Must be able to put at least 8 bytes per fragment.
*/
if (ntohs(ip->ip_off) & IP_DF) {
- if (flags & IP_RETURNMTU)
- *mtu_p = mtu;
+ if (args->flags & IP_RETURNMTU)
+ *(args->mtu_p) = mtu;
error = EMSGSIZE;
ipstat.ips_cantfrag++;
goto bad;
@@ -945,9 +993,9 @@
* fragmented, re-inject it in ip_output so that IPsec
* processing can occur.
*/
- if (natt_frag) {
- error = ip_output(m, opt,
- ro, flags, imo, so, mtu_p);
+ if (args->natt_frag) {
+ error = ip_output(m, args->opt,
+ ro, args->flags, args->imo, args->so, args->mtu_p);
} else
#endif /* IPSEC_NAT_T */
{
@@ -962,25 +1010,12 @@
if (error == 0)
ipstat.ips_fragmented++;
-done:
- rtcache_free(&iproute);
-
-#ifdef IPSEC
- if (sp != NULL) {
- KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
- printf("DP ip_output call free SP:%p\n", sp));
- key_freesp(sp);
- }
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif /* FAST_IPSEC */
- return (error);
bad:
- m_freem(m);
- goto done;
+ m_free(m);
+
+done:
+ return error;
}
int
--+QahgC5+KEYLbs62--