tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: fix gif(4)'s softint(9) contract violation [was Re: RFC: gif(4) MP-ify]



   Date: Fri, 22 Jan 2016 13:37:23 +0900
   From: Kengo NAKAHARA <k-nakahara%iij.ad.jp@localhost>

   I implement this processing. Here is the patch.
       http://netbsd.org/~knakahara/fix-gif-softint/fix-gif-softint.patch

   The patch includes reverting cvs kern_softint.c:r1.42. Furthermore, 
   the patch fixes the race between encap[46]_lookup(rnh->rnh_matchaddr)
   and encap_detach(rnh->rnh_deladdr) keeping packet processing path
   lockless.
   # I think radix tree must be required lock.

   However, this patch doesn't make gif(4) mp-ify yet. I will send gif(4)
   mp-ify patch later.

   Could you comment this patch?

Your patch has two largely independent functional changes in it:

1. fix the softint disestablish issue by adding a pause operation, and
2. make ip_encap MP-scalable by using a pserialized list instead of a
giant-locked radix tree.

Part (1) can be done a little more easily, I think -- I've attached a
compile-tested patch that does only part (1), by calling encap_detach
before softint_disestablish.  It doesn't seem to require any special
cooperation from gif_output or gifintr.  I probably overlooked
something, though.

Part (2) makes ip_encap scale well to many cores, but it stops
ip_encap from scaling well to many tunnels, because it removes the
radix tree.  Maybe that's OK -- maybe today, scaling to many cores is
more important than scaling to many tunnels.  But there's a comment in
the old code suggesting that scaling to many tunnels may be important:

/*
 * The code will use radix table for tunnel lookup, for
 * tunnels registered with encap_attach() with a addr/mask pair.
 * Faster on machines with thousands of tunnel registerations (= interfaces).
 ...

Do you have a plan for how to address scaling to many tunnels too?

One heavy-handed but easy way that we might keep the radix tree *and*
scale to many cores would be to simply drop encapsulated packets while
anyone is reconfiguring a tunnel and modifying the radix tree.

I think you could do something like the following.  I've also added a
reference count to each tunnel in this sketch.  I did this because
without the reference count, it's not clear to me how to ensure that
the tunnel will continue to exist after the pserialize_read_exit.

static struct {
	kmutex_t		lock;
	struct lwp		*changing;
	kcondvar_t		cv;
	pserialize_t		psz;
	LIST_HEAD(, encaptab)	head;
} encaptab __cacheline_aligned;

encap4_input(m)
{

	s = pserialize_read_enter();
	if (encaptab.changing) {
		/*
		 * Someone is reconfiguring a tunnel.  Drop packets
		 * until they're done.
		 */
		pserialize_read_exit(s);
		m_freem(m);
		return;
	}
	membar_consumer();
	/*
	 * Radix tree and encaptab list are now stable on this CPU
	 * until pserialize_read_exit.
	 */

	match = encap4_lookup(m, off, proto, INBOUND);
	pserialize_read_exit(s);
	if (match) {
		esw = match->esw;
		if (esw && esw->encapsw4.pr_input)
			(*esw->encapsw4.pr_input)(m, off, proto, match);
		else
			m_freem(m);
		encap_release(match);
		return;
	}

	rip_input(m, off, proto);
}

encap4_lookup(...)
{
	...

	rn = rnh->rnh_matchaddr(...);
	if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
		...
	}

	LIST_FOREACH(ep, &encaptab.head, chain) {
		membar_datadep_consumer();
		...
	}

	if (match != NULL)
		encap_acquire(match);
	return match;
}

encap_acquire(ep)
{

	refcount_inc(&ep->refcount);
}

encap_release(ep)
{

	refcount_dec_broadcast(&ep->refcount);
}

encap_change_enter()
{

	mutex_enter(&encaptab.lock);
	while (encaptab.changing)
		cv_wait(&encaptab.cv, &encaptab.lock);
	encaptab.changing = curlwp;
	/* Wait for all CPUs to notice that we're changing.  */
	pserialize_perform(encaptab.psz);
	mutex_exit(&encaptab.lock);
}

encap_change_exit()
{

	mutex_enter(&encaptab.lock);
	KASSERT(encaptab.changing == curlwp);
	encaptab.changing = NULL;
	cv_broadcast(&encaptab.cv);
	mutex_exit(&encaptab.lock);
}

encap_add(ep)
{

	encap_change_enter();
	LIST_INSERT_HEAD(&encaptab.head, ep, chain);
	... rnh->rnh_addaddr(...) ...
	encap_change_exit();
}

encap_remove(ep)
{

	encap_change_enter();
	LIST_REMOVE(ep, chain);
	/* Wait for all CPUs to stop looking up this ep.  */
	pserialize_perform(encaptab.psz);
	... rnh->rnh_deladdr(...) ...
	encap_change_exit();

	/* Wait for all users in pr_input to finish.  */
	refcount_dec_drain(&ep->refcount);
}
Index: sys/net/if_gif.c
===================================================================
RCS file: /cvsroot/src/sys/net/if_gif.c,v
retrieving revision 1.105
diff -p -u -r1.105 if_gif.c
--- sys/net/if_gif.c	18 Jan 2016 06:08:26 -0000	1.105
+++ sys/net/if_gif.c	22 Jan 2016 15:34:43 -0000
@@ -99,6 +99,7 @@ static int	gif_clone_destroy(struct ifne
 static int	gif_check_nesting(struct ifnet *, struct mbuf *);
 
 static int	gif_encap_attach(struct gif_softc *);
+static int	gif_encap_pause(struct gif_softc *);
 static int	gif_encap_detach(struct gif_softc *);
 
 static struct if_clone gif_cloner =
@@ -750,6 +751,33 @@ gif_encap_attach(struct gif_softc *sc)
 }
 
 static int
+gif_encap_pause(struct gif_softc *sc)
+{
+	int error;
+
+	if (sc == NULL || sc->gif_psrc == NULL)
+		return EINVAL;
+
+	switch (sc->gif_psrc->sa_family) {
+#ifdef INET
+	case AF_INET:
+		error = in_gif_pause(sc);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		error = in6_gif_pause(sc);
+		break;
+#endif
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return error;
+}
+
+static int
 gif_encap_detach(struct gif_softc *sc)
 {
 	int error;
@@ -815,7 +843,11 @@ gif_set_tunnel(struct ifnet *ifp, struct
 		return ENOMEM;
 	}
 
-	/* Firstly, clear old configurations. */
+	/* Prevent further scheduling of the softint.  */
+	if (sc->gif_psrc)
+		(void)gif_encap_pause(sc);
+
+	/* Disestablish the softint.  */
 	if (sc->gif_si) {
 		osrc = sc->gif_psrc;
 		odst = sc->gif_pdst;
@@ -843,12 +875,14 @@ gif_set_tunnel(struct ifnet *ifp, struct
 		osrc = NULL;
 		odst = NULL;
 	}
+
+	/* Detach encap.  */
 	/* XXX we can detach from both, but be polite just in case */
 	if (sc->gif_psrc)
 		(void)gif_encap_detach(sc);
 
 	/*
-	 * Secondly, try to set new configurations.
+	 * Try to set new configurations.
 	 * If the setup failed, rollback to old configurations.
 	 */
 	do {
Index: sys/netinet/in_gif.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_gif.c,v
retrieving revision 1.70
diff -p -u -r1.70 in_gif.c
--- sys/netinet/in_gif.c	22 Jan 2016 05:15:10 -0000	1.70
+++ sys/netinet/in_gif.c	22 Jan 2016 15:34:43 -0000
@@ -385,7 +385,7 @@ in_gif_attach(struct gif_softc *sc)
 }
 
 int
-in_gif_detach(struct gif_softc *sc)
+in_gif_pause(struct gif_softc *sc)
 {
 	int error;
 
@@ -393,7 +393,14 @@ in_gif_detach(struct gif_softc *sc)
 	if (error == 0)
 		sc->encap_cookie4 = NULL;
 
+	return error;
+}
+
+int
+in_gif_detach(struct gif_softc *sc)
+{
+
 	rtcache_free(&sc->gif_ro);
 
-	return error;
+	return 0;
 }
Index: sys/netinet/in_gif.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_gif.h,v
retrieving revision 1.14
diff -p -u -r1.14 in_gif.h
--- sys/netinet/in_gif.h	23 Nov 2006 04:07:07 -0000	1.14
+++ sys/netinet/in_gif.h	22 Jan 2016 15:34:43 -0000
@@ -44,6 +44,7 @@ int in_gif_output(struct ifnet *, int, s
 int gif_encapcheck4(struct mbuf *, int, int, void *);
 #endif
 int in_gif_attach(struct gif_softc *);
+int in_gif_pause(struct gif_softc *);
 int in_gif_detach(struct gif_softc *);
 
 #endif /* !_NETINET_IN_GIF_H_ */
Index: sys/netinet6/in6_gif.c
===================================================================
RCS file: /cvsroot/src/sys/netinet6/in6_gif.c,v
retrieving revision 1.67
diff -p -u -r1.67 in6_gif.c
--- sys/netinet6/in6_gif.c	22 Jan 2016 05:15:10 -0000	1.67
+++ sys/netinet6/in6_gif.c	22 Jan 2016 15:34:43 -0000
@@ -384,7 +384,7 @@ in6_gif_attach(struct gif_softc *sc)
 }
 
 int
-in6_gif_detach(struct gif_softc *sc)
+in6_gif_pause(struct gif_softc *sc)
 {
 	int error;
 
@@ -392,9 +392,16 @@ in6_gif_detach(struct gif_softc *sc)
 	if (error == 0)
 		sc->encap_cookie6 = NULL;
 
+	return error;
+}
+
+int
+in6_gif_detach(struct gif_softc *sc)
+{
+
 	rtcache_free(&sc->gif_ro);
 
-	return error;
+	return 0;
 }
 
 void *
Index: sys/netinet6/in6_gif.h
===================================================================
RCS file: /cvsroot/src/sys/netinet6/in6_gif.h,v
retrieving revision 1.13
diff -p -u -r1.13 in6_gif.h
--- sys/netinet6/in6_gif.h	24 Apr 2008 11:38:38 -0000	1.13
+++ sys/netinet6/in6_gif.h	22 Jan 2016 15:34:43 -0000
@@ -44,6 +44,7 @@ int in6_gif_output(struct ifnet *, int, 
 int gif_encapcheck6(struct mbuf *, int, int, void *);
 #endif
 int in6_gif_attach(struct gif_softc *);
+int in6_gif_pause(struct gif_softc *);
 int in6_gif_detach(struct gif_softc *);
 void *in6_gif_ctlinput(int, const struct sockaddr *, void *);
 


Home | Main Index | Thread Index | Old Index