tech-kern archive
Re: RFC: L2TPv3 interface
Date: Thu, 19 Jan 2017 17:58:17 +0900
From: Kengo NAKAHARA <k-nakahara%iij.ad.jp@localhost>
My co-workers implemented an L2TPv3 (RFC 3931) interface for an older
version of NetBSD. I have since ported the interface to NetBSD-current
and MP-ified it. Here is the patch.
http://netbsd.org/~knakahara/if-l2tp/if-l2tp.patch
Cool!
A few little comments:
diff --git a/sys/net/if.c b/sys/net/if.c
index 2386af3..ba63266 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1599,7 +1613,7 @@ if_clone_lookup(const char *name, int *unitp)
strcpy(ifname, "if_");
/* separate interface name from unit */
for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ &&
- *cp && (*cp < '0' || *cp > '9');)
+ *cp && !if_is_unit(cp);)
*dp++ = *cp++;
This changes the generic syntax of interface names, perhaps to allow the
`2' in `l2tp', although since this loop skips over the first three
octets that doesn't seem to be necessary. Either way, I don't have a
problem with this, but it should be done in a separate change.
diff --git a/sys/net/if_l2tp.c b/sys/net/if_l2tp.c
new file mode 100644
index 0000000..dda8bbd
--- /dev/null
+++ b/sys/net/if_l2tp.c
@@ -0,0 +1,1541 @@
[...]
+/*
+ * l2tp global variable definitions
+ */
+LIST_HEAD(l2tp_sclist, l2tp_softc);
+static struct l2tp_sclist l2tp_softc_list;
+kmutex_t l2tp_list_lock;
+
+#if !defined(L2TP_ID_HASH_SIZE)
+#define L2TP_ID_HASH_SIZE 64
+#endif
+static u_long l2tp_id_hash_mask;
+
+kmutex_t l2tp_hash_lock;
+static struct pslist_head *l2tp_hashed_list = NULL;
Consider putting related global state into cacheline-aligned structs?
static struct {
kmutex_t lock;
struct l2tp_sclist list;
} l2tp_softc __cacheline_aligned;
static struct {
kmutex_t lock;
struct pslist_head *list;
unsigned long mask;
} l2tp_hash __cacheline_aligned;
+pserialize_t l2tp_psz;
+struct psref_class *lv_psref_class __read_mostly;
__read_mostly for l2tp_psz?
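That is, presumably:

	pserialize_t l2tp_psz __read_mostly;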
+static int
+l2tpdetach(void)
+{
+ int error;
+
+ if (!LIST_EMPTY(&l2tp_softc_list))
+ return EBUSY;
Need lock here? Need to first set flag preventing new creation?
mutex_enter(&l2tp_softc.lock);
KASSERT(!l2tp_softc.detaching);
l2tp_softc.detaching = true;
if (!LIST_EMPTY(&l2tp_softc.list)) {
l2tp_softc.detaching = false;
mutex_exit(&l2tp_softc.lock);
return EBUSY;
}
mutex_exit(&l2tp_softc.lock);
Anyone trying to add to l2tp_softc.list must also check
l2tp_softc.detaching before proceeding.
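Rough sketch of the create side (the list entry field name here is just
a guess):

	mutex_enter(&l2tp_softc.lock);
	if (l2tp_softc.detaching) {
		mutex_exit(&l2tp_softc.lock);
		return EBUSY;	/* or whatever create should fail with */
	}
	LIST_INSERT_HEAD(&l2tp_softc.list, sc, l2tp_list);
	mutex_exit(&l2tp_softc.lock);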
+static int
+l2tp_clone_destroy(struct ifnet *ifp)
+{
+ struct l2tp_softc *sc = (void *) ifp;
Use container_of here:
struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
l2tp_ec.ec_if);
No functional difference, but the compiler type-checks it.
+static int
+l2tp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ const struct rtentry *rt)
+{
+ struct l2tp_softc *sc = (struct l2tp_softc*)ifp;
container_of
+void
+l2tp_input(struct mbuf *m, struct ifnet *ifp)
+{
+
+ KASSERT(ifp != NULL);
+
+ if (0 == (mtod(m, u_long) & 0x03)) {
+ /* copy and align head of payload */
+ struct mbuf *m_head;
+ int copy_length;
+
+#define L2TP_COPY_LENGTH 60
+#define L2TP_LINK_HDR_ROOM (MHLEN - L2TP_COPY_LENGTH - 4/*round4(2)*/)
+
+ if (m->m_pkthdr.len < L2TP_COPY_LENGTH) {
+ copy_length = m->m_pkthdr.len;
+ } else {
+ copy_length = L2TP_COPY_LENGTH;
+ }
+
+ if (m->m_len < copy_length) {
+ m = m_pullup(m, copy_length);
+ if (m == NULL)
+ return;
+ }
+
+ MGETHDR(m_head, M_DONTWAIT, MT_HEADER);
+ if (m_head == NULL) {
+ m_freem(m);
+ return;
+ }
+ M_COPY_PKTHDR(m_head, m);
+
+ m_head->m_data += 2 /* align */ + L2TP_LINK_HDR_ROOM;
+ memcpy(m_head->m_data, m->m_data, copy_length);
+ m_head->m_len = copy_length;
+ m->m_data += copy_length;
+ m->m_len -= copy_length;
+
+ /* construct chain */
+ if (m->m_len == 0) {
+ m_head->m_next = m_free(m); /* not m_freem */
+ } else {
+ /*
+ * The mtags were copied by the M_COPY_PKTHDR call above;
+ * delete m's mtag chain before clearing its M_PKTHDR flag.
+ */
+ m_tag_delete_chain(m, NULL);
+ m->m_flags &= ~M_PKTHDR;
+ m_head->m_next = m;
+ }
+
+ /* override m */
+ m = m_head;
+ }
Someone more familiar with the mbuf API than I should review this mbuf
juggling show!
+/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
+int
+l2tp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
+{
+ struct l2tp_softc *sc = (struct l2tp_softc*)ifp;
container_of
+ case SIOCSIFMTU:
+ error = kauth_authorize_generic(kauth_cred_get(),
+ KAUTH_GENERIC_ISSUSER, NULL);
Why the kauth check here and not in any other drivers? Is this kauth
check unnecessary, or does its absence in other drivers indicate a
bug? Likewise in a few other places below.
+ if (error)
+ break;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
Consider using one more local variable instead of multiple levels of
nesting?
case SIOCSIFPHYADDR: {
struct in_aliasreq *aliasreq = data;
src = (struct sockaddr *)&aliasreq->ifra_addr;
dst = (struct sockaddr *)&aliasreq->ifra_dstaddr;
...
}
Likewise in a few other places below.
+static int
+l2tp_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
+{
+ struct l2tp_softc *sc = (struct l2tp_softc *)ifp;
container_of
+ error = encap_lock_enter();
+ if (error)
+ goto error;
+
+ mutex_enter(&sc->l2tp_lock);
Document lock order of encap_lock ---> struct l2tp_softc::l2tp_lock?
+ ovar = sc->l2tp_var;
+ osrc = ovar->lv_psrc;
+ odst = ovar->lv_pdst;
+ memcpy(nvar, ovar, sizeof(*nvar));
You can just do
*nvar = *ovar;
here, since they are both guaranteed to be aligned.
+static int id_hash_func(uint32_t id)
+{
+ uint32_t hash;
+
+ hash = (id >> 16) ^ id;
+ hash = (hash >> 4) ^ hash;
+
+ return hash & l2tp_id_hash_mask;
+}
Is this hash function an essential part of the l2tp protocol, or is it
just something that will more likely involve all the bits of id when
masking with l2tp_id_hash_mask? (Asking so I can know whether it is
safe to replace by, e.g., siphash later, once I get around to adding
the siphash code I've been sitting on for about five years now.)
+/*
+ * l2tp_variant update API.
+ *
+ * Assumption:
+ * Readers dereference sc->l2tp_var only inside a reader critical section,
+ * that is, no reader can still be using sc->l2tp_var after
+ * pserialize_perform() returns.
+ */
+static void
+l2tp_variant_update(struct l2tp_softc *sc, struct l2tp_variant *nvar)
+{
+ struct ifnet *ifp = &sc->l2tp_ec.ec_if;
+ struct l2tp_variant *ovar = sc->l2tp_var;
+
+ KASSERT(mutex_owned(&sc->l2tp_lock));
+
+ membar_producer();
+ atomic_swap_ptr(&sc->l2tp_var, nvar);
+ pserialize_perform(l2tp_psz);
+ psref_target_destroy(&ovar->lv_psref, lv_psref_class);
No need for atomic_swap_ptr. Just
sc->l2tp_var = nvar;
is enough. Nobody else can write to it because we hold the lock.
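So the update sequence would just be:

	membar_producer();
	sc->l2tp_var = nvar;
	pserialize_perform(l2tp_psz);
	psref_target_destroy(&ovar->lv_psref, lv_psref_class);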
diff --git a/sys/net/if_l2tp.h b/sys/net/if_l2tp.h
new file mode 100644
index 0000000..1aae23c
--- /dev/null
+++ b/sys/net/if_l2tp.h
@@ -0,0 +1,206 @@
[...]
+#include <net/if_ether.h>
+#include <netinet/in.h>
+/* xxx sigh, why route have struct route instead of pointer? */
Unclear what this comment refers to?
+
+#define SIOCSL2TPSESSION _IOW('i', 151, struct ifreq)
+#define SIOCDL2TPSESSION _IOW('i', 152, struct ifreq)
+#define SIOCSL2TPCOOKIE _IOW('i', 153, struct ifreq)
+#define SIOCDL2TPCOOKIE _IOW('i', 154, struct ifreq)
+#define SIOCSL2TPSTATE _IOW('i', 155, struct ifreq)
+#define SIOCGL2TP SIOCGIFGENERIC
Pick tabs or spaces and be consistent? (Makes diffs look nicer.
Usual rule is `#define<TAB>xyz<TAB>'.)
Say struct l2tp_req, not struct ifreq, if that's what you mean?
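That is, assuming struct l2tp_req is really what these ioctls take,
something like:

#define	SIOCSL2TPSESSION	_IOW('i', 151, struct l2tp_req)
#define	SIOCDL2TPSESSION	_IOW('i', 152, struct l2tp_req)

and so on for the rest.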
+struct l2tp_req {
+ int state;
+ int my_cookie_len;
+ int peer_cookie_len;
Pick a fixed-width unsigned integer type for this unless you actually
need negative values?
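E.g.:

	uint32_t	my_cookie_len;
	uint32_t	peer_cookie_len;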
+#ifdef _KERNEL
+extern struct psref_class *lv_psref_class __read_mostly;
The __read_mostly attribute matters only for definitions, I believe.
+struct l2tp_softc {
+ struct ethercom l2tp_ec; /* common area - must be at the top */
+ /* to use ether_input(), we must have this */
+ percpu_t *l2tp_ro_percpu;
Mark this with what the type of the per-CPU object is. For example,
percpu_t *l2tp_ro_percpu; /* struct l2tp_ro */
(Obviously this is not as good for type checking as percpu<l2tp_ro> in
C++ or similar, but it's better than nothing for the reader's sake.)
+static inline bool
+l2tp_heldref_variant(struct l2tp_variant *var)
+{
+
+ if (var == NULL)
+ return false;
+ return psref_held(&var->lv_psref, lv_psref_class);
+}
Both users of this first do KASSERT(var != NULL), so there's no need
for the conditional `if (var == NULL)' here.
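I.e. it could shrink to:

static inline bool
l2tp_heldref_variant(struct l2tp_variant *var)
{

	return psref_held(&var->lv_psref, lv_psref_class);
}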
+/* Prototypes */
+void l2tpattach(int);
+void l2tpattach0(struct l2tp_softc *);
+void l2tp_input(struct mbuf *, struct ifnet *);
+int l2tp_ioctl(struct ifnet *, u_long, void *);
+
+struct l2tp_variant* l2tp_lookup_session_ref(uint32_t, struct psref *);
KNF: struct l2tp_variant *l2tp_lookup_session_ref(uint32_t, struct psref *);
+/*
+ * Locking notes:
+ * + l2tp_softc_list is protected by l2tp_list_lock (an adaptive mutex)
+ * l2tp_softc_list is list of all l2tp_softcs, and it is used to avoid
+ * wrong unload.
Instead of `wrong unload', maybe `unload while busy' or something?
+ * + l2tp_hashed_list is protected by
+ * - l2tp_hash_lock (an adaptive mutex) for writer
+ * - pserialize for reader
+ * l2tp_hashed_list is hashed list of all l2tp_softcs, and it is used by
+ * input processing to find appropriate softc.
+ * + l2tp_softc->l2tp_var is protected by
+ * - l2tp_softc->l2tp_lock (an adaptive mutex) for writer
+ * - l2tp_var->lv_psref for reader
+ * l2tp_softc->l2tp_var is used for variant values while the l2tp tunnel
+ * exists.
This looks great! Can you also state any lock order constraints here?
If the only constraint is that no pair of these locks is ever held
simultaneously, so be it -- say that too. It looks like encap_lock
needs to be mentioned, though.
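For example, appended to the comment above (assuming encap_lock is
always taken before l2tp_lock, as l2tp_set_tunnel does):

 * Lock order:
 *	encap_lock -> struct l2tp_softc::l2tp_lock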
diff --git a/sys/netinet/in_l2tp.c b/sys/netinet/in_l2tp.c
new file mode 100644
index 0000000..9b2ccd6
--- /dev/null
+++ b/sys/netinet/in_l2tp.c
@@ -0,0 +1,417 @@
[...]
+int
+in_l2tp_output(struct l2tp_variant *var, struct mbuf *m)
+{
[...]
+ bzero(&iphdr, sizeof(iphdr));
Use memset, not bzero.
+ if (var->lv_peer_cookie_len == 4) {
+ cookie_32 = htonl((uint32_t)var->lv_peer_cookie);
+ memcpy(mtod(m, uint32_t *), &cookie_32,
+ sizeof(uint32_t));
I have the impression that mtod(m, T *) is supposed to be used only
when m is actually aligned for a T. Most uses of memcpy(mtod(m, T *),
...) use void or uint8_t:
memcpy(mtod(m, void *), &cookie_32, sizeof(uint32_t));
I would suggest doing that, in case anyone ever makes mtod check
alignment -- unless you can guarantee alignment, in which case you can
just do
*mtod(m, uint32_t *) = cookie_32;
+ error = ip_output(m, NULL, &lro->lr_ro, 0, NULL, NULL);
+ mutex_exit(&lro->lr_lock);
+ percpu_putref(sc->l2tp_ro_percpu);
Hope it's safe to call ip_output with this lock held! Is it easy to
prove that ip_output can only at worst put the mbuf on a queue, or
that if it recursively calls in_l2tp_output, the recursion detection
will prevent locking against myself?
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index 5534847..e318a7b 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -360,6 +360,16 @@ const struct protosw inetsw[] = {
.pr_init = carp_init,
},
#endif /* NCARP > 0 */
+{ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
Should this be conditional on NL2TP > 0?
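E.g., mirroring the NCARP entry just above (assuming config generates an
l2tp.h with NL2TP for the pseudo-device, the way carp.h provides NCARP):

#include "l2tp.h"
...
#if NL2TP > 0
{	.pr_type = SOCK_RAW,
	.pr_domain = &inetdomain,
	...
},
#endif /* NL2TP > 0 */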