tech-kern archive


Re: RFC: L2TPv3 interface



   Date: Thu, 19 Jan 2017 17:58:17 +0900
   From: Kengo NAKAHARA <k-nakahara%iij.ad.jp@localhost>

   My co-workers implemented an L2TPv3 (RFC 3931) interface for an old
   version of NetBSD.  I then ported the interface to NetBSD-current and
   made it MP-safe.  Here is the patch.

       http://netbsd.org/~knakahara/if-l2tp/if-l2tp.patch

Cool!

A few little comments:

   diff --git a/sys/net/if.c b/sys/net/if.c
   index 2386af3..ba63266 100644
   --- a/sys/net/if.c
   +++ b/sys/net/if.c
   @@ -1599,7 +1613,7 @@ if_clone_lookup(const char *name, int *unitp)
           strcpy(ifname, "if_");
           /* separate interface name from unit */
           for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ &&
   -           *cp && (*cp < '0' || *cp > '9');)
   +           *cp && !if_is_unit(cp);)
                   *dp++ = *cp++;

This changes the generic syntax of interface names, perhaps to allow
the `2' in `l2tp', although since this loop skips over the first three
octets that doesn't seem to be necessary.  Either way, I don't have a
problem with this, but it should be done in a separate change.
 
   diff --git a/sys/net/if_l2tp.c b/sys/net/if_l2tp.c
   new file mode 100644
   index 0000000..dda8bbd
   --- /dev/null
   +++ b/sys/net/if_l2tp.c
   @@ -0,0 +1,1541 @@
   [...]
   +/*
   + * l2tp global variable definitions
   + */
   +LIST_HEAD(l2tp_sclist, l2tp_softc);
   +static struct l2tp_sclist l2tp_softc_list;
   +kmutex_t l2tp_list_lock;
   +
   +#if !defined(L2TP_ID_HASH_SIZE)
   +#define L2TP_ID_HASH_SIZE 64
   +#endif
   +static u_long l2tp_id_hash_mask;
   +
   +kmutex_t l2tp_hash_lock;
   +static struct pslist_head *l2tp_hashed_list = NULL;

Consider putting related global state into cacheline-aligned structs?

static struct {
	kmutex_t		lock;
	struct l2tp_sclist	list;
} l2tp_softc __cacheline_aligned;

static struct {
	kmutex_t		lock;
	struct pslist_head	*list;
	unsigned long		mask;
} l2tp_hash __cacheline_aligned;

   +pserialize_t l2tp_psz;
   +struct psref_class *lv_psref_class __read_mostly;

__read_mostly for l2tp_psz?
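
I.e., something like:

	pserialize_t l2tp_psz __read_mostly;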

   +static int
   +l2tpdetach(void)
   +{
   +       int error;
   +
   +       if (!LIST_EMPTY(&l2tp_softc_list))
   +               return EBUSY;

Need lock here?  Need to first set flag preventing new creation?

mutex_enter(&l2tp_softc.lock);
KASSERT(!l2tp_softc.detaching);
l2tp_softc.detaching = true;
if (!LIST_EMPTY(&l2tp_softc.list)) {
	l2tp_softc.detaching = false;
	mutex_exit(&l2tp_softc.lock);
	return EBUSY;
}
mutex_exit(&l2tp_softc.lock);

Anyone trying to add to l2tp_softc.list must also check
l2tp_softc.detaching before proceeding.

   +static int
   +l2tp_clone_destroy(struct ifnet *ifp)
   +{
   +       struct l2tp_softc *sc = (void *) ifp;

Use container_of here:

	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);

No functional difference, but the compiler type-checks it.

   +static int
   +l2tp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
   +    const struct rtentry *rt)
   +{
   +       struct l2tp_softc *sc = (struct l2tp_softc*)ifp;

container_of

   +void
   +l2tp_input(struct mbuf *m, struct ifnet *ifp)
   +{
   +
   +       KASSERT(ifp != NULL);
   +
   +       if (0 == (mtod(m, u_long) & 0x03)) {
   +               /* copy and align head of payload */
   +               struct mbuf *m_head;
   +               int copy_length;
   +
   +#define L2TP_COPY_LENGTH               60
   +#define L2TP_LINK_HDR_ROOM     (MHLEN - L2TP_COPY_LENGTH - 4/*round4(2)*/)
   +
   +               if (m->m_pkthdr.len < L2TP_COPY_LENGTH) {
   +                       copy_length = m->m_pkthdr.len;
   +               } else {
   +                       copy_length = L2TP_COPY_LENGTH;
   +               }
   +
   +               if (m->m_len < copy_length) {
   +                       m = m_pullup(m, copy_length);
   +                       if (m == NULL)
   +                               return;
   +               }
   +
   +               MGETHDR(m_head, M_DONTWAIT, MT_HEADER);
   +               if (m_head == NULL) {
   +                       m_freem(m);
   +                       return;
   +               }
   +               M_COPY_PKTHDR(m_head, m);
   +
   +               m_head->m_data += 2 /* align */ + L2TP_LINK_HDR_ROOM;
   +               memcpy(m_head->m_data, m->m_data, copy_length);
   +               m_head->m_len = copy_length;
   +               m->m_data += copy_length;
   +               m->m_len -= copy_length;
   +
   +               /* construct chain */
   +               if (m->m_len == 0) {
   +                       m_head->m_next = m_free(m); /* not m_freem */
   +               } else {
   +                       /*
   +                        * copyed mtag in previous call M_COPY_PKTHDR
   +                        * but don't delete mtag in case cutt of M_PKTHDR flag
   +                        */
   +                       m_tag_delete_chain(m, NULL);
   +                       m->m_flags &= ~M_PKTHDR;
   +                       m_head->m_next = m;
   +               }
   +
   +               /* override m */
   +               m = m_head;
   +       }

Someone more familiar with the mbuf API than I should review this mbuf
juggling show!

   +/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
   +int
   +l2tp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
   +{
   +       struct l2tp_softc *sc = (struct l2tp_softc*)ifp;

container_of

   +       case SIOCSIFMTU:
   +               error = kauth_authorize_generic(kauth_cred_get(),
   +                   KAUTH_GENERIC_ISSUSER, NULL);

Why the kauth check here and not in any other drivers?  Is this kauth
check unnecessary, or does its absence in other drivers indicate a
bug?  Likewise in a few other places below.

   +               if (error)
   +                       break;
   +               switch (cmd) {
   +#ifdef INET
   +               case SIOCSIFPHYADDR:
   +                       src = (struct sockaddr *)
   +                               &(((struct in_aliasreq *)data)->ifra_addr);
   +                       dst = (struct sockaddr *)
   +                               &(((struct in_aliasreq *)data)->ifra_dstaddr);

Consider using one more local variable instead of multiple levels of
nesting?

case SIOCSIFPHYADDR: {
	struct in_aliasreq *aliasreq = data;
	src = (struct sockaddr *)&aliasreq->ifra_addr;
	dst = (struct sockaddr *)&aliasreq->ifra_dstaddr;
	...
}

Likewise in a few other places below.

   +static int
   +l2tp_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
   +{
   +       struct l2tp_softc *sc = (struct l2tp_softc *)ifp;

container_of

   +       error = encap_lock_enter();
   +       if (error)
   +               goto error;
   +
   +       mutex_enter(&sc->l2tp_lock);

Document lock order of encap_lock ---> struct l2tp_softc::l2tp_lock?

   +       ovar = sc->l2tp_var;
   +       osrc = ovar->lv_psrc;
   +       odst = ovar->lv_pdst;
   +       memcpy(nvar, ovar, sizeof(*nvar));

You can just do

	*nvar = *ovar;

here, since they are both guaranteed to be aligned.

   +static int id_hash_func(uint32_t id)
   +{
   +       uint32_t hash;
   +
   +       hash = (id >> 16) ^ id;
   +       hash = (hash >> 4) ^ hash;
   +
   +       return hash & l2tp_id_hash_mask;
   +}

Is this hash function an essential part of the l2tp protocol, or is it
just something that makes it more likely that all the bits of id are
involved when masking with l2tp_id_hash_mask?  (Asking so I can know
whether it is safe to replace by, e.g., siphash later, once I get
around to adding the siphash code I've been sitting on for about five
years now.)

   +/*
   + * l2tp_variant update API.
   + *
   + * Assumption:
   + * reader side dereferences sc->l2tp_var in reader critical section only,
   + * that is, all of reader sides do not reader the sc->l2tp_var after
   + * pserialize_perform().
   + */
   +static void
   +l2tp_variant_update(struct l2tp_softc *sc, struct l2tp_variant *nvar)
   +{
   +       struct ifnet *ifp = &sc->l2tp_ec.ec_if;
   +       struct l2tp_variant *ovar = sc->l2tp_var;
   +
   +       KASSERT(mutex_owned(&sc->l2tp_lock));
   +
   +       membar_producer();
   +       atomic_swap_ptr(&sc->l2tp_var, nvar);
   +       pserialize_perform(l2tp_psz);
   +       psref_target_destroy(&ovar->lv_psref, lv_psref_class);

No need for atomic_swap_ptr.  Just

        sc->l2tp_var = nvar;

is enough.  Nobody else can write to it because we hold the lock.
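
That is, roughly (untested):

	membar_producer();
	sc->l2tp_var = nvar;
	pserialize_perform(l2tp_psz);
	psref_target_destroy(&ovar->lv_psref, lv_psref_class);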

   diff --git a/sys/net/if_l2tp.h b/sys/net/if_l2tp.h
   new file mode 100644
   index 0000000..1aae23c
   --- /dev/null
   +++ b/sys/net/if_l2tp.h
   @@ -0,0 +1,206 @@
   [...]
   +#include <net/if_ether.h>
   +#include <netinet/in.h>
   +/* xxx sigh, why route have struct route instead of pointer? */

Unclear what this comment refers to?

   +
   +#define SIOCSL2TPSESSION       _IOW('i', 151, struct ifreq)
   +#define        SIOCDL2TPSESSION _IOW('i', 152, struct ifreq)
   +#define SIOCSL2TPCOOKIE        _IOW('i', 153, struct ifreq)
   +#define        SIOCDL2TPCOOKIE _IOW('i', 154, struct ifreq)
   +#define        SIOCSL2TPSTATE  _IOW('i', 155, struct ifreq)
   +#define SIOCGL2TP      SIOCGIFGENERIC

Pick tabs or spaces and be consistent?  (Makes diffs look nicer.
Usual rule is `#define<TAB>xyz<TAB>'.)

Say struct l2tp_req, not struct ifreq, if that's what you mean?
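
E.g., if the argument really is a struct l2tp_req, something like:

#define	SIOCSL2TPSESSION	_IOW('i', 151, struct l2tp_req)
#define	SIOCDL2TPSESSION	_IOW('i', 152, struct l2tp_req)
#define	SIOCSL2TPCOOKIE		_IOW('i', 153, struct l2tp_req)
#define	SIOCDL2TPCOOKIE		_IOW('i', 154, struct l2tp_req)
#define	SIOCSL2TPSTATE		_IOW('i', 155, struct l2tp_req)
#define	SIOCGL2TP		SIOCGIFGENERIC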

   +struct l2tp_req {
   +       int state;
   +       int my_cookie_len;
   +       int peer_cookie_len;

Pick a fixed-width unsigned integer type for this unless you actually
need negative values?
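
E.g.:

	uint32_t	my_cookie_len;
	uint32_t	peer_cookie_len;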

   +#ifdef _KERNEL
   +extern struct psref_class *lv_psref_class __read_mostly;

The __read_mostly attribute matters only for definitions, I believe.
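
So the declaration could just be:

	extern struct psref_class *lv_psref_class;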

   +struct l2tp_softc {
   +       struct ethercom l2tp_ec;        /* common area - must be at the top */
   +                               /* to use ether_input(), we must have this */
   +       percpu_t *l2tp_ro_percpu;

Mark this with what the type of the per-CPU object is.  For example,

	percpu_t	*l2tp_ro_percpu;	/* struct l2tp_ro */

(Obviously this is not as good for type checking as percpu<l2tp_ro> in
C++ or similar, but it's better than nothing for the reader's sake.)

   +static inline bool
   +l2tp_heldref_variant(struct l2tp_variant *var)
   +{
   +
   +       if (var == NULL)
   +               return false;
   +       return psref_held(&var->lv_psref, lv_psref_class);
   +}

Both users of this first do KASSERT(var != NULL), so there's no need
for the conditional `if (var == NULL)' here.
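
I.e.:

static inline bool
l2tp_heldref_variant(struct l2tp_variant *var)
{

	return psref_held(&var->lv_psref, lv_psref_class);
}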

   +/* Prototypes */
   +void l2tpattach(int);
   +void l2tpattach0(struct l2tp_softc *);
   +void l2tp_input(struct mbuf *, struct ifnet *);
   +int l2tp_ioctl(struct ifnet *, u_long, void *);
   +
   +struct l2tp_variant* l2tp_lookup_session_ref(uint32_t, struct psref *);

KNF:	struct l2tp_variant *l2tp_lookup_session_ref(uint32_t, struct psref *);

   +/*
   + * Locking notes:
   + * + l2tp_softc_list is protected by l2tp_list_lock (an adaptive mutex)
   + *       l2tp_softc_list is list of all l2tp_softcs, and it is used to avoid
   + *       wrong unload.

Instead of `wrong unload', maybe `unload while busy' or something?

   + * + l2tp_hashed_list is protected by
   + *   - l2tp_hash_lock (an adaptive mutex) for writer
   + *   - pserialize for reader
   + *       l2tp_hashed_list is hashed list of all l2tp_softcs, and it is used by
   + *       input processing to find appropriate softc.
   + * + l2tp_softc->l2tp_var is protected by
   + *   - l2tp_softc->l2tp_lock (an adaptive mutex) for writer
   + *   - l2tp_var->lv_psref for reader
   + *       l2tp_softc->l2tp_var is used for variant values while the l2tp tunnel
   + *       exists.

This looks great!  Can you also state any lock order constraints here?
If the only constraint is that no pair of these locks is ever held
simultaneously, so be it -- say that too.  It looks like encap_lock
needs to be mentioned, though.
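
For instance, going only by l2tp_set_tunnel above (adjust for whatever
the real constraints are), something along the lines of:

 * + Lock order:
 *   - encap_lock -> struct l2tp_softc::l2tp_lock

plus a note for any pairs of locks that are never held simultaneously.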

   diff --git a/sys/netinet/in_l2tp.c b/sys/netinet/in_l2tp.c
   new file mode 100644
   index 0000000..9b2ccd6
   --- /dev/null
   +++ b/sys/netinet/in_l2tp.c
   @@ -0,0 +1,417 @@
   [...]
   +int
   +in_l2tp_output(struct l2tp_variant *var, struct mbuf *m)
   +{
   [...]
   +       bzero(&iphdr, sizeof(iphdr));

Use memset, not bzero.
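
I.e.:

	memset(&iphdr, 0, sizeof(iphdr));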

   +               if (var->lv_peer_cookie_len == 4) {
   +                       cookie_32 = htonl((uint32_t)var->lv_peer_cookie);
   +                       memcpy(mtod(m, uint32_t *), &cookie_32,
   +                           sizeof(uint32_t));

I have the impression that mtod(m, T *) is supposed to be used only
when m is actually aligned for a T.  Most uses of memcpy(mtod(m, T *),
...) use void or uint8_t:

	memcpy(mtod(m, void *), &cookie_32, sizeof(uint32_t));

I would suggest doing that, in case anyone ever makes mtod check
alignment -- unless you can guarantee alignment, in which case you can
just do

	*mtod(m, uint32_t *) = cookie_32;

   +       error = ip_output(m, NULL, &lro->lr_ro, 0, NULL, NULL);
   +       mutex_exit(&lro->lr_lock);
   +       percpu_putref(sc->l2tp_ro_percpu);

Hope it's safe to call ip_output with this lock held!  Is it easy to
prove that ip_output can only at worst put the mbuf on a queue, or
that if it recursively calls in_l2tp_output, the recursion detection
will prevent locking against myself?

   diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
   index 5534847..e318a7b 100644
   --- a/sys/netinet/in_proto.c
   +++ b/sys/netinet/in_proto.c
   @@ -360,6 +360,16 @@ const struct protosw inetsw[] = {
           .pr_init = carp_init,
    },
    #endif /* NCARP > 0 */
   +{      .pr_type = SOCK_RAW,
   +       .pr_domain = &inetdomain,

Should this be conditional on NL2TP > 0?
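
I.e., paralleling the NCARP > 0 case just above, something like:

#if NL2TP > 0
{	.pr_type = SOCK_RAW,
	.pr_domain = &inetdomain,
	...
},
#endif /* NL2TP > 0 */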


