Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-6]: src/sys/net/npf Pull up following revision(s) (requested by r...



details:   https://anonhg.NetBSD.org/src/rev/5d6b65c9b77b
branches:  netbsd-6
changeset: 776499:5d6b65c9b77b
user:      bouyer <bouyer%NetBSD.org@localhost>
date:      Sun Nov 17 19:16:57 2013 +0000

description:
Pull up following revision(s) (requested by rmind in ticket #985):
        sys/net/npf/npf_impl.h: revision 1.35
        sys/net/npf/npf_nat.c: revision 1.21
        sys/net/npf/npf_session.c: revision 1.26
npf_session_setnat: fix the race condition when the old connection is still
being expired while a new/duplicate is being created.

diffstat:

 sys/net/npf/npf_impl.h    |    4 +-
 sys/net/npf/npf_nat.c     |    6 +-
 sys/net/npf/npf_session.c |  110 ++++++++++++++++++++++++---------------------
 3 files changed, 63 insertions(+), 57 deletions(-)

diffs (truncated from 302 to 300 lines):

diff -r 8ae75b4090e9 -r 5d6b65c9b77b sys/net/npf/npf_impl.h
--- a/sys/net/npf/npf_impl.h    Sun Nov 17 18:25:45 2013 +0000
+++ b/sys/net/npf/npf_impl.h    Sun Nov 17 19:16:57 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: npf_impl.h,v 1.10.2.14 2013/02/18 18:26:14 riz Exp $   */
+/*     $NetBSD: npf_impl.h,v 1.10.2.15 2013/11/17 19:16:57 bouyer Exp $        */
 
 /*-
  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
@@ -285,7 +285,7 @@
 void           npf_session_expire(npf_session_t *);
 bool           npf_session_pass(const npf_session_t *, npf_rproc_t **);
 void           npf_session_setpass(npf_session_t *, npf_rproc_t *);
-int            npf_session_setnat(npf_session_t *, npf_nat_t *, const int);
+int            npf_session_setnat(npf_session_t *, npf_nat_t *, u_int);
 npf_nat_t *    npf_session_retnat(npf_session_t *, const int, bool *);
 
 int            npf_session_save(prop_array_t, prop_array_t);
diff -r 8ae75b4090e9 -r 5d6b65c9b77b sys/net/npf/npf_nat.c
--- a/sys/net/npf/npf_nat.c     Sun Nov 17 18:25:45 2013 +0000
+++ b/sys/net/npf/npf_nat.c     Sun Nov 17 19:16:57 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: npf_nat.c,v 1.10.2.8 2013/02/11 21:49:49 riz Exp $     */
+/*     $NetBSD: npf_nat.c,v 1.10.2.9 2013/11/17 19:16:57 bouyer Exp $  */
 
 /*-
  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.10.2.8 2013/02/11 21:49:49 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.10.2.9 2013/11/17 19:16:57 bouyer Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -676,7 +676,7 @@
                 * Note: packet now has a translated address in the cache.
                 */
                nt->nt_session = se;
-               error = npf_session_setnat(se, nt, di);
+               error = npf_session_setnat(se, nt, np->n_type);
 out:
                if (error) {
                        /* If session was for NAT only - expire it. */
diff -r 8ae75b4090e9 -r 5d6b65c9b77b sys/net/npf/npf_session.c
--- a/sys/net/npf/npf_session.c Sun Nov 17 18:25:45 2013 +0000
+++ b/sys/net/npf/npf_session.c Sun Nov 17 19:16:57 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: npf_session.c,v 1.10.4.9 2013/02/11 21:49:49 riz Exp $ */
+/*     $NetBSD: npf_session.c,v 1.10.4.10 2013/11/17 19:16:58 bouyer Exp $     */
 
 /*-
  * Copyright (c) 2010-2012 The NetBSD Foundation, Inc.
@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.10.4.9 2013/02/11 21:49:49 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.10.4.10 2013/11/17 19:16:58 bouyer Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -140,7 +140,7 @@
                uint16_t        if_idx;
        } s_common_id;
        /* Flags and the protocol state. */
-       int                     s_flags;
+       u_int                   s_flags;
        npf_state_t             s_state;
        /* Association of rule procedure data. */
        npf_rproc_t *           s_rproc;
@@ -163,18 +163,20 @@
 };
 
 /*
- * Session flags:
- * - PFIL_IN and PFIL_OUT values are reserved for direction.
- * - SE_ACTIVE: session is active i.e. visible on inspection.
- * - SE_PASS: a "pass" session.
- * - SE_EXPIRE: explicitly expire the session.
- * - SE_REMOVING: session is being removed (indicate need to enter G/C list).
+ * Session flags: PFIL_IN and PFIL_OUT values are reserved for direction.
  */
 CTASSERT(PFIL_ALL == (0x001 | 0x002));
-#define        SE_ACTIVE               0x004
-#define        SE_PASS                 0x008
-#define        SE_EXPIRE               0x010
-#define        SE_REMOVING             0x020
+#define        SE_ACTIVE               0x004   /* visible on inspection */
+#define        SE_PASS                 0x008   /* perform implicit passing */
+#define        SE_EXPIRE               0x010   /* explicitly expire */
+
+/*
+ * Flags to indicate removal of forwards/backwards session entries or
+ * completion of session removal itself (i.e. both entries).
+ */
+#define        SE_REMFORW              0x020
+#define        SE_REMBACK              0x040
+#define        SE_REMOVED              (SE_REMFORW | SE_REMBACK)
 
 /*
  * Session tracking state: disabled (off), enabled (on) or flush request.
@@ -466,7 +468,7 @@
        npf_sentry_t senkey, *sen;
        npf_session_t *se;
        npf_sehash_t *sh;
-       int flags;
+       u_int flags;
 
        switch (proto) {
        case IPPROTO_TCP: {
@@ -762,7 +764,6 @@
 static void
 npf_session_destroy(npf_session_t *se)
 {
-
        if (se->s_nat) {
                /* Release any NAT related structures. */
                npf_nat_expire(se->s_nat);
@@ -786,7 +787,7 @@
  * and re-insert session entry accordingly.
  */
 int
-npf_session_setnat(npf_session_t *se, npf_nat_t *nt, const int di)
+npf_session_setnat(npf_session_t *se, npf_nat_t *nt, u_int ntype)
 {
        npf_sehash_t *sh;
        npf_sentry_t *sen;
@@ -798,56 +799,60 @@
 
        /* First, atomically check and associate NAT entry. */
        if (atomic_cas_ptr(&se->s_nat, NULL, nt) != NULL) {
-               /* Race: see below for description. */
+               /* Race with a duplicate packet. */
                npf_stats_inc(NPF_STAT_RACE_NAT);
                return EISCONN;
        }
 
-       /*
-        * Update, re-hash and re-insert "backwards" entry, according to
-        * the translation.  First, remove the entry from tree.  Note that
-        * a duplicate packet may establish a duplicate session while lock
-        * will be released.  In such case, caller will drop this packet
-        * and structures associated with it.  Such race condition should
-        * never happen in practice, though.
-        */
        sen = &se->s_back_entry;
        sh = sess_hash_bucket(sess_hashtbl, &se->s_common_id, sen);
 
+       /*
+        * Note: once the lock is release, the session might be a G/C
+        * target, therefore keep SE_REMBACK bit set until re-insert.
+        */
        rw_enter(&sh->sh_lock, RW_WRITER);
        rb_tree_remove_node(&sh->sh_tree, sen);
        sh->sh_count--;
        rw_exit(&sh->sh_lock);
 
        /*
-        * New source/destination and hash.  Note that source/destination
-        * are inverted, since we are handling "backwards" entry.
+        * Update the source/destination IDs and rehash.  Note that we are
+        * handling the "backwards" entry, therefore the opposite mapping.
         */
        npf_nat_gettrans(nt, &taddr, &tport);
-       if (di == PFIL_OUT) {
-               /* NPF_NATOUT: source in "forwards" = destination. */
+       switch (ntype) {
+       case NPF_NATOUT:
+               /* Source in "forwards" => destination. */
                memcpy(&sen->se_dst_addr, taddr, sen->se_alen);
-               if (tport) {
+               if (tport)
                        sen->se_dst_id = tport;
-               }
-       } else {
-               /* NPF_NATIN: destination in "forwards" = source. */
+               break;
+       case NPF_NATIN:
+               /* Destination in "forwards" => source. */
                memcpy(&sen->se_src_addr, taddr, sen->se_alen);
-               if (tport) {
+               if (tport)
                        sen->se_src_id = tport;
-               }
+               break;
        }
        sh = sess_hash_bucket(sess_hashtbl, &se->s_common_id, sen);
 
-       /* Insert into the new bucket. */
+       /*
+        * Insert the entry back into a potentially new bucket.
+        *
+        * Note: synchronise with the G/C thread here for a case when the
+        * old session is still being expired while a duplicate is being
+        * created here.  This race condition is rare.
+        */
        rw_enter(&sh->sh_lock, RW_WRITER);
-       ok = (rb_tree_insert_node(&sh->sh_tree, sen) == sen);
+       ok = rb_tree_insert_node(&sh->sh_tree, sen) == sen;
        if (__predict_true(ok)) {
                sh->sh_count++;
                NPF_PRINTF(("NPF: se %p assoc with nat %p\n", se, se->s_nat));
        } else {
-               /* FIXMEgc */
-               printf("npf_session_setnat: Houston, we've had a problem.\n");
+               /* Race: mark a removed entry and explicitly expire. */
+               atomic_or_uint(&se->s_flags, SE_REMBACK | SE_EXPIRE);
+               npf_stats_inc(NPF_STAT_RACE_NAT);
        }
        rw_exit(&sh->sh_lock);
        return ok ? 0 : EISCONN;
@@ -859,7 +864,6 @@
 void
 npf_session_expire(npf_session_t *se)
 {
-
        /* KASSERT(se->s_refcnt > 0); XXX: npf_nat_freepolicy() */
        atomic_or_uint(&se->s_flags, SE_EXPIRE);
 }
@@ -870,7 +874,6 @@
 bool
 npf_session_pass(const npf_session_t *se, npf_rproc_t **rp)
 {
-
        KASSERT(se->s_refcnt > 0);
        if ((se->s_flags & SE_PASS) != 0) {
                *rp = se->s_rproc;
@@ -906,7 +909,6 @@
 void
 npf_session_release(npf_session_t *se)
 {
-
        KASSERT(se->s_refcnt > 0);
        if ((se->s_flags & SE_ACTIVE) == 0) {
                /* Activate: after this point, session is globally visible. */
@@ -922,7 +924,6 @@
 npf_nat_t *
 npf_session_retnat(npf_session_t *se, const int di, bool *forw)
 {
-
        KASSERT(se->s_refcnt > 0);
        *forw = (se->s_flags & PFIL_ALL) == di;
        return se->s_nat;
@@ -967,6 +968,7 @@
                if (sh->sh_count == 0) {
                        continue;
                }
+
                rw_enter(&sh->sh_lock, RW_WRITER);
                /* For each (left -> right) ... */
                sen = rb_tree_iterate(&sh->sh_tree, NULL, RB_DIR_LEFT);
@@ -975,25 +977,27 @@
                        se = sen->se_backptr;
                        nsen = rb_tree_iterate(&sh->sh_tree, sen, RB_DIR_RIGHT);
                        if (!npf_session_expired(se, &tsnow) && !flushall) {
-                               KASSERT((se->s_flags & SE_REMOVING) == 0);
+                               KASSERT((se->s_flags & SE_REMOVED) == 0);
                                sen = nsen;
                                continue;
                        }
 
-                       /* Expired - remove from the tree. */
+                       /* Expired: remove from the tree. */
+                       atomic_or_uint(&se->s_flags, SE_EXPIRE);
                        rb_tree_remove_node(&sh->sh_tree, sen);
                        sh->sh_count--;
 
                        /*
-                        * Set removal bit when the first entry is removed.
-                        * If already set, then second entry has been removed,
-                        * therefore move the session into the G/C list.
+                        * Remove the session and move it to the G/C list,
+                        * if we are removing the forwards entry.  The list
+                        * is protected by its bucket lock.
                         */
-                       if (se->s_flags & SE_REMOVING) {
+                       if (&se->s_forw_entry == sen) {
+                               atomic_or_uint(&se->s_flags, SE_REMFORW);
                                LIST_REMOVE(se, s_list);
                                LIST_INSERT_HEAD(gc_list, se, s_list);
                        } else {
-                               atomic_or_uint(&se->s_flags, SE_REMOVING);
+                               atomic_or_uint(&se->s_flags, SE_REMBACK);
                        }
 
                        /* Next.. */
@@ -1015,9 +1019,11 @@
 
        se = LIST_FIRST(gc_list);
        while (se != NULL) {
+               bool removed = (se->s_flags & SE_REMOVED) == SE_REMOVED;
+
                nse = LIST_NEXT(se, s_list);
-               if (se->s_refcnt == 0) {
-                       /* Destroy only if no references. */
+               if (removed && se->s_refcnt == 0) {
+                       /* Destroy only if removed and no references. */
                        LIST_REMOVE(se, s_list);



Home | Main Index | Thread Index | Old Index