Source-Changes-HG archive


[src/trunk]: src/sys/net/npf This patch ditches the ptree(3) library, becau...



details:   https://anonhg.NetBSD.org/src/rev/95ab67291ca6
branches:  trunk
changeset: 349372:95ab67291ca6
user:      christos <christos%NetBSD.org@localhost>
date:      Fri Dec 09 02:40:38 2016 +0000

description:
This patch ditches the ptree(3) library because it is broken (lookups
can miss existing entries).  Instead, as a temporary solution, we switch
to a simple linear scan of the hash tables for the longest-prefix-match
algorithm (lpm.c, lpm.h).  With only a few unique prefixes in the set,
this simple algorithm is in fact quite fast on modern hardware.
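
For context, the replacement approach keeps the stored prefixes grouped
by prefix length and, on lookup, walks the lengths from most specific to
least specific, masking the query address down to each length and
probing the table for that length; the first hit is the longest match.
The committed lpm.c probes per-length hash tables.  The standalone C
sketch below is an illustration only, not code from this commit: it
substitutes a flat array for the per-length hash maps, and the lookup()
name and example prefixes are made up for the example.

#include <stdint.h>
#include <stdio.h>

struct pfx {
	uint32_t	prefix;		/* network, in host byte order */
	unsigned	len;		/* prefix length in bits */
	const char *	val;
};

/* Hypothetical example set: 10.0.0.0/8 and 10.1.0.0/16. */
static const struct pfx set[] = {
	{ 0x0a000000, 8,  "10.0.0.0/8"  },
	{ 0x0a010000, 16, "10.1.0.0/16" },
};

static const char *
lookup(uint32_t addr)
{
	/* Scan prefix lengths from most to least specific. */
	for (int len = 32; len >= 0; len--) {
		const uint32_t mask = len ? ~0U << (32 - len) : 0;

		for (unsigned i = 0; i < sizeof(set) / sizeof(set[0]); i++) {
			if (set[i].len == (unsigned)len &&
			    set[i].prefix == (addr & mask))
				return set[i].val;
		}
	}
	return NULL;	/* no match; a default entry would go here */
}

int
main(void)
{
	/* 10.1.2.3 falls under both prefixes; the /16 is the longer match. */
	printf("%s\n", lookup(0x0a010203));
	return 0;
}

Compiled and run, the sketch prints "10.1.0.0/16", since the /16 entry
is more specific than the /8 that also covers 10.1.2.3.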

diffstat:

 sys/net/npf/files.npf            |    6 +-
 sys/net/npf/lpm.c                |  401 +++++++++++++++++++++++++++++++++++++++
 sys/net/npf/lpm.h                |   46 ++++
 sys/net/npf/npf_impl.h           |    5 +-
 sys/net/npf/npf_tableset.c       |  125 +++++------
 sys/net/npf/npf_tableset_ptree.c |  183 -----------------
 6 files changed, 506 insertions(+), 260 deletions(-)

diffs (truncated from 1002 to 300 lines):

diff -r 17a1916c6af8 -r 95ab67291ca6 sys/net/npf/files.npf
--- a/sys/net/npf/files.npf     Fri Dec 09 02:38:14 2016 +0000
+++ b/sys/net/npf/files.npf     Fri Dec 09 02:40:38 2016 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.17 2014/07/19 18:24:16 rmind Exp $
+# $NetBSD: files.npf,v 1.18 2016/12/09 02:40:38 christos Exp $
 #
 # Public Domain.
 #
@@ -19,7 +19,6 @@
 file   net/npf/npf_ruleset.c                   npf
 file   net/npf/npf_rproc.c                     npf
 file   net/npf/npf_tableset.c                  npf
-file   net/npf/npf_tableset_ptree.c            npf
 file   net/npf/npf_if.c                        npf
 file   net/npf/npf_inet.c                      npf
 file   net/npf/npf_conn.c                      npf
@@ -31,6 +30,9 @@
 file   net/npf/npf_sendpkt.c                   npf
 file   net/npf/npf_worker.c                    npf
 
+# LPM
+file   net/npf/lpm.c                           npf
+
 # Built-in extensions.
 file   net/npf/npf_ext_log.c                   npf
 file   net/npf/npf_ext_normalize.c             npf
diff -r 17a1916c6af8 -r 95ab67291ca6 sys/net/npf/lpm.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/lpm.c Fri Dec 09 02:40:38 2016 +0000
@@ -0,0 +1,401 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * TODO: Simple linear scan for now (works well enough with a few prefixes).
+ * TBD on a better algorithm.
+ */
+
+#if defined(_KERNEL)
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: lpm.c,v 1.1 2016/12/09 02:40:38 christos Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kmem.h>
+#else
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
+#define kmem_alloc(a, b) malloc(a)
+#define kmem_free(a, b) free(a)
+#define kmem_zalloc(a, b) calloc(a, 1)
+#endif
+
+#include "lpm.h"
+
+#define        LPM_MAX_PREFIX          (128)
+#define        LPM_MAX_WORDS           (LPM_MAX_PREFIX >> 5)
+#define        LPM_TO_WORDS(x)         ((x) >> 2)
+#define        LPM_HASH_STEP           (8)
+
+#ifdef DEBUG
+#define        ASSERT  assert
+#else
+#define        ASSERT(x)
+#endif
+
+typedef struct lpm_ent {
+       struct lpm_ent *next;
+       void *          val;
+       unsigned        len;
+       uint8_t         key[];
+} lpm_ent_t;
+
+typedef struct {
+       uint32_t        hashsize;
+       uint32_t        nitems;
+       lpm_ent_t **bucket;
+} lpm_hmap_t;
+
+struct lpm {
+       uint32_t        bitmask[LPM_MAX_WORDS];
+       void *          defval;
+       lpm_hmap_t      prefix[LPM_MAX_PREFIX + 1];
+};
+
+lpm_t *
+lpm_create(void)
+{
+       return kmem_zalloc(sizeof(lpm_t), KM_SLEEP);
+}
+
+void
+lpm_clear(lpm_t *lpm, lpm_dtor_t dtor, void *arg)
+{
+       for (unsigned n = 0; n <= LPM_MAX_PREFIX; n++) {
+               lpm_hmap_t *hmap = &lpm->prefix[n];
+
+               if (!hmap->hashsize) {
+                       KASSERT(!hmap->bucket);
+                       continue;
+               }
+               for (unsigned i = 0; i < hmap->hashsize; i++) {
+                       lpm_ent_t *entry = hmap->bucket[i];
+
+                       while (entry) {
+                               lpm_ent_t *next = entry->next;
+
+                               if (dtor) {
+                                       dtor(arg, entry->key,
+                                           entry->len, entry->val);
+                               }
+                               kmem_free(entry, 
+                                   offsetof(lpm_ent_t, key[entry->len]));
+                               entry = next;
+                       }
+               }
+               kmem_free(hmap->bucket,
+                   hmap->hashsize * sizeof(*hmap->bucket));
+               hmap->bucket = NULL;
+               hmap->hashsize = 0;
+               hmap->nitems = 0;
+       }
+       memset(lpm->bitmask, 0, sizeof(lpm->bitmask));
+       lpm->defval = NULL;
+}
+
+void
+lpm_destroy(lpm_t *lpm)
+{
+       lpm_clear(lpm, NULL, NULL);
+       kmem_free(lpm, sizeof(*lpm));
+}
+
+/*
+ * fnv1a_hash: Fowler-Noll-Vo hash function (FNV-1a variant).
+ */
+static uint32_t
+fnv1a_hash(const void *buf, size_t len)
+{
+       uint32_t hash = 2166136261UL;
+       const uint8_t *p = buf;
+
+       while (len--) {
+               hash ^= *p++;
+               hash *= 16777619U;
+       }
+       return hash;
+}
+
+static bool
+hashmap_rehash(lpm_hmap_t *hmap, uint32_t size)
+{
+       lpm_ent_t **bucket;
+       uint32_t hashsize;
+
+       for (hashsize = 1; hashsize < size; hashsize <<= 1) {
+               continue;
+       }
+       bucket = kmem_zalloc(hashsize * sizeof(*bucket), KM_SLEEP);
+       if (bucket == NULL)
+               return false;
+       for (unsigned n = 0; n < hmap->hashsize; n++) {
+               lpm_ent_t *list = hmap->bucket[n];
+
+               while (list) {
+                       lpm_ent_t *entry = list;
+                       uint32_t hash = fnv1a_hash(entry->key, entry->len);
+                       const size_t i = hash & (hashsize - 1);
+
+                       list = entry->next;
+                       entry->next = bucket[i];
+                       bucket[i] = entry;
+               }
+       }
+       if (hmap->bucket)
+               kmem_free(hmap->bucket,
+                   hmap->hashsize * sizeof(*hmap->bucket));
+       hmap->bucket = bucket;
+       hmap->hashsize = hashsize;
+       return true;
+}
+
+static lpm_ent_t *
+hashmap_insert(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+       const uint32_t target = hmap->nitems + LPM_HASH_STEP;
+       const size_t entlen = offsetof(lpm_ent_t, key[len]);
+       uint32_t hash, i;
+       lpm_ent_t *entry;
+
+       if (hmap->hashsize < target && !hashmap_rehash(hmap, target)) {
+               return NULL;
+       }
+
+       hash = fnv1a_hash(key, len);
+       i = hash & (hmap->hashsize - 1);
+       entry = hmap->bucket[i];
+       while (entry) {
+               if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+                       return entry;
+               }
+               entry = entry->next;
+       }
+
+       if ((entry = kmem_alloc(entlen, KM_SLEEP)) == NULL)
+               return NULL;
+
+       memcpy(entry->key, key, len);
+       entry->next = hmap->bucket[i];
+       entry->len = len;
+
+       hmap->bucket[i] = entry;
+       hmap->nitems++;
+       return entry;
+}
+
+static lpm_ent_t *
+hashmap_lookup(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+       const uint32_t hash = fnv1a_hash(key, len);
+       const uint32_t i = hash & (hmap->hashsize - 1);
+       lpm_ent_t *entry = hmap->bucket[i];
+
+       while (entry) {
+               if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+                       return entry;
+               }
+               entry = entry->next;
+       }
+       return NULL;
+}
+
+static int
+hashmap_remove(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+       const uint32_t hash = fnv1a_hash(key, len);
+       const uint32_t i = hash & (hmap->hashsize - 1);
+       lpm_ent_t *prev = NULL, *entry = hmap->bucket[i];
+
+       while (entry) {
+               if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+                       if (prev) {
+                               prev->next = entry->next;
+                       } else {
+                               hmap->bucket[i] = entry->next;
+                       }
+                       kmem_free(entry,
+                           offsetof(lpm_ent_t, key[entry->len]));
+                       return 0;
+               }
+               prev = entry;
+               entry = entry->next;
+       }
+       return -1;
+}
+
+/*
+ * compute_prefix: given the address and prefix length, compute and
+ * return the address prefix.
+ */
+static inline void
+compute_prefix(const unsigned nwords, const uint32_t *addr,
+    unsigned preflen, uint32_t *prefix)
+{
+       uint32_t addr2[4];
+
+       if ((uintptr_t)addr & 3) {
+               /* Unaligned address: just copy for now. */
+               memcpy(addr2, addr, nwords * 4);


