Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/netinet improve fast-forward performance when the number...



details:   https://anonhg.NetBSD.org/src/rev/16e69320e4b7
branches:  trunk
changeset: 346758:16e69320e4b7
user:      knakahara <knakahara%NetBSD.org@localhost>
date:      Mon Aug 01 10:22:53 2016 +0000

description:
improve fast-forward performance when the number of flows exceeds IPFLOW_MAX.

In the fast-forward case, when the number of flows exceeds IPFLOW_MAX, the
performance degraded to about 50% compared to the case of fewer than IPFLOW_MAX
flows. This modification suppresses the degradation to 65%. Furthermore,
the modified kernel is about the same performance as the original kernel
when the number of flows is less than IPFLOW_MAX.

The original patch is implemented by ryo@n.o. Thanks.

diffstat:

 sys/netinet/ip_flow.c |  94 +++++++++++++++++++++++++++++++++-----------------
 sys/netinet/ip_var.h  |   7 ++-
 2 files changed, 65 insertions(+), 36 deletions(-)

diffs (232 lines):

diff -r 6122fe1c23ff -r 16e69320e4b7 sys/netinet/ip_flow.c
--- a/sys/netinet/ip_flow.c     Mon Aug 01 04:37:53 2016 +0000
+++ b/sys/netinet/ip_flow.c     Mon Aug 01 10:22:53 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ip_flow.c,v 1.75 2016/07/27 04:23:42 knakahara Exp $   */
+/*     $NetBSD: ip_flow.c,v 1.76 2016/08/01 10:22:53 knakahara Exp $   */
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.75 2016/07/27 04:23:42 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.76 2016/08/01 10:22:53 knakahara Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -69,7 +69,7 @@
 
 static struct pool ipflow_pool;
 
-LIST_HEAD(ipflowhead, ipflow);
+TAILQ_HEAD(ipflowhead, ipflow);
 
 #define        IPFLOW_TIMER            (5 * PR_SLOWHZ)
 #define        IPFLOW_DEFAULT_HASHSIZE (1 << IPFLOW_HASHBITS)
@@ -86,16 +86,17 @@
 static struct ipflowhead ipflowlist;
 static int ipflow_inuse;
 
-#define        IPFLOW_INSERT(bucket, ipf) \
+#define        IPFLOW_INSERT(hashidx, ipf) \
 do { \
-       LIST_INSERT_HEAD((bucket), (ipf), ipf_hash); \
-       LIST_INSERT_HEAD(&ipflowlist, (ipf), ipf_list); \
+       (ipf)->ipf_hashidx = (hashidx); \
+       TAILQ_INSERT_HEAD(&ipflowtable[(hashidx)], (ipf), ipf_hash); \
+       TAILQ_INSERT_HEAD(&ipflowlist, (ipf), ipf_list); \
 } while (/*CONSTCOND*/ 0)
 
-#define        IPFLOW_REMOVE(ipf) \
+#define        IPFLOW_REMOVE(hashidx, ipf) \
 do { \
-       LIST_REMOVE((ipf), ipf_hash); \
-       LIST_REMOVE((ipf), ipf_list); \
+       TAILQ_REMOVE(&ipflowtable[(hashidx)], (ipf), ipf_hash); \
+       TAILQ_REMOVE(&ipflowlist, (ipf), ipf_list); \
 } while (/*CONSTCOND*/ 0)
 
 #ifndef IPFLOW_MAX
@@ -135,7 +136,7 @@
 
        hash = ipflow_hash(ip);
 
-       LIST_FOREACH(ipf, &ipflowtable[hash], ipf_hash) {
+       TAILQ_FOREACH(ipf, &ipflowtable[hash], ipf_hash) {
                if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
                    && ip->ip_src.s_addr == ipf->ipf_src.s_addr
                    && ip->ip_tos == ipf->ipf_tos)
@@ -172,9 +173,9 @@
        ipflowtable = new_table;
        ip_hashsize = table_size;
 
-       LIST_INIT(&ipflowlist);
+       TAILQ_INIT(&ipflowlist);
        for (i = 0; i < ip_hashsize; i++)
-               LIST_INIT(&ipflowtable[i]);
+               TAILQ_INIT(&ipflowtable[i]);
 
        return 0;
 }
@@ -328,6 +329,18 @@
         * Send the packet on its way.  All we can get back is ENOBUFS
         */
        ipf->ipf_uses++;
+
+#if 0
+       /*
+        * Sorting list is too heavy for fast path(packet processing path).
+        * It degrades about 10% performance. So, we does not sort ipflowtable,
+        * and then we use FIFO cache replacement instead fo LRU.
+        */
+       /* move to head (LRU) for ipflowlist. ipflowtable ooes not care LRU. */
+       TAILQ_REMOVE(&ipflowlist, ipf, ipf_list);
+       TAILQ_INSERT_HEAD(&ipflowlist, ipf, ipf_list);
+#endif
+
        PRT_SLOW_ARM(ipf->ipf_timer, IPFLOW_TIMER);
 
        if (rt->rt_flags & RTF_GATEWAY)
@@ -375,7 +388,7 @@
         * Once it's off the list, we can deal with it at normal
         * network IPL.
         */
-       IPFLOW_REMOVE(ipf);
+       IPFLOW_REMOVE(ipf->ipf_hashidx, ipf);
 
        ipflow_addstats(ipf);
        rtcache_free(&ipf->ipf_ro);
@@ -386,14 +399,34 @@
 static struct ipflow *
 ipflow_reap(bool just_one)
 {
+       struct ipflow *ipf;
 
        KASSERT(mutex_owned(&ipflow_lock));
 
-       while (just_one || ipflow_inuse > ip_maxflows) {
-               struct ipflow *ipf, *maybe_ipf = NULL;
+       /*
+        * This case must remove one ipflow. Furthermore, this case is used in
+        * fast path(packet processing path). So, simply remove TAILQ_LAST one.
+        */
+       if (just_one) {
+               ipf = TAILQ_LAST(&ipflowlist, ipflowhead);
+               KASSERT(ipf != NULL);
+
+               IPFLOW_REMOVE(ipf->ipf_hashidx, ipf);
 
-               ipf = LIST_FIRST(&ipflowlist);
-               while (ipf != NULL) {
+               ipflow_addstats(ipf);
+               rtcache_free(&ipf->ipf_ro);
+               return ipf;
+       }
+
+       /*
+        * This case is used in slow path(sysctl).
+        * At first, remove invalid rtcache ipflow, and then remove TAILQ_LAST
+        * ipflow if it is ensured least recently used by comparing last_uses.
+        */
+       while (ipflow_inuse > ip_maxflows) {
+               struct ipflow *maybe_ipf = TAILQ_LAST(&ipflowlist, ipflowhead);
+
+               TAILQ_FOREACH(ipf, &ipflowlist, ipf_list) {
                        /*
                         * If this no longer points to a valid route
                         * reclaim it.
@@ -405,26 +438,21 @@
                         * used or has had the least uses in the
                         * last 1.5 intervals.
                         */
-                       if (maybe_ipf == NULL ||
-                           ipf->ipf_timer < maybe_ipf->ipf_timer ||
-                           (ipf->ipf_timer == maybe_ipf->ipf_timer &&
-                            ipf->ipf_last_uses + ipf->ipf_uses <
-                                maybe_ipf->ipf_last_uses +
-                                maybe_ipf->ipf_uses))
+                       if (ipf->ipf_timer < maybe_ipf->ipf_timer
+                           || ((ipf->ipf_timer == maybe_ipf->ipf_timer)
+                               && (ipf->ipf_last_uses + ipf->ipf_uses
+                                   < maybe_ipf->ipf_last_uses + maybe_ipf->ipf_uses)))
                                maybe_ipf = ipf;
-                       ipf = LIST_NEXT(ipf, ipf_list);
                }
                ipf = maybe_ipf;
            done:
                /*
                 * Remove the entry from the flow table.
                 */
-               IPFLOW_REMOVE(ipf);
+               IPFLOW_REMOVE(ipf->ipf_hashidx, ipf);
 
                ipflow_addstats(ipf);
                rtcache_free(&ipf->ipf_ro);
-               if (just_one)
-                       return ipf;
                pool_put(&ipflow_pool, ipf);
                ipflow_inuse--;
        }
@@ -446,8 +474,8 @@
        mutex_enter(softnet_lock);
        mutex_enter(&ipflow_lock);
        KERNEL_LOCK(1, NULL);
-       for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
-               next_ipf = LIST_NEXT(ipf, ipf_list);
+       for (ipf = TAILQ_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
+               next_ipf = TAILQ_NEXT(ipf, ipf_list);
                if (PRT_SLOW_ISEXPIRED(ipf->ipf_timer) ||
                    (rt = rtcache_validate(&ipf->ipf_ro)) == NULL) {
                        ipflow_free(ipf);
@@ -514,7 +542,7 @@
                }
                memset(ipf, 0, sizeof(*ipf));
        } else {
-               IPFLOW_REMOVE(ipf);
+               IPFLOW_REMOVE(ipf->ipf_hashidx, ipf);
 
                ipflow_addstats(ipf);
                rtcache_free(&ipf->ipf_ro);
@@ -535,7 +563,7 @@
         * Insert into the approriate bucket of the flow table.
         */
        hash = ipflow_hash(ip);
-       IPFLOW_INSERT(&ipflowtable[hash], ipf);
+       IPFLOW_INSERT(hash, ipf);
 
  out:
        KERNEL_UNLOCK_ONE(NULL);
@@ -552,8 +580,8 @@
 
        mutex_enter(&ipflow_lock);
 
-       for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
-               next_ipf = LIST_NEXT(ipf, ipf_list);
+       for (ipf = TAILQ_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
+               next_ipf = TAILQ_NEXT(ipf, ipf_list);
                ipflow_free(ipf);
        }
 
diff -r 6122fe1c23ff -r 16e69320e4b7 sys/netinet/ip_var.h
--- a/sys/netinet/ip_var.h      Mon Aug 01 04:37:53 2016 +0000
+++ b/sys/netinet/ip_var.h      Mon Aug 01 10:22:53 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ip_var.h,v 1.114 2016/06/21 03:28:27 ozaki-r Exp $     */
+/*     $NetBSD: ip_var.h,v 1.115 2016/08/01 10:22:53 knakahara Exp $   */
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -52,8 +52,9 @@
  * IP Flow structure
  */
 struct ipflow {
-       LIST_ENTRY(ipflow) ipf_list;    /* next in active list */
-       LIST_ENTRY(ipflow) ipf_hash;    /* next ipflow in bucket */
+       TAILQ_ENTRY(ipflow) ipf_list;   /* next in active list */
+       TAILQ_ENTRY(ipflow) ipf_hash;   /* next ipflow in bucket */
+       size_t ipf_hashidx;             /* own hash index of ipflowtable[] */
        struct in_addr ipf_dst;         /* destination address */
        struct in_addr ipf_src;         /* source address */
        uint8_t ipf_tos;                /* type-of-service */



Home | Main Index | Thread Index | Old Index