Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/pci/ixgbe ixg(4) supports workqueue poll mode, but n...



details:   https://anonhg.NetBSD.org/src/rev/640e6bf35189
branches:  trunk
changeset: 320832:640e6bf35189
user:      knakahara <knakahara%NetBSD.org@localhost>
date:      Fri Mar 02 10:19:20 2018 +0000

description:
ixg(4) supports workqueue poll mode, but not enabled by default yet. (that is, the default behavior is *not* changed)

At the time of high load near the wire rate, the turnaround time
of update/delete processing such as "ifconfig ixg0 inet XXX" or
"ifconfig ixg0 delete" is very long. The main reason is CPU
starvation caused by ixg(4)'s softint poll mode. ixg(4) uses
workqueue poll mode instead of softint poll mode, so that this
problem will be fix.

This change may cause performance issues, so it is not enabled
by default yet. Although there are that issues, if you want to use
workqueue poll mode, do "sysctl -w hw.ixgXX.txrx_workqueue=1" while
there is no traffic on the ixgXX.

ok by msaitoh@n.o.

diffstat:

 sys/dev/pci/ixgbe/ix_txrx.c |   45 ++++++++++++++++++-
 sys/dev/pci/ixgbe/ixgbe.c   |  100 ++++++++++++++++++++++++++++++++++++++++---
 sys/dev/pci/ixgbe/ixgbe.h   |   20 ++++++++-
 3 files changed, 152 insertions(+), 13 deletions(-)

diffs (truncated from 323 to 300 lines):

diff -r eeaf107a7cc0 -r 640e6bf35189 sys/dev/pci/ixgbe/ix_txrx.c
--- a/sys/dev/pci/ixgbe/ix_txrx.c       Fri Mar 02 08:25:53 2018 +0000
+++ b/sys/dev/pci/ixgbe/ix_txrx.c       Fri Mar 02 10:19:20 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.33 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.34 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
 
@@ -238,8 +238,26 @@
        if (IXGBE_TX_TRYLOCK(txr)) {
                ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
-       } else
-               softint_schedule(txr->txr_si);
+       } else {
+               if (adapter->txrx_use_workqueue) {
+                       /*
+                        * This function itself is not called in interrupt
+                        * context, however it can be called in fast softint
+                        * context right after receiving forwarding packets.
+                        * So, it is required to protect workqueue from twice
+                        * enqueuing when the machine uses both spontaneous
+                        * packets and forwarding packets.
+                        */
+                       u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+                       if (*enqueued == 0) {
+                               *enqueued = 1;
+                               percpu_putref(adapter->txr_wq_enqueued);
+                               workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
+                       } else
+                               percpu_putref(adapter->txr_wq_enqueued);
+               } else
+                       softint_schedule(txr->txr_si);
+       }
 
        return (0);
 } /* ixgbe_mq_start */
@@ -291,7 +309,8 @@
 /************************************************************************
  * ixgbe_deferred_mq_start
  *
- *   Called from a taskqueue to drain queued transmit packets.
+ *   Called from a softint and workqueue (indirectly) to drain queued
+ *   transmit packets.
  ************************************************************************/
 void
 ixgbe_deferred_mq_start(void *arg)
@@ -307,6 +326,24 @@
 } /* ixgbe_deferred_mq_start */
 
 /************************************************************************
+ * ixgbe_deferred_mq_start_work
+ *
+ *   Called from a workqueue to drain queued transmit packets.
+ ************************************************************************/
+void
+ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
+{
+       struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
+       struct adapter *adapter = txr->adapter;
+       u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+       *enqueued = 0;
+       percpu_putref(adapter->txr_wq_enqueued);
+
+       ixgbe_deferred_mq_start(txr);
+} /* ixgbe_deferred_mq_start */
+
+
+/************************************************************************
  * ixgbe_xmit
  *
  *   Maps the mbufs to tx descriptors, allowing the
diff -r eeaf107a7cc0 -r 640e6bf35189 sys/dev/pci/ixgbe/ixgbe.c
--- a/sys/dev/pci/ixgbe/ixgbe.c Fri Mar 02 08:25:53 2018 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.c Fri Mar 02 10:19:20 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.127 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ixgbe.c,v 1.128 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
 
@@ -260,6 +260,9 @@
 static void    ixgbe_handle_mod(void *);
 static void    ixgbe_handle_phy(void *);
 
+/* Workqueue handler for deferred work */
+static void    ixgbe_handle_que_work(struct work *, void *);
+
 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
 
 /************************************************************************
@@ -315,6 +318,9 @@
 SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
     &ixgbe_flow_control, 0, "Default flow control used for all adapters");
 
+/* Which pakcet processing uses workqueue or softint */
+static bool ixgbe_txrx_workqueue = false;
+
 /*
  * Smart speed setting, default to on
  * this only works as a compile option
@@ -395,10 +401,13 @@
 #define IXGBE_MPSAFE           1
 #define IXGBE_CALLOUT_FLAGS    CALLOUT_MPSAFE
 #define IXGBE_SOFTINFT_FLAGS   SOFTINT_MPSAFE
+#define IXGBE_WORKQUEUE_FLAGS  WQ_PERCPU | WQ_MPSAFE
 #else
 #define IXGBE_CALLOUT_FLAGS    0
 #define IXGBE_SOFTINFT_FLAGS   0
+#define IXGBE_WORKQUEUE_FLAGS  WQ_PERCPU
 #endif
+#define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
 
 /************************************************************************
  * ixgbe_initialize_rss_mapping
@@ -2525,9 +2534,30 @@
        rxr->packets = 0;
 
 no_calc:
-       if (more)
-               softint_schedule(que->que_si);
-       else
+       if (more) {
+               if (adapter->txrx_use_workqueue) {
+                       /*
+                        * adapter->que_wq is bound to each CPU instead of
+                        * each NIC queue to reduce workqueue kthread. As we
+                        * should consider about interrupt affinity in this
+                        * function, the workqueue kthread must be WQ_PERCPU.
+                        * If create WQ_PERCPU workqueue kthread for each NIC
+                        * queue, that number of created workqueue kthread is
+                        * (number of used NIC queue) * (number of CPUs) =
+                        * (number of CPUs) ^ 2 most often.
+                        *
+                        * The same NIC queue's interrupts are avoided by
+                        * masking the queue's interrupt. And different
+                        * NIC queue's interrupts use different struct work
+                        * (que->wq_cookie). So, "enqueued flag" to avoid
+                        * twice workqueue_enqueue() is not required .
+                        */
+                       workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+                           curcpu());
+               } else {
+                       softint_schedule(que->que_si);
+               }
+       } else
                ixgbe_enable_queue(adapter, que->msix);
 
        return 1;
@@ -3100,6 +3130,12 @@
            CTL_EOL) != 0)
                aprint_error_dev(dev, "could not create sysctl\n");
 
+       adapter->txrx_use_workqueue = ixgbe_txrx_workqueue;
+       if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
+           CTLTYPE_BOOL, "txrx_workqueue", SYSCTL_DESCR("Use workqueue for packet processing"),
+           NULL, 0, &adapter->txrx_use_workqueue, 0, CTL_CREATE, CTL_EOL) != 0)
+               aprint_error_dev(dev, "could not create sysctl\n");
+
 #ifdef IXGBE_DEBUG
        /* testing sysctls (for all devices) */
        if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
@@ -3232,6 +3268,12 @@
                if (que->que_si != NULL)
                        softint_disestablish(que->que_si);
        }
+       if (adapter->txr_wq != NULL)
+               workqueue_destroy(adapter->txr_wq);
+       if (adapter->txr_wq_enqueued != NULL)
+               percpu_free(adapter->txr_wq_enqueued, sizeof(u_int));
+       if (adapter->que_wq != NULL)
+               workqueue_destroy(adapter->que_wq);
 
        /* Drain the Link queue */
        if (adapter->link_si != NULL) {
@@ -5800,9 +5842,18 @@
                IXGBE_TX_UNLOCK(txr);
        }
 
-       if (more)
-               softint_schedule(que->que_si);
-       else if (que->res != NULL) {
+       if (more) {
+               if (adapter->txrx_use_workqueue) {
+                       /*
+                        * "enqueued flag" is not required here.
+                        * See ixgbe_msix_que().
+                        */
+                       workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+                           curcpu());
+               } else {
+                       softint_schedule(que->que_si);
+               }
+       } else if (que->res != NULL) {
                /* Re-enable this interrupt */
                ixgbe_enable_queue(adapter, que->msix);
        } else
@@ -5812,6 +5863,21 @@
 } /* ixgbe_handle_que */
 
 /************************************************************************
+ * ixgbe_handle_que_work
+ ************************************************************************/
+static void
+ixgbe_handle_que_work(struct work *wk, void *context)
+{
+       struct ix_queue *que = container_of(wk, struct ix_queue, wq_cookie);
+
+       /*
+        * "enqueued flag" is not required here.
+        * See ixgbe_msix_que().
+        */
+       ixgbe_handle_que(que);
+}
+
+/************************************************************************
  * ixgbe_allocate_legacy - Setup the Legacy or MSI Interrupt handler
  ************************************************************************/
 static int
@@ -5906,7 +5972,6 @@
        return (0);
 } /* ixgbe_allocate_legacy */
 
-
 /************************************************************************
  * ixgbe_allocate_msix - Setup MSI-X Interrupt resources and handlers
  ************************************************************************/
@@ -5919,6 +5984,7 @@
        pci_chipset_tag_t pc;
        char            intrbuf[PCI_INTRSTR_LEN];
        char            intr_xname[32];
+       char            wqname[MAXCOMLEN];
        const char      *intrstr = NULL;
        int             error, vector = 0;
        int             cpu_id = 0;
@@ -6044,6 +6110,24 @@
                        goto err_out;
                }
        }
+       snprintf(wqname, sizeof(wqname), "%sdeferTx", device_xname(dev));
+       error = workqueue_create(&adapter->txr_wq, wqname,
+           ixgbe_deferred_mq_start_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+           IXGBE_WORKQUEUE_FLAGS);
+       if (error) {
+               aprint_error_dev(dev, "couldn't create workqueue for deferred Tx\n");
+               goto err_out;
+       }
+       adapter->txr_wq_enqueued = percpu_alloc(sizeof(u_int));
+
+       snprintf(wqname, sizeof(wqname), "%sTxRx", device_xname(dev));
+       error = workqueue_create(&adapter->que_wq, wqname,
+           ixgbe_handle_que_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+           IXGBE_WORKQUEUE_FLAGS);
+       if (error) {
+               aprint_error_dev(dev, "couldn't create workqueue for Tx/Rx\n");
+               goto err_out;
+       }
 
        /* and Link */
        cpu_id++;
diff -r eeaf107a7cc0 -r 640e6bf35189 sys/dev/pci/ixgbe/ixgbe.h
--- a/sys/dev/pci/ixgbe/ixgbe.h Fri Mar 02 08:25:53 2018 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.h Fri Mar 02 10:19:20 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.31 2018/02/26 04:19:00 knakahara Exp $ */
+/* $NetBSD: ixgbe.h,v 1.32 2018/03/02 10:19:20 knakahara Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -80,6 +80,7 @@
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sockio.h>
+#include <sys/percpu.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -338,6 +339,8 @@
 
        kmutex_t         im_mtx;        /* lock for im_nest and this queue's EIMS/EIMC bit */
        int              im_nest;
+
+       struct work      wq_cookie;
 };
 
 /*
@@ -373,6 +376,8 @@
        struct evcnt            no_desc_avail;
        struct evcnt            total_packets;
        struct evcnt            pcq_drops;
+
+       struct work             wq_cookie;
 };
 
 
@@ -499,6 +504,18 @@
 
        void                    *phy_si;   /* PHY intr tasklet */
 



Home | Main Index | Thread Index | Old Index