Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-8]: src/sys/dev/pci/ixgbe Pull up following revision(s) (requeste...



details:   https://anonhg.NetBSD.org/src/rev/5b8ed9ebaee3
branches:  netbsd-8
changeset: 851445:5b8ed9ebaee3
user:      martin <martin%NetBSD.org@localhost>
date:      Tue Mar 06 11:12:40 2018 +0000

description:
Pull up following revision(s) (requested by knakahara in ticket #605):
        sys/dev/pci/ixgbe/ixgbe.h: revision 1.32
        sys/dev/pci/ixgbe/ixgbe.h: revision 1.33
        sys/dev/pci/ixgbe/ix_txrx.c: revision 1.34
        sys/dev/pci/ixgbe/ixgbe.c: revision 1.128
        sys/dev/pci/ixgbe/ixv.c: revision 1.83
        sys/dev/pci/ixgbe/ixv.c: revision 1.84

Add hw.ixvM.q[01].{interrupt_rate,[tr]xd_head,[tr]xd_tail} sysctls as ixg(4).

ixg(4) supports workqueue poll mode, but not enabled by default yet. (that is, the default behavior is *not* changed)

At the time of high load near the wire rate, the turnaround time
of update/delete processing such as "ifconfig ixg0 inet XXX" or
"ifconfig ixg0 delete" is very long. The main reason is CPU
starvation caused by ixg(4)'s softint poll mode. ixg(4) uses
workqueue poll mode instead of softint poll mode, so that this
problem will be fix.

This change may cause performance issues, so it is not enabled
by default yet. Although there are that issues, if you want to use
workqueue poll mode, do "sysctl -w hw.ixgXX.txrx_workqueue=1" while
there is no traffic on the ixgXX.

ok by msaitoh@n.o.

ixv(4) also supports workqueue poll mode, but not enabled by default yet, either.
ok by msaitoh@n.o.

Move the location of "struct work" as FreeBSD's "struct task" in ixgbe.h.
No functional change.

diffstat:

 sys/dev/pci/ixgbe/ix_txrx.c |   45 +++++++-
 sys/dev/pci/ixgbe/ixgbe.c   |  100 +++++++++++++++++-
 sys/dev/pci/ixgbe/ixgbe.h   |   18 +++-
 sys/dev/pci/ixgbe/ixv.c     |  228 +++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 365 insertions(+), 26 deletions(-)

diffs (truncated from 725 to 300 lines):

diff -r 335f5a3440ea -r 5b8ed9ebaee3 sys/dev/pci/ixgbe/ix_txrx.c
--- a/sys/dev/pci/ixgbe/ix_txrx.c       Tue Mar 06 10:59:04 2018 +0000
+++ b/sys/dev/pci/ixgbe/ix_txrx.c       Tue Mar 06 11:12:40 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.24.2.5 2018/03/01 19:02:15 martin Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.24.2.6 2018/03/06 11:12:40 martin Exp $ */
 
 /******************************************************************************
 
@@ -238,8 +238,26 @@
        if (IXGBE_TX_TRYLOCK(txr)) {
                ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
-       } else
-               softint_schedule(txr->txr_si);
+       } else {
+               if (adapter->txrx_use_workqueue) {
+                       /*
+                        * This function itself is not called in interrupt
+                        * context, however it can be called in fast softint
+                        * context right after receiving forwarding packets.
+                        * So, it is required to protect workqueue from twice
+                        * enqueuing when the machine uses both spontaneous
+                        * packets and forwarding packets.
+                        */
+                       u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+                       if (*enqueued == 0) {
+                               *enqueued = 1;
+                               percpu_putref(adapter->txr_wq_enqueued);
+                               workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
+                       } else
+                               percpu_putref(adapter->txr_wq_enqueued);
+               } else
+                       softint_schedule(txr->txr_si);
+       }
 
        return (0);
 } /* ixgbe_mq_start */
@@ -291,7 +309,8 @@
 /************************************************************************
  * ixgbe_deferred_mq_start
  *
- *   Called from a taskqueue to drain queued transmit packets.
+ *   Called from a softint and workqueue (indirectly) to drain queued
+ *   transmit packets.
  ************************************************************************/
 void
 ixgbe_deferred_mq_start(void *arg)
@@ -307,6 +326,24 @@
 } /* ixgbe_deferred_mq_start */
 
 /************************************************************************
+ * ixgbe_deferred_mq_start_work
+ *
+ *   Called from a workqueue to drain queued transmit packets.
+ ************************************************************************/
+void
+ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
+{
+       struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
+       struct adapter *adapter = txr->adapter;
+       u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
+       *enqueued = 0;
+       percpu_putref(adapter->txr_wq_enqueued);
+
+       ixgbe_deferred_mq_start(txr);
+} /* ixgbe_deferred_mq_start */
+
+
+/************************************************************************
  * ixgbe_xmit
  *
  *   Maps the mbufs to tx descriptors, allowing the
diff -r 335f5a3440ea -r 5b8ed9ebaee3 sys/dev/pci/ixgbe/ixgbe.c
--- a/sys/dev/pci/ixgbe/ixgbe.c Tue Mar 06 10:59:04 2018 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.c Tue Mar 06 11:12:40 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.88.2.11 2018/03/01 19:02:15 martin Exp $ */
+/* $NetBSD: ixgbe.c,v 1.88.2.12 2018/03/06 11:12:41 martin Exp $ */
 
 /******************************************************************************
 
@@ -260,6 +260,9 @@
 static void    ixgbe_handle_mod(void *);
 static void    ixgbe_handle_phy(void *);
 
+/* Workqueue handler for deferred work */
+static void    ixgbe_handle_que_work(struct work *, void *);
+
 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
 
 /************************************************************************
@@ -315,6 +318,9 @@
 SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
     &ixgbe_flow_control, 0, "Default flow control used for all adapters");
 
+/* Which pakcet processing uses workqueue or softint */
+static bool ixgbe_txrx_workqueue = false;
+
 /*
  * Smart speed setting, default to on
  * this only works as a compile option
@@ -395,10 +401,13 @@
 #define IXGBE_MPSAFE           1
 #define IXGBE_CALLOUT_FLAGS    CALLOUT_MPSAFE
 #define IXGBE_SOFTINFT_FLAGS   SOFTINT_MPSAFE
+#define IXGBE_WORKQUEUE_FLAGS  WQ_PERCPU | WQ_MPSAFE
 #else
 #define IXGBE_CALLOUT_FLAGS    0
 #define IXGBE_SOFTINFT_FLAGS   0
+#define IXGBE_WORKQUEUE_FLAGS  WQ_PERCPU
 #endif
+#define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
 
 /************************************************************************
  * ixgbe_initialize_rss_mapping
@@ -2525,9 +2534,30 @@
        rxr->packets = 0;
 
 no_calc:
-       if (more)
-               softint_schedule(que->que_si);
-       else
+       if (more) {
+               if (adapter->txrx_use_workqueue) {
+                       /*
+                        * adapter->que_wq is bound to each CPU instead of
+                        * each NIC queue to reduce workqueue kthread. As we
+                        * should consider about interrupt affinity in this
+                        * function, the workqueue kthread must be WQ_PERCPU.
+                        * If create WQ_PERCPU workqueue kthread for each NIC
+                        * queue, that number of created workqueue kthread is
+                        * (number of used NIC queue) * (number of CPUs) =
+                        * (number of CPUs) ^ 2 most often.
+                        *
+                        * The same NIC queue's interrupts are avoided by
+                        * masking the queue's interrupt. And different
+                        * NIC queue's interrupts use different struct work
+                        * (que->wq_cookie). So, "enqueued flag" to avoid
+                        * twice workqueue_enqueue() is not required .
+                        */
+                       workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+                           curcpu());
+               } else {
+                       softint_schedule(que->que_si);
+               }
+       } else
                ixgbe_enable_queue(adapter, que->msix);
 
        return 1;
@@ -3100,6 +3130,12 @@
            CTL_EOL) != 0)
                aprint_error_dev(dev, "could not create sysctl\n");
 
+       adapter->txrx_use_workqueue = ixgbe_txrx_workqueue;
+       if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
+           CTLTYPE_BOOL, "txrx_workqueue", SYSCTL_DESCR("Use workqueue for packet processing"),
+           NULL, 0, &adapter->txrx_use_workqueue, 0, CTL_CREATE, CTL_EOL) != 0)
+               aprint_error_dev(dev, "could not create sysctl\n");
+
 #ifdef IXGBE_DEBUG
        /* testing sysctls (for all devices) */
        if (sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_READWRITE,
@@ -3232,6 +3268,12 @@
                if (que->que_si != NULL)
                        softint_disestablish(que->que_si);
        }
+       if (adapter->txr_wq != NULL)
+               workqueue_destroy(adapter->txr_wq);
+       if (adapter->txr_wq_enqueued != NULL)
+               percpu_free(adapter->txr_wq_enqueued, sizeof(u_int));
+       if (adapter->que_wq != NULL)
+               workqueue_destroy(adapter->que_wq);
 
        /* Drain the Link queue */
        if (adapter->link_si != NULL) {
@@ -5800,9 +5842,18 @@
                IXGBE_TX_UNLOCK(txr);
        }
 
-       if (more)
-               softint_schedule(que->que_si);
-       else if (que->res != NULL) {
+       if (more) {
+               if (adapter->txrx_use_workqueue) {
+                       /*
+                        * "enqueued flag" is not required here.
+                        * See ixgbe_msix_que().
+                        */
+                       workqueue_enqueue(adapter->que_wq, &que->wq_cookie,
+                           curcpu());
+               } else {
+                       softint_schedule(que->que_si);
+               }
+       } else if (que->res != NULL) {
                /* Re-enable this interrupt */
                ixgbe_enable_queue(adapter, que->msix);
        } else
@@ -5812,6 +5863,21 @@
 } /* ixgbe_handle_que */
 
 /************************************************************************
+ * ixgbe_handle_que_work
+ ************************************************************************/
+static void
+ixgbe_handle_que_work(struct work *wk, void *context)
+{
+       struct ix_queue *que = container_of(wk, struct ix_queue, wq_cookie);
+
+       /*
+        * "enqueued flag" is not required here.
+        * See ixgbe_msix_que().
+        */
+       ixgbe_handle_que(que);
+}
+
+/************************************************************************
  * ixgbe_allocate_legacy - Setup the Legacy or MSI Interrupt handler
  ************************************************************************/
 static int
@@ -5906,7 +5972,6 @@
        return (0);
 } /* ixgbe_allocate_legacy */
 
-
 /************************************************************************
  * ixgbe_allocate_msix - Setup MSI-X Interrupt resources and handlers
  ************************************************************************/
@@ -5919,6 +5984,7 @@
        pci_chipset_tag_t pc;
        char            intrbuf[PCI_INTRSTR_LEN];
        char            intr_xname[32];
+       char            wqname[MAXCOMLEN];
        const char      *intrstr = NULL;
        int             error, vector = 0;
        int             cpu_id = 0;
@@ -6044,6 +6110,24 @@
                        goto err_out;
                }
        }
+       snprintf(wqname, sizeof(wqname), "%sdeferTx", device_xname(dev));
+       error = workqueue_create(&adapter->txr_wq, wqname,
+           ixgbe_deferred_mq_start_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+           IXGBE_WORKQUEUE_FLAGS);
+       if (error) {
+               aprint_error_dev(dev, "couldn't create workqueue for deferred Tx\n");
+               goto err_out;
+       }
+       adapter->txr_wq_enqueued = percpu_alloc(sizeof(u_int));
+
+       snprintf(wqname, sizeof(wqname), "%sTxRx", device_xname(dev));
+       error = workqueue_create(&adapter->que_wq, wqname,
+           ixgbe_handle_que_work, adapter, IXGBE_WORKQUEUE_PRI, IPL_NET,
+           IXGBE_WORKQUEUE_FLAGS);
+       if (error) {
+               aprint_error_dev(dev, "couldn't create workqueue for Tx/Rx\n");
+               goto err_out;
+       }
 
        /* and Link */
        cpu_id++;
diff -r 335f5a3440ea -r 5b8ed9ebaee3 sys/dev/pci/ixgbe/ixgbe.h
--- a/sys/dev/pci/ixgbe/ixgbe.h Tue Mar 06 10:59:04 2018 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.h Tue Mar 06 11:12:40 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.24.6.4 2018/03/01 19:02:15 martin Exp $ */
+/* $NetBSD: ixgbe.h,v 1.24.6.5 2018/03/06 11:12:40 martin Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -80,6 +80,7 @@
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sockio.h>
+#include <sys/percpu.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -331,6 +332,7 @@
        int              busy;
        struct tx_ring   *txr;
        struct rx_ring   *rxr;
+       struct work      wq_cookie;
        void             *que_si;
        struct evcnt     irqs;
        char             namebuf[32];
@@ -360,6 +362,7 @@
        ixgbe_dma_tag_t         *txtag;
        char                    mtx_name[16];
        pcq_t                   *txr_interq;
+       struct work             wq_cookie;
        void                    *txr_si;
 
        /* Flow Director */
@@ -499,6 +502,18 @@
 
        void                    *phy_si;   /* PHY intr tasklet */
 
+       bool                    txrx_use_workqueue;
+       struct workqueue        *que_wq;    /* workqueue for ixgbe_handle_que_work() */



Home | Main Index | Thread Index | Old Index