Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/pci/ixgbe - Fix a bug that RX may stall on heavy loa...



details:   https://anonhg.NetBSD.org/src/rev/02fa7c7b13a9
branches:  trunk
changeset: 829968:02fa7c7b13a9
user:      msaitoh <msaitoh%NetBSD.org@localhost>
date:      Tue Feb 20 07:24:37 2018 +0000

description:
- Fix a bug where RX may stall under heavy load on ixg(4), derived from
 FreeBSD's AIM (Auto Interrupt Moderation) bug.
 When I use a machine as an NFS client, sometimes one of the queue pairs
 doesn't get any interrupt other than every second tick via
 ixgbe_local_timer1().
 When the problem occurred, the queue pair's hw.ixgM.qN.interrupt_rate was
 always 500000. While the problem is occurring, setting
 hw.ixgM.qN.interrupt_rate lower than 166667 recovers from the stall, i.e.:

  sysctl -w hw.ixgM.qN.interrupt_rate=166667 (doesn't recover)
  sysctl -w hw.ixgM.qN.interrupt_rate=166666 (recover)

  The relation between the interrupt_rate and EITR's ITR_INTERVAL field is as
 follows:

 int_rate | EITR[11:0]   | interval in us | recover |
          |(ITR_INTERVAL)| (10G and 1G)   |         |
 ---------+--------------+----------------+---------+
   500000 | 0x008(0)     |              2 |     not |
   166667 | 0x010(1)     |              4 |     not |
   166666 | 0x018(2)     |              6 | recover |

  The reason why int_rate becomes 500000 is that xgbe_tx_eof() doesn't
 increment rxr->packets(*1). Even if we fix rxr->packets' bug, interrupt_rate
 might become greater than 166666 and it might cause stall.

  While reading datasheets, knakahara noticed a section titled "ITR
 Affect on RSC Functionality". It says "When RSC is enabled on specific RX
 queues, the associated ITR interval with these queues must be enabled and must
 be larger (in time units) than RSC delay". Currently, the RSC_DELAY field in
 the GPIE register is 0, which means 4us for 10G and 1G. The smallest
 ITR_INTERVAL value greater than 4us is 6us == 166666. Yes, BINGO!

  This description is noted in the 82599 and newer datasheets and not in the
 82598 datasheet. I don't know whether the 82598 has this limitation, but I
 apply this limitation to all chips.

 (*1) Note that this bug is going to be fixed in the next commit, to
 distinguish between the two different bugs.

- The bitfield of the EITR register is different between 82598 and others.
 Only ixgbe_msix_que() took care of it. Add a new function ixgbe_eitr_write()
 and use it in all functions that modify ITR_INTERVAL.

XXX pullup-8

diffstat:

 sys/dev/pci/ixgbe/ixgbe.c      |  61 +++++++++++++++++++++++++++++++++--------
 sys/dev/pci/ixgbe/ixgbe_type.h |   7 ++++-
 2 files changed, 54 insertions(+), 14 deletions(-)

diffs (142 lines):

diff -r 111fafafe4ca -r 02fa7c7b13a9 sys/dev/pci/ixgbe/ixgbe.c
--- a/sys/dev/pci/ixgbe/ixgbe.c Tue Feb 20 05:10:51 2018 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.c Tue Feb 20 07:24:37 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.123 2018/02/16 10:11:21 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.124 2018/02/20 07:24:37 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -207,6 +207,7 @@
 static void    ixgbe_set_ivar(struct adapter *, u8, u8, s8);
 static void    ixgbe_configure_ivars(struct adapter *);
 static u8 *    ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
+static void    ixgbe_eitr_write(struct ix_queue *, uint32_t);
 
 static void    ixgbe_setup_vlan_hw_support(struct adapter *);
 #if 0
@@ -2465,8 +2466,7 @@
         *    the last interval.
         */
        if (que->eitr_setting)
-               IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix),
-                   que->eitr_setting);
+               ixgbe_eitr_write(que, que->eitr_setting);
 
        que->eitr_setting = 0;
 
@@ -2489,11 +2489,18 @@
        else
                newitr = (newitr / 2);
 
-        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
-                newitr |= newitr << 16;
-        else
-                newitr |= IXGBE_EITR_CNT_WDIS;
-                 
+       /*
+        * When RSC is used, ITR interval must be larger than RSC_DELAY.
+        * Currently, we use 2us for RSC_DELAY. The minimum value is always
+        * greater than 2us on 100M (and 10M?(not documented)), but it's not
+        * on 1G and higher.
+        */
+       if ((adapter->link_speed != IXGBE_LINK_SPEED_100_FULL)
+           && (adapter->link_speed != IXGBE_LINK_SPEED_10_FULL)) {
+               if (newitr < IXGBE_MIN_RSC_EITR_10G1G)
+                       newitr = IXGBE_MIN_RSC_EITR_10G1G;
+       }
+
         /* save for next interrupt */
         que->eitr_setting = newitr;
 
@@ -2933,6 +2940,21 @@
        return 1;
 } /* ixgbe_msix_link */
 
+static void
+ixgbe_eitr_write(struct ix_queue *que, uint32_t itr)
+{
+       struct adapter *adapter = que->adapter;
+       
+        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+                itr |= itr << 16;
+        else
+                itr |= IXGBE_EITR_CNT_WDIS;
+
+       IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix),
+           itr);
+}
+
+
 /************************************************************************
  * ixgbe_sysctl_interrupt_rate_handler
  ************************************************************************/
@@ -2941,6 +2963,7 @@
 {
        struct sysctlnode node = *rnode;
        struct ix_queue *que = (struct ix_queue *)node.sysctl_data;
+       struct adapter  *adapter = que->adapter;
        uint32_t reg, usec, rate;
        int error;
 
@@ -2957,14 +2980,26 @@
        if (error || newp == NULL)
                return error;
        reg &= ~0xfff; /* default, no limitation */
-       ixgbe_max_interrupt_rate = 0;
        if (rate > 0 && rate < 500000) {
                if (rate < 1000)
                        rate = 1000;
+               reg |= ((4000000/rate) & 0xff8);
+               /*
+                * When RSC is used, ITR interval must be larger than
+                * RSC_DELAY. Currently, we use 2us for RSC_DELAY.
+                * The minimum value is always greater than 2us on 100M
+                * (and 10M?(not documented)), but it's not on 1G and higher.
+                */
+               if ((adapter->link_speed != IXGBE_LINK_SPEED_100_FULL)
+                   && (adapter->link_speed != IXGBE_LINK_SPEED_10_FULL)) {
+                       if ((adapter->num_queues > 1)
+                           && (reg < IXGBE_MIN_RSC_EITR_10G1G))
+                               return EINVAL;
+               }
                ixgbe_max_interrupt_rate = rate;
-               reg |= ((4000000/rate) & 0xff8);
-       }
-       IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
+       } else
+               ixgbe_max_interrupt_rate = 0;
+       ixgbe_eitr_write(que, reg);
 
        return (0);
 } /* ixgbe_sysctl_interrupt_rate_handler */
@@ -3886,7 +3921,7 @@
                /* ... and the TX */
                ixgbe_set_ivar(adapter, txr->me, que->msix, 1);
                /* Set an Initial EITR value */
-               IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr);
+               ixgbe_eitr_write(que, newitr);
        }
 
        /* For the Link interrupt */
diff -r 111fafafe4ca -r 02fa7c7b13a9 sys/dev/pci/ixgbe/ixgbe_type.h
--- a/sys/dev/pci/ixgbe/ixgbe_type.h    Tue Feb 20 05:10:51 2018 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe_type.h    Tue Feb 20 07:24:37 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe_type.h,v 1.30 2017/12/06 04:08:50 msaitoh Exp $ */
+/* $NetBSD: ixgbe_type.h,v 1.31 2018/02/20 07:24:37 msaitoh Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -312,6 +312,11 @@
  */
 #define IXGBE_MAX_INT_RATE     488281
 #define IXGBE_MIN_INT_RATE     956
+/* On 82599 and newer, minimum RSC_DELAY is 4us. ITR interval must be larger
+ * than RSC_DELAY if RSC is used. ITR_INTERVAL is in 2(.048) us units on 10G
+ * and 1G. The minimun EITR is 6us.
+ */
+#define IXGBE_MIN_RSC_EITR_10G1G 0x00000018
 #define IXGBE_MAX_EITR         0x00000FF8
 #define IXGBE_MIN_EITR         8
 #define IXGBE_EITR(_i)         (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \



Home | Main Index | Thread Index | Old Index