Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/pci/ixgbe Fix a bug that an mbuf chain which has mor...



details:   https://anonhg.NetBSD.org/src/rev/d87087e1d393
branches:  trunk
changeset: 1023579:d87087e1d393
user:      msaitoh <msaitoh%NetBSD.org@localhost>
date:      Thu Sep 16 09:55:28 2021 +0000

description:
Fix a bug that an mbuf chain which has more than 63488bytes MAY fail on TX.

 - Currently, The TX buffer dmamap's max number of segments is set to 32.
   MCLBYTES(== 2048) * 32 = 65536 and it's enough for IP_MAXPACKET(65535).
   If an mbuf chain has more than 32 mbufs, we call m_defrag() to make
   it lower than equal to 32, but it might not work. The reason is that
   our m_defrag() don't modify the first mbuf entry of the chain. e.g.:
   if an mbuf chain contains 63600bytes data and the first mbuf has 100bytes
   in the m_data, the new chain has 100+2048+2048+...+12 and the total number
   of the chain is not 32 but 33. It result in 43 TCP packets will drop.
 - One of the way to fix this problem is to change m_defrag() which add a
   new mbuf cluster to the first mbuf. It's need discussion.
 - Another solution is to change the max number of the TX DMA segment.
   It should be at least 33 to avoid the m_defrag()'s current limitation.
   The document (82599-X550 DS 7.2.1.1 "Transmit Storage in system Memory")
   says that a packet can be fragmented into 40 - WTHRESH - 2 (for 82598,
   the document say nothing and some people and code says it's unlimited).
   Currently WTHRESH is set to 8. 40 - 8 - 2 = 30. !?!?!? {Net,Free,Open}BSD
   and Linux use 32. Is that safe? Anyway, we change WTHRESH from 8 to 5
   to fit it. The added comment in ixgbe.h is based on DragonFly's
   though they don't use WTHRESH.
 - Yet another solution is to use the Tx Head Pointer Write Back function
   instead of WTHRESH based write back (see 82599-X550 DS 7.2.3.5.2
   "Tx Head Pointer Write Back" or 82598 "Transmit Completions Head Write Back"
   ). DragonFly, illumos and DPDK use it.
 - Yet yet another solution is to add tso_maxsize entry to struct ifnet
   and use it in the TCP stack.

diffstat:

 sys/dev/pci/ixgbe/ixgbe.c |  15 ++++++++-------
 sys/dev/pci/ixgbe/ixgbe.h |  21 ++++++++++++++++++---
 sys/dev/pci/ixgbe/ixv.c   |   9 +++++----
 3 files changed, 31 insertions(+), 14 deletions(-)

diffs (123 lines):

diff -r 14c2813808b2 -r d87087e1d393 sys/dev/pci/ixgbe/ixgbe.c
--- a/sys/dev/pci/ixgbe/ixgbe.c Thu Sep 16 08:33:24 2021 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.c Thu Sep 16 09:55:28 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.291 2021/09/16 07:08:26 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.292 2021/09/16 09:55:28 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -64,7 +64,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.291 2021/09/16 07:08:26 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.292 2021/09/16 09:55:28 msaitoh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -927,11 +927,12 @@
        hw->allow_unsupported_sfp = allow_unsupported_sfp;
 
        /* Pick up the 82599 settings */
-       if (hw->mac.type != ixgbe_mac_82598EB) {
+       if (hw->mac.type != ixgbe_mac_82598EB)
                hw->phy.smart_speed = ixgbe_smart_speed;
-               adapter->num_segs = IXGBE_82599_SCATTER;
-       } else
-               adapter->num_segs = IXGBE_82598_SCATTER;
+
+       /* Set the right number of segments */
+       KASSERT(IXGBE_82599_SCATTER_MAX >= IXGBE_SCATTER_DEFAULT);
+       adapter->num_segs = IXGBE_SCATTER_DEFAULT;
 
        /* Ensure SW/FW semaphore is free */
        ixgbe_init_swfw_semaphore(hw);
@@ -4102,7 +4103,7 @@
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
-               txdctl |= (8 << IXGBE_TXDCTL_WTHRESH_SHIFT);
+               txdctl |= IXGBE_TX_WTHRESH << IXGBE_TXDCTL_WTHRESH_SHIFT;
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
diff -r 14c2813808b2 -r d87087e1d393 sys/dev/pci/ixgbe/ixgbe.h
--- a/sys/dev/pci/ixgbe/ixgbe.h Thu Sep 16 08:33:24 2021 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.h Thu Sep 16 09:55:28 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.80 2021/09/03 08:57:58 msaitoh Exp $ */
+/* $NetBSD: ixgbe.h,v 1.81 2021/09/16 09:55:28 msaitoh Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -185,6 +185,23 @@
  */
 #define IXGBE_RX_COPY_LEN_MAX     (MHLEN - ETHER_ALIGN)
 
+/*
+ * Default TX WTHRESH value.
+ * Currently, we don't use the Tx Head Pointer Write Back function.
+ */
+#define IXGBE_TX_WTHRESH       5
+
+/*
+ * The max number of descriptors that one packet can use is 40 - WTHRESH - 2.
+ * Though 82598 does not have this limit, we don't want long TX chain.
+ * 33 should be large enough even for 64K TSO
+ * (32 * 2K mbuf cluster and 1 x mbuf header).
+ *
+ * Rerefence: 82599-X550 dataseet 7.2.1.1 "Transmit Storage in System Memory".
+ */
+#define IXGBE_82599_SCATTER_MAX        (40 - IXGBE_TX_WTHRESH - 2)
+#define IXGBE_SCATTER_DEFAULT  33
+
 /* Keep older OS drivers building... */
 #if !defined(SYSCTL_ADD_UQUAD)
 #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD
@@ -206,8 +223,6 @@
 #define HW_DEBUGOUT2(S, A, B)       if (DEBUG_HW) printf(S "\n", A, B)
 
 #define MAX_NUM_MULTICAST_ADDRESSES     128
-#define IXGBE_82598_SCATTER             100
-#define IXGBE_82599_SCATTER             32
 #define MSIX_82598_BAR                  3
 #define MSIX_82599_BAR                  4
 #define IXGBE_TSO_SIZE                  262140
diff -r 14c2813808b2 -r d87087e1d393 sys/dev/pci/ixgbe/ixv.c
--- a/sys/dev/pci/ixgbe/ixv.c   Thu Sep 16 08:33:24 2021 +0000
+++ b/sys/dev/pci/ixgbe/ixv.c   Thu Sep 16 09:55:28 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixv.c,v 1.167 2021/09/16 07:08:26 msaitoh Exp $ */
+/* $NetBSD: ixv.c,v 1.168 2021/09/16 09:55:28 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -35,7 +35,7 @@
 /*$FreeBSD: head/sys/dev/ixgbe/if_ixv.c 331224 2018-03-19 20:55:05Z erj $*/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ixv.c,v 1.167 2021/09/16 07:08:26 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ixv.c,v 1.168 2021/09/16 09:55:28 msaitoh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -431,7 +431,8 @@
        ixgbe_init_mbx_params_vf(hw);
 
        /* Set the right number of segments */
-       adapter->num_segs = IXGBE_82599_SCATTER;
+       KASSERT(IXGBE_82599_SCATTER_MAX >= IXGBE_SCATTER_DEFAULT);
+       adapter->num_segs = IXGBE_SCATTER_DEFAULT;
 
        /* Reset mbox api to 1.0 */
        error = hw->mac.ops.reset_hw(hw);
@@ -1716,7 +1717,7 @@
 
                /* Set WTHRESH to 8, burst writeback */
                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j));
-               txdctl |= (8 << IXGBE_TXDCTL_WTHRESH_SHIFT);
+               txdctl |= IXGBE_TX_WTHRESH << IXGBE_TXDCTL_WTHRESH_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl);
 
                /* Set the HW Tx Head and Tail indices */



Home | Main Index | Thread Index | Old Index