NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/55942: destroyed ena(4) evcnts cause panic



>Number:         55942
>Category:       kern
>Synopsis:       destroyed ena(4) evcnts cause panic
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Fri Jan 22 04:40:00 +0000 2021
>Originator:     KUSABA Takeshi
>Release:        -current 9.99.78
>Organization:
>Environment:
NetBSD  9.99.78 NetBSD 9.99.78 (GENERIC) #5: Fri Jan 22 13:06:17 JST 2021  XXX@/path/to/obj/sys/arch/amd64/compile/GENERIC amd64
>Description:
I'm running NetBSD on an AWS EC2 m5.large instance with ena(4).
When I check the event counters with "vmstat -ev", the kernel panics with:

[ 295.1010724] uvm_fault(0xffff8e3d787a0e10, 0x0, 1) -> e
[ 295.1010724] fatal page fault in supervisor mode
[ 295.1010724] trap type 6 code 0 rip 0xffffffff80e53627 cs 0x8 rflags 0x10246 cr2 0 ilevel 0 rsp 0xffffa880bc88adc8
[ 295.1010724] curlwp 0xffff8e3d78f4f140 pid 1064.1064 lowest kstack 0xffffa880bc8862c0
[ 295.1010724] panic: trap
[ 295.1010724] cpu0: Begin traceback...
[ 295.1010724] vpanic() at netbsd:vpanic+0x156
[ 295.1010724] snprintf() at netbsd:snprintf
[ 295.1010724] startlwp() at netbsd:startlwp
[ 295.1010724] alltraps() at netbsd:alltraps+0xc3
[ 295.1110729] sysctl_dispatch() at netbsd:sysctl_dispatch+0xa5
[ 295.1110729] sys___sysctl() at netbsd:sys___sysctl+0xc5
[ 295.1110729] syscall() at netbsd:syscall+0x23e
[ 295.1110729] --- syscall (number 202) ---
[ 295.1110729] netbsd:syscall+0x23e:
[ 295.1110729] cpu0: End traceback...
>How-To-Repeat:
1. Boot NetBSD on an ena(4)-enabled instance type (e.g. m5.large, c5.large).
2. Run "vmstat -ev".
>Fix:
ena_free_counters() and ena_reset_counters() destroy the evcnt structures.

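The following patch fixes it by passing an extra offset argument to both
functions, so that they start at that offset into the statistics structure
instead of at its beginning.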

diff --git a/sys/dev/pci/if_ena.c b/sys/dev/pci/if_ena.c
index 71cef6cf7a3a..152e4bce0eb5 100644
--- a/sys/dev/pci/if_ena.c
+++ b/sys/dev/pci/if_ena.c
@@ -470,18 +470,20 @@ ena_alloc_counters_hwstats(struct ena_hw_stats *st, int queue)
 	    + sizeof(st->rx_drops) == sizeof(*st));
 }
 static inline void
-ena_free_counters(struct evcnt *begin, int size)
+ena_free_counters(struct evcnt *begin, int size, int offset)
 {
 	struct evcnt *end = (struct evcnt *)((char *)begin + size);
+	begin = (struct evcnt *)((char *)begin + offset);
 
 	for (; begin < end; ++begin)
 		counter_u64_free(*begin);
 }
 
 static inline void
-ena_reset_counters(struct evcnt *begin, int size)
+ena_reset_counters(struct evcnt *begin, int size, int offset)
 {
 	struct evcnt *end = (struct evcnt *)((char *)begin + size);
+	begin = (struct evcnt *)((char *)begin + offset);
 
 	for (; begin < end; ++begin)
 		counter_u64_zero(*begin);
@@ -566,9 +568,9 @@ ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
 	struct ena_ring *rxr = &adapter->rx_ring[qid];
 
 	ena_free_counters((struct evcnt *)&txr->tx_stats,
-	    sizeof(txr->tx_stats));
+	    sizeof(txr->tx_stats), offsetof(struct ena_stats_tx, cnt));
 	ena_free_counters((struct evcnt *)&rxr->rx_stats,
-	    sizeof(rxr->rx_stats));
+	    sizeof(rxr->rx_stats), offsetof(struct ena_stats_rx, cnt));
 
 	ENA_RING_MTX_LOCK(txr);
 	drbr_free(txr->br, M_DEVBUF);
@@ -665,7 +667,8 @@ ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
 
 	/* Reset TX statistics. */
 	ena_reset_counters((struct evcnt *)&tx_ring->tx_stats,
-	    sizeof(tx_ring->tx_stats));
+	    sizeof(tx_ring->tx_stats),
+	    offsetof(struct ena_stats_tx, cnt));
 
 	tx_ring->next_to_use = 0;
 	tx_ring->next_to_clean = 0;
@@ -861,7 +864,8 @@ ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
 
 	/* Reset RX statistics. */
 	ena_reset_counters((struct evcnt *)&rx_ring->rx_stats,
-	    sizeof(rx_ring->rx_stats));
+	    sizeof(rx_ring->rx_stats),
+	    offsetof(struct ena_stats_rx, cnt));
 
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
@@ -2204,7 +2208,8 @@ ena_up_complete(struct ena_adapter *adapter)
 
 	ena_refill_all_rx_bufs(adapter);
 	ena_reset_counters((struct evcnt *)&adapter->hw_stats,
-	    sizeof(adapter->hw_stats));
+	    sizeof(adapter->hw_stats),
+	    offsetof(struct ena_hw_stats, rx_packets));
 
 	return (0);
 }
@@ -3895,9 +3900,11 @@ ena_detach(device_t pdev, int flags)
 	ena_free_all_io_rings_resources(adapter);
 
 	ena_free_counters((struct evcnt *)&adapter->hw_stats,
-	    sizeof(struct ena_hw_stats));
+	    sizeof(struct ena_hw_stats),
+	    offsetof(struct ena_hw_stats, rx_packets));
 	ena_free_counters((struct evcnt *)&adapter->dev_stats,
-	    sizeof(struct ena_stats_dev));
+	    sizeof(struct ena_stats_dev),
+            offsetof(struct ena_stats_dev, wd_expired));
 
 	if (likely(adapter->rss_support))
 		ena_com_rss_destroy(ena_dev);



Home | Main Index | Thread Index | Old Index