NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/59339: heartbeat watchdog fires since 10.99.14



The following reply was made to PR kern/59339; it has been noted by GNATS.

From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
To: Thomas Klausner <wiz%NetBSD.org@localhost>
Cc: gnats-bugs%NetBSD.org@localhost, netbsd-bugs%NetBSD.org@localhost,
	Patrick Welche <prlw1%welche.eu@localhost>
Subject: Re: kern/59339: heartbeat watchdog fires since 10.99.14
Date: Fri, 9 May 2025 13:32:04 +0000

 This is a multi-part message in MIME format.
 --=_LIipI4/8TCpaNLGJjkojmsUX7yA77xBy
 Content-Transfer-Encoding: quoted-printable
 
 > Date: Fri, 9 May 2025 14:59:02 +0200
 > From: Thomas Klausner <wiz%NetBSD.org@localhost>
 >
 > Seems to have been the same problem both times:
 >
 > (gdb) bt
 > #4  0xffffffff80dff166 in itimer_arm_real (it=0xffff89c5007f1e80) at /usr/src/sys/kern/kern_time.c:837
 > #5  0xffffffff80e007eb in itimer_settime (it=it@entry=0xffff89c5007f1e80) at /usr/src/sys/kern/kern_time.c:946
 
 OK, that patch was a dud -- the assertion may not pass in that path,
 but it should pass via itimer_callout.  Try the attached one instead?
 
 --=_LIipI4/8TCpaNLGJjkojmsUX7yA77xBy
 Content-Type: text/plain; charset="ISO-8859-1"; name="pr59339-itimercalloutassert-v2"
 Content-Transfer-Encoding: quoted-printable
 Content-Disposition: attachment; filename="pr59339-itimercalloutassert-v2.patch"
 
 # HG changeset patch
 # User Taylor R Campbell <riastradh%NetBSD.org@localhost>
 # Date 1746796090 0
 #      Fri May 09 13:08:10 2025 +0000
 # Branch trunk
 # Node ID 05a98ecf5eda0704c8b94356f22e97870c808378
 # Parent  55a6ded9e19ed0908fbb94d13fa69cda6d0b4571
 # EXP-Topic riastradh-pr59339-heartbeat
 WIP: Rearming a timer should never go backwards.  Assert it.
 
 Handle overflow case gracefully while here (not likely relevant to
 the symptoms we've seen, though).
 
 PR kern/59339: heartbeat watchdog fires since 10.99.14
 
 diff -r 55a6ded9e19e -r 05a98ecf5eda sys/kern/kern_time.c
 --- a/sys/kern/kern_time.c	Thu May 08 20:51:40 2025 +0000
 +++ b/sys/kern/kern_time.c	Fri May 09 13:08:10 2025 +0000
 @@ -837,6 +837,55 @@ itimer_arm_real(struct itimer * const it
  }
 =20
  /*
 + * itimer_rearm_real:
 + *
 + *	Re-arm a non-virtual timer, given the current clock readout.
 + */
 +static void
 +itimer_rearm_real(struct itimer * const it, const struct timespec * const =
 now)
 +{
 +	const struct timespec * const interval =3D &it->it_time.it_interval;
 +	const struct timespec * const next =3D &it->it_time.it_value;
 +	struct timespec delta;
 +	int ticks;
 +
 +	KASSERT(!it->it_dying);
 +	KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
 +	KASSERT(!callout_pending(&it->it_ch));
 +
 +	if (__predict_true(timespecisset(next))) {
 +		KASSERTMSG(timespeccmp(now, next, <),
 +		    "[clock %u]"
 +		    " now=3D%lld.%09ld interval=3D%lld.%09ld then=3D%lld.%09ld",
 +		    it->it_clockid,
 +		    (long long)now->tv_sec, (long)now->tv_nsec,
 +		    (long long)interval->tv_sec, (long)interval->tv_nsec,
 +		    (long long)next->tv_sec, (long)next->tv_nsec);
 +		timespecsub(next, now, &delta);
 +	} else {
 +		/*
 +		 * If the arithmetic overflowed, just take the interval
 +		 * as the time to wait.  This is unlikely to happen
 +		 * unless you are messing with your clock to set it
 +		 * ahead by hundreds of years.
 +		 */
 +		delta =3D it->it_time.it_interval; /* overflow */
 +	}
 +	ticks =3D tstohz(&delta);
 +	KASSERTMSG(ticks > 0,
 +	    "[clock %u]"
 +	    " now=3D%lld.%09ld interval=3D%lld.%09ld then=3D%lld.%09ld"
 +	    " delta=3D%lld.%09ld ticks=3D%d",
 +	    it->it_clockid,
 +	    (long long)now->tv_sec, (long)now->tv_nsec,
 +	    (long long)interval->tv_sec, (long)interval->tv_nsec,
 +	    (long long)next->tv_sec, (long)next->tv_nsec,
 +	    (long long)delta.tv_sec, (long)delta.tv_nsec,
 +	    ticks);
 +	callout_schedule(&it->it_ch, ticks);
 +}
 +
 +/*
   * itimer_callout:
   *
   *	Callout to expire a non-virtual timer.  Queue it up for processing,
 @@ -872,6 +921,18 @@ itimer_callout(void *arg)
  	 * now, compute the next itimer value and count overruns.
  	 */
  	itimer_transition(&it->it_time, &now, &next, &overruns);
 +	KASSERTMSG(timespeccmp(&now, &next, <),
 +	    "[clock %u]"
 +	    " it->it_time.it_value=3D%lld.%09ld"
 +	    " it->it_time.it_interval=3D%lld.%09ld"
 +	    " now=3D%lld.%09ld next=3D%lld.%09ld",
 +	    it->it_clockid,
 +	    (long long)it->it_time.it_value.tv_sec,
 +	    (long)it->it_time.it_value.tv_nsec,
 +	    (long long)it->it_time.it_interval.tv_sec,
 +	    (long)it->it_time.it_interval.tv_nsec,
 +	    (long long)now.tv_sec, (long)now.tv_nsec,
 +	    (long long)next.tv_sec, (long)next.tv_nsec);
  	it->it_time.it_value =3D next;
  	it->it_overruns +=3D overruns;
 =20
 @@ -879,7 +940,7 @@ itimer_callout(void *arg)
  	 * Reset the callout, if it's not going away.
  	 */
  	if (!it->it_dying)
 -		itimer_arm_real(it);
 +		itimer_rearm_real(it, &now);
  	itimer_unlock();
  }
 =20
 
 --=_LIipI4/8TCpaNLGJjkojmsUX7yA77xBy--
 


Home | Main Index | Thread Index | Old Index