tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kpause()'s timeout value



On 2020/06/19 3:17, SAITOH Masanobu wrote:
Hi.

  I'm now working to reduce ixgbe's busy loop using with kpause()[*1].
I wrote the following code:

#define usec_delay(x) ixgbe_delay(x)
#define msec_delay(x) ixgbe_delay((x) * 1000)
void
ixgbe_delay(unsigned int us)
{

         if (__predict_false(cold))
                 delay(us);
         else if ((us / 1000) >= hztoms(1))
                 kpause("ixgdly", false, mstohz(us / 1000), NULL);
         else
                 delay(us);
}

Does kpause(x, y, 1, z) guarantee to wait at least 1 tick length
of time? Or, does it wakeup the next tick? Should I add extra 1 tick
to kpause()?

I wrote the following code to see the real delay:

void
ixgbe_delay(unsigned int us)
{
#if 0
        if (__predict_false(cold))
                delay(us);
        else if ((us / 1000) >= hztoms(1))
                kpause("ixgdly", false, mstohz(us / 1000), NULL);
        else
                delay(us);
#else
        if (__predict_false(cold))
                delay(us);
        else if ((us / 1000) >= hztoms(1)) {
                struct timeval t0, t1, res;
                microtime(&t0);
                kpause("ixgdly", false, mstohz(us / 1000), NULL);
                microtime(&t1);
                timersub(&t1, &t0, &res);
                if (res.tv_usec < us)
                        printf("%d<%d\n", res.tv_usec, us);
        } else
                delay(us);
#endif
}

and the output was:
285<10000
7664<10000
7576<10000
7593<10000
7075<10000
7071<10000
7111<10000
7622<10000
7601<10000
7597<10000
7595<10000
7601<10000
7584<10000
7596<10000
7576<10000
7077<10000
7084<10000
7099<10000
7600<10000
7616<10000
7585<10000
7566<10000
8075<10000
7118<10000
7573<10000
7547<10000
7107<10000
7052<10000
7103<10000
7585<10000
7548<10000
7616<10000
7597<10000
7566<10000
7588<10000
7621<10000
7573<10000
7066<10000
7098<10000
145432<150000
195283<200000
140772<150000
47368<50000
47407<50000
46049<50000
9712<10000
9724<10000
9742<10000

And then I modified the code to add extra 1 tick:

-                kpause("ixgdly", false, mstohz(us / 1000), NULL);
+                kpause("ixgdly", false, mstohz(us / 1000) + 1, NULL);

The debug output disappeared. The result says the extra 1 tick is required.
There are some code that kpause() is used with mstohz(). It would be good
to check whether those code do correctly or not (If my analysis is correct...).

(The following list was made by just grepped with kpause and mstohz)

./arch/arm/omap/ti_iic.c:                       kpause("tiiic", false, mstohz(50), NULL);
./arch/arm/sunxi/sunxi_hdmi.c:                  kpause("hdmiddc", false, mstohz(10), &sc->sc_exec_lock);
./arch/arm/sunxi/sunxi_hdmi.c:          kpause("hdmihotplug", false, mstohz(1000), NULL);
./arch/arm/ti/ti_iic.c:                 kpause("tiiic", false, mstohz(50), NULL);
./arch/macppc/dev/smu.c:                kpause("fanctrl", true, mstohz(30000), NULL);
./arch/x86/x86/intr.c:  (void)kpause("intrdist", false, mstohz(10), NULL);
./arch/x86/x86/pmap.c:          kpause("gntmap", false, mstohz(1), NULL);
./arch/xen/xen/privcmd.c:                       if (kpause("xnoent", 1, mstohz(100), NULL))
./dev/fdt/fdt_panel.c:          kpause("panele", false, mstohz(200), NULL);
./dev/fdt/fdt_panel.c:          kpause("panelp", false, mstohz(210), NULL);
./dev/i2c/fcu.c:                kpause("fanctrl", true, mstohz(sc->sc_pwm ? 1000 : 5000), NULL);
./dev/i2c/tsl256x.c:            (void) kpause("tslluxwait", false, mstohz(ms) + 1, NULL);
./dev/ic/dwc_mmc.c:                     kpause("dwcmmcrst", false, uimax(mstohz(1), 1), &sc->sc_lock);
./dev/onewire/owtemp.c: (void)kpause("owtemp", false, mstohz(750 + 10), NULL);
./dev/pci/cxdtv.c:      kpause("cxdtvrst", false, MAX(1, mstohz(1)), NULL);
./dev/pci/pccbb.c:              kpause("pccbb", false, mstohz(millis), NULL);
./dev/pci/pccbb.c:      kpause(wmesg, false, uimax(mstohz(timo), 1), NULL);
./external/bsd/drm2/include/linux/delay.h:              (void)kpause("lnxmslep", false, mstohz(msec), NULL);
./external/bsd/dwc2/dwc2.h:             (void)kpause("mdelay", false, mstohz(msec), NULL);
./external/bsd/ena-com/ena_plat.h:              return kpause("enaw", false, mstohz(x), NULL);
./kern/kern_tc.c:               (void)kpause("tcdetach", false, mstohz(10), NULL);
./ufs/chfs/chfs_gc.c:   kpause("chfsthjoin", false, mstohz(1000), NULL);
./ufs/chfs/chfs_gc.c:                   kpause("chvncrea", true, mstohz(50), NULL);
./ufs/chfs/chfs_gc.c:                   kpause("chvncrea", true, mstohz(50), NULL);
./ufs/chfs/ebh.c:       kpause("chfsebhjointh", false, mstohz(1000), NULL);


man 9 "tsleep" says:
      timo      If non-zero, the process will sleep for at most timo/hz
                seconds.  If this amount of time elapses and no wakeup(ident)
                has occurred, and no signal (if PCATCH was set) was posted,
                tsleep() will return EWOULDBLOCK.

it sleep "at most timeo/hz" seconds. So some code use it like:

	tsleep(ident, priority, wmsg, 1 + 1);

to sleep AT LEAST 1 tick. Is this type of treatment required
on kpause()? man 9 kpause has no such note.

For example, usb_subr.c rev. 1.268's usb_delay_ms_locked() is as follows:
------------------------
void
usb_delay_ms_locked(struct usbd_bus *bus, u_int ms, kmutex_t *lock)
{
         /* Wait at least two clock ticks so we know the time has passed. */
         if (bus->ub_usepolling || cold)
                 delay((ms+1) * 1000);
         else
                 kpause("usbdly", false, (ms*hz+999)/1000 + 1, lock);
}
-------------------------
In the rev. 1.1. It was.
-------------------------
void
usbd_delay_ms(ms)
	int ms;
{
	/* Wait at least two clock ticks so we know the time has passed. */
	if (usbd_use_polling)
		delay((ms+1) * 1000);
	else
		tsleep(&ms, PRIBIO, "usbdly", (ms*hz+999)/1000 + 1);
}
-------------------------

If it's not required to add extra 1 tick on kpause(), usb_delay_ms_locked()
can be simplified.

[*1] http://www.netbsd.org/~msaitoh/ixgbe-sleep-20200618-0.dif

Thanks in advance.



--
-----------------------------------------------
                SAITOH Masanobu (msaitoh%execsw.org@localhost
                                 msaitoh%netbsd.org@localhost)


Home | Main Index | Thread Index | Old Index