Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

ipmi(4) watchdog fixed



I got the ipmi(4) watchdog timer to work in -current.  One caveat is
that it takes several seconds for ipmi(4) to query the hardware about
its sensors, and the watchdog is not available until after that.

I have tried to make the watchdog timer available before the sensors,
but it seems that starting or tickling the watchdog disrupts the sensors
thread, in spite of the pains I take to synchronize access to the
hardware.  The attached patch is my failed attempt at making the ipmi
watchdog attach and work early during boot.

Dave
Index: sys/arch/x86/include/ipmivar.h
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/include/ipmivar.h,v
retrieving revision 1.10
diff -p -u -u -p -r1.10 ipmivar.h
--- sys/arch/x86/include/ipmivar.h      20 Jul 2009 19:11:30 -0000      1.10
+++ sys/arch/x86/include/ipmivar.h      20 Jul 2009 23:56:36 -0000
@@ -43,7 +43,6 @@
 #define IPMI_IF_SMIC_NREGS     3
 #define IPMI_IF_BT_NREGS       3
 
-struct ipmi_thread;
 struct ipmi_softc;
 
 struct ipmi_attach_args {
@@ -70,6 +69,16 @@ struct ipmi_if {
        int             (*probe)(struct ipmi_softc *);
 };
 
+struct ipmi_thread {
+       struct lwp              *t_thread;
+
+       kmutex_t                t_mtx;
+       kcondvar_t              t_cv;
+
+       volatile bool           t_run;
+       volatile bool           t_stopped;
+};
+
 struct ipmi_softc {
        device_t                sc_dev;
 
@@ -84,21 +93,20 @@ struct ipmi_softc {
 
        int                     sc_btseq;
 
-       struct lwp              *sc_kthread;
+       struct ipmi_thread      sc_tickle;
+       struct ipmi_thread      sc_poll;
 
        int                     sc_max_retries;
 
-       kmutex_t                sc_poll_mtx;
-       kcondvar_t              sc_poll_cv;
-
        kmutex_t                sc_cmd_mtx;
        kcondvar_t              sc_cmd_sleep;
 
        struct ipmi_bmc_args    *sc_iowait_args;
 
        struct ipmi_sensor      *current_sensor;
-       volatile bool           sc_thread_running;
+
        volatile bool           sc_tickle_due;
+
        struct sysmon_wdog      sc_wdog;
        struct sysmon_envsys    *sc_envsys;
        envsys_data_t           *sc_sensor;
@@ -107,11 +115,7 @@ struct ipmi_softc {
 
        char            sc_buf[64];
        bool            sc_buf_rsvd;
-};
 
-struct ipmi_thread {
-       struct ipmi_softc   *sc;
-       volatile int        running;
 };
 
 #define IPMI_WDOG_USE_NOLOG            __BIT(7)
@@ -163,8 +167,6 @@ struct ipmi_get_watchdog {
        uint16_t                wdog_countdown;
 } __packed;
 
-void   ipmi_poll_thread(void *);
-
 int    kcs_probe(struct ipmi_softc *);
 int    kcs_reset(struct ipmi_softc *);
 int    kcs_sendmsg(struct ipmi_softc *, int, const uint8_t *);
Index: sys/arch/x86/x86/ipmi.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/ipmi.c,v
retrieving revision 1.40
diff -p -u -u -p -r1.40 ipmi.c
--- sys/arch/x86/x86/ipmi.c     20 Jul 2009 19:25:07 -0000      1.40
+++ sys/arch/x86/x86/ipmi.c     20 Jul 2009 23:56:36 -0000
@@ -186,6 +186,12 @@ void       ipmi_delay(struct ipmi_softc *, int
 int    ipmi_watchdog_setmode(struct sysmon_wdog *);
 int    ipmi_watchdog_tickle(struct sysmon_wdog *);
 void   ipmi_dotickle(struct ipmi_softc *);
+static void    ipmi_tickle_thread(void *);
+static void    ipmi_poll_thread(void *);
+
+static void ipmi_thread_init(struct ipmi_thread *, int, const char *);
+static void ipmi_thread_stopwait(struct ipmi_thread *);
+static void ipmi_thread_destroy(struct ipmi_thread *);
 
 int    ipmi_intr(void *);
 int    ipmi_match(device_t, cfdata_t, void *);
@@ -1052,6 +1058,7 @@ done:
 void
 ipmi_buf_release(struct ipmi_softc *sc, char *buf)
 {
+       KASSERT(mutex_owned(&sc->sc_cmd_mtx));
        KASSERT(sc->sc_buf_rsvd);
        KASSERT(sc->sc_buf == buf);
        sc->sc_buf_rsvd = false;
@@ -1060,6 +1067,7 @@ ipmi_buf_release(struct ipmi_softc *sc, 
 char *
 ipmi_buf_acquire(struct ipmi_softc *sc, size_t len)
 {
+       KASSERT(mutex_owned(&sc->sc_cmd_mtx));
        KASSERT(len <= sizeof(sc->sc_buf));
 
        if (sc->sc_buf_rsvd || len > sizeof(sc->sc_buf))
@@ -1084,8 +1092,10 @@ ipmi_recvcmd(struct ipmi_softc *sc, int 
                return (-1);
        }
        /* Receive message from interface, copy out result data */
-       if (sc->sc_if->recvmsg(sc, maxlen + 3, &rawlen, buf))
+       if (sc->sc_if->recvmsg(sc, maxlen + 3, &rawlen, buf)) {
+               ipmi_buf_release(sc, buf);
                return (-1);
+       }
 
        *rxlen = rawlen - IPMI_MSG_DATARCV;
        if (*rxlen > 0 && data)
@@ -1111,6 +1121,7 @@ ipmi_recvcmd(struct ipmi_softc *sc, int 
 void
 ipmi_delay(struct ipmi_softc *sc, int ms)
 {
+       KASSERT(mutex_owned(&sc->sc_cmd_mtx));
        if (cold)
                delay(ms * 1000);
        else
@@ -1125,23 +1136,21 @@ get_sdr_partial(struct ipmi_softc *sc, u
        uint8_t cmd[256 + 8];
        int             len;
 
+       KASSERT(mutex_owned(&sc->sc_cmd_mtx));
+
        ((uint16_t *) cmd)[0] = reserveId;
        ((uint16_t *) cmd)[1] = recordId;
        cmd[4] = offset;
        cmd[5] = length;
-       mutex_enter(&sc->sc_cmd_mtx);
        if (ipmi_sendcmd(sc, BMC_SA, 0, STORAGE_NETFN, STORAGE_GET_SDR, 6,
            cmd)) {
-               mutex_exit(&sc->sc_cmd_mtx);
                printf("ipmi: sendcmd fails\n");
                return (-1);
        }
        if (ipmi_recvcmd(sc, 8 + length, &len, cmd)) {
-               mutex_exit(&sc->sc_cmd_mtx);
                printf("ipmi: getSdrPartial: recvcmd fails\n");
                return (-1);
        }
-       mutex_exit(&sc->sc_cmd_mtx);
        if (nxtRecordId)
                *nxtRecordId = *(uint16_t *) cmd;
        memcpy(buffer, cmd + 2, len - 2);
@@ -1164,27 +1173,24 @@ get_sdr(struct ipmi_softc *sc, uint16_t 
        /* Reserve SDR */
        if (ipmi_sendcmd(sc, BMC_SA, 0, STORAGE_NETFN, STORAGE_RESERVE_SDR,
            0, NULL)) {
-               mutex_exit(&sc->sc_cmd_mtx);
                printf("ipmi: reserve send fails\n");
-               return (-1);
+               goto err;
        }
        if (ipmi_recvcmd(sc, sizeof(resid), &len, &resid)) {
-               mutex_exit(&sc->sc_cmd_mtx);
                printf("ipmi: reserve recv fails\n");
-               return (-1);
+               goto err;
        }
-       mutex_exit(&sc->sc_cmd_mtx);
        /* Get SDR Header */
        if (get_sdr_partial(sc, recid, resid, 0, sizeof shdr, &shdr, nxtrec)) {
                printf("ipmi: get header fails\n");
-               return (-1);
+               goto err;
        }
        /* Allocate space for entire SDR Length of SDR in header does not
         * include header length */
        sdrlen = sizeof(shdr) + shdr.record_length;
        psdr = malloc(sdrlen, M_DEVBUF, M_WAITOK|M_CANFAIL);
        if (psdr == NULL)
-               return -1;
+               goto err;
 
        memcpy(psdr, &shdr, sizeof(shdr));
 
@@ -1198,15 +1204,22 @@ get_sdr(struct ipmi_softc *sc, uint16_t 
                    psdr + offset, NULL)) {
                        printf("ipmi: get chunk : %d,%d fails\n",
                            offset, len);
-                       return (-1);
+                       goto psdr_err;
                }
        }
+       mutex_exit(&sc->sc_cmd_mtx);
 
        /* Add SDR to sensor list, if not wanted, free buffer */
        if (add_sdr_sensor(sc, psdr) == 0)
                free(psdr, M_DEVBUF);
 
        return (0);
+psdr_err:
+       free(psdr, M_DEVBUF);
+err:
+       mutex_exit(&sc->sc_cmd_mtx);
+       printf("ipmi: get_sdr failed\n");
+       return -1;
 }
 
 int
@@ -1789,7 +1802,7 @@ ipmi_match(device_t parent, cfdata_t cf,
 
        sc.sc_if->probe(&sc);
 
-       mutex_init(&sc.sc_cmd_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
+       mutex_init(&sc.sc_cmd_mtx, MUTEX_DEFAULT, IPL_NONE);
        cv_init(&sc.sc_cmd_sleep, "ipmimatch");
        mutex_enter(&sc.sc_cmd_mtx);
        /* Identify BMC device early to detect lying bios */
@@ -1818,31 +1831,21 @@ unmap:
 }
 
 static void
-ipmi_thread(void *cookie)
+ipmi_poll_thread(void *cookie)
 {
        device_t                self = cookie;
        struct ipmi_softc       *sc = device_private(self);
-       struct ipmi_attach_args *ia = &sc->sc_ia;
+       struct ipmi_thread      *t = &sc->sc_poll;
        uint16_t                rec;
        struct ipmi_sensor *ipmi_s;
        int i;
        int current_index_typ[ENVSYS_NSENSORS];
 
-       sc->sc_thread_running = true;
-
-       /* lock around read_sensor so that no one messes with the bmc regs */
-       mutex_init(&sc->sc_cmd_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-       cv_init(&sc->sc_cmd_sleep, "ipmicmd");
-
-       mutex_init(&sc->sc_poll_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-       cv_init(&sc->sc_poll_cv, "ipmi_poll");
+       t->t_run = true;
 
        /* setup ticker */
        sc->sc_max_retries = hz * 90; /* 90 seconds max */
 
-       /* Map registers */
-       ipmi_map_regs(sc, ia);
-
        /* Scan SDRs, add sensors to list */
        for (rec = 0; rec != 0xFFFF;)
                if (get_sdr(sc, rec, &rec))
@@ -1909,53 +1912,90 @@ ipmi_thread(void *cookie)
        if (!SLIST_EMPTY(&ipmi_sensor_list))
                sc->current_sensor = SLIST_FIRST(&ipmi_sensor_list);
 
-       aprint_verbose_dev(self, "version %d.%d interface %s %sbase "
-           "0x%x/%x spacing %d\n",
-           ia->iaa_if_rev >> 4, ia->iaa_if_rev & 0xF, sc->sc_if->name,
-           ia->iaa_if_iotype == 'i' ? "io" : "mem", ia->iaa_if_iobase,
-           ia->iaa_if_iospacing * sc->sc_if->nregs, ia->iaa_if_iospacing);
-       if (ia->iaa_if_irq != -1)
-               aprint_verbose_dev(self, " irq %d\n", ia->iaa_if_irq);
-
        /* setup flag to exclude iic */
        ipmi_enabled = 1;
 
-       /* Setup Watchdog timer */
-       sc->sc_wdog.smw_name = device_xname(sc->sc_dev);
-       sc->sc_wdog.smw_cookie = sc;
-       sc->sc_wdog.smw_setmode = ipmi_watchdog_setmode;
-       sc->sc_wdog.smw_tickle = ipmi_watchdog_tickle;
-       sysmon_wdog_register(&sc->sc_wdog);
-
-       mutex_enter(&sc->sc_poll_mtx);
-       while (sc->sc_thread_running) {
+       mutex_enter(&t->t_mtx);
+       while (t->t_run) {
                ipmi_refresh_sensors(sc);
-               cv_timedwait(&sc->sc_poll_cv, &sc->sc_poll_mtx,
-                   SENSOR_REFRESH_RATE);
-               if (sc->sc_tickle_due) {
-                       ipmi_dotickle(sc);
-                       sc->sc_tickle_due = false;
-               }
+               cv_timedwait(&t->t_cv, &t->t_mtx, SENSOR_REFRESH_RATE);
        }
-       mutex_exit(&sc->sc_poll_mtx);
+       t->t_stopped = true;
+       cv_signal(&t->t_cv);
+       mutex_exit(&t->t_mtx);
        kthread_exit(0);
 }
 
+static void
+ipmi_thread_init(struct ipmi_thread *t, int ipl, const char *wchan)
+{
+       mutex_init(&t->t_mtx, MUTEX_DEFAULT, ipl);
+       cv_init(&t->t_cv, wchan);
+}
+
 void
 ipmi_attach(device_t parent, device_t self, void *aux)
 {
        struct ipmi_softc       *sc = device_private(self);
+       struct ipmi_attach_args *ia = &sc->sc_ia;
 
-       sc->sc_ia = *(struct ipmi_attach_args *)aux;
+       *ia = *(struct ipmi_attach_args *)aux;
        sc->sc_dev = self;
        aprint_normal("\n");
 
-       if (kthread_create(PRI_NONE, 0, NULL, ipmi_thread, self,
-           &sc->sc_kthread, "ipmi") != 0) {
+       /* lock around read_sensor so that no one messes with the bmc regs */
+       mutex_init(&sc->sc_cmd_mtx, MUTEX_DEFAULT, IPL_NONE);
+       cv_init(&sc->sc_cmd_sleep, "ipmicmd");
+
+       ipmi_thread_init(&sc->sc_tickle, IPL_SOFTCLOCK, "ipmitickle");
+       ipmi_thread_init(&sc->sc_poll, IPL_NONE, "ipmi_poll");
+
+       /* Map registers */
+       ipmi_map_regs(sc, ia);
+
+       aprint_verbose_dev(self, "version %d.%d interface %s %sbase "
+           "0x%x/%x spacing %d\n",
+           ia->iaa_if_rev >> 4, ia->iaa_if_rev & 0xF, sc->sc_if->name,
+           ia->iaa_if_iotype == 'i' ? "io" : "mem", ia->iaa_if_iobase,
+           ia->iaa_if_iospacing * sc->sc_if->nregs, ia->iaa_if_iospacing);
+       if (ia->iaa_if_irq != -1)
+               aprint_verbose_dev(self, " irq %d\n", ia->iaa_if_irq);
+
+       if (kthread_create(PRI_NONE, 0, NULL, ipmi_tickle_thread, self,
+           &sc->sc_tickle.t_thread, "ipmitickle") == 0) {
+               /* Setup Watchdog timer */
+               sc->sc_wdog.smw_name = device_xname(sc->sc_dev);
+               sc->sc_wdog.smw_cookie = sc;
+               sc->sc_wdog.smw_setmode = ipmi_watchdog_setmode;
+               sc->sc_wdog.smw_tickle = ipmi_watchdog_tickle;
+               sysmon_wdog_register(&sc->sc_wdog);
+       } else
+               aprint_error_dev(self, "could not establish watchdog tickle\n");
+
+       if (kthread_create(PRI_NONE, 0, NULL, ipmi_poll_thread, self,
+           &sc->sc_poll.t_thread, "ipmi") != 0) {
                aprint_error("ipmi: unable to create thread, disabled\n");
        }
 }
 
+static void
+ipmi_thread_stopwait(struct ipmi_thread *t)
+{
+       mutex_enter(&t->t_mtx);
+       t->t_run = false;
+       cv_signal(&t->t_cv);
+       while (!t->t_stopped)
+               cv_wait(&t->t_cv, &t->t_mtx);
+       mutex_exit(&t->t_mtx);
+}
+
+static void
+ipmi_thread_destroy(struct ipmi_thread *t)
+{
+       cv_destroy(&t->t_cv);
+       mutex_destroy(&t->t_mtx);
+}
+
 static int
 ipmi_detach(device_t self, int flags)
 {
@@ -1963,11 +2003,6 @@ ipmi_detach(device_t self, int flags)
        int rc;
        struct ipmi_softc *sc = device_private(self);
 
-       mutex_enter(&sc->sc_poll_mtx);
-       sc->sc_thread_running = false;
-       cv_signal(&sc->sc_poll_cv);
-       mutex_exit(&sc->sc_poll_mtx);
-
        if ((rc = sysmon_wdog_unregister(&sc->sc_wdog)) != 0) {
                if (rc == ERESTART)
                        rc = EINTR;
@@ -1982,6 +2017,9 @@ ipmi_detach(device_t self, int flags)
        if ((rc = ipmi_watchdog_setmode(&sc->sc_wdog)) != 0)
                return rc;
 
+       ipmi_thread_stopwait(&sc->sc_poll);
+       ipmi_thread_stopwait(&sc->sc_tickle);
+
        ipmi_enabled = 0;
 
        if (sc->sc_envsys != NULL) {
@@ -2002,8 +2040,8 @@ ipmi_detach(device_t self, int flags)
 
        ipmi_unmap_regs(sc);
 
-       cv_destroy(&sc->sc_poll_cv);
-       mutex_destroy(&sc->sc_poll_mtx);
+       ipmi_thread_destroy(&sc->sc_poll);
+       ipmi_thread_destroy(&sc->sc_tickle);
        cv_destroy(&sc->sc_cmd_sleep);
        mutex_destroy(&sc->sc_cmd_mtx);
 
@@ -2025,17 +2063,6 @@ ipmi_watchdog_setmode(struct sysmon_wdog
        else
                sc->sc_wdog.smw_period = smwdog->smw_period;
 
-       mutex_enter(&sc->sc_cmd_mtx);
-       /* see if we can properly task to the watchdog */
-       rc = ipmi_sendcmd(sc, BMC_SA, BMC_LUN, APP_NETFN,
-           APP_GET_WATCHDOG_TIMER, 0, NULL);
-       rc = ipmi_recvcmd(sc, sizeof(gwdog), &len, &gwdog);
-       mutex_exit(&sc->sc_cmd_mtx);
-       if (rc) {
-               printf("ipmi: APP_GET_WATCHDOG_TIMER returned 0x%x\n", rc);
-               return EIO;
-       }
-
        memset(&swdog, 0, sizeof(swdog));
        /* Period is 10ths/sec */
        swdog.wdog_timeout = htole16(sc->sc_wdog.smw_period * 10);
@@ -2046,6 +2073,16 @@ ipmi_watchdog_setmode(struct sysmon_wdog
        swdog.wdog_use = IPMI_WDOG_USE_USE_OS;
 
        mutex_enter(&sc->sc_cmd_mtx);
+       /* see if we can properly task to the watchdog */
+       rc = ipmi_sendcmd(sc, BMC_SA, BMC_LUN, APP_NETFN,
+           APP_GET_WATCHDOG_TIMER, 0, NULL);
+       rc = ipmi_recvcmd(sc, sizeof(gwdog), &len, &gwdog);
+       if (rc) {
+               printf("ipmi: APP_GET_WATCHDOG_TIMER returned 0x%x\n", rc);
+               mutex_exit(&sc->sc_cmd_mtx);
+               return EIO;
+       }
+
        if ((rc = ipmi_sendcmd(sc, BMC_SA, BMC_LUN, APP_NETFN,
            APP_SET_WATCHDOG_TIMER, sizeof(swdog), &swdog)) == 0)
                rc = ipmi_recvcmd(sc, 0, &len, NULL);
@@ -2062,14 +2099,39 @@ int
 ipmi_watchdog_tickle(struct sysmon_wdog *smwdog)
 {
        struct ipmi_softc       *sc = smwdog->smw_cookie;
+       struct ipmi_thread *t = &sc->sc_tickle;
 
-       mutex_enter(&sc->sc_poll_mtx);
+       mutex_enter(&t->t_mtx);
        sc->sc_tickle_due = true;
-       cv_signal(&sc->sc_poll_cv);
-       mutex_exit(&sc->sc_poll_mtx);
+       cv_signal(&t->t_cv);
+       mutex_exit(&t->t_mtx);
        return 0;
 }
 
+static void
+ipmi_tickle_thread(void *arg)
+{
+       device_t self = arg;
+       struct ipmi_softc *sc = device_private(self);
+       struct ipmi_thread *t = &sc->sc_tickle;
+
+       t->t_run = true;
+
+       mutex_enter(&t->t_mtx);
+       while (t->t_run) {
+               if (sc->sc_tickle_due) {
+                       ipmi_dotickle(sc);
+                       sc->sc_tickle_due = false;
+               }
+               cv_wait(&t->t_cv, &t->t_mtx);
+       }
+       t->t_stopped = true;
+       cv_signal(&t->t_cv);
+       mutex_exit(&t->t_mtx);
+
+       kthread_exit(0);
+}
+
 void
 ipmi_dotickle(struct ipmi_softc *sc)
 {


Home | Main Index | Thread Index | Old Index