Subject: Re: kern/30233: raidstrategy() isn't interrupt-safe
To: None <oster@netbsd.org, gnats-admin@netbsd.org, netbsd-bugs@netbsd.org>
From: Greg Oster <oster@cs.usask.ca>
List: netbsd-bugs
Date: 09/24/2005 23:24:01
The following reply was made to PR kern/30233; it has been noted by GNATS.

From: Greg Oster <oster@cs.usask.ca>
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/30233: raidstrategy() isn't interrupt-safe 
Date: Sat, 24 Sep 2005 17:23:17 -0600

 Manuel Bouyer writes:
 [snip]
 > >How-To-Repeat:
 > 	An easy way to trigger a pool_get() panic is to export a partition
 > 	from a raidframe device to a guest Xen domain, as reported by
 > 	Yoshito Komatsu on port-xen.
 > >Fix:
 
 The following will hopefully fix this problem.  I'm currently testing 
 the impact of these changes on RAIDframe in general, but would be 
 interested to hear reports on whether this actually fixes the problem
 mentioned in this PR.
 
 Later...
 
 Greg Oster
 
 Index: rf_engine.c
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/raidframe/rf_engine.c,v
 retrieving revision 1.35
 diff -p -r1.35 rf_engine.c
 *** rf_engine.c	27 Feb 2005 00:27:44 -0000	1.35
 --- rf_engine.c	24 Sep 2005 23:19:07 -0000
 *************** __KERNEL_RCSID(0, "$NetBSD: rf_engine.c,
 *** 67,72 ****
 --- 67,73 ----
   #include "rf_dagutils.h"
   #include "rf_shutdown.h"
   #include "rf_raid.h"
 + #include "rf_kintf.h"
   
   static void rf_ShutdownEngine(void *);
   static void DAGExecutionThread(RF_ThreadArg_t arg);
 *************** DAGExecutionThread(RF_ThreadArg_t arg)
 *** 825,835 ****
   }
   
   /*
 !  * rf_RaidIOThread() -- When I/O to a component completes,
 !  * KernelWakeupFunc() puts the completed request onto raidPtr->iodone
 !  * TAILQ.  This function looks after requests on that queue by calling
 !  * rf_DiskIOComplete() for the request, and by calling any required
 !  * CompleteFunc for the request.
    */
   
   static void
 --- 826,838 ----
   }
   
   /*
 !  * rf_RaidIOThread() -- When I/O to a component begins, raidstrategy()
 !  * puts the I/O on a buf_queue, and then signals raidPtr->iodone.  If
 !  * necessary, this function calls raidstart() to initiate the I/O.
 !  * When I/O to a component completes, KernelWakeupFunc() puts the
 !  * completed request onto raidPtr->iodone TAILQ.  This function looks
 !  * after requests on that queue by calling rf_DiskIOComplete() for the
 !  * request, and by calling any required CompleteFunc for the request.  
    */
   
   static void
 *************** rf_RaidIOThread(RF_ThreadArg_t arg)
 *** 846,852 ****
   
   	while (!raidPtr->shutdown_raidio) {
   		/* if there is nothing to do, then snooze. */
 ! 		if (TAILQ_EMPTY(&(raidPtr->iodone))) {
   			ltsleep(&(raidPtr->iodone), PRIBIO, "raidiow", 0,
   				&(raidPtr->iodone_lock));
   		}
 --- 849,856 ----
   
   	while (!raidPtr->shutdown_raidio) {
   		/* if there is nothing to do, then snooze. */
 ! 		if (TAILQ_EMPTY(&(raidPtr->iodone)) &&
 ! 		    rf_buf_queue_check(raidPtr->raidid)) {
   			ltsleep(&(raidPtr->iodone), PRIBIO, "raidiow", 0,
   				&(raidPtr->iodone_lock));
   		}
 *************** rf_RaidIOThread(RF_ThreadArg_t arg)
 *** 859,864 ****
 --- 863,874 ----
   			(req->CompleteFunc) (req->argument, req->error);
   			simple_lock(&(raidPtr->iodone_lock));
   		}
 + 
 + 		/* process any pending outgoing IO */
 + 		simple_unlock(&(raidPtr->iodone_lock));
 + 		raidstart(raidPtr);
 + 		simple_lock(&(raidPtr->iodone_lock));
 + 
   	}
   
   	/* Let rf_ShutdownEngine know that we're done... */
 Index: rf_netbsd.h
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/raidframe/rf_netbsd.h,v
 retrieving revision 1.23
 diff -p -r1.23 rf_netbsd.h
 *** rf_netbsd.h	29 May 2005 22:03:09 -0000	1.23
 --- rf_netbsd.h	24 Sep 2005 23:19:08 -0000
 *************** struct RF_Pools_s {
 *** 90,95 ****
 --- 90,96 ----
   
   extern struct RF_Pools_s rf_pools;
   void rf_pool_init(struct pool *, size_t, const char *, size_t, size_t);
 + int rf_buf_queue_check(int);
   
   /* XXX probably belongs in a different .h file. */
   typedef struct RF_AutoConfig_s {
 Index: rf_netbsdkintf.c
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/raidframe/rf_netbsdkintf.c,v
 retrieving revision 1.189
 diff -p -r1.189 rf_netbsdkintf.c
 *** rf_netbsdkintf.c	24 Sep 2005 22:51:55 -0000	1.189
 --- rf_netbsdkintf.c	24 Sep 2005 23:19:09 -0000
 *************** raidstrategy(struct buf *bp)
 *** 735,741 ****
   	/* stuff it onto our queue */
   	BUFQ_PUT(&rs->buf_queue, bp);
   
 ! 	raidstart(raidPtrs[raidID]);
   
   	splx(s);
   }
 --- 735,742 ----
   	/* stuff it onto our queue */
   	BUFQ_PUT(&rs->buf_queue, bp);
   
 ! 	/* scheduled the IO to happen at the next convenient time */
 ! 	wakeup(&(raidPtrs[raidID]->iodone));
   
   	splx(s);
   }
 *************** rf_pool_init(struct pool *p, size_t size
 *** 3330,3332 ****
 --- 3331,3353 ----
   	pool_prime(p, xmin);
   	pool_setlowat(p, xmin);
   }
 + 
 + /*
 + 
 + rf_buf_queue_check(int raidid) -- looks into the buf_queue to see if
 + there is IO pending and if that IO could possibly be done for a given
 + RAID set.  Returns 0 if IO is waiting and can be done, 1 otherwise. 
 + 
 +  */
 + 
 + int
 + rf_buf_queue_check(int raidid)
 + {
 + 	if ((BUFQ_PEEK(&(raid_softc[raidid].buf_queue)) != NULL) &&
 + 	    raidPtrs[raidid]->openings > 0) {
 + 		/* there is work to do */
 + 		return 0;
 + 	} 
 + 	/* default is nothing to do */
 + 	return 1;
 + }
 Index: rf_states.c
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/raidframe/rf_states.c,v
 retrieving revision 1.38
 diff -p -r1.38 rf_states.c
 *** rf_states.c	27 Feb 2005 00:27:45 -0000	1.38
 --- rf_states.c	24 Sep 2005 23:19:10 -0000
 *************** rf_State_LastState(RF_RaidAccessDesc_t *
 *** 235,242 ****
   	((RF_Raid_t *) desc->raidPtr)->openings++;
   	RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
   
 ! 	/* wake up any pending IO */
 ! 	raidstart(((RF_Raid_t *) desc->raidPtr));
   
   	/* printf("Calling biodone on 0x%x\n",desc->bp); */
   	biodone(desc->bp);	/* access came through ioctl */
 --- 235,241 ----
   	((RF_Raid_t *) desc->raidPtr)->openings++;
   	RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
   
 ! 	wakeup(&(desc->raidPtr->iodone));
   
   	/* printf("Calling biodone on 0x%x\n",desc->bp); */
   	biodone(desc->bp);	/* access came through ioctl */