Subject: Re: can we tsleep() in vfs_shutdown ?
To: Jason R Thorpe <thorpej@zembu.com>
From: Manuel Bouyer <bouyer@antioche.lip6.fr>
List: tech-kern
Date: 08/04/2000 02:18:24
--EeQfGwPcQSOJBaQU
Content-Type: text/plain; charset=us-ascii

On Thu, Aug 03, 2000 at 02:23:03PM -0700, Jason R Thorpe wrote:
> On Thu, Aug 03, 2000 at 11:09:00PM +0200, Manuel Bouyer wrote:
> 
>  > in vfs_shutdown(), to pause between flush attemps there is currently a
>  > DELAY(). The problem I can see with this is that we may need some kernel
>  > thread to run for the write to complete (e.g. raidframe, maybe usb ?),
>  > and vfs_shutdown() doesn't give them a chance to have the CPU.
>  > This is supposed to be why system with raid mounted at reboot time don't
>  > always reboot cleanly ("syncing buffers x y z t u giving up").
> 
> Well, we want to avoid running *user* processes at this point, only
> want to allow kernel threads to run.  In fact:
> 
> 	printf("syncing disks... ");
> 
> 	/* XXX Should suspend scheduling. */
> 	(void) spl0();
> 
> :-)  So, there should probably be some call that traverses the proc
> lists and makes all non-P_SYSTEM processes non-runnable.

Ok, here's the patch I've done. stopshed() gues through the run queues and
removes no-system processes, marking them "SHOLD" (we could have a
"restartshed" which could put them back on their run queue).
From here we should never return to userland as no user process are runnable,
unless something else (wakeup() for example) changes the run queues again.
So to be safe we have to call stopshed() before a tsleep().

Also, I changed vfs_shutdown() to count only the number of loop where we
didn't flush any buffers. The reason is, if you have a very large buffer
cache with some complex setup it may take more than 20 tries to flush all
the buffer (on my PC, it needs about 10 already). As long as nbusy decreases
it's safe to keep trying.

Well, it would really be nice if this could go in 1.5_ALPHA2, these unclean
reboots with raidframe are really annoying :)

--
Manuel Bouyer <bouyer@antioche.eu.org>
--

--EeQfGwPcQSOJBaQU
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff

Index: sys/proc.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/proc.h,v
retrieving revision 1.98
diff -u -r1.98 proc.h
--- proc.h	2000/06/08 05:50:40	1.98
+++ proc.h	2000/08/04 00:17:02
@@ -239,6 +239,7 @@
 #define	SZOMB	5		/* Awaiting collection by parent. */
 #define	SDEAD	6		/* Process is almost a zombie. */
 #define	SONPROC	7		/* Process is currently on a CPU */
+#define	SHOLD	8		/* Process is runnable but hold by stopshed() */
 
 #define	P_ZOMBIE(p)	((p)->p_stat == SZOMB || (p)->p_stat == SDEAD)
 
@@ -387,6 +388,7 @@
 void	resetpriority __P((struct proc *));
 void	setrunnable __P((struct proc *));
 void	setrunqueue __P((struct proc *));
+void	stopshed __P((void));
 int	ltsleep __P((void *chan, int pri, const char *wmesg, int timo,
 	    __volatile struct simplelock *));
 void	unsleep __P((struct proc *));
Index: kern/kern_synch.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_synch.c,v
retrieving revision 1.79
diff -u -r1.79 kern_synch.c
--- kern_synch.c	2000/06/27 17:41:27	1.79
+++ kern_synch.c	2000/08/04 00:17:06
@@ -960,3 +960,26 @@
 	if (p->p_priority >= PUSER)
 		p->p_priority = p->p_usrpri;
 }
+
+/*
+ * Mark all non-system processes as non-runnable, and remove from run queue.
+ */
+void
+stopshed()
+{
+	int s, i;
+	struct proc *p, *next;
+	s = splclock();
+	/* go through the run queues, remove non-system processes */
+	for (i = 0; i < RUNQUE_NQS; i++) {
+		for (p = (struct proc *)&sched_qs[i];
+		    p->p_forw != (struct proc *)&sched_qs[i]; p = next) {
+			next = p->p_forw;
+			if ((p->p_flag & P_SYSTEM) == 0) {
+				remrunqueue(p);
+				p->p_stat = SHOLD;
+			}
+		}
+	}
+	splx(s);
+}
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/vfs_subr.c,v
retrieving revision 1.133
diff -u -r1.133 vfs_subr.c
--- vfs_subr.c	2000/07/16 21:07:24	1.133
+++ vfs_subr.c	2000/08/04 00:17:10
@@ -88,6 +88,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
+#include <sys/kernel.h>
 #include <sys/mount.h>
 #include <sys/time.h>
 #include <sys/fcntl.h>
@@ -2350,7 +2351,7 @@
 vfs_shutdown()
 {
 	struct buf *bp;
-	int iter, nbusy, dcount, s;
+	int iter, nbusy, nbusy_prev = 0, dcount, s;
 	struct proc *p = curproc;
 
 	/* XXX we're certainly not running in proc0's context! */
@@ -2359,7 +2360,8 @@
 	
 	printf("syncing disks... ");
 
-	/* XXX Should suspend scheduling. */
+	/* remove user process from run queue */
+	stopshed();
 	(void) spl0();
 
 	/* avoid coming back this way again if we panic. */
@@ -2369,7 +2371,7 @@
 
 	/* Wait for sync to finish. */
 	dcount = 10000;
-	for (iter = 0; iter < 20; iter++) {
+	for (iter = 0; iter < 20;) {
 		nbusy = 0;
 		for (bp = &buf[nbuf]; --bp >= buf; ) {
 			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
@@ -2396,8 +2398,24 @@
 		}
 		if (nbusy == 0)
 			break;
+		if (nbusy_prev == 0)
+			nbusy_prev = nbusy;
 		printf("%d ", nbusy);
-		DELAY(40000 * iter);
+		if (p != &proc0) {
+			/*
+			 * some user processes may have been awakened,
+			 * remove then from the run queue
+			 */
+			stopshed();
+			tsleep(&nbusy, PRIBIO, "bflush",
+			    (iter == 0) ? 1 : hz / 25 * iter);
+		} else {
+			DELAY(40000 * iter);
+		}
+		if (nbusy >= nbusy_prev) /* we didn't flush anything */
+			iter++;
+		else
+			nbusy_prev = nbusy;
 	}
 	if (nbusy) {
 fail:

--EeQfGwPcQSOJBaQU--