Subject: accounting file on NFS
To: None <tech-kern@netbsd.org>
From: enami tsugutomo <enami@but-b.or.jp>
List: tech-kern
Date: 03/02/2000 23:09:02
As you know, we can't put an accounting file on NFS, since the
periodical disk space check (which is done by using statfs(2) system
call) is done by timeout handler and thus it can't sleep but it does
actually.

The straitforward fix to this this bug is to create a kernel thread
and let it to issue statfs(2) system call (issue the system call from
top half of kernel).  The appended diff is a patch to do so.

I'd like to commti this, but is there any objection or comments?

enami.
Index: kern/kern_acct.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_acct.c,v
retrieving revision 1.47
diff -u -r1.47 kern_acct.c
--- kern_acct.c	1998/08/04 04:03:11	1.47
+++ kern_acct.c	2000/02/28 14:50:37
@@ -49,6 +49,9 @@
 #include <sys/file.h>
 #include <sys/syslog.h>
 #include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/namei.h>
 #include <sys/errno.h>
 #include <sys/acct.h>
@@ -69,6 +72,37 @@
  * is read by a user process, etc.  However, that has its own problems.
  */
 
+#define	M_ACCT	M_TEMP			/* XXX */
+
+/*
+ * The global accounting state and related data.
+ */
+struct acct_state {
+	int	as_state;
+#define	ACCT_STOP	0
+#define	ACCT_ACTIVE	1
+#define	ACCT_SUSPENDED	2
+
+	struct vnode *as_vp;		/* Accounting vnode pointer. */
+	struct ucred *as_ucred;		/* Credential of accounting file
+					   owner (i.e root).  Used when
+ 					   accounting file i/o.  */
+};
+struct acct_state *acct_state;		/* The current accounting state. */
+
+/*
+ * Lock to serialize system calls and kernel threads.
+ */
+struct	lock acct_lock;
+#define	ACCT_LOCK()						\
+do {								\
+	(void) lockmgr(&acct_lock, LK_EXCLUSIVE, NULL);		\
+} while (/* CONSTCOND */0)
+#define	ACCT_UNLOCK()						\
+do {								\
+	(void) lockmgr(&acct_lock, LK_RELEASE, NULL);		\
+} while (/* CONSTCOND */0)
+
 /*
  * Internal accounting functions.
  * The former's operation is described in Leffler, et al., and the latter
@@ -76,12 +110,7 @@
  */
 comp_t	encode_comp_t __P((u_long, u_long));
 void	acctwatch __P((void *));
-
-/*
- * Accounting vnode pointer, and saved vnode pointer.
- */
-struct	vnode *acctp;
-struct	vnode *savacctp;
+void	acct_stop __P((struct acct_state *));
 
 /*
  * Values associated with enabling and disabling accounting
@@ -90,6 +119,35 @@
 int	acctresume = 4;		/* resume when free space risen to > 4% */
 int	acctchkfreq = 15;	/* frequency (in seconds) to check space */
 
+void
+acct_init()
+{
+
+	lockinit(&acct_lock, PWAIT, "acctlk", 0, 0);
+}
+
+void
+acct_stop(as)
+	struct acct_state *as;
+{
+	int error;
+
+	if (as->as_vp != NULLVP && as->as_vp->v_type != VBAD) {
+		error = vn_close(as->as_vp, FWRITE, as->as_ucred, NULL);
+#ifdef DIAGNOSTIC
+		if (error != 0)
+			log(LOG_NOTICE, "acct_stop: "
+			    "failed to close, errno = %d\n", error);
+#endif
+		as->as_vp = NULLVP;
+	}
+	if (as->as_ucred != NULL) {
+		crfree(as->as_ucred);
+		as->as_ucred = NULL;
+	}
+	as->as_state = ACCT_STOP;
+}
+
 /*
  * Accounting system call.  Written based on the specification and
  * previous implementation done by Mark Tinguely.
@@ -126,25 +184,43 @@
 		}
 	}
 
+	ACCT_LOCK();
+
 	/*
 	 * If accounting was previously enabled, kill the old space-watcher,
-	 * close the file, and (if no new file was specified, leave).
+	 * free credential for accounting file i/o,
+	 * and (if no new file was specified, leave).
 	 */
-	if (acctp != NULLVP || savacctp != NULLVP) {
-		untimeout(acctwatch, NULL);
-		error = vn_close((acctp != NULLVP ? acctp : savacctp), FWRITE,
-		    p->p_ucred, p);
-		acctp = savacctp = NULLVP;
+	if (acct_state != NULL) {
+		acct_stop(acct_state);
+		wakeup(acct_state);
+		acct_state = NULL;
 	}
 	if (SCARG(uap, path) == NULL)
-		return (error);
+		goto out;
 
 	/*
-	 * Save the new accounting file vnode, and schedule the new
-	 * free space watcher.
+	 * Save the new accounting file vnode and credential,
+	 * and schedule the new free space watcher.
 	 */
-	acctp = nd.ni_vp;
-	acctwatch(NULL);
+	acct_state = malloc(sizeof(struct acct_state), M_ACCT, M_WAITOK);
+	acct_state->as_state = ACCT_ACTIVE;
+	acct_state->as_vp = nd.ni_vp;
+	acct_state->as_ucred = p->p_ucred;
+	crhold(acct_state->as_ucred);
+	error = kthread_create1(acctwatch, acct_state, NULL, "acctwatch");
+	if (error != 0) {
+#ifdef DIAGNOSTIC
+		log(LOG_NOTICE, "acctwatch: "
+		    "failed to create kernel thread, errno = %d\n", error);
+#endif
+		acct_stop(acct_state);
+		free(acct_state, M_ACCT);
+		acct_state = NULL;
+	}
+
+ out:
+	ACCT_UNLOCK();
 	return (error);
 }
 
@@ -161,13 +237,14 @@
 	struct acct acct;
 	struct rusage *r;
 	struct timeval ut, st, tmp;
-	int s, t;
-	struct vnode *vp;
+	int s, t, error = 0;
 
+	ACCT_LOCK();
+
 	/* If accounting isn't enabled, don't bother */
-	vp = acctp;
-	if (vp == NULLVP)
-		return (0);
+	if (acct_state == NULL ||
+	    acct_state->as_state != ACCT_ACTIVE)
+		goto out;
 
 	/*
 	 * Get process accounting information.
@@ -216,10 +293,18 @@
 	/*
 	 * Now, just write the accounting information to the file.
 	 */
-	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	return (vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof(acct),
-	    (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, p->p_ucred,
-	    NULL, p));
+	VOP_LEASE(acct_state->as_vp, p, p->p_ucred, LEASE_WRITE);
+	error = vn_rdwr(UIO_WRITE, acct_state->as_vp, (caddr_t)&acct,
+	    sizeof(acct), (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT,
+	    acct_state->as_ucred, NULL, p);
+#ifdef DIAGNOSTIC
+	if (error != 0)
+		log(LOG_NOTICE, "Accounting: write failed %d\n", error);
+#endif
+
+ out:
+	ACCT_UNLOCK();
+	return (error);
 }
 
 /*
@@ -267,38 +352,62 @@
  * has been vgone()'d out from underneath us, e.g. when the file
  * system containing the accounting file has been forcibly unmounted.
  */
-/* ARGSUSED */
 void
-acctwatch(a)
-	void *a;
+acctwatch(arg)
+	void *arg;
 {
+	struct acct_state *as = arg;
 	struct statfs sb;
+	int error;
 
-	if (savacctp != NULLVP) {
-		if (savacctp->v_type == VBAD) {
-			(void) vn_close(savacctp, FWRITE, NOCRED, NULL);
-			savacctp = NULLVP;
-			return;
-		}
-		(void)VFS_STATFS(savacctp->v_mount, &sb, (struct proc *)0);
-		if (sb.f_bavail > acctresume * sb.f_blocks / 100) {
-			acctp = savacctp;
-			savacctp = NULLVP;
-			log(LOG_NOTICE, "Accounting resumed\n");
-		}
-	} else if (acctp != NULLVP) {
-		if (acctp->v_type == VBAD) {
-			(void) vn_close(acctp, FWRITE, NOCRED, NULL);
-			acctp = NULLVP;
-			return;
-		}
-		(void)VFS_STATFS(acctp->v_mount, &sb, (struct proc *)0);
-		if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) {
-			savacctp = acctp;
-			acctp = NULLVP;
-			log(LOG_NOTICE, "Accounting suspended\n");
-		}
-	} else
-		return;
-	timeout(acctwatch, NULL, acctchkfreq * hz);
+	log(LOG_NOTICE, "Accounting started\n");
+	ACCT_LOCK();
+	while (as->as_state != ACCT_STOP) {
+		if (as->as_vp->v_type == VBAD) {
+			log(LOG_NOTICE, "Accounting file is bad\n");
+			acct_stop(as);
+			continue;
+		} else if ((error =
+		    VFS_STATFS(as->as_vp->v_mount, &sb, NULL)) != 0) {
+#ifdef DIAGNOSTIC
+			if (error != 0)
+				log(LOG_NOTICE, "acctwatch: failed to statfs,"
+				    " error = %d\n", error);
+#endif
+		} else
+			switch (as->as_state) {
+			case ACCT_SUSPENDED:
+				if (sb.f_bavail >
+				    acctresume * sb.f_blocks / 100) {
+					as->as_state = ACCT_ACTIVE;
+					log(LOG_NOTICE,
+					    "Accounting resumed\n");
+				}
+				break;
+			case ACCT_ACTIVE:
+				if (sb.f_bavail <=
+				    acctsuspend * sb.f_blocks / 100) {
+					as->as_state = ACCT_SUSPENDED;
+					log(LOG_NOTICE,
+					    "Accounting suspended\n");
+				}
+				break;
+			case ACCT_STOP:
+				continue;
+			}
+
+		ACCT_UNLOCK();
+		error = tsleep(as, PSWP, "actwat", acctchkfreq * hz);
+		ACCT_LOCK();
+#ifdef DIAGNOSTIC
+		if (error != 0 && error != EWOULDBLOCK)
+			log(LOG_NOTICE, "Accounting sleep error %d\n", error);
+#endif
+	}
+	ACCT_UNLOCK();
+
+	log(LOG_NOTICE, "Accounting stopped\n");
+	free(as, M_ACCT);
+
+	kthread_exit(0);
 }
Index: kern/init_main.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/init_main.c,v
retrieving revision 1.163
diff -u -r1.163 init_main.c
--- init_main.c	2000/01/24 18:03:19	1.163
+++ init_main.c	2000/02/28 14:50:38
@@ -50,6 +50,7 @@
 #include "rnd.h"
 
 #include <sys/param.h>
+#include <sys/acct.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/errno.h>
@@ -368,6 +369,9 @@
 	/* Initialize kernel profiling. */
 	kmstartup();
 #endif
+
+	/* Initialize system accouting. */
+	acct_init();
 
 	/*
 	 * Initialize signal-related data structures, and signal state
Index: sys/acct.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/acct.h,v
retrieving revision 1.17
diff -u -r1.17 acct.h
--- acct.h	1997/01/22 07:09:04	1.17
+++ acct.h	2000/02/28 14:50:38
@@ -79,6 +79,7 @@
 #ifdef _KERNEL
 struct vnode	*acctp;
 
+void	acct_init __P((void));
 int	acct_process __P((struct proc *p));
 #endif