Subject: kern/17239: systrace for NetBSD
To: None <gnats-bugs@gnats.netbsd.org>
From: None <xs@kittenz.org>
List: netbsd-bugs
Date: 06/12/2002 11:27:56
>Number:         17239
>Category:       kern
>Synopsis:       systrace for NetBSD
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    kern-bug-people
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Wed Jun 12 12:09:04 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator:     
>Release:        NetBSD 1.6A
>Organization:
>Environment:
/sys/kern/kern_exit.c:
     $NetBSD: kern_exit.c,v 1.93 2002/03/17 23:41:30 christos Exp $
/sys/kern/kern_fork.c:
     $NetBSD: kern_fork.c,v 1.88 2001/12/08 00:35:30 thorpej Exp $
/sys/arch/i386/i386/syscall.c:
     $NetBSD: syscall.c,v 1.12 2002/03/29 17:07:06 christos Exp $
System: NetBSD stasis 1.6A NetBSD 1.6A (STASIS.systrace) #19: Thu Jun 6 22:14:42 BST 2002 xs@stasis:/usr/src/sys/arch/i386/compile/STASIS.systrace i386
Architecture: i386
Machine: i386
>Description:
	systrace allows a monitoring process to authoritatively permit and
reject syscalls for another process and much more.
	http://www.citi.umich.edu/u/provos/systrace/
>How-To-Repeat:
>Fix:
	Two parts: first the kernel changes, then the userland code from
OpenBSD with a NetBSD diff relative to it. The OpenBSD modules for this
are src/bin/systrace and XF4/xc/programs/xsystrace.

Index: etc/etc.i386/MAKEDEV
===================================================================
RCS file: /cvsroot/basesrc/etc/etc.i386/MAKEDEV,v
retrieving revision 1.175
diff -u -r1.175 MAKEDEV
--- MAKEDEV	2002/04/18 12:54:11	1.175
+++ MAKEDEV	2002/06/12 10:07:51
@@ -136,6 +136,7 @@
 #	dpti*	DPT/Adaptec RAID management interface
 #	radio*	radio devices
 #	nsmb*	SMB requester
+#	systrace syscall tracer
 #
 
 dialin=0
@@ -192,6 +193,7 @@
 	makedev cir0 cir1 irframe0 irframe1
 	makedev clockctl
 	makedev nsmb0 nsmb1 nsmb2 nsmb3
+	makedev systrace
 	;;
 
 audio)
@@ -946,6 +948,12 @@
 	rm -f $nsmb
 	mknod $nsmb c $major $unit
 	chmod 644 $nsmb
+	;;
+
+systrace)
+	rm -f systrace
+	mknod systrace c 90 0
+	chmod 644 systrace
 	;;
 
 local)
Index: sys/arch/i386/i386/conf.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/i386/i386/conf.c,v
retrieving revision 1.155
diff -u -r1.155 conf.c
--- conf.c	2002/04/18 12:54:15	1.155
+++ conf.c	2002/06/12 10:07:52
@@ -115,6 +115,8 @@
 cdev_decl(mm);
 cdev_decl(wd);
 cdev_decl(sw);
+#include "systrace.h"
+cdev_decl(systrace);
 #include "pty.h"
 #define	ptstty		ptytty
 #define	ptsioctl	ptyioctl
@@ -358,6 +360,7 @@
 	cdev_radio_init(NRADIO,radio),	/* 87: generic radio I/O */
 	cdev_netsmb_init(NNETSMB,nsmb_dev_),/* 88: SMB */
 	cdev_clockctl_init(NCLOCKCTL, clockctl),/* 89: clockctl pseudo device */
+	cdev_systrace_init(NSYSTRACE, systrace),/* 90: system call tracing */
 };
 int	nchrdev = sizeof(cdevsw) / sizeof(cdevsw[0]);
 
Index: sys/arch/i386/i386/syscall.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/i386/i386/syscall.c,v
retrieving revision 1.12
diff -u -r1.12 syscall.c
--- syscall.c	2002/03/29 17:07:06	1.12
+++ syscall.c	2002/06/12 10:07:53
@@ -42,6 +42,7 @@
 #include "opt_syscall_debug.h"
 #include "opt_vm86.h"
 #include "opt_ktrace.h"
+#include "systrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -53,6 +54,8 @@
 #endif
 #include <sys/syscall.h>
 
+#include <dev/systrace.h>
+
 #include <uvm/uvm_extern.h>
 
 #include <machine/cpu.h>
@@ -76,6 +79,11 @@
 		p->p_md.md_syscall = syscall_fancy;
 	else
 #endif
+#if NSYSTRACE > 0
+	if (ISSET(p->p_flag, P_SYSTRACE))
+		p->p_md.md_syscall = syscall_fancy;
+	else
+#endif
 		p->p_md.md_syscall = syscall_plain;
 }
 
@@ -225,7 +233,12 @@
 
 	rval[0] = 0;
 	rval[1] = 0;
-	error = (*callp->sy_call)(p, args, rval);
+#if NSYSTRACE > 0
+	if (ISSET(p->p_flag, P_SYSTRACE))
+		error = systrace_redirect(code, p, args, rval);
+	else
+#endif
+		error = (*callp->sy_call)(p, args, rval);
 	switch (error) {
 	case 0:
 		frame.tf_eax = rval[0];
Index: sys/conf/files
===================================================================
RCS file: /cvsroot/syssrc/sys/conf/files,v
retrieving revision 1.531
diff -u -r1.531 files
--- files	2002/05/22 05:49:57	1.531
+++ files	2002/06/12 10:07:54
@@ -213,6 +213,7 @@
 define	sppp
 define	wlan
 define	crypto
+define	systrace
 
 # devices ARPing IPv4 pull this in:
 #
@@ -900,7 +901,7 @@
 defpseudo faith:	ifnet
 defpseudo stf:		ifnet
 defpseudo irframetty:	irframedrv, irdasir
-
+defpseudo systrace
 defpseudo sequencer
 defpseudo clockctl
 defpseudo irix_kmem
@@ -992,6 +993,7 @@
 file	dev/rndpool.c			rnd			needs-flag
 file	dev/sequencer.c			sequencer		needs-flag
 file	dev/vnd.c			vnd			needs-flag
+file	dev/systrace.c			systrace		needs-flag
 file	kern/core_elf32.c		exec_elf32
 file	kern/core_elf64.c		exec_elf64
 file	kern/core_netbsd.c
Index: sys/dev/Makefile
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/Makefile,v
retrieving revision 1.10
diff -u -r1.10 Makefile
--- Makefile	2001/12/04 21:43:44	1.10
+++ Makefile	2002/06/12 10:07:54
@@ -7,6 +7,6 @@
 INCSDIR= /usr/include/dev
 
 # Only install includes which are used by userland
-INCS=	ccdvar.h md.h vndvar.h
+INCS=	ccdvar.h md.h vndvar.h systrace.h
 
 .include <bsd.kinc.mk>
Index: sys/dev/systrace.c
===================================================================
RCS file: systrace.c
diff -N systrace.c
--- /dev/null	Sun May  5 08:03:30 2002
+++ systrace.c	Wed Jun 12 03:07:56 2002
@@ -0,0 +1,1383 @@
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/tree.h>
+#include <sys/malloc.h>
+#include <sys/syscall.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/conf.h>
+#include <sys/device.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
+#include <sys/signalvar.h>
+#include <sys/lock.h>
+#include <sys/pool.h>
+#include <sys/mount.h>
+#include <sys/poll.h>
+
+#include <miscfs/procfs/procfs.h>
+
+#include <dev/systrace.h>
+
+#ifdef __NetBSD__
+/* XXX: is this ok?  The NetBSD branch passes no proc to lockmgr(); p is unused. */
+#define	SYSTRACE_LOCK(fst, p)	lockmgr(&(fst)->lock, LK_EXCLUSIVE, NULL)
+#define	SYSTRACE_UNLOCK(fst, p)	lockmgr(&(fst)->lock, LK_RELEASE, NULL)
+#else
+#define	SYSTRACE_LOCK(fst, p)	lockmgr(&(fst)->lock, LK_EXCLUSIVE, NULL, (p))
+#define	SYSTRACE_UNLOCK(fst, p)	lockmgr(&(fst)->lock, LK_RELEASE, NULL, (p))
+#endif
+#ifndef	M_XDATA
+#define	M_XDATA		M_FILE	/* XXX */
+#endif
+
+
+void	systraceattach(int);
+
+int	systraceopen(dev_t, int, int, struct proc *);
+int	systraceclose(dev_t, int, int, struct proc *);
+int	systraceread(dev_t, struct uio *, int);
+int	systracewrite(dev_t, struct uio *, int);
+int	systraceioctl(dev_t, u_long, caddr_t, int, struct proc *);
+int	systraceselect(dev_t, int, struct proc *);
+
+#ifdef __NetBSD__
+int	systracef_read(struct file *, off_t *, struct uio *, struct ucred *,
+		int);
+int	systracef_write(struct file *, off_t *, struct uio *, struct ucred *,
+		int);
+int	systracef_fcntl(struct file *, u_int, caddr_t, struct proc *);
+#else
+int	systracef_read(struct file *, off_t *, struct uio *, struct ucred *);
+int	systracef_write(struct file *, off_t *, struct uio *, struct ucred *);
+int	systracef_kqfilter(struct file *, struct knote *);
+#endif
+int	systracef_select(struct file *, int, struct proc *);
+int	systracef_ioctl(struct file *, u_long, caddr_t, struct proc *);
+int	systracef_stat(struct file *, struct stat *, struct proc *);
+int	systracef_close(struct file *, struct proc *);
+
+struct str_policy {
+	TAILQ_ENTRY(str_policy) next;
+
+	int nr;
+
+	const struct emul *emul;	/* Is only valid for this emulation */
+
+	int refcount;
+
+	int nsysent;
+	u_char *sysent;
+};
+
+#define STR_PROC_ONQUEUE	0x01
+#define STR_PROC_WAITANSWER	0x02
+#define STR_PROC_SYSCALLRES	0x04
+#define STR_PROC_REPORT		0x08	/* Report emulation */
+
+struct str_process {
+	TAILQ_ENTRY(str_process) next;
+	TAILQ_ENTRY(str_process) msg_next;
+
+	struct proc *proc;
+	pid_t pid;
+
+	struct fsystrace *parent;
+	struct str_policy *policy;
+
+	int flags;
+	short answer;
+	short error;
+
+	struct str_message msg;
+};
+
+void systrace_lock(void);
+void systrace_unlock(void);
+
+/* Needs to be called with fst locked */
+
+int	systrace_attach(struct fsystrace *, pid_t);
+int	systrace_detach(struct str_process *);
+int	systrace_answer(struct str_process *, struct systrace_answer *);
+int	systrace_io(struct str_process *, struct systrace_io *);
+int	systrace_policy(struct fsystrace *, struct systrace_policy *);
+int	systrace_getcwd(struct fsystrace *, struct str_process *);
+
+int	systrace_processready(struct str_process *);
+struct proc *systrace_find(struct str_process *);
+struct str_process *systrace_findpid(struct fsystrace *fst, pid_t pid);
+void	systrace_wakeup(struct fsystrace *);
+void	systrace_closepolicy(struct fsystrace *, struct str_policy *);
+int	systrace_insert_process(struct fsystrace *, struct proc *);
+struct str_policy *systrace_newpolicy(struct fsystrace *, int);
+int	systrace_msg_child(struct fsystrace *, struct str_process *, pid_t);
+int	systrace_msg_ask(struct fsystrace *, struct str_process *,
+	    int, size_t, register_t []);
+int	systrace_msg_result(struct fsystrace *, struct str_process *,
+	    int, int, size_t, register_t [], register_t []);
+int	systrace_msg_emul(struct fsystrace *, struct str_process *);
+int	systrace_make_msg(struct str_process *, int);
+
+static struct fileops systracefops = {
+    systracef_read,
+    systracef_write,
+    systracef_ioctl,
+#ifdef __NetBSD__
+    systracef_fcntl,
+#endif
+    systracef_select,
+#ifndef __NetBSD__
+    systracef_kqfilter,
+#endif
+    systracef_stat,
+    systracef_close
+};
+
+struct pool systr_proc_pl;
+struct pool systr_policy_pl;
+
+int systrace_debug = 0;
+struct lock systrace_lck;
+
+#define DPRINTF(y)	if (systrace_debug) printf y;
+
+#ifdef __NetBSD__
+/* ARGSUSED */
+int
+systracef_read(fp, poff, uio, cred, flags)
+	struct file *fp;
+	off_t *poff;
+	struct uio *uio;
+	struct ucred *cred;
+	int flags;
+#else
+/* ARGSUSED */
+int
+systracef_read(fp, poff, uio, cred)
+	struct file *fp;
+	off_t *poff;
+	struct uio *uio;
+	struct ucred *cred;
+#endif
+{	/* Copies the first queued str_message to the reader; 0 or errno. */
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+	struct str_process *process;
+	int error = 0;
+
+	if (uio->uio_resid != sizeof(struct str_message))
+		return (EINVAL);	/* a read is exactly one message */
+
+ again:
+	systrace_lock();
+	SYSTRACE_LOCK(fst, curproc);
+	systrace_unlock();	/* global lock is held only while taking fst's */
+	if ((process = TAILQ_FIRST(&fst->messages)) != NULL) {
+		error = uiomove((caddr_t)&process->msg,
+		    sizeof(struct str_message), uio);
+		if (!error) {	/* dequeue only after a successful copy */
+			TAILQ_REMOVE(&fst->messages, process, msg_next);
+			CLR(process->flags, STR_PROC_ONQUEUE);
+
+			if (SYSTR_MSG_NOPROCESS(process))
+				pool_put(&systr_proc_pl, process);
+
+		}
+	} else if (TAILQ_FIRST(&fst->processes) == NULL) {
+		/* EOF situation: no message and no process left; return 0 */
+		;
+	} else {
+		if (fp->f_flag & FNONBLOCK)
+			error = EAGAIN;	/* nothing queued, non-blocking */
+		else {
+			SYSTRACE_UNLOCK(fst, curproc);	/* sleep unlocked */
+			error = tsleep(fst, PWAIT|PCATCH, "systrrd", 0);
+			if (error)
+				goto out;	/* lock already released */
+			goto again;	/* woken on fst: retake locks, retry */
+		}
+
+	}
+
+	SYSTRACE_UNLOCK(fst, curproc);
+ out:
+	return (error);
+}
+
+#ifdef __NetBSD__
+/* ARGSUSED */
+int
+systracef_write(fp, poff, uio, cred, flags)
+	struct file *fp;
+	off_t *poff;
+	struct uio *uio;
+	struct ucred *cred;
+	int flags;
+#else
+/* ARGSUSED */
+int
+systracef_write(fp, poff, uio, cred)
+	struct file *fp;
+	off_t *poff;
+	struct uio *uio;
+	struct ucred *cred;
+#endif
+{
+	return (EIO);
+}
+
+#define POLICY_VALID(x)	((x) == SYSTR_POLICY_PERMIT || \
+			 (x) == SYSTR_POLICY_ASK || \
+			 (x) == SYSTR_POLICY_NEVER)
+
+/* ioctl handler for a cloned systrace descriptor; returns 0 or an errno. */
+int
+systracef_ioctl(fp, cmd, data, p)
+	struct file *fp;
+	u_long cmd;
+	caddr_t data;
+	struct proc *p;
+{
+	int ret = 0;
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+#ifdef __NetBSD__
+	struct cwdinfo *cwdp;
+#else
+	struct filedesc *fdp;
+#endif
+	struct str_process *strp = NULL;	/* looked up only if pid != 0 */
+	pid_t pid = 0;
+
+	switch (cmd) {		/* pass 1: unlocked commands, pid extraction */
+	case FIONBIO:
+	case FIOASYNC:
+		return (0);
+
+	case STRIOCATTACH:
+		pid = *(pid_t *)data;
+		if (!pid)
+			ret = EINVAL;
+		else
+			ret = systrace_attach(fst, pid);
+		DPRINTF(("%s: attach to %d: %d\n", __func__, pid, ret));
+		return (ret);
+	case STRIOCDETACH:
+	case STRIOCREPORT:	/* NOTE(review): assumes a pid_t argument */
+		pid = *(pid_t *)data;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCRESCWD:
+		if (!fst->fd_pid) {
+			ret = EINVAL;
+			break;
+		}
+#ifdef __NetBSD__
+		cwdp = p->p_cwdi;
+
+		/* Release cwd from other process */
+		if (cwdp->cwdi_cdir)
+			vrele(cwdp->cwdi_cdir);
+		if (cwdp->cwdi_rdir)
+			vrele(cwdp->cwdi_rdir);
+		cwdp->cwdi_cdir = fst->fd_cdir;
+		cwdp->cwdi_rdir = fst->fd_rdir;
+#else
+		fdp = p->p_fd;
+
+		/* Release cwd from other process */
+		if (fdp->fd_cdir)
+			vrele(fdp->fd_cdir);
+		if (fdp->fd_rdir)
+			vrele(fdp->fd_rdir);
+		/* This restores the cwd we had before */
+		fdp->fd_cdir = fst->fd_cdir;
+		fdp->fd_rdir = fst->fd_rdir;
+#endif
+		/* Note that we are normal again */
+		fst->fd_pid = 0;
+		fst->fd_cdir = fst->fd_rdir = NULL;
+		return (ret);
+	case STRIOCANSWER:
+		pid = ((struct systrace_answer *)data)->stra_pid;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCIO:
+		pid = ((struct systrace_io *)data)->strio_pid;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCGETCWD:
+		pid = *(pid_t *)data;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCPOLICY:	/* carries no pid here; strp stays NULL */
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if (ret)
+		return (ret);
+
+	SYSTRACE_LOCK(fst, curproc);
+	if (pid) {
+		strp = systrace_findpid(fst, pid);
+		if (strp == NULL) {
+			ret = ESRCH;
+			goto unlock;
+		}
+	}
+
+	switch (cmd) {		/* pass 2: runs with fst locked */
+	case STRIOCDETACH:
+		ret = systrace_detach(strp);
+		break;
+	case STRIOCREPORT:
+		SET(strp->flags, STR_PROC_REPORT);
+		break;
+	case STRIOCANSWER:
+		ret = systrace_answer(strp, (struct systrace_answer *)data);
+		break;
+	case STRIOCIO:
+		ret = systrace_io(strp, (struct systrace_io *)data);
+		break;
+	case STRIOCPOLICY:
+		ret = systrace_policy(fst, (struct systrace_policy *)data);
+		break;
+	case STRIOCGETCWD:
+		ret = systrace_getcwd(fst, strp);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+ unlock:
+	SYSTRACE_UNLOCK(fst, curproc);
+
+	return (ret);
+}
+
+#ifdef __NetBSD__
+/* ARGSUSED */
+int
+systracef_fcntl(fp, cmd, data, p)
+	struct file *fp;
+	u_int cmd;
+	caddr_t data;
+	struct proc *p;
+{
+
+	if (cmd == FNONBLOCK || cmd == FASYNC)
+		return 0;
+
+	return (EOPNOTSUPP);
+}
+#endif
+
+/* Poll/select handler: readable while a message is queued on fst. */
+int
+systracef_select(fp, which, p)
+	struct file *fp;
+	int which;
+	struct proc *p;
+{
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+	int ready = 0;
+
+#ifdef __NetBSD__
+	if ((which &= (POLLIN | POLLRDNORM)) == 0)	/* read events only */
+#else
+	if (which != FREAD)
+#endif
+		return (0);
+
+	SYSTRACE_LOCK(fst, p);
+	ready = (TAILQ_FIRST(&fst->messages) != NULL) ? which : 0;
+	if (!ready)
+		selrecord(p, &fst->si);	/* nothing queued: record for selwakeup */
+	SYSTRACE_UNLOCK(fst, p);
+
+	return (ready);	/* requested read events (FREAD on non-NetBSD) or 0 */
+}
+
+
+#ifndef __NetBSD__
+/* ARGSUSED */
+int
+systracef_kqfilter(fp, kn)
+	struct file *fp;
+	struct knote *kn;
+{
+	return (1);
+}
+#endif
+
+
+/* ARGSUSED */
+int
+systracef_stat(fp, sb, p)
+	struct file *fp;
+	struct stat *sb;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);
+}
+
+/* ARGSUSED */
+int
+systracef_close(fp, p)
+	struct file *fp;
+	struct proc *p;
+{
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+	struct str_process *strp;
+	struct str_policy *strpol;
+
+	SYSTRACE_LOCK(fst, curproc);
+
+	/* Untrace all processes */
+	for (strp = TAILQ_FIRST(&fst->processes); strp;
+	    strp = TAILQ_FIRST(&fst->processes)) {
+		struct proc *q = strp->proc;
+
+		systrace_detach(strp);
+		psignal(q, SIGKILL);
+	}
+
+	/* Clean up fork and exit messages */
+	for (strp = TAILQ_FIRST(&fst->messages); strp;
+	    strp = TAILQ_FIRST(&fst->messages)) {
+		TAILQ_REMOVE(&fst->messages, strp, msg_next);
+		pool_put(&systr_proc_pl, strp);
+	}
+
+	/* Clean up all policies */
+	for (strpol = TAILQ_FIRST(&fst->policies); strpol;
+	    strpol = TAILQ_FIRST(&fst->policies))
+		systrace_closepolicy(fst, strpol);
+
+	/* Release vnodes */
+	if (fst->fd_cdir)
+		vrele(fst->fd_cdir);
+	if (fst->fd_rdir)
+		vrele(fst->fd_rdir);
+	SYSTRACE_UNLOCK(fst, curproc);
+
+	FREE(fp->f_data, M_XDATA);
+	fp->f_data = NULL;
+
+	return (0);
+}
+
+void
+systrace_lock(void)
+{
+#ifdef __NetBSD__
+	lockmgr(&systrace_lck, LK_EXCLUSIVE, NULL);
+#else
+	lockmgr(&systrace_lck, LK_EXCLUSIVE, NULL, curproc);
+#endif
+}
+
+void
+systrace_unlock(void)
+{
+#ifdef __NetBSD__
+	lockmgr(&systrace_lck, LK_RELEASE, NULL);
+#else
+	lockmgr(&systrace_lck, LK_RELEASE, NULL, curproc);
+#endif
+}
+
+void
+systraceattach(int n)
+{
+	pool_init(&systr_proc_pl, sizeof(struct str_process), 0, 0, 0,
+	    "strprocpl", NULL);
+	pool_init(&systr_policy_pl, sizeof(struct str_policy), 0, 0, 0,
+	    "strpolpl", NULL);
+	lockinit(&systrace_lck, PLOCK, "systrace", 0, 0);
+}
+
+int
+systraceopen(dev, flag, mode, p)
+	dev_t	dev;
+	int	flag;
+	int	mode;
+	struct proc *p;
+{
+	return (0);
+}
+
+int
+systraceclose(dev, flag, mode, p)
+	dev_t	dev;
+	int	flag;
+	int	mode;
+	struct proc *p;
+{
+	return (0);
+}
+
+int
+systraceread(dev, uio, ioflag)
+	dev_t	dev;
+	struct uio *uio;
+	int	ioflag;
+{
+	return (EIO);
+}
+
+int
+systracewrite(dev, uio, ioflag)
+	dev_t	dev;
+	struct uio *uio;
+	int	ioflag;
+{
+	return (EIO);
+}
+
+int
+systraceioctl(dev, cmd, data, flag, p)
+	dev_t	dev;
+	u_long	cmd;
+	caddr_t	data;
+	int	flag;
+	struct proc *p;
+{	/* Device ioctl: only SYSTR_CLONE is accepted, anything else is EINVAL. */
+	struct file *f;
+	struct fsystrace *fst = NULL;
+	int fd, error;
+
+	switch (cmd) {
+	case SYSTR_CLONE:	/* build a new fsystrace and a descriptor for it */
+		MALLOC(fst, struct fsystrace *, sizeof(struct fsystrace),
+		    M_XDATA, M_WAITOK);
+
+		memset(fst, 0, sizeof(struct fsystrace));
+		lockinit(&fst->lock, PLOCK, "systrace", 0, 0);
+		TAILQ_INIT(&fst->processes);
+		TAILQ_INIT(&fst->messages);
+		TAILQ_INIT(&fst->policies);
+
+		if (suser(p->p_ucred, &p->p_acflag) == 0)
+			fst->issuser = 1;	/* caller passed the suser() check */
+    
+		error = falloc(p, &f, &fd);
+		if (error) {
+			FREE(fst, M_XDATA);	/* no file to own fst: undo */
+			return (error);
+		}
+		f->f_flag = FREAD | FWRITE;
+		f->f_type = DTYPE_SYSTRACE;
+		f->f_ops = &systracefops;	/* file ops table: systracef_*() */
+		f->f_data = (caddr_t) fst;
+		*(int *)data = fd;	/* new descriptor is returned via data */
+		FILE_SET_MATURE(f);
+#ifdef __NetBSD__
+		FILE_UNUSE(f, p);
+#endif
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+int
+systraceselect(dev, rw, p)
+	dev_t	dev;
+	int	rw;
+	struct proc *p;
+{
+	return (0);
+}
+
+void
+systrace_wakeup(struct fsystrace *fst)
+{
+	wakeup((caddr_t)fst);
+	selwakeup(&fst->si);
+}
+
+struct proc *
+systrace_find(struct str_process *strp)
+{
+	struct proc *found = pfind(strp->pid);
+
+	/*
+	 * The pid must still resolve to the proc recorded in strp, and
+	 * that proc must still carry P_SYSTRACE; anything else is NULL.
+	 */
+	if (found == NULL)
+		return (NULL);
+	if (found != strp->proc || !ISSET(found->p_flag, P_SYSTRACE))
+		return (NULL);
+
+	return (found);
+}
+
+void
+systrace_exit(struct proc *proc)
+{
+	struct str_process *strp;
+	struct fsystrace *fst;
+
+	systrace_lock();
+	strp = proc->p_systrace;
+	if (strp != NULL) {
+		fst = strp->parent;
+		SYSTRACE_LOCK(fst, curproc);
+		systrace_unlock();
+
+		/* Insert Exit message */
+		systrace_msg_child(fst, strp, -1);
+
+		systrace_detach(strp);
+		SYSTRACE_UNLOCK(fst, curproc);
+	} else
+		systrace_unlock();
+	CLR(proc->p_flag, P_SYSTRACE);
+}
+
+void
+systrace_fork(struct proc *oldproc, struct proc *p)
+{
+	struct str_process *oldstrp, *strp;
+	struct fsystrace *fst;
+
+	systrace_lock();
+	oldstrp = oldproc->p_systrace;
+	if (oldstrp == NULL) {
+		systrace_unlock();
+		return;
+	}
+
+	fst = oldstrp->parent;
+	SYSTRACE_LOCK(fst, curproc);
+	systrace_unlock();
+
+	if (systrace_insert_process(fst, p))
+		goto out;
+	if ((strp = systrace_findpid(fst, p->p_pid)) == NULL)
+		panic("systrace_fork");
+
+	/* Reference policy */
+	if ((strp->policy = oldstrp->policy) != NULL)
+		strp->policy->refcount++;
+
+	/* Insert fork message */
+	systrace_msg_child(fst, oldstrp, p->p_pid);
+ out:
+	SYSTRACE_UNLOCK(fst, curproc);
+}
+
+int
+systrace_redirect(int code, struct proc *p, void *v, register_t *retval)
+{
+	const struct sysent *callp;
+	struct str_process *strp;
+	struct str_policy *strpolicy;
+	struct fsystrace *fst = NULL;
+	int policy, error = 0, report = 0;
+
+	systrace_lock();
+	strp = p->p_systrace;
+	if (strp == NULL) {
+		systrace_unlock();
+		return (EINVAL);
+	}
+
+	KASSERT(strp->proc == p);
+
+	fst = strp->parent;
+
+	SYSTRACE_LOCK(fst, p);
+	systrace_unlock();
+
+	if ((p->p_flag & P_SUGID) && !fst->issuser) {
+		/* We can not monitor a SUID process unless we are root,
+		 * but we wait until it executes something unprivileged.
+		 */
+		policy = SYSTR_POLICY_PERMIT;
+	} else {
+		/* Find out current policy */
+		if ((strpolicy = strp->policy) == NULL)
+			policy = SYSTR_POLICY_ASK;
+		else {
+			if (code >= strpolicy->nsysent)
+				policy = SYSTR_POLICY_NEVER;
+			else
+				policy = strpolicy->sysent[code];
+		}
+	}
+
+	callp = p->p_emul->e_sysent + code;
+	switch (policy) {
+	case SYSTR_POLICY_PERMIT:
+		break;
+	case SYSTR_POLICY_ASK:
+		/* Puts the current process to sleep, return unlocked */
+		error = systrace_msg_ask(fst,strp, code, callp->sy_argsize, v);
+
+		/* We might have detached by now for some reason */
+		fst = NULL;
+		if (!error && (strp = p->p_systrace) != NULL) {
+			/* XXX - do I need to lock here? */
+			if (strp->answer == SYSTR_POLICY_NEVER)
+				error = strp->error;
+			else if (ISSET(strp->flags, STR_PROC_SYSCALLRES)) {
+				CLR(strp->flags, STR_PROC_SYSCALLRES);
+				report = 1;
+			}
+		}
+		break;
+	default:
+		if (policy > 0)
+			error = policy;
+		else
+			error = EPERM;
+		break;
+	}
+
+	if (fst) {
+		SYSTRACE_UNLOCK(fst, p);
+		fst = NULL;
+	}
+
+	if (!error) {
+		const struct emul *oldemul = p->p_emul;
+
+		error = (*callp->sy_call)(p, v, retval);
+
+		if (p->p_flag & P_SUGID) {
+			/* Stupid Locking not necessary */
+			if ((strp = p->p_systrace) == NULL ||
+			    (fst = strp->parent) == NULL)
+				return (error);
+			if (!fst->issuser)
+				return (error);
+		}
+
+		/* Report change in emulation */
+		systrace_lock();
+		strp = p->p_systrace;
+
+		/* See if we should force a report */
+		if (strp != NULL && ISSET(strp->flags, STR_PROC_REPORT)) {
+			CLR(strp->flags, STR_PROC_REPORT);
+			oldemul = NULL;
+		}
+
+		if (p->p_emul != oldemul && strp != NULL) {
+			fst = strp->parent;
+			SYSTRACE_LOCK(fst, p);
+			systrace_unlock();
+
+			/* Old policy is without meaning now */
+			if (strp->policy) {
+				systrace_closepolicy(fst, strp->policy);
+				strp->policy = NULL;
+			}
+			systrace_msg_emul(fst, strp);
+		} else
+			systrace_unlock();
+
+		/* Report result from system call */
+		systrace_lock();
+		if (report && (strp = p->p_systrace) != NULL) {
+			fst = strp->parent;
+			SYSTRACE_LOCK(fst, p);
+			systrace_unlock();
+
+			systrace_msg_result(fst, strp, error, code,
+			    callp->sy_argsize, v, retval);
+		} else
+			systrace_unlock();
+	}
+
+	return (error);
+}
+
+/* Called with fst locked */
+
+int
+systrace_answer(struct str_process *strp, struct systrace_answer *ans)
+{
+	int error = 0;
+	
+	DPRINTF(("%s: %d: policy %d\n", __func__,
+		    ans->stra_pid, ans->stra_policy));
+
+	if (!POLICY_VALID(ans->stra_policy)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	if (ISSET(strp->flags, STR_PROC_ONQUEUE)) {
+		error = EINVAL;
+		goto out;
+	}
+	if (!ISSET(strp->flags, STR_PROC_WAITANSWER)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	strp->answer = ans->stra_policy;
+	strp->error = ans->stra_error;
+	if (!strp->error)
+		strp->error = EPERM;
+	if (ISSET(ans->stra_flags, SYSTR_FLAGS_RESULT))
+		SET(strp->flags, STR_PROC_SYSCALLRES);
+
+	/* Clearing the flag indicates to the process that it woke up */
+	CLR(strp->flags, STR_PROC_WAITANSWER);
+	wakeup(strp);
+ out:
+
+	return (error);
+}
+
+int
+systrace_policy(struct fsystrace *fst, struct systrace_policy *pol)
+{
+	struct str_policy *strpol;
+	struct str_process *strp;
+
+	switch(pol->strp_op) {
+	case SYSTR_POLICY_NEW:
+		DPRINTF(("%s: new, ents %d\n", __func__,
+			    pol->strp_maxents));
+		if (pol->strp_maxents <= 0 || pol->strp_maxents > 1024)
+			return (EINVAL);
+		strpol = systrace_newpolicy(fst, pol->strp_maxents);
+		if (strpol == NULL)
+			return (ENOBUFS);
+		pol->strp_num = strpol->nr;
+		break;
+	case SYSTR_POLICY_ASSIGN:
+		DPRINTF(("%s: %d -> pid %d\n", __func__,
+			    pol->strp_num, pol->strp_pid));
+
+		/* Find right policy by number */
+		TAILQ_FOREACH(strpol, &fst->policies, next)
+		    if (strpol->nr == pol->strp_num)
+			    break;
+		if (strpol == NULL)
+			return (EINVAL);
+
+		strp = systrace_findpid(fst, pol->strp_pid);
+		if (strp == NULL)
+			return (EINVAL);
+
+		/* Check that emulation matches */
+		if (strpol->emul && strpol->emul != strp->proc->p_emul)
+			return (EINVAL);
+
+		if (strp->policy)
+			systrace_closepolicy(fst, strp->policy);
+		strp->policy = strpol;
+		strpol->refcount++;
+		
+		/* Record emulation for this policy */
+		if (strpol->emul == NULL)
+			strpol->emul = strp->proc->p_emul;
+
+		break;
+	case SYSTR_POLICY_MODIFY:
+		DPRINTF(("%s: %d: code %d -> policy %d\n", __func__,
+			    pol->strp_num, pol->strp_code, pol->strp_policy));
+		if (!POLICY_VALID(pol->strp_policy))
+			return (EINVAL);
+		TAILQ_FOREACH(strpol, &fst->policies, next)
+		    if (strpol->nr == pol->strp_num)
+			    break;
+		if (strpol == NULL)
+			return (EINVAL);
+		if (pol->strp_code < 0 || pol->strp_code >= strpol->nsysent)
+			return (EINVAL);
+		strpol->sysent[pol->strp_code] = pol->strp_policy;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+int
+systrace_processready(struct str_process *strp)
+{
+	/*
+	 * Returns EINVAL unless strp is off the message queue and is
+	 * waiting for an answer, EBUSY unless its proc is in SSLEEP,
+	 * and 0 otherwise.
+	 */
+	if (ISSET(strp->flags, STR_PROC_ONQUEUE) ||
+	    !ISSET(strp->flags, STR_PROC_WAITANSWER))
+		return (EINVAL);
+
+	return (strp->proc->p_stat == SSLEEP ? 0 : EBUSY);
+}
+
+int
+systrace_getcwd(struct fsystrace *fst, struct str_process *strp)
+{
+#ifdef __NetBSD__
+	struct cwdinfo *mycwdp, *cwdp;
+#else
+	struct filedesc *myfdp, *fdp;
+#endif
+	int error;
+
+	DPRINTF(("%s: %d\n", __func__, strp->pid));
+
+	error = systrace_processready(strp);
+	if (error)
+		return (error);
+
+#ifdef __NetBSD__
+	mycwdp = curproc->p_cwdi;
+	cwdp = strp->proc->p_cwdi;
+	if (mycwdp == NULL || cwdp == NULL)
+		return (EINVAL);
+
+	/* Store our current values */
+	fst->fd_pid = strp->pid;
+	fst->fd_cdir = mycwdp->cwdi_cdir;
+	fst->fd_rdir = mycwdp->cwdi_rdir;
+
+	if ((mycwdp->cwdi_cdir = cwdp->cwdi_cdir) != NULL)
+		VREF(mycwdp->cwdi_cdir);
+	if ((mycwdp->cwdi_rdir = cwdp->cwdi_rdir) != NULL)
+		VREF(mycwdp->cwdi_rdir);
+#else
+	myfdp = curproc->p_fd;
+	fdp = strp->proc->p_fd;
+	if (myfdp == NULL || fdp == NULL)
+		return (EINVAL);
+
+	/* Store our current values */
+	fst->fd_pid = strp->pid;
+	fst->fd_cdir = myfdp->fd_cdir;
+	fst->fd_rdir = myfdp->fd_rdir;
+
+	if ((myfdp->fd_cdir = fdp->fd_cdir) != NULL)
+		VREF(myfdp->fd_cdir);
+	if ((myfdp->fd_rdir = fdp->fd_rdir) != NULL)
+		VREF(myfdp->fd_rdir);
+#endif
+
+	return (0);
+}
+
+int
+systrace_io(struct str_process *strp, struct systrace_io *io)
+{
+	struct proc *p = curproc, *t = strp->proc;
+	struct uio uio;
+	struct iovec iov;
+	int error = 0;
+	
+	DPRINTF(("%s: %d: %p(%d)\n", __func__,
+		    io->strio_pid, io->strio_offs, io->strio_len));
+
+	switch (io->strio_op) {
+	case SYSTR_READ:
+		uio.uio_rw = UIO_READ;
+		break;
+	case SYSTR_WRITE:
+		uio.uio_rw = UIO_WRITE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	error = systrace_processready(strp);
+	if (error)
+		goto out;
+
+	iov.iov_base = io->strio_addr;
+	iov.iov_len = io->strio_len;
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = (off_t)(long)io->strio_offs;
+	uio.uio_resid = io->strio_len;
+	uio.uio_segflg = UIO_USERSPACE;
+	uio.uio_procp = p;
+
+	error = procfs_domem(p, t, NULL, &uio);
+	io->strio_len -= uio.uio_resid;
+ out:
+
+	return (error);
+}
+
+int
+systrace_attach(struct fsystrace *fst, pid_t pid)
+{	/* Start tracing pid on behalf of fst; returns 0 or an errno. */
+	int error = 0;
+	struct proc *proc, *p = curproc;	/* proc: target, p: caller */
+
+	if ((proc = pfind(pid)) == NULL) {
+		error = ESRCH;
+		goto out;
+	}
+
+	if (ISSET(proc->p_flag, P_INEXEC)) {
+		error = EAGAIN;
+		goto out;
+	}
+
+	/*
+	 * You can't attach to a process if:
+	 *	(1) it's the process that's doing the attaching,
+	 */
+	if (proc->p_pid == p->p_pid) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 *	(2) it's a system process
+	 */
+	if (ISSET(proc->p_flag, P_SYSTEM)) {
+		error = EPERM;
+		goto out;
+	}
+
+	/*
+	 *	(3) it's being traced already
+	 */
+	if (ISSET(proc->p_flag, P_SYSTRACE)) {
+		error = EBUSY;
+		goto out;
+	}
+
+	/*
+	 *	(4) it's not owned by you, or the last exec
+	 *	    gave us setuid/setgid privs (unless
+	 *	    you're root), or...
+	 *
+	 *	[Note: once P_SUGID gets set in execve(), it stays
+	 *	set until the process does another execve(). Hence
+	 *	this prevents a setuid process which revokes its
+	 *	special privileges using setuid() from being
+	 *	traced. This is good security.]
+	 */
+	if ((proc->p_cred->p_ruid != p->p_cred->p_ruid ||
+		ISSET(proc->p_flag, P_SUGID)) &&
+	    (error = suser(p->p_ucred, &p->p_acflag)) != 0)
+		goto out;
+
+	/*
+	 *	(5) ...it's init, which controls the security level
+	 *	    of the entire system, and the system was not
+	 *	    compiled with permanently insecure mode turned
+	 *	    on.
+	 */
+	if ((proc->p_pid == 1) && (securelevel > -1)) {
+		error = EPERM;
+		goto out;
+	}
+
+	error = systrace_insert_process(fst, proc);
+
+#if defined(__NetBSD__) && defined(__HAVE_SYSCALL_INTERN)
+	/*
+	 * Make sure we're using the version of the syscall handler that
+	 * has systrace hooks.
+	 */
+	if (!error)
+		(*proc->p_emul->e_syscall_intern)(proc);
+#endif
+ out:
+	return (error);
+}
+
+struct str_process *
+systrace_findpid(struct fsystrace *fst, pid_t pid)
+{
+	struct str_process *entry;
+
+	/* Look for the entry that fst keeps for this pid. */
+	TAILQ_FOREACH(entry, &fst->processes, next) {
+		if (entry->pid == pid)
+			break;
+	}
+
+	/* Hand it out only while systrace_find() still accepts it. */
+	if (entry == NULL || systrace_find(entry) == NULL)
+		return (NULL);
+
+	return (entry);
+}
+
+int
+systrace_detach(struct str_process *strp)
+{	/* Stop tracing strp's process and free strp; 0, or ESRCH if it is gone. */
+	struct proc *proc;
+	struct fsystrace *fst = NULL;
+	int error = 0;
+	
+	DPRINTF(("%s: Trying to detach from %d\n", __func__, strp->pid));
+
+	if ((proc = systrace_find(strp)) != NULL) {
+		CLR(proc->p_flag, P_SYSTRACE);
+		proc->p_systrace = NULL;	/* proc no longer refers to strp */
+	} else
+		error = ESRCH;	/* reported, but cleanup below still runs */
+
+	if (ISSET(strp->flags, STR_PROC_WAITANSWER)) {
+		CLR(strp->flags, STR_PROC_WAITANSWER);
+		wakeup(strp);	/* wake whoever sleeps on strp */
+	}
+
+	fst = strp->parent;
+	systrace_wakeup(fst);	/* wakeup(fst) plus selwakeup on fst->si */
+
+	if (ISSET(strp->flags, STR_PROC_ONQUEUE))
+		TAILQ_REMOVE(&fst->messages, strp, msg_next);
+
+	TAILQ_REMOVE(&fst->processes, strp, next);
+	fst->nprocesses--;
+
+	if (strp->policy)
+		systrace_closepolicy(fst, strp->policy);	/* drop our ref */
+	pool_put(&systr_proc_pl, strp);	/* strp must not be used after this */
+
+	return (error);
+}
+
+void
+systrace_closepolicy(struct fsystrace *fst, struct str_policy *policy)
+{
+	/* Drop one reference; only the last one tears the policy down. */
+	policy->refcount--;
+	if (policy->refcount == 0) {
+		/* Unlink from fst and account for the removal. */
+		TAILQ_REMOVE(&fst->policies, policy, next);
+		fst->npolicies--;
+		/* sysent is only freed when nsysent is non-zero. */
+		if (policy->nsysent != 0)
+			FREE(policy->sysent, M_XDATA);
+		pool_put(&systr_policy_pl, policy);
+	}
+}
+
+
+int
+systrace_insert_process(struct fsystrace *fst, struct proc *proc)
+{
+	struct str_process *entry;
+
+	/* PR_NOWAIT allocation; a NULL result is reported as ENOBUFS. */
+	if ((entry = pool_get(&systr_proc_pl, PR_NOWAIT)) == NULL)
+		return (ENOBUFS);
+
+	/* Start from an all-zero record, then tie it to proc and fst. */
+	memset(entry, 0, sizeof(*entry));
+	entry->parent = fst;
+	entry->proc = proc;
+	entry->pid = proc->p_pid;
+
+	/* Put it on fst's process list, then flag proc as traced. */
+	TAILQ_INSERT_TAIL(&fst->processes, entry, next);
+	fst->nprocesses++;
+	proc->p_systrace = entry;
+	SET(proc->p_flag, P_SYSTRACE);
+	return (0);
+}
+
+struct str_policy *
+systrace_newpolicy(struct fsystrace *fst, int maxents)
+{
+	struct str_policy *pol;
+	int i;
+	/* Allocate a policy with maxents entries on fst; NULL on failure. */
+	if (fst->npolicies > SYSTR_MAX_POLICIES && !fst->issuser)
+		return (NULL);
+
+	pol = pool_get(&systr_policy_pl, PR_NOWAIT);
+	if (pol == NULL)
+		return (NULL);
+	/* Log the bytes really requested below: one u_char per entry. */
+	DPRINTF(("%s: allocating %d -> %lu\n", __func__,
+		     maxents, (u_long)(maxents * sizeof(u_char))));
+
+	memset((caddr_t)pol, 0, sizeof(struct str_policy));
+	/* Every entry starts out as SYSTR_POLICY_ASK. */
+	MALLOC(pol->sysent, u_char *, maxents * sizeof(u_char),
+	    M_XDATA, M_WAITOK);
+	pol->nsysent = maxents;
+	for (i = 0; i < maxents; i++)
+		pol->sysent[i] = SYSTR_POLICY_ASK;
+	/* Number the policy, take the first reference, and link it in. */
+	fst->npolicies++;
+	pol->nr = fst->npolicynr++;
+	pol->refcount = 1;
+
+	TAILQ_INSERT_TAIL(&fst->policies, pol, next);
+
+	return (pol);
+}
+
+int
+systrace_msg_ask(struct fsystrace *fst, struct str_process *strp,
+    int code, size_t argsize, register_t args[])
+{
+	struct str_msg_ask *ask = &strp->msg.msg_data.msg_ask;
+	size_t idx, nargs = argsize / sizeof(register_t);
+
+	/* Keep the call code, raw size and up to SYSTR_MAXARGS words. */
+	ask->code = code;
+	ask->argsize = argsize;
+	for (idx = 0; idx < nargs && idx < SYSTR_MAXARGS; idx++)
+		ask->args[idx] = args[idx];
+	return (systrace_make_msg(strp, SYSTR_MSG_ASK));
+}
+
+int
+systrace_msg_result(struct fsystrace *fst, struct str_process *strp,
+    int error, int code, size_t argsize, register_t args[], register_t rval[])
+{
+	struct str_msg_ask *res = &strp->msg.msg_data.msg_ask;
+	size_t idx, nargs = argsize / sizeof(register_t);
+
+	/* Results use the msg_ask layout plus the error and two rvals. */
+	res->result = error;
+	res->rval[0] = rval[0];
+	res->rval[1] = rval[1];
+	/* Describe the call itself: code, raw size, clamped arguments. */
+	res->code = code;
+	res->argsize = argsize;
+	for (idx = 0; idx < nargs && idx < SYSTR_MAXARGS; idx++)
+		res->args[idx] = args[idx];
+	return (systrace_make_msg(strp, SYSTR_MSG_RES));
+}
+
+int
+systrace_msg_emul(struct fsystrace *fst, struct str_process *strp)
+{
+	struct str_msg_emul *msg_emul = &strp->msg.msg_data.msg_emul;
+	struct proc *p = strp->proc;
+	/* Stop at e_name's NUL and zero-pad: never read or leak past it. */
+	strncpy(msg_emul->emul, p->p_emul->e_name, SYSTR_EMULEN);
+	/* Hand the emulation name to the tracer as a SYSTR_MSG_EMUL. */
+	return (systrace_make_msg(strp, SYSTR_MSG_EMUL));
+}
+
+int
+systrace_make_msg(struct str_process *strp, int type)
+{
+	struct str_message *msg = &strp->msg;
+	struct fsystrace *fst = strp->parent;
+	struct proc *p = strp->proc;
+	int st;
+	/* Queue strp's message on fst, then sleep until it is answered. */
+#if defined(__GNUC__) && defined(__NetBSD__)
+	(void) &p;	/* Sanitize gcc */
+#endif
+	msg->msg_type = type;
+	msg->msg_pid = strp->pid;
+	if (strp->policy)
+		msg->msg_policy = strp->policy->nr;
+	else
+		msg->msg_policy = -1;
+	/* Mark the answer as pending; enqueue only if not already queued. */
+	SET(strp->flags, STR_PROC_WAITANSWER);
+	if (ISSET(strp->flags, STR_PROC_ONQUEUE))
+		goto out;
+
+	TAILQ_INSERT_TAIL(&fst->messages, strp, msg_next);
+	SET(strp->flags, STR_PROC_ONQUEUE);
+
+ out:
+	systrace_wakeup(fst);
+
+	/* Release the lock - XXX; it is not retaken before returning */
+	SYSTRACE_UNLOCK(fst, p);
+	/* Any non-zero tsleep() status is returned to the caller as EINTR. */
+	while (1) {
+		st = tsleep(strp, PWAIT | PCATCH, "systrmsg", 0);
+		if (st != 0)
+			return (EINTR);
+		/* If we detach, then everything is permitted */
+		if ((strp = curproc->p_systrace) == NULL)
+			return (0);
+		if (!ISSET(strp->flags, STR_PROC_WAITANSWER))
+			break;
+	}
+	/* STR_PROC_WAITANSWER is no longer set: the wait is over. */
+	return (0);
+}
+
+int
+systrace_msg_child(struct fsystrace *fst, struct str_process *strp, pid_t npid)
+{
+	struct str_process *carrier;
+	struct str_message *note;
+
+	/*
+	 * The notification travels in a zeroed str_process of its own.
+	 * It is put on fst's message queue only: it is not added to
+	 * fst->processes and STR_PROC_ONQUEUE is left clear.
+	 */
+	carrier = pool_get(&systr_proc_pl, PR_WAITOK);
+	memset(carrier, 0, sizeof(*carrier));
+
+	DPRINTF(("%s: %p: pid %d -> pid %d\n", __func__,
+		    carrier, strp->pid, npid));
+
+	/* Sent in strp's name: its pid and policy number (-1 if none). */
+	note = &carrier->msg;
+	note->msg_type = SYSTR_MSG_CHILD;
+	note->msg_pid = strp->pid;
+	note->msg_policy = (strp->policy != NULL) ? strp->policy->nr : -1;
+	note->msg_data.msg_child.new_pid = npid;
+
+	/* Queue it and call systrace_wakeup(); no reply is awaited here. */
+	TAILQ_INSERT_TAIL(&fst->messages, carrier, msg_next);
+	systrace_wakeup(fst);
+
+	return (0);
+}
Index: sys/dev/systrace.h
===================================================================
RCS file: systrace.h
diff -N systrace.h
--- /dev/null	Sun May  5 08:03:30 2002
+++ systrace.h	Wed Jun 12 03:07:56 2002
@@ -0,0 +1,145 @@
+/* $NetBSD$ */
+
+#ifndef _SYSTRACE_H_
+#define _SYSTRACE_H_
+
+#include <sys/select.h>
+#include <sys/ioccom.h>
+
+#define	SYSTR_CLONE	_IOR('s', 1, int)
+
+#define SYSTR_EMULEN	8	/* sync with sys proc */
+
+struct str_msg_emul {
+	char emul[SYSTR_EMULEN];
+};
+
+#define SYSTR_MAX_POLICIES	64
+#define SYSTR_MAXARGS		64
+
+struct str_msg_ask {
+	int code;	/* call code (presumably the syscall number) */
+	int argsize;	/* size of the arguments, in bytes */
+	register_t args[SYSTR_MAXARGS];	/* first SYSTR_MAXARGS args at most */
+	register_t rval[2];	/* rval[0..1]; set for SYSTR_MSG_RES only */
+	int result;	/* error value; set for SYSTR_MSG_RES only */
+};
+
+/* Queued on fork or exit of a process */
+
+struct str_msg_child {
+	pid_t new_pid;
+};
+
+#define SYSTR_MSG_ASK	1
+#define SYSTR_MSG_RES	2
+#define SYSTR_MSG_EMUL	3
+#define SYSTR_MSG_CHILD	4
+
+#define SYSTR_MSG_NOPROCESS(x) \
+	((x)->msg.msg_type == SYSTR_MSG_CHILD)
+
+struct str_message {
+	int msg_type;		/* one of SYSTR_MSG_* */
+	pid_t msg_pid;		/* pid of the str_process it concerns */
+	short msg_policy;	/* policy number, or -1 if none assigned */
+	short reserved;
+	union {
+		struct str_msg_emul msg_emul;	/* SYSTR_MSG_EMUL */
+		struct str_msg_ask msg_ask;	/* SYSTR_MSG_ASK and _RES */
+		struct str_msg_child msg_child;	/* SYSTR_MSG_CHILD */
+	} msg_data;
+};
+
+struct systrace_answer {
+	pid_t stra_pid;
+	int stra_policy;
+	int stra_error;
+	int stra_flags;
+};
+
+#define SYSTR_READ		1
+#define SYSTR_WRITE		2
+
+struct systrace_io {
+	pid_t strio_pid;
+	int strio_op;
+	void *strio_offs;
+	void *strio_addr;
+	size_t strio_len;
+};
+
+#define SYSTR_POLICY_NEW	1
+#define SYSTR_POLICY_ASSIGN	2
+#define SYSTR_POLICY_MODIFY	3
+
+struct systrace_policy {
+	int strp_op;
+	int strp_num;
+	union {
+		struct {
+			short code;
+			short policy;
+		} assign;
+		pid_t pid;
+		int maxents;
+	} strp_data;
+};
+
+#define strp_pid	strp_data.pid
+#define strp_maxents	strp_data.maxents
+#define strp_code	strp_data.assign.code
+#define strp_policy	strp_data.assign.policy
+
+#define STRIOCATTACH	_IOW('s', 101, pid_t)
+#define STRIOCDETACH	_IOW('s', 102, pid_t)
+#define STRIOCANSWER	_IOW('s', 103, struct systrace_answer)
+#define STRIOCIO	_IOWR('s', 104, struct systrace_io)
+#define STRIOCPOLICY	_IOWR('s', 105, struct systrace_policy)
+#define STRIOCGETCWD	_IOW('s', 106, pid_t)
+#define STRIOCRESCWD	_IO('s', 107)
+#define STRIOCREPORT	_IOW('s', 108, pid_t)
+
+#define SYSTR_POLICY_ASK	0
+#define SYSTR_POLICY_PERMIT	1
+#define SYSTR_POLICY_NEVER	2
+
+#define SYSTR_FLAGS_RESULT	0x001
+
+#ifdef _KERNEL
+/* XXX: these shouldn't be here. */
+#define SET(t, f)	((t) |= (f))
+#define	ISSET(t, f)	((t) & (f))
+#define	CLR(t, f)	((t) &= ~(f))
+
+struct str_process;
+struct fsystrace {
+	struct lock lock;
+	struct selinfo si;
+
+	TAILQ_HEAD(strprocessq, str_process) processes;	/* attached procs */
+	int nprocesses;			/* entries on 'processes' */
+
+	TAILQ_HEAD(strpolicyq, str_policy) policies;	/* live policies */
+
+	struct strprocessq messages;	/* str_process with queued message */
+
+	int npolicynr;			/* next policy number to hand out */
+	int npolicies;			/* entries on 'policies' */
+
+	int issuser;			/* lifts the SYSTR_MAX_POLICIES cap */
+
+	/* cwd magic */
+	pid_t fd_pid;
+	struct vnode *fd_cdir;
+	struct vnode *fd_rdir;
+};
+
+/* Internal prototypes */
+
+int systrace_redirect(int, struct proc *, void *, register_t *);
+void systrace_exit(struct proc *);
+void systrace_fork(struct proc *, struct proc *);
+
+#endif /* _KERNEL */
+#endif /* !_SYSTRACE_H_ */
Index: sys/kern/kern_exit.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_exit.c,v
retrieving revision 1.93
diff -u -r1.93 kern_exit.c
--- kern_exit.c	2002/03/17 23:41:30	1.93
+++ kern_exit.c	2002/06/12 10:08:03
@@ -114,6 +114,9 @@
 
 #include <uvm/uvm_extern.h>
 
+#include "systrace.h"
+#include <dev/systrace.h>
+
 /*
  * exit --
  *	Death of process.
@@ -213,6 +216,10 @@
 	 * release trace file
 	 */
 	ktrderef(p);
+#endif
+#if NSYSTRACE > 0
+	if (ISSET(p->p_flag, P_SYSTRACE))
+		systrace_exit(p);
 #endif
 	/*
 	 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_fork.c,v
retrieving revision 1.88
diff -u -r1.88 kern_fork.c
--- kern_fork.c	2001/12/08 00:35:30	1.88
+++ kern_fork.c	2002/06/12 10:08:04
@@ -105,6 +105,9 @@
 
 #include <uvm/uvm_extern.h>
 
+#include "systrace.h"
+#include <dev/systrace.h>
+
 int	nprocs = 1;		/* process 0 */
 
 /*ARGSUSED*/
@@ -251,6 +254,85 @@
 	p2 = pool_get(&proc_pool, PR_WAITOK);
 
 	/*
+	 * BEGIN PID ALLOCATION.
+	 */
+	s = proclist_lock_write();
+
+	/*
+	 * Find an unused process ID.  We remember a range of unused IDs
+	 * ready to use (from nextpid+1 through pidchecked-1).
+	 */
+	nextpid++;
+ retry:
+	/*
+	 * If the process ID prototype has wrapped around,
+	 * restart somewhat above 0, as the low-numbered procs
+	 * tend to include daemons that don't exit.
+	 */
+	if (nextpid >= PID_MAX) {
+		nextpid = 500;
+		pidchecked = 0;
+	}
+	if (nextpid >= pidchecked) {
+		const struct proclist_desc *pd;
+
+		pidchecked = PID_MAX;
+		/*
+		 * Scan the process lists to check whether this pid
+		 * is in use.  Remember the lowest pid that's greater
+		 * than nextpid, so we can avoid checking for a while.
+		 */
+		pd = proclists;
+ again:
+		LIST_FOREACH(tp, pd->pd_list, p_list) {
+			while (tp->p_pid == nextpid ||
+			    tp->p_pgrp->pg_id == nextpid ||
+			    tp->p_session->s_sid == nextpid) {
+				nextpid++;
+				if (nextpid >= pidchecked)
+					goto retry;
+			}
+			if (tp->p_pid > nextpid && pidchecked > tp->p_pid)
+				pidchecked = tp->p_pid;
+
+			if (tp->p_pgrp->pg_id > nextpid && 
+			    pidchecked > tp->p_pgrp->pg_id)
+				pidchecked = tp->p_pgrp->pg_id;
+
+			if (tp->p_session->s_sid > nextpid &&
+			    pidchecked > tp->p_session->s_sid)
+				pidchecked = tp->p_session->s_sid;
+		}
+
+		/*
+		 * If there's another list, scan it.  If we have checked
+		 * them all, we've found one!
+		 */
+		pd++;
+		if (pd->pd_list != NULL)
+			goto again;
+	}
+
+	/*
+	 * Put the proc on allproc before unlocking PID allocation
+	 * so that waiters won't grab it as soon as we unlock.
+	 */
+
+	p2->p_stat = SIDL;			/* protect against others */
+	p2->p_pid = nextpid;
+	p2->p_exitsig = exitsig;		/* signal for parent on exit */
+	p2->p_forw = p2->p_back = NULL;		/* shouldn't be necessary */
+
+	LIST_INSERT_HEAD(&allproc, p2, p_list);
+
+	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
+
+	/*
+	 * END PID ALLOCATION.
+	 */
+	proclist_unlock_write(s);
+
+	/*
 	 * Make a proc table entry for the new process.
 	 * Start by zeroing the section of proc that is zero-initialized,
 	 * then copy the section that is copied directly from the parent.
@@ -343,6 +425,12 @@
 			ktradref(p2);
 	}
 #endif
+#if NSYSTRACE > 0
+	/* Tell systrace what's happening. */
+	if (ISSET(p1->p_flag, P_SYSTRACE))
+		systrace_fork(p1, p2);
+#endif
+
 
 #ifdef __HAVE_SYSCALL_INTERN
 	(*p2->p_emul->e_syscall_intern)(p2);
@@ -376,89 +464,6 @@
 	    stack, stacksize,
 	    (func != NULL) ? func : child_return,
 	    (arg != NULL) ? arg : p2);
-
-	/*
-	 * BEGIN PID ALLOCATION.
-	 */
-	s = proclist_lock_write();
-
-	/*
-	 * Find an unused process ID.  We remember a range of unused IDs
-	 * ready to use (from nextpid+1 through pidchecked-1).
-	 */
-	nextpid++;
- retry:
-	/*
-	 * If the process ID prototype has wrapped around,
-	 * restart somewhat above 0, as the low-numbered procs
-	 * tend to include daemons that don't exit.
-	 */
-	if (nextpid >= PID_MAX) {
-		nextpid = 500;
-		pidchecked = 0;
-	}
-	if (nextpid >= pidchecked) {
-		const struct proclist_desc *pd;
-
-		pidchecked = PID_MAX;
-		/*
-		 * Scan the process lists to check whether this pid
-		 * is in use.  Remember the lowest pid that's greater
-		 * than nextpid, so we can avoid checking for a while.
-		 */
-		pd = proclists;
- again:
-		LIST_FOREACH(tp, pd->pd_list, p_list) {
-			while (tp->p_pid == nextpid ||
-			    tp->p_pgrp->pg_id == nextpid ||
-			    tp->p_session->s_sid == nextpid) {
-				nextpid++;
-				if (nextpid >= pidchecked)
-					goto retry;
-			}
-			if (tp->p_pid > nextpid && pidchecked > tp->p_pid)
-				pidchecked = tp->p_pid;
-
-			if (tp->p_pgrp->pg_id > nextpid && 
-			    pidchecked > tp->p_pgrp->pg_id)
-				pidchecked = tp->p_pgrp->pg_id;
-
-			if (tp->p_session->s_sid > nextpid &&
-			    pidchecked > tp->p_session->s_sid)
-				pidchecked = tp->p_session->s_sid;
-		}
-
-		/*
-		 * If there's another list, scan it.  If we have checked
-		 * them all, we've found one!
-		 */
-		pd++;
-		if (pd->pd_list != NULL)
-			goto again;
-	}
-
-	/* Record the pid we've allocated. */
-	p2->p_pid = nextpid;
-
-	/* Record the signal to be delivered to the parent on exit. */
-	p2->p_exitsig = exitsig;
-
-	/*
-	 * Put the proc on allproc before unlocking PID allocation
-	 * so that waiters won't grab it as soon as we unlock.
-	 */
-
-	p2->p_stat = SIDL;			/* protect against others */
-	p2->p_forw = p2->p_back = NULL;		/* shouldn't be necessary */
-
-	LIST_INSERT_HEAD(&allproc, p2, p_list);
-
-	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
-
-	/*
-	 * END PID ALLOCATION.
-	 */
-	proclist_unlock_write(s);
 
 	/*
 	 * Make child runnable, set start time, and add to run queue.
Index: sys/sys/Makefile
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/Makefile,v
retrieving revision 1.41
diff -u -r1.41 Makefile
--- Makefile	2002/05/06 13:32:19	1.41
+++ Makefile	2002/06/12 10:08:10
@@ -21,8 +21,8 @@
 	shm.h signal.h signalvar.h socket.h socketvar.h sockio.h stat.h \
 	syscall.h syscallargs.h sysctl.h stdint.h swap.h syslimits.h syslog.h \
 	systm.h tablet.h termios.h time.h timeb.h timepps.h times.h timex.h \
-	tprintf.h trace.h tty.h ttychars.h ttycom.h ttydefaults.h ttydev.h \
-	types.h ucred.h uio.h un.h unistd.h unpcb.h user.h utsname.h \
+	tprintf.h trace.h tree.h tty.h ttychars.h ttycom.h ttydefaults.h \
+	ttydev.h types.h ucred.h uio.h un.h unistd.h unpcb.h user.h utsname.h \
 	vadvise.h vmmeter.h vnode.h vnode_if.h wait.h wdog.h
 
 SYMLINKS= sys/exec_elf.h /usr/include/elf.h \
Index: sys/sys/conf.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/conf.h,v
retrieving revision 1.101
diff -u -r1.101 conf.h
--- conf.h	2002/04/23 06:48:46	1.101
+++ conf.h	2002/06/12 10:08:11
@@ -333,6 +333,12 @@
 	dev_init(c,n,write), dev_init(c,n,ioctl), \
 	dev_noimpl(stop,enodev), 0, seltrue, dev_init(c,n,mmap) }
 
+/* open, close, read, write, ioctl */
+#define	cdev_systrace_init(c,n) { \
+	dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \
+	dev_init(c,n,write), dev_init(c,n,ioctl), dev_noimpl(stop,enodev), \
+	0,  dev_noimpl(poll,enodev), dev_noimpl(mmap,enodev) }
+
 /* (open), (close), read, write */
 #define cdev_swap_init(c,n) { \
 	dev_noimpl(open,nullop), dev_noimpl(close,nullop), \
@@ -620,6 +626,8 @@
 # define NSVR4_NET	0
 #endif
 cdev_decl(svr4_net);
+
+cdev_decl(systrace);
 
 cdev_decl(tun);
 
Index: sys/sys/file.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/file.h,v
retrieving revision 1.31
diff -u -r1.31 file.h
--- file.h	2001/12/18 22:29:25	1.31
+++ file.h	2002/06/12 10:08:12
@@ -60,6 +60,7 @@
 #define	DTYPE_VNODE	1		/* file */
 #define	DTYPE_SOCKET	2		/* communications endpoint */
 #define	DTYPE_PIPE	3		/* pipe */
+#define	DTYPE_SYSTRACE	4		/* system call tracing */
 	int		f_type;		/* descriptor type */
 	u_int		f_count;	/* reference count */
 	u_int		f_msgcount;	/* references from message queue */
Index: sys/sys/proc.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/proc.h,v
retrieving revision 1.137
diff -u -r1.137 proc.h
--- proc.h	2002/04/02 20:20:00	1.137
+++ proc.h	2002/06/12 10:08:13
@@ -200,6 +200,7 @@
 
 	int		p_traceflag;	/* Kernel trace points */
 	struct file	*p_tracep;	/* Trace to file */
+	void		*p_systrace;	/* Back pointer to systrace */
 
 	struct vnode	*p_textvp;	/* Vnode of executable */
 
@@ -296,6 +297,7 @@
 #define	P_32		0x040000 /* 32-bit process (used on 64-bit kernels) */
 #define	P_BIGLOCK	0x080000 /* Process needs kernel "big lock" to run */
 #define	P_INEXEC	0x100000 /* Process is exec'ing and cannot be traced */
+#define	P_SYSTRACE	0x200000 /* Process system call tracing active */
 
 
 /*
Index: sys/sys/tree.h
===================================================================
RCS file: tree.h
diff -N tree.h
--- /dev/null	Sun May  5 08:03:30 2002
+++ tree.h	Wed Jun 12 03:08:14 2002
@@ -0,0 +1,669 @@
+/*	$NetBSD$	*/
+/*	$OpenBSD: tree.h,v 1.4 2002/03/26 02:47:28 hugh Exp $	*/
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_SYS_TREE_H_
+#define	_SYS_TREE_H_
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ *	- every search path from the root to a leaf consists of the
+ *	  same number of black nodes,
+ *	- each red node (except for the root) has a black parent,
+ *	- each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type)						\
+struct name {								\
+	struct type *sph_root; /* root of the tree */			\
+}
+
+#define SPLAY_INITIALIZER(root)						\
+	{ NULL }
+
+#define SPLAY_INIT(root) do {						\
+	(root)->sph_root = NULL;					\
+} while (0)
+
+#define SPLAY_ENTRY(type)						\
+struct {								\
+	struct type *spe_left; /* left element */			\
+	struct type *spe_right; /* right element */			\
+}
+
+#define SPLAY_LEFT(elm, field)		(elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field)		(elm)->field.spe_right
+#define SPLAY_ROOT(head)		(head)->sph_root
+#define SPLAY_EMPTY(head)		(SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {			\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);	\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (0)
+	
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do {			\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);	\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do {				\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);		\
+} while (0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do {				\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);	\
+} while (0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do {		\
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);	\
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);	\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);	\
+} while (0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)				\
+void name##_SPLAY(struct name *, struct type *);			\
+void name##_SPLAY_MINMAX(struct name *, int);				\
+									\
+static __inline void							\
+name##_SPLAY_INSERT(struct name *head, struct type *elm)		\
+{									\
+    if (SPLAY_EMPTY(head)) {						\
+	    SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL;	\
+    } else {								\
+	    int __comp;							\
+	    name##_SPLAY(head, elm);					\
+	    __comp = (cmp)(elm, (head)->sph_root);			\
+	    if(__comp < 0) {						\
+		    SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+		    SPLAY_RIGHT(elm, field) = (head)->sph_root;		\
+		    SPLAY_LEFT((head)->sph_root, field) = NULL;		\
+	    } else if (__comp > 0) {					\
+		    SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+		    SPLAY_LEFT(elm, field) = (head)->sph_root;		\
+		    SPLAY_RIGHT((head)->sph_root, field) = NULL;	\
+	    } else							\
+		    return;						\
+    }									\
+    (head)->sph_root = (elm);						\
+}									\
+									\
+static __inline void							\
+name##_SPLAY_REMOVE(struct name *head, struct type *elm)		\
+{									\
+	struct type *__tmp;						\
+	if (SPLAY_EMPTY(head))						\
+		return;							\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0) {			\
+		if (SPLAY_LEFT((head)->sph_root, field) == NULL) {	\
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else {						\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm);			\
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp;	\
+		}							\
+	}								\
+}									\
+									\
+/* Finds the node with the same key as elm */				\
+static __inline struct type *						\
+name##_SPLAY_FIND(struct name *head, struct type *elm)			\
+{									\
+	if (SPLAY_EMPTY(head))						\
+		return(NULL);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0)				\
+		return (head->sph_root);				\
+	return (NULL);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_NEXT(struct name *head, struct type *elm)			\
+{									\
+	name##_SPLAY(head, elm);					\
+	if (SPLAY_RIGHT(elm, field) != NULL) {				\
+		elm = SPLAY_RIGHT(elm, field);				\
+		while (SPLAY_LEFT(elm, field) != NULL) {		\
+			elm = SPLAY_LEFT(elm, field);			\
+		}							\
+	} else								\
+		elm = NULL;						\
+	return (elm);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_MIN_MAX(struct name *head, int val)			\
+{									\
+	name##_SPLAY_MINMAX(head, val);					\
+        return (SPLAY_ROOT(head));					\
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp)				\
+void name##_SPLAY(struct name *head, struct type *elm)			\
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+	int __comp;							\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while ((__comp = (cmp)(elm, (head)->sph_root))) {		\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) < 0){			\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) > 0){			\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}									\
+									\
+/* Splay with either the minimum or the maximum element			\
+ * Used to find minimum or maximum element in tree.			\
+ */									\
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while (1) {							\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp < 0){				\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp > 0) {				\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}
+
+#define SPLAY_NEGINF	-1
+#define SPLAY_INF	1
+
+#define SPLAY_INSERT(name, x, y)	name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y)	name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y)		name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y)		name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head)					\
+	for ((x) = SPLAY_MIN(name, head);				\
+	     (x) != NULL;						\
+	     (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type)						\
+struct name {								\
+	struct type *rbh_root; /* root of the tree */			\
+}
+
+#define RB_INITIALIZER(root)						\
+	{ NULL }
+
+#define RB_INIT(root) do {						\
+	(root)->rbh_root = NULL;					\
+} while (0)
+
+#define RB_BLACK	0
+#define RB_RED		1
+#define RB_ENTRY(type)							\
+struct {								\
+	struct type *rbe_left;		/* left element */		\
+	struct type *rbe_right;		/* right element */		\
+	struct type *rbe_parent;	/* parent element */		\
+	int rbe_color;			/* node color */		\
+}
+
+#define RB_LEFT(elm, field)		(elm)->field.rbe_left
+#define RB_RIGHT(elm, field)		(elm)->field.rbe_right
+#define RB_PARENT(elm, field)		(elm)->field.rbe_parent
+#define RB_COLOR(elm, field)		(elm)->field.rbe_color
+#define RB_ROOT(head)			(head)->rbh_root
+#define RB_EMPTY(head)			(RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) do {					\
+	RB_PARENT(elm, field) = parent;					\
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;		\
+	RB_COLOR(elm, field) = RB_RED;					\
+} while (0)
+
+#define RB_SET_BLACKRED(black, red, field) do {				\
+	RB_COLOR(black, field) = RB_BLACK;				\
+	RB_COLOR(red, field) = RB_RED;					\
+} while (0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do {			\
+	(tmp) = RB_RIGHT(elm, field);					\
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) {		\
+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) {		\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+		RB_AUGMENT(RB_PARENT(elm, field));			\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_LEFT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+} while (0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {			\
+	(tmp) = RB_LEFT(elm, field);					\
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) {		\
+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) {		\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+		RB_AUGMENT(RB_PARENT(elm, field));			\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_RIGHT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+} while (0)
+
+/* Generates prototypes */
+#define RB_PROTOTYPE(name, type, field, cmp)				\
+void name##_RB_INSERT_COLOR(struct name *, struct type *);	\
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+void name##_RB_REMOVE(struct name *, struct type *);			\
+struct type *name##_RB_INSERT(struct name *, struct type *);		\
+struct type *name##_RB_FIND(struct name *, struct type *);		\
+struct type *name##_RB_NEXT(struct name *, struct type *);		\
+struct type *name##_RB_MINMAX(struct name *, int);			\
+									\
+
+/* Red-black tree implementation.
+ * Generates the functions declared by RB_PROTOTYPE.
+ */
+#define RB_GENERATE(name, type, field, cmp)				\
+void									\
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm)		\
+{									\
+	struct type *parent, *gparent, *tmp;				\
+	while ((parent = RB_PARENT(elm, field)) &&			\
+	    RB_COLOR(parent, field) == RB_RED) {			\
+		gparent = RB_PARENT(parent, field);			\
+		if (parent == RB_LEFT(gparent, field)) {		\
+			tmp = RB_RIGHT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_RIGHT(parent, field) == elm) {		\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_RIGHT(head, gparent, tmp, field);	\
+		} else {						\
+			tmp = RB_LEFT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_LEFT(parent, field) == elm) {		\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_LEFT(head, gparent, tmp, field);	\
+		}							\
+	}								\
+	RB_COLOR(head->rbh_root, field) = RB_BLACK;			\
+}									\
+									\
+void									\
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{									\
+	struct type *tmp;						\
+	while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) &&	\
+	    elm != RB_ROOT(head)) {					\
+		if (RB_LEFT(parent, field) == elm) {			\
+			tmp = RB_RIGHT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_RIGHT(tmp, field) == NULL ||	\
+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+					struct type *oleft;		\
+					if ((oleft = RB_LEFT(tmp, field)))\
+						RB_COLOR(oleft, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_RIGHT(tmp, field))		\
+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		} else {						\
+			tmp = RB_LEFT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_LEFT(tmp, field) == NULL ||	\
+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+					struct type *oright;		\
+					if ((oright = RB_RIGHT(tmp, field)))\
+						RB_COLOR(oright, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_LEFT(head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_LEFT(tmp, field))		\
+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		}							\
+	}								\
+	if (elm)							\
+		RB_COLOR(elm, field) = RB_BLACK;			\
+}									\
+									\
+void									\
+name##_RB_REMOVE(struct name *head, struct type *elm)			\
+{									\
+	struct type *child, *parent;					\
+	int color;							\
+	if (RB_LEFT(elm, field) == NULL)				\
+		child = RB_RIGHT(elm, field);				\
+	else if (RB_RIGHT(elm, field) == NULL)				\
+		child = RB_LEFT(elm, field);				\
+	else {								\
+		struct type *old = elm, *left;				\
+		elm = RB_RIGHT(elm, field);				\
+		while ((left = RB_LEFT(elm, field)))			\
+			elm = left;					\
+		child = RB_RIGHT(elm, field);				\
+		parent = RB_PARENT(elm, field);				\
+		color = RB_COLOR(elm, field);				\
+		if (child)						\
+			RB_PARENT(child, field) = parent;		\
+		if (parent) {						\
+			if (RB_LEFT(parent, field) == elm)		\
+				RB_LEFT(parent, field) = child;		\
+			else						\
+				RB_RIGHT(parent, field) = child;	\
+			RB_AUGMENT(parent);				\
+		} else							\
+			RB_ROOT(head) = child;				\
+		if (RB_PARENT(elm, field) == old)			\
+			parent = elm;					\
+		(elm)->field = (old)->field;				\
+		if (RB_PARENT(old, field)) {				\
+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+				RB_LEFT(RB_PARENT(old, field), field) = elm;\
+			else						\
+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+			RB_AUGMENT(RB_PARENT(old, field));		\
+		} else							\
+			RB_ROOT(head) = elm;				\
+		RB_PARENT(RB_LEFT(old, field), field) = elm;		\
+		if (RB_RIGHT(old, field))				\
+			RB_PARENT(RB_RIGHT(old, field), field) = elm;	\
+		if (parent) {						\
+			left = parent;					\
+			do {						\
+				RB_AUGMENT(left);			\
+			} while ((left = RB_PARENT(left, field)));	\
+		}							\
+		goto color;						\
+	}								\
+	parent = RB_PARENT(elm, field);					\
+	color = RB_COLOR(elm, field);					\
+	if (child)							\
+		RB_PARENT(child, field) = parent;			\
+	if (parent) {							\
+		if (RB_LEFT(parent, field) == elm)			\
+			RB_LEFT(parent, field) = child;			\
+		else							\
+			RB_RIGHT(parent, field) = child;		\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = child;					\
+color:									\
+	if (color == RB_BLACK)						\
+		name##_RB_REMOVE_COLOR(head, parent, child);		\
+}									\
+									\
+/* Inserts a node into the RB tree */					\
+struct type *								\
+name##_RB_INSERT(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp;						\
+	struct type *parent = NULL;					\
+	int comp = 0;							\
+	tmp = RB_ROOT(head);						\
+	while (tmp) {							\
+		parent = tmp;						\
+		comp = (cmp)(elm, parent);				\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	RB_SET(elm, parent, field);					\
+	if (parent != NULL) {						\
+		if (comp < 0)						\
+			RB_LEFT(parent, field) = elm;			\
+		else							\
+			RB_RIGHT(parent, field) = elm;			\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = elm;					\
+	name##_RB_INSERT_COLOR(head, elm);				\
+	return (NULL);							\
+}									\
+									\
+/* Finds the node with the same key as elm */				\
+struct type *								\
+name##_RB_FIND(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	int comp;							\
+	while (tmp) {							\
+		comp = cmp(elm, tmp);					\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	return (NULL);							\
+}									\
+									\
+struct type *								\
+name##_RB_NEXT(struct name *head, struct type *elm)			\
+{									\
+	if (RB_RIGHT(elm, field)) {					\
+		elm = RB_RIGHT(elm, field);				\
+		while (RB_LEFT(elm, field))				\
+			elm = RB_LEFT(elm, field);			\
+	} else {							\
+		if (RB_PARENT(elm, field) &&				\
+		    (elm == RB_LEFT(RB_PARENT(elm, field), field)))	\
+			elm = RB_PARENT(elm, field);			\
+		else {							\
+			while (RB_PARENT(elm, field) &&			\
+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field);		\
+			elm = RB_PARENT(elm, field);			\
+		}							\
+	}								\
+	return (elm);							\
+}									\
+									\
+struct type *								\
+name##_RB_MINMAX(struct name *head, int val)				\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	struct type *parent = NULL;					\
+	while (tmp) {							\
+		parent = tmp;						\
+		if (val < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else							\
+			tmp = RB_RIGHT(tmp, field);			\
+	}								\
+	return (parent);						\
+}
+
+#define RB_NEGINF	-1
+#define RB_INF	1
+
+#define RB_INSERT(name, x, y)	name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y)	name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y)	name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y)	name##_RB_NEXT(x, y)
+#define RB_MIN(name, x)		name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x)		name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head)					\
+	for ((x) = RB_MIN(name, head);					\
+	     (x) != NULL;						\
+	     (x) = name##_RB_NEXT(head, x))
+
+#endif	/* _SYS_TREE_H_ */
Index: share/man/man4/Makefile
===================================================================
RCS file: /cvsroot/sharesrc/share/man/man4/Makefile,v
retrieving revision 1.259
diff -u -r1.259 Makefile
--- Makefile	2002/05/19 20:39:18	1.259
+++ Makefile	2002/06/12 10:08:15
@@ -23,7 +23,8 @@
 	raid.4 ray.4 rcons.4 rnd.4 route.4 \
 	rtk.4 sbus.4 scc.4 scsi.4 sd.4 se.4 ses.4 sf.4 sfb.4 siop.4 \
 	sip.4 sl.4 sm.4 \
-	speaker.4 spp.4 sqphy.4 ss.4 st.4 ste.4 stge.4 sv.4 strip.4 tb.4 \
+	speaker.4 spp.4 sqphy.4 ss.4 st.4 ste.4 stge.4 sv.4 strip.4 \
+	systrace.4 tb.4 \
 	tc.4 tcds.4 tcp.4 \
 	termios.4 tfb.4 ti.4 tl.4 tlp.4 tlphy.4 tp.4 tr.4 trm.4 tty.4 tun.4 \
 	tqphy.4 twe.4 \
Index: share/man/man4/systrace.4
===================================================================
RCS file: systrace.4
diff -N systrace.4
--- /dev/null	Sun May  5 08:03:30 2002
+++ systrace.4	Wed Jun 12 03:08:15 2002
@@ -0,0 +1,234 @@
+.\"	$NetBSD$
+.\"	$OpenBSD: systrace.4,v 1.2 2002/06/03 15:44:17 mpech Exp $
+.\"
+.\" Copyright (c) 2002 CubeSoft Communications, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistribution of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Neither the name of CubeSoft Communications, nor the names of its
+.\"    contributors may be used to endorse or promote products derived from
+.\"    this software without specific prior written permission.
+.\" 
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+.\" INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+.\" (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd May 26, 2002
+.Dt SYSTRACE 4
+.Os
+.Sh NAME
+.Nm systrace
+.Nd enforce and generate policies for system calls
+.Sh SYNOPSIS
+.Cd "pseudo-device systrace 1"
+.Sh DESCRIPTION
+.Nm
+attaches to processes and enforces policies for system calls.
+A pseudo-device,
+.Pa /dev/systrace ,
+allows userland processes to control the behavior of
+.Nm
+through an
+.Xr ioctl 2
+interface.
+.Sh SYSTEM CALL POLICIES
+.Nm
+can assign the following policies to system calls:
+.Bl -tag -enum -width "xxxxxx"
+.It SYSTR_POLICY_ASK
+Send a message of the type
+.Dv SYSTR_MSG_ASK ,
+and put the process to sleep until a
+.Dv STRIOCANSWER
+.Xr ioctl 2
+is made.
+.It SYSTR_POLICY_PERMIT
+Immediately allow the system call.
+.It SYSTR_POLICY_NEVER
+Immediately return an error code.
+.El
+.Sh SYSTRACE MESSAGES
+A
+.Xr read 2
+operation on the
+.Nm
+pseudo-device will block if there are no pending messages, or
+return the following structure:
+.Bd -literal
+struct str_message {
+	int msg_type;
+#define SYSTR_MSG_ASK	1
+#define SYSTR_MSG_RES	2
+#define SYSTR_MSG_EMUL	3
+#define SYSTR_MSG_CHILD	4
+	pid_t msg_pid;
+	short msg_policy;
+	union {
+		struct str_msg_emul msg_emul;
+		struct str_msg_ask msg_ask;
+		struct str_msg_child msg_child;
+	} msg_data;
+};
+
+struct str_msg_emul {
+	char emul[SYSTR_EMULEN];
+};
+
+struct str_msg_ask {
+	int code;
+	int argsize;
+	register_t args[SYSTR_MAXARGS];
+	register_t rval[2];
+	int result;
+};
+
+struct str_msg_child {
+	pid_t new_pid;
+};
+.Ed
+.Sh IOCTL INTERFACE
+.Bl -tag -width "xxxxxx"
+.It Dv SYSTR_CLONE Fa "int"
+Return a
+.Nm
+file descriptor for
+further
+.Xr ioctl 2
+operations.
+.El
+.Nm
+supports the following
+.Xr ioctl 2
+commands:
+.Bl -tag -width "xxxxxx"
+.It Dv STRIOCATTACH Fa "pid_t"
+Attach to a process, unless:
+.Bl -enum -compact -width 2n
+.It
+It's the process that's doing the attaching.
+.It
+It's a system process.
+.It
+It's being traced already.
+.It
+You do not own the process and you're not root.
+.It
+It's
+.Xr init 8 ,
+and the
+kernel was not compiled with
+.Cd option INSECURE .
+.El
+.It Dv STRIOCDETACH Fa "pid_t"
+Wake up a process if it is waiting for an answer, and detach from it.
+.It Dv STRIOCANSWER Fa "struct systrace_answer"
+Tell
+.Nm
+what to do with a system call that was assigned a policy of
+.Dv SYSTR_POLICY_ASK .
+.Bd -literal
+struct systrace_answer {
+	pid_t stra_pid;     /* PID of process being traced */
+	int stra_policy;    /* Policy to assign */
+	int stra_error;     /* Return value of denied syscall
+	                       (will return EPERM if zero) */
+	int stra_flags;
+#define	SYSTR_FLAGS_RESULT 0x0001    /* Report syscall result */
+};
+.Ed
+.It Dv STRIOCIO Fa "struct systrace_io"
+Copy data in/out of the process being traced.
+.Bd -literal
+struct systrace_io {
+	pid_t strio_pid;    /* PID of process being traced */
+	int strio_op;
+#define	SYSTR_READ	1
+#define	SYSTR_WRITE	2
+	void *strio_offs;
+	void *strio_addr;
+	size_t strio_len;
+};
+.Ed
+.It Dv STRIOCPOLICY Fa "struct systrace_policy"
+Manipulate the set of policies.
+.Bd -literal
+struct systrace_policy {
+	int strp_op;
+#define	SYSTR_POLICY_NEW	1    /* Allocate a new policy */
+#define	SYSTR_POLICY_ASSIGN	2    /* Assign policy to process */
+#define	SYSTR_POLICY_MODIFY	3    /* Modify an entry */
+	int strp_num;
+	union {
+		struct {
+			short code;
+#define SYSTR_POLICY_ASK	0
+#define SYSTR_POLICY_PERMIT	1
+#define SYSTR_POLICY_NEVER	2
+			short policy;
+		} assign;
+		pid_t pid;
+		int maxents;
+	} strp_data;
+#define strp_pid	strp_data.pid
+#define strp_maxents	strp_data.maxents
+#define strp_code	strp_data.assign.code
+#define strp_policy	strp_data.assign.policy
+};
+.Ed
+.Pp
+The
+.Dv SYSTR_POLICY_NEW
+operation allocates a new policy with all entries initialized to
+.Dv SYSTR_POLICY_ASK ,
+and returns the new policy number into
+.Va strp_num .
+The
+.Dv SYSTR_POLICY_ASSIGN
+operation attaches the policy identified by
+.Va strp_num
+to
+.Va strp_pid ,
+with a maximum of
+.Va strp_maxents
+entries.
+The
+.Dv SYSTR_POLICY_MODIFY
+operation changes the entry indexed by
+.Va strp_code
+to
+.Va strp_policy .
+.It Dv STRIOCGETCWD Fa "pid_t"
+Set the working directory of the current process to that of the
+named process.
+.It Dv STRIOCRESCWD
+Restore the working directory of the current process.
+.El
+.Sh FILES
+.Bl -tag -width "/dev/systrace" -compact
+.It Pa /dev/systrace
+system call tracing facility
+.El
+.Sh SEE ALSO
+.Xr ioctl 2 ,
+.Xr read 2 ,
+.Xr options 4 ,
+.Xr securelevel 7
+.Sh HISTORY
+The
+.Nm
+facility first appeared in
+.Ox 3.2 .
+.\" .Sh BUGS
+.\" .Sh CAVEATS


user space (constifying to satisfy syscallnames types):

Index: Makefile
===================================================================
RCS file: /cvs/src/bin/systrace/Makefile,v
retrieving revision 1.4
diff -u -r1.4 Makefile
--- Makefile	2002/06/05 17:34:56	1.4
+++ Makefile	2002/06/12 08:49:52
@@ -1,13 +1,16 @@
 #	$OpenBSD: Makefile,v 1.4 2002/06/05 17:34:56 mickey Exp $
 
 PROG=	systrace
-CFLAGS+= -I.
+CFLAGS+= -I. -I/sys
 SRCS=	filter.c intercept-translate.c intercept.c \
-	openbsd-syscalls.c util.c \
+	netbsd-syscalls.c util.c \
 	policy.c systrace-errno.h systrace-error.c \
 	systrace-translate.c systrace.c \
 	parse.y lex.l
 CLEANFILES+= parse.c lex.c y.tab.h
+YHEADER=yes
+LDADD+= -ll -ly
+DPADD+= ${LIBL} ${LIBY}
 
 .include <bsd.prog.mk>
 
Index: filter.c
===================================================================
RCS file: /cvs/src/bin/systrace/filter.c,v
retrieving revision 1.11
diff -u -r1.11 filter.c
--- filter.c	2002/06/11 05:30:28	1.11
+++ filter.c	2002/06/12 08:49:53
@@ -165,7 +165,7 @@
 
 void
 filter_policyrecord(struct policy *policy, struct filter *filter,
-    char *emulation, char *name, char *rule)
+    const char *emulation, const char *name, char *rule)
 {
 	/* Record the filter in the policy */
 	if (filter == NULL) {
@@ -302,7 +302,7 @@
 
 short
 filter_ask(struct intercept_tlq *tls, struct filterq *fls,
-    int policynr, char *emulation, char *name,
+    int policynr, const char *emulation, const char *name,
     char *output, short *pfuture, int *pflags)
 {
 	char line[2*MAXPATHLEN], *p;
@@ -475,7 +475,11 @@
 
 	if ((line = intercept_translate_print(tl)) == NULL)
 		return (0);
+#ifdef __NetBSD__
+	res = fnmatch(logic->filterdata, line, FNM_PATHNAME);
+#else
 	res = fnmatch(logic->filterdata, line, FNM_PATHNAME | FNM_LEADING_DIR);
+#endif
 
 	return (res == 0);
 }
Index: intercept.c
===================================================================
RCS file: /cvs/src/bin/systrace/intercept.c,v
retrieving revision 1.5
diff -u -r1.5 intercept.c
--- intercept.c	2002/06/10 19:16:26	1.5
+++ intercept.c	2002/06/12 08:49:54
@@ -29,6 +29,11 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifdef __NetBSD__
+#define	_KERNEL
+#include <sys/errno.h>
+#undef _KERNEL
+#endif
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/tree.h>
@@ -52,8 +57,8 @@
 	char name[64];
 	char emulation[16];
 
-	short (*cb)(int, pid_t, int, char *, int, char *, void *, int,
-	    struct intercept_tlq *, void *);
+	short (*cb)(int, pid_t, int, const char *, int, const char *, void *,
+	    int, struct intercept_tlq *, void *);
 	void *cb_arg;
 
 	struct intercept_tlq tls;
@@ -64,9 +69,9 @@
 
 /* Generic callback functions */
 
-void (*intercept_newimagecb)(int, pid_t, int, char *, char *, void *) = NULL;
+void (*intercept_newimagecb)(int, pid_t, int, const char *, const char *, void *) = NULL;
 void *intercept_newimagecbarg = NULL;
-short (*intercept_gencb)(int, pid_t, int, char *, int, char *, void *, int, void *) = NULL;
+short (*intercept_gencb)(int, pid_t, int, const char *, int, const char *, void *, int, void *) = NULL;
 void *intercept_gencbarg = NULL;
 
 int
@@ -112,7 +117,7 @@
 }
 
 struct intercept_syscall *
-intercept_sccb_find(char *emulation, char *name)
+intercept_sccb_find(const char *emulation, const char *name)
 {
 	struct intercept_syscall tmp;
 
@@ -159,8 +164,8 @@
 }
 
 int
-intercept_register_sccb(char *emulation, char *name,
-    short (*cb)(int, pid_t, int, char *, int, char *, void *, int,
+intercept_register_sccb(const char *emulation, const char *name,
+    short (*cb)(int, pid_t, int, const char *, int, const char *, void *, int,
 	struct intercept_tlq *, void *),
     void *cbarg)
 {
@@ -192,7 +197,7 @@
 }
 
 int
-intercept_register_gencb(short (*cb)(int, pid_t, int, char *, int, char *, void *, int, void *), void *arg)
+intercept_register_gencb(short (*cb)(int, pid_t, int, const char *, int, const char *, void *, int, void *), void *arg)
 {
 	intercept_gencb = cb;
 	intercept_gencbarg = arg;
@@ -201,7 +206,7 @@
 }
 
 int
-intercept_register_execcb(void (*cb)(int, pid_t, int, char *, char *, void *), void *arg)
+intercept_register_execcb(void (*cb)(int, pid_t, int, const char *, const char *, void *), void *arg)
 {
 	intercept_newimagecb = cb;
 	intercept_newimagecbarg = arg;
@@ -410,8 +415,8 @@
 }
 
 void
-intercept_syscall(int fd, pid_t pid, int policynr, char *name, int code,
-    char *emulation, void *args, int argsize)
+intercept_syscall(int fd, pid_t pid, int policynr, const char *name, int code,
+    const char *emulation, void *args, int argsize)
 {
 	short action, flags = 0;
 	struct intercept_syscall *sc;
@@ -476,7 +481,11 @@
 	struct intercept_pid *icpid;
 
 	if (!strcmp("execve", name)) {
+#ifdef __NetBSD__
+		if (result && result != EJUSTRETURN)
+#else
 		if (result)
+#endif
 			goto out;
 
 		/* Commit the name of the new image */
@@ -513,8 +522,8 @@
 }
 
 int
-intercept_modifypolicy(int fd, int policynr, char *emulation, char *name,
-    short policy)
+intercept_modifypolicy(int fd, int policynr, const char *emulation,
+    const char *name, short policy)
 {
 	int code;
 
Index: intercept.h
===================================================================
RCS file: /cvs/src/bin/systrace/intercept.h,v
retrieving revision 1.2
diff -u -r1.2 intercept.h
--- intercept.h	2002/06/10 19:16:26	1.2
+++ intercept.h	2002/06/12 08:49:55
@@ -43,7 +43,7 @@
 	int (*detach)(int, pid_t);
 	int (*report)(int, pid_t);
 	int (*read)(int);
-	int (*getsyscallnumber)(char *, char *);
+	int (*getsyscallnumber)(const char *, const char *);
 	char *(*getcwd)(int, pid_t, char *, size_t);
 	int (*io)(int, pid_t, int, void *, u_char *, size_t);
 	int (*getarg)(int, void *, int, void **);
@@ -108,16 +108,16 @@
 int intercept_read(int);
 int intercept_newpolicy(int);
 int intercept_assignpolicy(int, pid_t, int);
-int intercept_modifypolicy(int, int, char *, char *, short);
+int intercept_modifypolicy(int, int, const char *, const char *, short);
 
-int intercept_register_sccb(char *, char *,
-    short (*)(int, pid_t, int, char *, int, char *, void *, int,
+int intercept_register_sccb(const char *, const char *,
+    short (*)(int, pid_t, int, const char *, int, const char *, void *, int,
 	struct intercept_tlq *, void *),
     void *);
 void *intercept_sccb_cbarg(char *, char *);
 
-int intercept_register_gencb(short (*)(int, pid_t, int, char *, int, char *, void *, int, void *), void *);
-int intercept_register_execcb(void (*)(int, pid_t, int, char *, char *, void *), void *);
+int intercept_register_gencb(short (*)(int, pid_t, int, const char *, int, const char *, void *, int, void *), void *);
+int intercept_register_execcb(void (*)(int, pid_t, int, const char *, const char *, void *), void *);
 
 int intercept_register_translation(char *, char *, int,
     struct intercept_translate *);
Index: lex.l
===================================================================
RCS file: /cvs/src/bin/systrace/lex.l,v
retrieving revision 1.6
diff -u -r1.6 lex.l
--- lex.l	2002/06/05 17:34:56	1.6
+++ lex.l	2002/06/12 08:49:55
@@ -45,7 +45,11 @@
 
 #include "intercept.h"
 #include "systrace.h"
+#ifdef __NetBSD__
+#include "parse.h"
+#else
 #include "y.tab.h"
+#endif
 
 int yyerror(char *fmt, ...);
 
Index: netbsd-syscalls.c
===================================================================
RCS file: netbsd-syscalls.c
diff -N netbsd-syscalls.c
--- /dev/null	Mon Nov  3 01:42:15 1997
+++ netbsd-syscalls.c	Wed Jun 12 08:49:56 2002
@@ -0,0 +1,529 @@
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <sys/syscall.h>
+
+#include <compat/aout/aout_syscall.h>
+#include <compat/aoutm68k/aoutm68k_syscall.h>
+#include <compat/freebsd/freebsd_syscall.h>
+#include <compat/hpux/hpux_syscall.h>
+#include <compat/ibcs2/ibcs2_syscall.h>
+#include <compat/irix/irix_syscall.h>
+#include <compat/linux/linux_syscall.h>
+#include <compat/mach/mach_syscall.h>
+#include <compat/netbsd32/netbsd32_syscall.h>
+#include <compat/osf1/osf1_syscall.h>
+#include <compat/pecoff/pecoff_syscall.h>
+#include <compat/sunos/sunos_syscall.h>
+#include <compat/sunos32/sunos32_syscall.h>
+#include <compat/svr4/svr4_syscall.h>
+#include <compat/svr4_32/svr4_32_syscall.h>
+#include <compat/ultrix/ultrix_syscall.h>
+
+#define KTRACE
+#define NFSCLIENT
+#define NFSSERVER
+#define SYSVSEM
+#define SYSVMSG
+#define SYSVSHM
+#define LFS
+#define NTP
+#include <kern/syscalls.c>
+
+#include <compat/aout/aout_syscalls.c>
+#include <compat/aoutm68k/aoutm68k_syscalls.c>
+#include <compat/freebsd/freebsd_syscalls.c>
+#include <compat/hpux/hpux_syscalls.c>
+#include <compat/ibcs2/ibcs2_syscalls.c>
+#include <compat/irix/irix_syscalls.c>
+#include <compat/linux/linux_syscalls.c>
+#include <compat/mach/mach_syscalls.c>
+#include <compat/netbsd32/netbsd32_syscalls.c>
+#include <compat/osf1/osf1_syscalls.c>
+#include <compat/pecoff/pecoff_syscalls.c>
+#include <compat/sunos/sunos_syscalls.c>
+#include <compat/sunos32/sunos32_syscalls.c>
+#include <compat/svr4/svr4_syscalls.c>
+#include <compat/svr4_32/svr4_32_syscalls.c>
+#include <compat/ultrix/ultrix_syscalls.c>
+#undef KTRACE
+#undef NFSCLIENT
+#undef NFSSERVER
+#undef SYSVSEM
+#undef SYSVMSG
+#undef SYSVSHM
+#undef LFS
+#undef NTP
+
+#include <sys/ioctl.h>
+#include <sys/tree.h>
+#include <dev/systrace.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <err.h>
+
+#include "intercept.h"
+
+/* Callbacks into main library; intercept_syscall must match intercept.c */
+void intercept_child_info(pid_t, pid_t);
+void intercept_syscall(int, pid_t, int, const char *, int, const char *, void *, int);
+void intercept_syscall_result(int, pid_t, int, const char *, int, char *, void *,
+    int, int, void *);
+
+struct emulation {
+	char *name;			/* Emulation name */
+	const char * const *sysnames;	/* Array of system call names */
+	int  nsysnames;			/* Number of */
+};
+
+static struct emulation emulations[] = {
+	{ "native",	syscallnames,		SYS_MAXSYSCALL },
+
+	{ "aout",	aout_syscallnames,	AOUT_SYS_MAXSYSCALL },
+	{ "aoutm68k",	aoutm68k_syscallnames,	AOUTM68K_SYS_MAXSYSCALL },
+	{ "freebsd",	freebsd_syscallnames,	FREEBSD_SYS_MAXSYSCALL },
+	{ "hpux",	hpux_syscallnames,	HPUX_SYS_MAXSYSCALL },
+	{ "ibcs2",	ibcs2_syscallnames,	IBCS2_SYS_MAXSYSCALL },
+	{ "irix",	irix_syscallnames,	IRIX_SYS_MAXSYSCALL },
+	{ "linux",	linux_syscallnames,	LINUX_SYS_MAXSYSCALL },
+	{ "mach",	mach_syscallnames,	MACH_SYS_MAXSYSCALL },
+	{ "netbsd32",	netbsd32_syscallnames,	netbsd32_SYS_MAXSYSCALL },
+	{ "osf1",	osf1_syscallnames,	OSF1_SYS_MAXSYSCALL },
+	{ "pecoff",	pecoff_syscallnames,	PECOFF_SYS_MAXSYSCALL },
+	{ "sunos",	sunos_syscallnames,	SUNOS_SYS_MAXSYSCALL },
+	{ "sunos32",	sunos32_syscallnames,	SUNOS32_SYS_MAXSYSCALL },
+	{ "svr4",	svr4_syscallnames,	SVR4_SYS_MAXSYSCALL },
+	{ "svr4_32",	svr4_32_syscallnames,	SVR4_32_SYS_MAXSYSCALL },
+	{ "ultrix",	ultrix_syscallnames,	ULTRIX_SYS_MAXSYSCALL },
+	{ NULL,		NULL,			0 }	/* name == NULL terminates the table */
+};
+
+struct nbsd_data {
+	struct emulation *current;
+	struct emulation *commit;
+};
+
+int
+nbsd_init(void)
+{
+	return (0);
+}
+
+int
+nbsd_attach(int fd, pid_t pid)
+{
+	if (ioctl(fd, STRIOCATTACH, &pid) == -1)
+		return (-1);
+
+	return (0);
+}
+
+int
+nbsd_detach(int fd, pid_t pid)
+{
+	if (ioctl(fd, STRIOCDETACH, &pid) == -1)
+		return (-1);
+
+	return (0);
+}
+
+int
+nbsd_open(void)
+{
+	char *path = "/dev/systrace";
+	int fd, cfd = -1;
+
+	fd = open(path, O_RDONLY, 0);
+	if (fd == -1) {
+		warn("open: %s", path);
+		return (-1);
+	}
+
+	if (ioctl(fd, SYSTR_CLONE, &cfd) == -1) {
+		warn("ioctl(SYSTR_CLONE)");
+		goto out;
+	}
+
+	if (fcntl(cfd, F_SETFD, 1) == -1)
+		warn("fcntl(F_SETFD)");
+
+ out:
+	close (fd);
+	return (cfd);
+}
+
+struct intercept_pid *
+nbsd_getpid(pid_t pid)
+{
+	struct intercept_pid *icpid;
+	struct nbsd_data *data;
+
+	icpid = intercept_getpid(pid);
+	if (icpid == NULL)
+		return (NULL);
+	if (icpid->data != NULL)
+		return (icpid);
+
+	if ((icpid->data = malloc(sizeof(struct nbsd_data))) == NULL)
+		err(1, "%s:%d: malloc", __FUNCTION__, __LINE__);
+	
+	data = icpid->data;
+	data->current = &emulations[0];
+	data->commit = NULL;
+
+	return (icpid);
+}
+
+void
+nbsd_freepid(struct intercept_pid *ipid)
+{
+	if (ipid->data != NULL)
+		free(ipid->data);
+}
+
+void
+nbsd_clonepid(struct intercept_pid *opid, struct intercept_pid *npid)
+{
+	if (opid->data == NULL) {
+		npid->data = NULL;
+		return;
+	}
+
+	if ((npid->data = malloc(sizeof(struct nbsd_data))) == NULL)
+		err(1, "%s:%d: malloc", __FUNCTION__, __LINE__);
+	memcpy(npid->data, opid->data, sizeof(struct nbsd_data));
+}
+
+struct emulation *
+nbsd_find_emulation(const char *name)
+{
+	struct emulation *tmp;
+
+	tmp = emulations;
+	while (tmp->name) {
+		if (!strcmp(tmp->name, name))
+			break;
+		tmp++;
+	}
+
+	if (!tmp->name)
+		return (NULL);
+
+	return (tmp);
+}
+
+int
+nbsd_set_emulation(pid_t pidnr, char *name)
+{
+	struct emulation *tmp;
+	struct intercept_pid *pid;
+	struct nbsd_data *data;
+
+	if ((tmp = nbsd_find_emulation(name)) == NULL)
+		return (-1);	/* unknown emulation name */
+
+	pid = nbsd_getpid(pidnr);	/* also allocates pid->data if unset */
+	if (pid == NULL)
+		return (-1);
+	data = pid->data;
+
+	data->commit = tmp;	/* nbsd_read applies it on SYSTR_MSG_RES */
+
+	return (0);
+}
+
+const char *
+nbsd_syscall_name(pid_t pidnr, int number)
+{
+	struct intercept_pid *pid;
+	struct emulation *current;
+
+	pid = nbsd_getpid(pidnr);
+	if (pid == NULL)
+		return (NULL);
+	current = ((struct nbsd_data *)pid->data)->current;
+
+	if (number < 0 || number >= current->nsysnames)
+		return (NULL);
+
+	return (current->sysnames[number]);
+}
+
+int
+nbsd_syscall_number(const char *emulation, const char *name)
+{
+	struct emulation *current;
+	int i;
+
+	current = nbsd_find_emulation(emulation);
+	if (current == NULL)
+		return (-1);
+
+	for (i = 0; i < current->nsysnames; i++)
+		if (!strcmp(name, current->sysnames[i]))
+			return (i);
+
+	return (-1);
+}
+
+short
+nbsd_translate_policy(short policy)
+{
+	switch (policy) {
+	case ICPOLICY_ASK:
+		return (SYSTR_POLICY_ASK);
+	case ICPOLICY_PERMIT:
+		return (SYSTR_POLICY_PERMIT);
+	case ICPOLICY_NEVER:
+	default:
+		return (SYSTR_POLICY_NEVER);
+	}
+}
+
+short
+nbsd_translate_flags(short flags)
+{
+	switch (flags) {
+	case ICFLAGS_RESULT:
+		return (SYSTR_FLAGS_RESULT);
+	default:
+		return (0);
+	}
+}
+
+int
+nbsd_translate_errno(int errno)
+{
+	return (errno);
+}
+
+int
+nbsd_answer(int fd, pid_t pid, short policy, int errno, short flags)
+{
+	struct systrace_answer ans;
+
+	ans.stra_pid = pid;
+	ans.stra_policy = nbsd_translate_policy(policy);
+	ans.stra_flags = nbsd_translate_flags(flags);
+	ans.stra_error = nbsd_translate_errno(errno);
+
+	if (ioctl(fd, STRIOCANSWER, &ans) == -1)
+		return (-1);
+
+	return (0);
+}
+
+int
+nbsd_newpolicy(int fd)
+{
+	struct systrace_policy pol;
+
+	pol.strp_op = SYSTR_POLICY_NEW;
+	pol.strp_num = -1;
+	pol.strp_maxents = 512;
+
+	if (ioctl(fd, STRIOCPOLICY, &pol) == -1)
+		return (-1);
+
+	return (pol.strp_num);
+}
+
+int
+nbsd_assignpolicy(int fd, pid_t pid, int num)
+{
+	struct systrace_policy pol;
+
+	pol.strp_op = SYSTR_POLICY_ASSIGN;
+	pol.strp_num = num;
+	pol.strp_pid = pid;
+
+	if (ioctl(fd, STRIOCPOLICY, &pol) == -1)
+		return (-1);
+
+	return (0);
+}
+
+int
+nbsd_modifypolicy(int fd, int num, int code, short policy)
+{
+	struct systrace_policy pol;
+
+	pol.strp_op = SYSTR_POLICY_MODIFY;
+	pol.strp_num = num;
+	pol.strp_code = code;
+	pol.strp_policy = nbsd_translate_policy(policy);
+
+	if (ioctl(fd, STRIOCPOLICY, &pol) == -1)
+		return (-1);
+
+	return (0);
+}
+
+int
+nbsd_io(int fd, pid_t pid, int op, void *addr, u_char *buf, size_t size)
+{
+	struct systrace_io io;
+
+	memset(&io, 0, sizeof(io));
+	io.strio_pid = pid;
+	io.strio_addr = buf;
+	io.strio_len = size;
+	io.strio_offs = addr;
+	io.strio_op = (op == INTERCEPT_READ ? SYSTR_READ : SYSTR_WRITE);
+	if (ioctl(fd, STRIOCIO, &io) == -1)
+		return (-1);
+
+	return (0);
+}
+
+char *
+nbsd_getcwd(int fd, pid_t pid, char *buf, size_t size)
+{
+	char *path;
+
+	if (ioctl(fd, STRIOCGETCWD, &pid) == -1)
+		return (NULL);
+
+	path = getcwd(buf, size);
+
+	if (ioctl(fd, STRIOCRESCWD, 0) == -1)
+		warn("%s: ioctl", __FUNCTION__); /* XXX */
+
+	return (path);
+}
+
+int
+nbsd_argument(int off, void *pargs, int argsize, void **pres)
+{
+	register_t *args = (register_t *)pargs;
+
+	if (off >= argsize / sizeof(register_t))
+		return (-1);
+
+	*pres = (void *)args[off];
+
+	return (0);
+}
+
+int
+nbsd_read(int fd)
+{
+	struct str_message msg;
+	struct intercept_pid *icpid;
+	struct nbsd_data *data;
+	struct emulation *current;
+
+	char name[SYSTR_EMULEN+1];
+	const char *sysname;
+	int code;
+
+	if (read(fd, &msg, sizeof(msg)) != sizeof(msg))
+		return (-1);
+
+	icpid = nbsd_getpid(msg.msg_pid);
+	if (icpid == NULL)
+		return (-1);
+	data = icpid->data;
+
+	current = data->current;
+
+	switch(msg.msg_type) {
+	case SYSTR_MSG_ASK:
+		code = msg.msg_data.msg_ask.code;
+		sysname = nbsd_syscall_name(msg.msg_pid, code);
+
+		intercept_syscall(fd, msg.msg_pid, msg.msg_policy,
+		    sysname, code, current->name,
+		    (void *)msg.msg_data.msg_ask.args,
+		    msg.msg_data.msg_ask.argsize);
+		break;
+
+	case SYSTR_MSG_RES:
+		code = msg.msg_data.msg_ask.code;
+		sysname = nbsd_syscall_name(msg.msg_pid, code);
+
+		/* Switch emulation around at the right time */
+		if (data->commit != NULL) {
+			current = data->current = data->commit;
+			data->commit = NULL;
+		}
+
+		intercept_syscall_result(fd, msg.msg_pid, msg.msg_policy,
+		    sysname, code, current->name,
+		    (void *)msg.msg_data.msg_ask.args,
+		    msg.msg_data.msg_ask.argsize,
+		    msg.msg_data.msg_ask.result,
+		    msg.msg_data.msg_ask.rval);
+		break;
+
+	case SYSTR_MSG_EMUL:
+		memcpy(name, msg.msg_data.msg_emul.emul, SYSTR_EMULEN);
+		name[SYSTR_EMULEN] = '\0';
+		    
+		if (nbsd_set_emulation(msg.msg_pid, name) == -1)
+			errx(1, "%s:%d: set_emulation(%s)",
+			    __FUNCTION__, __LINE__, name);
+
+		if (nbsd_answer(fd, msg.msg_pid, 0, 0, 0) == -1)
+			err(1, "%s:%d: answer", __FUNCTION__, __LINE__);
+		break;
+
+	case SYSTR_MSG_CHILD:
+		intercept_child_info(msg.msg_pid,
+		    msg.msg_data.msg_child.new_pid);
+		break;
+	}
+	return (0);
+}
+
+struct intercept_system intercept = {
+	"netbsd",
+	nbsd_init,
+	nbsd_open,
+	nbsd_attach,
+	nbsd_detach,
+	nbsd_read,
+	nbsd_syscall_number,
+	nbsd_getcwd,
+	nbsd_io,
+	nbsd_argument,
+	nbsd_answer,
+	nbsd_newpolicy,
+	nbsd_assignpolicy,
+	nbsd_modifypolicy,
+	nbsd_clonepid,
+	nbsd_freepid,
+};
Index: policy.c
===================================================================
RCS file: /cvs/src/bin/systrace/policy.c,v
retrieving revision 1.9
diff -u -r1.9 policy.c
--- policy.c	2002/06/11 05:30:28	1.9
+++ policy.c	2002/06/12 08:49:59
@@ -154,7 +154,7 @@
 }
 
 struct policy *
-systrace_findpolicy(char *name)
+systrace_findpolicy(const char *name)
 {
 	struct policy tmp;
 
@@ -190,7 +190,7 @@
 }
 
 struct policy *
-systrace_newpolicy(char *emulation, char *name)
+systrace_newpolicy(const char *emulation, const char *name)
 {
 	struct policy *tmp;
 
@@ -217,7 +217,8 @@
 }
 
 struct filterq *
-systrace_policyflq(struct policy *policy, char *emulation, char *name)
+systrace_policyflq(struct policy *policy, const char *emulation,
+    const char *name)
 {
 	struct policy_syscall tmp2, *tmp;
 
@@ -241,7 +242,7 @@
 }
 
 int
-systrace_modifypolicy(int fd, int policynr, char *name, short action)
+systrace_modifypolicy(int fd, int policynr, const char *name, short action)
 {
 	struct policy *policy;
 	int res;
@@ -256,10 +257,10 @@
 }
 
 char *
-systrace_policyfilename(char *dirname, char *name)
+systrace_policyfilename(char *dirname, const char *name)
 {
 	static char file[2*MAXPATHLEN];
-	char *p;
+	const char *p;
 	int i, plen;
 
 	if (strlen(name) + strlen(dirname) + 1 >= sizeof(file))
@@ -286,7 +287,7 @@
 }
 
 int
-systrace_addpolicy(char *name)
+systrace_addpolicy(const char *name)
 {
 	char *file = NULL;
 
Index: systrace-translate.c
===================================================================
RCS file: /cvs/src/bin/systrace/systrace-translate.c,v
retrieving revision 1.2
diff -u -r1.2 systrace-translate.c
--- systrace-translate.c	2002/06/04 19:09:45	1.2
+++ systrace-translate.c	2002/06/12 08:49:59
@@ -39,8 +39,10 @@
 #include <fcntl.h>
 #include <err.h>
 
+#ifndef __NetBSD__
 #include "../../sys/compat/linux/linux_types.h"
 #include "../../sys/compat/linux/linux_fcntl.h"
+#endif
 
 #include "intercept.h"
 #include "systrace.h"
@@ -86,6 +88,7 @@
 	return (0);
 }
 
+#ifndef __NetBSD__
 int
 linux_print_oflags(char *buf, size_t buflen, struct intercept_translate *tl)
 {
@@ -120,6 +123,7 @@
 
 	return (0);
 }
+#endif /* !NetBSD */
 
 int
 print_modeflags(char *buf, size_t buflen, struct intercept_translate *tl)
@@ -147,10 +151,12 @@
 	NULL, print_oflags,
 };
 
+#ifndef __NetBSD__
 struct intercept_translate linux_oflags = {
 	"oflags",
 	NULL, linux_print_oflags,
 };
+#endif
 
 struct intercept_translate modeflags = {
 	"mode",
Index: systrace.c
===================================================================
RCS file: /cvs/src/bin/systrace/systrace.c,v
retrieving revision 1.15
diff -u -r1.15 systrace.c
--- systrace.c	2002/06/11 05:21:17	1.15
+++ systrace.c	2002/06/12 08:50:00
@@ -58,7 +58,7 @@
 
 short
 trans_cb(int fd, pid_t pid, int policynr,
-    char *name, int code, char *emulation,
+    const char *name, int code, const char *emulation,
     void *args, int argsize, struct intercept_tlq *tls, void *cbarg)
 {
 	short action, future;
@@ -127,8 +127,8 @@
 }
 
 short
-gen_cb(int fd, pid_t pid, int policynr, char *name, int code,
-    char *emulation, void *args, int argsize, void *cbarg)
+gen_cb(int fd, pid_t pid, int policynr, const char *name, int code,
+    const char *emulation, void *args, int argsize, void *cbarg)
 {
 	char output[1024];
 	struct policy *policy;
@@ -173,7 +173,8 @@
 }
 
 void
-execres_cb(int fd, pid_t pid, int policynr, char *emulation, char *name, void *arg)
+execres_cb(int fd, pid_t pid, int policynr, const char *emulation,
+    const char *name, void *arg)
 {
 	struct policy *policy;
 
@@ -249,12 +250,81 @@
 	X(intercept_init());
 
 	X(intercept_register_gencb(gen_cb, NULL));
+#ifdef __NetBSD__
 	X(intercept_register_sccb("native", "open", trans_cb, NULL));
 	X(intercept_register_transfn("native", "open", 0));
 	X(intercept_register_translation("native", "open", 1, &oflags));
 
 	X(intercept_register_sccb("native", "connect", trans_cb, NULL));
 	X(intercept_register_translation("native", "connect", 1,
+	      &ic_translate_connect));
+	X(intercept_register_sccb("native", "sendto", trans_cb, NULL));
+	X(intercept_register_translation("native", "sendto", 4,
+	      &ic_translate_connect));
+	X(intercept_register_sccb("native", "bind", trans_cb, NULL));
+	X(intercept_register_translation("native", "bind", 1,
+	      &ic_translate_connect));
+	X(intercept_register_sccb("native", "execve", trans_cb, NULL));
+	X(intercept_register_transfn("native", "execve", 0));
+
+	X(intercept_register_sccb("native", "__stat13", trans_cb, NULL));
+	X(intercept_register_transfn("native", "__stat13", 0));
+	X(intercept_register_sccb("native", "__lstat13", trans_cb, NULL));
+	X(intercept_register_translink("native", "__lstat13", 0));
+	X(intercept_register_sccb("native", "unlink", trans_cb, NULL));
+	X(intercept_register_transfn("native", "unlink", 0));
+
+	X(intercept_register_sccb("native", "chown", trans_cb, NULL));
+	X(intercept_register_transfn("native", "chown", 0));
+	X(intercept_register_translation("native", "chown", 1, &uidt));
+	X(intercept_register_translation("native", "chown", 2, &gidt));
+
+	X(intercept_register_sccb("native", "__posix_chown", trans_cb, NULL));
+	X(intercept_register_transfn("native", "__posix_chown", 0));
+	X(intercept_register_translation("native", "__posix_chown", 1, &uidt));
+	X(intercept_register_translation("native", "__posix_chown", 2, &gidt));
+
+	X(intercept_register_sccb("native", "fchown", trans_cb, NULL));
+	X(intercept_register_translation("native", "fchown", 0, &fdt));
+	X(intercept_register_translation("native", "fchown", 1, &uidt));
+	X(intercept_register_translation("native", "fchown", 2, &gidt));
+
+	X(intercept_register_sccb("native", "__posix_fchown", trans_cb, NULL));
+	X(intercept_register_translation("native", "__posix_fchown", 0, &fdt));
+	X(intercept_register_translation("native", "__posix_fchown", 1, &uidt));
+	X(intercept_register_translation("native", "__posix_fchown", 2, &gidt));
+
+	X(intercept_register_sccb("native", "chmod", trans_cb, NULL));
+	X(intercept_register_transfn("native", "chmod", 0));
+	X(intercept_register_translation("native", "chmod", 1, &modeflags));
+	X(intercept_register_sccb("native", "readlink", trans_cb, NULL));
+	X(intercept_register_translink("native", "readlink", 0));
+	X(intercept_register_sccb("native", "chdir", trans_cb, NULL));
+	X(intercept_register_transfn("native", "chdir", 0));
+	X(intercept_register_sccb("native", "access", trans_cb, NULL));
+	X(intercept_register_transfn("native", "access", 0));
+	X(intercept_register_sccb("native", "mkdir", trans_cb, NULL));
+	X(intercept_register_transfn("native", "mkdir", 0));
+	X(intercept_register_sccb("native", "rmdir", trans_cb, NULL));
+	X(intercept_register_transfn("native", "rmdir", 0));
+	X(intercept_register_sccb("native", "rename", trans_cb, NULL));
+	X(intercept_register_transfn("native", "rename", 0));
+	X(intercept_register_transfn("native", "rename", 1));
+
+	X(intercept_register_sccb("native", "__posix_rename", trans_cb, NULL));
+	X(intercept_register_transfn("native", "__posix_rename", 0));
+	X(intercept_register_transfn("native", "__posix_rename", 1));
+
+	X(intercept_register_sccb("native", "symlink", trans_cb, NULL));
+	X(intercept_register_transstring("native", "symlink", 0));
+	X(intercept_register_translink("native", "symlink", 1));
+#else
+	X(intercept_register_sccb("native", "open", trans_cb, NULL));
+	X(intercept_register_transfn("native", "open", 0));
+	X(intercept_register_translation("native", "open", 1, &oflags));
+
+	X(intercept_register_sccb("native", "connect", trans_cb, NULL));
+	X(intercept_register_translation("native", "connect", 1,
 	    &ic_translate_connect));
 	X(intercept_register_sccb("native", "sendto", trans_cb, NULL));
 	X(intercept_register_translation("native", "sendto", 4,
@@ -326,6 +396,7 @@
 	X(intercept_register_sccb("linux", "chmod", trans_cb, NULL));
 	X(intercept_register_translink("linux", "chmod", 0));
 	X(intercept_register_translation("linux", "chmod", 1, &modeflags));
+#endif
 
 	X(intercept_register_execcb(execres_cb, NULL));
 }
@@ -476,7 +547,6 @@
 			err(1, "kill");
 	} else {
 		/* Attach to a running command */
-
 		if (intercept_attachpid(fd, pidattach, argv[0]) == -1)
 			err(1, "attachpid");
 	}
Index: systrace.h
===================================================================
RCS file: /cvs/src/bin/systrace/systrace.h,v
retrieving revision 1.5
diff -u -r1.5 systrace.h
--- systrace.h	2002/06/07 18:05:20	1.5
+++ systrace.h	2002/06/12 08:50:01
@@ -77,7 +77,7 @@
 	SPLAY_ENTRY(policy) node;
 	SPLAY_ENTRY(policy) nrnode;
 
-	char *name;
+	const char *name;
 	char emulation[16];
 
 	SPLAY_HEAD(syscalltree, policy_syscall) pflqs;
@@ -99,21 +99,21 @@
 #define PROCESS_INHERIT_POLICY	0x01	/* Process inherits policy */
 
 int systrace_initpolicy(char *);
-struct policy *systrace_newpolicy(char *, char *);
+struct policy *systrace_newpolicy(const char *, const char *);
 int systrace_newpolicynr(int, struct policy *);
-int systrace_modifypolicy(int, int, char *, short);
-struct policy *systrace_findpolicy(char *);
+int systrace_modifypolicy(int, int, const char *, short);
+struct policy *systrace_findpolicy(const char *);
 struct policy *systrace_findpolnr(int);
 int systrace_dumppolicy(void);
 int systrace_readpolicy(char *);
-int systrace_addpolicy(char *);
-struct filterq *systrace_policyflq(struct policy *, char *, char *);
+int systrace_addpolicy(const char *);
+struct filterq *systrace_policyflq(struct policy *, const char *, const char *);
 
 int systrace_error_translate(char *);
 
 short filter_evaluate(struct intercept_tlq *, struct filterq *, int *);
-short filter_ask(struct intercept_tlq *, struct filterq *, int, char *,
-    char *, char *, short *, int *);
+short filter_ask(struct intercept_tlq *, struct filterq *, int, const char *,
+    const char *, char *, short *, int *);
 void filter_free(struct filter *);
 
 int filter_parse_simple(char *, short *, short *);

>Release-Note:
>Audit-Trail:
>Unformatted: