Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Revisiting DTrace syscall provider



Patch simplified to handle non-native and indirect syscalls properly
(netbsd32 as well). These diffs don't include the changes to the compat
code or to the cddl code. I'd like to start committing this stuff soon...
The syscall_args conversion function has moved to struct emul to save
space and time.

christos

Index: kern/Makefile
===================================================================
RCS file: /cvsroot/src/sys/kern/Makefile,v
retrieving revision 1.17
diff -u -u -r1.17 Makefile
--- kern/Makefile	16 Jan 2014 01:15:34 -0000	1.17
+++ kern/Makefile	7 Mar 2015 07:05:23 -0000
@@ -11,7 +11,7 @@
 	@false
 
 SYSCALLSRC = makesyscalls.sh syscalls.conf syscalls.master
-init_sysent.c syscalls.c ../sys/syscall.h ../sys/syscallargs.h: ${SYSCALLSRC}
+init_sysent.c syscalls.c systrace_args.c ../sys/syscall.h ../sys/syscallargs.h: ${SYSCALLSRC}
 	${HOST_SH} makesyscalls.sh syscalls.conf syscalls.master
 
 VNODEIFSRC = vnode_if.sh vnode_if.src
Index: kern/files.kern
===================================================================
RCS file: /cvsroot/src/sys/kern/files.kern,v
retrieving revision 1.2
diff -u -u -r1.2 files.kern
--- kern/files.kern	12 Oct 2014 04:38:28 -0000	1.2
+++ kern/files.kern	7 Mar 2015 07:05:23 -0000
@@ -168,7 +168,7 @@
 file	kern/sys_sig.c			kern
 file	kern/sys_sched.c		kern
 file	kern/sys_socket.c		kern
-file	kern/syscalls.c			syscall_debug
+file	kern/syscalls.c			syscall_debug | kdtrace_hooks
 file	kern/sysv_ipc.c			sysvshm | sysvsem | sysvmsg
 file	kern/sysv_msg.c			sysvmsg
 file	kern/sysv_sem.c			sysvsem
Index: kern/kern_syscall.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_syscall.c,v
retrieving revision 1.9
diff -u -u -r1.9 kern_syscall.c
--- kern/kern_syscall.c	14 Dec 2013 06:27:57 -0000	1.9
+++ kern/kern_syscall.c	7 Mar 2015 07:05:23 -0000
@@ -37,6 +37,7 @@
 #include "opt_syscall_debug.h"
 #include "opt_ktrace.h"
 #include "opt_ptrace.h"
+#include "opt_dtrace.h"
 #endif
 
 /* XXX To get syscall prototypes. */
@@ -55,6 +56,10 @@
 #include <sys/ktrace.h>
 #include <sys/ptrace.h>
 
+#ifdef KDTRACE_HOOKS
+systrace_probe_func_t   systrace_probe_func;
+#endif
+
 int
 sys_nomodule(struct lwp *l, const void *v, register_t *retval)
 {
@@ -370,15 +375,22 @@
  * a system call is actually executed.
  */
 int
-trace_enter(register_t code, const register_t *args, int narg)
+trace_enter(register_t code, const struct sysent *sy, const void *args)
 {
 	int error = 0;
 
+#ifdef KDTRACE_HOOKS
+	if (sy->sy_entry) {
+		KASSERT(systrace_probe_func);
+		(*systrace_probe_func)(sy->sy_entry, code, sy, args, 0);
+	}
+#endif
+
 #ifdef SYSCALL_DEBUG
 	scdebug_call(code, args);
 #endif /* SYSCALL_DEBUG */
 
-	ktrsyscall(code, args, narg);
+	ktrsyscall(code, args, sy->sy_narg);
 
 #ifdef PTRACE
 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
@@ -401,12 +413,21 @@
  * system call number range for emulation the process runs under.
  */
 void
-trace_exit(register_t code, register_t rval[], int error)
+trace_exit(register_t code, const struct sysent *sy, const void *args,
+    register_t rval[], int error)
 {
 #ifdef PTRACE
 	struct proc *p = curlwp->l_proc;
 #endif
 
+#ifdef KDTRACE_HOOKS
+	if (sy->sy_return) {
+		KASSERT(systrace_probe_func);
+		(*systrace_probe_func)(sy->sy_return, code, sy, args,
+		    error ? -1 : rval[0]);
+	}
+#endif
+
 #ifdef SYSCALL_DEBUG
 	scdebug_ret(code, error, rval);
 #endif /* SYSCALL_DEBUG */
Index: kern/makesyscalls.sh
===================================================================
RCS file: /cvsroot/src/sys/kern/makesyscalls.sh,v
retrieving revision 1.145
diff -u -u -r1.145 makesyscalls.sh
--- kern/makesyscalls.sh	24 Jul 2014 11:58:45 -0000	1.145
+++ kern/makesyscalls.sh	7 Mar 2015 07:05:23 -0000
@@ -61,6 +61,7 @@
 # source the config file.
 sys_nosys="sys_nosys"	# default is sys_nosys(), if not specified otherwise
 maxsysargs=8		# default limit is 8 (32bit) arguments
+systrace="/dev/null"
 rumpcalls="/dev/null"
 rumpcallshdr="/dev/null"
 rumpsysmap="/dev/null"
@@ -75,15 +76,17 @@
 sysnamesbottom="sysnames.bottom"
 rumptypes="rumphdr.types"
 rumpprotos="rumphdr.protos"
+systracetmp="systrace.$$"
+systraceret="systraceret.$$"
 
-trap "rm $sysdcl $sysprotos $sysent $sysnamesbottom $rumpsysent $rumptypes $rumpprotos" 0
+trap "rm $sysdcl $sysprotos $sysent $sysnamesbottom $rumpsysent $rumptypes $rumpprotos $systracetmp $systraceret" 0
 
 # Awk program (must support nawk extensions)
 # Use "awk" at Berkeley, "nawk" or "gawk" elsewhere.
 awk=${AWK:-awk}
 
 # Does this awk have a "toupper" function?
-have_toupper=`$awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null`
+have_toupper="$($awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null)"
 
 # If this awk does not define "toupper" then define our own.
 if [ "$have_toupper" = TRUE ] ; then
@@ -137,6 +140,9 @@
 	sysnumhdr = \"$sysnumhdr\"
 	sysarghdr = \"$sysarghdr\"
 	sysarghdrextra = \"$sysarghdrextra\"
+	systrace = \"$systrace\"
+	systracetmp = \"$systracetmp\"
+	systraceret = \"$systraceret\"
 	rumpcalls = \"$rumpcalls\"
 	rumpcallshdr = \"$rumpcallshdr\"
 	rumpsysent = \"$rumpsysent\"
@@ -211,6 +217,10 @@
 	printf "/* %s */\n\n", tag > rumpcallshdr
 	printf "/*\n * System call protos in rump namespace.\n *\n" > rumpcallshdr
 	printf " * DO NOT EDIT-- this file is automatically generated.\n" > rumpcallshdr
+
+	printf "/* %s */\n\n", tag > systrace
+	printf "/*\n * System call argument to DTrace register array converstion.\n *\n" > systrace
+	printf " * DO NOT EDIT-- this file is automatically generated.\n" > systrace
 }
 NR == 1 {
 	sub(/ $/, "")
@@ -324,6 +334,17 @@
 		"\t\t<= %sMAXSYSARGS * sizeof (%s) ? 1 : -1];\n", \
 		constprefix, registertype) >sysarghdr
 
+	printf " * This file is part of the DTrace syscall provider.\n */\n\n" > systrace
+	printf "static void\nsystrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)\n{\n" > systrace
+	printf "\tint64_t *iarg  = (int64_t *) uarg;\n" > systrace
+	printf "\tswitch (sysnum) {\n" > systrace
+
+	printf "static void\nsystrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systracetmp
+	printf "\tswitch (sysnum) {\n" > systracetmp
+
+	printf "static void\nsystrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systraceret
+	printf "\tswitch (sysnum) {\n" > systraceret
+
 	# compat types from syscalls.master.  this is slightly ugly,
 	# but given that we have so few compats from over 17 years,
 	# a more complicated solution is not currently warranted.
@@ -376,6 +397,9 @@
 	print > sysnumhdr
 	print > sysprotos
 	print > sysnamesbottom
+	print > systrace
+	print > systracetmp
+	print > systraceret
 
 	# XXX: technically we do not want to have conditionals in rump,
 	# but it is easier to just let the cpp handle them than try to
@@ -647,7 +671,7 @@
 		eno[1] = "rumpns_sys_nomodule"
 		flags[0] = "SYCALL_NOSYS"
 		flags[1] = "0"
-		printf("\t{ 0, 0, %s,\n\t    (sy_call_t *)%s }, \t"	\
+		printf("\t{ 0, 0, %s,\n\t    (sy_call_t *)%s, 0, 0 }, \t"	\
 		    "/* %d = %s */\n",					\
 		    flags[modular], eno[modular], syscall, funcalias)	\
 		    > rumpsysent
@@ -665,7 +689,7 @@
 		fn="(sy_call_t *)rumpns_sys_nomodule"
 	else
 		fn="(sy_call_t *)rumpns_enosys"
-	printf("0,\n\t   %s },", fn) > rumpsysent
+	printf("0,\n\t   %s, 0, 0 },", fn) > rumpsysent
 	for (i = 0; i < (33 - length(fn)) / 8; i++)
 		printf("\t") > rumpsysent
 	printf("/* %d = %s%s */\n", syscall, compatwrap_, funcalias) > rumpsysent
@@ -697,6 +721,45 @@
 	    syscall, wfn, funcalias, rumpentry) > rumpsysmap
 }
 
+function putsystrace(type, compatwrap_) {
+	printf("\t/* %s */\n\tcase %d: {\n", funcname, syscall) > systrace
+	printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systracetmp
+	printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systraceret
+	if (argc > 0) {
+		printf("\t\tswitch(ndx) {\n") > systracetmp
+		printf("\t\tstruct %s%s_args *p = params;\n", compatwrap_, funcname) > systrace
+		for (i = 1; i <= argc; i++) {
+			arg = argtype[i]
+			sub("__restrict$", "", arg)
+			printf("\t\tcase %d:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp
+			if (arg ~ /.*p_t$/ || arg ~ /.*p$/ || arg ~ /.*_t_p$/ ||
+			    arg ~ /.*_pointer_t$/)
+				printf("\t\tuarg[%d] = (intptr_t) SCARG(p, %s).i32; /* %s */\n", \
+				     i - 1, \
+				     argname[i], arg) > systrace
+			else if (index(arg, "*") > 0 || arg == "caddr_t")
+				printf("\t\tuarg[%d] = (intptr_t) SCARG(p, %s); /* %s */\n", \
+				     i - 1, \
+				     argname[i], arg) > systrace
+			else if (substr(arg, 1, 1) == "u" || arg == "size_t")
+				printf("\t\tuarg[%d] = SCARG(p, %s); /* %s */\n", \
+				     i - 1, \
+				     argname[i], arg) > systrace
+			else
+				printf("\t\tiarg[%d] = SCARG(p, %s); /* %s */\n", \
+				     i - 1, \
+				     argname[i], arg) > systrace
+		}
+		printf("\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") > systracetmp
+
+		printf("\t\tif (ndx == 0 || ndx == 1)\n") > systraceret
+		printf("\t\t\tp = \"%s\";\n", returntype) > systraceret
+		printf("\t\tbreak;\n") > systraceret
+	}
+	printf("\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", argc) > systrace
+	printf("\t\tbreak;\n") > systracetmp
+}
+
 function putent(type, compatwrap) {
 	# output syscall declaration for switch table.
 	if (compatwrap == "")
@@ -708,6 +771,7 @@
 	else {
 		arg_type = "struct " compatwrap_ funcname "_args";
 	}
+	putsystrace(type, compatwrap_)
 	proto = "int\t" compatwrap_ funcname "(struct lwp *, const " \
 	    arg_type " *, register_t *);\n"
 	if (sysmap[proto] != 1) {
@@ -729,7 +793,7 @@
 	else
 		wfn = compatwrap "(" funcname ")";
 	wfn_cast="(sy_call_t *)" wfn
-	printf("%s,\n\t    %s },", sycall_flags, wfn_cast) > sysent
+	printf("%s,\n\t    %s, 0, 0 },", sycall_flags, wfn_cast) > sysent
 	for (i = 0; i < (33 - length(wfn_cast)) / 8; i++)
 		printf("\t") > sysent
 	printf("/* %d = %s%s */\n", syscall, compatwrap_, funcalias) > sysent
@@ -922,9 +986,9 @@
 	else
 		sys_stub = sys_nosys;
 
-	printf("\t{ 0, 0, 0,\n\t    %s },\t\t\t/* %d = %s */\n", \
+	printf("\t{ 0, 0, 0,\n\t    %s, 0, 0 },\t\t\t/* %d = %s */\n", \
 	    sys_stub, syscall, comment) > sysent
-	printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s },\t\t/* %d = %s */\n", \
+	printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s, 0, 0 },\t\t/* %d = %s */\n", \
 	    "(sy_call_t *)rumpns_enosys", syscall, comment) > rumpsysent
 	printf("\t/* %3d */\t\"#%d (%s)\",\n", syscall, syscall, comment) \
 	    > sysnamesbottom
@@ -989,9 +1053,9 @@
 			exit 1
 		}
 		while (syscall < nsysent) {
-			printf("\t{ 0, 0, 0,\n\t    %s },\t\t\t/* %d = filler */\n", \
+			printf("\t{ 0, 0, 0,\n\t    %s, 0, 0 },\t\t\t/* %d = filler */\n", \
 			    sys_nosys, syscall) > sysent
-			printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s },\t\t/* %d = filler */\n", \
+			printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s, 0, 0 },\t\t/* %d = filler */\n", \
 			    "(sy_call_t *)rumpns_enosys", syscall) > rumpsysent
 			printf("\t/* %3d */\t\"# filler\",\n", syscall) \
 			    > sysnamesbottom
@@ -1009,6 +1073,9 @@
 	printf("#define\t%sMAXSYSCALL\t%d\n", constprefix, maxsyscall) > sysnumhdr
 	if (nsysent)
 		printf("#define\t%sNSYSENT\t%d\n", constprefix, nsysent) > sysnumhdr
+	printf "\tdefault:\n\t\t*n_args = 0;\n\t\tbreak;\n\t};\n}\n" > systrace
+	printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systracetmp
+	printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systraceret
 } '
 
 cat $sysprotos >> $sysarghdr
@@ -1026,5 +1093,8 @@
 
 #chmod 444 $sysnames $sysnumhdr $syssw
 
+cat $systracetmp >> $systrace
+cat $systraceret >> $systrace
+
 echo Generated following files:
-echo $sysarghdr $sysnumhdr $syssw $sysnames $rumpcalls $rumpcallshdr $rumpsysmap
+echo $sysarghdr $sysnumhdr $syssw $sysnames $systrace $rumpcalls $rumpcallshdr $rumpsysmap
Index: kern/sys_syscall.c
===================================================================
RCS file: /cvsroot/src/sys/kern/sys_syscall.c,v
retrieving revision 1.10
diff -u -u -r1.10 sys_syscall.c
--- kern/sys_syscall.c	5 May 2012 19:44:02 -0000	1.10
+++ kern/sys_syscall.c	7 Mar 2015 07:05:23 -0000
@@ -56,10 +56,9 @@
 	struct proc *p = l->l_proc;
 	int code;
 	int error;
-	int narg;
 #ifdef NETBSD32_SYSCALL
 	register_t args64[SYS_MAXSYSARGS];
-	int i;
+	int i, narg;
 	#define TRACE_ARGS args64
 #else
 	#define TRACE_ARGS &SCARG(uap, args[0])
@@ -77,17 +76,17 @@
 	if (__predict_true(!p->p_trace_enabled))
 		return sy_call(callp, l, &uap->args, rval);
 
-	narg = callp->sy_narg;
 #ifdef NETBSD32_SYSCALL
+	narg = callp->sy_narg;
 	for (i = 0; i < narg; i++)
 		args64[i] = SCARG(uap, args[i]);
 #endif
 
-	error = trace_enter(code, TRACE_ARGS, narg);
+	error = trace_enter(code, callp, TRACE_ARGS);
 	if (__predict_false(error != 0))
 		return error;
 	error = sy_call(callp, l, &uap->args, rval);
-	trace_exit(code, rval, error);
+	trace_exit(code, callp, &uap->args, rval, error);
 	return error;
 
 	#undef TRACE_ARGS
Index: kern/syscalls.c
===================================================================
RCS file: /cvsroot/src/sys/kern/syscalls.c,v
retrieving revision 1.280
diff -u -u -r1.280 syscalls.c
--- kern/syscalls.c	22 Feb 2015 00:53:28 -0000	1.280
+++ kern/syscalls.c	7 Mar 2015 07:05:23 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: syscalls.c,v 1.280 2015/02/22 00:53:28 christos Exp $ */
+/* $NetBSD$ */
 
 /*
  * System call names.
@@ -8,7 +8,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: syscalls.c,v 1.280 2015/02/22 00:53:28 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD$");
 
 #if defined(_KERNEL_OPT)
 #include "opt_modular.h"
Index: kern/syscalls.conf
===================================================================
RCS file: /cvsroot/src/sys/kern/syscalls.conf,v
retrieving revision 1.21
diff -u -u -r1.21 syscalls.conf
--- kern/syscalls.conf	16 Aug 2014 17:24:29 -0000	1.21
+++ kern/syscalls.conf	7 Mar 2015 07:05:23 -0000
@@ -3,6 +3,7 @@
 sysnames="syscalls.c"
 sysnumhdr="../sys/syscall.h"
 syssw="init_sysent.c"
+systrace="systrace_args.c"
 sysarghdr="../sys/syscallargs.h"
 sysarghdrextra='#include <sys/mount.h>\n#ifndef RUMP_CLIENT\n#include <sys/sched.h>\n#endif\n#include <sys/socket.h>\n\n'
 sysalign=1
Index: sys/param.h
===================================================================
RCS file: /cvsroot/src/sys/sys/param.h,v
retrieving revision 1.465
diff -u -u -r1.465 param.h
--- sys/param.h	14 Feb 2015 12:59:02 -0000	1.465
+++ sys/param.h	7 Mar 2015 07:05:23 -0000
@@ -63,7 +63,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	799000500	/* NetBSD 7.99.5 */
+#define	__NetBSD_Version__	799000600	/* NetBSD 7.99.6 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)
Index: sys/proc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/proc.h,v
retrieving revision 1.320
diff -u -u -r1.320 proc.h
--- sys/proc.h	21 Feb 2014 22:06:48 -0000	1.320
+++ sys/proc.h	7 Mar 2015 07:05:23 -0000
@@ -177,6 +177,9 @@
 
 	size_t		e_ucsize;	/* size of ucontext_t */
 	void		(*e_startlwp)(void *);
+
+	/* Dtrace argument converter */
+	void 		(*e_dtrace_args)(int, void *, uint64_t *, int *);
 };
 
 /*
Index: sys/syscallvar.h
===================================================================
RCS file: /cvsroot/src/sys/sys/syscallvar.h,v
retrieving revision 1.9
diff -u -u -r1.9 syscallvar.h
--- sys/syscallvar.h	4 Mar 2014 03:24:03 -0000	1.9
+++ sys/syscallvar.h	7 Mar 2015 07:05:23 -0000
@@ -36,6 +36,10 @@
 #error nothing of interest to userspace here
 #endif
 
+#if defined(_KERNEL) && defined(_KERNEL_OPT)
+#include "opt_dtrace.h"
+#endif
+
 #include <sys/systm.h>
 #include <sys/proc.h>
 
@@ -72,8 +76,13 @@
 	    (sy->sy_flags & SYCALL_INDIRECT) == 0;
 	int error;
 
-	if (__predict_true(!do_trace)
-	    || (error = trace_enter(code, uap, sy->sy_narg)) == 0) {
+#ifdef KDTRACE_HOOKS
+#define KDTRACE_ENTRY(a)	(a)
+#else
+#define KDTRACE_ENTRY(a)	(0)
+#endif
+	if (__predict_true(!(do_trace || KDTRACE_ENTRY(sy->sy_entry)))
+	    || (error = trace_enter(code, sy, uap)) == 0) {
 		rval[0] = 0;
 #if !defined(__mips__) && !defined(__m68k__)
 		/*
@@ -85,8 +94,8 @@
 		error = sy_call(sy, l, uap, rval);
 	}
 
-	if (__predict_false(do_trace)) {
-		trace_exit(code, rval, error);
+	if (__predict_false(do_trace || KDTRACE_ENTRY(sy->sy_return))) {
+		trace_exit(code, sy, uap, rval, error);
 	}
 	return error;
 }
Index: sys/systm.h
===================================================================
RCS file: /cvsroot/src/sys/sys/systm.h,v
retrieving revision 1.266
diff -u -u -r1.266 systm.h
--- sys/systm.h	3 Aug 2014 12:49:32 -0000	1.266
+++ sys/systm.h	7 Mar 2015 07:05:23 -0000
@@ -57,6 +57,7 @@
 struct clockframe;
 struct lwp;
 struct proc;
+struct sysent;
 struct timeval;
 struct tty;
 struct uio;
@@ -115,11 +116,21 @@
 
 typedef int	sy_call_t(struct lwp *, const void *, register_t *);
 
+/* Used by the machine dependent syscall() code. */
+typedef	void (*systrace_probe_func_t)(uint32_t, int, const struct sysent *,
+    const void *, register_t);
+
+typedef	void (*systrace_args_func_t)(int, void *, uint64_t *, int *);
+
+extern systrace_probe_func_t	systrace_probe_func;
+
 extern struct sysent {		/* system call table */
 	short	sy_narg;	/* number of args */
 	short	sy_argsize;	/* total size of arguments */
 	int	sy_flags;	/* flags. see below */
 	sy_call_t *sy_call;     /* implementing function */
+	uint32_t sy_entry;	/* DTrace entry ID for systrace. */
+	uint32_t sy_return;	/* DTrace return ID for systrace. */
 } sysent[];
 extern int nsysent;
 #if	BYTE_ORDER == BIG_ENDIAN
@@ -388,8 +399,9 @@
  */
 #ifdef _KERNEL
 bool	trace_is_enabled(struct proc *);
-int	trace_enter(register_t, const register_t *, int);
-void	trace_exit(register_t, register_t [], int);
+int	trace_enter(register_t, const struct sysent *, const void *);
+void	trace_exit(register_t, const struct sysent *, const void *,
+    register_t [], int);
 #endif
 
 int	uiomove(void *, size_t, struct uio *);
>





Home | Main Index | Thread Index | Old Index