tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Revisiting DTrace syscall provider



In article <CAKrYomg6z8SCUvMZQmfk+n09n+1XfNiZnc4fAC59zxoALJocmg%mail.gmail.com@localhost>,
Ryota Ozaki  <ozaki-r%netbsd.org@localhost> wrote:
>Anyway I updated my patches; they're based on latest -current.
>
>Changes since the previous are:
>- Remove an unexpected contribution comment from kern_dtrace.c
>  (thanks riastradh!)
>- Don't unload systrace.kmod when there are users using dtrace
>- Add "created from" line to *_systrace_args.c
>
>http://www.netbsd.org/~ozaki-r/systrace.diff
>http://www.netbsd.org/~ozaki-r/systrace-full.diff
>https://github.com/ozaki-r/netbsd-src/commits/dtrace-syscall-provider2

Thanks for working on this... I had done mostly the same, but with a few
differences:

	1. I named the module dtrace_systrace to match with the other
	   dtrace modules.
	2. I did not create the args union to deal with signed/unsigned,
	   chose to mimick what FreeBSD does. Do you prefer your way?
	3. I fixed the entry/exit probe functions to deal with the return
	   value of the syscall, as well as the entry and exit argument
	   descriptions.
	4. I am not sure if creating systrace_foo files in the emulations
	   should be done right now, since we don't really load/use them.
	   This should be a todo item, perhaps these functions should be
	   the emulation structure and loaded with the emulation. But
	   that should be phase II...
	5. I bumped the kernel because of struct sysent changes.

Anyway it works nicely, and I'd like to commit it if you don't mind
(or if you have any changes).

Thanks,

christos

Index: kern/Makefile
===================================================================
RCS file: /cvsroot/src/sys/kern/Makefile,v
retrieving revision 1.17
diff -u -u -r1.17 Makefile
--- kern/Makefile	16 Jan 2014 01:15:34 -0000	1.17
+++ kern/Makefile	6 Mar 2015 20:55:27 -0000
@@ -11,7 +11,7 @@
 	@false
 
 SYSCALLSRC = makesyscalls.sh syscalls.conf syscalls.master
-init_sysent.c syscalls.c ../sys/syscall.h ../sys/syscallargs.h: ${SYSCALLSRC}
+init_sysent.c syscalls.c systrace_args.c ../sys/syscall.h ../sys/syscallargs.h: ${SYSCALLSRC}
 	${HOST_SH} makesyscalls.sh syscalls.conf syscalls.master
 
 VNODEIFSRC = vnode_if.sh vnode_if.src
Index: kern/makesyscalls.sh
===================================================================
RCS file: /cvsroot/src/sys/kern/makesyscalls.sh,v
retrieving revision 1.145
diff -u -u -r1.145 makesyscalls.sh
--- kern/makesyscalls.sh	24 Jul 2014 11:58:45 -0000	1.145
+++ kern/makesyscalls.sh	6 Mar 2015 20:55:28 -0000
@@ -61,6 +61,7 @@
 # source the config file.
 sys_nosys="sys_nosys"	# default is sys_nosys(), if not specified otherwise
 maxsysargs=8		# default limit is 8 (32bit) arguments
+systrace="/dev/null"
 rumpcalls="/dev/null"
 rumpcallshdr="/dev/null"
 rumpsysmap="/dev/null"
@@ -75,15 +76,17 @@
 sysnamesbottom="sysnames.bottom"
 rumptypes="rumphdr.types"
 rumpprotos="rumphdr.protos"
+systracetmp="systrace.$$"
+systraceret="systraceret.$$"
 
-trap "rm $sysdcl $sysprotos $sysent $sysnamesbottom $rumpsysent $rumptypes $rumpprotos" 0
+trap "rm $sysdcl $sysprotos $sysent $sysnamesbottom $rumpsysent $rumptypes $rumpprotos $systracetmp $systraceret" 0
 
 # Awk program (must support nawk extensions)
 # Use "awk" at Berkeley, "nawk" or "gawk" elsewhere.
 awk=${AWK:-awk}
 
 # Does this awk have a "toupper" function?
-have_toupper=`$awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null`
+have_toupper="$($awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null)"
 
 # If this awk does not define "toupper" then define our own.
 if [ "$have_toupper" = TRUE ] ; then
@@ -137,6 +140,9 @@
 	sysnumhdr = \"$sysnumhdr\"
 	sysarghdr = \"$sysarghdr\"
 	sysarghdrextra = \"$sysarghdrextra\"
+	systrace = \"$systrace\"
+	systracetmp = \"$systracetmp\"
+	systraceret = \"$systraceret\"
 	rumpcalls = \"$rumpcalls\"
 	rumpcallshdr = \"$rumpcallshdr\"
 	rumpsysent = \"$rumpsysent\"
@@ -211,6 +217,10 @@
 	printf "/* %s */\n\n", tag > rumpcallshdr
 	printf "/*\n * System call protos in rump namespace.\n *\n" > rumpcallshdr
 	printf " * DO NOT EDIT-- this file is automatically generated.\n" > rumpcallshdr
+
+	printf "/* %s */\n\n", tag > systrace
+	printf "/*\n * System call argument to DTrace register array converstion.\n *\n" > systrace
+	printf " * DO NOT EDIT-- this file is automatically generated.\n" > systrace
 }
 NR == 1 {
 	sub(/ $/, "")
@@ -324,6 +334,17 @@
 		"\t\t<= %sMAXSYSARGS * sizeof (%s) ? 1 : -1];\n", \
 		constprefix, registertype) >sysarghdr
 
+	printf " * This file is part of the DTrace syscall provider.\n */\n\n" > systrace
+	printf "static void\nsystrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)\n{\n" > systrace
+	printf "\tint64_t *iarg  = (int64_t *) uarg;\n" > systrace
+	printf "\tswitch (sysnum) {\n" > systrace
+
+	printf "static void\nsystrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systracetmp
+	printf "\tswitch (sysnum) {\n" > systracetmp
+
+	printf "static void\nsystrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systraceret
+	printf "\tswitch (sysnum) {\n" > systraceret
+
 	# compat types from syscalls.master.  this is slightly ugly,
 	# but given that we have so few compats from over 17 years,
 	# a more complicated solution is not currently warranted.
@@ -376,6 +397,9 @@
 	print > sysnumhdr
 	print > sysprotos
 	print > sysnamesbottom
+	print > systrace
+	print > systracetmp
+	print > systraceret
 
 	# XXX: technically we do not want to have conditionals in rump,
 	# but it is easier to just let the cpp handle them than try to
@@ -647,7 +671,7 @@
 		eno[1] = "rumpns_sys_nomodule"
 		flags[0] = "SYCALL_NOSYS"
 		flags[1] = "0"
-		printf("\t{ 0, 0, %s,\n\t    (sy_call_t *)%s }, \t"	\
+		printf("\t{ 0, 0, %s,\n\t    (sy_call_t *)%s, NULL, 0, 0 }, \t"	\
 		    "/* %d = %s */\n",					\
 		    flags[modular], eno[modular], syscall, funcalias)	\
 		    > rumpsysent
@@ -665,7 +689,7 @@
 		fn="(sy_call_t *)rumpns_sys_nomodule"
 	else
 		fn="(sy_call_t *)rumpns_enosys"
-	printf("0,\n\t   %s },", fn) > rumpsysent
+	printf("0,\n\t   %s, NULL, 0, 0 },", fn) > rumpsysent
 	for (i = 0; i < (33 - length(fn)) / 8; i++)
 		printf("\t") > rumpsysent
 	printf("/* %d = %s%s */\n", syscall, compatwrap_, funcalias) > rumpsysent
@@ -697,6 +721,40 @@
 	    syscall, wfn, funcalias, rumpentry) > rumpsysmap
 }
 
+function putsystrace(type, compatwrap_) {
+	printf("\t/* %s */\n\tcase %d: {\n", funcname, syscall) > systrace
+	printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systracetmp
+	printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systraceret
+	if (argc > 0) {
+		printf("\t\tswitch(ndx) {\n") > systracetmp
+		printf("\t\tstruct %s%s_args *p = params;\n", compatwrap_, funcname) > systrace
+		for (i = 1; i <= argc; i++) {
+			arg = argtype[i]
+			sub("__restrict$", "", arg)
+			printf("\t\tcase %d:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp
+			if (index(arg, "*") > 0 || arg == "caddr_t")
+				printf("\t\tuarg[%d] = (intptr_t) SCARG(p, %s); /* %s */\n", \
+				     i - 1, \
+				     argname[i], arg) > systrace
+			else if (substr(arg, 1, 1) == "u" || arg == "size_t")
+				printf("\t\tuarg[%d] = SCARG(p, %s); /* %s */\n", \
+				     i - 1, \
+				     argname[i], arg) > systrace
+			else
+				printf("\t\tiarg[%d] = SCARG(p, %s); /* %s */\n", \
+				     i - 1, \
+				     argname[i], arg) > systrace
+		}
+		printf("\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") > systracetmp
+
+		printf("\t\tif (ndx == 0 || ndx == 1)\n") > systraceret
+		printf("\t\t\tp = \"%s\";\n", returntype) > systraceret
+		printf("\t\tbreak;\n") > systraceret
+	}
+	printf("\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", argc) > systrace
+	printf("\t\tbreak;\n") > systracetmp
+}
+
 function putent(type, compatwrap) {
 	# output syscall declaration for switch table.
 	if (compatwrap == "")
@@ -708,6 +766,7 @@
 	else {
 		arg_type = "struct " compatwrap_ funcname "_args";
 	}
+	putsystrace(type, compatwrap_)
 	proto = "int\t" compatwrap_ funcname "(struct lwp *, const " \
 	    arg_type " *, register_t *);\n"
 	if (sysmap[proto] != 1) {
@@ -729,7 +788,7 @@
 	else
 		wfn = compatwrap "(" funcname ")";
 	wfn_cast="(sy_call_t *)" wfn
-	printf("%s,\n\t    %s },", sycall_flags, wfn_cast) > sysent
+	printf("%s,\n\t    %s, NULL, 0, 0 },", sycall_flags, wfn_cast) > sysent
 	for (i = 0; i < (33 - length(wfn_cast)) / 8; i++)
 		printf("\t") > sysent
 	printf("/* %d = %s%s */\n", syscall, compatwrap_, funcalias) > sysent
@@ -922,9 +981,9 @@
 	else
 		sys_stub = sys_nosys;
 
-	printf("\t{ 0, 0, 0,\n\t    %s },\t\t\t/* %d = %s */\n", \
+	printf("\t{ 0, 0, 0,\n\t    %s, NULL, 0, 0 },\t\t\t/* %d = %s */\n", \
 	    sys_stub, syscall, comment) > sysent
-	printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s },\t\t/* %d = %s */\n", \
+	printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s, NULL, 0, 0 },\t\t/* %d = %s */\n", \
 	    "(sy_call_t *)rumpns_enosys", syscall, comment) > rumpsysent
 	printf("\t/* %3d */\t\"#%d (%s)\",\n", syscall, syscall, comment) \
 	    > sysnamesbottom
@@ -989,9 +1048,9 @@
 			exit 1
 		}
 		while (syscall < nsysent) {
-			printf("\t{ 0, 0, 0,\n\t    %s },\t\t\t/* %d = filler */\n", \
+			printf("\t{ 0, 0, 0,\n\t    %s, NULL, 0, 0 },\t\t\t/* %d = filler */\n", \
 			    sys_nosys, syscall) > sysent
-			printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s },\t\t/* %d = filler */\n", \
+			printf("\t{ 0, 0, SYCALL_NOSYS,\n\t    %s, NULL, 0, 0 },\t\t/* %d = filler */\n", \
 			    "(sy_call_t *)rumpns_enosys", syscall) > rumpsysent
 			printf("\t/* %3d */\t\"# filler\",\n", syscall) \
 			    > sysnamesbottom
@@ -1009,6 +1068,9 @@
 	printf("#define\t%sMAXSYSCALL\t%d\n", constprefix, maxsyscall) > sysnumhdr
 	if (nsysent)
 		printf("#define\t%sNSYSENT\t%d\n", constprefix, nsysent) > sysnumhdr
+	printf "\tdefault:\n\t\t*n_args = 0;\n\t\tbreak;\n\t};\n}\n" > systrace
+	printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systracetmp
+	printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systraceret
 } '
 
 cat $sysprotos >> $sysarghdr
@@ -1026,5 +1088,8 @@
 
 #chmod 444 $sysnames $sysnumhdr $syssw
 
+cat $systracetmp >> $systrace
+cat $systraceret >> $systrace
+
 echo Generated following files:
-echo $sysarghdr $sysnumhdr $syssw $sysnames $rumpcalls $rumpcallshdr $rumpsysmap
+echo $sysarghdr $sysnumhdr $syssw $sysnames $systrace $rumpcalls $rumpcallshdr $rumpsysmap
Index: sys/param.h
===================================================================
RCS file: /cvsroot/src/sys/sys/param.h,v
retrieving revision 1.465
diff -u -u -r1.465 param.h
--- sys/param.h	14 Feb 2015 12:59:02 -0000	1.465
+++ sys/param.h	6 Mar 2015 20:55:50 -0000
@@ -63,7 +63,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	799000500	/* NetBSD 7.99.5 */
+#define	__NetBSD_Version__	799000600	/* NetBSD 7.99.6 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)
Index: sys/syscallvar.h
===================================================================
RCS file: /cvsroot/src/sys/sys/syscallvar.h,v
retrieving revision 1.9
diff -u -u -r1.9 syscallvar.h
--- sys/syscallvar.h	4 Mar 2014 03:24:03 -0000	1.9
+++ sys/syscallvar.h	6 Mar 2015 20:55:50 -0000
@@ -36,6 +36,10 @@
 #error nothing of interest to userspace here
 #endif
 
+#if defined(_KERNEL) && defined(_KERNEL_OPT)
+#include "opt_dtrace.h"
+#endif
+
 #include <sys/systm.h>
 #include <sys/proc.h>
 
@@ -72,6 +76,16 @@
 	    (sy->sy_flags & SYCALL_INDIRECT) == 0;
 	int error;
 
+#ifdef KDTRACE_HOOKS
+	/*
+	 * If the systrace module has registered its probe
+	 * callback and if there is a probe active for the
+	 * syscall 'entry', process the probe.
+	 */
+	if (__predict_false(systrace_probe_func != NULL &&
+	    sy->sy_entry != 0))
+		(*systrace_probe_func)(sy->sy_entry, code, sy, uap, 0);
+#endif
 	if (__predict_true(!do_trace)
 	    || (error = trace_enter(code, uap, sy->sy_narg)) == 0) {
 		rval[0] = 0;
@@ -85,6 +99,18 @@
 		error = sy_call(sy, l, uap, rval);
 	}
 
+#ifdef KDTRACE_HOOKS
+	/*
+	 * If the systrace module has registered its probe
+	 * callback and if there is a probe active for the
+	 * syscall 'return', process the probe.
+	 */
+	if (__predict_false(systrace_probe_func != NULL &&
+	    sy->sy_return != 0))
+		(*systrace_probe_func)(sy->sy_return, code, sy,
+		    uap, error ? -1 : rval[0]);
+#endif
+
 	if (__predict_false(do_trace)) {
 		trace_exit(code, rval, error);
 	}
Index: sys/systm.h
===================================================================
RCS file: /cvsroot/src/sys/sys/systm.h,v
retrieving revision 1.266
diff -u -u -r1.266 systm.h
--- sys/systm.h	3 Aug 2014 12:49:32 -0000	1.266
+++ sys/systm.h	6 Mar 2015 20:55:50 -0000
@@ -57,6 +57,7 @@
 struct clockframe;
 struct lwp;
 struct proc;
+struct sysent;
 struct timeval;
 struct tty;
 struct uio;
@@ -115,11 +116,26 @@
 
 typedef int	sy_call_t(struct lwp *, const void *, register_t *);
 
+/* Used by the machine dependent syscall() code. */
+typedef	void (*systrace_probe_func_t)(uint32_t, int, const struct sysent *,
+    const void *, register_t);
+
+/*
+ * Used by loaded syscalls to convert arguments to a DTrace array
+ * of 64-bit arguments.
+ */
+typedef	void (*systrace_args_func_t)(int, void *, uint64_t *, int *);
+
+extern systrace_probe_func_t	systrace_probe_func;
+
 extern struct sysent {		/* system call table */
 	short	sy_narg;	/* number of args */
 	short	sy_argsize;	/* total size of arguments */
 	int	sy_flags;	/* flags. see below */
 	sy_call_t *sy_call;     /* implementing function */
+	systrace_args_func_t sy_systrace_args_func;
+	uint32_t sy_entry;	/* DTrace entry ID for systrace. */
+	uint32_t sy_return;	/* DTrace return ID for systrace. */
 } sysent[];
 extern int nsysent;
 #if	BYTE_ORDER == BIG_ENDIAN
Index: dev/systrace/systrace.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/dev/systrace/systrace.c,v
retrieving revision 1.4
diff -u -u -r1.4 systrace.c
--- dev/systrace/systrace.c	12 Jan 2014 17:49:30 -0000	1.4
+++ dev/systrace/systrace.c	6 Mar 2015 21:00:41 -0000
@@ -169,7 +169,7 @@
 systrace_probe(u_int32_t id, int sysnum, struct sysent *se, void *params)
 {
 	int		n_args	= 0;
-	union systrace_probe_args_un	uargs[SYS_MAXSYSARGS];
+	uint64_t	uargs[SYS_MAXSYSARGS];
 
 	/*
 	 * Check if this syscall has an argument conversion function
@@ -191,8 +191,7 @@
 		systrace_args(sysnum, params, uargs, &n_args);
 
 	/* Process the probe using the converted argments. */
-	dtrace_probe(id, uargs[0].u, uargs[1].u, uargs[2].u, uargs[3].u,
-		uargs[4].u);
+	dtrace_probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
 }
 #endif
 
@@ -201,8 +200,12 @@
 {
 	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
 
-	systrace_setargdesc(sysnum, desc->dtargd_ndx, desc->dtargd_native,
-	    sizeof(desc->dtargd_native));
+	if (SYSTRACE_ISENTRY((uintptr_t)parg))
+		systrace_entry_setargdesc(sysnum, desc->dtargd_ndx, 
+		    desc->dtargd_native, sizeof(desc->dtargd_native));
+	else
+		systrace_return_setargdesc(sysnum, desc->dtargd_ndx, 
+		    desc->dtargd_native, sizeof(desc->dtargd_native));
 
 	if (desc->dtargd_native[0] == '\0')
 		desc->dtargd_ndx = DTRACE_ARGNONE;
@@ -371,8 +374,10 @@
 		return 0;
 
 	case MODULE_CMD_FINI:
-		systrace_unload();
-		return 0;
+		return systrace_unload();
+
+	case MODULE_CMD_AUTOUNLOAD:
+		return EBUSY;
 
 	default:
 		return ENOTTY;
Index: dist/uts/common/dtrace/dtrace.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/dtrace/dtrace.c,v
retrieving revision 1.29
diff -u -u -r1.29 dtrace.c
--- dist/uts/common/dtrace/dtrace.c	31 Jan 2015 13:36:29 -0000	1.29
+++ dist/uts/common/dtrace/dtrace.c	6 Mar 2015 21:00:43 -0000
@@ -7893,7 +7893,7 @@
  */
 dtrace_id_t
 dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod,
-    char *func, char *name)
+    char *func, const char *name)
 {
 	dtrace_probekey_t pkey;
 	dtrace_id_t id;
@@ -7905,7 +7905,7 @@
 	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
 	pkey.dtpk_func = func;
 	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
-	pkey.dtpk_name = name;
+	pkey.dtpk_name = __UNCONST(name);
 	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
 	pkey.dtpk_id = DTRACE_IDNONE;
 
Index: dist/uts/common/sys/dtrace.h
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/sys/dtrace.h,v
retrieving revision 1.10
diff -u -u -r1.10 dtrace.h
--- dist/uts/common/sys/dtrace.h	15 Mar 2014 08:00:19 -0000	1.10
+++ dist/uts/common/sys/dtrace.h	6 Mar 2015 21:00:43 -0000
@@ -2056,7 +2056,7 @@
 extern int dtrace_condense(dtrace_provider_id_t);
 extern void dtrace_invalidate(dtrace_provider_id_t);
 extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, char *,
-    char *, char *);
+    char *, const char *);
 extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *,
     const char *, const char *, int, void *);
 extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t);



Home | Main Index | Thread Index | Old Index